mirror of
https://github.com/badlogic/heissepreise.git
synced 2024-09-22 00:00:59 +02:00
134 lines
4.3 KiB
JavaScript
134 lines
4.3 KiB
JavaScript
const axios = require("axios");
|
|
const utils = require("./utils");
|
|
const HTMLParser = require("node-html-parser");
|
|
|
|
const units = {
|
|
mbe: { unit: "wg", factor: 1 },
|
|
};
|
|
|
|
exports.getUnimarktCategoryPages = async () => {
|
|
const categoryPages = [];
|
|
|
|
try {
|
|
var res = await axios.get(exports.urlBase, {
|
|
validateStatus: function (status) {
|
|
return status >= 200 && status < 300;
|
|
},
|
|
});
|
|
|
|
if (res && res.data) {
|
|
var root = HTMLParser.parse(res.data);
|
|
root.querySelectorAll("#menu > li > ul").forEach((list) => {
|
|
// don't scrape category "Themen & Marken" since there are only duplicate
|
|
// products and this will currupt the category mapping logic
|
|
if (list._attrs["id"] !== "submenu-themen-marken") {
|
|
list.querySelectorAll("a[href^='/']").forEach((category) => {
|
|
const categoryHref = category._attrs["href"].replace("/", "");
|
|
categoryPages.push(categoryHref);
|
|
});
|
|
}
|
|
});
|
|
}
|
|
} catch (err) {
|
|
console.log("Error while getting Unimarkt Category Pages");
|
|
}
|
|
|
|
return categoryPages;
|
|
};
|
|
|
|
exports.getCanonical = function (item, today) {
|
|
let [quantity, unit] = utils.parseUnitAndQuantityAtEnd(item.unit.replace("/ EINWEG", "").replace("/ MEHRWEG", ""));
|
|
return utils.convertUnit(
|
|
{
|
|
id: item.id,
|
|
name: item.name,
|
|
// description: "", not available
|
|
price: item.price,
|
|
priceHistory: [{ date: today, price: item.price }],
|
|
quantity,
|
|
unit,
|
|
bio: item.name.toLowerCase().includes("bio"),
|
|
url: item.canonicalUrl,
|
|
},
|
|
units,
|
|
"unimarkt",
|
|
{
|
|
unit: "stk",
|
|
quantity: 1,
|
|
}
|
|
);
|
|
};
|
|
|
|
exports.fetchData = async function () {
|
|
let unimarktItems = [];
|
|
|
|
const UNIMARKT_CATEGORIES = await exports.getUnimarktCategoryPages();
|
|
|
|
for (let category of UNIMARKT_CATEGORIES) {
|
|
var res = await axios.get(`${exports.urlBase}/${category}`, {
|
|
validateStatus: function (status) {
|
|
return status >= 200 && status < 300;
|
|
},
|
|
});
|
|
|
|
if (res && res.data) {
|
|
var root = HTMLParser.parse(res.data);
|
|
|
|
root.querySelectorAll(".articleListItem .produktContainer").forEach((product) => {
|
|
unimarktItems.push({
|
|
id: product._attrs["data-articleid"],
|
|
name: product.querySelector(".name").text,
|
|
price: parseFloat(product._attrs["data-price"]),
|
|
unit: product.querySelector(".grammatur").text,
|
|
canonicalUrl: product.querySelector(".image > a")._attrs["href"],
|
|
categoryPath: category,
|
|
});
|
|
});
|
|
}
|
|
}
|
|
return unimarktItems;
|
|
};
|
|
|
|
exports.initializeCategoryMapping = async () => {
|
|
let categories = [];
|
|
|
|
const UNIMARKT_CATEGORIES = await exports.getUnimarktCategoryPages();
|
|
|
|
for (let category of UNIMARKT_CATEGORIES) {
|
|
var res = await axios.get(`${exports.urlBase}/${category}`, {
|
|
validateStatus: function (status) {
|
|
return status >= 200 && status < 300;
|
|
},
|
|
});
|
|
|
|
if (res && res.data) {
|
|
var root = HTMLParser.parse(res.data);
|
|
|
|
let categoryName = [];
|
|
// don't select the first child because it's the homepage-link in the breadcrumb
|
|
root.querySelectorAll(".breadcrumb li:not(:first-child)").forEach((listItem) => {
|
|
categoryName.push(listItem.querySelector("span").text);
|
|
});
|
|
|
|
categories.push({
|
|
id: category,
|
|
description: categoryName.join(" -> "),
|
|
url: `${exports.urlBase}/${category}`,
|
|
code: null,
|
|
});
|
|
}
|
|
}
|
|
|
|
utils.mergeAndSaveCategories("unimarkt", categories);
|
|
exports.categoryLookup = {};
|
|
for (const category of categories) {
|
|
exports.categoryLookup[category.id] = category;
|
|
}
|
|
};
|
|
|
|
exports.mapCategory = (rawItem) => {
|
|
return exports.categoryLookup[rawItem.categoryPath]?.code;
|
|
};
|
|
|
|
exports.urlBase = "https://shop.unimarkt.at";
|