2023-06-01 14:40:28 +02:00
|
|
|
const axios = require("axios");
|
|
|
|
const utils = require("./utils");
|
2023-06-22 23:54:44 +02:00
|
|
|
const HTMLParser = require("node-html-parser");
|
2023-06-01 14:40:28 +02:00
|
|
|
/** Item-count threshold used by fetchData to stop paging through the product API. */
const MAXITEMS = 10000;

// Template for every unit key that is counted in pieces ("stk").
const PIECE_UNIT = { unit: "stk", factor: 1 };

/**
 * Unit table handed to utils.convertUnit by getCanonical.
 * Each key maps to a target unit name and a numeric factor
 * (presumably keyed by the API's volumeLabelKey values; verify against utils.convertUnit).
 */
const units = {
    bd: { ...PIECE_UNIT },
    gr: { unit: "g", factor: 1 },
    lt: { unit: "ml", factor: 1000 },
    pk: { ...PIECE_UNIT },
    pa: { ...PIECE_UNIT },
    rl: { ...PIECE_UNIT },
    tb: { ...PIECE_UNIT },
};
|
|
|
|
|
|
|
|
exports.getCanonical = function (item, today) {
|
|
|
|
let quantity = item.amount;
|
|
|
|
let unit = item.volumeLabelKey;
|
2023-06-03 15:09:29 +02:00
|
|
|
return utils.convertUnit(
|
|
|
|
{
|
|
|
|
id: item.productId,
|
|
|
|
name: item.name,
|
2023-06-17 20:54:45 +02:00
|
|
|
// description: "", not available
|
2023-06-03 15:09:29 +02:00
|
|
|
price: item.price.regular.value / 100,
|
|
|
|
priceHistory: [{ date: today, price: item.price.regular.value / 100 }],
|
|
|
|
isWeighted: item.isWeightArticle,
|
|
|
|
unit,
|
|
|
|
quantity,
|
|
|
|
bio: item.name.toLowerCase().includes("bio") && !item.name.toLowerCase().includes("fabio"),
|
2023-06-21 16:15:05 +02:00
|
|
|
url: item.sku.replace("-", ""),
|
2023-06-03 15:09:29 +02:00
|
|
|
},
|
|
|
|
units,
|
|
|
|
"penny"
|
|
|
|
);
|
|
|
|
};
|
2023-06-01 14:40:28 +02:00
|
|
|
|
|
|
|
exports.fetchData = async function () {
|
|
|
|
hits = 100;
|
|
|
|
page = 0;
|
|
|
|
done = false;
|
|
|
|
result = [];
|
|
|
|
while (!done) {
|
|
|
|
const PENNY_SEARCH = `https://www.penny.at/api/products?page=${page}&pageSize=${hits}`;
|
|
|
|
data = (await axios.get(PENNY_SEARCH)).data;
|
2023-06-02 16:45:54 +02:00
|
|
|
done = data.count < hits || page * hits > MAXITEMS;
|
2023-06-01 14:40:28 +02:00
|
|
|
page++;
|
|
|
|
result = result.concat(data.results);
|
|
|
|
}
|
|
|
|
return result;
|
2023-06-03 15:09:29 +02:00
|
|
|
};
|
2023-06-02 10:38:14 +02:00
|
|
|
|
2023-07-11 15:31:02 +02:00
|
|
|
/**
 * Recursively crawls a Penny category page and appends every category found
 * in its navigation tree to `result`.
 *
 * @param {string} url - Category page to download and parse.
 * @param {?string} parent - " -> "-joined path of the parent category, or null at the root.
 * @param {Array<{id: string, url: string, code: null}>} result - Output array, mutated in place.
 * @param {Set<string>} lookup - Links already visited, mutated in place; prevents revisiting.
 * @returns {Promise<void>}
 * @throws Propagates download/parse errors for `url` itself; errors of nested pages are
 *   logged and skipped.
 */
async function parseCategory(url, parent, result, lookup) {
    const rootUrl = "https://www.penny.at/kategorie";
    const data = (await axios.get(url)).data;
    const dom = HTMLParser.parse(data);

    // A non-root page titled "Alle Kategorien" is not descended into.
    // The title element may be missing, so the check must tolerate undefined.
    const categoryTitle = dom.querySelector('[data-test="category-title"]')?.textContent;
    if (url !== rootUrl && categoryTitle?.includes("Alle Kategorien")) return;

    const categories = dom.querySelectorAll('[data-test="category-tree-navigation-button"]');
    for (const category of categories) {
        const label = category.querySelector(".subtitle-2");
        if (!label) continue;

        const link = "https://www.penny.at" + category.getAttribute("href");
        // Decode every "&amp;" entity left in the label text.
        const title = label.innerText.trim().replace(/&amp;/g, "&");
        const name = parent ? `${parent} -> ${title}` : title;
        // Checked against the full path, so this only matches top-level entries.
        if (name.startsWith("Alle Angebote")) continue;
        if (lookup.has(link)) continue;

        lookup.add(link);
        result.push({ id: name, url: link, code: null });

        try {
            await parseCategory(link, name, result, lookup);
        } catch (e) {
            // Best effort: the server sometimes responds with 502. Keep crawling the
            // remaining categories, but leave a trace instead of failing silently.
            console.warn(`Penny: could not parse category ${link}: ${e?.message ?? e}`);
        }
    }
}
|
|
|
|
|
|
|
|
exports.initializeCategoryMapping = async () => {
|
|
|
|
const categories = [];
|
2023-07-11 15:31:02 +02:00
|
|
|
await parseCategory("https://www.penny.at/kategorie", null, categories, new Set());
|
2023-06-22 23:54:44 +02:00
|
|
|
utils.mergeAndSaveCategories("penny", categories);
|
|
|
|
|
|
|
|
exports.categoryLookup = {};
|
|
|
|
for (const category of categories) {
|
|
|
|
exports.categoryLookup[category.id] = category;
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
exports.mapCategory = (rawItem) => {
|
|
|
|
const categoryPath = rawItem.parentCategories.filter((path) => path.length > 0 && !path[0].name.includes("ngebot"))[0];
|
2023-06-26 17:13:16 +02:00
|
|
|
if (!categoryPath) return null;
|
2023-06-22 23:54:44 +02:00
|
|
|
const categoryName = categoryPath.map((path) => path.name).join(" -> ");
|
|
|
|
const category = exports.categoryLookup[categoryName];
|
|
|
|
if (category) return category.code;
|
|
|
|
return null;
|
|
|
|
};
|
2023-06-21 01:28:38 +02:00
|
|
|
|
2023-06-03 15:09:29 +02:00
|
|
|
exports.urlBase = "https://www.penny.at/produkte/";
|
2023-07-11 15:31:02 +02:00
|
|
|
|
|
|
|
if (require.main == module) {
|
|
|
|
(async () => {
|
|
|
|
await exports.initializeCategoryMapping();
|
|
|
|
})();
|
|
|
|
}
|