heissepreise/stores/penny.js
Mario Zechner dd938ba7cd Closes #135
Penny now has a subcategory that leads back to the all categories page. This triggered an infinite recursion which eventually goes OOM boom.
2023-07-11 15:31:02 +02:00

108 lines
3.6 KiB
JavaScript

const axios = require("axios");
const utils = require("./utils");
const HTMLParser = require("node-html-parser");
// Safety cap for the paging loop in fetchData: paging stops once
// `page * pageSize` exceeds this value.
const MAXITEMS = 10000;

// Lookup table handed to utils.convertUnit together with every item built in
// getCanonical, where the item's `unit` is taken from `volumeLabelKey`.
// Each entry names a target unit and a factor (presumably the multiplier
// applied to the quantity — confirm in utils.convertUnit).
const units = {
    bd: { unit: "stk", factor: 1 },
    gr: { unit: "g", factor: 1 },
    // The only entry with a factor other than 1.
    lt: { unit: "ml", factor: 1000 },
    pk: { unit: "stk", factor: 1 },
    pa: { unit: "stk", factor: 1 },
    rl: { unit: "stk", factor: 1 },
    tb: { unit: "stk", factor: 1 },
};
exports.getCanonical = function (item, today) {
let quantity = item.amount;
let unit = item.volumeLabelKey;
return utils.convertUnit(
{
id: item.productId,
name: item.name,
// description: "", not available
price: item.price.regular.value / 100,
priceHistory: [{ date: today, price: item.price.regular.value / 100 }],
isWeighted: item.isWeightArticle,
unit,
quantity,
bio: item.name.toLowerCase().includes("bio") && !item.name.toLowerCase().includes("fabio"),
url: item.sku.replace("-", ""),
},
units,
"penny"
);
};
exports.fetchData = async function () {
hits = 100;
page = 0;
done = false;
result = [];
while (!done) {
const PENNY_SEARCH = `https://www.penny.at/api/products?page=${page}&pageSize=${hits}`;
data = (await axios.get(PENNY_SEARCH)).data;
done = data.count < hits || page * hits > MAXITEMS;
page++;
result = result.concat(data.results);
}
return result;
};
/**
 * Recursively crawls a Penny category page and records every sub-category
 * reachable from it.
 *
 * @param {string} url - Absolute URL of the category page to fetch.
 * @param {?string} parent - " -> "-joined name path of the parent category,
 *     or null/empty for the top level.
 * @param {Array<{id: string, url: string, code: null}>} result - Output array;
 *     one entry per newly discovered category is appended in place.
 * @param {Set<string>} lookup - URLs already recorded; updated in place so
 *     that each link is recorded and crawled at most once.
 * @returns {Promise<void>} Rejects if fetching or parsing `url` itself fails;
 *     failures while crawling sub-categories are ignored.
 */
async function parseCategory(url, parent, result, lookup) {
    const data = (await axios.get(url)).data;
    const dom = HTMLParser.parse(data);
    const categoryTitle = dom.querySelector('[data-test="category-title"]')?.textContent;
    // A sub-page titled "Alle Kategorien" leads back to the overview of all
    // categories; stop there instead of crawling the whole tree again.
    // A page without a title element is not the overview, so it is still crawled.
    if (url !== "https://www.penny.at/kategorie" && categoryTitle?.includes("Alle Kategorien")) return;

    const categories = dom.querySelectorAll('[data-test="category-tree-navigation-button"]');
    for (const category of categories) {
        const link = "https://www.penny.at" + category.getAttribute("href");
        const label = category.querySelector(".subtitle-2");
        if (!label) continue;

        // Decode every "&amp;" entity, not just the first one in the name.
        const ownName = label.innerText.trim().replace(/&amp;/g, "&");
        const name = parent ? `${parent} -> ${ownName}` : ownName;
        if (name.startsWith("Alle Angebote")) continue;
        if (lookup.has(link)) continue;

        lookup.add(link);
        result.push({
            id: name,
            url: link,
            code: null,
        });
        try {
            await parseCategory(link, name, result, lookup);
        } catch (e) {
            // Ignore, sometimes the server responds with 502. No idea why.
            // The category itself stays recorded; only its children are missed.
        }
    }
}
exports.initializeCategoryMapping = async () => {
const categories = [];
await parseCategory("https://www.penny.at/kategorie", null, categories, new Set());
utils.mergeAndSaveCategories("penny", categories);
exports.categoryLookup = {};
for (const category of categories) {
exports.categoryLookup[category.id] = category;
}
};
exports.mapCategory = (rawItem) => {
const categoryPath = rawItem.parentCategories.filter((path) => path.length > 0 && !path[0].name.includes("ngebot"))[0];
if (!categoryPath) return null;
const categoryName = categoryPath.map((path) => path.name).join(" -> ");
const category = exports.categoryLookup[categoryName];
if (category) return category.code;
return null;
};
// Base URL for Penny product pages (presumably prefixed to an item's `url`
// by consumers of this module — confirm against callers).
exports.urlBase = "https://www.penny.at/produkte/";

// When this file is executed directly rather than required, rebuild the
// category mapping.
if (require.main === module) {
    // The promise is not awaited (no top-level await in CommonJS); a failure
    // surfaces as an unhandled rejection.
    void exports.initializeCategoryMapping();
}