Penny now has a subcategory that links back to the "all categories" page. This triggered infinite recursion, which eventually crashed the process with an out-of-memory (OOM) error.
This commit is contained in:
Mario Zechner 2023-07-11 15:31:02 +02:00
parent b95f39b809
commit dd938ba7cd
8 changed files with 165 additions and 106 deletions

View File

@ -55,15 +55,32 @@ function currentDate() {
return `${year}-${month}-${day}`;
}
// Pool of values seen so far; every entry maps a value to itself so that the
// first instance can be handed back for later equal values.
const strings = new Map();

/**
 * Returns the pooled instance equal to `value`. On first sight `value` itself
 * is stored in the pool and returned unchanged.
 *
 * @param {*} value - Value to look up (callers in this file pass strings).
 * @returns {*} The instance stored in the pool for `value`.
 */
const internString = (value) => {
    if (!strings.has(value)) {
        strings.set(value, value);
        return value;
    }
    return strings.get(value);
};
function getCanonicalFor(store, rawItems, today) {
const canonicalItems = [];
for (let i = 0; i < rawItems.length; i++) {
const item = stores[store]?.getCanonical(rawItems[i], today);
if (item)
canonicalItems.push({
let item = stores[store]?.getCanonical(rawItems[i], today);
if (item) {
item = {
store,
...item,
});
};
for (const property of Object.keys(item)) {
if (typeof item[property] === "string") {
item[property] = internString(item[property]);
}
}
canonicalItems.push(item);
}
}
return canonicalItems;
}
@ -360,13 +377,13 @@ exports.dedupItems = (items) => {
dedupItems.push(item);
} else {
if (seenItem.quantity != item.quantity || seenItem.unit != item.unit) {
console.log(`Item with same id but different quantity and unit: ${item.store}-${item.id} '${item.name}'`);
// console.log(`Item with same id but different quantity and unit: ${item.store}-${item.id} '${item.name}'`);
}
duplicates[item.store] = duplicates[item.store] ? duplicates[item.store] + 1 : 1;
}
}
console.log("Deduplicated items");
console.log(JSON.stringify(duplicates, null, 2));
//console.log("Deduplicated items");
//console.log(JSON.stringify(duplicates, null, 2));
return dedupItems;
};

View File

@ -5,8 +5,8 @@
"main": "server.js",
"scripts": {
"prepare": "husky install",
"dev": "cross-env NODE_ENV=development PORT=$PORT node --trace-warnings server.js",
"start": "cross-env NODE_ENV=production PORT=$PORT node server.js",
"dev": "cross-env NODE_ENV=development PORT=$PORT node --max_old_space_size=8192 --trace-warnings server.js",
"start": "cross-env NODE_ENV=production PORT=$PORT node --max_old_space_size=8192 server.js",
"format": "npx prettier --write ."
},
"repository": {

View File

@ -845,6 +845,12 @@
"url": "https://www.bipa.at/c/haushalt/waschen/textilerfrischer",
"code": "8A"
},
{
"id": "haushalt-waschen-textilfarben",
"description": null,
"url": "https://www.bipa.at/c/haushalt/waschen/textilfarben",
"code": null
},
{
"id": "haushalt-waschen-waescheparfum",
"description": null,

View File

@ -423,11 +423,6 @@
"url": "https://www.roksh.at/hofer/angebot/kase-geschnitten-gerieben",
"code": "33"
},
{
"id": "kasespezialitaten",
"url": "https://www.roksh.at/hofer/angebot/kasespezialitaten",
"code": "33"
},
{
"id": "weichkase",
"url": "https://www.roksh.at/hofer/angebot/weichkase",
@ -438,6 +433,11 @@
"url": "https://www.roksh.at/hofer/angebot/hart-schnittkase",
"code": "33"
},
{
"id": "kasespezialitaten",
"url": "https://www.roksh.at/hofer/angebot/kasespezialitaten",
"code": "33"
},
{
"id": "streichwurst-pasteten",
"url": "https://www.roksh.at/hofer/angebot/streichwurst-pasteten",
@ -458,6 +458,11 @@
"url": "https://www.roksh.at/hofer/angebot/stangenwurst",
"code": "37"
},
{
"id": "speck-rohschinken",
"url": "https://www.roksh.at/hofer/angebot/speck-rohschinken",
"code": "37"
},
{
"id": "wurstel",
"url": "https://www.roksh.at/hofer/angebot/wurstel",
@ -468,11 +473,6 @@
"url": "https://www.roksh.at/hofer/angebot/leberkase-co",
"code": "37"
},
{
"id": "speck-rohschinken",
"url": "https://www.roksh.at/hofer/angebot/speck-rohschinken",
"code": "37"
},
{
"id": "hühnchen-geflügel",
"url": "https://www.roksh.at/hofer/angebot/hühnchen-geflügel",
@ -484,13 +484,13 @@
"code": "32"
},
{
"id": "rind",
"url": "https://www.roksh.at/hofer/angebot/rind",
"id": "faschiertes",
"url": "https://www.roksh.at/hofer/angebot/faschiertes",
"code": "32"
},
{
"id": "faschiertes",
"url": "https://www.roksh.at/hofer/angebot/faschiertes",
"id": "rind",
"url": "https://www.roksh.at/hofer/angebot/rind",
"code": "32"
},
{
@ -533,11 +533,6 @@
"url": "https://www.roksh.at/hofer/angebot/trockenfruchte",
"code": "03"
},
{
"id": "tafelschokolade",
"url": "https://www.roksh.at/hofer/angebot/tafelschokolade",
"code": "64"
},
{
"id": "bonbons-kaugummi",
"url": "https://www.roksh.at/hofer/angebot/bonbons-kaugummi",
@ -548,6 +543,11 @@
"url": "https://www.roksh.at/hofer/angebot/waffeln",
"code": "60"
},
{
"id": "tafelschokolade",
"url": "https://www.roksh.at/hofer/angebot/tafelschokolade",
"code": "64"
},
{
"id": "kekse",
"url": "https://www.roksh.at/hofer/angebot/kekse",
@ -583,6 +583,11 @@
"url": "https://www.roksh.at/hofer/angebot/sekt",
"code": "23"
},
{
"id": "roseweine-spezialweine",
"url": "https://www.roksh.at/hofer/angebot/roseweine-spezialweine",
"code": "25"
},
{
"id": "rotweine",
"url": "https://www.roksh.at/hofer/angebot/rotweine",
@ -593,11 +598,6 @@
"url": "https://www.roksh.at/hofer/angebot/weißweine",
"code": "25"
},
{
"id": "roseweine-spezialweine",
"url": "https://www.roksh.at/hofer/angebot/roseweine-spezialweine",
"code": "25"
},
{
"id": "tee",
"url": "https://www.roksh.at/hofer/angebot/tee",

View File

@ -809,6 +809,11 @@
"url": "https://www.mueller.at/naturshop/pflege/hand-fusspflege/handpflege/",
"code": "75"
},
{
"id": "Naturshop/Pflege/Hand- & Fußpflege/Handseife",
"url": "https://www.mueller.at/naturshop/pflege/hand-fusspflege/handseife/",
"code": null
},
{
"id": "Naturshop/Pflege/Hand- & Fußpflege/Nagelpflege",
"url": "https://www.mueller.at/naturshop/pflege/hand-fusspflege/nagelpflege/",
@ -2049,11 +2054,6 @@
"url": "https://www.mueller.at/drogerie/lebensmittel/suessigkeiten/kaugummi/",
"code": "64"
},
{
"id": "Drogerie/Lebensmittel/Süßigkeiten/Schokolade",
"url": "https://www.mueller.at/drogerie/lebensmittel/suessigkeiten/schokolade/",
"code": "64"
},
{
"id": "Drogerie/Lebensmittel/Süßigkeiten/Gebäck",
"url": "https://www.mueller.at/drogerie/lebensmittel/suessigkeiten/gebaeck/",
@ -2889,16 +2889,6 @@
"url": "https://www.mueller.at/genusswelt/haushalt/",
"code": null
},
{
"id": "Genusswelt/Aktionen",
"url": "https://www.mueller.at/genusswelt/aktionen/",
"code": null
},
{
"id": "Genusswelt/Aktionen/Aus dem Prospekt",
"url": "https://www.mueller.at/genusswelt/aktionen/aus-dem-prospekt/",
"code": null
},
{
"id": "Tiershop/Tiernahrung",
"url": "https://www.mueller.at/tiershop/tiernahrung/",
@ -3123,5 +3113,20 @@
"id": "Tiershop/Sale",
"url": "https://www.mueller.at/tiershop/sale/",
"code": "90"
},
{
"id": "Genusswelt/Aktionen",
"url": "https://www.mueller.at/genusswelt/aktionen/",
"code": null
},
{
"id": "Drogerie/Lebensmittel/Süßigkeiten/Schokolade",
"url": "https://www.mueller.at/drogerie/lebensmittel/suessigkeiten/schokolade/",
"code": "64"
},
{
"id": "Genusswelt/Aktionen/Aus dem Prospekt",
"url": "https://www.mueller.at/genusswelt/aktionen/aus-dem-prospekt/",
"code": null
}
]

View File

@ -114,11 +114,6 @@
"url": "https://www.penny.at/kategorie/tofu-und-vegetarischevegane-produkte-13046",
"code": "3B"
},
{
"id": "Kühlwaren -> Blätterteig & Strudelteig",
"url": "https://www.penny.at/kategorie/blaetterteig-und-strudelteig-13043",
"code": "36"
},
{
"id": "Tiefkühl",
"url": "https://www.penny.at/kategorie/tiefkuehl-13047",
@ -139,26 +134,11 @@
"url": "https://www.penny.at/kategorie/fertiggerichte-13049",
"code": "42"
},
{
"id": "Tiefkühl -> Pizza & Baguette",
"url": "https://www.penny.at/kategorie/pizza-und-baguette-13053",
"code": "46"
},
{
"id": "Tiefkühl -> Fisch & Garnelen",
"url": "https://www.penny.at/kategorie/fisch-und-garnelen-13050",
"code": "43"
},
{
"id": "Grundnahrungsmittel",
"url": "https://www.penny.at/kategorie/grundnahrungsmittel-13055",
"code": "50"
},
{
"id": "Grundnahrungsmittel -> Fertiggerichte",
"url": "https://www.penny.at/kategorie/fertiggerichte-13059",
"code": "54"
},
{
"id": "Grundnahrungsmittel -> Konserven & Sauerwaren",
"url": "https://www.penny.at/kategorie/konserven-und-sauerwaren-13062",
@ -169,6 +149,11 @@
"url": "https://www.penny.at/kategorie/essig-und-oele-13058",
"code": "53"
},
{
"id": "Grundnahrungsmittel -> Mehl & Getreideprodukte",
"url": "https://www.penny.at/kategorie/mehl-und-getreideprodukte-13064",
"code": null
},
{
"id": "Grundnahrungsmittel -> Gewürze & Würzmittel",
"url": "https://www.penny.at/kategorie/gewuerze-und-wuerzmittel-13060",
@ -279,36 +264,16 @@
"url": "https://www.penny.at/kategorie/pflanzen-und-blumen-13100",
"code": "81"
},
{
"id": "Haushalt -> Lampen & Batterien",
"url": "https://www.penny.at/kategorie/lampen-und-batterien-13102",
"code": "85"
},
{
"id": "Pflege",
"url": "https://www.penny.at/kategorie/pflege-13076",
"code": "70"
},
{
"id": "Pflege -> Haut- & Lippenpflege",
"url": "https://www.penny.at/kategorie/haut-und-lippenpflege-13082",
"code": "75"
},
{
"id": "Pflege -> Seifen & Duschbäder",
"url": "https://www.penny.at/kategorie/seifen-und-duschbaeder-13085",
"code": "78"
},
{
"id": "Pflege -> Mund- & Zahnhygiene",
"url": "https://www.penny.at/kategorie/mund-und-zahnhygiene-13083",
"code": null
},
{
"id": "Pflege -> Deodrants",
"url": "https://www.penny.at/kategorie/deodrants-13079",
"code": "72"
},
{
"id": "Pflege -> Haarpflege & Haarfarben",
"url": "https://www.penny.at/kategorie/haarpflege-und-haarfarben-13080",
@ -329,16 +294,6 @@
"url": "https://www.penny.at/kategorie/nonfood-13106",
"code": "80"
},
{
"id": "Non-Food -> Spiele, Bücher & Co.",
"url": "https://www.penny.at/kategorie/spiele-buecher-und-co-13110",
"code": "8E"
},
{
"id": "Non-Food -> Körbe, Koffer & Co.",
"url": "https://www.penny.at/kategorie/koerbe-koffer-und-co-13112",
"code": null
},
{
"id": "Non-Food -> Haushalt",
"url": "https://www.penny.at/kategorie/haushalt-13109",
@ -359,6 +314,56 @@
"url": "https://www.penny.at/kategorie/kueche-13108",
"code": "83"
},
{
"id": "Kühlwaren -> Blätterteig & Strudelteig",
"url": "https://www.penny.at/kategorie/blaetterteig-und-strudelteig-13043",
"code": "36"
},
{
"id": "Tiefkühl -> Pizza & Baguette",
"url": "https://www.penny.at/kategorie/pizza-und-baguette-13053",
"code": "46"
},
{
"id": "Tiefkühl -> Fisch & Garnelen",
"url": "https://www.penny.at/kategorie/fisch-und-garnelen-13050",
"code": "43"
},
{
"id": "Grundnahrungsmittel -> Fertiggerichte",
"url": "https://www.penny.at/kategorie/fertiggerichte-13059",
"code": "54"
},
{
"id": "Haushalt -> Lampen & Batterien",
"url": "https://www.penny.at/kategorie/lampen-und-batterien-13102",
"code": "85"
},
{
"id": "Pflege -> Haut- & Lippenpflege",
"url": "https://www.penny.at/kategorie/haut-und-lippenpflege-13082",
"code": "75"
},
{
"id": "Pflege -> Seifen & Duschbäder",
"url": "https://www.penny.at/kategorie/seifen-und-duschbaeder-13085",
"code": "78"
},
{
"id": "Pflege -> Deodrants",
"url": "https://www.penny.at/kategorie/deodrants-13079",
"code": "72"
},
{
"id": "Non-Food -> Spiele, Bücher & Co.",
"url": "https://www.penny.at/kategorie/spiele-buecher-und-co-13110",
"code": "8E"
},
{
"id": "Non-Food -> Körbe, Koffer & Co.",
"url": "https://www.penny.at/kategorie/koerbe-koffer-und-co-13112",
"code": null
},
{
"id": "Brot & Gebäck -> Aufbackbrötchen & Toast",
"url": "https://www.penny.at/kategorie/aufbackbroetchen-und-toast-13019",

View File

@ -49,9 +49,11 @@ exports.fetchData = async function () {
return result;
};
async function parseCategory(url, parent, result) {
async function parseCategory(url, parent, result, lookup) {
const data = (await axios.get(url)).data;
const dom = HTMLParser.parse(data);
const categoryTitle = dom.querySelector('[data-test="category-title"]')?.textContent;
if (url != "https://www.penny.at/kategorie" && categoryTitle.includes("Alle Kategorien")) return;
const categories = dom.querySelectorAll('[data-test="category-tree-navigation-button"]');
for (const category of categories) {
const link = "https://www.penny.at" + category.getAttribute("href");
@ -59,19 +61,26 @@ async function parseCategory(url, parent, result) {
const name = (parent ? parent + " -> " : "") + category.querySelector(".subtitle-2").innerText.trim().replace("&amp;", "&");
if (name.startsWith("Alle Angebote")) continue;
result.push({
id: name,
url: link,
code: null,
});
if (!lookup.has(link)) {
lookup.add(link);
result.push({
id: name,
url: link,
code: null,
});
await parseCategory(link, name, result);
try {
await parseCategory(link, name, result, lookup);
} catch (e) {
// Ignore, sometimes the server responds with 502. No idea why
}
}
}
}
exports.initializeCategoryMapping = async () => {
const categories = [];
await parseCategory("https://www.penny.at/kategorie", null, categories);
await parseCategory("https://www.penny.at/kategorie", null, categories, new Set());
utils.mergeAndSaveCategories("penny", categories);
exports.categoryLookup = {};
@ -90,3 +99,9 @@ exports.mapCategory = (rawItem) => {
};
exports.urlBase = "https://www.penny.at/produkte/";
// When this file is executed directly (rather than required by another
// module), run initializeCategoryMapping once.
if (require.main === module) {
    (async () => {
        try {
            await exports.initializeCategoryMapping();
        } catch (error) {
            // Report the failure explicitly instead of leaving a floating
            // promise, and signal it through the process exit code.
            console.error(error);
            process.exitCode = 1;
        }
    })();
}

View File

@ -105,3 +105,14 @@ exports.parseUnitAndQuantityAtEnd = function (name) {
}
return [undefined, undefined];
};
exports.showHeap = () => {
setInterval(() => {
const mu = process.memoryUsage();
// # bytes / KB / MB / GB
const gbNow = mu["heapUsed"] / 1024 / 1024 / 1024;
const gbRounded = Math.round(gbNow * 100) / 100;
console.log(`Heap allocated ${gbRounded} GB`);
}, 5000);
};