mirror of
https://github.com/badlogic/heissepreise.git
synced 2024-09-22 00:00:59 +02:00
Closes #135
Penny now has a subcategory that links back to the "all categories" page. This sent the category crawler into infinite recursion, which eventually ran the process out of memory (OOM).
This commit is contained in:
parent
b95f39b809
commit
dd938ba7cd
31
analysis.js
31
analysis.js
|
@ -55,15 +55,32 @@ function currentDate() {
|
|||
return `${year}-${month}-${day}`;
|
||||
}
|
||||
|
||||
// Pool of every string handed to internString so far, keyed by the string itself.
const strings = new Map();

/**
 * Returns the pooled copy of `value`, adding `value` to the pool the first
 * time it is seen.
 *
 * NOTE(review): presumably used to cut heap usage by letting equal strings on
 * different items share one instance; confirm against the callers.
 *
 * @param {string} value - The string to look up in (or add to) the pool.
 * @returns {string} The pooled string equal to `value`.
 */
const internString = (value) => {
    // A single lookup replaces the former has() + get() pair.
    const pooled = strings.get(value);
    if (pooled !== undefined) {
        return pooled;
    }
    strings.set(value, value);
    return value;
};
|
||||
|
||||
/**
 * Converts the raw items of one store into canonical items.
 *
 * Each raw item is passed to the store's `getCanonical`. Every truthy result
 * is copied into a new object that also carries the `store` key, and all of
 * its string-valued properties are passed through `internString`.
 *
 * @param {string} store - Key into `stores`; also set as `store` on each result
 *     (a `store` property returned by `getCanonical` takes precedence).
 * @param {Array} rawItems - Raw items, handed one by one to `getCanonical`.
 * @param {*} today - Forwarded unchanged to `getCanonical`.
 * @returns {Array<object>} The canonical items. Raw items for which
 *     `getCanonical` returns a falsy value are skipped; an unknown store
 *     yields an empty array.
 */
function getCanonicalFor(store, rawItems, today) {
    const canonicalItems = [];
    for (const rawItem of rawItems) {
        const canonical = stores[store]?.getCanonical(rawItem, today);
        if (!canonical) {
            continue;
        }
        // Copy so the object returned by getCanonical is not mutated below.
        const item = {
            store,
            ...canonical,
        };
        for (const property of Object.keys(item)) {
            if (typeof item[property] === "string") {
                item[property] = internString(item[property]);
            }
        }
        canonicalItems.push(item);
    }
    return canonicalItems;
}
|
||||
|
@ -360,13 +377,13 @@ exports.dedupItems = (items) => {
|
|||
dedupItems.push(item);
|
||||
} else {
|
||||
if (seenItem.quantity != item.quantity || seenItem.unit != item.unit) {
|
||||
console.log(`Item with same id but different quantity and unit: ${item.store}-${item.id} '${item.name}'`);
|
||||
// console.log(`Item with same id but different quantity and unit: ${item.store}-${item.id} '${item.name}'`);
|
||||
}
|
||||
duplicates[item.store] = duplicates[item.store] ? duplicates[item.store] + 1 : 1;
|
||||
}
|
||||
}
|
||||
console.log("Deduplicated items");
|
||||
console.log(JSON.stringify(duplicates, null, 2));
|
||||
//console.log("Deduplicated items");
|
||||
//console.log(JSON.stringify(duplicates, null, 2));
|
||||
return dedupItems;
|
||||
};
|
||||
|
||||
|
|
|
@ -5,8 +5,8 @@
|
|||
"main": "server.js",
|
||||
"scripts": {
|
||||
"prepare": "husky install",
|
||||
"dev": "cross-env NODE_ENV=development PORT=$PORT node --trace-warnings server.js",
|
||||
"start": "cross-env NODE_ENV=production PORT=$PORT node server.js",
|
||||
"dev": "cross-env NODE_ENV=development PORT=$PORT node --max_old_space_size=8192 --trace-warnings server.js",
|
||||
"start": "cross-env NODE_ENV=production PORT=$PORT node --max_old_space_size=8192 server.js",
|
||||
"format": "npx prettier --write ."
|
||||
},
|
||||
"repository": {
|
||||
|
|
|
@ -845,6 +845,12 @@
|
|||
"url": "https://www.bipa.at/c/haushalt/waschen/textilerfrischer",
|
||||
"code": "8A"
|
||||
},
|
||||
{
|
||||
"id": "haushalt-waschen-textilfarben",
|
||||
"description": null,
|
||||
"url": "https://www.bipa.at/c/haushalt/waschen/textilfarben",
|
||||
"code": null
|
||||
},
|
||||
{
|
||||
"id": "haushalt-waschen-waescheparfum",
|
||||
"description": null,
|
||||
|
|
|
@ -423,11 +423,6 @@
|
|||
"url": "https://www.roksh.at/hofer/angebot/kase-geschnitten-gerieben",
|
||||
"code": "33"
|
||||
},
|
||||
{
|
||||
"id": "kasespezialitaten",
|
||||
"url": "https://www.roksh.at/hofer/angebot/kasespezialitaten",
|
||||
"code": "33"
|
||||
},
|
||||
{
|
||||
"id": "weichkase",
|
||||
"url": "https://www.roksh.at/hofer/angebot/weichkase",
|
||||
|
@ -438,6 +433,11 @@
|
|||
"url": "https://www.roksh.at/hofer/angebot/hart-schnittkase",
|
||||
"code": "33"
|
||||
},
|
||||
{
|
||||
"id": "kasespezialitaten",
|
||||
"url": "https://www.roksh.at/hofer/angebot/kasespezialitaten",
|
||||
"code": "33"
|
||||
},
|
||||
{
|
||||
"id": "streichwurst-pasteten",
|
||||
"url": "https://www.roksh.at/hofer/angebot/streichwurst-pasteten",
|
||||
|
@ -458,6 +458,11 @@
|
|||
"url": "https://www.roksh.at/hofer/angebot/stangenwurst",
|
||||
"code": "37"
|
||||
},
|
||||
{
|
||||
"id": "speck-rohschinken",
|
||||
"url": "https://www.roksh.at/hofer/angebot/speck-rohschinken",
|
||||
"code": "37"
|
||||
},
|
||||
{
|
||||
"id": "wurstel",
|
||||
"url": "https://www.roksh.at/hofer/angebot/wurstel",
|
||||
|
@ -468,11 +473,6 @@
|
|||
"url": "https://www.roksh.at/hofer/angebot/leberkase-co",
|
||||
"code": "37"
|
||||
},
|
||||
{
|
||||
"id": "speck-rohschinken",
|
||||
"url": "https://www.roksh.at/hofer/angebot/speck-rohschinken",
|
||||
"code": "37"
|
||||
},
|
||||
{
|
||||
"id": "hühnchen-geflügel",
|
||||
"url": "https://www.roksh.at/hofer/angebot/hühnchen-geflügel",
|
||||
|
@ -484,13 +484,13 @@
|
|||
"code": "32"
|
||||
},
|
||||
{
|
||||
"id": "rind",
|
||||
"url": "https://www.roksh.at/hofer/angebot/rind",
|
||||
"id": "faschiertes",
|
||||
"url": "https://www.roksh.at/hofer/angebot/faschiertes",
|
||||
"code": "32"
|
||||
},
|
||||
{
|
||||
"id": "faschiertes",
|
||||
"url": "https://www.roksh.at/hofer/angebot/faschiertes",
|
||||
"id": "rind",
|
||||
"url": "https://www.roksh.at/hofer/angebot/rind",
|
||||
"code": "32"
|
||||
},
|
||||
{
|
||||
|
@ -533,11 +533,6 @@
|
|||
"url": "https://www.roksh.at/hofer/angebot/trockenfruchte",
|
||||
"code": "03"
|
||||
},
|
||||
{
|
||||
"id": "tafelschokolade",
|
||||
"url": "https://www.roksh.at/hofer/angebot/tafelschokolade",
|
||||
"code": "64"
|
||||
},
|
||||
{
|
||||
"id": "bonbons-kaugummi",
|
||||
"url": "https://www.roksh.at/hofer/angebot/bonbons-kaugummi",
|
||||
|
@ -548,6 +543,11 @@
|
|||
"url": "https://www.roksh.at/hofer/angebot/waffeln",
|
||||
"code": "60"
|
||||
},
|
||||
{
|
||||
"id": "tafelschokolade",
|
||||
"url": "https://www.roksh.at/hofer/angebot/tafelschokolade",
|
||||
"code": "64"
|
||||
},
|
||||
{
|
||||
"id": "kekse",
|
||||
"url": "https://www.roksh.at/hofer/angebot/kekse",
|
||||
|
@ -583,6 +583,11 @@
|
|||
"url": "https://www.roksh.at/hofer/angebot/sekt",
|
||||
"code": "23"
|
||||
},
|
||||
{
|
||||
"id": "roseweine-spezialweine",
|
||||
"url": "https://www.roksh.at/hofer/angebot/roseweine-spezialweine",
|
||||
"code": "25"
|
||||
},
|
||||
{
|
||||
"id": "rotweine",
|
||||
"url": "https://www.roksh.at/hofer/angebot/rotweine",
|
||||
|
@ -593,11 +598,6 @@
|
|||
"url": "https://www.roksh.at/hofer/angebot/weißweine",
|
||||
"code": "25"
|
||||
},
|
||||
{
|
||||
"id": "roseweine-spezialweine",
|
||||
"url": "https://www.roksh.at/hofer/angebot/roseweine-spezialweine",
|
||||
"code": "25"
|
||||
},
|
||||
{
|
||||
"id": "tee",
|
||||
"url": "https://www.roksh.at/hofer/angebot/tee",
|
||||
|
|
|
@ -809,6 +809,11 @@
|
|||
"url": "https://www.mueller.at/naturshop/pflege/hand-fusspflege/handpflege/",
|
||||
"code": "75"
|
||||
},
|
||||
{
|
||||
"id": "Naturshop/Pflege/Hand- & Fußpflege/Handseife",
|
||||
"url": "https://www.mueller.at/naturshop/pflege/hand-fusspflege/handseife/",
|
||||
"code": null
|
||||
},
|
||||
{
|
||||
"id": "Naturshop/Pflege/Hand- & Fußpflege/Nagelpflege",
|
||||
"url": "https://www.mueller.at/naturshop/pflege/hand-fusspflege/nagelpflege/",
|
||||
|
@ -2049,11 +2054,6 @@
|
|||
"url": "https://www.mueller.at/drogerie/lebensmittel/suessigkeiten/kaugummi/",
|
||||
"code": "64"
|
||||
},
|
||||
{
|
||||
"id": "Drogerie/Lebensmittel/Süßigkeiten/Schokolade",
|
||||
"url": "https://www.mueller.at/drogerie/lebensmittel/suessigkeiten/schokolade/",
|
||||
"code": "64"
|
||||
},
|
||||
{
|
||||
"id": "Drogerie/Lebensmittel/Süßigkeiten/Gebäck",
|
||||
"url": "https://www.mueller.at/drogerie/lebensmittel/suessigkeiten/gebaeck/",
|
||||
|
@ -2889,16 +2889,6 @@
|
|||
"url": "https://www.mueller.at/genusswelt/haushalt/",
|
||||
"code": null
|
||||
},
|
||||
{
|
||||
"id": "Genusswelt/Aktionen",
|
||||
"url": "https://www.mueller.at/genusswelt/aktionen/",
|
||||
"code": null
|
||||
},
|
||||
{
|
||||
"id": "Genusswelt/Aktionen/Aus dem Prospekt",
|
||||
"url": "https://www.mueller.at/genusswelt/aktionen/aus-dem-prospekt/",
|
||||
"code": null
|
||||
},
|
||||
{
|
||||
"id": "Tiershop/Tiernahrung",
|
||||
"url": "https://www.mueller.at/tiershop/tiernahrung/",
|
||||
|
@ -3123,5 +3113,20 @@
|
|||
"id": "Tiershop/Sale",
|
||||
"url": "https://www.mueller.at/tiershop/sale/",
|
||||
"code": "90"
|
||||
},
|
||||
{
|
||||
"id": "Genusswelt/Aktionen",
|
||||
"url": "https://www.mueller.at/genusswelt/aktionen/",
|
||||
"code": null
|
||||
},
|
||||
{
|
||||
"id": "Drogerie/Lebensmittel/Süßigkeiten/Schokolade",
|
||||
"url": "https://www.mueller.at/drogerie/lebensmittel/suessigkeiten/schokolade/",
|
||||
"code": "64"
|
||||
},
|
||||
{
|
||||
"id": "Genusswelt/Aktionen/Aus dem Prospekt",
|
||||
"url": "https://www.mueller.at/genusswelt/aktionen/aus-dem-prospekt/",
|
||||
"code": null
|
||||
}
|
||||
]
|
|
@ -114,11 +114,6 @@
|
|||
"url": "https://www.penny.at/kategorie/tofu-und-vegetarischevegane-produkte-13046",
|
||||
"code": "3B"
|
||||
},
|
||||
{
|
||||
"id": "Kühlwaren -> Blätterteig & Strudelteig",
|
||||
"url": "https://www.penny.at/kategorie/blaetterteig-und-strudelteig-13043",
|
||||
"code": "36"
|
||||
},
|
||||
{
|
||||
"id": "Tiefkühl",
|
||||
"url": "https://www.penny.at/kategorie/tiefkuehl-13047",
|
||||
|
@ -139,26 +134,11 @@
|
|||
"url": "https://www.penny.at/kategorie/fertiggerichte-13049",
|
||||
"code": "42"
|
||||
},
|
||||
{
|
||||
"id": "Tiefkühl -> Pizza & Baguette",
|
||||
"url": "https://www.penny.at/kategorie/pizza-und-baguette-13053",
|
||||
"code": "46"
|
||||
},
|
||||
{
|
||||
"id": "Tiefkühl -> Fisch & Garnelen",
|
||||
"url": "https://www.penny.at/kategorie/fisch-und-garnelen-13050",
|
||||
"code": "43"
|
||||
},
|
||||
{
|
||||
"id": "Grundnahrungsmittel",
|
||||
"url": "https://www.penny.at/kategorie/grundnahrungsmittel-13055",
|
||||
"code": "50"
|
||||
},
|
||||
{
|
||||
"id": "Grundnahrungsmittel -> Fertiggerichte",
|
||||
"url": "https://www.penny.at/kategorie/fertiggerichte-13059",
|
||||
"code": "54"
|
||||
},
|
||||
{
|
||||
"id": "Grundnahrungsmittel -> Konserven & Sauerwaren",
|
||||
"url": "https://www.penny.at/kategorie/konserven-und-sauerwaren-13062",
|
||||
|
@ -169,6 +149,11 @@
|
|||
"url": "https://www.penny.at/kategorie/essig-und-oele-13058",
|
||||
"code": "53"
|
||||
},
|
||||
{
|
||||
"id": "Grundnahrungsmittel -> Mehl & Getreideprodukte",
|
||||
"url": "https://www.penny.at/kategorie/mehl-und-getreideprodukte-13064",
|
||||
"code": null
|
||||
},
|
||||
{
|
||||
"id": "Grundnahrungsmittel -> Gewürze & Würzmittel",
|
||||
"url": "https://www.penny.at/kategorie/gewuerze-und-wuerzmittel-13060",
|
||||
|
@ -279,36 +264,16 @@
|
|||
"url": "https://www.penny.at/kategorie/pflanzen-und-blumen-13100",
|
||||
"code": "81"
|
||||
},
|
||||
{
|
||||
"id": "Haushalt -> Lampen & Batterien",
|
||||
"url": "https://www.penny.at/kategorie/lampen-und-batterien-13102",
|
||||
"code": "85"
|
||||
},
|
||||
{
|
||||
"id": "Pflege",
|
||||
"url": "https://www.penny.at/kategorie/pflege-13076",
|
||||
"code": "70"
|
||||
},
|
||||
{
|
||||
"id": "Pflege -> Haut- & Lippenpflege",
|
||||
"url": "https://www.penny.at/kategorie/haut-und-lippenpflege-13082",
|
||||
"code": "75"
|
||||
},
|
||||
{
|
||||
"id": "Pflege -> Seifen & Duschbäder",
|
||||
"url": "https://www.penny.at/kategorie/seifen-und-duschbaeder-13085",
|
||||
"code": "78"
|
||||
},
|
||||
{
|
||||
"id": "Pflege -> Mund- & Zahnhygiene",
|
||||
"url": "https://www.penny.at/kategorie/mund-und-zahnhygiene-13083",
|
||||
"code": null
|
||||
},
|
||||
{
|
||||
"id": "Pflege -> Deodrants",
|
||||
"url": "https://www.penny.at/kategorie/deodrants-13079",
|
||||
"code": "72"
|
||||
},
|
||||
{
|
||||
"id": "Pflege -> Haarpflege & Haarfarben",
|
||||
"url": "https://www.penny.at/kategorie/haarpflege-und-haarfarben-13080",
|
||||
|
@ -329,16 +294,6 @@
|
|||
"url": "https://www.penny.at/kategorie/nonfood-13106",
|
||||
"code": "80"
|
||||
},
|
||||
{
|
||||
"id": "Non-Food -> Spiele, Bücher & Co.",
|
||||
"url": "https://www.penny.at/kategorie/spiele-buecher-und-co-13110",
|
||||
"code": "8E"
|
||||
},
|
||||
{
|
||||
"id": "Non-Food -> Körbe, Koffer & Co.",
|
||||
"url": "https://www.penny.at/kategorie/koerbe-koffer-und-co-13112",
|
||||
"code": null
|
||||
},
|
||||
{
|
||||
"id": "Non-Food -> Haushalt",
|
||||
"url": "https://www.penny.at/kategorie/haushalt-13109",
|
||||
|
@ -359,6 +314,56 @@
|
|||
"url": "https://www.penny.at/kategorie/kueche-13108",
|
||||
"code": "83"
|
||||
},
|
||||
{
|
||||
"id": "Kühlwaren -> Blätterteig & Strudelteig",
|
||||
"url": "https://www.penny.at/kategorie/blaetterteig-und-strudelteig-13043",
|
||||
"code": "36"
|
||||
},
|
||||
{
|
||||
"id": "Tiefkühl -> Pizza & Baguette",
|
||||
"url": "https://www.penny.at/kategorie/pizza-und-baguette-13053",
|
||||
"code": "46"
|
||||
},
|
||||
{
|
||||
"id": "Tiefkühl -> Fisch & Garnelen",
|
||||
"url": "https://www.penny.at/kategorie/fisch-und-garnelen-13050",
|
||||
"code": "43"
|
||||
},
|
||||
{
|
||||
"id": "Grundnahrungsmittel -> Fertiggerichte",
|
||||
"url": "https://www.penny.at/kategorie/fertiggerichte-13059",
|
||||
"code": "54"
|
||||
},
|
||||
{
|
||||
"id": "Haushalt -> Lampen & Batterien",
|
||||
"url": "https://www.penny.at/kategorie/lampen-und-batterien-13102",
|
||||
"code": "85"
|
||||
},
|
||||
{
|
||||
"id": "Pflege -> Haut- & Lippenpflege",
|
||||
"url": "https://www.penny.at/kategorie/haut-und-lippenpflege-13082",
|
||||
"code": "75"
|
||||
},
|
||||
{
|
||||
"id": "Pflege -> Seifen & Duschbäder",
|
||||
"url": "https://www.penny.at/kategorie/seifen-und-duschbaeder-13085",
|
||||
"code": "78"
|
||||
},
|
||||
{
|
||||
"id": "Pflege -> Deodrants",
|
||||
"url": "https://www.penny.at/kategorie/deodrants-13079",
|
||||
"code": "72"
|
||||
},
|
||||
{
|
||||
"id": "Non-Food -> Spiele, Bücher & Co.",
|
||||
"url": "https://www.penny.at/kategorie/spiele-buecher-und-co-13110",
|
||||
"code": "8E"
|
||||
},
|
||||
{
|
||||
"id": "Non-Food -> Körbe, Koffer & Co.",
|
||||
"url": "https://www.penny.at/kategorie/koerbe-koffer-und-co-13112",
|
||||
"code": null
|
||||
},
|
||||
{
|
||||
"id": "Brot & Gebäck -> Aufbackbrötchen & Toast",
|
||||
"url": "https://www.penny.at/kategorie/aufbackbroetchen-und-toast-13019",
|
||||
|
|
|
@ -49,9 +49,11 @@ exports.fetchData = async function () {
|
|||
return result;
|
||||
};
|
||||
|
||||
async function parseCategory(url, parent, result) {
|
||||
async function parseCategory(url, parent, result, lookup) {
|
||||
const data = (await axios.get(url)).data;
|
||||
const dom = HTMLParser.parse(data);
|
||||
const categoryTitle = dom.querySelector('[data-test="category-title"]')?.textContent;
|
||||
if (url != "https://www.penny.at/kategorie" && categoryTitle.includes("Alle Kategorien")) return;
|
||||
const categories = dom.querySelectorAll('[data-test="category-tree-navigation-button"]');
|
||||
for (const category of categories) {
|
||||
const link = "https://www.penny.at" + category.getAttribute("href");
|
||||
|
@ -59,19 +61,26 @@ async function parseCategory(url, parent, result) {
|
|||
const name = (parent ? parent + " -> " : "") + category.querySelector(".subtitle-2").innerText.trim().replace("&", "&");
|
||||
if (name.startsWith("Alle Angebote")) continue;
|
||||
|
||||
result.push({
|
||||
id: name,
|
||||
url: link,
|
||||
code: null,
|
||||
});
|
||||
if (!lookup.has(link)) {
|
||||
lookup.add(link);
|
||||
result.push({
|
||||
id: name,
|
||||
url: link,
|
||||
code: null,
|
||||
});
|
||||
|
||||
await parseCategory(link, name, result);
|
||||
try {
|
||||
await parseCategory(link, name, result, lookup);
|
||||
} catch (e) {
|
||||
// Ignore, sometimes the server responds with 502. No idea why
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
exports.initializeCategoryMapping = async () => {
|
||||
const categories = [];
|
||||
await parseCategory("https://www.penny.at/kategorie", null, categories);
|
||||
await parseCategory("https://www.penny.at/kategorie", null, categories, new Set());
|
||||
utils.mergeAndSaveCategories("penny", categories);
|
||||
|
||||
exports.categoryLookup = {};
|
||||
|
@ -90,3 +99,9 @@ exports.mapCategory = (rawItem) => {
|
|||
};
|
||||
|
||||
exports.urlBase = "https://www.penny.at/produkte/";
|
||||
|
||||
// When this file is executed directly (rather than loaded via require),
// rebuild the category mapping.
if (require.main === module) {
    exports.initializeCategoryMapping().catch((error) => {
        // Report the failure explicitly instead of leaving a floating promise
        // whose rejection would go unhandled.
        console.error("Failed to initialize the category mapping:", error);
        process.exitCode = 1;
    });
}
|
||||
|
|
|
@ -105,3 +105,14 @@ exports.parseUnitAndQuantityAtEnd = function (name) {
|
|||
}
|
||||
return [undefined, undefined];
|
||||
};
|
||||
|
||||
exports.showHeap = () => {
|
||||
setInterval(() => {
|
||||
const mu = process.memoryUsage();
|
||||
// # bytes / KB / MB / GB
|
||||
const gbNow = mu["heapUsed"] / 1024 / 1024 / 1024;
|
||||
const gbRounded = Math.round(gbNow * 100) / 100;
|
||||
|
||||
console.log(`Heap allocated ${gbRounded} GB`);
|
||||
}, 5000);
|
||||
};
|
||||
|
|
Loading…
Reference in New Issue
Block a user