mirror of
https://github.com/badlogic/heissepreise.git
synced 2024-06-05 00:05:28 +02:00
commit
958c9f2d00
|
@ -66,6 +66,14 @@ exports.stores = {
|
|||
getUrl: (item) => `https://www.penny.at/produkte/${item.url}`,
|
||||
removeOld: true,
|
||||
},
|
||||
bipa: {
|
||||
name: "Bipa",
|
||||
budgetBrands: ["babywell", "look by bipa", "bi care", "bi kids", "bi good", "bi life", "bi life dent"],
|
||||
color: "rose",
|
||||
defaultChecked: true,
|
||||
getUrl: (item) => `https://www.bipa.at${item.url}`,
|
||||
removeOld: false,
|
||||
},
|
||||
dmDe: {
|
||||
name: "DM DE",
|
||||
budgetBrands: ["balea"],
|
||||
|
|
2096
stores/bipa-categories.json
Normal file
2096
stores/bipa-categories.json
Normal file
File diff suppressed because it is too large
Load Diff
133
stores/bipa.js
Normal file
133
stores/bipa.js
Normal file
|
@ -0,0 +1,133 @@
|
|||
const axios = require("axios");
|
||||
const utils = require("./utils");
|
||||
const HTMLParser = require("node-html-parser");
|
||||
|
||||
// Store-specific unit table; getCanonical passes it to utils.convertUnit.
// NOTE(review): "mbe" -> "wg" presumably maps Bipa's unit label to wash loads
// ("Waschgang") with a 1:1 factor - confirm against utils.convertUnit.
const units = {
|
||||
mbe: { unit: "wg", factor: 1 },
|
||||
};
|
||||
|
||||
exports.getBipaCategoryPages = async () => {
|
||||
const categoryPages = [];
|
||||
|
||||
var res = await axios.get(`${exports.urlBase}/sitemap_2-category.xml`, {
|
||||
validateStatus: function (status) {
|
||||
return status >= 200 && status < 300;
|
||||
},
|
||||
});
|
||||
|
||||
if (res && res.data) {
|
||||
let pages = res.data.replace(/[\s]*/gm, "").match(/<url>(.*?)<\/url>/gm);
|
||||
pages = pages.filter((page) => /<changefreq>(daily|weekly)<\/changefreq>/g.test(page)); // only return pages which change daily or weekly ("monthly" are mainly seo, brand or offer pages)
|
||||
pages = pages.map((page) => page.match(/<loc>(.*)<\/loc>/gm)[0]);
|
||||
pages = pages.map((page) => page.replace(/<\/{0,1}loc>/g, "")); // remove <loc> xml-tags
|
||||
pages = pages.filter((page) => /\/c\/.*\/.{1,}/g.test(page)); // only return 2nd level category pages (level 1 is mostly landing pages or some special offer pages)
|
||||
categoryPages.push(...pages);
|
||||
}
|
||||
|
||||
return categoryPages;
|
||||
};
|
||||
|
||||
exports.getCanonical = function (item, today) {
|
||||
let [quantity, unit] = utils.parseUnitAndQuantityAtEnd(item.unit);
|
||||
return utils.convertUnit(
|
||||
{
|
||||
id: item.id,
|
||||
name: item.name,
|
||||
// description: "", not available
|
||||
price: item.price,
|
||||
priceHistory: [{ date: today, price: item.price }],
|
||||
quantity,
|
||||
unit,
|
||||
bio: item.name.toLowerCase().includes("bio"),
|
||||
url: item.canonicalUrl,
|
||||
},
|
||||
units,
|
||||
"bipa",
|
||||
{
|
||||
unit: "stk",
|
||||
quantity: 1,
|
||||
}
|
||||
);
|
||||
};
|
||||
|
||||
exports.fetchData = async function () {
|
||||
let bipaItems = [];
|
||||
|
||||
const BIPA_CATEGORIES = await exports.getBipaCategoryPages();
|
||||
|
||||
for (let categoryPageRawUrl of BIPA_CATEGORIES) {
|
||||
const res = await axios.get(`${categoryPageRawUrl}?start=0&sz=1000`, {
|
||||
validateStatus: function (status) {
|
||||
return status >= 200 && status < 300;
|
||||
},
|
||||
});
|
||||
|
||||
if (res && res.data) {
|
||||
const root = HTMLParser.parse(res.data);
|
||||
|
||||
root.querySelectorAll(".product-tile-card").forEach((product) => {
|
||||
const gtmdataRaw = product._attrs["data-gtmdata"];
|
||||
if (gtmdataRaw && gtmdataRaw !== "undefined") {
|
||||
const canonicalUrl = product.querySelector("a.stretched-link")._attrs["href"];
|
||||
try {
|
||||
const gtmdata = JSON.parse(gtmdataRaw);
|
||||
bipaItems.push({
|
||||
id: gtmdata.id,
|
||||
name: gtmdata.name,
|
||||
price: parseFloat(gtmdata.price),
|
||||
unit: product.querySelector(".product-info").text.replace("Inhalt:").trim(),
|
||||
canonicalUrl: canonicalUrl,
|
||||
category: gtmdata.category,
|
||||
});
|
||||
} catch (error) {
|
||||
console.log(`Error parsing json on ${categoryPageRawUrl} for product: ${canonicalUrl}`);
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
return bipaItems;
|
||||
};
|
||||
|
||||
exports.initializeCategoryMapping = async () => {
|
||||
let categories = [];
|
||||
|
||||
const BIPA_CATEGORIES = await exports.getBipaCategoryPages();
|
||||
|
||||
for (let categoryPageRawUrl of BIPA_CATEGORIES) {
|
||||
const res = await axios.get(`${categoryPageRawUrl}?start=0&sz=1`, {
|
||||
// sz=1 (one item) because we don't need much products here for faster loading
|
||||
validateStatus: function (status) {
|
||||
return status >= 200 && status < 300;
|
||||
},
|
||||
});
|
||||
|
||||
if (res && res.data) {
|
||||
const categoryId = /\?cgid=(.*)"/gm.exec(res.data);
|
||||
if (categoryId && categoryId[1]) {
|
||||
categories.push({
|
||||
id: categoryId[1],
|
||||
description: null,
|
||||
url: categoryPageRawUrl,
|
||||
code: null,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// sort alphabetically for easier category mapping
|
||||
categories.sort((a, b) => a.id.localeCompare(b.id));
|
||||
|
||||
utils.mergeAndSaveCategories("bipa", categories);
|
||||
exports.categoryLookup = {};
|
||||
for (const category of categories) {
|
||||
exports.categoryLookup[category.id] = category;
|
||||
}
|
||||
};
|
||||
|
||||
exports.mapCategory = (rawItem) => {
|
||||
return exports.categoryLookup[rawItem.category]?.code;
|
||||
};
|
||||
|
||||
// Base URL of the Bipa web shop; getBipaCategoryPages appends the sitemap path to it.
exports.urlBase = "https://www.bipa.at";
|
|
@ -478,11 +478,6 @@
|
|||
"url": "https://www.roksh.at/hofer/angebot/hühnchen-geflügel",
|
||||
"code": "32"
|
||||
},
|
||||
{
|
||||
"id": "faschiertes",
|
||||
"url": "https://www.roksh.at/hofer/angebot/faschiertes",
|
||||
"code": "32"
|
||||
},
|
||||
{
|
||||
"id": "schweinefleisch-spezialitaten",
|
||||
"url": "https://www.roksh.at/hofer/angebot/schweinefleisch-spezialitaten",
|
||||
|
@ -493,6 +488,11 @@
|
|||
"url": "https://www.roksh.at/hofer/angebot/rind",
|
||||
"code": "32"
|
||||
},
|
||||
{
|
||||
"id": "faschiertes",
|
||||
"url": "https://www.roksh.at/hofer/angebot/faschiertes",
|
||||
"code": "32"
|
||||
},
|
||||
{
|
||||
"id": "fruchtaufstriche",
|
||||
"url": "https://www.roksh.at/hofer/angebot/fruchtaufstriche",
|
||||
|
@ -514,13 +514,13 @@
|
|||
"code": "57"
|
||||
},
|
||||
{
|
||||
"id": "fleisch-fischkonserven",
|
||||
"url": "https://www.roksh.at/hofer/angebot/fleisch-fischkonserven",
|
||||
"id": "sauerkonserven",
|
||||
"url": "https://www.roksh.at/hofer/angebot/sauerkonserven",
|
||||
"code": "57"
|
||||
},
|
||||
{
|
||||
"id": "sauerkonserven",
|
||||
"url": "https://www.roksh.at/hofer/angebot/sauerkonserven",
|
||||
"id": "fleisch-fischkonserven",
|
||||
"url": "https://www.roksh.at/hofer/angebot/fleisch-fischkonserven",
|
||||
"code": "57"
|
||||
},
|
||||
{
|
||||
|
@ -629,13 +629,13 @@
|
|||
"code": "20"
|
||||
},
|
||||
{
|
||||
"id": "smoothies",
|
||||
"url": "https://www.roksh.at/hofer/angebot/smoothies",
|
||||
"id": "sirupe",
|
||||
"url": "https://www.roksh.at/hofer/angebot/sirupe",
|
||||
"code": "20"
|
||||
},
|
||||
{
|
||||
"id": "sirupe",
|
||||
"url": "https://www.roksh.at/hofer/angebot/sirupe",
|
||||
"id": "smoothies",
|
||||
"url": "https://www.roksh.at/hofer/angebot/smoothies",
|
||||
"code": "20"
|
||||
},
|
||||
{
|
||||
|
|
|
@ -11,3 +11,4 @@ exports.reweDe = require("./rewe-de");
|
|||
exports.penny = require("./penny");
|
||||
exports.mueller = require("./mueller");
|
||||
exports.muellerDe = require("./mueller-de");
|
||||
exports.bipa = require("./bipa");
|
||||
|
|
Loading…
Reference in New Issue
Block a user