Merge pull request #128 from tiefenb/bipa

Add Bipa
This commit is contained in:
Mario Zechner 2023-07-05 20:21:52 +02:00 committed by GitHub
commit 958c9f2d00
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 2251 additions and 13 deletions

View File

@ -66,6 +66,14 @@ exports.stores = {
getUrl: (item) => `https://www.penny.at/produkte/${item.url}`,
removeOld: true,
},
// Store configuration entry for Bipa.
bipa: {
// Display name of the store.
name: "Bipa",
// Lower-case brand names listed as budget brands for this store.
// NOTE(review): presumably Bipa's own-label brands — confirm against how budgetBrands is consumed.
budgetBrands: ["babywell", "look by bipa", "bi care", "bi kids", "bi good", "bi life", "bi life dent"],
color: "rose",
defaultChecked: true,
// Builds the product link by appending item.url directly to the Bipa origin.
// No "/" separator is inserted, so item.url is expected to start with one — TODO confirm.
getUrl: (item) => `https://www.bipa.at${item.url}`,
// NOTE(review): other entries (e.g. the one above) set this to true; semantics are defined by the consumer of this table.
removeOld: false,
},
dmDe: {
name: "DM DE",
budgetBrands: ["balea"],

2096
stores/bipa-categories.json Normal file

File diff suppressed because it is too large Load Diff

133
stores/bipa.js Normal file
View File

@ -0,0 +1,133 @@
const axios = require("axios");
const utils = require("./utils");
const HTMLParser = require("node-html-parser");
// Store-specific unit conversion table handed to utils.convertUnit in getCanonical.
// Each key is a unit string as parsed from the shop; the value names the target unit
// and the multiplication factor.
// NOTE(review): "mbe" -> "wg" is not explained anywhere in this file; presumably a
// laundry-related unit (wash loads) — confirm with the Bipa product pages.
const units = {
mbe: { unit: "wg", factor: 1 },
};
exports.getBipaCategoryPages = async () => {
const categoryPages = [];
var res = await axios.get(`${exports.urlBase}/sitemap_2-category.xml`, {
validateStatus: function (status) {
return status >= 200 && status < 300;
},
});
if (res && res.data) {
let pages = res.data.replace(/[\s]*/gm, "").match(/<url>(.*?)<\/url>/gm);
pages = pages.filter((page) => /<changefreq>(daily|weekly)<\/changefreq>/g.test(page)); // only return pages which change daily or weekly ("monthly" are mainly seo, brand or offer pages)
pages = pages.map((page) => page.match(/<loc>(.*)<\/loc>/gm)[0]);
pages = pages.map((page) => page.replace(/<\/{0,1}loc>/g, "")); // remove <loc> xml-tags
pages = pages.filter((page) => /\/c\/.*\/.{1,}/g.test(page)); // only return 2nd level category pages (level 1 is mostly landing pages or some special offer pages)
categoryPages.push(...pages);
}
return categoryPages;
};
exports.getCanonical = function (item, today) {
let [quantity, unit] = utils.parseUnitAndQuantityAtEnd(item.unit);
return utils.convertUnit(
{
id: item.id,
name: item.name,
// description: "", not available
price: item.price,
priceHistory: [{ date: today, price: item.price }],
quantity,
unit,
bio: item.name.toLowerCase().includes("bio"),
url: item.canonicalUrl,
},
units,
"bipa",
{
unit: "stk",
quantity: 1,
}
);
};
exports.fetchData = async function () {
let bipaItems = [];
const BIPA_CATEGORIES = await exports.getBipaCategoryPages();
for (let categoryPageRawUrl of BIPA_CATEGORIES) {
const res = await axios.get(`${categoryPageRawUrl}?start=0&sz=1000`, {
validateStatus: function (status) {
return status >= 200 && status < 300;
},
});
if (res && res.data) {
const root = HTMLParser.parse(res.data);
root.querySelectorAll(".product-tile-card").forEach((product) => {
const gtmdataRaw = product._attrs["data-gtmdata"];
if (gtmdataRaw && gtmdataRaw !== "undefined") {
const canonicalUrl = product.querySelector("a.stretched-link")._attrs["href"];
try {
const gtmdata = JSON.parse(gtmdataRaw);
bipaItems.push({
id: gtmdata.id,
name: gtmdata.name,
price: parseFloat(gtmdata.price),
unit: product.querySelector(".product-info").text.replace("Inhalt:").trim(),
canonicalUrl: canonicalUrl,
category: gtmdata.category,
});
} catch (error) {
console.log(`Error parsing json on ${categoryPageRawUrl} for product: ${canonicalUrl}`);
}
}
});
}
}
return bipaItems;
};
exports.initializeCategoryMapping = async () => {
let categories = [];
const BIPA_CATEGORIES = await exports.getBipaCategoryPages();
for (let categoryPageRawUrl of BIPA_CATEGORIES) {
const res = await axios.get(`${categoryPageRawUrl}?start=0&sz=1`, {
// sz=1 (one item) because we don't need much products here for faster loading
validateStatus: function (status) {
return status >= 200 && status < 300;
},
});
if (res && res.data) {
const categoryId = /\?cgid=(.*)"/gm.exec(res.data);
if (categoryId && categoryId[1]) {
categories.push({
id: categoryId[1],
description: null,
url: categoryPageRawUrl,
code: null,
});
}
}
}
// sort alphabetically for easier category mapping
categories.sort((a, b) => a.id.localeCompare(b.id));
utils.mergeAndSaveCategories("bipa", categories);
exports.categoryLookup = {};
for (const category of categories) {
exports.categoryLookup[category.id] = category;
}
};
exports.mapCategory = (rawItem) => {
return exports.categoryLookup[rawItem.category]?.code;
};
// Origin of the Bipa web shop; getBipaCategoryPages prefixes the sitemap path with it.
exports.urlBase = "https://www.bipa.at";

View File

@ -478,11 +478,6 @@
"url": "https://www.roksh.at/hofer/angebot/hühnchen-geflügel",
"code": "32"
},
{
"id": "faschiertes",
"url": "https://www.roksh.at/hofer/angebot/faschiertes",
"code": "32"
},
{
"id": "schweinefleisch-spezialitaten",
"url": "https://www.roksh.at/hofer/angebot/schweinefleisch-spezialitaten",
@ -493,6 +488,11 @@
"url": "https://www.roksh.at/hofer/angebot/rind",
"code": "32"
},
{
"id": "faschiertes",
"url": "https://www.roksh.at/hofer/angebot/faschiertes",
"code": "32"
},
{
"id": "fruchtaufstriche",
"url": "https://www.roksh.at/hofer/angebot/fruchtaufstriche",
@ -514,13 +514,13 @@
"code": "57"
},
{
"id": "fleisch-fischkonserven",
"url": "https://www.roksh.at/hofer/angebot/fleisch-fischkonserven",
"id": "sauerkonserven",
"url": "https://www.roksh.at/hofer/angebot/sauerkonserven",
"code": "57"
},
{
"id": "sauerkonserven",
"url": "https://www.roksh.at/hofer/angebot/sauerkonserven",
"id": "fleisch-fischkonserven",
"url": "https://www.roksh.at/hofer/angebot/fleisch-fischkonserven",
"code": "57"
},
{
@ -629,13 +629,13 @@
"code": "20"
},
{
"id": "smoothies",
"url": "https://www.roksh.at/hofer/angebot/smoothies",
"id": "sirupe",
"url": "https://www.roksh.at/hofer/angebot/sirupe",
"code": "20"
},
{
"id": "sirupe",
"url": "https://www.roksh.at/hofer/angebot/sirupe",
"id": "smoothies",
"url": "https://www.roksh.at/hofer/angebot/smoothies",
"code": "20"
},
{

View File

@ -11,3 +11,4 @@ exports.reweDe = require("./rewe-de");
exports.penny = require("./penny");
exports.mueller = require("./mueller");
exports.muellerDe = require("./mueller-de");
exports.bipa = require("./bipa");