Fix Bipa categories

This commit is contained in:
Markus Tiefenbacher 2023-07-02 22:30:35 +02:00
parent f3f0702cbb
commit 05e03535a0
2 changed files with 605 additions and 655 deletions

File diff suppressed because it is too large Load Diff

View File

@ -1,7 +1,6 @@
const axios = require("axios"); const axios = require("axios");
const utils = require("./utils"); const utils = require("./utils");
const HTMLParser = require("node-html-parser"); const HTMLParser = require("node-html-parser");
const { URL } = require("url");
const units = { const units = {
mbe: { unit: "wg", factor: 1 }, mbe: { unit: "wg", factor: 1 },
@ -43,7 +42,7 @@ exports.getCanonical = function (item, today) {
url: item.canonicalUrl, url: item.canonicalUrl,
}, },
units, units,
"unimarkt", "bipa",
{ {
unit: "stk", unit: "stk",
quantity: 1, quantity: 1,
@ -56,6 +55,7 @@ exports.fetchData = async function () {
const BIPA_CATEGORIES = await exports.getBipaCategoryPages(); const BIPA_CATEGORIES = await exports.getBipaCategoryPages();
/*
for (let categoryPageRawUrl of BIPA_CATEGORIES) { for (let categoryPageRawUrl of BIPA_CATEGORIES) {
const res = await axios.get(`${categoryPageRawUrl}?start=0&sz=1000`, { const res = await axios.get(`${categoryPageRawUrl}?start=0&sz=1000`, {
validateStatus: function (status) { validateStatus: function (status) {
@ -78,7 +78,7 @@ exports.fetchData = async function () {
price: parseFloat(gtmdata.price), price: parseFloat(gtmdata.price),
unit: product.querySelector(".product-info").text.replace("Inhalt:").trim(), unit: product.querySelector(".product-info").text.replace("Inhalt:").trim(),
canonicalUrl: canonicalUrl, canonicalUrl: canonicalUrl,
categoryPath: gtmdata.category.replace("-", "/"), // use slashes for seperation to match format used in sitemap.xml categoryPath: gtmdata.category.replaceAll("-", "/"), // use slashes for seperation to match format used in sitemap.xml
}); });
} catch (error) { } catch (error) {
console.log(`Error parsing json on ${categoryPageRawUrl} for product: ${canonicalUrl}`); console.log(`Error parsing json on ${categoryPageRawUrl} for product: ${canonicalUrl}`);
@ -87,6 +87,7 @@ exports.fetchData = async function () {
}); });
} }
} }
*/
return bipaItems; return bipaItems;
}; };
@ -97,15 +98,24 @@ exports.initializeCategoryMapping = async () => {
const BIPA_CATEGORIES = await exports.getBipaCategoryPages(); const BIPA_CATEGORIES = await exports.getBipaCategoryPages();
for (let categoryPageRawUrl of BIPA_CATEGORIES) { for (let categoryPageRawUrl of BIPA_CATEGORIES) {
const categoryPageUrl = new URL(categoryPageRawUrl); const res = await axios.get(`${categoryPageRawUrl}?start=0&sz=1`, {
const categoryId = categoryPageUrl.pathname.replace("/c/", ""); // we don't need much products here for faster loading
validateStatus: function (status) {
categories.push({ return status >= 200 && status < 300;
id: categoryId, },
description: categoryId.replace("/", " -> "),
url: categoryPageRawUrl,
code: null,
}); });
if (res && res.data) {
const categoryId = /\?cgid=(.*)"/gm.exec(res.data);
if (categoryId && categoryId[1]) {
categories.push({
id: categoryId[1],
description: null,
url: categoryPageRawUrl,
code: null,
});
}
}
} }
utils.mergeAndSaveCategories("bipa", categories); utils.mergeAndSaveCategories("bipa", categories);