From a5cd0f5de8f340d53048ec58b546d71f10508c90 Mon Sep 17 00:00:00 2001
From: Mario Zechner
Date: Tue, 6 Jun 2023 15:15:39 +0200
Subject: [PATCH] Fetch Billa data by category. Yields more items. Can be used for categorizing other store items, see categorize.js.

---
 categorize.js   |  66 +++++++++++++++++++++++++++
 stores/billa.js |  25 ++++++++++-
 stores/utils.js | 117 ++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 206 insertions(+), 2 deletions(-)
 create mode 100644 categorize.js

diff --git a/categorize.js b/categorize.js
new file mode 100644
index 0000000..941ac7e
--- /dev/null
+++ b/categorize.js
@@ -0,0 +1,66 @@
+const axios = require("axios");
+const analysis = require("./analysis");
+const stores = require("./stores");
+const utils = require("./stores/utils");
+const HITS = Math.floor(30000 + Math.random() * 2000);
+
+(async () => {
+    let total = 0;
+
+    let result = [];
+
+    for (let i = 1; i <= utils.globalCategories.length; i++) {
+        const globalCategory = utils.globalCategories[i - 1];
+        const categoryCode = i < 10 ? "" + i : String.fromCharCode("A".charCodeAt(0) + (i - 10));
+
+        const category = {
+            code: categoryCode,
+            name: globalCategory.name,
+            subCategories: [],
+        };
+        result.push(category);
+
+        for (let j = 1; j <= globalCategory.subcategories.length; j++) {
+            const subCategoryName = globalCategory.subcategories[j - 1];
+            const subCategoryCode = j < 10 ? "" + j : String.fromCharCode("A".charCodeAt(0) + (j - 10));
+            const code = `B2-${categoryCode}${subCategoryCode}`;
+            const subCategory = {
+                code: subCategoryCode,
+                name: `${globalCategory.name}>${subCategoryName}`,
+                items: [],
+            };
+            category.subCategories.push(subCategory);
+
+            console.log("Fetching items for category " + code + ` ${globalCategory.name} > ${subCategoryName}`);
+            const BILLA_SEARCH = `https://shop.billa.at/api/search/full?searchTerm=*&storeId=00-10&pageSize=${HITS}&category=${code}`;
+            const data = (await axios.get(BILLA_SEARCH)).data;
+            data.tiles.forEach((item) => {
+                try {
+                    const canonicalItem = stores.billa.getCanonical(item);
+                    canonicalItem.categoryCode = `${categoryCode}${subCategoryCode}`;
+                    canonicalItem.categoryName = `${globalCategory.name} > ${subCategoryName}`;
+                    subCategory.items.push(canonicalItem);
+                } catch (e) {
+                    // Ignore super tiles
+                }
+            });
+            total += subCategory.items.length;
+            console.log(subCategory.items.length + " items");
+        }
+    }
+    console.log("Total: " + total);
+    analysis.writeJSON("categories.json", result);
+
+    const lookup = {};
+    for (const category of result) {
+        for (const subCategory of category.subCategories) {
+            for (const item of subCategory.items) {
+                if (lookup[item.id]) {
+                    console.log(`Duplicate item: ${item.name} in category ${item.categoryName} and ${lookup[item.id].categoryName}`);
+                } else {
+                    lookup[item.id] = item;
+                }
+            }
+        }
+    }
+})();
diff --git a/stores/billa.js b/stores/billa.js
index aaeed2d..fda211d 100644
--- a/stores/billa.js
+++ b/stores/billa.js
@@ -40,8 +40,29 @@ exports.getCanonical = function (item, today) {
 };
 
 exports.fetchData = async function () {
-    const BILLA_SEARCH = `https://shop.billa.at/api/search/full?searchTerm=*&storeId=00-10&pageSize=${HITS}`;
-    return (await axios.get(BILLA_SEARCH)).data.tiles;
+    let items = [];
+
+    for (let i = 1; i <= utils.globalCategories.length; i++) {
+        const category = utils.globalCategories[i - 1];
+        const categoryCode = i < 10 ? "" + i : String.fromCharCode("A".charCodeAt(0) + (i - 10));
+
+        for (let j = 1; j <= category.subcategories.length; j++) {
+            const subCategoryCode = j < 10 ? "" + j : String.fromCharCode("A".charCodeAt(0) + (j - 10));
+            const code = `B2-${categoryCode}${subCategoryCode}`;
+
+            const BILLA_SEARCH = `https://shop.billa.at/api/search/full?searchTerm=*&storeId=00-10&pageSize=${HITS}&category=${code}`;
+            const data = (await axios.get(BILLA_SEARCH)).data;
+            data.tiles.forEach((item) => {
+                try {
+                    exports.getCanonical(item);
+                    items.push(item);
+                } catch (e) {
+                    // Ignore super tiles
+                }
+            });
+        }
+    }
+    return items;
 };
 
 exports.urlBase = "https://shop.billa.at";
diff --git a/stores/utils.js b/stores/utils.js
index 42fb1c6..d479fee 100644
--- a/stores/utils.js
+++ b/stores/utils.js
@@ -1,3 +1,120 @@
+// These match the Billa categories, which are organized in a 2-level hierarchy.
+// Each category in the top level gets a code from 1-Z, each sub category also gets a code.
+// Together the two codes form a unique id for the category, which we store in the item.category
+// field. E.g. "Obst & Gemüse > Salate" has the code "13", "Kühlwaren > Tofu" has the code "4C"
+exports.globalCategories = [
+    {
+        name: "Obst & Gemüse",
+        subcategories: ["Obst", "Gemüse", "Salate", "Trockenfrüchte & Nüsse"],
+    },
+    {
+        name: "Brot & Gebäck",
+        subcategories: ["Aufbackbrötchen & Toast", "Brot & Gebäck", "Knäckebrot & Zwieback", "Kuchen & Co.", "Semmelwürfel & Brösel"],
+    },
+    {
+        name: "Getränke",
+        subcategories: ["Alkoholfreie Getränke", "Bier & Radler", "Kaffee, Tee & Co.", "Sekt & Champagner", "Spirituosen", "Wein", "Mineralwasser"],
+    },
+    {
+        name: "Kühlwaren",
+        subcategories: [
+            "Schnelle Küche",
+            "Eier",
+            "Fleisch",
+            "Käse, Aufstriche & Salate",
+            "Milchprodukte",
+            "Feinkostplatten & Brötchen",
+            "Blätterteig, Strudelteig",
+            "Wurst, Schinken & Speck",
+            "Feinkost",
+            "Fisch",
+            "Unbekannt", // Not available in Billa hierarchy, left blank
+            "Tofu",
+        ],
+    },
+    {
+        name: "Tiefkühl",
+        subcategories: [
+            "Eis",
+            "Unbekannt", // Not available in Billa hierarchy, left blank
+            "Fertiggerichte",
+            "Fisch & Garnelen",
+            "Gemüse & Kräuter",
+            "Pommes Frites & Co.",
+            "Pizza & Baguette",
+            "Desserts & Früchte",
+        ],
+    },
+    {
+        name: "Grundnahrungsmittel",
+        subcategories: [
+            "Asia & Mexican Produkte",
+            "Baby",
+            "Backen",
+            "Essig & Öl",
+            "Fertiggerichte",
+            "Gewürze & Würzmittel",
+            "Honig, Marmelade & Co.",
+            "Konserven & Sauerwaren",
+            "Kuchen & Co.",
+            "Mehl & Getreideprodukte",
+            "Müsli & Cerealien",
+            "Reis, Teigwaren & Sugo",
+            "Saucen & Dressings",
+            "Spezielle Ernährung",
+            "Zucker & Süßstoffe",
+            "Fixprodukte",
+        ],
+    },
+    {
+        name: "Süßes & Salziges",
+        subcategories: ["Biskotten & Eiswaffeln", "Für kluge Naschkatzen", "Müsliriegel", "Chips & Co.", "Süßes"],
+    },
+    {
+        name: "Pflege",
+        subcategories: [
+            "Baby",
+            "Damenhygiene",
+            "Deodorants",
+            "Haarpflege & Haarfarben",
+            "Pflaster & Verbandsmaterial",
+            "Haut- & Lippenpflege",
+            "Mund- & Zahnhygiene",
+            "Rasierbedarf",
+            "Seife & Duschbäder",
+            "Sonnen- & Gelsenschutzmittel",
+            "Verhütungsmittel",
+            "Fußpflege",
+            "Strumpfhosen & Socken",
+        ],
+    },
+    {
+        name: "Haushalt",
+        subcategories: [
+            "Büro- & Schulartikel",
+            "Garten",
+            "Kleben & Befestigen",
+            "Küchenartikel",
+            "Küchenrollen & WC-Papier",
+            "Lampen & Batterien",
+            "Müllsäcke, Gefrierbeutel & Co.",
+            "Raumsprays & Kerzen",
+            "Reinigen & Pflegen",
+            "Taschentücher & Servietten",
+            "Waschmittel & Weichspüler",
+            "Schuhpflege",
+            "Kunststoffbehälter",
+            "Insektenschutz",
+            "Spielwaren",
+            "Hygiene-Schutzartikel",
+        ],
+    },
+    {
+        name: "Haustier",
+        subcategories: ["Hunde", "Katzen", "Nager", "Vögel"],
+    },
+];
+
 exports.globalUnits = {
     "stk.": { unit: "stk", factor: 1 },
     blatt: { unit: "stk", factor: 1 },