Mario Zechner 303d25ccb5 Categories for Billa & Spar, infra to add categories for other stores.
Billa maps directly to the canonical categories. Spar uses a mapping file stores/spar-categories.json.

Each store has a generateCategoryMapping() function which is called once in analysis.js:updateData() and analysis.js:replay(). The function is responsible for

* Fetching the latest categories
* Merging them with already mapped categories
* Reporting new categories that haven't been mapped yet
* Reporting categories that have been mapped but are no longer part of the latest set of categories
* Saving the merged mappings to disk

This schema might not work for all stores, in which case updateData() and replay() will use a knn approach to figure out the category for an item. See #81
2023-06-21 01:29:00 +02:00

119 lines
4.1 KiB

const axios = require("axios");
const util = require("util");
const exec = util.promisify(require("child_process").exec);
const utils = require("./utils");
const { decompress } = require("../site/model/items");
/**
 * Store-specific unit words (lower-case German packaging terms) mapped to a
 * target unit and a conversion factor.
 *
 * Every packaging word maps to "stk" with factor 1, except "waschgang",
 * which maps to "wg" with factor 1.
 *
 * NOTE(review): presumably consumed by utils.convertUnit() from
 * getCanonical(); the call site is not fully visible in this copy of the
 * file, so confirm against the repository.
 */
const units = {
    beutel: { unit: "stk", factor: 1 },
    bund: { unit: "stk", factor: 1 },
    packung: { unit: "stk", factor: 1 },
    portion: { unit: "stk", factor: 1 },
    rollen: { unit: "stk", factor: 1 },
    teebeutel: { unit: "stk", factor: 1 },
    waschgang: { unit: "wg", factor: 1 },
};
/**
 * Builds the canonical representation of a fetched item.
 *
 * Items already flagged with `isCanonical` are returned unchanged. Otherwise
 * quantity and unit are parsed out of `item.grammage`, the price out of
 * `item.currentPrice`, and the assembled fields are handed to
 * `utils.convertUnit`.
 *
 * NOTE(review): this copy of the file appears to be missing lines inside the
 * function (closing braces, and the braces / remaining arguments of the
 * `utils.convertUnit(` call). Confirm against the repository before relying
 * on the exact structure.
 *
 * @param {object} item - Fetched item; the visible code reads `isCanonical`,
 *     `grammage`, `currentPrice`, `id` and `name`.
 * @param {*} today - Value stored as `date` of the single price-history entry.
 * @returns {object} `item` itself when `item.isCanonical` is truthy, otherwise
 *     the result of `utils.convertUnit`.
 */
exports.getCanonical = function (item, today) {
// Already canonical (see the fallback path in fetchData): nothing to convert.
if (item.isCanonical) return item;
// Initial values; the grammage handling below reassigns both.
let quantity = 1,
unit = "kg";
if (item.grammage && item.grammage.length > 0) {
// Drop parenthesised remarks and turn the first decimal comma into a dot
// (a string pattern passed to replace() only affects the first occurrence).
let grammage = item.grammage
.replace(/\([^)]*\)/g, "")
.replace(",", ".")
let multiplier = 1;
// "<count>x<size>" form: the text before the "x" is parsed as a multiplier,
// the text after it is what gets parsed as quantity and unit.
if (grammage.indexOf("x") != -1) {
let tokens = grammage.split("x");
multiplier = Number.parseFloat(tokens[0]);
grammage = tokens[1];
// Space-separated form: first token is the quantity, second the unit.
let tokens = grammage.split(" ");
if (tokens.length > 1) {
quantity = Number.parseFloat(tokens[0]);
unit = tokens[1];
} else {
// No space: split a leading (optionally decimal) number from the trailing
// non-digit text. match() returns null when the text does not have that
// shape, in which case .slice() throws.
[quantity, unit] = grammage.match(/^(\d+(?:\.\d+)?)(\D+)$/).slice(1);
quantity = Number.parseFloat(quantity);
// Scale by the multi-pack multiplier (1 when no "x" was present).
quantity *= multiplier;
} else {
// No grammage given: count the item as one "Stk".
quantity = 1;
unit = "Stk";
// Price is the text before the first space of currentPrice, parsed with a
// decimal comma (presumably something like "1,99 €" — TODO confirm format).
let price = Number.parseFloat(item.currentPrice.split(" ")[0].replace(",", "."));
return utils.convertUnit(
id: item.id,
name: item.name,
// description: "", not available
// The history starts with a single entry: today's date and the parsed price.
priceHistory: [{ date: today, price }],
isWeighted: false,
bio: false,
/**
 * Fetches the REWE-DE item list.
 *
 * Primary path: shells out to `curl` against the REWE mobile product-search
 * endpoint (market 440405, service type PICKUP, 250 objects per page), parses
 * the first page to read `totalPages` and `products`, then issues one request
 * per remaining page.
 *
 * Fallback path: if anything in the primary path throws, a message is logged
 * and the compressed canonical data set is downloaded from
 * heissepreise.github.io via axios, decompressed, and every item is flagged
 * with `isCanonical = true`.
 *
 * NOTE(review): this copy of the file appears to be missing lines (closing
 * braces/parentheses and whatever consumes the results of the `exec` calls).
 * Confirm against the repository.
 *
 * @returns {Promise<Array>} The collected products, or the decompressed
 *     canonical items when the fallback path is taken.
 */
exports.fetchData = async function () {
// For some unholy reason, Axios returns 403 when accessing the endpoint
// Hack: use curl...
/*const agent = new https.Agent({
rejectUnauthorized: false
let axiosNoDefaults = axios.create({ headers: {} });
const headers = {
'Rd-Service-Types': 'PICKUP',
'Rd-Market-Id': '440405',
"User-Agent": "curl/7.84.0"
return (await axiosNoDefaults.get('https://mobile-api.rewe.de/api/v3/product-search?searchTerm=*&page=1&sorting=RELEVANCE_DESC&objectsPerPage=250&marketCode=440405&serviceTypes=PICKUP', { headers, httpsAgent: agent })).data;*/
try {
// pageId is interpolated into the request URL and post-incremented on every
// request, so successive calls ask for pages 1, 2, 3, ...
let pageId = 1;
// Request the first page through curl.
let result = (
await exec(
`curl -s "https://mobile-api.rewe.de/api/v3/product-search\?searchTerm\=\*\&page\=${pageId++}\&sorting\=RELEVANCE_DESC\&objectsPerPage\=250\&marketCode\=440405\&serviceTypes\=PICKUP" -H "Rd-Service-Types: PICKUP" -H "Rd-Market-Id: 440405"`
// The first page reports how many pages exist; its products seed the result.
const firstPage = JSON.parse(result);
const totalPages = firstPage.totalPages;
const items = [...firstPage.products];
// One request per remaining page; `i` only bounds the loop, the page number
// sent to the server comes from pageId.
// NOTE(review): as shown here the response of these requests is not stored or
// appended to `items` — lines may be missing from this view, TODO confirm.
for (let i = 2; i <= totalPages; i++) {
await exec(
`curl -s "https://mobile-api.rewe.de/api/v3/product-search\?searchTerm\=\*\&page\=${pageId++}\&sorting\=RELEVANCE_DESC\&objectsPerPage\=250\&marketCode\=440405\&serviceTypes\=PICKUP" -H "Rd-Service-Types: PICKUP" -H "Rd-Market-Id: 440405"`
return items;
} catch (e) {
// Fallback: use the published, already-canonical data set instead.
console.log("Failed to fetch REWE-DE data, either CURL is not installed, or CloudFlare protection kicked in.");
const compressedItems = (await axios.get("https://heissepreise.github.io/data/latest-canonical.reweDe.compressed.json")).data;
const items = decompress(compressedItems);
// Flag each item so getCanonical() returns it unchanged.
for (const item of items) {
item.isCanonical = true;
return items;
exports.initializeCategoryMapping = async () => {};
exports.mapCategory = (rawItem) => {};
exports.urlBase = "";