2023-05-20 15:38:29 +02:00
|
|
|
const fs = require("fs");
|
2023-05-29 02:13:52 +02:00
|
|
|
const stores = require("./stores");
|
|
|
|
// Store identifiers: the property names of the object exported by ./stores.
// clownCompress() writes this list into its output and encodes each item's
// store as an index into it.
const STORE_KEYS = Object.keys(stores);
|
2023-05-20 15:38:29 +02:00
|
|
|
|
2023-05-22 16:02:01 +02:00
|
|
|
/**
 * Ad-hoc analysis of the unit / grammage strings found in the scraped data.
 *
 * Reads `docker/data/latest-canonical.json` and the raw Hofer, Billa and Spar
 * dumps dated 2023-05-19 (all paths relative to the current working
 * directory), prints statistics to stdout and writes every Spar item for which
 * no unit string could be determined to `spar-no-grammage.json` in the current
 * working directory.
 *
 * Takes no parameters and returns nothing. Errors from `fs.readFileSync`,
 * `JSON.parse` and `fs.writeFileSync` propagate to the caller.
 */
function grammageAnalysis() {
    const readJson = (file) => JSON.parse(fs.readFileSync(file));

    // True when the token begins with an ASCII digit. A regex is used instead of
    // relational operators so that no string-to-number coercion takes place
    // (an empty string would otherwise coerce to 0 and count as a digit).
    const startsWithDigit = (token) => /^[0-9]/.test(token);

    // Splits on runs of whitespace and drops empty tokens, so leading, trailing
    // or repeated blanks never produce "" tokens.
    const tokenize = (text) => String(text).split(/\s+/).filter((token) => token.length > 0);

    const bump = (counts, key) => {
        counts[key] = (counts[key] || 0) + 1;
    };

    // Classifies one "<quantity> <unit> ..." string. Counts the unit token when
    // the string starts with a quantity followed by at least one more token,
    // otherwise counts the whole string as unmatched. Returns false when the
    // string contains no tokens at all.
    const tallyUnit = (text, unitCounts, unmatchedCounts) => {
        const tokens = tokenize(text);
        if (tokens.length === 0) {
            return false;
        }
        if (tokens.length >= 2 && startsWithDigit(tokens[0])) {
            bump(unitCounts, tokens[1]);
        } else {
            bump(unmatchedCounts, text);
        }
        return true;
    };

    // --- Canonical items: dump items with more than two price entries -------
    const items = readJson("docker/data/latest-canonical.json");
    // Ascending by number of price history entries. A comparator must take both
    // elements; the previous single-argument callback did not define an order.
    items.sort((a, b) => a.priceHistory.length - b.priceHistory.length);
    for (const item of items) {
        if (item.priceHistory.length > 2) {
            console.log(JSON.stringify(item, null, 2));
        }
    }

    // --- Canonical items: distinct unit strings -----------------------------
    // Only the keys are reported; prototype-less objects keep unit strings such
    // as "constructor" from colliding with inherited properties.
    const units = Object.create(null);
    const unitsSmall = Object.create(null);
    for (const item of items) {
        if (!item.unit) continue;
        const tokens = tokenize(item.unit);
        if (tokens.length === 0) continue;
        if (startsWithDigit(tokens[0])) {
            tokens.shift(); // drop the leading quantity
        }
        units[tokens.join(" ")] = true;
        // A unit consisting only of a quantity has no first word to record.
        if (tokens.length > 0) {
            unitsSmall[tokens[0]] = true;
        }
    }
    console.log(JSON.stringify(Object.keys(units), null, 2));
    console.log(Object.keys(units).length);
    console.log(JSON.stringify(Object.keys(unitsSmall), null, 2));
    console.log(Object.keys(unitsSmall).length);

    // --- Hofer: distinct UnitType values ------------------------------------
    const hofer = readJson("docker/data/hofer-2023-05-19.json");
    const unitTypes = {};
    for (const item of hofer) {
        unitTypes[item.UnitType] = true;
    }
    console.log(JSON.stringify(unitTypes, null, 2));

    // --- Billa: unit token frequency in data.grammage -----------------------
    {
        console.log("BILLA Units ===============");
        const billa = readJson("docker/data/billa-2023-05-19.json");
        const billaUnits = Object.create(null);
        const zeroUnits = Object.create(null);
        let noGrammage = 0;
        let zeroTokens = 0;
        for (const item of billa) {
            const grammage = item.data.grammage;
            if (!grammage) {
                noGrammage++;
                continue;
            }
            if (!tallyUnit(grammage, billaUnits, zeroUnits)) {
                zeroTokens++;
            }
        }
        console.log(JSON.stringify(billaUnits, null, 2));
        console.log(`no grammage: ${noGrammage}, zero tokens: ${zeroTokens}`);
        console.log(JSON.stringify(zeroUnits, null, 2));
    }

    // --- Spar: unit token frequency -----------------------------------------
    {
        console.log("SPAR Units ===============");
        const spar = readJson("docker/data/spar-2023-05-19.json");
        const sparUnits = Object.create(null);
        const zeroUnits = Object.create(null);
        const noGrammage = [];
        const noGrammageUnits = Object.create(null);
        let zeroTokens = 0;
        for (const item of spar) {
            const values = item.masterValues;
            let unit;
            if (values["quantity-selector"]) {
                // The unit is whatever follows the "/" in "price-per-unit".
                // A missing field or a value without "/" leaves the unit
                // undefined instead of throwing.
                const pricePerUnit = values["price-per-unit"];
                const parts = typeof pricePerUnit === "string" ? pricePerUnit.split("/") : [];
                unit = parts.length > 1 ? parts[1].trim() : undefined;
            } else {
                unit = values["short-description-3"];
            }

            if (!unit) {
                noGrammage.push(item);
                bump(noGrammageUnits, values["sales-unit"]);
                continue;
            }
            if (!tallyUnit(unit, sparUnits, zeroUnits)) {
                zeroTokens++;
            }
        }
        console.log(JSON.stringify(sparUnits, null, 2));
        console.log(`no grammage: ${noGrammage.length}, zero tokens: ${zeroTokens}`);
        console.log(JSON.stringify(zeroUnits, null, 2));
        console.log(JSON.stringify(noGrammageUnits, null, 2));
        fs.writeFileSync("spar-no-grammage.json", JSON.stringify(noGrammage, null, 2));
    }
}
|
|
|
|
|
2023-05-22 16:02:01 +02:00
|
|
|
/**
 * Converts `momentum-cart.csv` into the cart file `site/momentum-cart.json`.
 *
 * Each non-blank CSV line holds two whitespace-separated ids: first the Spar
 * id, then the Billa id. Both are resolved against
 * `docker/data/latest-canonical.json`, where an item is keyed by its `sparId`
 * when it has one and by its `id` otherwise. When both items are found they are
 * appended to the cart (Spar item first). When either is missing, a message and
 * the offending line are logged and the pair is skipped.
 *
 * All paths are relative to the current working directory; the `site`
 * directory must already exist. Takes no parameters and returns nothing.
 */
function momentumCartConversion() {
    const items = JSON.parse(fs.readFileSync("docker/data/latest-canonical.json"));

    // Prototype-less so ids can never resolve to inherited Object properties.
    const lookup = Object.create(null);
    for (const item of items) {
        lookup[item.sparId ? item.sparId : item.id] = item;
    }

    const lines = fs.readFileSync("momentum-cart.csv").toString().split(/\r?\n/);
    const cart = {
        name: "Momentum Eigenmarken Vergleich",
        items: [],
    };

    for (const line of lines) {
        const trimmed = line.trim();
        // Blank lines (e.g. after the file's trailing newline) carry no ids and
        // would otherwise be reported as a missing Spar item.
        if (trimmed === "") continue;

        // Split the trimmed text so leading whitespace cannot shift the columns.
        const [sparId, billaId] = trimmed.split(/\s+/);
        const sparItem = lookup[sparId];
        const billaItem = lookup[billaId];

        if (!sparItem) {
            console.log(`Spar item ${sparId} not found`);
            console.log(line);
            continue;
        }
        if (!billaItem) {
            console.log(`Billa item ${billaId} not found`);
            console.log(line);
            continue;
        }

        cart.items.push(sparItem);
        cart.items.push(billaItem);
    }

    fs.writeFileSync("site/momentum-cart.json", JSON.stringify(cart, null, 2));
}
|
|
|
|
|
2023-05-26 16:05:42 +02:00
|
|
|
/**
 * Unwraps raw Spar dumps that were stored as an object with a `hits` property.
 *
 * Every file in `dataDir` whose name starts with `spar-` and does not contain
 * `canonical` is parsed as JSON. When the parsed value has a truthy `hits`
 * property, the file is overwritten with just that `hits` value
 * (pretty-printed with two-space indentation). Files without `hits` are left
 * untouched. The selected file list is logged up front and each rewrite is
 * logged as it happens.
 *
 * @param {string} dataDir Directory containing the Spar data files.
 */
function fixSparHistoricalData(dataDir) {
    const files = fs
        .readdirSync(dataDir)
        .filter((name) => name.startsWith("spar-") && !name.includes("canonical"));
    console.log(files);

    for (const file of files) {
        const filePath = `${dataDir}/${file}`;
        const items = JSON.parse(fs.readFileSync(filePath));
        // `items &&` guards against a file whose JSON content is `null`.
        if (items && items.hits) {
            console.log(`Rewriting ${file}`);
            fs.writeFileSync(filePath, JSON.stringify(items.hits, null, 2));
        }
    }
}
|
|
|
|
|
2023-05-27 12:33:18 +02:00
|
|
|
const nReadlines = require('n-readlines');
|
|
|
|
|
|
|
|
/**
 * Converts a semicolon-separated "dossier" CSV into one raw store dump per date.
 *
 * The first line of `file` is discarded (presumably a header row - confirm
 * against the input). Every following line is split on ";" and these columns
 * are used: 0 = date written as `DD.MM.YY`, 3 = name, 4 = article number
 * (optionally prefixed with "ARTIKELNUMMER: " or "Art. Nr.: "), 5 = producer,
 * 6 = unit, 7 = price (may contain "€" and a decimal comma).
 *
 * Lines whose article cannot be found in `${dataDir}/latest-canonical.json` are
 * counted as "not found" and skipped. The remaining lines are grouped by date
 * and written to `${dataDir}/${store}-${date}.json`, shaped like a raw Spar
 * record (`masterValues`) or a raw Billa record (`data`) depending on the store.
 *
 * @param {string} dataDir Directory holding `latest-canonical.json`; output
 *     files are written here as well.
 * @param {string} file Path of the CSV to convert, opened as given (it is NOT
 *     resolved against `dataDir`). A name starting with "spar" selects the
 *     Spar format; anything else is treated as Billa.
 */
function convertDossierData(dataDir, file) {
    console.log(`Converting ${file}`);

    // Index canonical items by "<store><id>" and, for items that carry a
    // sparId, additionally by "<store>-<sparId>".
    const lookup = Object.create(null);
    for (const item of JSON.parse(fs.readFileSync(`${dataDir}/latest-canonical.json`))) {
        lookup[item.store + item.id] = item;
        if (item.sparId) {
            lookup[item.store + "-" + item.sparId] = item;
        }
    }

    const lines = new nReadlines(file);
    const itemsPerDate = {};
    const store = file.startsWith("spar") ? "spar" : "billa";

    lines.next(); // discard the first line

    let itemsTotal = 0;
    let notFound = 0;
    let line;
    while ((line = lines.next())) {
        const tokens = line.toString("utf-8").split(";");
        // Blank or truncated lines lack the price column; skip them instead of
        // failing on `tokens[7]`.
        if (tokens.length < 8) continue;
        itemsTotal++;

        const [day, month, year] = tokens[0].split(".");
        // NOTE(review): prefixing "20" assumes a two-digit year - confirm.
        const date = "20" + year + "-" + month + "-" + day;
        const producer = tokens[5];
        const name = tokens[3];
        const unit = tokens[6];
        const price = Number.parseFloat(tokens[7].replace("€", "").trim().replace(",", "."));
        const id = tokens[4].replace("ARTIKELNUMMER: ", "").replace("Art. Nr.: ", "");

        const item = lookup[store + id] || lookup[store + "-" + id];
        if (!item) {
            // console.log("Couldn't find item " + name);
            notFound++;
            continue;
        }

        let items = itemsPerDate[date];
        if (!items) {
            items = [];
            itemsPerDate[date] = items;
        }

        if (store == "spar") {
            items.push({
                masterValues: {
                    "code-internal": item.id,
                    "product-number": id,
                    price,
                    title: producer,
                    "short-description": name,
                    "short-description-3": unit,
                    bioLevel: "",
                },
            });
        } else {
            items.push({
                data: {
                    articleId: id,
                    name: name,
                    price: {
                        final: price,
                    },
                    grammagePriceFactor: 1,
                    grammage: unit,
                },
            });
        }
    }
    console.log("total: " + itemsTotal);
    console.log("not found: " + notFound);

    const dates = Object.keys(itemsPerDate).sort((a, b) => b.localeCompare(a));
    for (const date of dates) {
        fs.writeFileSync(`${dataDir}/${store}-${date}.json`, JSON.stringify(itemsPerDate[date], null, 2));
    }
    console.log(`Wrote files for ${file}`);
}
|
2023-05-29 02:13:52 +02:00
|
|
|
|
|
|
|
/**
 * Writes a compact, flattened copy of `${dataDir}/latest-canonical.json` to
 * `${dataDir}/clown.json`.
 *
 * The output object has three properties: `stores` (the STORE_KEYS list), `n`
 * (number of items) and `data`, a single flat array. For every item, `data`
 * receives, in this order:
 *   store index into `stores`, id, name, number of price entries,
 *   then for each price entry: date with the dashes removed, price,
 *   then unit, quantity, isWeighted (1/0), bio (1/0), shortened URL.
 *
 * The shortened URL is the item URL with the store's known prefix removed, or
 * an empty string for stores without a known prefix.
 *
 * @param {string} dataDir Directory to read `latest-canonical.json` from and
 *     write `clown.json` to.
 */
function clownCompress(dataDir) {
    // URL prefix stripped per store. Stores not listed here ("dm" and "mpreis"
    // among them) contribute an empty URL field.
    const urlPrefixes = new Map([
        ["billa", "https://shop.billa.at"],
        ["hofer", "https://www.roksh.at/hofer/produkte/"],
        ["lidl", "https://www.lidl.at"],
        ["spar", "https://www.interspar.at/shop/lebensmittel"],
        ["unimarkt", "https://shop.unimarkt.at"],
    ]);

    const items = JSON.parse(fs.readFileSync(`${dataDir}/latest-canonical.json`));
    const compressed = {
        stores: STORE_KEYS,
        n: items.length,
        data: [],
    };
    const data = compressed.data;

    for (const item of items) {
        data.push(STORE_KEYS.indexOf(item.store), item.id, item.name, item.priceHistory.length);

        for (const price of item.priceHistory) {
            // Remove every dash. A string pattern only replaces the first
            // match, which left dates half-compacted.
            // NOTE(review): confirm the reader of clown.json expects dash-free dates.
            data.push(price.date.replace(/-/g, ""), price.price);
        }

        data.push(item.unit, item.quantity, item.isWeighted ? 1 : 0, item.bio ? 1 : 0);

        // Always emit the URL field, even for a store with no known prefix, so
        // every record keeps the same layout in the flat array.
        const prefix = urlPrefixes.get(item.store);
        data.push(prefix === undefined ? "" : item.url.replace(prefix, ""));
    }

    fs.writeFileSync(`${dataDir}/clown.json`, JSON.stringify(compressed));
}
|
|
|
|
|
|
|
|
// Script entry point: build data/clown.json from data/latest-canonical.json.
clownCompress("data");
|
|
|
|
|
2023-05-27 12:33:18 +02:00
|
|
|
// momentumCartConversion();
|
2023-05-29 02:13:52 +02:00
|
|
|
// convertDossierData("data", "spar-2020.csv");
|
|
|
|
// convertDossierData("data", "billa-2020.csv");
|
2023-05-27 12:33:18 +02:00
|
|
|
|