This commit is contained in:
Mario Zechner 2023-05-30 10:34:25 +02:00
parent c9cae1ec51
commit f26b5c3625
7 changed files with 144 additions and 13 deletions

3
.gitignore vendored
View File

@ -11,4 +11,5 @@ access.log
billa-2020.csv
report.html
spar-2020.csv
tmp-dir/
tmp-dir/
site/latest-canonical-compressed.json

View File

@ -85,6 +85,55 @@ function sortItems(items) {
});
}
// Keep this in sync with utils.js:decompress
/**
 * Flattens a list of canonical items into a compact, JSON-serializable form.
 *
 * All items are appended to one flat `data` array. Per item the fields are
 * written in this fixed order:
 *   store index (position of `item.store` in STORE_KEYS), id, name,
 *   number of price history entries, then one (date, price) pair per entry
 *   with every "-" removed from the date, followed by unit, quantity,
 *   isWeighted (1 or 0), bio (1 or 0) and a shortened URL.
 *
 * @param {Array<Object>} items - Items providing `store`, `id`, `name`,
 *   `priceHistory` (entries with a string `date` and a `price`), `unit`,
 *   `quantity`, `isWeighted`, `bio` and `url`.
 * @returns {{stores: Array, n: number, data: Array}} The store key list, the
 *   number of items, and the flat field stream.
 */
function compress(items) {
    const compressed = {
        stores: STORE_KEYS,
        n: items.length,
        data: [],
    };
    const data = compressed.data;
    // `const` keeps the loop variables local; without a declaration they
    // become implicit globals and throw in strict mode / ES modules.
    for (const item of items) {
        data.push(STORE_KEYS.indexOf(item.store));
        data.push(item.id);
        data.push(item.name);
        data.push(item.priceHistory.length);
        for (const price of item.priceHistory) {
            data.push(price.date.replaceAll("-", ""));
            data.push(price.price);
        }
        data.push(item.unit);
        data.push(item.quantity);
        data.push(item.isWeighted ? 1 : 0);
        data.push(item.bio ? 1 : 0);
        // Strip the store specific URL prefix. Exactly one URL slot must be
        // written per item, because the reader consumes the stream by position.
        switch (item.store) {
            case "billa":
                data.push(item.url.replace("https://shop.billa.at", ""));
                break;
            case "dm":
                // Empty slot: utils.js:decompress rebuilds this URL from the id.
                data.push("");
                break;
            case "hofer":
                data.push(item.url.replace("https://www.roksh.at/hofer/produkte/", ""));
                break;
            case "lidl":
                data.push(item.url.replace("https://www.lidl.at", ""));
                break;
            case "mpreis":
                // Empty slot: utils.js:decompress rebuilds this URL from the id.
                data.push("");
                break;
            case "spar":
                data.push(item.url.replace("https://www.interspar.at/shop/lebensmittel", ""));
                break;
            case "unimarkt":
                data.push(item.url.replace("https://shop.unimarkt.at", ""));
                break;
            default:
                // A store without a dedicated case keeps its full URL, so the
                // slot still exists and later fields do not shift.
                data.push(item.url ?? "");
                break;
        }
    }
    return compressed;
}
exports.compress = compress;
/// Given a directory of raw data of the form `$store-$date.json`, constructs
/// a canonical list of all products and their historical price data.
exports.replay = function(rawDataDir) {
@ -169,4 +218,4 @@ exports.updateData = async function (dataDir, done) {
if (done) done(items);
return items;
}
}

View File

@ -1,6 +1,13 @@
const fs = require("fs");
const analysis = require("./analysis");
/**
 * Publishes the canonical item file from `dataDir` into the `site` folder:
 * once as a verbatim copy and once in the compact form produced by
 * `analysis.compress`.
 *
 * @param {string} dataDir - Directory containing `latest-canonical.json`.
 */
function copyItemsToSite(dataDir) {
    const canonicalPath = `${dataDir}/latest-canonical.json`;

    // Verbatim copy first, exactly as the uncompressed file is stored.
    fs.copyFileSync(canonicalPath, `site/latest-canonical.json`);

    // Then parse the same source file and write its compressed counterpart.
    const rawJson = fs.readFileSync(canonicalPath);
    const compressedJson = JSON.stringify(analysis.compress(JSON.parse(rawJson)));
    fs.writeFileSync(`site/latest-canonical-compressed.json`, compressedJson);
}
(async () => {
const dataDir = 'data';
@ -9,17 +16,17 @@ const analysis = require("./analysis");
}
if (fs.existsSync(`${dataDir}/latest-canonical.json`)) {
fs.copyFileSync(`${dataDir}/latest-canonical.json`, `site/latest-canonical.json`);
copyItemsToSite(dataDir);
analysis.updateData(dataDir, (_newItems) => {
fs.copyFileSync(`${dataDir}/latest-canonical.json`, `site/latest-canonical.json`);
copyItemsToSite(dataDir);
});
} else {
await analysis.updateData(dataDir)
fs.copyFileSync(`${dataDir}/latest-canonical.json`, `site/latest-canonical.json`);
copyItemsToSite(dataDir);
}
setInterval(async () => {
items = await analysis.updateData(dataDir)
fs.copyFileSync(`${dataDir}/latest-canonical.json`, `site/latest-canonical.json`);
copyItemsToSite(dataDir);
}, 1000 * 60 * 60 * 24);
const express = require('express')
@ -27,8 +34,8 @@ const analysis = require("./analysis");
const app = express()
const port = process?.argv?.[2] ?? 3000
app.use(express.static('site'));
app.use(compression());
app.use(express.static('site'));
app.listen(port, () => {
console.log(`Example app listening on port ${port}`)

View File

@ -17,7 +17,10 @@ if (!fs.existsSync(dataDir)) {
(async function () {
try {
await analysis.updateData(dataDir);
console.log(`Wrote ${JSON.parse(fs.readFileSync(`${dataDir}/latest-canonical.json`)).length} to ${dataDir}/latest-canonical.json`);
const items = JSON.parse(fs.readFileSync(`${dataDir}/latest-canonical.json`));
const compressedItems = analysis.compress(items);
fs.writeFileSync(`${dataDir}/latest-canonical-compressed.json`, JSON.stringify(compressedItems));
console.log(`Wrote ${items.length} items to ${dataDir}/latest-canonical(-compressed).json`);
} catch(e) {
process.exit(1);
}

View File

@ -14,7 +14,7 @@ cp docs/latest-canonical.json tmp-data
npm install
node pages.js tmp-data
cp tmp-data/latest-canonical.json docs
cp tmp-data/latest-canonical* docs
cp site/* docs
pushd docs

View File

@ -6,6 +6,7 @@ console.log("Restoring data from raw data.");
/*console.log("Items: " + JSON.parse(fs.readFileSync("docker/data/latest-canonical.json")).length);
await analysis.updateData(dataDir);
fs.copyFileSync(`${dataDir}/latest-canonical.json`, `${dataDir}/latest-canonical-old.json`);*/
fs.writeFileSync(`${dataDir}/latest-canonical.json`, JSON.stringify(analysis.replay(dataDir), null, 2));
const items = analysis.replay(dataDir);
fs.writeFileSync(`${dataDir}/latest-canonical.json`, JSON.stringify(items, null, 2));
console.log(`Wrote ${JSON.parse(fs.readFileSync(`${dataDir}/latest-canonical.json`)).length} to ${dataDir}/latest-canonical.json`);
})();

View File

@ -85,10 +85,79 @@ function dom(el, html) {
return element;
}
async function loadItems() {
const response = await fetch("latest-canonical.json");
const items = await response.json();
/**
 * Rebuilds the item list from the flat structure written by the `compress`
 * function of the analysis module. Keep both in sync.
 *
 * Fields are read from `data` strictly by position, per item: store index,
 * id, name, number of price entries, one (date, price) pair per entry, unit,
 * quantity, isWeighted flag, bio flag and the shortened URL.
 *
 * @param {{stores: Array, n: number, data: Array}} compressedItems - The
 *   store key list, the number of encoded items, and the flat field stream.
 * @returns {Array<Object>} Items with `store`, `id`, `name`, `price` (price of
 *   the first history entry, or `undefined` when the history is empty),
 *   `priceHistory`, `isWeighted`, `unit`, `quantity`, `bio` and `url`.
 */
function decompress(compressedItems) {
    const items = [];
    const stores = compressedItems.stores;
    const data = compressedItems.data;
    const numItems = compressedItems.n;
    let i = 0; // read cursor into the flat data array
    while (items.length < numItems) {
        const store = stores[data[i++]];
        const id = data[i++];
        const name = data[i++];
        const numPrices = data[i++];
        const prices = [];
        for (let j = 0; j < numPrices; j++) {
            const date = data[i++];
            const price = data[i++];
            prices.push({
                // Dates are stored as 8 characters; re-insert the separators.
                date: `${date.substring(0, 4)}-${date.substring(4, 6)}-${date.substring(6, 8)}`,
                price,
            });
        }
        const unit = data[i++];
        const quantity = data[i++];
        // Flags are stored as the numbers 1 / 0.
        const isWeighted = data[i++] === 1;
        const bio = data[i++] === 1;
        let url = data[i++];
        // Restore the store specific URL prefix, or derive the URL from the
        // id for stores whose URL slot is empty. Unknown stores keep the
        // stored value as is.
        switch (store) {
            case "billa":
                url = "https://shop.billa.at" + url;
                break;
            case "dm":
                url = `https://www.dm.at/product-p${id}.html`;
                break;
            case "hofer":
                url = "https://www.roksh.at/hofer/produkte/" + url;
                break;
            case "lidl":
                url = "https://www.lidl.at" + url;
                break;
            case "mpreis":
                url = "https://www.mpreis.at/shop/p/" + id;
                break;
            case "spar":
                url = "https://www.interspar.at/shop/lebensmittel" + url;
                break;
            case "unimarkt":
                url = "https://shop.unimarkt.at" + url;
                break;
        }
        items.push({
            store,
            id,
            name,
            // The format allows zero price entries; do not abort the whole
            // list because a single item has an empty history.
            price: prices.length > 0 ? prices[0].price : undefined,
            priceHistory: prices,
            isWeighted,
            unit,
            quantity,
            bio,
            url,
        });
    }
    return items;
}
async function loadItems() {
now = performance.now();
const response = await fetch("latest-canonical-compressed.json");
const compressedItems = await response.json();
const items = decompress(compressedItems);
console.log("Loading compressed items took " + (performance.now() - now) / 1000 + " secs");
now = performance.now();
for (const item of items) {
item.search = item.name + " " + item.unit;
item.search = item.search.toLowerCase().replace(",", ".");
@ -106,6 +175,7 @@ async function loadItems() {
if (highestPriceBefore == -1) highestPriceBefore = item.price;
item.highestBefore = highestPriceBefore;
}
console.log("Processing items took " + (performance.now() - now) / 1000 + " secs");
return items;
}