/**
 * Packs the canonical item list into a compact, flat structure for the site.
 *
 * Keep this in sync with utils.js:decompress.
 *
 * The returned object has three fields:
 *   - `stores`: the STORE_KEYS array; items reference a store by its index.
 *   - `n`:      the number of items.
 *   - `data`:   one flat array holding, for each item and in this order:
 *               store index, id, name, number of price entries,
 *               then a (date, price) pair per price entry,
 *               unit, quantity, isWeighted (1/0), bio (1/0), shortened url.
 *
 * @param {Array<Object>} items Items with `store`, `id`, `name`,
 *     `priceHistory` (entries with `date` and `price`), `unit`, `quantity`,
 *     `isWeighted`, `bio` and, for most stores, `url`.
 * @returns {{stores: Array<string>, n: number, data: Array}} The packed items.
 */
function compress(items) {
    const compressed = {
        stores: STORE_KEYS,
        n: items.length,
        data: [],
    };
    const data = compressed.data;
    // `const` loop bindings: without a declaration these would be implicit
    // globals (and a ReferenceError in strict mode / ES modules).
    for (const item of items) {
        data.push(STORE_KEYS.indexOf(item.store));
        data.push(item.id);
        data.push(item.name);
        data.push(item.priceHistory.length);
        for (const price of item.priceHistory) {
            // Dates are stored without their dashes to save space.
            data.push(price.date.replaceAll("-", ""));
            data.push(price.price);
        }
        data.push(item.unit);
        data.push(item.quantity);
        data.push(item.isWeighted ? 1 : 0);
        data.push(item.bio ? 1 : 0);
        // Only the store-specific remainder of the URL is stored; the fixed
        // prefix is re-attached by decompress.
        switch (item.store) {
            case "billa":
                data.push(item.url.replace("https://shop.billa.at", ""));
                break;
            case "dm":
                // decompress rebuilds dm URLs from the item id.
                data.push("");
                break;
            case "hofer":
                data.push(item.url.replace("https://www.roksh.at/hofer/produkte/", ""));
                break;
            case "lidl":
                data.push(item.url.replace("https://www.lidl.at", ""));
                break;
            case "mpreis":
                // decompress rebuilds mpreis URLs from the item id.
                data.push("");
                break;
            case "spar":
                data.push(item.url.replace("https://www.interspar.at/shop/lebensmittel", ""));
                break;
            case "unimarkt":
                data.push(item.url.replace("https://shop.unimarkt.at", ""));
                break;
            default:
                // Every item must occupy exactly one URL slot. Skipping it for
                // a store without a dedicated case would shift all following
                // values and corrupt every later item when decompressing.
                data.push(item.url ?? "");
                break;
        }
    }
    return compressed;
}
fs.copyFileSync(`${dataDir}/latest-canonical.json`, `site/latest-canonical.json`); + copyItemsToSite(dataDir); } setInterval(async () => { items = await analysis.updateData(dataDir) - fs.copyFileSync(`${dataDir}/latest-canonical.json`, `site/latest-canonical.json`); + copyItemsToSite(dataDir); }, 1000 * 60 * 60 * 24); const express = require('express') @@ -27,8 +34,8 @@ const analysis = require("./analysis"); const app = express() const port = process?.argv?.[2] ?? 3000 - app.use(express.static('site')); app.use(compression()); + app.use(express.static('site')); app.listen(port, () => { console.log(`Example app listening on port ${port}`) diff --git a/pages.js b/pages.js index d806bb3..1a141a5 100644 --- a/pages.js +++ b/pages.js @@ -17,7 +17,10 @@ if (!fs.existsSync(dataDir)) { (async function () { try { await analysis.updateData(dataDir); - console.log(`Wrote ${JSON.parse(fs.readFileSync(`${dataDir}/latest-canonical.json`)).length} to ${dataDir}/latest-canonical.json`); + const items = JSON.parse(fs.readFileSync(`${dataDir}/latest-canonical.json`)); + const compressedItems = analysis.compress(items); + fs.writeFileSync(`${dataDir}/latest-canonical-compressed.json`, JSON.stringify(compressedItems)); + console.log(`Wrote ${items.length} items to ${dataDir}/latest-canonical(-compressed).json`); } catch(e) { process.exit(1); } diff --git a/pages.sh b/pages.sh index 4b2a921..d2b3c8a 100755 --- a/pages.sh +++ b/pages.sh @@ -14,7 +14,7 @@ cp docs/latest-canonical.json tmp-data npm install node pages.js tmp-data -cp tmp-data/latest-canonical.json docs +cp tmp-data/latest-canonical* docs cp site/* docs pushd docs diff --git a/restore.js b/restore.js index ced7e54..834677b 100644 --- a/restore.js +++ b/restore.js @@ -6,6 +6,7 @@ console.log("Restoring data from raw data."); /*console.log("Items: " + JSON.parse(fs.readFileSync("docker/data/latest-canonical.json")).length); await analysis.updateData(dataDir); fs.copyFileSync(`${dataDir}/latest-canonical.json`, 
/**
 * Expands the compact structure written by analysis.js:compress back into a
 * list of item objects. Keep this in sync with analysis.js:compress.
 *
 * `compressedItems.data` is one flat array holding, for each item and in this
 * order: store index, id, name, number of price entries, then a (date, price)
 * pair per price entry, unit, quantity, isWeighted (1/0), bio (1/0), url part.
 *
 * @param {{stores: Array<string>, n: number, data: Array}} compressedItems
 *     `stores` maps store indices to store keys, `n` is the item count.
 * @returns {Array<Object>} Items with `store`, `id`, `name`, `price` (the first
 *     price history entry's price, or `undefined` when the history is empty),
 *     `priceHistory`, `isWeighted`, `unit`, `quantity`, `bio` and `url`.
 * @throws {Error} If `data` ends before `n` items have been read.
 */
function decompress(compressedItems) {
    const items = [];
    const stores = compressedItems.stores;
    const data = compressedItems.data;
    const numItems = compressedItems.n;
    let i = 0;
    while (items.length < numItems) {
        const store = stores[data[i++]];
        const id = data[i++];
        const name = data[i++];
        const numPrices = data[i++];
        const prices = [];
        for (let j = 0; j < numPrices; j++) {
            const date = data[i++];
            const price = data[i++];
            prices.push({
                // Dates are stored without dashes; restore the dashed form.
                date: date.substring(0, 4) + "-" + date.substring(4, 6) + "-" + date.substring(6, 8),
                price,
            });
        }
        const unit = data[i++];
        const quantity = data[i++];
        const isWeighted = data[i++] === 1;
        const bio = data[i++] === 1;
        let url = data[i++];

        // Reading past the end yields only `undefined` values; fail loudly
        // instead of emitting garbage items.
        if (i > data.length) {
            throw new Error("Compressed item data is truncated: expected " + numItems + " items, got " + items.length);
        }

        // Re-attach the store-specific URL prefix (or rebuild the URL from the
        // id for stores whose URL slot is empty). Unknown stores keep the
        // stored value unchanged.
        switch (store) {
            case "billa":
                url = "https://shop.billa.at" + url;
                break;
            case "dm":
                url = `https://www.dm.at/product-p${id}.html`;
                break;
            case "hofer":
                url = "https://www.roksh.at/hofer/produkte/" + url;
                break;
            case "lidl":
                url = "https://www.lidl.at" + url;
                break;
            case "mpreis":
                url = "https://www.mpreis.at/shop/p/" + id;
                break;
            case "spar":
                url = "https://www.interspar.at/shop/lebensmittel" + url;
                break;
            case "unimarkt":
                url = "https://shop.unimarkt.at" + url;
                break;
        }
        items.push({
            store,
            id,
            name,
            // An item without any price entries must not abort the whole load.
            price: prices.length > 0 ? prices[0].price : undefined,
            priceHistory: prices,
            isWeighted,
            unit,
            quantity,
            bio,
            url,
        });
    }
    return items;
}