Remove binary encoding, web worker, clean-up.

This commit is contained in:
Mario Zechner 2023-06-18 23:23:02 +02:00
parent be552fcd9e
commit 6569b17da2
10 changed files with 1419 additions and 435 deletions

View File

@ -47,7 +47,7 @@ Merged price history
App listening on port 3000
```
Once the app is listening per default on port 3000, open <http://localhost:3000> in your browser.\
Once the app is listening per default on port 3000, open <http://localhost:3000> in your browser.
Subsequent starts will fetch the data asynchronously, so you can start working immediately.

View File

@ -149,117 +149,6 @@ function sortItems(items) {
});
}
/**
 * Encodes items into a compact little-endian binary representation.
 *
 * Layout:
 *   - uint8 store count, then per store key: uint16 byte length + UTF-8 bytes
 *   - uint32 dictionary size, then per word: uint8 byte length + UTF-8 bytes
 *   - per item: uint8 id length + id, uint8 flags, uint16 quantity,
 *     uint8 store index, uint8 token count + one 3-byte id per token,
 *     uint16 url length + url (length 0 when there is no url),
 *     uint16 price count, then per price: uint16 cents + uint16 packed date
 *
 * @param {Array<Object>} items Items providing id, name, store, unit, quantity,
 *     bio, isWeighted, an optional url and a priceHistory of { date, price }.
 * @returns {Buffer} The encoded bytes.
 */
function compressBinary(items) {
    const buffer = [];
    const pushBytes = (bytes) => {
        for (const byte of bytes) buffer.push(byte);
    };
    const pushUInt16 = (value) => {
        const bytes = Buffer.allocUnsafe(2);
        bytes.writeUInt16LE(value, 0);
        pushBytes(bytes);
    };
    const pushUInt32 = (value) => {
        const bytes = Buffer.allocUnsafe(4);
        bytes.writeUInt32LE(value, 0);
        pushBytes(bytes);
    };

    // Header: the store keys, so items can reference a store by index.
    buffer.push(STORE_KEYS.length);
    for (const key of STORE_KEYS) {
        const nameBuffer = Buffer.from(key, "utf8");
        pushUInt16(nameBuffer.length);
        pushBytes(nameBuffer);
    }

    // Word dictionary. A Map is required here: with a plain object the id 0 is
    // falsy (so the first word would be registered twice) and names such as
    // "constructor" would resolve to inherited prototype members.
    const dictionary = new Map();
    const words = [];
    for (const item of items) {
        for (const token of item.name.split(/\s+/)) {
            if (dictionary.has(token)) continue;
            dictionary.set(token, words.length);
            words.push(token);
            // The length prefix is a single byte holding the UTF-8 byte length.
            if (Buffer.byteLength(token, "utf8") > 255) {
                console.log("Dictionary word > 255 bytes: " + token);
            }
        }
    }
    pushUInt32(words.length);
    for (const word of words) {
        const wordBuffer = Buffer.from(word, "utf8");
        buffer.push(wordBuffer.length);
        pushBytes(wordBuffer);
    }

    for (const item of items) {
        const idBuffer = Buffer.from("" + item.id, "utf8");
        buffer.push(idBuffer.length);
        pushBytes(idBuffer);

        let flagsByte = 0;
        if (item.bio) flagsByte |= 1;
        if (item.isWeighted) flagsByte |= 2;
        if (item.unit === "ml") flagsByte |= 4;
        if (item.unit === "g") flagsByte |= 8;
        if (item.unit === "stk") flagsByte |= 16;
        if (item.unit === "cm") flagsByte |= 32;
        if (item.unit === "wg") flagsByte |= 64;
        buffer.push(flagsByte);

        // Report before clamping, otherwise the warning can never fire.
        if (item.quantity > 64000) {
            console.log(`Item quantity > 64000 ${item.id} - ${item.store} - ${item.name}`);
        }
        pushUInt16(Math.min(64000, item.quantity));

        buffer.push(STORE_KEYS.findIndex((store) => store == item.store));

        const tokenIds = item.name.split(/\s+/).map((token) => dictionary.get(token));
        buffer.push(tokenIds.length);
        for (const tokenId of tokenIds) {
            // Only the three low bytes of the little-endian id are stored.
            const tokenIdBuffer = Buffer.allocUnsafe(4);
            tokenIdBuffer.writeUInt32LE(tokenId, 0);
            buffer.push(tokenIdBuffer[0], tokenIdBuffer[1], tokenIdBuffer[2]);
        }

        if (item.url !== undefined) {
            const urlBuffer = Buffer.from(item.url, "utf8");
            pushUInt16(urlBuffer.length);
            pushBytes(urlBuffer);
        } else {
            buffer.push(0);
            buffer.push(0);
        }

        pushUInt16(item.priceHistory.length);
        for (const priceEntry of item.priceHistory) {
            // NOTE(review): a price of 999 is rewritten to 9.99 in place on the
            // caller's object, as before - confirm this mutation is intended.
            if (priceEntry.price == 999) priceEntry.price = 9.99;
            let price = Math.round(priceEntry.price * 100);
            if (price > 64000) {
                console.log(`Item price > 64000 ${item.id} - ${item.store} - ${item.name}`);
                price = 64000;
            }
            pushUInt16(price);
            pushUInt16(dateToUint16(priceEntry.date));
        }
    }
    return Buffer.from(buffer);
}
exports.compressBinary = compressBinary;
// Keep this in sync with utils.js:decompress
function compress(items) {
const compressed = {

View File

@ -108,7 +108,6 @@ async function bundleJS(inputDir, outputDir, watch) {
changes: `${inputDir}/changes.js`,
settings: `${inputDir}/settings.js`,
index: `${inputDir}/index.js`,
"items-loader": `${inputDir}/model/items-loader.js`,
},
bundle: true,
sourcemap: true,

View File

@ -13,7 +13,6 @@ function copyItemsToSite(dataDir) {
for (const store of analysis.STORE_KEYS) {
const storeItems = items.filter((item) => item.store === store);
analysis.writeJSON(`site/output/data/latest-canonical.${store}.compressed.json`, storeItems, false, 0, true);
fs.writeFileSync(`site/output/data/latest-canonical.${store}.bin.json`, analysis.compressBinary(storeItems));
}
}

View File

@ -6,7 +6,7 @@
<a href="https://github.com/badlogic/heissepreise"><svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24"><path d="M12 0c-6.626 0-12 5.373-12 12 0 5.302 3.438 9.8 8.207 11.387.599.111.793-.261.793-.577v-2.234c-3.338.726-4.033-1.416-4.033-1.416-.546-1.387-1.333-1.756-1.333-1.756-1.089-.745.083-.729.083-.729 1.205.084 1.839 1.237 1.839 1.237 1.07 1.834 2.807 1.304 3.492.997.107-.775.418-1.305.762-1.604-2.665-.305-5.467-1.334-5.467-5.931 0-1.311.469-2.381 1.236-3.221-.124-.303-.535-1.524.117-3.176 0 0 1.008-.322 3.301 1.23.957-.266 1.983-.399 3.003-.404 1.02.005 2.047.138 3.006.404 2.291-1.552 3.297-1.23 3.297-1.23.653 1.653.242 2.874.118 3.176.77.84 1.235 1.911 1.235 3.221 0 4.609-2.807 5.624-5.479 5.921.43.372.823 1.102.823 2.222v3.293c0 .319.192.694.801.576 4.765-1.589 8.199-6.086 8.199-11.386 0-6.627-5.373-12-12-12z"/></svg></a>
</div>
<div class="flex align-center justify-center gap-2 pt-4 pb-2">
<span>Historische Daten von <a href="http://h.43z.one" style="text-decoration: underline;">@h43z</a> &amp; <a href="https://www.dossier.at/dossiers/supermaerkte/quellen/anatomie-eines-supermarkts-die-methodik/">Dossier</a></span>
<span>Historische Daten von <a href="http://h.43z.one" style="text-decoration: underline;">@h43z</a> &amp; <a href="https://www.dossier.at/dossiers/supermaerkte/quellen/anatomie-eines-supermarkts-die-methodik/" style="text-decoration: underline;">Dossier</a></span>
</div>
<small class="text-center mb-6">
<p>Alle Angaben ohne Gewähr, Irrtümer vorbehalten. <br />

View File

@ -63,18 +63,6 @@ exports.today = () => {
return `${year}-${month}-${day}`;
};
exports.dateToUint16 = (dateString) => {
const [year, month, day] = dateString.split("-").map(Number);
return (((year - 2000) << 9) | (month << 5) | day) & 0xffff;
};
exports.uint16ToDate = (encodedDate) => {
const year = (encodedDate >> 9) + 2000;
const month = (encodedDate >> 5) & 0xf;
const day = encodedDate & 0x1f;
return `${year}-${month.toString().padStart(2, "0")}-${day.toString().padStart(2, "0")}`;
};
exports.fetchJSON = async (url) => {
const response = await fetch(url);
return await response.json();

View File

@ -1,289 +0,0 @@
const { deltaTime, log, uint16ToDate } = require("../js/misc");
const { stores, STORE_KEYS } = require("./stores");
/**
 * Decodes the little-endian binary item format into plain item objects.
 *
 * Reads the store-name table, then the word dictionary, then item records
 * until the end of the buffer. Each item's name is rebuilt by joining its
 * dictionary words with single spaces, and its `price` is taken from the
 * first price-history entry.
 *
 * @param {ArrayBuffer} buffer The encoded data.
 * @returns {Array<Object>} The decoded items.
 */
function decompressBinary(buffer) {
    const view = new DataView(buffer);
    const decoder = new TextDecoder("utf-8");
    let pos = 0;

    // Small cursor-based readers; each one advances `pos` past what it read.
    const readU8 = () => view.getUint8(pos++);
    const readU16 = () => {
        const value = view.getUint16(pos, true);
        pos += 2;
        return value;
    };
    const readText = (byteLength) => {
        const text = decoder.decode(new Uint8Array(buffer, pos, byteLength));
        pos += byteLength;
        return text;
    };

    // Store-name table: uint8 count, then uint16-prefixed names.
    const storeNames = [];
    for (let remaining = readU8(); remaining > 0; remaining--) {
        storeNames.push(readText(readU16()));
    }

    // Word dictionary: uint32 count, then uint8-prefixed words.
    const numWords = view.getUint32(pos, true);
    pos += 4;
    const words = new Array(numWords);
    for (let w = 0; w < numWords; w++) {
        words[w] = readText(readU8());
    }

    // Unit flags are applied in this order, so a later matching bit wins.
    const unitFlags = [
        [4, "ml"],
        [8, "g"],
        [16, "stk"],
        [32, "cm"],
        [64, "wg"],
    ];

    const objects = [];
    while (pos < buffer.byteLength) {
        const obj = {};
        obj.id = readText(readU8());

        const flags = readU8();
        obj.bio = (flags & 1) !== 0;
        obj.isWeighted = (flags & 2) !== 0;
        for (const [bit, unit] of unitFlags) {
            if (flags & bit) obj.unit = unit;
        }

        obj.quantity = readU16();
        obj.store = storeNames[readU8()];

        // Token ids are stored as three bytes, least significant first.
        const numTokens = readU8();
        let name = "";
        for (let t = 0; t < numTokens; t++) {
            const low = readU8();
            const mid = readU8();
            const high = readU8();
            if (t > 0) name += " ";
            name += words[(high << 16) | (mid << 8) | low];
        }
        obj.name = name;

        // A zero length means the item has no url.
        const urlLength = readU16();
        obj.url = urlLength !== 0 ? readText(urlLength) : undefined;

        const priceCount = readU16();
        obj.priceHistory = new Array(priceCount);
        for (let p = 0; p < priceCount; p++) {
            const price = readU16() / 100;
            const date = uint16ToDate(readU16());
            obj.priceHistory[p] = { date, price };
        }
        obj.price = obj.priceHistory[0].price;

        objects.push(obj);
    }
    return objects;
}
/**
 * Expands the flat "compressed JSON" representation into item objects.
 *
 * `data` is one flat array consumed strictly in order. Per item it holds:
 * store index, id, name, price count, then (date index, price) pairs,
 * followed by unit, quantity, isWeighted flag, bio flag and url.
 *
 * @param {Object} compressedItems Object with `stores`, `dates`, `data`, `n`.
 * @returns {Array<Object>} The expanded items; `price` is the first history price.
 */
function decompress(compressedItems) {
    const { stores: storeLookup, data, dates, n: numItems } = compressedItems;

    let cursor = 0;
    const next = () => data[cursor++];
    // Dates are stored as "YYYYMMDD" and exposed as "YYYY-MM-DD".
    const toIsoDate = (raw) => `${raw.substring(0, 4)}-${raw.substring(4, 6)}-${raw.substring(6, 8)}`;

    const items = new Array(numItems);
    for (let index = 0; index < numItems; index++) {
        const store = storeLookup[next()];
        const id = next();
        const name = next();

        const numPrices = next();
        const priceHistory = new Array(numPrices);
        for (let p = 0; p < numPrices; p++) {
            const rawDate = dates[next()];
            const price = next();
            priceHistory[p] = { date: toIsoDate(rawDate), price };
        }

        const unit = next();
        const quantity = next();
        const isWeighted = next() == 1;
        const bio = next() == 1;
        const url = next();

        items[index] = {
            store,
            id,
            name,
            price: priceHistory[0].price,
            priceHistory,
            isWeighted,
            unit,
            quantity,
            bio,
            url,
        };
    }
    return items;
}
/**
 * Prepares loaded items for use: builds a lookup keyed by store + id, attaches
 * derived read-only getters, interns repeated values, adds a lowercase
 * `search` string and per-entry unit prices, and sorts the array in place by
 * store and then by name.
 *
 * @param {Array<Object>} items Items to process (mutated and sorted in place).
 * @returns {{items: Array<Object>, lookup: Object}} The same array plus the lookup.
 */
function processItems(items) {
    const start = performance.now();
    const lookup = {};

    // Value pool: equal values share the first instance that was seen.
    const pool = new Map();
    const intern = (value) => {
        if (!pool.has(value)) {
            pool.set(value, value);
        }
        return pool.get(value);
    };

    // Prices of gram/millilitre items are scaled by 1000.
    const unitFactor = (unit) => (unit == "g" || unit == "ml" ? 1000 : 1);
    const oldestEntry = (history) => history[history.length - 1];

    const getters = {
        unitPrice: {
            get() {
                const factor = unitFactor(this.unit);
                return (this.price / this.quantity) * factor;
            },
        },
        numPrices: {
            get() {
                return this.priceHistory.length;
            },
        },
        date: {
            get() {
                return this.priceHistory[0].date;
            },
        },
        priceOldest: {
            get() {
                return oldestEntry(this.priceHistory).price;
            },
        },
        dateOldest: {
            get() {
                return oldestEntry(this.priceHistory).date;
            },
        },
    };
    // price1/date1 and price2/date2 expose the 2nd and 3rd history entries,
    // falling back to 0 / null when the history is shorter.
    for (const n of [1, 2]) {
        getters[`price${n}`] = {
            get() {
                const entry = this.priceHistory[n];
                return entry ? entry.price : 0;
            },
        };
        getters[`date${n}`] = {
            get() {
                const entry = this.priceHistory[n];
                return entry ? entry.date : null;
            },
        };
    }

    const prepareItem = (item) => {
        lookup[item.store + item.id] = item;
        Object.defineProperties(item, getters);

        item.store = intern(item.store);
        item.id = intern(item.id);
        item.name = intern(item.name);
        item.category = intern(item.category);
        item.price = intern(item.price);
        for (const entry of item.priceHistory) {
            entry.date = intern(entry.date);
            entry.price = intern(entry.price);
        }
        item.unit = intern(item.unit);
        item.quantity = intern(item.quantity);

        // Only the first comma is turned into a dot.
        const searchText = item.name + " " + item.quantity + " " + item.unit;
        item.search = intern(searchText.toLowerCase().replace(",", "."));

        const factor = unitFactor(item.unit);
        for (const entry of item.priceHistory) {
            entry.unitPrice = (entry.price / item.quantity) * factor;
        }
    };
    items.forEach((item) => prepareItem(item));

    const compare = (a, b) => (a < b ? -1 : a > b ? 1 : 0);
    items.sort((a, b) => compare(a.store, b.store) || compare(a.name, b.name));

    log(`Loader - processing ${items.length} items took ${deltaTime(start).toFixed(4)} secs`);
    return { items, lookup };
}
exports.loadItems = async (settings) => {
let start = performance.now();
const compressedItemsPerStore = [];
log(`Loader - load using JSON: ${settings.useJson}`);
for (const store of STORE_KEYS) {
compressedItemsPerStore.push(
new Promise(async (resolve) => {
let start = performance.now();
try {
const useJSON = true; // settings.useJson;
if (useJSON) {
const response = await fetch(`data/latest-canonical.${store}.compressed.json`);
const json = await response.json();
log(`Loader - loading compressed items for ${store} took ${deltaTime(start)} secs`);
start = performance.now();
let items = decompress(json);
log(`Loader - Decompressing items for ${store} took ${deltaTime(start)} secs`);
resolve(items);
} else {
const response = await fetch(`data/latest-canonical.${store}.bin.json`);
const binary = await response.arrayBuffer();
log(`Loader - loading compressed binary items for ${store} took ${deltaTime(start)} secs`);
start = performance.now();
let items = decompressBinary(binary);
log(`Loader - Decompressing items for ${store} took ${deltaTime(start)} secs`);
resolve(items);
}
} catch (e) {
log(`Loader - error while loading compressed items for ${store} ${e.message}`);
resolve([]);
}
})
);
}
const items = [].concat(...(await Promise.all(compressedItemsPerStore)));
log(`Loader - loaded ${items.length} items took ${deltaTime(start).toFixed(4)} secs`);
const result = processItems(items);
log(`Loader - total loading took ${deltaTime(start).toFixed(4)} secs`);
return result;
};
// Message handler: loads the items using the settings carried by the incoming
// message and posts the loader's result back.
onmessage = async ({ data: { settings } }) => {
    const payload = await exports.loadItems(settings);
    postMessage(payload);
};

View File

@ -1,6 +1,7 @@
const { Model } = require("./model");
const { STORE_KEYS } = require("./stores");
const { Settings } = require("./settings");
const { loadItems } = require("./items-loader");
const { log, deltaTime } = require("../js/misc");
class Items extends Model {
constructor() {
@ -29,22 +30,181 @@ class Items extends Model {
async load() {
const settings = new Settings();
if (window.Worker && false) {
const self = this;
return new Promise((resolve, reject) => {
const loader = new Worker("items-loader.js");
loader.onmessage = (event) => {
self._items = event.data.items;
self._lookup = event.data.lookup;
resolve();
};
loader.postMessage({ settings });
});
} else {
const { items, lookup } = await loadItems(settings);
this._items = items;
this._lookup = lookup;
let start = performance.now();
const compressedItemsPerStore = [];
for (const store of STORE_KEYS) {
compressedItemsPerStore.push(
new Promise(async (resolve) => {
let start = performance.now();
try {
const response = await fetch(`data/latest-canonical.${store}.compressed.json`);
const json = await response.json();
log(`Loader - loading compressed items for ${store} took ${deltaTime(start)} secs`);
start = performance.now();
let items = this.decompress(json);
log(`Loader - Decompressing items for ${store} took ${deltaTime(start)} secs`);
resolve(items);
} catch (e) {
log(`Loader - error while loading compressed items for ${store} ${e.message}`);
resolve([]);
}
})
);
}
let items = [].concat(...(await Promise.all(compressedItemsPerStore)));
log(`Loader - loaded ${items.length} items took ${deltaTime(start).toFixed(4)} secs`);
const result = this.processItems(items);
log(`Loader - total loading took ${deltaTime(start).toFixed(4)} secs`);
this._items = result.items;
this._lookup = result.lookup;
}
processItems(items) {
const lookup = {};
const start = performance.now();
const interns = new Map();
const intern = (value) => {
if (interns.has(value)) {
return interns.get(value);
} else {
interns.set(value, value);
return value;
}
};
const getters = {
unitPrice: {
get() {
const unitPriceFactor = this.unit == "g" || this.unit == "ml" ? 1000 : 1;
return (this.price / this.quantity) * unitPriceFactor;
},
},
numPrices: {
get() {
return this.priceHistory.length;
},
},
date: {
get() {
return this.priceHistory[0].date;
},
},
priceOldest: {
get() {
return this.priceHistory[this.priceHistory.length - 1].price;
},
},
dateOldest: {
get() {
return this.priceHistory[this.priceHistory.length - 1].date;
},
},
};
for (let i = 1; i < 3; i++) {
(getters[`price${i}`] = {
get() {
return this.priceHistory[i] ? this.priceHistory[i].price : 0;
},
}),
(getters[`date${i}`] = {
get() {
return this.priceHistory[i] ? this.priceHistory[i].date : null;
},
});
}
items.forEach((item) => {
lookup[item.store + item.id] = item;
for (const getter in getters) {
Object.defineProperty(item, getter, getters[getter]);
}
item.store = intern(item.store);
item.id = intern(item.id);
item.name = intern(item.name);
item.category = intern(item.category);
item.price = intern(item.price);
for (const price of item.priceHistory) {
price.date = intern(price.date);
price.price = intern(price.price);
}
item.unit = intern(item.unit);
item.quantity = intern(item.quantity);
item.search = item.name + " " + item.quantity + " " + item.unit;
item.search = intern(item.search.toLowerCase().replace(",", "."));
const unitPriceFactor = item.unit == "g" || item.unit == "ml" ? 1000 : 1;
for (let i = 0; i < item.priceHistory.length; i++) {
const price = item.priceHistory[i];
price.unitPrice = (price.price / item.quantity) * unitPriceFactor;
}
});
items.sort((a, b) => {
if (a.store < b.store) {
return -1;
} else if (a.store > b.store) {
return 1;
}
if (a.name < b.name) {
return -1;
} else if (a.name > b.name) {
return 1;
}
return 0;
});
log(`Loader - processing ${items.length} items took ${deltaTime(start).toFixed(4)} secs`);
return { items, lookup };
}
decompress(compressedItems) {
const storeLookup = compressedItems.stores;
const data = compressedItems.data;
const dates = compressedItems.dates;
const numItems = compressedItems.n;
const items = new Array(numItems);
let i = 0;
for (let l = 0; l < numItems; l++) {
const store = storeLookup[data[i++]];
const id = data[i++];
const name = data[i++];
const numPrices = data[i++];
const prices = new Array(numPrices);
for (let j = 0; j < numPrices; j++) {
const date = dates[data[i++]];
const price = data[i++];
prices[j] = {
date: date.substring(0, 4) + "-" + date.substring(4, 6) + "-" + date.substring(6, 8),
price,
};
}
const unit = data[i++];
const quantity = data[i++];
const isWeighted = data[i++] == 1;
const bio = data[i++] == 1;
const url = data[i++];
items[l] = {
store,
id,
name,
price: prices[0].price,
priceHistory: prices,
isWeighted,
unit,
quantity,
bio,
url,
};
}
return items;
}
}

View File

@ -10,7 +10,6 @@ class Settings extends Model {
STORE_KEYS.forEach((store) => {
this[store] = stores[store].defaultChecked;
});
this.useJson = true;
let settings = localStorage.getItem("settings");
if (settings) {
@ -19,8 +18,6 @@ class Settings extends Model {
this[prop] = settings[prop];
}
}
this.useJson = true;
log(`Settings - using JSON: ${this.useJson}`);
}
save() {

File diff suppressed because it is too large Load Diff