Binary format optimization (4.4 MB -> 3.9 MB): don't store URLs where not needed, use product-id instead of code-internal for Spar items.

This commit is contained in:
Mario Zechner 2023-06-17 01:11:21 +02:00
parent c9740b8660
commit c7537c341e
12 changed files with 16598 additions and 27 deletions

View File

@ -159,7 +159,36 @@ function compressBinary(items) {
buffer.push(...nameLengthBuffer, ...nameBuffer);
}
const dictionary = {};
const words = [];
let id = 0;
for (const item of items) {
const tokens = item.name.split(/\s+/);
for (const token of tokens) {
if (!dictionary[token]) {
dictionary[token] = id++;
words.push(token);
if (token.length > 256) {
console.log("Dictionary word > 256 characters: " + token);
}
}
}
}
const numWordsBuffer = Buffer.allocUnsafe(4);
numWordsBuffer.writeUint32LE(id, 0);
buffer.push(...numWordsBuffer);
for (const word of words) {
const wordBuffer = Buffer.from(word, "utf8");
buffer.push(wordBuffer.length);
buffer.push(...wordBuffer);
}
for (const item of items) {
const idBuffer = Buffer.from("" + item.id, "utf8");
buffer.push(idBuffer.length);
buffer.push(...idBuffer);
let flagsByte = 0;
if (item.bio) flagsByte |= 1;
if (item.isWeighted) flagsByte |= 2;
@ -178,10 +207,20 @@ function compressBinary(items) {
const storeByte = STORE_KEYS.findIndex((store) => store == item.store);
buffer.push(storeByte);
const nameBuffer = Buffer.from(item.name, "utf8");
const nameLengthBuffer = Buffer.allocUnsafe(2);
nameLengthBuffer.writeUInt16LE(nameBuffer.length, 0);
buffer.push(...nameLengthBuffer, ...nameBuffer);
const tokenIds = item.name.split(/\s+/).map((token) => {
const id = dictionary[token];
if (id === undefined) {
console.log(`Undefined token ${token} ${item.id} - ${item.store} - ${item.name}`);
}
return id;
});
buffer.push(tokenIds.length);
for (const tokenId of tokenIds) {
const tokenIdBuffer = Buffer.allocUnsafe(4);
tokenIdBuffer.writeUint32LE(tokenId, 0);
buffer.push(tokenIdBuffer[0], tokenIdBuffer[1], tokenIdBuffer[2]);
}
if (item.url !== undefined) {
const urlBuffer = Buffer.from(item.url, "utf8");
@ -189,8 +228,8 @@ function compressBinary(items) {
urlLengthBuffer.writeUInt16LE(urlBuffer.length, 0);
buffer.push(...urlLengthBuffer, ...urlBuffer);
} else {
const urlLengthBuffer = Buffer.allocUnsafe(2).fill(0);
buffer.push(...urlLengthBuffer);
buffer.push(0);
buffer.push(0);
}
const priceHistoryLengthBuffer = Buffer.allocUnsafe(2);

View File

@ -12,7 +12,6 @@ exports.importH43zData = (sqliteFile, outputFile) => {
const item = {
store: row.shop == "billa" ? "billa" : "spar",
id: row.product_id,
sparId: row.sparId,
name: row.name,
price: 0,
priceHistory: [],
@ -61,7 +60,7 @@ exports.mergeWithLatestCanonical = (h43zFile, latestCanonicalFile) => {
});
const currItems = analysis.readJSON(latestCanonicalFile + "." + analysis.FILE_COMPRESSOR);
const currLookup = {};
currItems.forEach((item) => (currLookup[item.store + (item.sparId ? item.sparId : item.id)] = item));
currItems.forEach((item) => (currLookup[item.store + item.id] = item));
let missingItems = {
spar: 0,
billa: 0,

File diff suppressed because it is too large Load Diff

View File

@ -16,8 +16,9 @@ class Carts extends Model {
const carts = (this._carts = val ? JSON.parse(val) : []);
// Add Momentum cart if it is not in the list of carts
if (!carts.some((cart) => cart.name === "Momentum Eigenmarken Vergleich")) {
const momentumCart = await misc.fetchJSON("data/momentum-cart.json");
if (!localStorage.getItem("updatedMomentum") || !carts.some((cart) => cart.name === "Momentum Eigenmarken Vergleich")) {
localStorage.setItem("updatedMomentum", "true");
const momentumCart = await misc.fetchJSON("data/momentum-cart.new.json");
carts.unshift(momentumCart);
}

View File

@ -20,8 +20,22 @@ function decompressBinary(buffer) {
offset += nameLength;
}
const numWords = view.getUint32(offset, true);
offset += 4;
const words = new Array(numWords);
for (let i = 0; i < numWords; i++) {
const nameLength = view.getUint8(offset++);
const nameBuffer = new Uint8Array(buffer, offset, nameLength);
words[i] = textDecoder.decode(nameBuffer);
offset += nameLength;
}
while (offset < buffer.byteLength) {
const obj = {};
const idLength = view.getUint8(offset++);
const idBuffer = new Uint8Array(buffer, offset, idLength);
obj.id = textDecoder.decode(idBuffer);
offset += idLength;
const flagsByte = view.getUint8(offset++);
obj.bio = (flagsByte & 1) !== 0;
@ -33,11 +47,17 @@ function decompressBinary(buffer) {
obj.store = stores[view.getUint8(offset++)];
const nameLength = view.getUint16(offset, true);
offset += 2;
const nameBuffer = new Uint8Array(buffer, offset, nameLength);
obj.name = textDecoder.decode(nameBuffer);
offset += nameLength;
let name = "";
const numTokens = view.getUint8(offset++);
for (let i = 0; i < numTokens; i++) {
const b1 = view.getUint8(offset++);
const b2 = view.getUint8(offset++);
const b3 = view.getUint8(offset++);
const tokenId = (b3 << 16) | (b2 << 8) | b1;
name += words[tokenId];
if (i < numTokens - 1) name += " ";
}
obj.name = name;
const urlLength = view.getUint16(offset, true);
offset += 2;

View File

@ -6,14 +6,14 @@ exports.stores = {
budgetBrands: ["clever"],
color: "yellow",
defaultChecked: true,
getUrl: (item) => `https://shop.billa.at${item.url}`,
getUrl: (item) => `https://shop.billa.at/produkte/${item.id}`,
},
spar: {
name: "Spar",
budgetBrands: ["s-budget"],
color: "green",
defaultChecked: true,
getUrl: (item) => `https://www.interspar.at/shop/lebensmittel${item.url}`,
getUrl: (item) => `https://www.interspar.at/shop/lebensmittel/p/${item.id}`,
},
hofer: {
name: "Hofer",
@ -56,7 +56,7 @@ exports.stores = {
budgetBrands: ["bravo", "echt bio!", "san fabio", "federike", "blik", "berida", "today", "ich bin österreich"],
color: "purple",
defaultChecked: true,
getUrl: (item) => `https://www.penny.at/produkte/${item.url}`,
getUrl: (item) => `https://www.penny.at/produkte/${item.id}`,
},
dmDe: {
name: "DM DE",
@ -77,7 +77,7 @@ exports.stores = {
budgetBrands: ["s-budget"],
color: "emerald",
defaultChecked: false,
getUrl: (item) => `https://www.spar.si/online/${item.url}`,
getUrl: (item) => `https://www.spar.si/online/p/${item.id}`,
},
};

View File

@ -288,7 +288,7 @@ class ItemsList extends View {
itemDom.setAttribute("x-notraverse", "true");
const elements = View.elements(itemDom);
elements.store.innerText = item.store;
elements.name.href = item.url;
elements.name.href = stores[item.store].getUrl(item);
elements.name.innerText = item.name;
elements.quantity.innerText = (item.isWeighted ? "⚖ " : "") + `${quantity} ${unit}`;
elements.price.innerText = `${Number(showUnitPrice ? unitPrice : price).toFixed(2)} ${priceUnit}`;

View File

@ -32,7 +32,6 @@ exports.getCanonical = function (item, today) {
unit,
quantity,
bio: item.data.attributes && item.data.attributes.includes("s_bio"),
url: item.data.canonicalPath,
},
units,
"billa"

View File

@ -16,7 +16,7 @@ exports.getCanonical = function (item, today) {
let unit = item.contentUnit || item.basePriceUnit;
return utils.convertUnit(
{
id: item.gtin,
id: "" + item.gtin,
name: `${item.brandName} ${item.title}`,
price: item.price.value,
priceHistory: [{ date: today, price: item.price.value }],

View File

@ -25,7 +25,6 @@ exports.getCanonical = function (item, today) {
unit,
quantity,
bio: item.name.toLowerCase().includes("bio") && !item.name.toLowerCase().includes("fabio"),
url: item.slug,
},
units,
"penny"

View File

@ -69,7 +69,6 @@ exports.getCanonical = function (item, today) {
quantity,
isWeighted,
bio: item.masterValues.biolevel === "Bio",
url: item.masterValues.url,
},
units,
"sparSi",

View File

@ -53,8 +53,7 @@ exports.getCanonical = function (item, today) {
return utils.convertUnit(
{
id: item.masterValues["code-internal"],
sparId: item.masterValues["product-number"],
id: item.masterValues["product-number"],
name: item.masterValues.title + " " + (item.masterValues["short-description"] ?? item.masterValues.name),
price,
priceHistory: [{ date: today, price }],
@ -62,7 +61,6 @@ exports.getCanonical = function (item, today) {
quantity,
isWeighted,
bio: item.masterValues.biolevel === "Bio",
url: item.masterValues.url,
},
units,
"spar",