Binary format optimization (4.4 MB -> 3.9 MB): don't store URLs where not needed; use product-id instead of code-internal for Spar items.
This commit is contained in:
parent
c9740b8660
commit
c7537c341e
51
analysis.js
51
analysis.js
|
@ -159,7 +159,36 @@ function compressBinary(items) {
|
|||
buffer.push(...nameLengthBuffer, ...nameBuffer);
|
||||
}
|
||||
|
||||
const dictionary = {};
|
||||
const words = [];
|
||||
let id = 0;
|
||||
for (const item of items) {
|
||||
const tokens = item.name.split(/\s+/);
|
||||
for (const token of tokens) {
|
||||
if (!dictionary[token]) {
|
||||
dictionary[token] = id++;
|
||||
words.push(token);
|
||||
if (token.length > 256) {
|
||||
console.log("Dictionary word > 256 characters: " + token);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const numWordsBuffer = Buffer.allocUnsafe(4);
|
||||
numWordsBuffer.writeUint32LE(id, 0);
|
||||
buffer.push(...numWordsBuffer);
|
||||
for (const word of words) {
|
||||
const wordBuffer = Buffer.from(word, "utf8");
|
||||
buffer.push(wordBuffer.length);
|
||||
buffer.push(...wordBuffer);
|
||||
}
|
||||
|
||||
for (const item of items) {
|
||||
const idBuffer = Buffer.from("" + item.id, "utf8");
|
||||
buffer.push(idBuffer.length);
|
||||
buffer.push(...idBuffer);
|
||||
|
||||
let flagsByte = 0;
|
||||
if (item.bio) flagsByte |= 1;
|
||||
if (item.isWeighted) flagsByte |= 2;
|
||||
|
@ -178,10 +207,20 @@ function compressBinary(items) {
|
|||
const storeByte = STORE_KEYS.findIndex((store) => store == item.store);
|
||||
buffer.push(storeByte);
|
||||
|
||||
const nameBuffer = Buffer.from(item.name, "utf8");
|
||||
const nameLengthBuffer = Buffer.allocUnsafe(2);
|
||||
nameLengthBuffer.writeUInt16LE(nameBuffer.length, 0);
|
||||
buffer.push(...nameLengthBuffer, ...nameBuffer);
|
||||
const tokenIds = item.name.split(/\s+/).map((token) => {
|
||||
const id = dictionary[token];
|
||||
if (id === undefined) {
|
||||
console.log(`Undefined token ${token} ${item.id} - ${item.store} - ${item.name}`);
|
||||
}
|
||||
return id;
|
||||
});
|
||||
|
||||
buffer.push(tokenIds.length);
|
||||
for (const tokenId of tokenIds) {
|
||||
const tokenIdBuffer = Buffer.allocUnsafe(4);
|
||||
tokenIdBuffer.writeUint32LE(tokenId, 0);
|
||||
buffer.push(tokenIdBuffer[0], tokenIdBuffer[1], tokenIdBuffer[2]);
|
||||
}
|
||||
|
||||
if (item.url !== undefined) {
|
||||
const urlBuffer = Buffer.from(item.url, "utf8");
|
||||
|
@ -189,8 +228,8 @@ function compressBinary(items) {
|
|||
urlLengthBuffer.writeUInt16LE(urlBuffer.length, 0);
|
||||
buffer.push(...urlLengthBuffer, ...urlBuffer);
|
||||
} else {
|
||||
const urlLengthBuffer = Buffer.allocUnsafe(2).fill(0);
|
||||
buffer.push(...urlLengthBuffer);
|
||||
buffer.push(0);
|
||||
buffer.push(0);
|
||||
}
|
||||
|
||||
const priceHistoryLengthBuffer = Buffer.allocUnsafe(2);
|
||||
|
|
3
h43z.js
3
h43z.js
|
@ -12,7 +12,6 @@ exports.importH43zData = (sqliteFile, outputFile) => {
|
|||
const item = {
|
||||
store: row.shop == "billa" ? "billa" : "spar",
|
||||
id: row.product_id,
|
||||
sparId: row.sparId,
|
||||
name: row.name,
|
||||
price: 0,
|
||||
priceHistory: [],
|
||||
|
@ -61,7 +60,7 @@ exports.mergeWithLatestCanonical = (h43zFile, latestCanonicalFile) => {
|
|||
});
|
||||
const currItems = analysis.readJSON(latestCanonicalFile + "." + analysis.FILE_COMPRESSOR);
|
||||
const currLookup = {};
|
||||
currItems.forEach((item) => (currLookup[item.store + (item.sparId ? item.sparId : item.id)] = item));
|
||||
currItems.forEach((item) => (currLookup[item.store + item.id] = item));
|
||||
let missingItems = {
|
||||
spar: 0,
|
||||
billa: 0,
|
||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -16,8 +16,9 @@ class Carts extends Model {
|
|||
const carts = (this._carts = val ? JSON.parse(val) : []);
|
||||
|
||||
// Add Momentum cart if it is not in the list of carts
|
||||
if (!carts.some((cart) => cart.name === "Momentum Eigenmarken Vergleich")) {
|
||||
const momentumCart = await misc.fetchJSON("data/momentum-cart.json");
|
||||
if (!localStorage.getItem("updatedMomentum") || !carts.some((cart) => cart.name === "Momentum Eigenmarken Vergleich")) {
|
||||
localStorage.setItem("updatedMomentum", "true");
|
||||
const momentumCart = await misc.fetchJSON("data/momentum-cart.new.json");
|
||||
carts.unshift(momentumCart);
|
||||
}
|
||||
|
||||
|
|
|
@ -20,8 +20,22 @@ function decompressBinary(buffer) {
|
|||
offset += nameLength;
|
||||
}
|
||||
|
||||
const numWords = view.getUint32(offset, true);
|
||||
offset += 4;
|
||||
const words = new Array(numWords);
|
||||
for (let i = 0; i < numWords; i++) {
|
||||
const nameLength = view.getUint8(offset++);
|
||||
const nameBuffer = new Uint8Array(buffer, offset, nameLength);
|
||||
words[i] = textDecoder.decode(nameBuffer);
|
||||
offset += nameLength;
|
||||
}
|
||||
|
||||
while (offset < buffer.byteLength) {
|
||||
const obj = {};
|
||||
const idLength = view.getUint8(offset++);
|
||||
const idBuffer = new Uint8Array(buffer, offset, idLength);
|
||||
obj.id = textDecoder.decode(idBuffer);
|
||||
offset += idLength;
|
||||
|
||||
const flagsByte = view.getUint8(offset++);
|
||||
obj.bio = (flagsByte & 1) !== 0;
|
||||
|
@ -33,11 +47,17 @@ function decompressBinary(buffer) {
|
|||
|
||||
obj.store = stores[view.getUint8(offset++)];
|
||||
|
||||
const nameLength = view.getUint16(offset, true);
|
||||
offset += 2;
|
||||
const nameBuffer = new Uint8Array(buffer, offset, nameLength);
|
||||
obj.name = textDecoder.decode(nameBuffer);
|
||||
offset += nameLength;
|
||||
let name = "";
|
||||
const numTokens = view.getUint8(offset++);
|
||||
for (let i = 0; i < numTokens; i++) {
|
||||
const b1 = view.getUint8(offset++);
|
||||
const b2 = view.getUint8(offset++);
|
||||
const b3 = view.getUint8(offset++);
|
||||
const tokenId = (b3 << 16) | (b2 << 8) | b1;
|
||||
name += words[tokenId];
|
||||
if (i < numTokens - 1) name += " ";
|
||||
}
|
||||
obj.name = name;
|
||||
|
||||
const urlLength = view.getUint16(offset, true);
|
||||
offset += 2;
|
||||
|
|
|
@ -6,14 +6,14 @@ exports.stores = {
|
|||
budgetBrands: ["clever"],
|
||||
color: "yellow",
|
||||
defaultChecked: true,
|
||||
getUrl: (item) => `https://shop.billa.at${item.url}`,
|
||||
getUrl: (item) => `https://shop.billa.at/produkte/${item.id}`,
|
||||
},
|
||||
spar: {
|
||||
name: "Spar",
|
||||
budgetBrands: ["s-budget"],
|
||||
color: "green",
|
||||
defaultChecked: true,
|
||||
getUrl: (item) => `https://www.interspar.at/shop/lebensmittel${item.url}`,
|
||||
getUrl: (item) => `https://www.interspar.at/shop/lebensmittel/p/${item.id}`,
|
||||
},
|
||||
hofer: {
|
||||
name: "Hofer",
|
||||
|
@ -56,7 +56,7 @@ exports.stores = {
|
|||
budgetBrands: ["bravo", "echt bio!", "san fabio", "federike", "blik", "berida", "today", "ich bin österreich"],
|
||||
color: "purple",
|
||||
defaultChecked: true,
|
||||
getUrl: (item) => `https://www.penny.at/produkte/${item.url}`,
|
||||
getUrl: (item) => `https://www.penny.at/produkte/${item.id}`,
|
||||
},
|
||||
dmDe: {
|
||||
name: "DM DE",
|
||||
|
@ -77,7 +77,7 @@ exports.stores = {
|
|||
budgetBrands: ["s-budget"],
|
||||
color: "emerald",
|
||||
defaultChecked: false,
|
||||
getUrl: (item) => `https://www.spar.si/online/${item.url}`,
|
||||
getUrl: (item) => `https://www.spar.si/online/p/${item.id}`,
|
||||
},
|
||||
};
|
||||
|
||||
|
|
|
@ -288,7 +288,7 @@ class ItemsList extends View {
|
|||
itemDom.setAttribute("x-notraverse", "true");
|
||||
const elements = View.elements(itemDom);
|
||||
elements.store.innerText = item.store;
|
||||
elements.name.href = item.url;
|
||||
elements.name.href = stores[item.store].getUrl(item);
|
||||
elements.name.innerText = item.name;
|
||||
elements.quantity.innerText = (item.isWeighted ? "⚖ " : "") + `${quantity} ${unit}`;
|
||||
elements.price.innerText = `€ ${Number(showUnitPrice ? unitPrice : price).toFixed(2)} ${priceUnit}`;
|
||||
|
|
|
@ -32,7 +32,6 @@ exports.getCanonical = function (item, today) {
|
|||
unit,
|
||||
quantity,
|
||||
bio: item.data.attributes && item.data.attributes.includes("s_bio"),
|
||||
url: item.data.canonicalPath,
|
||||
},
|
||||
units,
|
||||
"billa"
|
||||
|
|
|
@ -16,7 +16,7 @@ exports.getCanonical = function (item, today) {
|
|||
let unit = item.contentUnit || item.basePriceUnit;
|
||||
return utils.convertUnit(
|
||||
{
|
||||
id: item.gtin,
|
||||
id: "" + item.gtin,
|
||||
name: `${item.brandName} ${item.title}`,
|
||||
price: item.price.value,
|
||||
priceHistory: [{ date: today, price: item.price.value }],
|
||||
|
|
|
@ -25,7 +25,6 @@ exports.getCanonical = function (item, today) {
|
|||
unit,
|
||||
quantity,
|
||||
bio: item.name.toLowerCase().includes("bio") && !item.name.toLowerCase().includes("fabio"),
|
||||
url: item.slug,
|
||||
},
|
||||
units,
|
||||
"penny"
|
||||
|
|
|
@ -69,7 +69,6 @@ exports.getCanonical = function (item, today) {
|
|||
quantity,
|
||||
isWeighted,
|
||||
bio: item.masterValues.biolevel === "Bio",
|
||||
url: item.masterValues.url,
|
||||
},
|
||||
units,
|
||||
"sparSi",
|
||||
|
|
|
@ -53,8 +53,7 @@ exports.getCanonical = function (item, today) {
|
|||
|
||||
return utils.convertUnit(
|
||||
{
|
||||
id: item.masterValues["code-internal"],
|
||||
sparId: item.masterValues["product-number"],
|
||||
id: item.masterValues["product-number"],
|
||||
name: item.masterValues.title + " " + (item.masterValues["short-description"] ?? item.masterValues.name),
|
||||
price,
|
||||
priceHistory: [{ date: today, price }],
|
||||
|
@ -62,7 +61,6 @@ exports.getCanonical = function (item, today) {
|
|||
quantity,
|
||||
isWeighted,
|
||||
bio: item.masterValues.biolevel === "Bio",
|
||||
url: item.masterValues.url,
|
||||
},
|
||||
units,
|
||||
"spar",
|
||||
|
|
Loading…
Reference in New Issue