2023-06-14 00:11:45 +02:00
|
|
|
const fs = require("fs");
|
|
|
|
const analysis = require("./analysis");
|
|
|
|
const Database = require("better-sqlite3");
|
|
|
|
|
2023-06-14 17:07:02 +02:00
|
|
|
exports.importH43zData = (sqliteFile, outputFile) => {
|
|
|
|
const db = new Database(sqliteFile, { verbose: console.log });
|
|
|
|
let items = [];
|
|
|
|
const lookup = {};
|
|
|
|
|
2023-06-14 00:11:45 +02:00
|
|
|
let stmt = db.prepare("select * from product");
|
|
|
|
for (const row of stmt.iterate()) {
|
|
|
|
const item = {
|
2023-06-14 01:52:35 +02:00
|
|
|
store: row.shop == "billa" ? "billa" : "spar",
|
2023-06-14 00:11:45 +02:00
|
|
|
id: row.product_id,
|
|
|
|
name: row.name,
|
|
|
|
price: 0,
|
|
|
|
priceHistory: [],
|
|
|
|
isWeighted: false,
|
|
|
|
unit: "g",
|
|
|
|
quantity: 0,
|
|
|
|
bio: false,
|
|
|
|
url: "",
|
|
|
|
};
|
|
|
|
items.push(item);
|
|
|
|
lookup[row.product_id] = item;
|
|
|
|
}
|
|
|
|
console.log(items.length + " products");
|
|
|
|
|
2023-06-14 11:51:07 +02:00
|
|
|
stmt = db.prepare("select * from pricehistory order by date asc");
|
2023-06-14 00:11:45 +02:00
|
|
|
let i = 0;
|
|
|
|
let unknown = 0;
|
|
|
|
for (const row of stmt.iterate()) {
|
|
|
|
i++;
|
|
|
|
const item = lookup[row.product_id];
|
|
|
|
if (!item) {
|
|
|
|
unknown++;
|
|
|
|
} else {
|
|
|
|
if (item.priceHistory.length == 0 || item.priceHistory[item.priceHistory.length - 1].price != row.price)
|
|
|
|
item.priceHistory.push({ date: row.date, price: row.price });
|
|
|
|
}
|
|
|
|
if (i % 1000 == 0) {
|
|
|
|
console.log("Processed " + i + " prices, " + unknown + " unknown");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
items = items.filter((item) => item.priceHistory.length > 0);
|
|
|
|
items.forEach((item) => {
|
2023-06-14 11:51:07 +02:00
|
|
|
item.priceHistory.reverse();
|
2023-06-14 00:11:45 +02:00
|
|
|
item.price = item.priceHistory[0];
|
|
|
|
});
|
2023-06-14 17:07:02 +02:00
|
|
|
analysis.writeJSON(outputFile, items);
|
2023-06-14 01:52:35 +02:00
|
|
|
};
|
2023-06-14 11:51:07 +02:00
|
|
|
|
2023-06-14 17:07:02 +02:00
|
|
|
exports.mergeWithLatestCanonical = (h43zFile, latestCanonicalFile) => {
|
|
|
|
const items = analysis.readJSON(h43zFile);
|
|
|
|
const lookup = {};
|
|
|
|
items.forEach((item) => {
|
|
|
|
// item.priceHistory = item.priceHistory.filter(price => price.date > "2020-01-01")
|
|
|
|
lookup[item.id] = item;
|
|
|
|
});
|
|
|
|
const currItems = analysis.readJSON(latestCanonicalFile + "." + analysis.FILE_COMPRESSOR);
|
|
|
|
const currLookup = {};
|
2023-06-17 01:11:21 +02:00
|
|
|
currItems.forEach((item) => (currLookup[item.store + item.id] = item));
|
2023-06-14 17:07:02 +02:00
|
|
|
let missingItems = {
|
|
|
|
spar: 0,
|
|
|
|
billa: 0,
|
|
|
|
};
|
|
|
|
let foundItems = {
|
|
|
|
spar: 0,
|
|
|
|
billa: 0,
|
|
|
|
};
|
|
|
|
for (item of items) {
|
|
|
|
const i = lookup[item.id];
|
|
|
|
const currItem = currLookup[item.store + item.id];
|
|
|
|
if (!currItem) {
|
|
|
|
missingItems[item.store]++;
|
|
|
|
} else {
|
|
|
|
foundItems[item.store]++;
|
|
|
|
const oldHistory = [...currItem.priceHistory];
|
|
|
|
currItem.priceHistory.push(...item.priceHistory);
|
|
|
|
currItem.priceHistory.sort((a, b) => new Date(a.date) - new Date(b.date));
|
|
|
|
|
|
|
|
const mergedHistory = [];
|
|
|
|
currItem.priceHistory.forEach((price) => {
|
|
|
|
if (mergedHistory.length == 0) {
|
|
|
|
mergedHistory.push(price);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
if (mergedHistory[mergedHistory.length - 1].price != price.price) {
|
|
|
|
mergedHistory.push(price);
|
|
|
|
}
|
|
|
|
});
|
|
|
|
mergedHistory.reverse();
|
|
|
|
currItem.priceHistory = mergedHistory;
|
|
|
|
}
|
2023-06-14 01:52:35 +02:00
|
|
|
}
|
2023-06-14 17:07:02 +02:00
|
|
|
console.log(JSON.stringify(missingItems, null, 2));
|
|
|
|
console.log(JSON.stringify(foundItems, null, 2));
|
|
|
|
analysis.writeJSON(latestCanonicalFile, currItems, analysis.FILE_COMPRESSOR);
|
|
|
|
};
|
|
|
|
|
|
|
|
// When this file is executed directly (rather than required), import the h43z
// SQLite dump and merge it into the latest canonical data set.
// Usage: node <this file> [sqliteFile] [h43zJsonFile] [latestCanonicalFile]
// Omitted arguments fall back to the previously hard-coded values.
if (require.main === module) {
    const [
        sqliteFile = "/Users/badlogic/Downloads/shops.db",
        h43zFile = "h43z.json",
        latestCanonicalFile = "data/latest-canonical.json",
    ] = process.argv.slice(2);
    exports.importH43zData(sqliteFile, h43zFile);
    exports.mergeWithLatestCanonical(h43zFile, latestCanonicalFile);
}
|