Fix up restore(), mergePriceHistory(), sort items by store and name before writing canonical list. Closes #23

This commit is contained in:
Mario Zechner 2023-05-26 08:56:58 +02:00
parent f277d207cc
commit 9b907b52b1
2 changed files with 56 additions and 19 deletions

View File

@ -246,6 +246,7 @@ function mergePriceHistory(oldItems, items) {
for (item of items) { for (item of items) {
let oldItem = lookup[item.store + item.id]; let oldItem = lookup[item.store + item.id];
delete lookup[item.store + item.id];
let currPrice = item.priceHistory[0]; let currPrice = item.priceHistory[0];
if (oldItem) { if (oldItem) {
if (oldItem.priceHistory[0].price == currPrice.price) { if (oldItem.priceHistory[0].price == currPrice.price) {
@ -259,16 +260,43 @@ function mergePriceHistory(oldItems, items) {
} }
} }
console.log(`${Object.keys(lookup).length} not in latest list.`)
for (key of Object.keys(lookup)) {
items.push(lookup[key]);
}
sortItems(items);
console.log(`Items: ${items.length}`);
return items; return items;
} }
/// Sorts `items` in place: primary key is `store`, secondary key is `name`.
/// Both keys are compared with the plain `<` / `>` operators, so string keys
/// are ordered by UTF-16 code unit (uppercase before lowercase), not by locale.
/// Returns nothing; the caller's array is reordered.
function sortItems(items) {
    // Three-way comparison of a single key using the relational operators.
    const compareKey = (left, right) => {
        if (left < right) return -1;
        if (left > right) return 1;
        return 0;
    };
    // A result of 0 for `store` is falsy, so `||` falls through to `name`.
    items.sort((first, second) => compareKey(first.store, second.store) || compareKey(first.name, second.name));
}
/// Given a directory of raw data of the form `billa-$date.json` and `spar-$date.json`, constructs /// Given a directory of raw data of the form `billa-$date.json` and `spar-$date.json`, constructs
/// a canonical list of all products and their historical price data. /// a canonical list of all products and their historical price data.
exports.replay = function(rawDataDir) { exports.replay = function(rawDataDir) {
const today = currentDate(); const today = currentDate();
const files = fs.readdirSync(rawDataDir).filter( const files = fs.readdirSync(rawDataDir).filter(
file => file.indexOf("canonical") == -1 && (file.indexOf("billa-") == 0 || file.indexOf("spar") == 0 || file.indexOf("hofer") == 0) file => file.indexOf("canonical") == -1 &&
(file.indexOf("billa-") == 0 || file.indexOf("spar") == 0 || file.indexOf("hofer") == 0 || file.indexOf("dm") == 0 || file.indexOf("lidl") == 0 || file.indexOf("mpreis") == 0)
); );
const dateSort = (a, b) => { const dateSort = (a, b) => {
@ -277,7 +305,7 @@ exports.replay = function(rawDataDir) {
return dateA - dateB; return dateA - dateB;
}; };
const getFilteredFilesFor = (identifier) => files.filter(file => file.indexOf(`${identifier}-` == 0).sort(dateSort).map(file => rawDataDir + "/" + file)); const getFilteredFilesFor = (identifier) => files.filter(file => file.indexOf(`${identifier}-`) == 0).sort(dateSort).map(file => rawDataDir + "/" + file);
const sparFiles = getFilteredFilesFor("spar"); const sparFiles = getFilteredFilesFor("spar");
const sparFilesCanonical = sparFiles.map(file => sparToCanonical(readJSON(file), file.match(/\d{4}-\d{2}-\d{2}/)[0])); const sparFilesCanonical = sparFiles.map(file => sparToCanonical(readJSON(file), file.match(/\d{4}-\d{2}-\d{2}/)[0]));
@ -285,7 +313,7 @@ exports.replay = function(rawDataDir) {
const billaFilesCanonical = billaFiles.map(file => billaToCanonical(readJSON(file), file.match(/\d{4}-\d{2}-\d{2}/)[0])); const billaFilesCanonical = billaFiles.map(file => billaToCanonical(readJSON(file), file.match(/\d{4}-\d{2}-\d{2}/)[0]));
const hoferFiles = getFilteredFilesFor("hofer"); const hoferFiles = getFilteredFilesFor("hofer");
const hoferFilesCanonical = hoferFiles.map(file => hoferToCanonical(readJSON(file), file.match(/\d{4}-\d{2}-\d{2}/)[0])); const hoferFilesCanonical = hoferFiles.map(file => hoferToCanonical(readJSON(file), file.match(/\d{4}-\d{2}-\d{2}/)[0]));
const dmFiles = files.filter(file => file.indexOf("dm-") == 0).sort(dateSort).map(file => rawDataDir + "/" + file); const dmFiles = getFilteredFilesFor("dm");
const dmFilesCanonical = dmFiles.map(file => dmToCanonical(readJSON(file), file.match(/\d{4}-\d{2}-\d{2}/)[0])); const dmFilesCanonical = dmFiles.map(file => dmToCanonical(readJSON(file), file.match(/\d{4}-\d{2}-\d{2}/)[0]));
const lidlFiles = getFilteredFilesFor("lidl"); const lidlFiles = getFilteredFilesFor("lidl");
const lidlFilesCanonical = lidlFiles.map(file => lidlToCanonical(readJSON(file), file.match(/\d{4}-\d{2}-\d{2}/)[0])); const lidlFilesCanonical = lidlFiles.map(file => lidlToCanonical(readJSON(file), file.match(/\d{4}-\d{2}-\d{2}/)[0]));
@ -302,20 +330,24 @@ exports.replay = function(rawDataDir) {
mpreisFilesCanonical.reverse(); mpreisFilesCanonical.reverse();
for (let i = 0; i < len; i++) { for (let i = 0; i < len; i++) {
const canonical = []; const canonical = [];
let billa = billaFilesCanonical.pop(); const billa = billaFilesCanonical.pop();
if (billa) canonical.push(...billa); if (billa) canonical.push(...billa);
let spar = sparFilesCanonical.pop();
const spar = sparFilesCanonical.pop();
if (spar) canonical.push(...spar); if (spar) canonical.push(...spar);
let hofer = hoferFilesCanonical.pop();
const hofer = hoferFilesCanonical.pop();
if (hofer) canonical.push(...hofer); if (hofer) canonical.push(...hofer);
allFilesCanonical.push(canonical);
let dm = dmFilesCanonical.pop(); const dm = dmFilesCanonical.pop();
if (dm) canonical.push(...dmFilesCanonical.pop()); if (dm) canonical.push(...dm);
let lidl = lidlFilesCanonical.pop();
const lidl = lidlFilesCanonical.pop();
if (lidl) canonical.push(...lidl); if (lidl) canonical.push(...lidl);
allFilesCanonical.push(canonical);
let mpreis = mpreisFilesCanonical.pop(); const mpreis = mpreisFilesCanonical.pop();
if (mpreis) canonical.push(...mpreis); if (mpreis) canonical.push(...mpreis);
allFilesCanonical.push(canonical); allFilesCanonical.push(canonical);
} }
@ -405,6 +437,8 @@ exports.updateData = async function (dataDir, done) {
mergePriceHistory(oldItems, items); mergePriceHistory(oldItems, items);
console.log("Merged price history"); console.log("Merged price history");
} }
sortItems(items);
fs.writeFileSync(`${dataDir}/latest-canonical.json`, JSON.stringify(items, null, 2)); fs.writeFileSync(`${dataDir}/latest-canonical.json`, JSON.stringify(items, null, 2));
if (done) done(items); if (done) done(items);

View File

@ -1,8 +1,11 @@
const fs = require("fs"); const fs = require("fs");
const analysis = require("./analysis.js"); const analysis = require("./analysis.js");
const dataDir = process?.argv?.[2] ?? "docker/data"
console.log("Restoring data from raw data.");
(async function () { (async function () {
fs.copyFileSync("docker/data/latest-canonical.json", "docker/data/latest-canonical-old.json"); console.log("Items: " + JSON.parse(fs.readFileSync("docker/data/latest-canonical.json")).length);
await analysis.updateData("docker/data"); /*await analysis.updateData(dataDir);
fs.writeFileSync("docker/data/latest-canonical.json", JSON.stringify(analysis.replay("docker/data"), null, 2)); fs.copyFileSync(`${dataDir}/latest-canonical.json`, `${dataDir}/latest-canonical-old.json`);*/
fs.writeFileSync(`${dataDir}/latest-canonical.json`, JSON.stringify(analysis.replay(dataDir), null, 2));
console.log("Items: " + JSON.parse(fs.readFileSync(`${dataDir}/latest-canonical.json`)).length);
})(); })();