Closes #131: deduplicate items generally, so that newly added store code which doesn't deduplicate on its own won't fuck up the canonical data.

This commit is contained in:
Mario Zechner 2023-07-05 21:23:23 +02:00
parent da6500b5b9
commit b95f39b809

View File

@ -226,7 +226,7 @@ exports.replay = async (rawDataDir) => {
canonicalFiles[store] = storeFiles[store].map((file) => {
console.log(`Creating canonical items for ${file}`);
const rawItems = readJSON(file);
const items = getCanonicalFor(store, rawItems, file.match(/\d{4}-\d{2}-\d{2}/)[0]);
const items = exports.dedupItems(getCanonicalFor(store, rawItems, file.match(/\d{4}-\d{2}-\d{2}/)[0]));
for (let i = 0; i < items.length; i++) {
const rawItem = rawItems[i];
const item = items[i];
@ -279,7 +279,7 @@ exports.updateData = async function (dataDir, done) {
rawItems = await stores[store].fetchData();
writeJSON(rawDataFile, rawItems, FILE_COMPRESSOR);
}
const items = getCanonicalFor(store, rawItems, today);
const items = exports.dedupItems(getCanonicalFor(store, rawItems, today));
await stores[store].initializeCategoryMapping(rawItems);
let numUncategorized = 0;
@ -304,7 +304,7 @@ exports.updateData = async function (dataDir, done) {
);
}
const items = [].concat(...(await Promise.all(storeFetchPromises)));
let items = [].concat(...(await Promise.all(storeFetchPromises)));
if (fs.existsSync(`${dataDir}/latest-canonical.json.${FILE_COMPRESSOR}`)) {
const oldItems = readJSON(`${dataDir}/latest-canonical.json.${FILE_COMPRESSOR}`);
@ -319,6 +319,7 @@ exports.updateData = async function (dataDir, done) {
}
sortItems(items);
items = exports.dedupItems(items);
writeJSON(`${dataDir}/latest-canonical.json`, items, FILE_COMPRESSOR);
if (done) done(items);
@ -347,3 +348,29 @@ exports.migrateCompression = (dataDir, fromSuffix, toSuffix, remove = true) => {
}
}
};
exports.dedupItems = (items) => {
const lookup = {};
const dedupItems = [];
let duplicates = {};
for (const item of items) {
const seenItem = lookup[item.store + item.id];
if (!seenItem) {
lookup[item.store + item.id] = item;
dedupItems.push(item);
} else {
if (seenItem.quantity != item.quantity || seenItem.unit != item.unit) {
console.log(`Item with same id but different quantity and unit: ${item.store}-${item.id} '${item.name}'`);
}
duplicates[item.store] = duplicates[item.store] ? duplicates[item.store] + 1 : 1;
}
}
console.log("Deduplicated items");
console.log(JSON.stringify(duplicates, null, 2));
return dedupItems;
};
// When this file is executed directly (rather than required by another module),
// read the latest canonical data set and run it through dedupItems, which logs
// the duplicates it finds.
if (module === require.main) {
    exports.dedupItems(exports.readJSON("latest-canonical.json.br"));
}