From 3638b80c0297e312b463e7eff304f38c1ad4b6f2 Mon Sep 17 00:00:00 2001
From: Mario Zechner
Date: Sat, 3 Jun 2023 00:01:26 +0200
Subject: [PATCH] Refactor migration, switch from gzip to brotli compression.
 See #44

See migrate.js if you want to manually convert raw data files between
formats.
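
For example, to convert existing gzipped raw data to brotli (the "data"
directory here is the default used by restore.js; adjust as needed):

    node migrate.js data ".json.gz" ".json.br"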
".gz" : ""}`, data); -} -exports.writeJSON = writeJSON; - function getCanonicalFor(store, rawItems, today) { console.log(`Converting ${store}-${today} to canonical.`); const canonicalItems = []; @@ -151,7 +162,7 @@ exports.replay = function (rawDataDir) { for (const store of STORE_KEYS) { storeFiles[store] = getFilteredFilesFor(store); - canonicalFiles[store] = storeFiles[store].map(file => getCanonicalFor(store, readJSON(file), file.match(/\d{4}-\d{2}-\d{2}/)[0])); + canonicalFiles[store] = storeFiles[store].map((file) => getCanonicalFor(store, readJSON(file), file.match(/\d{4}-\d{2}-\d{2}/)[0])); canonicalFiles[store].reverse(); } @@ -184,53 +195,57 @@ exports.updateData = async function (dataDir, done) { console.log("Fetching data for date: " + today); const storeFetchPromises = []; for (const store of STORE_KEYS) { - storeFetchPromises.push(new Promise(async (resolve) => { - const start = performance.now(); - try { - const storeItems = await stores[store].fetchData(); - writeJSON(`${dataDir}/${store}-${today}.json`, storeItems, true); - const storeItemsCanonical = getCanonicalFor(store, storeItems, today); - console.log(`Fetched ${store.toUpperCase()} data, took ${(performance.now() - start) / 1000} seconds`); - resolve(storeItemsCanonical) - } catch (e) { - console.error(`Error while fetching data from ${store}, continuing after ${(performance.now() - start) / 1000} seconds...`, e); - resolve([]) - } - })); + storeFetchPromises.push( + new Promise(async (resolve) => { + const start = performance.now(); + try { + const storeItems = await stores[store].fetchData(); + writeJSON(`${dataDir}/${store}-${today}.json`, storeItems, FILE_COMPRESSOR); + const storeItemsCanonical = getCanonicalFor(store, storeItems, today); + console.log(`Fetched ${store.toUpperCase()} data, took ${(performance.now() - start) / 1000} seconds`); + resolve(storeItemsCanonical); + } catch (e) { + console.error(`Error while fetching data from ${store}, continuing after ${(performance.now() - start) / 1000} seconds...`, e); + resolve([]); + } + }) + ); } const items = [].concat(...(await Promise.all(storeFetchPromises))); - if (fs.existsSync(`${dataDir}/latest-canonical.json.gz`)) { - const oldItems = readJSON(`${dataDir}/latest-canonical.json.gz`); + if (fs.existsSync(`${dataDir}/latest-canonical.json.${FILE_COMPRESSOR}`)) { + const oldItems = readJSON(`${dataDir}/latest-canonical.json.${FILE_COMPRESSOR}`); mergePriceHistory(oldItems, items); console.log("Merged price history"); } sortItems(items); - writeJSON(`${dataDir}/latest-canonical.json`, items, true); + writeJSON(`${dataDir}/latest-canonical.json`, items, FILE_COMPRESSOR); if (done) done(items); return items; }; -exports.migrateToGzip = (dataDir) => { - if (fs.existsSync(`${dataDir}/latest-canonical.json`)) { - console.log("Migrating old .json data to .json.gz"); - const files = fs.readdirSync(dataDir).filter( - file => file.indexOf("canonical") == -1 && - STORE_KEYS.some(store => file.indexOf(`${store}-`) == 0) +exports.migrateCompression = (dataDir, fromSuffix, toSuffix, remove = true) => { + console.log(`Migrating ${fromSuffix} data to ${toSuffix}`); + let fileCompressor = toSuffix == ".json" ? 
diff --git a/index.js b/index.js
index 1358faf..85cd8d6 100644
--- a/index.js
+++ b/index.js
@@ -2,7 +2,7 @@ const fs = require("fs");
 const analysis = require("./analysis");
 
 function copyItemsToSite(dataDir) {
-    const items = analysis.readJSON(`${dataDir}/latest-canonical.json.gz`);
+    const items = analysis.readJSON(`${dataDir}/latest-canonical.json.${analysis.FILE_COMPRESSOR}`);
     for (const store of analysis.STORE_KEYS) {
         const storeItems = items.filter(item => item.store === store);
         analysis.writeJSON(`site/latest-canonical.${store}.compressed.json`, storeItems, false, 0, true);
@@ -37,9 +37,10 @@ function scheduleFunction(hour, minute, second, func) {
         fs.mkdirSync(dataDir);
     }
 
-    analysis.migrateToGzip(dataDir);
+    analysis.migrateCompression(dataDir, ".json", ".json.br");
+    analysis.migrateCompression(dataDir, ".json.gz", ".json.br");
 
-    if (fs.existsSync(`${dataDir}/latest-canonical.json.gz`)) {
+    if (fs.existsSync(`${dataDir}/latest-canonical.json.${analysis.FILE_COMPRESSOR}`)) {
         copyItemsToSite(dataDir);
         analysis.updateData(dataDir, (_newItems) => {
             copyItemsToSite(dataDir);
diff --git a/migrate.js b/migrate.js
new file mode 100644
index 0000000..486216a
--- /dev/null
+++ b/migrate.js
@@ -0,0 +1,19 @@
+const fs = require("fs");
+const path = require("path");
+const analysis = require("./analysis");
+const [, , dataDir, fromSuffix, toSuffix] = process.argv;
+
+const errorExit = (message) => {
+    console.log(message);
+    console.log();
+    console.log("Usage: node migrate.js <data-dir> <from-suffix> <to-suffix>");
+    console.log();
+    console.log(`E.g.: node migrate.js data ".json" ".json.gz"`);
+    process.exit(1);
+};
+
+if (!fs.existsSync(dataDir) || !fs.lstatSync(dataDir).isDirectory()) errorExit("Error: The specified data directory does not exist.");
+if (!fromSuffix || typeof fromSuffix !== "string") errorExit('Error: The "from-suffix" parameter must be a non-empty string.');
+if (!toSuffix || typeof toSuffix !== "string") errorExit('Error: The "to-suffix" parameter must be a non-empty string.');
+
+analysis.migrateCompression(dataDir, fromSuffix, toSuffix);
\ No newline at end of file
diff --git a/pages.js b/pages.js
index 5820c5d..26f47c2 100644
--- a/pages.js
+++ b/pages.js
@@ -17,7 +17,7 @@ if (!fs.existsSync(dataDir)) {
 (async function () {
     try {
         await analysis.updateData(dataDir);
-        const items = analysis.readJSON(`${dataDir}/latest-canonical.json.gz`);
+        const items = analysis.readJSON(`${dataDir}/latest-canonical.json.${analysis.FILE_COMPRESSOR}`);
         for (const store of analysis.STORE_KEYS) {
             const storeItems = items.filter(item => item.store === store);
             analysis.writeJSON(`${dataDir}/latest-canonical.${store}.compressed.json`, storeItems, false, 0, true);
diff --git a/restore.js b/restore.js
index 024c834..0ad3144 100644
--- a/restore.js
+++ b/restore.js
@@ -1,9 +1,10 @@
 const analysis = require("./analysis.js");
-const dataDir = process?.argv?.[2] ?? "docker/data";
+const dataDir = process?.argv?.[2] ?? "data";
 console.log("Restoring data from raw data.");
 (async function () {
-    analysis.migrateToGzip(dataDir);
+    analysis.migrateCompression(dataDir, ".json", ".json.br", false);
+    analysis.migrateCompression(dataDir, ".json.gz", ".json.br");
     const items = analysis.replay(dataDir);
-    analysis.writeJSON(`${dataDir}/latest-canonical.json`, items, true);
-    console.log(`Wrote ${analysis.readJSON(`${dataDir}/latest-canonical.json.gz`).length} items to ${dataDir}/latest-canonical.json.gz`);
+    analysis.writeJSON(`${dataDir}/latest-canonical.json`, items, analysis.FILE_COMPRESSOR);
+    console.log(`Wrote ${analysis.readJSON(`${dataDir}/latest-canonical.json.${analysis.FILE_COMPRESSOR}`).length} items to ${dataDir}/latest-canonical.json.${analysis.FILE_COMPRESSOR}`);
 })();