Mirror of https://github.com/badlogic/heissepreise.git
Synced 2024-06-03 15:25:31 +02:00
Refactor migration, switch from gzip to brotli compression. See #44

See migrate.js if you want to manually convert raw data files between formats.
This commit is contained in:
parent 573a6e3648
commit 3638b80c02
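In practice the conversion can be run either through the new migrate.js entry point or by calling the helper directly. A minimal sketch, assuming a local data/ directory (the directory name is only an example; migrateCompression and its signature are introduced in analysis.js below):

    // Re-encode all raw .json.gz dumps as brotli-compressed .json.br files,
    // deleting the gzip originals afterwards (remove defaults to true).
    const analysis = require("./analysis");
    analysis.migrateCompression("data", ".json.gz", ".json.br");

    // Equivalent CLI invocation via the new entry point:
    //   node migrate.js data ".json.gz" ".json.br"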
analysis.js (117 changes)
@@ -1,11 +1,39 @@
 const fs = require("fs");
 const zlib = require("zlib");
 const stores = require("./stores");
+const { FILE } = require("dns");

 const STORE_KEYS = Object.keys(stores);
 exports.STORE_KEYS = STORE_KEYS;

+const BROTLI_OPTIONS = {
+    params: {
+        [zlib.constants.BROTLI_PARAM_MODE]: zlib.constants.BROTLI_MODE_GENERIC,
+        [zlib.constants.BROTLI_PARAM_QUALITY]: 9,
+        [zlib.constants.BROTLI_PARAM_LGWIN]: 22,
+    },
+};
+
+const FILE_COMPRESSOR = "br";
+exports.FILE_COMPRESSOR = FILE_COMPRESSOR;
+
+function readJSON(file) {
+    let data = fs.readFileSync(file);
+    if (file.endsWith(".gz")) data = zlib.gunzipSync(data);
+    if (file.endsWith(".br")) data = zlib.brotliDecompressSync(data);
+    return JSON.parse(data);
+}
+exports.readJSON = readJSON;
+
+function writeJSON(file, data, fileCompressor = false, spacer = 2, compressData = false) {
+    if (compressData) data = compress(data);
+    data = JSON.stringify(data, null, spacer);
+    if (fileCompressor == "gz") data = zlib.gzipSync(data);
+    if (fileCompressor == "br") data = zlib.brotliCompressSync(data, BROTLI_OPTIONS);
+    fs.writeFileSync(`${file}${fileCompressor ? "." + fileCompressor : ""}`, data);
+}
+exports.writeJSON = writeJSON;
+
 function currentDate() {
     const currentDate = new Date();
     const year = currentDate.getFullYear();
@@ -14,23 +42,6 @@ function currentDate() {
     return `${year}-${month}-${day}`;
 }

-function readJSON(file) {
-    let data = fs.readFileSync(file)
-    if (file.endsWith(".gz")) data = zlib.gunzipSync(data);
-    return JSON.parse(data);
-}
-exports.readJSON = readJSON;
-
-function writeJSON(file, data, gzipped = false, spacer = 2, compressData = false) {
-    if (compressData) {
-        data = compress(data);
-    }
-    data = JSON.stringify(data, null, spacer);
-    if (gzipped) data = zlib.gzipSync(data);
-    fs.writeFileSync(`${file}${gzipped ? ".gz" : ""}`, data);
-}
-exports.writeJSON = writeJSON;
-
 function getCanonicalFor(store, rawItems, today) {
     console.log(`Converting ${store}-${today} to canonical.`);
     const canonicalItems = [];
@@ -151,7 +162,7 @@ exports.replay = function (rawDataDir) {

     for (const store of STORE_KEYS) {
         storeFiles[store] = getFilteredFilesFor(store);
-        canonicalFiles[store] = storeFiles[store].map(file => getCanonicalFor(store, readJSON(file), file.match(/\d{4}-\d{2}-\d{2}/)[0]));
+        canonicalFiles[store] = storeFiles[store].map((file) => getCanonicalFor(store, readJSON(file), file.match(/\d{4}-\d{2}-\d{2}/)[0]));
         canonicalFiles[store].reverse();
     }

@@ -184,53 +195,57 @@ exports.updateData = async function (dataDir, done) {
     console.log("Fetching data for date: " + today);
     const storeFetchPromises = [];
     for (const store of STORE_KEYS) {
-        storeFetchPromises.push(new Promise(async (resolve) => {
-            const start = performance.now();
-            try {
-                const storeItems = await stores[store].fetchData();
-                writeJSON(`${dataDir}/${store}-${today}.json`, storeItems, true);
-                const storeItemsCanonical = getCanonicalFor(store, storeItems, today);
-                console.log(`Fetched ${store.toUpperCase()} data, took ${(performance.now() - start) / 1000} seconds`);
-                resolve(storeItemsCanonical)
-            } catch (e) {
-                console.error(`Error while fetching data from ${store}, continuing after ${(performance.now() - start) / 1000} seconds...`, e);
-                resolve([])
-            }
-        }));
+        storeFetchPromises.push(
+            new Promise(async (resolve) => {
+                const start = performance.now();
+                try {
+                    const storeItems = await stores[store].fetchData();
+                    writeJSON(`${dataDir}/${store}-${today}.json`, storeItems, FILE_COMPRESSOR);
+                    const storeItemsCanonical = getCanonicalFor(store, storeItems, today);
+                    console.log(`Fetched ${store.toUpperCase()} data, took ${(performance.now() - start) / 1000} seconds`);
+                    resolve(storeItemsCanonical);
+                } catch (e) {
+                    console.error(`Error while fetching data from ${store}, continuing after ${(performance.now() - start) / 1000} seconds...`, e);
+                    resolve([]);
+                }
+            })
+        );
     }

     const items = [].concat(...(await Promise.all(storeFetchPromises)));

-    if (fs.existsSync(`${dataDir}/latest-canonical.json.gz`)) {
-        const oldItems = readJSON(`${dataDir}/latest-canonical.json.gz`);
+    if (fs.existsSync(`${dataDir}/latest-canonical.json.${FILE_COMPRESSOR}`)) {
+        const oldItems = readJSON(`${dataDir}/latest-canonical.json.${FILE_COMPRESSOR}`);
         mergePriceHistory(oldItems, items);
         console.log("Merged price history");
     }

     sortItems(items);
-    writeJSON(`${dataDir}/latest-canonical.json`, items, true);
+    writeJSON(`${dataDir}/latest-canonical.json`, items, FILE_COMPRESSOR);

     if (done) done(items);
     return items;
 };

-exports.migrateToGzip = (dataDir) => {
-    if (fs.existsSync(`${dataDir}/latest-canonical.json`)) {
-        console.log("Migrating old .json data to .json.gz");
-        const files = fs.readdirSync(dataDir).filter(
-            file => file.indexOf("canonical") == -1 &&
-            STORE_KEYS.some(store => file.indexOf(`${store}-`) == 0)
-        );
-        files.push(`latest-canonical.json`);
-        for(const file of files) {
-            // skip if already gzipped
-            if (file.indexOf(".gz") != -1) continue;
-
-            const path = `${dataDir}/${file}`
-            console.log(`${path} -> ${path}.gz`);
-            const data = readJSON(path);
-            writeJSON(path, data, true);
-            fs.unlinkSync(path);
-        }
-    }
-}
+exports.migrateCompression = (dataDir, fromSuffix, toSuffix, remove = true) => {
+    console.log(`Migrating ${fromSuffix} data to ${toSuffix}`);
+    let fileCompressor = toSuffix == ".json" ? false : toSuffix.replace(".json.", "");
+    const files = fs
+        .readdirSync(dataDir)
+        .filter(
+            (file) => (file.startsWith("latest-canonical") || STORE_KEYS.some((store) => file.startsWith(`${store}-`))) && file.endsWith(fromSuffix)
+        );
+    for (const file of files) {
+        const fromPath = `${dataDir}/${file}`;
+        const toPath = fromPath.substring(0, fromPath.length - fromSuffix.length) + toSuffix;
+        console.log(`${fromPath} -> ${toPath}`);
+        const data = readJSON(fromPath);
+        writeJSON(toPath.substring(0, toPath.lastIndexOf(".json") + 5), data, fileCompressor);
+    }
+    if (remove) {
+        for (const file of files) {
+            const path = `${dataDir}/${file}`;
+            fs.unlinkSync(path);
+        }
+    }
+};
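For context, a short sketch of how the reworked helpers compose (file name and payload are illustrative; writeJSON appends the compressor suffix itself, and readJSON picks the decompressor from the file extension):

    const analysis = require("./analysis");

    // Writes data/example.json.br, brotli-compressed at quality 9 with a 22-bit window.
    analysis.writeJSON("data/example.json", { hello: "world" }, analysis.FILE_COMPRESSOR);

    // .gz files are gunzipped, .br files brotli-decompressed, plain .json is read as-is.
    const items = analysis.readJSON(`data/example.json.${analysis.FILE_COMPRESSOR}`);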
index.js (7 changes)
@@ -2,7 +2,7 @@ const fs = require("fs");
 const analysis = require("./analysis");

 function copyItemsToSite(dataDir) {
-    const items = analysis.readJSON(`${dataDir}/latest-canonical.json.gz`);
+    const items = analysis.readJSON(`${dataDir}/latest-canonical.json.${analysis.FILE_COMPRESSOR}`);
     for (const store of analysis.STORE_KEYS) {
         const storeItems = items.filter(item => item.store === store);
         analysis.writeJSON(`site/latest-canonical.${store}.compressed.json`, storeItems, false, 0, true);
@@ -37,9 +37,10 @@ function scheduleFunction(hour, minute, second, func) {
         fs.mkdirSync(dataDir);
     }

-    analysis.migrateToGzip(dataDir);
+    analysis.migrateCompression(dataDir, ".json", ".json.br");
+    analysis.migrateCompression(dataDir, ".json.gz", ".json.br");

-    if (fs.existsSync(`${dataDir}/latest-canonical.json.gz`)) {
+    if (fs.existsSync(`${dataDir}/latest-canonical.json.${analysis.FILE_COMPRESSOR}`)) {
         copyItemsToSite(dataDir);
         analysis.updateData(dataDir, (_newItems) => {
             copyItemsToSite(dataDir);
migrate.js (19 changes, new file)
@@ -0,0 +1,19 @@
+const fs = require("fs");
+const path = require("path");
+const analysis = require("./analysis");
+const [, , dataDir, fromSuffix, toSuffix] = process.argv;
+
+const errorExit = (message) => {
+    console.log(message);
+    console.log();
+    console.log("Usage: node migrate.js <data-dir> <from-suffix> <to-suffix>");
+    console.log();
+    console.log(`E.g.: node migrate.js data ".json" ".json.gz"`);
+    process.exit(1);
+};
+
+if (!fs.existsSync(dataDir) || !fs.lstatSync(dataDir).isDirectory()) errorExit("Error: The specified data directory does not exist.");
+if (!fromSuffix || typeof fromSuffix !== "string") errorExit('Error: The "from-suffix" parameter must be a non-empty string.');
+if (!toSuffix || typeof toSuffix !== "string") errorExit('Error: The "to-suffix" parameter must be a non-empty string.');
+
+analysis.migrateCompression(dataDir, fromSuffix, toSuffix);
pages.js (2 changes)
@@ -17,7 +17,7 @@ if (!fs.existsSync(dataDir)) {
 (async function () {
     try {
         await analysis.updateData(dataDir);
-        const items = analysis.readJSON(`${dataDir}/latest-canonical.json.gz`);
+        const items = analysis.readJSON(`${dataDir}/latest-canonical.json.${analysis.FILE_COMPRESSOR}`);
         for (const store of analysis.STORE_KEYS) {
             const storeItems = items.filter(item => item.store === store);
             analysis.writeJSON(`${dataDir}/latest-canonical.${store}.compressed.json`, false, storeItems, 0, true);
restore.js

@@ -1,9 +1,10 @@
 const analysis = require("./analysis.js");
-const dataDir = process?.argv?.[2] ?? "docker/data";
+const dataDir = process?.argv?.[2] ?? "data";
 console.log("Restoring data from raw data.");
 (async function () {
-    analysis.migrateToGzip(dataDir);
+    analysis.migrateCompression(dataDir, ".json", ".json.br", false);
+    analysis.migrateCompression(dataDir, ".json.gz", ".json.br");
     const items = analysis.replay(dataDir);
-    analysis.writeJSON(`${dataDir}/latest-canonical.json`, items, true);
-    console.log(`Wrote ${analysis.readJSON(`${dataDir}/latest-canonical.json.gz`).length} items to ${dataDir}/latest-canonical.json.gz`);
+    analysis.writeJSON(`${dataDir}/latest-canonical.json`, items, analysis.FILE_COMPRESSOR);
+    console.log(`Wrote ${analysis.readJSON(`${dataDir}/latest-canonical.json.${analysis.FILE_COMPRESSOR}`).length} items to ${dataDir}/latest-canonical.json.${analysis.FILE_COMPRESSOR}`);
 })();