Add DM support

This commit is contained in:
Simeon Macke (01505675) 2023-05-25 12:28:12 +02:00
parent 0509d18c7b
commit fadb104d72
4 changed files with 119 additions and 4 deletions

View File

@ -79,6 +79,24 @@ function hoferToCanonical(rawItems, today) {
return canonicalItems;
}
/**
 * Converts raw DM product records into the canonical item shape.
 *
 * Each raw item is read for `gtin`, `brandName`, `title`, `price.value`,
 * `netQuantityContent`, `contentUnit` and (optionally) `name`.
 *
 * @param {Array<Object>} rawItems - Raw DM product objects.
 * @param {string} today - Date stored on the single initial price history entry.
 * @returns {Array<Object>} One canonical item per raw item, in input order.
 *     The `bio` key is only present (and `true`) for items detected as organic.
 */
function dmToCanonical(rawItems, today) {
    const canonicalItems = [];
    for (const item of rawItems) {
        // An item counts as organic when it belongs to the "dmBio" brand or
        // its `name` starts with "Bio " / "Bio-". Logical OR keeps this a
        // real boolean instead of the 0/1 a bitwise OR would produce.
        const isBio =
            item.brandName === "dmBio" ||
            (item.name ? item.name.startsWith("Bio ") || item.name.startsWith("Bio-") : false);

        canonicalItems.push({
            store: "dm",
            id: item.gtin,
            name: `${item.brandName} ${item.title}`,
            price: item.price.value,
            priceHistory: [{ date: today, price: item.price.value }],
            unit: `${item.netQuantityContent} ${item.contentUnit}`,
            // Spreading `false` adds nothing, so non-bio items carry no `bio` key.
            ...(isBio && { bio: true }),
        });
    }
    return canonicalItems;
}
async function fetchHofer() {
const BASE_URL = `https://shopservice.roksh.at`
const CATEGORIES = BASE_URL + `/category/GetFullCategoryList/`
@ -108,6 +126,67 @@ async function fetchHofer() {
return hoferItems;
}
/**
 * Downloads the raw DM product list by running a fixed set of search queries
 * one after another and concatenating the `products` array of each response.
 *
 * A response with HTTP status 429 is retried with a doubling delay (starting
 * at 2 seconds) until a non-429 response arrives; other non-2xx statuses are
 * left to axios' default rejection behaviour via `validateStatus`.
 *
 * @returns {Promise<Array<Object>>} All raw product objects returned by the queries.
 */
async function fetchDm() {
    // The DM API is pretty aggressive regarding rate limiting, also, every query returns at most 1000 items,
    // so we have to split the queries into multiple ones.
    const BASE_URL = `https://product-search.services.dmtech.com/at/search/crawl?pageSize=1000&`;
    const QUERIES = [
        'allCategories.id=010000&price.value.to=2', //~500 items
        'allCategories.id=010000&price.value.from=2&price.value.to=3', //~600 items
        'allCategories.id=010000&price.value.from=3&price.value.to=4', //~500 items
        'allCategories.id=010000&price.value.from=4&price.value.to=7', //~800 items
        'allCategories.id=010000&price.value.from=7&price.value.to=10', //~900 items
        'allCategories.id=010000&price.value.from=10&price.value.to=15', //~900 items
        'allCategories.id=010000&price.value.from=15', //~300 items
        'allCategories.id=020000&price.value.to=2', //~600 items
        'allCategories.id=020000&price.value.from=2&price.value.to=3', //~550 items
        'allCategories.id=020000&price.value.from=3&price.value.to=4', //~600 items
        'allCategories.id=020000&price.value.from=4&price.value.to=6', //~800 items
        'allCategories.id=020000&price.value.from=6&price.value.to=10', //~850 items
        'allCategories.id=020000&price.value.from=10&price.value.to=18', //~900 items
        'allCategories.id=020000&price.value.from=18', //~960 items (!)
        'allCategories.id=030000&price.value.to=8', //~900 items
        'allCategories.id=030000&price.value.from=8', //~500 items
        'allCategories.id=040000&price.value.to=2', //~600 items
        'allCategories.id=040000&price.value.from=2&price.value.to=4', //~900 items
        'allCategories.id=040000&price.value.from=4', //~400 items
        'allCategories.id=050000&price.value.to=4', //~600 items
        'allCategories.id=050000&price.value.from=4', //~800 items
        'allCategories.id=060000&price.value.to=4', //~900 items
        'allCategories.id=060000&price.value.from=4', //~500 items
        'allCategories.id=070000', //~300 items
    ];
    const INITIAL_BACKOFF_MS = 2000;
    const PAUSE_BETWEEN_QUERIES_MS = 1000;

    const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

    // Accept 429 as a resolved response so the retry loop below can see it
    // instead of axios rejecting the promise.
    const request = (query) =>
        axios.get(BASE_URL + query, {
            validateStatus: (status) => (status >= 200 && status < 300) || status === 429,
        });

    let dmItems = [];
    for (const query of QUERIES) {
        let res = await request(query);

        // exponential backoff; declared locally so it restarts for every
        // query and never leaks into (or throws on) the global scope.
        let backoff = INITIAL_BACKOFF_MS;
        while (res.status === 429) {
            console.info(`DM API returned 429, retrying in ${backoff/1000}s.`);
            await sleep(backoff);
            backoff *= 2;
            res = await request(query);
        }

        const items = res.data;
        if (items.count > 1000) {
            console.warn(`Query returned more than 1000 items! Items may be missing. Adjust queries. Query: ${query}`);
        }
        dmItems = dmItems.concat(items.products);

        // Fixed pause between queries to stay below the rate limit.
        await sleep(PAUSE_BETWEEN_QUERIES_MS);
    }
    return dmItems;
}
function mergePriceHistory(oldItems, items) {
if (oldItems == null) return items;
@ -154,12 +233,15 @@ exports.replay = function(rawDataDir) {
const billaFilesCanonical = billaFiles.map(file => billaToCanonical(readJSON(file), file.match(/\d{4}-\d{2}-\d{2}/)[0]));
const hoferFiles = files.filter(file => file.indexOf("hofer-") == 0).sort(dateSort).map(file => rawDataDir + "/" + file);
const hoferFilesCanonical = hoferFiles.map(file => hoferToCanonical(readJSON(file), file.match(/\d{4}-\d{2}-\d{2}/)[0]));
const dmFiles = files.filter(file => file.indexOf("dm-") == 0).sort(dateSort).map(file => rawDataDir + "/" + file);
const dmFilesCanonical = dmFiles.map(file => dmToCanonical(readJSON(file), file.match(/\d{4}-\d{2}-\d{2}/)[0]));
const allFilesCanonical = [];
const len = Math.max(sparFilesCanonical.length, Math.max(billaFilesCanonical.length, hoferFilesCanonical.length));
const len = Math.max(Math.max(sparFilesCanonical.length, Math.max(billaFilesCanonical.length, hoferFilesCanonical.length)), dmFilesCanonical.length);
sparFilesCanonical.reverse();
billaFilesCanonical.reverse();
hoferFilesCanonical.reverse();
dmFilesCanonical.reverse();
for (let i = 0; i < len; i++) {
const canonical = [];
let billa = billaFilesCanonical.pop();
@ -169,6 +251,9 @@ exports.replay = function(rawDataDir) {
let hofer = hoferFilesCanonical.pop();
if (hofer) canonical.push(...hofer);
allFilesCanonical.push(canonical);
let dm = dmFilesCanonical.pop();
if (dm) canonical.push(...dmFilesCanonical.pop());
allFilesCanonical.push(canonical);
}
if (allFilesCanonical.length == 0) return null;
@ -210,7 +295,13 @@ exports.updateData = async function (dataDir, done) {
const hoferItemsCanonical = hoferToCanonical(hoferItems, today);
console.log("Fetched HOFER data, took " + (performance.now() - start) / 1000 + " seconds");
const items = [...billaItemsCanonical, ...sparItemsCanonical, ...hoferItemsCanonical];
start = performance.now();
const dmItems = await fetchDm();
fs.writeFileSync(`${dataDir}/dm-${today}.json`, JSON.stringify(dmItems, null, 2));
const dmItemsCanonical = dmToCanonical(dmItems, today);
console.log("Fetched DM data, took " + (performance.now() - start) / 1000 + " seconds");
const items = [...billaItemsCanonical, ...sparItemsCanonical, ...hoferItemsCanonical, ...dmItemsCanonical];
if (fs.existsSync(`${dataDir}/latest-canonical.json`)) {
const oldItems = JSON.parse(fs.readFileSync(`${dataDir}/latest-canonical.json`));
mergePriceHistory(oldItems, items);

View File

@ -25,6 +25,7 @@
<label>Summe Billa<input type="checkbox" id="sumbilla"></label>
<label>Summe Spar<input type="checkbox" id="sumspar"></label>
<label>Summe Hofer<input type="checkbox" id="sumhofer"></label>
<label>Summe dm<input type="checkbox" id="sumdm"></label>
</div>
<table id="cartitems"></table>
</div>

View File

@ -34,6 +34,9 @@ async function load() {
document.querySelector("#sumhofer").addEventListener("change", () => {
showCharts(canvasDom, cart, lookup);
})
document.querySelector("#sumdm").addEventListener("change", () => {
showCharts(canvasDom, cart, lookup);
})
}
function showSearch(cart, items, lookup) {
@ -112,6 +115,16 @@ function showCharts(canvasDom, cart, lookup) {
}
}
if (document.querySelector("#sumdn").checked) {
const itemsDm = items.filter(item => item.store == "dm");
if (itemsDm.length > 0) {
itemsToShow.push({
name: "Summe dm",
priceHistory: calculateOverallPriceChanges(itemsDm)
});
}
}
cart.items.forEach((cartItem) => {
const item = lookup[cartItem.id];
if (!item) return;

View File

@ -95,6 +95,8 @@ function itemToStoreLink(item) {
return `<a target="_blank" href="https://shop.billa.at/search/results?category=&searchTerm=${encodeURIComponent(item.name)}">${item.name}</a>`;
if (item.store == "hofer")
return `<a target="_blank" href="https://www.roksh.at/hofer/angebot/suche/${encodeURIComponent(item.name)}">${item.name}</a>`;
if (item.store == "dm")
return `<a target="_blank" href="https://www.dm.at/product-p${item.id}.html">${item.name}</a>`;
return item.name;
}
@ -138,6 +140,10 @@ function itemToDOM(item) {
case "hofer":
row.style["background"] = "rgb(230 230 255)";
break;
case "dm":
row.style["background"] = "rgb(255 240 230)";
break;
}
row.appendChild(storeDom);
row.appendChild(nameDom);
@ -148,7 +154,7 @@ function itemToDOM(item) {
let componentId = 0;
function searchItems(items, query, billa, spar, hofer, eigenmarken, minPrice, maxPrice, exact, bio) {
function searchItems(items, query, billa, spar, hofer, dm, eigenmarken, minPrice, maxPrice, exact, bio) {
query = query.trim();
if (query.length < 3) return [];
@ -194,6 +200,7 @@ function searchItems(items, query, billa, spar, hofer, eigenmarken, minPrice, ma
if (item.store == "billa" && !billa) continue;
if (item.store == "spar" && !spar) continue;
if (item.store == "hofer" && !hofer) continue;
if (item.store == "dm" && !dm) continue;
if (item.price < minPrice) continue;
if (item.price > maxPrice) continue;
if (eigenmarken && !(name.indexOf("clever") == 0 || name.indexOf("s-budget") == 0 || name.indexOf("milfina") == 0)) continue;
@ -213,6 +220,7 @@ function newSearchComponent(parentElement, items, searched, filter, headerModifi
<label><input id="billa-${id}" type="checkbox" checked="true"> Billa</label>
<label><input id="spar-${id}" type="checkbox" checked="true"> Spar</label>
<label><input id="hofer-${id}" type="checkbox" checked="true"> Hofer</label>
<label><input id="dm-${id}" type="checkbox" checked="true"> DM</label>
<label><input id="eigenmarken-${id}" type="checkbox"> Nur CLEVER / S-BUDGET / MILFINA</label>
<label><input id="bio-${id}" type="checkbox"> Nur Bio</label>
</div>
@ -233,6 +241,7 @@ function newSearchComponent(parentElement, items, searched, filter, headerModifi
const billa = parentElement.querySelector(`#billa-${id}`);
const spar = parentElement.querySelector(`#spar-${id}`);
const hofer = parentElement.querySelector(`#hofer-${id}`);
const dm = parentElement.querySelector(`#dm-${id}`);
const minPrice = parentElement.querySelector(`#minprice-${id}`);
const maxPrice = parentElement.querySelector(`#maxprice-${id}`);
const numResults = parentElement.querySelector(`#numresults-${id}`);
@ -241,7 +250,7 @@ function newSearchComponent(parentElement, items, searched, filter, headerModifi
let hits = [];
try {
hits = searchItems(items, query,
billa.checked, spar.checked, hofer.checked, eigenmarken.checked,
billa.checked, spar.checked, hofer.checked, dm.checked, eigenmarken.checked,
toNumber(minPrice.value, 0), toNumber(maxPrice.value, 100), exact.checked, bio.checked
);
} catch (e) {
@ -288,6 +297,7 @@ function newSearchComponent(parentElement, items, searched, filter, headerModifi
billa.addEventListener("change", () => search(searchInput.value));
spar.addEventListener("change", () => search(searchInput.value));
hofer.addEventListener("change", () => search(searchInput.value));
dm.addEventListener("change", () => search(searchInput.value));
exact.addEventListener("change", () => search(searchInput.value));
minPrice.addEventListener("change", () => search(searchInput.value));
maxPrice.addEventListener("change", () => search(searchInput.value));