diff --git a/app/crawler/specificCrawlers/olx.js b/app/crawler/specificCrawlers/olx.js index 1e3d63a..ddfbcd6 100644 --- a/app/crawler/specificCrawlers/olx.js +++ b/app/crawler/specificCrawlers/olx.js @@ -205,7 +205,7 @@ class OlxCrawler { } async scrapeAd(url) { - // console.log("Scraping : ", url); + //console.log("Scraping : ", url); try { const adPageSource = await fetch(url); const body = await adPageSource.text(); @@ -238,6 +238,8 @@ class OlxCrawler { //====== PRICE DETECTION AND EXTRACTION ===== let price = null; + let normalPrice = null; + let urgentPrice = null; const normalPriceValue = $("#pc > p:nth-child(2)").text(); const urgentPriceValue = $( "#artikal_glavni_div > div.artikal_lijevo > div:nth-child(5) > p" @@ -246,7 +248,10 @@ class OlxCrawler { .trim(); if (normalPriceValue && normalPriceValue.length > 0) { - price = normalPriceValue; + normalPrice = normalPriceValue + .replace(/\r\n|\n|\r/gm, "") + .replace("KM", "") + .trim(); if ( $("#pc > p.n") .text() @@ -256,21 +261,26 @@ class OlxCrawler { } else { status = AD_STATUS.STATUS_NORMAL; } - } else if (urgentPriceValue && urgentPriceValue.length > 0) { - const priceValues = urgentPriceValue.split("KM"); + } else { + throw { message: "Can't find normal price" }; + } + if (urgentPriceValue && urgentPriceValue.length > 0) { + const priceValues = urgentPriceValue.replace("Cijena", "").split("KM"); //priceValues will contain values like ["100000", "90000", ...], second element is urgent price - if (priceValues.length > 1) { - price = priceValues[1].trim(); - status = AD_STATUS.STATUS_DISCOUNTED; + if (priceValues.length > 0) { + if (priceValues[0].trim().indexOf("Hitno") != -1) { + urgentPrice = priceValues[0].replace("Hitno", "").trim(); + status = AD_STATUS.STATUS_URGENT; + } else { + urgentPrice = priceValues[0].trim(); + } } else { throw { message: "Can't find urgent price" }; } - } else { - throw { - message: "Can't find price (it is not normal nor urgent price ?)" - }; } + price = status === AD_STATUS.STATUS_URGENT ? urgentPrice : normalPrice; + //====== OTHER AD INFORMATION =============== let adType = null; let olxId = null; @@ -768,6 +778,9 @@ class OlxCrawler { if (!priceText) { return NaN; } + if (priceText === "Po dogovoru") { + return null; + } const formattedPriceText = priceText.replace(".", "").replace(",", "."); return parseFloat(formattedPriceText); }