diff --git a/app/crawler/specificCrawlers/olx.js b/app/crawler/specificCrawlers/olx.js index ddfbcd6..548d5ac 100644 --- a/app/crawler/specificCrawlers/olx.js +++ b/app/crawler/specificCrawlers/olx.js @@ -205,7 +205,7 @@ class OlxCrawler { } async scrapeAd(url) { - //console.log("Scraping : ", url); + console.log("Scraping : ", url); try { const adPageSource = await fetch(url); const body = await adPageSource.text(); @@ -240,13 +240,21 @@ class OlxCrawler { let price = null; let normalPrice = null; let urgentPrice = null; - const normalPriceValue = $("#pc > p:nth-child(2)").text(); + const normalPriceValue = $("#pc > p:nth-child(2)") + .text() + .trim(); const urgentPriceValue = $( "#artikal_glavni_div > div.artikal_lijevo > div:nth-child(5) > p" ) .text() .trim(); + //Debug + //console.log("Title:", title); + //console.log("Url scraped:", url); + // console.log("Normal price value:", normalPriceValue); + // console.log("Urgent price value:", urgentPriceValue); + // if (normalPriceValue && normalPriceValue.length > 0) { normalPrice = normalPriceValue .replace(/\r\n|\n|\r/gm, "") @@ -288,7 +296,7 @@ class OlxCrawler { let otherInformationDivId; //We need to locate DIV ID where other information are stored - for (let possibleId = 10; possibleId <= 20; possibleId++) { + for (let possibleId = 1; possibleId <= 30; possibleId++) { const adTypeFieldTitle = $( `#artikal_glavni_div > div.artikal_lijevo > div:nth-child(${possibleId}) > div:nth-child(2) > div.df1` ) @@ -660,6 +668,8 @@ class OlxCrawler { distanceToRiver, numberOfViewsAgency }; + // + //console.log("Scraped data:", data); return data; } catch (e) { diff --git a/app/crawler/specificCrawlers/saljic.js b/app/crawler/specificCrawlers/saljic.js index aa1d3f4..8afc751 100644 --- a/app/crawler/specificCrawlers/saljic.js +++ b/app/crawler/specificCrawlers/saljic.js @@ -274,10 +274,14 @@ class SaljicCrawler { .trim(); const latAndLongSrc = $(propertySelectors.latAndLong).attr("src"); - const tmpLatLong = latAndLongSrc.split("marker=")[1]; - const latText = tmpLatLong.split("%2C")[0]; - const longText = tmpLatLong.split("%2C")[1]; - + let tmpLatLong; + let latText; + let longText; + if (latAndLongSrc) { + tmpLatLong = latAndLongSrc.split("marker=")[1]; + latText = tmpLatLong.split("%2C")[0]; + longText = tmpLatLong.split("%2C")[1]; + } const locationLat = parseFloat(latText) || null; const locationLong = parseFloat(longText) || null; diff --git a/test/olxScrapeTest.js b/test/olxScrapeTest.js index 745dbcb..12d55f0 100644 --- a/test/olxScrapeTest.js +++ b/test/olxScrapeTest.js @@ -9,7 +9,7 @@ if (urlToScrape) { (async () => { const data = await crawler.scrapeAd(urlToScrape); - console.log(data); + console.log("Scraped data:", data); })(); } else { console.log("No URL to scrape. Use like this : ");