From 747f56941a901662b7c3ac1db645ddfa920cd022 Mon Sep 17 00:00:00 2001 From: Naida Vatric Date: Fri, 28 Feb 2020 14:21:53 +0100 Subject: [PATCH 1/4] Logged olx price scraping. --- app/crawler/specificCrawlers/olx.js | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/app/crawler/specificCrawlers/olx.js b/app/crawler/specificCrawlers/olx.js index ddfbcd6..cb1206d 100644 --- a/app/crawler/specificCrawlers/olx.js +++ b/app/crawler/specificCrawlers/olx.js @@ -240,13 +240,18 @@ class OlxCrawler { let price = null; let normalPrice = null; let urgentPrice = null; - const normalPriceValue = $("#pc > p:nth-child(2)").text(); + const normalPriceValue = $("#pc > p:nth-child(2)") + .text() + .trim(); const urgentPriceValue = $( "#artikal_glavni_div > div.artikal_lijevo > div:nth-child(5) > p" ) .text() .trim(); - + // + console.log("Normal price value:", normalPriceValue); + console.log("Urgent price value:", urgentPriceValue); + // if (normalPriceValue && normalPriceValue.length > 0) { normalPrice = normalPriceValue .replace(/\r\n|\n|\r/gm, "") -- 2.47.3 From 90e171d07d3cad89e0a3a05093634c8892d15398 Mon Sep 17 00:00:00 2001 From: Naida Vatric Date: Fri, 28 Feb 2020 15:41:20 +0100 Subject: [PATCH 2/4] Olx scraper debugging. 
--- app/crawler/specificCrawlers/olx.js | 6 ++++-- test/olxScrapeTest.js | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/app/crawler/specificCrawlers/olx.js b/app/crawler/specificCrawlers/olx.js index cb1206d..5b4bbe5 100644 --- a/app/crawler/specificCrawlers/olx.js +++ b/app/crawler/specificCrawlers/olx.js @@ -205,7 +205,7 @@ class OlxCrawler { } async scrapeAd(url) { - //console.log("Scraping : ", url); + console.log("Scraping : ", url); try { const adPageSource = await fetch(url); const body = await adPageSource.text(); @@ -249,6 +249,8 @@ class OlxCrawler { .text() .trim(); // + console.log("Title:", title); + console.log("Url scraped:", url); console.log("Normal price value:", normalPriceValue); console.log("Urgent price value:", urgentPriceValue); // @@ -293,7 +295,7 @@ class OlxCrawler { let otherInformationDivId; //We need to locate DIV ID where other information are stored - for (let possibleId = 10; possibleId <= 20; possibleId++) { + for (let possibleId = 1; possibleId <= 30; possibleId++) { const adTypeFieldTitle = $( `#artikal_glavni_div > div.artikal_lijevo > div:nth-child(${possibleId}) > div:nth-child(2) > div.df1` ) diff --git a/test/olxScrapeTest.js b/test/olxScrapeTest.js index 745dbcb..12d55f0 100644 --- a/test/olxScrapeTest.js +++ b/test/olxScrapeTest.js @@ -9,7 +9,7 @@ if (urlToScrape) { (async () => { const data = await crawler.scrapeAd(urlToScrape); - console.log(data); + console.log("Scraped data:", data); })(); } else { console.log("No URL to scrape. Use like this : "); -- 2.47.3 From feb2d04ed6e152482d8ef0075032643b9b770267 Mon Sep 17 00:00:00 2001 From: Naida Vatric Date: Fri, 28 Feb 2020 15:43:35 +0100 Subject: [PATCH 3/4] Olx debug log. 
--- app/crawler/specificCrawlers/olx.js | 2 ++ 1 file changed, 2 insertions(+) diff --git a/app/crawler/specificCrawlers/olx.js b/app/crawler/specificCrawlers/olx.js index 5b4bbe5..7d4cf66 100644 --- a/app/crawler/specificCrawlers/olx.js +++ b/app/crawler/specificCrawlers/olx.js @@ -667,6 +667,8 @@ class OlxCrawler { distanceToRiver, numberOfViewsAgency }; + // + console.log("Scraped data:", data); return data; } catch (e) { -- 2.47.3 From df5e38092dbe46f4ad0ebafc91bb01d07e56a667 Mon Sep 17 00:00:00 2001 From: Naida Vatric Date: Fri, 28 Feb 2020 17:12:55 +0100 Subject: [PATCH 4/4] Comment out debug logs and saljic small fix. --- app/crawler/specificCrawlers/olx.js | 13 +++++++------ app/crawler/specificCrawlers/saljic.js | 12 ++++++++---- 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/app/crawler/specificCrawlers/olx.js b/app/crawler/specificCrawlers/olx.js index 7d4cf66..548d5ac 100644 --- a/app/crawler/specificCrawlers/olx.js +++ b/app/crawler/specificCrawlers/olx.js @@ -248,11 +248,12 @@ class OlxCrawler { ) .text() .trim(); - // - console.log("Title:", title); - console.log("Url scraped:", url); - console.log("Normal price value:", normalPriceValue); - console.log("Urgent price value:", urgentPriceValue); + + //Debug + //console.log("Title:", title); + //console.log("Url scraped:", url); + // console.log("Normal price value:", normalPriceValue); + // console.log("Urgent price value:", urgentPriceValue); // if (normalPriceValue && normalPriceValue.length > 0) { normalPrice = normalPriceValue @@ -668,7 +669,7 @@ class OlxCrawler { numberOfViewsAgency }; // - console.log("Scraped data:", data); + //console.log("Scraped data:", data); return data; } catch (e) { diff --git a/app/crawler/specificCrawlers/saljic.js b/app/crawler/specificCrawlers/saljic.js index aa1d3f4..8afc751 100644 --- a/app/crawler/specificCrawlers/saljic.js +++ b/app/crawler/specificCrawlers/saljic.js @@ -274,10 +274,14 @@ class SaljicCrawler { .trim(); const latAndLongSrc = 
$(propertySelectors.latAndLong).attr("src"); - const tmpLatLong = latAndLongSrc.split("marker=")[1]; - const latText = tmpLatLong.split("%2C")[0]; - const longText = tmpLatLong.split("%2C")[1]; - + let tmpLatLong; + let latText; + let longText; + if (latAndLongSrc) { + tmpLatLong = latAndLongSrc.split("marker=")[1]; + latText = tmpLatLong.split("%2C")[0]; + longText = tmpLatLong.split("%2C")[1]; + } const locationLat = parseFloat(latText) || null; const locationLong = parseFloat(longText) || null; -- 2.47.3