From 90e171d07d3cad89e0a3a05093634c8892d15398 Mon Sep 17 00:00:00 2001 From: Naida Vatric Date: Fri, 28 Feb 2020 15:41:20 +0100 Subject: [PATCH] Olx scraper debugging. --- app/crawler/specificCrawlers/olx.js | 6 ++++-- test/olxScrapeTest.js | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/app/crawler/specificCrawlers/olx.js b/app/crawler/specificCrawlers/olx.js index cb1206d..5b4bbe5 100644 --- a/app/crawler/specificCrawlers/olx.js +++ b/app/crawler/specificCrawlers/olx.js @@ -205,7 +205,7 @@ class OlxCrawler { } async scrapeAd(url) { - //console.log("Scraping : ", url); + console.log("Scraping : ", url); try { const adPageSource = await fetch(url); const body = await adPageSource.text(); @@ -249,6 +249,8 @@ class OlxCrawler { .text() .trim(); // + console.log("Title:", title); + console.log("Url scraped:", url); console.log("Normal price value:", normalPriceValue); console.log("Urgent price value:", urgentPriceValue); // @@ -293,7 +295,7 @@ class OlxCrawler { let otherInformationDivId; //We need to locate DIV ID where other information are stored - for (let possibleId = 10; possibleId <= 20; possibleId++) { + for (let possibleId = 1; possibleId <= 30; possibleId++) { const adTypeFieldTitle = $( `#artikal_glavni_div > div.artikal_lijevo > div:nth-child(${possibleId}) > div:nth-child(2) > div.df1` ) diff --git a/test/olxScrapeTest.js b/test/olxScrapeTest.js index 745dbcb..12d55f0 100644 --- a/test/olxScrapeTest.js +++ b/test/olxScrapeTest.js @@ -9,7 +9,7 @@ if (urlToScrape) { (async () => { const data = await crawler.scrapeAd(urlToScrape); - console.log(data); + console.log("Scraped data:", data); })(); } else { console.log("No URL to scrape. Use like this : ");