From c4f6c6e1c3d13868ea3fdb0d404ed0c30ae6fb51 Mon Sep 17 00:00:00 2001 From: Bilal Catic Date: Sat, 21 Sep 2019 15:45:48 +0200 Subject: [PATCH] construct crawling url before indexing single page --- app/crawler/specific/olx.js | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/app/crawler/specific/olx.js b/app/crawler/specific/olx.js index 0bf0a35..9723c74 100644 --- a/app/crawler/specific/olx.js +++ b/app/crawler/specific/olx.js @@ -78,9 +78,9 @@ class OlxCrawler { const maxResultsPerPage = this.maxResults; for (let pageNumber = startPage; pageNumber <= endPage; pageNumber++) { + const pageUrl = `${url}&stranica=${pageNumber}`; const singlePageResults = await this.indexSinglePage( - url, - pageNumber, + pageUrl, maxResultsPerPage ); await this.saveCrawledResults(singlePageResults); @@ -88,10 +88,8 @@ class OlxCrawler { } } - async indexSinglePage(urlWithoutPageNumber, pageNumber, maxResultsPerPage) { + async indexSinglePage(url, maxResultsPerPage) { try { - const url = `${urlWithoutPageNumber}&stranica=${pageNumber}`; - const res = await fetch(url); const body = await res.text(); const $ = cheerio.load(body); @@ -114,8 +112,6 @@ class OlxCrawler { hrefs.length <= maxResultsPerPage ? hrefs.length : maxResultsPerPage; for (let i = 0; i < actualNoOfResults; i++) { - console.log(`Scraping : ${hrefs[i]}`); - const adData = await this.scrapeAd(hrefs[i]); if (adData) {