From d4fcd1950da0dd578304d65723732aaedab96f89 Mon Sep 17 00:00:00 2001 From: Senad Uka Date: Fri, 11 Sep 2020 04:51:54 +0200 Subject: [PATCH] Fix saljic --- app/crawler/specificCrawlers/olx.js | 8 ++++---- app/crawler/specificCrawlers/saljic.js | 18 +++++++++++++++--- 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/app/crawler/specificCrawlers/olx.js b/app/crawler/specificCrawlers/olx.js index e2a0492..0f87334 100644 --- a/app/crawler/specificCrawlers/olx.js +++ b/app/crawler/specificCrawlers/olx.js @@ -228,18 +228,18 @@ class OlxCrawler { } - const hamo = [] + const dataResults = [] const { scrapedData, errors } = await PromisePool .withConcurrency(2) .for(asyncScraping) .process(async data => { const result = await this.scrapeAd(data) await this.sleep(this.delayBetweenPages); - hamo.push(result) - return result; //TODO: this does not work, scrapedData is null, hamo works + dataResults.push(result) + return result; //TODO: this does not work, scrapedData is null, dataResults works }) - const filteredScrapedData = hamo.filter(adData => !!adData); + const filteredScrapedData = dataResults.filter(adData => !!adData); return filteredScrapedData; } catch (e) { console.error("Exception caught:" + e); diff --git a/app/crawler/specificCrawlers/saljic.js b/app/crawler/specificCrawlers/saljic.js index 9f5b0b7..ba13373 100644 --- a/app/crawler/specificCrawlers/saljic.js +++ b/app/crawler/specificCrawlers/saljic.js @@ -207,11 +207,23 @@ class SaljicCrawler { const asyncScraping = []; for (let i = 0; i < actualNoOfResults; i++) { - asyncScraping.push(this.scrapeAd(hrefsAbs[i], adTypes[i])); + asyncScraping.push([hrefsAbs[i], adTypes[i]]); } - const scrapedData = await Promise.all(asyncScraping); - const filteredScrapedData = scrapedData.filter(adData => !!adData); + + + const dataResults = [] + const { scrapedData, errors } = await PromisePool + .withConcurrency(2) + .for(asyncScraping) + .process(async data => { + const result = await this.scrapeAd(...data) + 
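await this.sleep(this.delayBetweenPages); + dataResults.push(result) + return result; //TODO: the destructured scrapedData stays empty even though result is returned here (PromisePool appears to resolve to { results, errors } rather than { scrapedData, errors } - confirm), so dataResults is collected manually as a workaround + }) + + const filteredScrapedData = dataResults.filter(adData => !!adData); return filteredScrapedData; } catch (e) { console.error("[SALJIC] Exception caught:" + e);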