diff --git a/app/helpers/crawlers/olixClawler.js b/app/helpers/crawlers/olixClawler.js
index e69de29..1c37df9 100644
--- a/app/helpers/crawlers/olixClawler.js
+++ b/app/helpers/crawlers/olixClawler.js
@@ -0,0 +1,67 @@
+const fetch = require('node-fetch');
+const cheerio = require('cheerio');
+
+// Pause between consecutive page requests, in milliseconds.
+const PAGE_DELAY_MS = 5000;
+
+/**
+ * Crawls an inclusive range of pages and merges the per-page results.
+ *
+ * NOTE(review): `indexPage(page, maxResults)` is called by `indexPages` but is
+ * not defined in this class as shown; it has to be supplied (e.g. by a
+ * subclass) or `crawl()` rejects with a TypeError.
+ */
+export default class OlxCrawler {
+  /**
+   * @param {number} [fromPage=0] - First page to index (inclusive).
+   * @param {number} [toPage=10] - Last page to index (inclusive).
+   * @param {number} [maxResults=1000] - Forwarded to `indexPage` as its second argument.
+   */
+  constructor(fromPage = 0, toPage = 10, maxResults = 1000) {
+    this.fromPage = fromPage;
+    this.toPage = toPage;
+    this.maxResults = maxResults;
+  }
+
+  /**
+   * Resolves after `ms` milliseconds.
+   * @param {number} ms - Delay in milliseconds.
+   * @returns {Promise<void>}
+   */
+  async sleep(ms) {
+    return new Promise((resolve) => setTimeout(resolve, ms));
+  }
+
+  /**
+   * Indexes pages `start` through `end` (inclusive) one at a time and merges
+   * every page's result into one object; on key clashes later pages win.
+   * @param {number|string} start - First page; numeric strings are accepted.
+   * @param {number|string} end - Last page; numeric strings are accepted.
+   * @param {number} [maxResults=1000] - Forwarded to `indexPage`.
+   * @returns {Promise<Object>} The merged results.
+   */
+  async indexPages(start, end, maxResults = 1000) {
+    // Coerce the bounds: values read from process.env are strings, and a
+    // string-to-string comparison is lexicographic ("2" <= "10" is false).
+    const firstPage = Number(start);
+    const lastPage = Number(end);
+    const results = {};
+    for (let page = firstPage; page <= lastPage; page++) {
+      const result = await this.indexPage(page, maxResults);
+      Object.assign(results, result);
+      // Only pause between requests; nothing follows the last page.
+      if (page < lastPage) {
+        await this.sleep(PAGE_DELAY_MS);
+      }
+    }
+    return results;
+  }
+
+  /**
+   * Crawls the page range configured in the constructor.
+   * @returns {Promise<Object>} The merged results of all indexed pages.
+   */
+  async crawl() {
+    return this.indexPages(this.fromPage, this.toPage, this.maxResults);
+  }
+}
\ No newline at end of file
diff --git a/app/services/crawlerService.js b/app/services/crawlerService.js
index e69de29..073195c 100644
--- a/app/services/crawlerService.js
+++ b/app/services/crawlerService.js
@@ -0,0 +1,46 @@
+// Crawlers to run, in order; each must provide an async `crawl()`.
+const crawlers = [
+  // new OlxCrawler(process.env.OLX_FROM_PAGE, process.env.OLX_TO_PAGE, process.env.OLX_MAX_RESULTS),
+];
+
+// Result sinks; each must provide `connect()`, `save(results)` and `close()`.
+// Must be declared: `crawlAll` iterates it even when no crawler is configured.
+const savers = [];
+
+/**
+ * Runs every crawler, passes its results to every saver, then closes the savers.
+ * A failing crawler, saver or close is logged and skipped so the rest still run.
+ * @returns {Promise<void>}
+ */
+async function crawlAll() {
+  for (const crawler of crawlers) {
+    try {
+      const results = await crawler.crawl();
+      for (const saver of savers) {
+        try {
+          await saver.connect();
+          await saver.save(results);
+        } catch (e) {
+          console.log("Error saving. Trying next saver! ", e);
+        }
+      }
+    } catch (e) {
+      console.log("Error crawling. Trying next crawler! ", e);
+    }
+  }
+
+  for (const saver of savers) {
+    try {
+      // Awaited so an asynchronous close finishes (and its failure is caught) here.
+      await saver.close();
+    } catch (e) {
+      console.log("Error closing saver! ", e);
+    }
+  }
+}
+
+// Never leave the promise floating: report anything unexpected and fail the process.
+crawlAll().catch((e) => {
+  console.log("Unexpected crawler failure! ", e);
+  process.exitCode = 1;
+});
\ No newline at end of file