diff --git a/crawler/.env b/crawler/.env index f54a161..8b637b6 100644 --- a/crawler/.env +++ b/crawler/.env @@ -1,4 +1,4 @@ MONGO_URL='mongodb://localhost:27017/kivi' OLX_FROM_PAGE=8 OLX_TO_PAGE=8 -OLX_MAX_RESULTS=1000 +OLX_MAX_RESULTS=3 diff --git a/crawler/crawl.js b/crawler/crawl.js index adc2dd5..d668453 100644 --- a/crawler/crawl.js +++ b/crawler/crawl.js @@ -24,25 +24,29 @@ let savers = [ new MongoSaver(process.env.MONGO_URL) ]; -let done = 0; -for (let crawler of crawlers) { - try { - crawler.crawl().then((results) => { +async function crawlAll() { + let crawlersAndSaversDone = 0; + + for (let crawler of crawlers) { + try { + let results = await crawler.crawl() for (let saver of savers) { try { - saver.open(); - saver.save(results); + await saver.connect(); + await saver.save(results); } catch (e) { console.log("Error saving. Trying next saver! ", e); - + crawlersAndSaversDone += 1; } } - }); - } catch (e) { - console.log("Error crawling. Trying next crawler! ", e); + } catch (e) { + console.log("Error crawling. Trying next crawler! ", e); + } + } + + for (let saver of savers) { + saver.close(); } } -for (let saver of savers) { - saver.close(); -} +crawlAll(); diff --git a/crawler/savers/mongo.js b/crawler/savers/mongo.js index 094ecd1..407208f 100644 --- a/crawler/savers/mongo.js +++ b/crawler/savers/mongo.js @@ -15,7 +15,7 @@ export default class MongoSaver { console.log('Unable to connect to the mongoDB server. Error:', err); reject(err); } else { - console.log('Connection established to', url); + console.log('Connection established to', this.url); saver.db = db; saver.collection = db.collection('results'); saver.ready = true; diff --git a/crawler/specific/olx.js b/crawler/specific/olx.js index 80a84d7..8fc6171 100644 --- a/crawler/specific/olx.js +++ b/crawler/specific/olx.js @@ -122,7 +122,7 @@ export default class OlxCrawler { async indexPages(start, end, maxResults = 1000) { let results = {}; for (let i = start; i <= end; i++) { - let result = await this.indexPage(i, numberOfResults); + let result = await this.indexPage(i, maxResults); Object.assign(results, result) await this.sleep(5000); }