Crawling and saving to MongoDB

This commit is contained in:
Senad Uka
2016-11-10 14:03:58 +01:00
parent 5a819cc743
commit 2ad851b257
4 changed files with 20 additions and 16 deletions

View File

@@ -1,4 +1,4 @@
MONGO_URL='mongodb://localhost:27017/kivi' MONGO_URL='mongodb://localhost:27017/kivi'
OLX_FROM_PAGE=8 OLX_FROM_PAGE=8
OLX_TO_PAGE=8 OLX_TO_PAGE=8
OLX_MAX_RESULTS=1000 OLX_MAX_RESULTS=3

View File

@@ -24,25 +24,29 @@ let savers = [
new MongoSaver(process.env.MONGO_URL) new MongoSaver(process.env.MONGO_URL)
]; ];
let done = 0; async function crawlAll() {
for (let crawler of crawlers) { let crawlersAndSaversDone = 0;
try {
crawler.crawl().then((results) => { for (let crawler of crawlers) {
try {
let results = await crawler.crawl()
for (let saver of savers) { for (let saver of savers) {
try { try {
saver.open(); await saver.connect();
saver.save(results); await saver.save(results);
} catch (e) { } catch (e) {
console.log("Error saving. Trying next saver! ", e); console.log("Error saving. Trying next saver! ", e);
crawlersAndSaversDone += 1;
} }
} }
}); } catch (e) {
} catch (e) { console.log("Error crawling. Trying next crawler! ", e);
console.log("Error crawling. Trying next crawler! ", e); }
}
for (let saver of savers) {
saver.close();
} }
} }
for (let saver of savers) { crawlAll();
saver.close();
}

View File

@@ -15,7 +15,7 @@ export default class MongoSaver {
console.log('Unable to connect to the mongoDB server. Error:', err); console.log('Unable to connect to the mongoDB server. Error:', err);
reject(err); reject(err);
} else { } else {
console.log('Connection established to', url); console.log('Connection established to', this.url);
saver.db = db; saver.db = db;
saver.collection = db.collection('results'); saver.collection = db.collection('results');
saver.ready = true; saver.ready = true;

View File

@@ -122,7 +122,7 @@ export default class OlxCrawler {
async indexPages(start, end, maxResults = 1000) { async indexPages(start, end, maxResults = 1000) {
let results = {}; let results = {};
for (let i = start; i <= end; i++) { for (let i = start; i <= end; i++) {
let result = await this.indexPage(i, numberOfResults); let result = await this.indexPage(i, maxResults);
Object.assign(results, result) Object.assign(results, result)
await this.sleep(5000); await this.sleep(5000);
} }