2016-11-10 13:04:45 +01:00
|
|
|
'use strict'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/*
  Entry point for crawling functionality.
  All communication between crawlers and savers happens here.
  All environment-specific configuration is read here and
  passed to the crawlers and savers.
*/
|
|
|
|
|
import {
|
|
|
|
|
install
|
|
|
|
|
} from 'source-map-support';
|
|
|
|
|
import 'dotenv/config';
|
|
|
|
|
import OlxCrawler from './specific/olx';
|
2017-04-10 05:28:37 +02:00
|
|
|
import ProstorCrawler from './specific/prostor';
|
2017-10-16 11:36:21 +02:00
|
|
|
import RentalCrawler from './specific/rental';
|
2016-11-10 13:04:45 +01:00
|
|
|
import MongoSaver from './savers/mongo'
|
|
|
|
|
|
|
|
|
|
install(); // for source maps to work

// Crawlers that crawlAll() runs, in array order. The Olx and Prostor crawlers
// are currently disabled; only the Rental crawler is active. Its three
// constructor arguments come from the RENTAL_FROM_PAGE, RENTAL_TO_PAGE and
// RENTAL_MAX_RESULTS environment variables, parsed explicitly as base-10
// integers (an unset variable yields NaN).
const crawlers = [
    //new OlxCrawler(process.env.OLX_FROM_PAGE, process.env.OLX_TO_PAGE, process.env.OLX_MAX_RESULTS),
    //new ProstorCrawler(parseInt(process.env.PROSTOR_FROM_PAGE), parseInt(process.env.PROSTOR_TO_PAGE), parseInt(process.env.PROSTOR_MAX_RESULTS)),
    new RentalCrawler(
        parseInt(process.env.RENTAL_FROM_PAGE, 10),
        parseInt(process.env.RENTAL_TO_PAGE, 10),
        parseInt(process.env.RENTAL_MAX_RESULTS, 10)
    )
];

// Savers that receive every crawler's results. The Mongo saver is constructed
// with the value of the MONGO_URL environment variable.
const savers = [
    new MongoSaver(process.env.MONGO_URL)
];
|
|
|
|
|
|
2016-11-10 14:03:58 +01:00
|
|
|
/**
 * Runs every configured crawler in turn and hands each crawler's results to
 * every configured saver.
 *
 * Failures are isolated: a crawler that throws is logged and skipped, and a
 * saver that fails to connect or save is logged without affecting the other
 * savers. Once all crawlers have run, every saver is closed; a failure while
 * closing one saver is logged and does not prevent the others from closing.
 *
 * Reads the module-level `crawlers` and `savers` arrays.
 *
 * @returns {Promise<void>} Settles once a close has been attempted on every saver.
 */
async function crawlAll() {
    for (const crawler of crawlers) {
        try {
            const results = await crawler.crawl();

            for (const saver of savers) {
                try {
                    // connect() is invoked before every save, i.e. once per
                    // crawler for each saver.
                    await saver.connect();
                    await saver.save(results);
                } catch (e) {
                    console.log("Error saving. Trying next saver! ", e);
                }
            }
        } catch (e) {
            console.log("Error crawling. Trying next crawler! ", e);
        }
    }

    for (const saver of savers) {
        try {
            // Awaited so that an asynchronous close() failure is caught here
            // instead of surfacing as an unhandled rejection; awaiting a
            // synchronous close() is harmless.
            await saver.close();
        } catch (e) {
            console.log("Error closing saver. Trying next saver! ", e);
        }
    }
}
|
2016-11-10 14:03:58 +01:00
|
|
|
|
|
|
|
|
// Start the crawl. A rejection from crawlAll() is reported and reflected in
// the process exit code instead of being left as an unhandled rejection.
crawlAll().catch((e) => {
    console.error("Crawling failed: ", e);
    process.exitCode = 1;
});
|