2019-09-16 15:59:53 +02:00
|
|
|
"use strict";
|
|
|
|
|
/*
  Entry point for the crawling functionality.
  All communication between crawlers and savers happens here.
  All environment-specific configuration is read here and
  passed to the crawlers and savers.
*/
|
|
|
|
|
const OlxCrawler = require("./specific/olx");
|
2019-09-18 15:32:48 +02:00
|
|
|
const { OLX_CONFIG } = require("./crawlerConfig");
|
2019-09-16 15:59:53 +02:00
|
|
|
const PostgresSaver = require("./savers/postgres");
|
|
|
|
|
|
2019-09-18 15:32:48 +02:00
|
|
|
/**
 * Creates the OLX crawler.
 *
 * The crawler receives a list holding one PostgresSaver, followed by the
 * OLX_CONFIG values in the positional order the OlxCrawler constructor
 * is called with here.
 *
 * @returns {OlxCrawler} A newly constructed OLX crawler.
 */
function buildOlxCrawler() {
  const savers = [new PostgresSaver()];

  return new OlxCrawler(
    savers,
    OLX_CONFIG.OLX_CRAWLER_AD_TYPE,
    OLX_CONFIG.OLX_CRAWLER_AD_CATEGORIES,
    OLX_CONFIG.OLX_MAX_PAGES,
    OLX_CONFIG.OLX_MAX_RESULTS_PER_PAGE,
    OLX_CONFIG.OLX_IGNORED_USERNAMES,
    OLX_CONFIG.OLX_DELAY_BETWEEN_PAGES
  );
}

// Every crawler that crawlAll() iterates over, in the order they are tried.
const crawlers = [buildOlxCrawler()];
|
|
|
|
|
|
|
|
|
|
/**
 * Runs the registered crawlers in order until one of them succeeds.
 *
 * Each crawler's `crawl()` is awaited in turn. The result of the first crawler
 * that resolves is returned immediately; a crawler that rejects is logged and
 * the next one in `crawlers` is tried.
 *
 * @returns {Promise<*>} Whatever the first successful `crawl()` resolves to,
 *   or an empty array when no crawler succeeds (including when none are
 *   registered).
 */
async function crawlAll() {
  for (const crawler of crawlers) {
    try {
      return await crawler.crawl();
    } catch (e) {
      // Deliberately no return here: bailing out on the first failure would
      // mean the remaining crawlers are never tried, contradicting the log.
      console.log("Error crawling. Trying next crawler! ", e);
    }
  }

  // Every crawler failed, or there were none to run.
  return [];
}
|
|
|
|
|
|
2019-09-26 17:30:06 +02:00
|
|
|
// Public API of this module: only the crawl entry point is exposed.
module.exports = { crawlAll };
|