Add crawler config and include a specific crawler for the "rental" agency

This commit is contained in:
Bilal Catic
2019-10-24 07:48:49 +02:00
parent abc591749e
commit ec798fe94c
4 changed files with 80 additions and 21 deletions

View File

@@ -6,30 +6,49 @@
passed to the crawlers and savers.
*/
const OlxCrawler = require("./specific/olx");
const RentalCrawler = require("./specific/rental");
const { OLX_CONFIG, RENTAL_CONFIG } = require("./crawlerConfig");
const PostgresSaver = require("./savers/postgres");
/**
 * Runs every configured crawler one after another and gathers what their
 * `crawl()` calls resolve with into a single flat array.
 *
 * The crawlers are constructed on each call and share one PostgresSaver
 * instance. A crawler whose `crawl()` rejects is logged and skipped: the
 * loop moves on to the next crawler and keeps whatever was already
 * collected. Results that are not arrays are ignored.
 *
 * @returns {Promise<Array>} Items from all crawlers that succeeded, in
 *   crawler order; an empty array when nothing was collected.
 */
async function crawlAll() {
  const postgresSaver = new PostgresSaver();
  const crawlers = [
    new OlxCrawler(
      [postgresSaver],
      OLX_CONFIG.OLX_CRAWLER_AD_TYPE,
      OLX_CONFIG.OLX_CRAWLER_AD_CATEGORIES,
      OLX_CONFIG.OLX_MAX_PAGES,
      OLX_CONFIG.OLX_MAX_RESULTS_PER_PAGE,
      OLX_CONFIG.OLX_IGNORED_USERNAMES,
      OLX_CONFIG.OLX_DELAY_BETWEEN_PAGES
    ),
    new RentalCrawler(
      [postgresSaver],
      RENTAL_CONFIG.RENTAL_CRAWLER_AD_TYPE,
      RENTAL_CONFIG.RENTAL_CRAWLER_AD_CATEGORIES,
      RENTAL_CONFIG.RENTAL_MAX_PAGES,
      RENTAL_CONFIG.RENTAL_MAX_RESULTS_PER_PAGE,
      RENTAL_CONFIG.RENTAL_IGNORED_USERNAMES,
      RENTAL_CONFIG.RENTAL_DELAY_BETWEEN_PAGES
    )
  ];

  const newRealEstates = [];
  for (const crawler of crawlers) {
    try {
      const newRealEstatesFromSingleCrawler = await crawler.crawl();
      if (Array.isArray(newRealEstatesFromSingleCrawler)) {
        // Push element by element rather than spreading into push(), so a
        // very large result cannot exceed the engine's argument-count limit.
        for (const realEstate of newRealEstatesFromSingleCrawler) {
          newRealEstates.push(realEstate);
        }
      }
    } catch (e) {
      // No return here on purpose: as the message says, the next crawler
      // still gets its turn and earlier results are preserved.
      console.log("Error crawling. Trying next crawler! ", e);
    }
  }
  return newRealEstates;
}
module.exports = {