Files
old-web/app/crawler/crawl.js
2019-10-24 16:57:23 +02:00

57 lines
1.6 KiB
JavaScript

"use strict";
/*
Entry point for crawling functionality
All communication between crawlers and savers is here
All environment specific configuration is read here and
passed to the crawlers and savers.
*/
const OlxCrawler = require("./specificCrawlers/olx");
const RentalCrawler = require("./specificCrawlers/rental");
const { OLX_CONFIG, RENTAL_CONFIG } = require("./crawlerConfig");
const PostgresSaver = require("./savers/postgres");
async function crawlAll() {
const postgresSaver = new PostgresSaver();
const crawlers = [
new OlxCrawler(
[postgresSaver],
OLX_CONFIG.OLX_CRAWLER_AD_TYPE,
OLX_CONFIG.OLX_CRAWLER_AD_CATEGORIES,
OLX_CONFIG.OLX_MAX_PAGES,
OLX_CONFIG.OLX_MAX_RESULTS_PER_PAGE,
OLX_CONFIG.OLX_IGNORED_USERNAMES,
OLX_CONFIG.OLX_DELAY_BETWEEN_PAGES
),
new RentalCrawler(
[postgresSaver],
RENTAL_CONFIG.RENTAL_CRAWLER_AD_TYPE,
RENTAL_CONFIG.RENTAL_CRAWLER_AD_CATEGORIES,
RENTAL_CONFIG.RENTAL_MAX_PAGES,
RENTAL_CONFIG.RENTAL_MAX_RESULTS_PER_PAGE,
RENTAL_CONFIG.RENTAL_IGNORED_USERNAMES,
RENTAL_CONFIG.RENTAL_DELAY_BETWEEN_PAGES
)
];
const newRealEstates = [];
for (const crawler of crawlers) {
try {
const newRealEstatesFromSingleCrawler = await crawler.crawl();
if (Array.isArray(newRealEstatesFromSingleCrawler)) {
newRealEstates.push(...newRealEstatesFromSingleCrawler);
}
} catch (e) {
console.log("Error crawling. Trying next crawler! ", e);
}
}
return newRealEstates;
}
module.exports = {
crawlAll
};