Files
old-web/app/crawler/crawl.js
2019-10-28 09:14:45 +01:00

82 lines
2.4 KiB
JavaScript

"use strict";
/*
Entry point for crawling functionality
All communication between crawlers and savers is here
All environment specific configuration is read here and
passed to the crawlers and savers.
*/
const OlxCrawler = require("./specificCrawlers/olx");
const RentalCrawler = require("./specificCrawlers/rental");
const ProstorCrawler = require("./specificCrawlers/prostor");
const AktidoCrawler = require("./specificCrawlers/aktido");
const {
OLX_CONFIG,
RENTAL_CONFIG,
PROSTOR_CONFIG,
AKTIDO_CONFIG
} = require("./crawlerConfig");
const PostgresSaver = require("./savers/postgres");
async function crawlAll() {
const postgresSaver = new PostgresSaver();
const crawlers = [
new OlxCrawler(
[postgresSaver],
OLX_CONFIG.OLX_CRAWLER_AD_TYPE,
OLX_CONFIG.OLX_CRAWLER_AD_CATEGORIES,
OLX_CONFIG.OLX_MAX_PAGES,
OLX_CONFIG.OLX_MAX_RESULTS_PER_PAGE,
OLX_CONFIG.OLX_IGNORED_USERNAMES,
OLX_CONFIG.OLX_DELAY_BETWEEN_PAGES
),
new RentalCrawler(
[postgresSaver],
RENTAL_CONFIG.RENTAL_CRAWLER_AD_TYPE,
RENTAL_CONFIG.RENTAL_CRAWLER_AD_CATEGORIES,
RENTAL_CONFIG.RENTAL_MAX_PAGES,
RENTAL_CONFIG.RENTAL_MAX_RESULTS_PER_PAGE,
RENTAL_CONFIG.RENTAL_IGNORED_USERNAMES,
RENTAL_CONFIG.RENTAL_DELAY_BETWEEN_PAGES
),
new ProstorCrawler(
[postgresSaver],
PROSTOR_CONFIG.PROSTOR_CRAWLER_AD_TYPE,
PROSTOR_CONFIG.PROSTOR_CRAWLER_AD_CATEGORIES,
PROSTOR_CONFIG.PROSTOR_MAX_PAGES,
PROSTOR_CONFIG.PROSTOR_MAX_RESULTS_PER_PAGE,
PROSTOR_CONFIG.PROSTOR_IGNORED_USERNAMES,
PROSTOR_CONFIG.PROSTOR_DELAY_BETWEEN_PAGES
),
new AktidoCrawler(
[postgresSaver],
AKTIDO_CONFIG.AKTIDO_CRAWLER_AD_TYPE,
AKTIDO_CONFIG.AKTIDO_CRAWLER_AD_CATEGORIES,
AKTIDO_CONFIG.AKTIDO_MAX_PAGES,
AKTIDO_CONFIG.AKTIDO_MAX_RESULTS_PER_PAGE,
AKTIDO_CONFIG.AKTIDO_IGNORED_USERNAMES,
AKTIDO_CONFIG.AKTIDO_DELAY_BETWEEN_PAGES
)
];
const newRealEstates = [];
for (const crawler of crawlers) {
try {
const newRealEstatesFromSingleCrawler = await crawler.crawl();
if (Array.isArray(newRealEstatesFromSingleCrawler)) {
newRealEstates.push(...newRealEstatesFromSingleCrawler);
}
} catch (e) {
console.log("Error crawling. Trying next crawler! ", e);
}
}
return newRealEstates;
}
module.exports = {
crawlAll
};