"use strict";

/*
 * Entry point for the crawling functionality.
 * All communication between crawlers and savers happens here.
 * All environment-specific configuration is read here and
 * passed to the crawlers and savers.
 */
const OlxCrawler = require("./specificCrawlers/olx");
const RentalCrawler = require("./specificCrawlers/rental");
const ProstorCrawler = require("./specificCrawlers/prostor");
const AktidoCrawler = require("./specificCrawlers/aktido");

// One config object per crawler; each exposes keys prefixed with the crawler's name.
const {
  OLX_CONFIG,
  RENTAL_CONFIG,
  PROSTOR_CONFIG,
  AKTIDO_CONFIG
} = require("./crawlerConfig");
const PostgresSaver = require("./savers/postgres");

/**
 * Runs every configured crawler, one after another, and gathers what they return.
 *
 * A single PostgresSaver instance is created and handed (wrapped in an array)
 * to each crawler, followed by that crawler's config values in the positional
 * order shown below.
 *
 * Crawling is best-effort: if a crawler's `crawl()` throws or rejects, the error
 * is logged and the next crawler is tried. A result that is not an array is
 * ignored.
 *
 * @returns {Promise<Array>} All items returned by the crawlers, flattened into
 *   one array in crawler order (OLX, Rental, Prostor, Aktido).
 */
async function crawlAll() {
  const postgresSaver = new PostgresSaver();

  const crawlers = [
    new OlxCrawler(
      [postgresSaver],
      OLX_CONFIG.OLX_CRAWLER_AD_TYPE,
      OLX_CONFIG.OLX_CRAWLER_AD_CATEGORIES,
      OLX_CONFIG.OLX_MAX_PAGES,
      OLX_CONFIG.OLX_MAX_RESULTS_PER_PAGE,
      OLX_CONFIG.OLX_IGNORED_USERNAMES,
      OLX_CONFIG.OLX_DELAY_BETWEEN_PAGES
    ),
    new RentalCrawler(
      [postgresSaver],
      RENTAL_CONFIG.RENTAL_CRAWLER_AD_TYPE,
      RENTAL_CONFIG.RENTAL_CRAWLER_AD_CATEGORIES,
      RENTAL_CONFIG.RENTAL_MAX_PAGES,
      RENTAL_CONFIG.RENTAL_MAX_RESULTS_PER_PAGE,
      RENTAL_CONFIG.RENTAL_IGNORED_USERNAMES,
      RENTAL_CONFIG.RENTAL_DELAY_BETWEEN_PAGES
    ),
    new ProstorCrawler(
      [postgresSaver],
      PROSTOR_CONFIG.PROSTOR_CRAWLER_AD_TYPE,
      PROSTOR_CONFIG.PROSTOR_CRAWLER_AD_CATEGORIES,
      PROSTOR_CONFIG.PROSTOR_MAX_PAGES,
      PROSTOR_CONFIG.PROSTOR_MAX_RESULTS_PER_PAGE,
      PROSTOR_CONFIG.PROSTOR_IGNORED_USERNAMES,
      PROSTOR_CONFIG.PROSTOR_DELAY_BETWEEN_PAGES
    ),
    new AktidoCrawler(
      [postgresSaver],
      AKTIDO_CONFIG.AKTIDO_CRAWLER_AD_TYPE,
      AKTIDO_CONFIG.AKTIDO_CRAWLER_AD_CATEGORIES,
      AKTIDO_CONFIG.AKTIDO_MAX_PAGES,
      AKTIDO_CONFIG.AKTIDO_MAX_RESULTS_PER_PAGE,
      AKTIDO_CONFIG.AKTIDO_IGNORED_USERNAMES,
      AKTIDO_CONFIG.AKTIDO_DELAY_BETWEEN_PAGES
    )
  ];

  const newRealEstates = [];

  // Crawlers run sequentially; one failing crawler must not stop the others.
  for (const crawler of crawlers) {
    try {
      const newRealEstatesFromSingleCrawler = await crawler.crawl();
      if (Array.isArray(newRealEstatesFromSingleCrawler)) {
        // Append element by element rather than push(...result): spreading a
        // very large array into call arguments can throw a RangeError, which
        // the catch below would misreport as a crawl failure and drop the data.
        for (const realEstate of newRealEstatesFromSingleCrawler) {
          newRealEstates.push(realEstate);
        }
      }
    } catch (e) {
      console.log("Error crawling. Trying next crawler! ", e);
    }
  }

  return newRealEstates;
}

module.exports = {
|
|
crawlAll
|
|
};
|