Files
old-web/app/crawler/crawl.js

56 lines
1.4 KiB
JavaScript

"use strict";
/*
Entry point for the crawling functionality.
All communication between crawlers and savers happens here.
All environment-specific configuration is read here and
passed to the crawlers and savers.
*/
require("dotenv").config();
const OlxCrawler = require("./specific/olx");
const PostgresSaver = require("./savers/postgres");
// Crawlers to run, in order. Every entry is currently commented out, so the
// list is empty; the disabled configurations are kept below for reference.
// NOTE(review): ProstorCrawler and RentalCrawler are not required in this
// file — they would need to be imported before being re-enabled.
const crawlers = [
  // new OlxCrawler(
  //   process.env.OLX_FROM_PAGE,
  //   process.env.OLX_TO_PAGE,
  //   process.env.OLX_MAX_RESULTS
  // )
  // new ProstorCrawler(
  //   parseInt(process.env.PROSTOR_FROM_PAGE),
  //   parseInt(process.env.PROSTOR_TO_PAGE),
  //   parseInt(process.env.PROSTOR_MAX_RESULTS)
  // ),
  // new RentalCrawler(
  //   parseInt(process.env.RENTAL_FROM_PAGE),
  //   parseInt(process.env.RENTAL_TO_PAGE),
  //   parseInt(process.env.RENTAL_MAX_RESULTS)
  // )
];

// Savers that receive the results of every crawler.
// NOTE(review): the Postgres saver is configured from the MONGO_URL
// environment variable — looks like a leftover name; confirm before renaming.
const savers = [
  new PostgresSaver(process.env.MONGO_URL),
];
/**
 * Runs every configured crawler in turn and hands its results to every saver.
 *
 * A failing crawler or saver is logged and skipped so the remaining ones
 * still run. Once the crawl loop is done, every saver is closed; each close
 * is awaited and guarded individually, so the returned promise settles only
 * after cleanup has finished and one failing close cannot prevent the
 * remaining savers from being closed.
 *
 * @returns {Promise<void>} Settles after all crawlers have run and all savers
 *   have been asked to close.
 */
async function crawlAll() {
  try {
    for (const crawler of crawlers) {
      try {
        const crawlerResults = await crawler.crawl();
        for (const saver of savers) {
          try {
            // NOTE(review): connect() runs again for every crawler's results —
            // presumably it is safe to call repeatedly; confirm against the saver.
            await saver.connect();
            await saver.save(crawlerResults);
          } catch (e) {
            console.log("Error saving. Trying next saver! ", e);
          }
        }
      } catch (e) {
        console.log("Error crawling. Trying next crawler! ", e);
      }
    }
  } finally {
    // Always release saver resources, even if iterating the crawlers failed.
    for (const saver of savers) {
      try {
        // close() may be asynchronous; await it so failures are caught here
        // instead of surfacing as unhandled rejections.
        await saver.close();
      } catch (e) {
        console.log("Error closing saver. Trying next saver! ", e);
      }
    }
  }
}
// Start the crawl. crawlAll() logs and skips individual crawler/saver
// failures itself; this handler covers anything that still escapes, so the
// promise is never left floating and the process reports the failure.
crawlAll().catch((e) => {
  console.log("Crawling run failed! ", e);
  process.exitCode = 1;
});