replace old crawler, without specific crawler and saver implementation
This commit is contained in:
55
app/crawler/crawl.js
Normal file
55
app/crawler/crawl.js
Normal file
@@ -0,0 +1,55 @@
|
||||
"use strict";
|
||||
/*
  Entry point for the crawling functionality.
  All communication between crawlers and savers happens here.
  All environment-specific configuration is read here and
  passed to the crawlers and savers.
*/
|
||||
|
||||
require("dotenv").config();
|
||||
const OlxCrawler = require("./specific/olx");
|
||||
const PostgresSaver = require("./savers/postgres");
|
||||
|
||||
// Crawlers that crawlAll() iterates over, in order. The list is currently
// empty: every entry below is commented out, so no crawling takes place.
//
// NOTE(review): before re-enabling entries, note that
//   - ProstorCrawler and RentalCrawler are not required anywhere in the
//     visible part of this file;
//   - the OLX entry passes raw environment strings (no parseInt) and has no
//     trailing comma separating it from the next entry.
const crawlers = [
  // new OlxCrawler(
  //   process.env.OLX_FROM_PAGE,
  //   process.env.OLX_TO_PAGE,
  //   process.env.OLX_MAX_RESULTS
  // )
  // new ProstorCrawler(
  //   parseInt(process.env.PROSTOR_FROM_PAGE),
  //   parseInt(process.env.PROSTOR_TO_PAGE),
  //   parseInt(process.env.PROSTOR_MAX_RESULTS)
  // ),
  // new RentalCrawler(
  //   parseInt(process.env.RENTAL_FROM_PAGE),
  //   parseInt(process.env.RENTAL_TO_PAGE),
  //   parseInt(process.env.RENTAL_MAX_RESULTS)
  // )
];
|
||||
|
||||
// Savers that receive every crawler's results inside crawlAll().
// NOTE(review): a PostgresSaver is constructed from MONGO_URL. This looks
// like a leftover variable name from an earlier saver; confirm which
// environment variable should hold the connection string.
const savers = [new PostgresSaver(process.env.MONGO_URL)];
|
||||
|
||||
/**
 * Runs every configured crawler in sequence and hands each crawler's results
 * to every configured saver, then closes all savers.
 *
 * A failing crawler is logged and skipped; a failing saver is logged and the
 * next saver is tried. Closing is awaited and isolated per saver, so a saver
 * whose close() fails neither prevents the remaining savers from closing nor
 * causes this function to reject.
 *
 * @returns {Promise<void>} Resolves once all crawlers have run and every
 *   saver's close() has settled.
 */
async function crawlAll() {
  try {
    for (const crawler of crawlers) {
      let crawlerResults;
      try {
        crawlerResults = await crawler.crawl();
      } catch (e) {
        console.log("Error crawling. Trying next crawler! ", e);
        continue;
      }

      for (const saver of savers) {
        try {
          // connect() is invoked once per crawler result set.
          // NOTE(review): confirm savers tolerate repeated connect() calls.
          await saver.connect();
          await saver.save(crawlerResults);
        } catch (e) {
          console.log("Error saving. Trying next saver! ", e);
        }
      }
    }
  } finally {
    // Always release savers, even if something unexpected was thrown above.
    for (const saver of savers) {
      try {
        // Awaiting handles both synchronous and promise-returning close().
        await saver.close();
      } catch (e) {
        console.log("Error closing saver! ", e);
      }
    }
  }
}
|
||||
|
||||
// Start the crawl. The promise is handled explicitly so that an unexpected
// rejection is reported and reflected in the exit code instead of surfacing
// as an unhandled rejection.
crawlAll().catch((e) => {
  console.error("Crawling failed unexpectedly: ", e);
  process.exitCode = 1;
});
|
||||
Reference in New Issue
Block a user