Files
old-web/app/services/crawlerService.js
2019-09-05 11:14:54 +02:00

94 lines
2.7 KiB
JavaScript

const Promise = require("bluebird");
const OlxCrawler = require("../helpers/crawlers/olxClawler");
const db = require("../models/index");
const { allMarketAlerts } = require("../helpers/db/dbHelper");
/**
 * Maps one raw crawler result onto the plain object that is handed to
 * `db.MarketAlert.bulkCreate`.
 *
 * The origin fields are hard-coded to OLX / 1, and `notified` always starts
 * out as `false`.
 *
 * @param {Object} result - A single entry returned by a crawler.
 * @returns {Object} The row to insert.
 */
function toMarketAlert(result) {
  return {
    url: result.url,
    realestateOrigin: "OLX",
    originId: 1,
    size: result.size,
    price: result.price,
    email: result.email,
    request: result.uuid,
    municipality: result.municipality,
    region: result.region,
    // The coercing global isNaN is intentional: a missing (undefined) or
    // non-numeric garden size falls back to 0 as well as a literal NaN.
    gardenSize: isNaN(result.gardenSize) ? 0 : result.gardenSize,
    realEstateType: result.realEstateType,
    title: result.title,
    notified: false,
    hasLocation: result.hasLocation
  };
}

/**
 * Returns the candidates whose (url, request) pair is not present in
 * `existing` and has not already been seen earlier in `candidates`.
 *
 * Lookups go through a Map of Sets, so the cost is linear in the combined
 * input size instead of one scan of `existing` per candidate.
 *
 * @param {Object[]} candidates - Freshly crawled alerts, in crawl order.
 * @param {Object[]} existing - Alerts that are already stored.
 * @returns {Object[]} New alerts, first occurrence of each pair only.
 */
function selectNewMarketAlerts(candidates, existing) {
  const urlsByRequest = new Map();

  // Records the pair and reports whether it was seen for the first time.
  const isFirstSighting = ({ url, request }) => {
    let urls = urlsByRequest.get(request);
    if (urls === undefined) {
      urls = new Set();
      urlsByRequest.set(request, urls);
    }
    if (urls.has(url)) {
      return false;
    }
    urls.add(url);
    return true;
  };

  existing.forEach(isFirstSighting);
  return candidates.filter(isFirstSighting);
}

/**
 * Runs every crawler against the URLs of the market alerts returned by
 * `allMarketAlerts(true)` and bulk-inserts the crawl results that are not
 * stored yet.
 *
 * Failures while loading alerts, building the crawlers, converting results
 * or inserting rows are logged and swallowed. A failure of a crawler's
 * `crawl()` is NOT caught here and rejects the returned promise.
 *
 * @returns {Promise<undefined>} Resolves once the run has finished.
 */
async function crawlAll() {
  console.log("CRAWLER SERVICE: crawlAll");

  let crawlers;
  try {
    const requestedAlerts = await allMarketAlerts(true);

    // Group the alert URLs by their request id.
    // NOTE(review): an array is used as a string-keyed dictionary here. It is
    // kept as an array because OlxCrawler receives this exact value and its
    // expectations are not visible from this file.
    const hrefs = [];
    for (const { request, url } of requestedAlerts) {
      if (hrefs[request] === undefined) {
        hrefs[request] = [];
      }
      hrefs[request].push(url);
    }
    console.log("CRAWLER SERVICE: GLOBAL HREFS");
    console.log(hrefs);

    crawlers = [new OlxCrawler(hrefs)];
  } catch (e) {
    console.error("CRAWLER SERVICE:could not fetch marketalerts ", e);
    return;
  }

  // Deliberately outside any try/catch: a failing crawler rejects the promise
  // handed back to the caller instead of being logged and swallowed.
  const results = await Promise.all(crawlers.map(crawler => crawler.crawl()));

  try {
    const storedAlerts = await allMarketAlerts(false, true);
    console.log(
      "CRAWLER SERVICE: number of existing MarketAlerts from db: " +
        storedAlerts.length
    );

    // Each crawler yields its own list; flatten them one level before mapping.
    const marketAlerts = [].concat(...results).map(toMarketAlert);
    console.log(
      "CRAWLER SERVICE: Number of crawler results: " + marketAlerts.length
    );

    try {
      const newMarketAlerts = selectNewMarketAlerts(marketAlerts, storedAlerts);
      console.log(
        "CRAWLER SERVICE: Number of new crawler results: " +
          newMarketAlerts.length
      );
      await db.MarketAlert.bulkCreate(newMarketAlerts);
    } catch (e) {
      console.log(
        "CRAWLER SERVICE: Could not bulkCreate marketalers reason: ",
        e
      );
    }
  } catch (e) {
    console.log(
      "CRAWLER SERVICE: Error crawling. Trying next crawler! ",
      e
    );
  }
}
// The crawl entry point is this module's sole export.
module.exports = crawlAll;