94 lines
2.7 KiB
JavaScript
94 lines
2.7 KiB
JavaScript
const Promise = require("bluebird");
|
|
const OlxCrawler = require("../helpers/crawlers/olxClawler");
|
|
const db = require("../models/index");
|
|
const { allMarketAlerts } = require("../helpers/db/dbHelper");
|
|
|
|
/**
 * Groups the URLs of the given market alerts by their `request` value.
 *
 * NOTE(review): the container is intentionally still an Array that carries
 * named properties, because that is exactly what OlxCrawler has been handed
 * so far and its implementation is not visible from this file. A plain object
 * or a Map would be the idiomatic dictionary once OlxCrawler is confirmed to
 * accept it.
 *
 * @param {Array<{request: *, url: *}>} marketAlerts alerts to group
 * @returns {Array} array object whose own properties map request -> url[]
 */
function groupUrlsByRequest(marketAlerts) {
  const hrefs = [];

  marketAlerts.forEach(({ request, url }) => {
    if (hrefs[request] === undefined) {
      hrefs[request] = [];
    }

    hrefs[request].push(url);
  });

  return hrefs;
}

/**
 * Converts one raw crawler result into the record shape that is handed to
 * `db.MarketAlert.bulkCreate`.
 *
 * @param {Object} result a single crawler result
 * @returns {Object} market alert record, always flagged as not yet notified
 */
function toMarketAlert(result) {
  return {
    url: result.url,
    realestateOrigin: "OLX",
    originId: 1,
    size: result.size,
    price: result.price,
    email: result.email,
    request: result.uuid,
    municipality: result.municipality,
    region: result.region,
    // The coercing global isNaN is deliberate: a missing (undefined) or
    // non-numeric garden size becomes 0, which Number.isNaN would not do.
    gardenSize: isNaN(result.gardenSize) ? 0 : result.gardenSize,
    realEstateType: result.realEstateType,
    title: result.title,
    notified: false,
    hasLocation: result.hasLocation
  };
}

/**
 * Returns the candidates for which no existing alert has both the same `url`
 * and the same `request`.
 *
 * The existing alerts are indexed once (request -> Set of urls) so every
 * candidate is checked with a constant-time lookup instead of a scan over all
 * existing alerts.
 *
 * @param {Array<{url: *, request: *}>} candidates freshly crawled alerts
 * @param {Array<{url: *, request: *}>} existing alerts that are already known
 * @returns {Array} the candidates that are not present in `existing`
 */
function filterNewMarketAlerts(candidates, existing) {
  const knownUrlsByRequest = new Map();

  for (const { url, request } of existing) {
    let knownUrls = knownUrlsByRequest.get(request);
    if (knownUrls === undefined) {
      knownUrls = new Set();
      knownUrlsByRequest.set(request, knownUrls);
    }
    knownUrls.add(url);
  }

  return candidates.filter(({ url, request }) => {
    const knownUrls = knownUrlsByRequest.get(request);
    return knownUrls === undefined || !knownUrls.has(url);
  });
}

/**
 * Maps the crawler results to market alert records and bulk-creates those
 * that are not already returned by `allMarketAlerts(false, true)`.
 *
 * Every failure in here is logged and swallowed, so the returned promise does
 * not reject because of a database or mapping error.
 *
 * @param {Array} results one entry per crawler, as resolved by `crawl()`
 * @returns {Promise<void>}
 */
async function storeNewMarketAlerts(results) {
  try {
    const marketAlertsFromDb = await allMarketAlerts(false, true);

    console.log(
      `CRAWLER SERVICE: number of existing MarketAlerts from db: ${marketAlertsFromDb.length}`
    );

    // Each crawler contributes its own list of results; flatten one level.
    const mergedResults = [].concat(...results);
    const marketAlerts = [];
    for (const result of mergedResults) {
      marketAlerts.push(toMarketAlert(result));
    }

    console.log(
      `CRAWLER SERVICE: Number of crawler results: ${marketAlerts.length}`
    );

    try {
      const filteredMarketAlerts = filterNewMarketAlerts(
        marketAlerts,
        marketAlertsFromDb
      );

      console.log(
        `CRAWLER SERVICE: Number of new crawler results: ${filteredMarketAlerts.length}`
      );

      await db.MarketAlert.bulkCreate(filteredMarketAlerts);
    } catch (e) {
      console.log(
        "CRAWLER SERVICE: Could not bulkCreate marketalers reason: ",
        e
      );
    }
  } catch (e) {
    console.log("CRAWLER SERVICE: Error crawling. Trying next crawler! ", e);
  }
}

/**
 * Runs every crawler against the URLs of the market alerts returned by
 * `allMarketAlerts(true)` and stores the crawled alerts that are new.
 *
 * Failures while loading the alerts or constructing the crawler, and failures
 * while storing results, are logged and swallowed. A rejection coming from a
 * crawler's `crawl()` is NOT caught here and rejects the returned promise, so
 * callers that need to survive a failing crawl must handle it themselves.
 *
 * @returns {Promise<undefined>} resolves with no value once crawling and
 *   storing have finished, or right after the initial load failed
 */
async function crawlAll() {
  console.log("CRAWLER SERVICE: crawlAll");

  let crawlers;
  try {
    const marketAlertsFromDb = await allMarketAlerts(true);
    const hrefs = groupUrlsByRequest(marketAlertsFromDb);

    console.log("CRAWLER SERVICE: GLOBAL HREFS");
    console.log(hrefs);

    crawlers = [new OlxCrawler(hrefs)];
  } catch (e) {
    console.error("CRAWLER SERVICE:could not fetch marketalerts ", e);
    return;
  }

  // Deliberately outside of any try/catch: a failing crawl keeps rejecting
  // the promise returned to the caller.
  const results = await Promise.map(crawlers, crawler => crawler.crawl());

  await storeNewMarketAlerts(results);
}
|
|
// crawlAll is the only value exported by this module.
module.exports = crawlAll;
|