const Promise = require("bluebird");
const OlxCrawler = require("../helpers/crawlers/olxClawler");
const db = require("../models/index");
const { allMarketAlerts } = require('../helpers/db/dbHelper');

// NOTE(review): the meaning of the arguments (1, 2, 3) is defined by OlxCrawler — confirm there.
const olxCrawler = new OlxCrawler(1, 2, 3);

// Every crawler in this list is run by crawlAll(); each must expose crawl().
const crawlers = [
  olxCrawler,
];

/**
 * Groups market alert URLs by the request they belong to.
 *
 * The container is intentionally kept as an array used as a keyed lookup
 * (hrefs[request] -> string[]), because it is published on `global.hrefs`
 * and its consumers are not visible from this module.
 * NOTE(review): a plain object or Map would be a better fit — confirm the
 * consumers of `global.hrefs` before changing the container type.
 *
 * @param {Array<{request: string, url: string}>} marketAlerts - alerts loaded from the db
 * @returns {Array} lookup of request -> list of URLs
 */
function groupUrlsByRequest(marketAlerts) {
  const hrefs = [];
  for (const marketAlert of marketAlerts) {
    if (hrefs[marketAlert.request] === undefined) {
      hrefs[marketAlert.request] = [];
    }
    hrefs[marketAlert.request].push(marketAlert.url);
  }
  return hrefs;
}

/**
 * Converts one raw crawler result into the record shape passed to
 * `db.MarketAlert.bulkCreate`.
 *
 * @param {object} result - a single item returned by a crawler
 * @returns {object} the MarketAlert record to insert
 */
function toMarketAlertRecord(result) {
  return {
    url: result.url,
    realestateOrigin: "OLX",
    originId: 1,
    size: result.size,
    price: result.price,
    email: result.email,
    request: result.uuid,
    // lastDate is intentionally not populated here.
    municipality: result.municipality,
    region: result.region,
    // Global isNaN is deliberate: it coerces, so a missing or non-numeric
    // gardenSize is stored as 0 (Number.isNaN would let undefined through).
    gardenSize: isNaN(result.gardenSize) ? 0 : result.gardenSize,
    realEstateType: result.realEstateType,
    title: result.title,
    notified: false,
    hasLocation: result.hasLocation,
  };
}

/**
 * Runs all registered crawlers and stores results that are not yet in the db.
 *
 * Steps:
 *  1. Loads market alerts via `allMarketAlerts(true)` and publishes their URLs,
 *     grouped by request, on `global.hrefs`. A failure here is logged and
 *     crawling continues.
 *  2. Runs `crawl()` on every crawler.
 *  3. Loads market alerts via `allMarketAlerts(false, true)`, drops crawler
 *     results whose URL is already among them, and bulk-inserts the rest.
 *     Failures in this step are logged, not thrown.
 *
 * NOTE(review): the meaning of the boolean arguments to `allMarketAlerts`
 * is defined in dbHelper — confirm there.
 *
 * @returns {Promise<void>} resolves when the run has finished; rejects only
 *   if a crawler's `crawl()` fails.
 */
async function crawlAll() {
  console.log("CRAWLER SERVICE: crawlAll");

  try {
    const marketAlertsFromDb = await allMarketAlerts(true);
    global.hrefs = groupUrlsByRequest(marketAlertsFromDb);
    console.log("CRAWLER SERVICE: GLOBAL HREFS");
    console.log(global.hrefs);
  } catch (e) {
    console.error("CRAWLER SERVICE:could not fetch marketalerts ", e);
  }

  // A crawler failure is not caught here, so it rejects crawlAll() as before.
  const results = await Promise.map(crawlers, (crawler) => crawler.crawl());

  try {
    const marketAlertsFromDb = await allMarketAlerts(false, true);
    console.log("CRAWLER SERVICE: number of existing MarketAlerts from db: " + marketAlertsFromDb.length);

    // Each crawler returns its own list; flatten them one level into a single list.
    const mergedResults = [].concat(...results);
    const marketAlerts = mergedResults.map(toMarketAlertRecord);
    console.log("CRAWLER SERVICE: Number of crawler results: " + marketAlerts.length);

    try {
      // Set lookup keeps the "already stored?" check linear instead of a scan per result.
      const knownUrls = new Set(marketAlertsFromDb.map(({ url }) => url));
      const filteredMarketAlerts = marketAlerts.filter((elem) => !knownUrls.has(elem.url));
      console.log("CRAWLER SERVICE: Number of new crawler results: " + filteredMarketAlerts.length);
      await db.MarketAlert.bulkCreate(filteredMarketAlerts);
    } catch (e) {
      console.error("CRAWLER SERVICE: Could not bulkCreate marketalers reason: ", e);
    }
  } catch (e) {
    console.error("CRAWLER SERVICE: Error crawling. Trying next crawler! ", e);
  }
}

module.exports = crawlAll;