Compare commits

..

1 Commits

Author SHA1 Message Date
Nedim Uka
64aee0167f Added hrefs to global variable 2019-07-10 12:27:30 +02:00
5 changed files with 20 additions and 64 deletions

View File

@@ -45,12 +45,7 @@ module.exports = class OlxCrawler {
if (finalResult.lat !== undefined && finalResult.lat !== null && finalResult.lat !== "") { if (finalResult.lat !== undefined && finalResult.lat !== null && finalResult.lat !== "") {
const pointInsideBoundingBox = await findPointInsideBoundingBox([finalResult.lng, finalResult.lat], finalResult.email); const pointInsideBoundingBox = await findPointInsideBoundingBox([finalResult.lng, finalResult.lat], finalResult.email);
if (pointInsideBoundingBox[0].length !== 0) { if (pointInsideBoundingBox[0].length !== 0) {
finalResult.hasLocation = true
filteredResults.push(finalResult);
} else {
finalResult.hasLocation = false
filteredResults.push(finalResult); filteredResults.push(finalResult);
} }
} }
@@ -275,16 +270,6 @@ class Indexer {
if (this.olxUrl.url === undefined) { if (this.olxUrl.url === undefined) {
return {} return {}
} }
if (global.hrefs) {
if (global.hrefs[this.olxUrl.uuid] && global.hrefs[this.olxUrl.uuid].includes(this.olxUrl.url)) {
console.log("We found duplicate URL");
return null
}
}
const res = await fetch(this.olxUrl.url); const res = await fetch(this.olxUrl.url);
const body = await res.text(); const body = await res.text();
const $ = cheerio.load(body); const $ = cheerio.load(body);

View File

@@ -26,7 +26,7 @@ const allRERequestByUiid = async (requestArray) => {
} }
/** /**
* Find all, or all depending on notified boolean marketalerts, that the hasLocation is true, and order them by email * Find all, or all depending on notified boolean marketalerts, and order them by email
* *
* @param fetchAll boolean * @param fetchAll boolean
* @param notified boolean * @param notified boolean
@@ -43,8 +43,7 @@ const allMarketAlerts = async (fetchAll, notified) => {
if (!fetchAll){ if (!fetchAll){
queryObject.where = { queryObject.where = {
notified: notified, notified: notified
hasLocation: true
} }
} }

View File

@@ -1,20 +0,0 @@
'use strict';
module.exports = {
up: (queryInterface, Sequelize) => {
return queryInterface.addColumn(
'MarketAlerts',
'hasLocation',
{
type: Sequelize.BOOLEAN
}
);
},
down: (queryInterface, Sequelize) => {
return queryInterface.removeColumn(
'MarketAlerts',
'hasLocation'
);
}
};

View File

@@ -14,7 +14,6 @@ module.exports = (sequelize, DataTypes) => {
notified : DataTypes.BOOLEAN, notified : DataTypes.BOOLEAN,
title : DataTypes.STRING, title : DataTypes.STRING,
request: DataTypes.STRING, request: DataTypes.STRING,
hasLocation: DataTypes.BOOLEAN,
email: { email: {
type: DataTypes.STRING, type: DataTypes.STRING,

View File

@@ -13,10 +13,17 @@ const crawlers = [
async function crawlAll() { async function crawlAll() {
console.log("CRAWLER SERVICE: crawlAll"); console.log("CRAWLER SERVICE: crawlAll");
return Promise.map(crawlers, function (crawler) {
return crawler.crawl();
}).then(async (results) => {
try { try {
const marketAlertsFromDb = await allMarketAlerts(true); const marketAlertsFromDb = await allMarketAlerts(true);
const hrefs = []; const hrefs = [];
const subscribedMakretAlerts = marketAlertsFromDb.filter(marketAlert => {
return marketAlert.subscribed;
});
marketAlertsFromDb.map(marketAlert => { marketAlertsFromDb.map(marketAlert => {
if (hrefs[marketAlert.request] === undefined) { if (hrefs[marketAlert.request] === undefined) {
hrefs[marketAlert.request] = [] hrefs[marketAlert.request] = []
@@ -26,22 +33,9 @@ async function crawlAll() {
}) })
global.hrefs = hrefs; global.hrefs = hrefs;
console.log("CRAWLER SERVICE: GLOBAL HREFS");
console.log(global.hrefs); console.log(global.hrefs);
} catch (e) { console.log("CRAWLER SERVICE: number of existing MarketAlerts from db: " + subscribedMakretAlerts.length);
console.error("CRAWLER SERVICE:could not fetch marketalerts ", e);
}
return Promise.map(crawlers, function (crawler) {
return crawler.crawl();
}).then(async (results) => {
try {
const marketAlertsFromDb = await allMarketAlerts(false, true);
console.log("CRAWLER SERVICE: number of existing MarketAlerts from db: " + marketAlertsFromDb.length);
const marketAlerts = []; const marketAlerts = [];
const mergedResults = [].concat.apply([], results); const mergedResults = [].concat.apply([], results);
@@ -61,15 +55,14 @@ async function crawlAll() {
gardenSize: isNaN(result.gardenSize) ? 0 : result.gardenSize, gardenSize: isNaN(result.gardenSize) ? 0 : result.gardenSize,
realEstateType: result.realEstateType, realEstateType: result.realEstateType,
title: result.title, title: result.title,
notified: false, notified: false
hasLocation: result.hasLocation
}) })
} }
console.log("CRAWLER SERVICE: Number of crawler results: " + marketAlerts.length); console.log("CRAWLER SERVICE: Number of crawler results: " + marketAlerts.length);
try { try {
const filteredMarketAlerts = marketAlerts.filter((elem) => !marketAlertsFromDb.find(({ url }) => elem.url === url)); const filteredMarketAlerts = marketAlerts.filter((elem) => !subscribedMakretAlerts.find(({ url }) => elem.url === url));
console.log("CRAWLER SERVICE: Number of new crawler results: " + filteredMarketAlerts.length); console.log("CRAWLER SERVICE: Number of new crawler results: " + filteredMarketAlerts.length);
await db.MarketAlert.bulkCreate(filteredMarketAlerts); await db.MarketAlert.bulkCreate(filteredMarketAlerts);