Compare commits
2 Commits
crawler-op
...
fetch-opti
| Author | SHA1 | Date | |
|---|---|---|---|
| | 33f9e37d93 | | |
| | 5829de64e0 | | |
@@ -45,7 +45,12 @@ module.exports = class OlxCrawler {
|
|||||||
if (finalResult.lat !== undefined && finalResult.lat !== null && finalResult.lat !== "") {
|
if (finalResult.lat !== undefined && finalResult.lat !== null && finalResult.lat !== "") {
|
||||||
const pointInsideBoundingBox = await findPointInsideBoundingBox([finalResult.lng, finalResult.lat], finalResult.email);
|
const pointInsideBoundingBox = await findPointInsideBoundingBox([finalResult.lng, finalResult.lat], finalResult.email);
|
||||||
|
|
||||||
|
|
||||||
if (pointInsideBoundingBox[0].length !== 0) {
|
if (pointInsideBoundingBox[0].length !== 0) {
|
||||||
|
finalResult.hasLocation = true
|
||||||
|
filteredResults.push(finalResult);
|
||||||
|
} else {
|
||||||
|
finalResult.hasLocation = false
|
||||||
filteredResults.push(finalResult);
|
filteredResults.push(finalResult);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -270,6 +275,16 @@ class Indexer {
|
|||||||
if (this.olxUrl.url === undefined) {
|
if (this.olxUrl.url === undefined) {
|
||||||
return {}
|
return {}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (global.hrefs) {
|
||||||
|
|
||||||
|
if (global.hrefs[this.olxUrl.uuid] && global.hrefs[this.olxUrl.uuid].includes(this.olxUrl.url)) {
|
||||||
|
|
||||||
|
console.log("We found duplicate URL");
|
||||||
|
return null
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
const res = await fetch(this.olxUrl.url);
|
const res = await fetch(this.olxUrl.url);
|
||||||
const body = await res.text();
|
const body = await res.text();
|
||||||
const $ = cheerio.load(body);
|
const $ = cheerio.load(body);
|
||||||
|
|||||||
@@ -26,7 +26,7 @@ const allRERequestByUiid = async (requestArray) => {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Find all market alerts, or only those matching the notified boolean, and order them by email
|
* Find all market alerts, or only those matching the notified boolean whose hasLocation is true, and order them by email
|
||||||
*
|
*
|
||||||
* @param fechAll bolean
|
* @param fechAll bolean
|
||||||
* @param notified bolean
|
* @param notified bolean
|
||||||
@@ -43,7 +43,8 @@ const allMarketAlerts = async (fetchAll, notified) => {
|
|||||||
|
|
||||||
if (!fetchAll){
|
if (!fetchAll){
|
||||||
queryObject.where = {
|
queryObject.where = {
|
||||||
notified: notified
|
notified: notified,
|
||||||
|
hasLocation: true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -0,0 +1,20 @@
|
|||||||
|
'use strict';
|
||||||
|
|
||||||
|
module.exports = {
|
||||||
|
up: (queryInterface, Sequelize) => {
|
||||||
|
return queryInterface.addColumn(
|
||||||
|
'MarketAlerts',
|
||||||
|
'hasLocation',
|
||||||
|
{
|
||||||
|
type: Sequelize.BOOLEAN
|
||||||
|
}
|
||||||
|
);
|
||||||
|
},
|
||||||
|
|
||||||
|
down: (queryInterface, Sequelize) => {
|
||||||
|
return queryInterface.removeColumn(
|
||||||
|
'MarketAlerts',
|
||||||
|
'hasLocation'
|
||||||
|
);
|
||||||
|
}
|
||||||
|
};
|
||||||
@@ -14,6 +14,7 @@ module.exports = (sequelize, DataTypes) => {
|
|||||||
notified : DataTypes.BOOLEAN,
|
notified : DataTypes.BOOLEAN,
|
||||||
title : DataTypes.STRING,
|
title : DataTypes.STRING,
|
||||||
request: DataTypes.STRING,
|
request: DataTypes.STRING,
|
||||||
|
hasLocation: DataTypes.BOOLEAN,
|
||||||
|
|
||||||
email: {
|
email: {
|
||||||
type: DataTypes.STRING,
|
type: DataTypes.STRING,
|
||||||
|
|||||||
@@ -13,29 +13,35 @@ const crawlers = [
|
|||||||
async function crawlAll() {
|
async function crawlAll() {
|
||||||
console.log("CRAWLER SERVICE: crawlAll");
|
console.log("CRAWLER SERVICE: crawlAll");
|
||||||
|
|
||||||
|
try {
|
||||||
|
const marketAlertsFromDb = await allMarketAlerts(true);
|
||||||
|
const hrefs = [];
|
||||||
|
|
||||||
|
marketAlertsFromDb.map(marketAlert => {
|
||||||
|
if (hrefs[marketAlert.request] === undefined) {
|
||||||
|
hrefs[marketAlert.request] = []
|
||||||
|
}
|
||||||
|
|
||||||
|
hrefs[marketAlert.request].push(marketAlert.url);
|
||||||
|
})
|
||||||
|
|
||||||
|
global.hrefs = hrefs;
|
||||||
|
console.log("CRAWLER SERVICE: GLOBAL HREFS");
|
||||||
|
console.log(global.hrefs);
|
||||||
|
|
||||||
|
} catch (e) {
|
||||||
|
console.error("CRAWLER SERVICE:could not fetch marketalerts ", e);
|
||||||
|
}
|
||||||
|
|
||||||
return Promise.map(crawlers, function (crawler) {
|
return Promise.map(crawlers, function (crawler) {
|
||||||
return crawler.crawl();
|
return crawler.crawl();
|
||||||
}).then(async (results) => {
|
}).then(async (results) => {
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
|
||||||
const marketAlertsFromDb = await allMarketAlerts(true);
|
const marketAlertsFromDb = await allMarketAlerts(false, true);
|
||||||
const hrefs = [];
|
|
||||||
const subscribedMakretAlerts = marketAlertsFromDb.filter(marketAlert => {
|
|
||||||
return marketAlert.subscribed;
|
|
||||||
});
|
|
||||||
marketAlertsFromDb.map(marketAlert => {
|
|
||||||
if (hrefs[marketAlert.request] === undefined) {
|
|
||||||
hrefs[marketAlert.request] = []
|
|
||||||
}
|
|
||||||
|
|
||||||
hrefs[marketAlert.request].push(marketAlert.url);
|
console.log("CRAWLER SERVICE: number of existing MarketAlerts from db: " + marketAlertsFromDb.length);
|
||||||
})
|
|
||||||
|
|
||||||
global.hrefs = hrefs;
|
|
||||||
console.log(global.hrefs);
|
|
||||||
|
|
||||||
console.log("CRAWLER SERVICE: number of existing MarketAlerts from db: " + subscribedMakretAlerts.length);
|
|
||||||
|
|
||||||
const marketAlerts = [];
|
const marketAlerts = [];
|
||||||
const mergedResults = [].concat.apply([], results);
|
const mergedResults = [].concat.apply([], results);
|
||||||
@@ -55,14 +61,15 @@ async function crawlAll() {
|
|||||||
gardenSize: isNaN(result.gardenSize) ? 0 : result.gardenSize,
|
gardenSize: isNaN(result.gardenSize) ? 0 : result.gardenSize,
|
||||||
realEstateType: result.realEstateType,
|
realEstateType: result.realEstateType,
|
||||||
title: result.title,
|
title: result.title,
|
||||||
notified: false
|
notified: false,
|
||||||
|
hasLocation: result.hasLocation
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
console.log("CRAWLER SERVICE: Number of crawler results: " + marketAlerts.length);
|
console.log("CRAWLER SERVICE: Number of crawler results: " + marketAlerts.length);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
|
|
||||||
const filteredMarketAlerts = marketAlerts.filter((elem) => !subscribedMakretAlerts.find(({ url }) => elem.url === url));
|
const filteredMarketAlerts = marketAlerts.filter((elem) => !marketAlertsFromDb.find(({ url }) => elem.url === url));
|
||||||
console.log("CRAWLER SERVICE: Number of new crawler results: " + filteredMarketAlerts.length);
|
console.log("CRAWLER SERVICE: Number of new crawler results: " + filteredMarketAlerts.length);
|
||||||
|
|
||||||
await db.MarketAlert.bulkCreate(filteredMarketAlerts);
|
await db.MarketAlert.bulkCreate(filteredMarketAlerts);
|
||||||
|
|||||||
Reference in New Issue
Block a user