Added send notification service, and queried unsent marketalerts, fixed some issues with crawler, and added proper logging

This commit is contained in:
Nedim Uka
2019-06-25 17:06:07 +02:00
parent 5ffdaef1bf
commit 208faa08df
8 changed files with 123 additions and 35 deletions

View File

@@ -87,14 +87,14 @@ module.exports = class OlxCrawler {
const data = {
realEstateType: this.getCategoryId(realEstateType),
email : email,
email: email,
olxId: olxId,
// category: category,
url,
title,
price: isNaN(parsedPrice) ? 0 : parsedPrice,
size: parseFloat(size),
gardenSize: isNaN(parseFloat(gardenSize)) ? 0 : parseFloat(gardenSize),
gardenSize: isNaN(parseFloat(gardenSize)) ? 0 : parseFloat(gardenSize),
address,
region,
municipality,
@@ -151,22 +151,22 @@ module.exports = class OlxCrawler {
}
}
getCategoryId (category) {
getCategoryId(category) {
switch(category) {
switch (category) {
case 'Stanovi':
return 'stan';
return 'stan';
case 'Vikendice':
return 'vikendica'
return 'vikendica'
case 'Kuće':
return 'kuca';
default:
return '';
}
}
return '';
}
}
async indexPages(urls, start, end, maxResults = 1000) {
//TODO fix paging
@@ -186,16 +186,18 @@ module.exports = class OlxCrawler {
}
async crawl() {
console.log("OLX CRAWLER: start crawl");
const filteredResults = [];
const realestateRequests = await allRERequest()
const realestateRequests = await allRERequest();
console.log("OLX CRAWLER: found " + realestateRequests.length + "subscribed RealEstateRequests");
const urls = this.createRequestUrls(realestateRequests);
let results = await this.indexPages(urls, this.fromPage, this.toPage, this.maxResults);
for (const result of results) {
for (const finalResult of result) {
if (finalResult.lat !== undefined && finalResult.lat !== null && finalResult.lat !== "") {
const pointInsideBoundingBox = await findPointInsideBoundingBox([finalResult.lng, finalResult.lat]);
const pointInsideBoundingBox = await findPointInsideBoundingBox([finalResult.lng, finalResult.lat], finalResult.email);
if (pointInsideBoundingBox[0].length !== 0) {
filteredResults.push(finalResult);
@@ -203,8 +205,7 @@ module.exports = class OlxCrawler {
}
}
}
console.log(filteredResults);
console.log("OLX CRAWLER: number of olx crawler results, after geo location filtering: " + filteredResults.length);
return filteredResults;
}

View File

@@ -1,15 +1,62 @@
const db = require('../../models/index');
// TODO Fetch only subscribed realestate requests
/**
* Find all subscribed RealEstateRequests
*/
const allRERequest = async () => {
return await db.RealEstateRequest.findAll();
return await db.RealEstateRequest.findAll({
where: {
subscribed: true
}
});
}
const findPointInsideBoundingBox = async (latLng) => {
return await db.sequelize.query("SELECT * FROM \"RealEstateRequests\" WHERE ST_Contains(\"RealEstateRequests\".bounding_box, ST_GEOMFROMTEXT(\'POINT (" + latLng[0] + " " + latLng[1]+ ")\'))");
/**
 * Find MarketAlerts ordered by email (descending), optionally filtered by
 * their `notified` flag.
 *
 * @param {boolean} [notified] when given (true or false), only alerts whose
 *   `notified` value equals it are returned; when omitted (undefined/null),
 *   no filter is applied and all alerts are returned.
 *
 * @returns {Promise<Array>} whatever `db.MarketAlert.findAll` resolves to
 */
const allMarketAlerts = async (notified) => {
    const queryObject = {
        order: [
            ['email', 'DESC'],
        ]
    };

    // Test for "argument provided" instead of truthiness: an explicit `false`
    // (i.e. "give me the unnotified alerts") must become a filter, not be ignored.
    // NOTE(review): rows whose `notified` is NULL will not match `false` —
    // confirm the column has a default or is backfilled.
    if (notified !== undefined && notified !== null) {
        queryObject.where = { notified };
    }

    return await db.MarketAlert.findAll(queryObject);
}
/**
 * Find the subscribed RealEstateRequests of one email address whose
 * bounding box contains the given point.
 *
 * @param {Array} latLng point coordinates; element 0 is written first and
 *   element 1 second into the WKT `POINT (x y)`. Despite the parameter name,
 *   the crawler calls this with [lng, lat].
 * @param {string} email email address the RealEstateRequest must belong to
 *
 * @returns {Promise<Array>} the raw result of `db.sequelize.query`; the
 *   matching rows are at index 0
 */
const findPointInsideBoundingBox = async (latLng, email) => {
    // Build the WKT text separately and bind it (and the email) as named
    // replacements, so crawled values are escaped by Sequelize instead of
    // being spliced into the SQL string.
    const point = `POINT (${latLng[0]} ${latLng[1]})`;
    const sql = 'SELECT * FROM "RealEstateRequests" WHERE email = :email AND subscribed = true AND ST_Contains("RealEstateRequests".bounding_box, ST_GEOMFROMTEXT(:point))';

    return await db.sequelize.query(sql, {
        replacements: { email, point }
    });
}
// Database helper functions exported by this module.
module.exports = {
allRERequest,
allMarketAlerts,
findPointInsideBoundingBox
};

View File

@@ -7,17 +7,6 @@ const currentRERequest = async (req) => {
const request = await db.RealEstateRequest.findOne({ where: {uniqueId} });
return request;
};
// TODO Fetch only subscribed realestate requests
const allRERequest = async () => {
return await db.RealEstateRequest.findAll();
}
const findPointInsideBoundingBox = async (latLng) => {
return await db.sequelize.query("SELECT * FROM \"RealEstateRequests\" WHERE ST_Contains(\"RealEstateRequests\".bounding_box, ST_GEOMFROMTEXT(\'POINT (" + latLng[0] + " " + latLng[1]+ ")\'))");
}
module.exports = {
currentRERequest,
allRERequest,
findPointInsideBoundingBox
};

View File

@@ -0,0 +1,20 @@
'use strict';
module.exports = {
up: (queryInterface, Sequelize) => {
return queryInterface.addColumn(
'MarketAlerts',
'notified',
{
type: Sequelize.BOOLEAN
}
);
},
down: (queryInterface, Sequelize) => {
return queryInterface.removeColumn(
'MarketAlerts',
'notified'
);
}
};

View File

@@ -11,6 +11,7 @@ module.exports = (sequelize, DataTypes) => {
municipality : DataTypes.STRING,
region : DataTypes.STRING,
realEstateType : DataTypes.STRING,
notified : DataTypes.BOOLEAN,
email: {
type: DataTypes.STRING,

View File

@@ -2,6 +2,7 @@
const Promise = require("bluebird");
const OlxCrawler = require("../helpers/crawlers/olxClawler");
const db = require("../models/index");
const { allMarketAlerts } = require('../helpers/db/dbHelper');
const olxCrawler = new OlxCrawler(1, 2, 3);
@@ -10,6 +11,7 @@ const crawlers = [
];
async function crawlAll() {
console.log("CRAWLER SERVICE: crawlAll");
Promise.map(crawlers, function (crawler) {
return crawler.crawl();
@@ -17,7 +19,8 @@ async function crawlAll() {
try {
const marketAlertsFromDb = await db.MarketAlert.findAll();
const marketAlertsFromDb = await allMarketAlerts();
console.log("CRAWLER SERVICE: number of existing MarketAlerts from db: " + marketAlertsFromDb.length);
const marketAlerts = [];
const mergedResults = [].concat.apply([], results);
@@ -37,16 +40,23 @@ async function crawlAll() {
realEstateType: result.realEstateType
})
}
console.log("CRAWLER SERVICE: Number of crawler results: " + marketAlerts.length);
try {
console.log(marketAlerts);
const filteredMarketAlerts = marketAlerts.filter((elem) => !marketAlertsFromDb.find(({ url }) => elem.url === url));
const filteredMarketAlerts = marketAlerts.filter((elem) => !marketAlertsFromDb.find(({ url }) => elem.url === url));
console.log("CRAWLER SERVICE: Number of new crawler results: " + filteredMarketAlerts.length);
await db.MarketAlert.bulkCreate(filteredMarketAlerts);
process.exit()
process.exit();
} catch (e) {
console.log("Could not bulkCreate marketalers reason: ", e);
console.log("CRAWLER SERVICE: Could not bulkCreate marketalers reason: ", e);
process.exit();
}
} catch (e) {
console.log("Error crawling. Trying next crawler! ", e);
console.log("CRAWLER SERVICE: Error crawling. Trying next crawler! ", e);
process.exit();
}
})
};

View File

@@ -0,0 +1,19 @@
const Promise = require("bluebird");
const db = require("../models/index");
const { allMarketAlerts } = require('../helpers/db/dbHelper');
/**
 * Load market alerts via `allMarketAlerts(false)`, log them and terminate
 * the process.
 *
 * On failure the error is logged and the process exits with a non-zero
 * status.
 */
async function processNotifications() {
    try {
        const marketAlerts = await allMarketAlerts(false);
        console.log(marketAlerts);
        process.exit();
    } catch (e) {
        console.log("NOTIFICATION SERVICE: could not send notifications reason: ", e);
        // Terminate on the error path as well (the success path already does),
        // with a failure status so whoever launched the script can detect it.
        process.exit(1);
    }
}
processNotifications();

View File

@@ -7,7 +7,8 @@
"test": "echo \"Error: no test specified\" && exit 1",
"start": "node ./index.js",
"start-mon": "nodemon ./index.js",
"scheduler": "node ./app/services/crawlerService.js",
"crawler": "node ./app/services/crawlerService.js",
"notification": "node ./app/services/notificationService.js",
"migrate": "cd app && npx sequelize db:migrate",
"setup": "docker build -t marketalerts . && docker run -e POSTGRES_USER=docker -e POSTGRES_PASSWORD=docker -e POSTGRES_DB=marketalerts --name pg_marketalerts -d -p 5432:5432 marketalerts && sleep 4 && npm run migrate",
"docker-start": "docker start pg_marketalerts",