Added a notification service that queries unsent market alerts, fixed some crawler issues, and added proper logging
This commit is contained in:
@@ -87,14 +87,14 @@ module.exports = class OlxCrawler {
|
||||
|
||||
const data = {
|
||||
realEstateType: this.getCategoryId(realEstateType),
|
||||
email : email,
|
||||
email: email,
|
||||
olxId: olxId,
|
||||
// category: category,
|
||||
url,
|
||||
title,
|
||||
price: isNaN(parsedPrice) ? 0 : parsedPrice,
|
||||
size: parseFloat(size),
|
||||
gardenSize: isNaN(parseFloat(gardenSize)) ? 0 : parseFloat(gardenSize),
|
||||
gardenSize: isNaN(parseFloat(gardenSize)) ? 0 : parseFloat(gardenSize),
|
||||
address,
|
||||
region,
|
||||
municipality,
|
||||
@@ -151,22 +151,22 @@ module.exports = class OlxCrawler {
|
||||
}
|
||||
}
|
||||
|
||||
getCategoryId (category) {
|
||||
getCategoryId(category) {
|
||||
|
||||
switch(category) {
|
||||
switch (category) {
|
||||
case 'Stanovi':
|
||||
return 'stan';
|
||||
return 'stan';
|
||||
|
||||
case 'Vikendice':
|
||||
return 'vikendica'
|
||||
return 'vikendica'
|
||||
|
||||
case 'Kuće':
|
||||
return 'kuca';
|
||||
|
||||
default:
|
||||
return '';
|
||||
}
|
||||
}
|
||||
return '';
|
||||
}
|
||||
}
|
||||
|
||||
async indexPages(urls, start, end, maxResults = 1000) {
|
||||
//TODO fix paging
|
||||
@@ -186,16 +186,18 @@ module.exports = class OlxCrawler {
|
||||
}
|
||||
|
||||
async crawl() {
|
||||
console.log("OLX CRAWLER: start crawl");
|
||||
|
||||
const filteredResults = [];
|
||||
const realestateRequests = await allRERequest()
|
||||
const realestateRequests = await allRERequest();
|
||||
console.log("OLX CRAWLER: found " + realestateRequests.length + "subscribed RealEstateRequests");
|
||||
const urls = this.createRequestUrls(realestateRequests);
|
||||
let results = await this.indexPages(urls, this.fromPage, this.toPage, this.maxResults);
|
||||
|
||||
for (const result of results) {
|
||||
for (const finalResult of result) {
|
||||
if (finalResult.lat !== undefined && finalResult.lat !== null && finalResult.lat !== "") {
|
||||
const pointInsideBoundingBox = await findPointInsideBoundingBox([finalResult.lng, finalResult.lat]);
|
||||
const pointInsideBoundingBox = await findPointInsideBoundingBox([finalResult.lng, finalResult.lat], finalResult.email);
|
||||
|
||||
if (pointInsideBoundingBox[0].length !== 0) {
|
||||
filteredResults.push(finalResult);
|
||||
@@ -203,8 +205,7 @@ module.exports = class OlxCrawler {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
console.log(filteredResults);
|
||||
console.log("OLX CRAWLER: number of olx crawler results, after geo location filtering: " + filteredResults.length);
|
||||
return filteredResults;
|
||||
}
|
||||
|
||||
|
||||
@@ -1,15 +1,62 @@
|
||||
const db = require('../../models/index');
|
||||
|
||||
/**
 * Find all subscribed RealEstateRequests.
 *
 * @returns {Promise<Array>} resolves with whatever
 *   `db.RealEstateRequest.findAll` yields for `subscribed: true`.
 */
const allRERequest = async () => {
    // Only the filtered query is kept: an earlier unfiltered `findAll()`
    // return would make this filter unreachable.
    return db.RealEstateRequest.findAll({
        where: {
            subscribed: true
        }
    });
};
|
||||
|
||||
const findPointInsideBoundingBox = async (latLng) => {
|
||||
return await db.sequelize.query("SELECT * FROM \"RealEstateRequests\" WHERE ST_Contains(\"RealEstateRequests\".bounding_box, ST_GEOMFROMTEXT(\'POINT (" + latLng[0] + " " + latLng[1]+ ")\'))");
|
||||
|
||||
/**
 * Find MarketAlerts ordered by email (descending), optionally filtered by
 * their `notified` flag.
 *
 * @param {boolean} [notified] when given, only alerts whose `notified`
 *   column equals this value are returned; when omitted, no filter is applied.
 *
 * @returns {Promise<Array>} resolves with the result of `db.MarketAlert.findAll`.
 */
const allMarketAlerts = async (notified) => {
    const queryObject = {
        order: [
            ['email', 'DESC'],
        ]
    };

    // `false` is a meaningful filter ("not yet notified"), so a plain
    // truthiness check would silently drop it and return every alert.
    // NOTE(review): rows whose `notified` column is NULL will not match a
    // `notified = false` comparison in SQL — confirm the column has a default.
    if (notified || notified === false) {
        queryObject.where = { notified };
    }

    return db.MarketAlert.findAll(queryObject);
};
|
||||
|
||||
/**
 * Find the subscribed RealEstateRequests of one email address whose
 * bounding box contains the given point.
 *
 * @param {Array} latLng two-element array; element 0 is written first and
 *   element 1 second into the WKT `POINT (...)`. Despite the name, the
 *   crawler passes `[lng, lat]`.
 * @param {string} email email address the requests must belong to.
 *
 * @returns {Promise<Array>} resolves with the raw result of
 *   `db.sequelize.query`; the crawler reads element 0 as the list of rows.
 */
const findPointInsideBoundingBox = async (latLng, email) => {
    // Both the email and the point text are passed as named replacements so
    // Sequelize escapes them, instead of being spliced into the SQL string.
    const point = `POINT (${latLng[0]} ${latLng[1]})`;

    return db.sequelize.query(
        'SELECT * FROM "RealEstateRequests" WHERE email = :email AND subscribed = true AND ST_Contains("RealEstateRequests".bounding_box, ST_GEOMFROMTEXT(:point))',
        { replacements: { email, point } }
    );
};
|
||||
|
||||
module.exports = {
|
||||
allRERequest,
|
||||
allMarketAlerts,
|
||||
findPointInsideBoundingBox
|
||||
};
|
||||
|
||||
@@ -7,17 +7,6 @@ const currentRERequest = async (req) => {
|
||||
const request = await db.RealEstateRequest.findOne({ where: {uniqueId} });
|
||||
return request;
|
||||
};
|
||||
// TODO Fetch only subscribed realestate requests
|
||||
const allRERequest = async () => {
|
||||
return await db.RealEstateRequest.findAll();
|
||||
}
|
||||
|
||||
const findPointInsideBoundingBox = async (latLng) => {
|
||||
return await db.sequelize.query("SELECT * FROM \"RealEstateRequests\" WHERE ST_Contains(\"RealEstateRequests\".bounding_box, ST_GEOMFROMTEXT(\'POINT (" + latLng[0] + " " + latLng[1]+ ")\'))");
|
||||
}
|
||||
|
||||
module.exports = {
|
||||
currentRERequest,
|
||||
allRERequest,
|
||||
findPointInsideBoundingBox
|
||||
};
|
||||
|
||||
@@ -0,0 +1,20 @@
|
||||
'use strict';
|
||||
|
||||
module.exports = {
|
||||
up: (queryInterface, Sequelize) => {
|
||||
return queryInterface.addColumn(
|
||||
'MarketAlerts',
|
||||
'notified',
|
||||
{
|
||||
type: Sequelize.BOOLEAN
|
||||
}
|
||||
);
|
||||
},
|
||||
|
||||
down: (queryInterface, Sequelize) => {
|
||||
return queryInterface.removeColumn(
|
||||
'MarketAlerts',
|
||||
'notified'
|
||||
);
|
||||
}
|
||||
};
|
||||
@@ -11,6 +11,7 @@ module.exports = (sequelize, DataTypes) => {
|
||||
municipality : DataTypes.STRING,
|
||||
region : DataTypes.STRING,
|
||||
realEstateType : DataTypes.STRING,
|
||||
notified : DataTypes.BOOLEAN,
|
||||
|
||||
email: {
|
||||
type: DataTypes.STRING,
|
||||
|
||||
@@ -2,6 +2,7 @@
|
||||
const Promise = require("bluebird");
|
||||
const OlxCrawler = require("../helpers/crawlers/olxClawler");
|
||||
const db = require("../models/index");
|
||||
const { allMarketAlerts } = require('../helpers/db/dbHelper');
|
||||
|
||||
const olxCrawler = new OlxCrawler(1, 2, 3);
|
||||
|
||||
@@ -10,6 +11,7 @@ const crawlers = [
|
||||
];
|
||||
|
||||
async function crawlAll() {
|
||||
console.log("CRAWLER SERVICE: crawlAll");
|
||||
|
||||
Promise.map(crawlers, function (crawler) {
|
||||
return crawler.crawl();
|
||||
@@ -17,7 +19,8 @@ async function crawlAll() {
|
||||
|
||||
try {
|
||||
|
||||
const marketAlertsFromDb = await db.MarketAlert.findAll();
|
||||
const marketAlertsFromDb = await allMarketAlerts();
|
||||
console.log("CRAWLER SERVICE: number of existing MarketAlerts from db: " + marketAlertsFromDb.length);
|
||||
|
||||
const marketAlerts = [];
|
||||
const mergedResults = [].concat.apply([], results);
|
||||
@@ -37,16 +40,23 @@ async function crawlAll() {
|
||||
realEstateType: result.realEstateType
|
||||
})
|
||||
}
|
||||
console.log("CRAWLER SERVICE: Number of crawler results: " + marketAlerts.length);
|
||||
|
||||
try {
|
||||
console.log(marketAlerts);
|
||||
const filteredMarketAlerts = marketAlerts.filter((elem) => !marketAlertsFromDb.find(({ url }) => elem.url === url));
|
||||
|
||||
const filteredMarketAlerts = marketAlerts.filter((elem) => !marketAlertsFromDb.find(({ url }) => elem.url === url));
|
||||
console.log("CRAWLER SERVICE: Number of new crawler results: " + filteredMarketAlerts.length);
|
||||
|
||||
await db.MarketAlert.bulkCreate(filteredMarketAlerts);
|
||||
process.exit()
|
||||
process.exit();
|
||||
|
||||
} catch (e) {
|
||||
console.log("Could not bulkCreate marketalers reason: ", e);
|
||||
console.log("CRAWLER SERVICE: Could not bulkCreate marketalers reason: ", e);
|
||||
process.exit();
|
||||
}
|
||||
} catch (e) {
|
||||
console.log("Error crawling. Trying next crawler! ", e);
|
||||
console.log("CRAWLER SERVICE: Error crawling. Trying next crawler! ", e);
|
||||
process.exit();
|
||||
}
|
||||
})
|
||||
};
|
||||
|
||||
19
app/services/notificationService.js
Normal file
19
app/services/notificationService.js
Normal file
@@ -0,0 +1,19 @@
|
||||
|
||||
const Promise = require("bluebird");
|
||||
const db = require("../models/index");
|
||||
const { allMarketAlerts } = require('../helpers/db/dbHelper');
|
||||
|
||||
|
||||
/**
 * Entry point of the notification script: loads the MarketAlerts via
 * `allMarketAlerts(false)`, logs them and terminates the process.
 *
 * Exits with the default exit code on success and with exit code 1 when
 * loading fails.
 */
async function processNotifications() {
    try {
        const marketAlerts = await allMarketAlerts(false);
        console.log(marketAlerts);
        process.exit();
    } catch (e) {
        console.log("NOTIFICATION SERVICE: could not send notifications reason: ", e);
        // Terminate on failure too; otherwise the script only ever exits on
        // the success path (presumably an open DB connection would keep the
        // process alive). A non-zero code lets callers detect the failure.
        process.exit(1);
    }
}
|
||||
|
||||
|
||||
processNotifications();
|
||||
@@ -7,7 +7,8 @@
|
||||
"test": "echo \"Error: no test specified\" && exit 1",
|
||||
"start": "node ./index.js",
|
||||
"start-mon": "nodemon ./index.js",
|
||||
"scheduler": "node ./app/services/crawlerService.js",
|
||||
"crawler": "node ./app/services/crawlerService.js",
|
||||
"notification": "node ./app/services/notificationService.js",
|
||||
"migrate": "cd app && npx sequelize db:migrate",
|
||||
"setup": "docker build -t marketalerts . && docker run -e POSTGRES_USER=docker -e POSTGRES_PASSWORD=docker -e POSTGRES_DB=marketalerts --name pg_marketalerts -d -p 5432:5432 marketalerts && sleep 4 && npm run migrate",
|
||||
"docker-start": "docker start pg_marketalerts",
|
||||
|
||||
Reference in New Issue
Block a user