Added send notification service, and queried unsent marketalerts, fixed some issues with crawler, and added proper logging
This commit is contained in:
@@ -87,14 +87,14 @@ module.exports = class OlxCrawler {
|
|||||||
|
|
||||||
const data = {
|
const data = {
|
||||||
realEstateType: this.getCategoryId(realEstateType),
|
realEstateType: this.getCategoryId(realEstateType),
|
||||||
email : email,
|
email: email,
|
||||||
olxId: olxId,
|
olxId: olxId,
|
||||||
// category: category,
|
// category: category,
|
||||||
url,
|
url,
|
||||||
title,
|
title,
|
||||||
price: isNaN(parsedPrice) ? 0 : parsedPrice,
|
price: isNaN(parsedPrice) ? 0 : parsedPrice,
|
||||||
size: parseFloat(size),
|
size: parseFloat(size),
|
||||||
gardenSize: isNaN(parseFloat(gardenSize)) ? 0 : parseFloat(gardenSize),
|
gardenSize: isNaN(parseFloat(gardenSize)) ? 0 : parseFloat(gardenSize),
|
||||||
address,
|
address,
|
||||||
region,
|
region,
|
||||||
municipality,
|
municipality,
|
||||||
@@ -151,22 +151,22 @@ module.exports = class OlxCrawler {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
getCategoryId (category) {
|
getCategoryId(category) {
|
||||||
|
|
||||||
switch(category) {
|
switch (category) {
|
||||||
case 'Stanovi':
|
case 'Stanovi':
|
||||||
return 'stan';
|
return 'stan';
|
||||||
|
|
||||||
case 'Vikendice':
|
case 'Vikendice':
|
||||||
return 'vikendica'
|
return 'vikendica'
|
||||||
|
|
||||||
case 'Kuće':
|
case 'Kuće':
|
||||||
return 'kuca';
|
return 'kuca';
|
||||||
|
|
||||||
default:
|
default:
|
||||||
return '';
|
return '';
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async indexPages(urls, start, end, maxResults = 1000) {
|
async indexPages(urls, start, end, maxResults = 1000) {
|
||||||
//TODO fix paging
|
//TODO fix paging
|
||||||
@@ -186,16 +186,18 @@ module.exports = class OlxCrawler {
|
|||||||
}
|
}
|
||||||
|
|
||||||
async crawl() {
|
async crawl() {
|
||||||
|
console.log("OLX CRAWLER: start crawl");
|
||||||
|
|
||||||
const filteredResults = [];
|
const filteredResults = [];
|
||||||
const realestateRequests = await allRERequest()
|
const realestateRequests = await allRERequest();
|
||||||
|
console.log("OLX CRAWLER: found " + realestateRequests.length + "subscribed RealEstateRequests");
|
||||||
const urls = this.createRequestUrls(realestateRequests);
|
const urls = this.createRequestUrls(realestateRequests);
|
||||||
let results = await this.indexPages(urls, this.fromPage, this.toPage, this.maxResults);
|
let results = await this.indexPages(urls, this.fromPage, this.toPage, this.maxResults);
|
||||||
|
|
||||||
for (const result of results) {
|
for (const result of results) {
|
||||||
for (const finalResult of result) {
|
for (const finalResult of result) {
|
||||||
if (finalResult.lat !== undefined && finalResult.lat !== null && finalResult.lat !== "") {
|
if (finalResult.lat !== undefined && finalResult.lat !== null && finalResult.lat !== "") {
|
||||||
const pointInsideBoundingBox = await findPointInsideBoundingBox([finalResult.lng, finalResult.lat]);
|
const pointInsideBoundingBox = await findPointInsideBoundingBox([finalResult.lng, finalResult.lat], finalResult.email);
|
||||||
|
|
||||||
if (pointInsideBoundingBox[0].length !== 0) {
|
if (pointInsideBoundingBox[0].length !== 0) {
|
||||||
filteredResults.push(finalResult);
|
filteredResults.push(finalResult);
|
||||||
@@ -203,8 +205,7 @@ module.exports = class OlxCrawler {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
console.log("OLX CRAWLER: number of olx crawler results, after geo location filtering: " + filteredResults.length);
|
||||||
console.log(filteredResults);
|
|
||||||
return filteredResults;
|
return filteredResults;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,15 +1,62 @@
|
|||||||
const db = require('../../models/index');
|
const db = require('../../models/index');
|
||||||
|
|
||||||
// TODO Fetch only subscribed realestate requests
|
/**
|
||||||
|
* Find all subscribed RealEstateRequests
|
||||||
|
*/
|
||||||
const allRERequest = async () => {
|
const allRERequest = async () => {
|
||||||
return await db.RealEstateRequest.findAll();
|
return await db.RealEstateRequest.findAll({
|
||||||
|
where: {
|
||||||
|
subscribed: true
|
||||||
|
}
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
const findPointInsideBoundingBox = async (latLng) => {
|
|
||||||
return await db.sequelize.query("SELECT * FROM \"RealEstateRequests\" WHERE ST_Contains(\"RealEstateRequests\".bounding_box, ST_GEOMFROMTEXT(\'POINT (" + latLng[0] + " " + latLng[1]+ ")\'))");
|
/**
 * Find MarketAlerts ordered by email (descending), optionally filtered by
 * their `notified` flag.
 *
 * @param {boolean} [notified] When a value is passed, only alerts whose
 *   `notified` column equals it are requested; when omitted, no filter is
 *   applied and every alert is requested.
 *
 * @returns {Promise<Array>} Whatever `db.MarketAlert.findAll` resolves with.
 */
const allMarketAlerts = async (notified) => {
    const queryObject = {
        order: [
            ['email', 'DESC'],
        ],
    };

    // Test for "argument supplied" instead of truthiness: callers pass `false`
    // to ask for alerts that have not been notified yet, and a truthiness
    // check would silently drop that filter and return every alert.
    if (notified !== undefined && notified !== null) {
        // NOTE(review): an equality filter does not match rows whose
        // `notified` column is NULL - confirm the column has a default.
        queryObject.where = { notified };
    }

    return await db.MarketAlert.findAll(queryObject);
};
|
||||||
|
|
||||||
|
/**
 * Find the subscribed RealEstateRequests of one user whose bounding box
 * contains the given point.
 *
 * @param {Array} latLng Two-element array used as `POINT (latLng[0] latLng[1])`;
 *   the crawler passes `[lng, lat]`.
 * @param {string} email Email the matching RealEstateRequests must belong to.
 *
 * @returns {Promise<Array>} The result of `db.sequelize.query`; the crawler
 *   reads the matched rows from index 0.
 * @throws {TypeError} When either coordinate is missing or not a finite number.
 */
const findPointInsideBoundingBox = async (latLng, email) => {
    // Empty/missing values must not be coerced to 0 by Number(); treat them as invalid.
    const toCoordinate = (value) => {
        if (value === undefined || value === null) {
            return NaN;
        }
        if (typeof value === 'string' && value.trim() === '') {
            return NaN;
        }
        return Number(value);
    };

    const x = toCoordinate(latLng[0]);
    const y = toCoordinate(latLng[1]);
    if (!Number.isFinite(x) || !Number.isFinite(y)) {
        throw new TypeError('findPointInsideBoundingBox: coordinates must be finite numbers');
    }

    // Values are passed as named replacements so they are escaped by Sequelize
    // rather than spliced into the SQL text (the email comes from stored user input).
    const sql = 'SELECT * FROM "RealEstateRequests" WHERE email = :email AND subscribed = true '
        + 'AND ST_Contains("RealEstateRequests".bounding_box, ST_GEOMFROMTEXT(:point))';

    return await db.sequelize.query(sql, {
        replacements: {
            // Sequelize rejects undefined replacement values; NULL simply matches no row.
            email: email === undefined ? null : email,
            point: `POINT (${x} ${y})`,
        },
    });
};
|
||||||
|
|
||||||
module.exports = {
|
module.exports = {
|
||||||
allRERequest,
|
allRERequest,
|
||||||
|
allMarketAlerts,
|
||||||
findPointInsideBoundingBox
|
findPointInsideBoundingBox
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -7,17 +7,6 @@ const currentRERequest = async (req) => {
|
|||||||
const request = await db.RealEstateRequest.findOne({ where: {uniqueId} });
|
const request = await db.RealEstateRequest.findOne({ where: {uniqueId} });
|
||||||
return request;
|
return request;
|
||||||
};
|
};
|
||||||
// TODO Fetch only subscribed realestate requests
|
|
||||||
const allRERequest = async () => {
|
|
||||||
return await db.RealEstateRequest.findAll();
|
|
||||||
}
|
|
||||||
|
|
||||||
const findPointInsideBoundingBox = async (latLng) => {
|
|
||||||
return await db.sequelize.query("SELECT * FROM \"RealEstateRequests\" WHERE ST_Contains(\"RealEstateRequests\".bounding_box, ST_GEOMFROMTEXT(\'POINT (" + latLng[0] + " " + latLng[1]+ ")\'))");
|
|
||||||
}
|
|
||||||
|
|
||||||
module.exports = {
|
module.exports = {
|
||||||
currentRERequest,
|
currentRERequest,
|
||||||
allRERequest,
|
|
||||||
findPointInsideBoundingBox
|
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -0,0 +1,20 @@
|
|||||||
|
'use strict';
|
||||||
|
|
||||||
|
module.exports = {
|
||||||
|
up: (queryInterface, Sequelize) => {
|
||||||
|
return queryInterface.addColumn(
|
||||||
|
'MarketAlerts',
|
||||||
|
'notified',
|
||||||
|
{
|
||||||
|
type: Sequelize.BOOLEAN
|
||||||
|
}
|
||||||
|
);
|
||||||
|
},
|
||||||
|
|
||||||
|
down: (queryInterface, Sequelize) => {
|
||||||
|
return queryInterface.removeColumn(
|
||||||
|
'MarketAlerts',
|
||||||
|
'notified'
|
||||||
|
);
|
||||||
|
}
|
||||||
|
};
|
||||||
@@ -11,6 +11,7 @@ module.exports = (sequelize, DataTypes) => {
|
|||||||
municipality : DataTypes.STRING,
|
municipality : DataTypes.STRING,
|
||||||
region : DataTypes.STRING,
|
region : DataTypes.STRING,
|
||||||
realEstateType : DataTypes.STRING,
|
realEstateType : DataTypes.STRING,
|
||||||
|
notified : DataTypes.BOOLEAN,
|
||||||
|
|
||||||
email: {
|
email: {
|
||||||
type: DataTypes.STRING,
|
type: DataTypes.STRING,
|
||||||
|
|||||||
@@ -2,6 +2,7 @@
|
|||||||
const Promise = require("bluebird");
|
const Promise = require("bluebird");
|
||||||
const OlxCrawler = require("../helpers/crawlers/olxClawler");
|
const OlxCrawler = require("../helpers/crawlers/olxClawler");
|
||||||
const db = require("../models/index");
|
const db = require("../models/index");
|
||||||
|
const { allMarketAlerts } = require('../helpers/db/dbHelper');
|
||||||
|
|
||||||
const olxCrawler = new OlxCrawler(1, 2, 3);
|
const olxCrawler = new OlxCrawler(1, 2, 3);
|
||||||
|
|
||||||
@@ -10,6 +11,7 @@ const crawlers = [
|
|||||||
];
|
];
|
||||||
|
|
||||||
async function crawlAll() {
|
async function crawlAll() {
|
||||||
|
console.log("CRAWLER SERVICE: crawlAll");
|
||||||
|
|
||||||
Promise.map(crawlers, function (crawler) {
|
Promise.map(crawlers, function (crawler) {
|
||||||
return crawler.crawl();
|
return crawler.crawl();
|
||||||
@@ -17,7 +19,8 @@ async function crawlAll() {
|
|||||||
|
|
||||||
try {
|
try {
|
||||||
|
|
||||||
const marketAlertsFromDb = await db.MarketAlert.findAll();
|
const marketAlertsFromDb = await allMarketAlerts();
|
||||||
|
console.log("CRAWLER SERVICE: number of existing MarketAlerts from db: " + marketAlertsFromDb.length);
|
||||||
|
|
||||||
const marketAlerts = [];
|
const marketAlerts = [];
|
||||||
const mergedResults = [].concat.apply([], results);
|
const mergedResults = [].concat.apply([], results);
|
||||||
@@ -37,16 +40,23 @@ async function crawlAll() {
|
|||||||
realEstateType: result.realEstateType
|
realEstateType: result.realEstateType
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
console.log("CRAWLER SERVICE: Number of crawler results: " + marketAlerts.length);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
console.log(marketAlerts);
|
|
||||||
const filteredMarketAlerts = marketAlerts.filter((elem) => !marketAlertsFromDb.find(({ url }) => elem.url === url));
|
const filteredMarketAlerts = marketAlerts.filter((elem) => !marketAlertsFromDb.find(({ url }) => elem.url === url));
|
||||||
|
console.log("CRAWLER SERVICE: Number of new crawler results: " + filteredMarketAlerts.length);
|
||||||
|
|
||||||
await db.MarketAlert.bulkCreate(filteredMarketAlerts);
|
await db.MarketAlert.bulkCreate(filteredMarketAlerts);
|
||||||
process.exit()
|
process.exit();
|
||||||
|
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
console.log("Could not bulkCreate marketalers reason: ", e);
|
console.log("CRAWLER SERVICE: Could not bulkCreate marketalers reason: ", e);
|
||||||
|
process.exit();
|
||||||
}
|
}
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
console.log("Error crawling. Trying next crawler! ", e);
|
console.log("CRAWLER SERVICE: Error crawling. Trying next crawler! ", e);
|
||||||
|
process.exit();
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
};
|
};
|
||||||
|
|||||||
19
app/services/notificationService.js
Normal file
19
app/services/notificationService.js
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
|
||||||
|
const Promise = require("bluebird");
|
||||||
|
const db = require("../models/index");
|
||||||
|
const { allMarketAlerts } = require('../helpers/db/dbHelper');
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * One-shot entry point of the notification service: requests the market
 * alerts with `notified = false` from `allMarketAlerts`, logs them and
 * terminates the process.
 *
 * Errors are logged and the process is terminated with a non-zero status.
 */
async function processNotifications() {
    try {
        const marketAlerts = await allMarketAlerts(false);
        console.log(marketAlerts);
        process.exit();
    } catch (e) {
        console.log("NOTIFICATION SERVICE: could not send notifications reason: ", e);
        // Terminate on failure as well (the success path already does), and
        // report it through the exit status so the invoking script can tell.
        process.exit(1);
    }
}
|
||||||
|
|
||||||
|
|
||||||
|
processNotifications();
|
||||||
@@ -7,7 +7,8 @@
|
|||||||
"test": "echo \"Error: no test specified\" && exit 1",
|
"test": "echo \"Error: no test specified\" && exit 1",
|
||||||
"start": "node ./index.js",
|
"start": "node ./index.js",
|
||||||
"start-mon": "nodemon ./index.js",
|
"start-mon": "nodemon ./index.js",
|
||||||
"scheduler": "node ./app/services/crawlerService.js",
|
"crawler": "node ./app/services/crawlerService.js",
|
||||||
|
"notification": "node ./app/services/notificationService.js",
|
||||||
"migrate": "cd app && npx sequelize db:migrate",
|
"migrate": "cd app && npx sequelize db:migrate",
|
||||||
"setup": "docker build -t marketalerts . && docker run -e POSTGRES_USER=docker -e POSTGRES_PASSWORD=docker -e POSTGRES_DB=marketalerts --name pg_marketalerts -d -p 5432:5432 marketalerts && sleep 4 && npm run migrate",
|
"setup": "docker build -t marketalerts . && docker run -e POSTGRES_USER=docker -e POSTGRES_PASSWORD=docker -e POSTGRES_DB=marketalerts --name pg_marketalerts -d -p 5432:5432 marketalerts && sleep 4 && npm run migrate",
|
||||||
"docker-start": "docker start pg_marketalerts",
|
"docker-start": "docker start pg_marketalerts",
|
||||||
|
|||||||
Reference in New Issue
Block a user