From 208faa08df037f4d427ef158bbccd3ede2d5166a Mon Sep 17 00:00:00 2001 From: Nedim Uka Date: Tue, 25 Jun 2019 17:06:07 +0200 Subject: [PATCH] Added send notification service, and queried unsent marketalerts, fixed some issues with crawler, and added proper logging --- app/helpers/crawlers/olxClawler.js | 27 ++++----- app/helpers/db/dbHelper.js | 55 +++++++++++++++++-- app/helpers/url.js | 11 ---- ...-notification-sent-boolean-marketalerts.js | 20 +++++++ app/models/marketalert.js | 1 + app/services/crawlerService.js | 22 ++++++-- app/services/notificationService.js | 19 +++++++ package.json | 3 +- 8 files changed, 123 insertions(+), 35 deletions(-) create mode 100644 app/migrations/20190625120813-add-notification-sent-boolean-marketalerts.js create mode 100644 app/services/notificationService.js diff --git a/app/helpers/crawlers/olxClawler.js b/app/helpers/crawlers/olxClawler.js index 1f7ea1b..4c0a0a0 100644 --- a/app/helpers/crawlers/olxClawler.js +++ b/app/helpers/crawlers/olxClawler.js @@ -87,14 +87,14 @@ module.exports = class OlxCrawler { const data = { realEstateType: this.getCategoryId(realEstateType), - email : email, + email: email, olxId: olxId, // category: category, url, title, price: isNaN(parsedPrice) ? 0 : parsedPrice, size: parseFloat(size), - gardenSize: isNaN(parseFloat(gardenSize)) ? 0 : parseFloat(gardenSize), + gardenSize: isNaN(parseFloat(gardenSize)) ? 
0 : parseFloat(gardenSize), address, region, municipality, @@ -151,22 +151,22 @@ module.exports = class OlxCrawler { } } - getCategoryId (category) { + getCategoryId(category) { - switch(category) { + switch (category) { case 'Stanovi': - return 'stan'; + return 'stan'; case 'Vikendice': - return 'vikendica' + return 'vikendica' case 'Kuće': return 'kuca'; default: - return ''; - } - } + return ''; + } + } async indexPages(urls, start, end, maxResults = 1000) { //TODO fix paging @@ -186,16 +186,18 @@ module.exports = class OlxCrawler { } async crawl() { + console.log("OLX CRAWLER: start crawl"); const filteredResults = []; - const realestateRequests = await allRERequest() + const realestateRequests = await allRERequest(); + console.log("OLX CRAWLER: found " + realestateRequests.length + "subscribed RealEstateRequests"); const urls = this.createRequestUrls(realestateRequests); let results = await this.indexPages(urls, this.fromPage, this.toPage, this.maxResults); for (const result of results) { for (const finalResult of result) { if (finalResult.lat !== undefined && finalResult.lat !== null && finalResult.lat !== "") { - const pointInsideBoundingBox = await findPointInsideBoundingBox([finalResult.lng, finalResult.lat]); + const pointInsideBoundingBox = await findPointInsideBoundingBox([finalResult.lng, finalResult.lat], finalResult.email); if (pointInsideBoundingBox[0].length !== 0) { filteredResults.push(finalResult); @@ -203,8 +205,7 @@ module.exports = class OlxCrawler { } } } - - console.log(filteredResults); + console.log("OLX CRAWLER: number of olx crawler results, after geo location filtering: " + filteredResults.length); return filteredResults; } diff --git a/app/helpers/db/dbHelper.js b/app/helpers/db/dbHelper.js index f51638b..f81961d 100644 --- a/app/helpers/db/dbHelper.js +++ b/app/helpers/db/dbHelper.js @@ -1,15 +1,62 @@ const db = require('../../models/index'); -// TODO Fetch only subscribed realestate requests +/** + * Find all subscribed 
RealEstateRequests + */ const allRERequest = async () => { - return await db.RealEstateRequest.findAll(); + return await db.RealEstateRequest.findAll({ + where: { + subscribed: true + } + }); } -const findPointInsideBoundingBox = async (latLng) => { - return await db.sequelize.query("SELECT * FROM \"RealEstateRequests\" WHERE ST_Contains(\"RealEstateRequests\".bounding_box, ST_GEOMFROMTEXT(\'POINT (" + latLng[0] + " " + latLng[1]+ ")\'))"); + +/** + * Find MarketAlerts ordered by email (descending), filtered on notified only when the argument is truthy + * + * @param notified boolean (when truthy, only rows with notified = true are returned; false or undefined returns every row) + * + * @returns array of MarketAlerts + */ +const allMarketAlerts = async (notified) => { + + let queryObject = { + order: [ + ['email', 'DESC'], + ] + } + + if (notified){ + queryObject.where = { + notified: notified + } + } + return await db.MarketAlert.findAll(queryObject); + + // return await db.MarketAlerts.findAll({ + // where: { + // notified: notified + // }, + // order: [ + // ['email', 'DESC'], + // ] + // }); + } + +/** + * Find subscribed RealEstateRequests for the given email whose bounding_box contains the given point + * @param latLng array [x, y] written into POINT (x y); the crawler passes [lng, lat] + * @param email string (NOTE(review): interpolated into the SQL text unescaped) + * + * @returns sequelize raw query result; the crawler reads the matching rows from element 0 + */ +const findPointInsideBoundingBox = async (latLng, email) => { + return await db.sequelize.query(`SELECT * FROM "RealEstateRequests" WHERE email = '${email}' AND subscribed = true AND ST_Contains("RealEstateRequests".bounding_box, ST_GEOMFROMTEXT('POINT (${latLng[0]} ${latLng[1]})'))`); } module.exports = { allRERequest, + allMarketAlerts, findPointInsideBoundingBox }; diff --git a/app/helpers/url.js b/app/helpers/url.js index 854d2a7..8bf1cb5 100644 --- a/app/helpers/url.js +++ b/app/helpers/url.js @@ -7,17 +7,6 @@ const currentRERequest = async (req) => { const request = await db.RealEstateRequest.findOne({ where: {uniqueId} }); return request; }; -// TODO Fetch only subscribed realestate requests -const allRERequest = async () => { - return await db.RealEstateRequest.findAll(); -} - -const findPointInsideBoundingBox = async (latLng) => { - return await 
db.sequelize.query("SELECT * FROM \"RealEstateRequests\" WHERE ST_Contains(\"RealEstateRequests\".bounding_box, ST_GEOMFROMTEXT(\'POINT (" + latLng[0] + " " + latLng[1]+ ")\'))"); -} - module.exports = { currentRERequest, - allRERequest, - findPointInsideBoundingBox }; diff --git a/app/migrations/20190625120813-add-notification-sent-boolean-marketalerts.js b/app/migrations/20190625120813-add-notification-sent-boolean-marketalerts.js new file mode 100644 index 0000000..f70e6f6 --- /dev/null +++ b/app/migrations/20190625120813-add-notification-sent-boolean-marketalerts.js @@ -0,0 +1,20 @@ +'use strict'; + +module.exports = { + up: (queryInterface, Sequelize) => { + return queryInterface.addColumn( + 'MarketAlerts', + 'notified', + { + type: Sequelize.BOOLEAN + } + ); + }, + + down: (queryInterface, Sequelize) => { + return queryInterface.removeColumn( + 'MarketAlerts', + 'notified' + ); + } +}; diff --git a/app/models/marketalert.js b/app/models/marketalert.js index 9f1e092..6f58560 100644 --- a/app/models/marketalert.js +++ b/app/models/marketalert.js @@ -11,6 +11,7 @@ module.exports = (sequelize, DataTypes) => { municipality : DataTypes.STRING, region : DataTypes.STRING, realEstateType : DataTypes.STRING, + notified : DataTypes.BOOLEAN, email: { type: DataTypes.STRING, diff --git a/app/services/crawlerService.js b/app/services/crawlerService.js index 5eb160b..1b75a24 100644 --- a/app/services/crawlerService.js +++ b/app/services/crawlerService.js @@ -2,6 +2,7 @@ const Promise = require("bluebird"); const OlxCrawler = require("../helpers/crawlers/olxClawler"); const db = require("../models/index"); +const { allMarketAlerts } = require('../helpers/db/dbHelper'); const olxCrawler = new OlxCrawler(1, 2, 3); @@ -10,6 +11,7 @@ const crawlers = [ ]; async function crawlAll() { + console.log("CRAWLER SERVICE: crawlAll"); Promise.map(crawlers, function (crawler) { return crawler.crawl(); @@ -17,7 +19,8 @@ async function crawlAll() { try { - const marketAlertsFromDb = await 
db.MarketAlert.findAll(); + const marketAlertsFromDb = await allMarketAlerts(); + console.log("CRAWLER SERVICE: number of existing MarketAlerts from db: " + marketAlertsFromDb.length); const marketAlerts = []; const mergedResults = [].concat.apply([], results); @@ -37,16 +40,23 @@ async function crawlAll() { realEstateType: result.realEstateType }) } + console.log("CRAWLER SERVICE: Number of crawler results: " + marketAlerts.length); + try { - console.log(marketAlerts); - const filteredMarketAlerts = marketAlerts.filter((elem) => !marketAlertsFromDb.find(({ url }) => elem.url === url)); + + const filteredMarketAlerts = marketAlerts.filter((elem) => !marketAlertsFromDb.find(({ url }) => elem.url === url)); + console.log("CRAWLER SERVICE: Number of new crawler results: " + filteredMarketAlerts.length); + await db.MarketAlert.bulkCreate(filteredMarketAlerts); - process.exit() + process.exit(); + } catch (e) { - console.log("Could not bulkCreate marketalers reason: ", e); + console.log("CRAWLER SERVICE: Could not bulkCreate marketalers reason: ", e); + process.exit(); } } catch (e) { - console.log("Error crawling. Trying next crawler! ", e); + console.log("CRAWLER SERVICE: Error crawling. Trying next crawler! 
", e); + process.exit(); } }) }; diff --git a/app/services/notificationService.js b/app/services/notificationService.js new file mode 100644 index 0000000..2e4e6c0 --- /dev/null +++ b/app/services/notificationService.js @@ -0,0 +1,19 @@ + +const Promise = require("bluebird"); +const db = require("../models/index"); +const { allMarketAlerts } = require('../helpers/db/dbHelper'); + + +async function processNotifications() { + + try { + const marketAlerts = await allMarketAlerts(false); + console.log(marketAlerts); + process.exit(); + } catch (e) { + console.log("NOTIFICATION SERVICE: could not send notifications reason: ", e); + } +} + + +processNotifications(); \ No newline at end of file diff --git a/package.json b/package.json index 360b7bd..f7a3e66 100644 --- a/package.json +++ b/package.json @@ -7,7 +7,8 @@ "test": "echo \"Error: no test specified\" && exit 1", "start": "node ./index.js", "start-mon": "nodemon ./index.js", - "scheduler": "node ./app/services/crawlerService.js", + "crawler": "node ./app/services/crawlerService.js", + "notification": "node ./app/services/notificationService.js", "migrate": "cd app && npx sequelize db:migrate", "setup": "docker build -t marketalerts . && docker run -e POSTGRES_USER=docker -e POSTGRES_PASSWORD=docker -e POSTGRES_DB=marketalerts --name pg_marketalerts -d -p 5432:5432 marketalerts && sleep 4 && npm run migrate", "docker-start": "docker start pg_marketalerts",