From 5bdc8e149a4b6b0f7807d0865e51da674e7fe912 Mon Sep 17 00:00:00 2001 From: Naida Vatric Date: Fri, 14 Feb 2020 22:41:51 +0100 Subject: [PATCH 1/9] Prostor VIP ads fixed. --- app/crawler/specificCrawlers/prostor.js | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/app/crawler/specificCrawlers/prostor.js b/app/crawler/specificCrawlers/prostor.js index 04be5f3..104d5d0 100644 --- a/app/crawler/specificCrawlers/prostor.js +++ b/app/crawler/specificCrawlers/prostor.js @@ -191,13 +191,7 @@ class ProstorCrawler { const { lat, lng, property_name, price, size, link, status } = realEstate; //Status information is given already in realestate list - //For VIP Ads status ='' canot be used, but no VIP ads are crawled - //We will make "fake" vip ad for RE that have size=55 - //It is weird because yesterday it said 'VIP ponuda' ??? - const adStatus = - size === "55" - ? ProstorCrawler.getStatusId("VIP ponuda") - : ProstorCrawler.getStatusId(status); + const adStatus = ProstorCrawler.getStatusId(status); const url = `https://prostor.ba${link}`; From 81fa3f046dbb46bfe589184b44e9ca0717d81879 Mon Sep 17 00:00:00 2001 From: Naida Vatric Date: Sat, 15 Feb 2020 00:52:06 +0100 Subject: [PATCH 2/9] Default true for include incomplete ads. --- app/helpers/db/realEstate.js | 6 ++++- app/helpers/db/searchRequest.js | 39 +++++++++++++++++++++++++-------- app/views/advancedFilters.ejs | 3 +-- 3 files changed, 36 insertions(+), 12 deletions(-) diff --git a/app/helpers/db/realEstate.js b/app/helpers/db/realEstate.js index 0f77260..8915695 100644 --- a/app/helpers/db/realEstate.js +++ b/app/helpers/db/realEstate.js @@ -332,10 +332,14 @@ const findRealEstatesForSearchRequest = async (searchRequest, maxResults) => { }; } + //When includeIncompleteAds are not defined - null it will consider it true const order = [["updatedAt", "desc"]]; return db.RealEstate.findAll({ - where: includeIncompleteAds ? 
queryIncludeIncomplete : query, + where: + includeIncompleteAds || includeIncompleteAds == null + ? queryIncludeIncomplete + : query, limit: maxResults, order }); diff --git a/app/helpers/db/searchRequest.js b/app/helpers/db/searchRequest.js index e2633b4..b6b852e 100644 --- a/app/helpers/db/searchRequest.js +++ b/app/helpers/db/searchRequest.js @@ -157,7 +157,7 @@ const findSearchRequestsForRealEstate = async realEstate => { } else { // If real estate dont have defined number of rooms ex. null //It returns requests that didn't choose number of rooms - also null - //Or ones that picked some values but also picked to includeIncomplete ads + //Or ones that picked some values but also picked to includeIncomplete ads (or default) numberOfRoomsQuery = { [Op.or]: [ { @@ -176,7 +176,10 @@ const findSearchRequestsForRealEstate = async realEstate => { }, { includeIncompleteAds: { - [Op.eq]: true + [Op.or]: { + [Op.eq]: true, + [Op.is]: null + } } } ] @@ -226,7 +229,10 @@ const findSearchRequestsForRealEstate = async realEstate => { }, { includeIncompleteAds: { - [Op.eq]: true + [Op.or]: { + [Op.eq]: true, + [Op.is]: null + } } } ] @@ -275,7 +281,10 @@ const findSearchRequestsForRealEstate = async realEstate => { }, { includeIncompleteAds: { - [Op.eq]: true + [Op.or]: { + [Op.eq]: true, + [Op.is]: null + } } } ] @@ -313,7 +322,10 @@ const findSearchRequestsForRealEstate = async realEstate => { }, { includeIncompleteAds: { - [Op.eq]: true + [Op.or]: { + [Op.eq]: true, + [Op.is]: null + } } } ] @@ -347,7 +359,10 @@ const findSearchRequestsForRealEstate = async realEstate => { }, { includeIncompleteAds: { - [Op.eq]: true + [Op.or]: { + [Op.eq]: true, + [Op.is]: null + } } } ] @@ -381,7 +396,10 @@ const findSearchRequestsForRealEstate = async realEstate => { }, { includeIncompleteAds: { - [Op.eq]: true + [Op.or]: { + [Op.eq]: true, + [Op.is]: null + } } } ] @@ -423,10 +441,13 @@ const findSearchRequestsForRealEstate = async realEstate => { [Op.eq]: "ANY" }; } - //Tag to check if 
incomplete ads are accepted in query + //Tag to check if incomplete ads are accepted in query which is default if (checkForIncompleteWanted) { query.includeIncompleteAds = { - [Op.eq]: true + [Op.or]: { + [Op.eq]: true, + [Op.is]: null + } }; } diff --git a/app/views/advancedFilters.ejs b/app/views/advancedFilters.ejs index 23e50f7..03b387f 100644 --- a/app/views/advancedFilters.ejs +++ b/app/views/advancedFilters.ejs @@ -61,9 +61,8 @@

From cff7cc2e9c13ec0057fccc4f03d3d46cbb154b60 Mon Sep 17 00:00:00 2001 From: Bilal Catic Date: Thu, 20 Feb 2020 19:46:39 +0100 Subject: [PATCH 3/9] apply prettier --- app/crawler/specificCrawlers/rental.js | 8 ++++++-- app/models/searchRequest.js | 10 +--------- app/services/notificationService.js | 4 ++-- 3 files changed, 9 insertions(+), 13 deletions(-) diff --git a/app/crawler/specificCrawlers/rental.js b/app/crawler/specificCrawlers/rental.js index 39eb1c5..ae0fcd7 100644 --- a/app/crawler/specificCrawlers/rental.js +++ b/app/crawler/specificCrawlers/rental.js @@ -399,7 +399,9 @@ class RentalCrawler { ); if (!publishedDateMoment.isValid()) { throw { - message: `Invalid published date : ${extractedData["re_realEstates_inserted"]}` + message: `Invalid published date : ${ + extractedData["re_realEstates_inserted"] + }` }; } @@ -410,7 +412,9 @@ class RentalCrawler { ); if (!renewedDateMoment.isValid()) { throw { - message: `Invalid renewed date : ${extractedData["re_realEstates_edited"]}` + message: `Invalid renewed date : ${ + extractedData["re_realEstates_edited"] + }` }; } diff --git a/app/models/searchRequest.js b/app/models/searchRequest.js index 0d4997f..8ef9b90 100644 --- a/app/models/searchRequest.js +++ b/app/models/searchRequest.js @@ -15,15 +15,7 @@ module.exports = (sequelize, DataTypes) => { allowNull: false, defaultValue: { type: "Polygon", - coordinates: [ - [ - [0, 0], - [0, 0], - [0, 0], - [0, 0], - [0, 0] - ] - ], + coordinates: [[[0, 0], [0, 0], [0, 0], [0, 0], [0, 0]]], crs: { type: "name", properties: { name: "EPSG:4326" } } } }, diff --git a/app/services/notificationService.js b/app/services/notificationService.js index a005f84..bb649ea 100644 --- a/app/services/notificationService.js +++ b/app/services/notificationService.js @@ -131,7 +131,7 @@ const notifyRequestsWithDailyOption = async () => { }; const checkUpNotify = async () => { - /* const searchRequestsForCheckUp = await findAllRequestsForCheckUp(); + /* const searchRequestsForCheckUp = 
await findAllRequestsForCheckUp(); const asyncSendEmailActions = []; @@ -144,7 +144,7 @@ const checkUpNotify = async () => { asyncSendEmailActions.push(sendEmailPromise); sendEmailPromise.catch(err => console.log("[Email Sending Failed]", err)); } - await Promise.all(asyncSendEmailActions); */ + await Promise.all(asyncSendEmailActions); */ }; module.exports = { From f62a7200c73097ffe3731a4f2520595f796ff0a2 Mon Sep 17 00:00:00 2001 From: Bilal Catic Date: Thu, 20 Feb 2020 19:47:30 +0100 Subject: [PATCH 4/9] create fetch wrapper with mandatory user agent header --- app/helpers/fetchWrapper.js | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 app/helpers/fetchWrapper.js diff --git a/app/helpers/fetchWrapper.js b/app/helpers/fetchWrapper.js new file mode 100644 index 0000000..5a660e2 --- /dev/null +++ b/app/helpers/fetchWrapper.js @@ -0,0 +1,13 @@ +const nodeFetch = require("node-fetch"); + +const fetch = async (url, options = {}) => { + const newOptions = Object.assign({}, options); + if (!newOptions["headers"]) { + newOptions["headers"] = {}; + } + newOptions["headers"]["User-Agent"] = + "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.106 Safari/537.36"; + return nodeFetch(url, newOptions); +}; + +module.exports = fetch; From 4f230020d746d5ed659e945eded3c80b3fdec6e3 Mon Sep 17 00:00:00 2001 From: Bilal Catic Date: Thu, 20 Feb 2020 19:49:29 +0100 Subject: [PATCH 5/9] use fetch wrapper instead of node-fetch --- app/crawler/specificCrawlers/aktido.js | 2 +- app/crawler/specificCrawlers/olx.js | 2 +- app/crawler/specificCrawlers/prostor.js | 2 +- app/crawler/specificCrawlers/rental.js | 2 +- app/crawler/specificCrawlers/saljic.js | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/app/crawler/specificCrawlers/aktido.js b/app/crawler/specificCrawlers/aktido.js index 74bcba7..9755eb7 100644 --- a/app/crawler/specificCrawlers/aktido.js +++ b/app/crawler/specificCrawlers/aktido.js @@ -1,6 +1,6 @@ 
"use strict"; -const fetch = require("node-fetch"); +const fetch = require("../../helpers/fetchWrapper"); const cheerio = require("cheerio"); const Promise = require("bluebird"); const moment = require("moment-timezone"); diff --git a/app/crawler/specificCrawlers/olx.js b/app/crawler/specificCrawlers/olx.js index ede9734..1e3d63a 100644 --- a/app/crawler/specificCrawlers/olx.js +++ b/app/crawler/specificCrawlers/olx.js @@ -1,6 +1,6 @@ "use strict"; -const fetch = require("node-fetch"); +const fetch = require("../../helpers/fetchWrapper"); const cheerio = require("cheerio"); const Promise = require("bluebird"); const moment = require("moment-timezone"); diff --git a/app/crawler/specificCrawlers/prostor.js b/app/crawler/specificCrawlers/prostor.js index 104d5d0..5690a4b 100644 --- a/app/crawler/specificCrawlers/prostor.js +++ b/app/crawler/specificCrawlers/prostor.js @@ -1,6 +1,6 @@ "use strict"; -const fetch = require("node-fetch"); +const fetch = require("../../helpers/fetchWrapper"); const cheerio = require("cheerio"); const moment = require("moment-timezone"); const FormData = require("form-data"); diff --git a/app/crawler/specificCrawlers/rental.js b/app/crawler/specificCrawlers/rental.js index ae0fcd7..ed37e83 100644 --- a/app/crawler/specificCrawlers/rental.js +++ b/app/crawler/specificCrawlers/rental.js @@ -1,6 +1,6 @@ "use strict"; -const fetch = require("node-fetch"); +const fetch = require("../../helpers/fetchWrapper"); const cheerio = require("cheerio"); const Promise = require("bluebird"); const moment = require("moment-timezone"); diff --git a/app/crawler/specificCrawlers/saljic.js b/app/crawler/specificCrawlers/saljic.js index 2eef7cc..9914f35 100644 --- a/app/crawler/specificCrawlers/saljic.js +++ b/app/crawler/specificCrawlers/saljic.js @@ -1,6 +1,6 @@ "use strict"; -const fetch = require("node-fetch"); +const fetch = require("../../helpers/fetchWrapper"); const cheerio = require("cheerio"); const moment = require("moment-timezone"); From 
6791a509d042b1de4d0269eb75043e3afdeb08ba Mon Sep 17 00:00:00 2001 From: Bilal Catic Date: Thu, 20 Feb 2020 21:07:16 +0100 Subject: [PATCH 6/9] make user agent header configurable through env variable --- app/config/appConfig.js | 7 ++++++- app/helpers/fetchWrapper.js | 4 ++-- development.env | 3 ++- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/app/config/appConfig.js b/app/config/appConfig.js index 1ed59a3..411e6ff 100644 --- a/app/config/appConfig.js +++ b/app/config/appConfig.js @@ -41,6 +41,10 @@ const PROSTOR_LOGIN = { PASSWORD: process.env.PROSTOR_LOGIN_PASS }; +const USER_AGENT = + process.env.USER_AGENT || + "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36"; + module.exports = { APP_PORT, APP_URL, @@ -54,5 +58,6 @@ module.exports = { API_MAP_KEY, STAGING, CHECK_UP_DAYS, - PROSTOR_LOGIN + PROSTOR_LOGIN, + USER_AGENT }; diff --git a/app/helpers/fetchWrapper.js b/app/helpers/fetchWrapper.js index 5a660e2..1d9b198 100644 --- a/app/helpers/fetchWrapper.js +++ b/app/helpers/fetchWrapper.js @@ -1,12 +1,12 @@ const nodeFetch = require("node-fetch"); +const { USER_AGENT } = require("../config/appConfig"); const fetch = async (url, options = {}) => { const newOptions = Object.assign({}, options); if (!newOptions["headers"]) { newOptions["headers"] = {}; } - newOptions["headers"]["User-Agent"] = - "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.106 Safari/537.36"; + newOptions["headers"]["User-Agent"] = USER_AGENT; return nodeFetch(url, newOptions); }; diff --git a/development.env b/development.env index 683fe65..bc1cc92 100644 --- a/development.env +++ b/development.env @@ -10,6 +10,7 @@ APP_BASE_URL=base url for the app ENVIRONMENT=Variable to denote development, staging and production +USER_AGENT=User agent header to send in fetch requests MAX_REAL_ESTATES_IN_EMAIL=Max number of real estates that will be shown in email, others will be 
truncated and URL with full list will be shwon MAX_REAL_ESTATES_IN_FIRST_EMAIL=Max number of real estates that will be shown in first (welcome) email @@ -69,4 +70,4 @@ AKTIDO_FORCE_CRAWL=Non-zero value will force crawler to crawl all pages without SALJIC_MAX_RESULTS_PER_PAGE=For Saljic crawler, this represents how many ads are crawled at once SALJIC_CRAWLER_AD_TYPE=enum name of what type of ads should be crawled, check common/enums.js file for valid values SALJIC_CRAWLER_AD_CATEGORIES=comma separated list of enum names of categories to be included, check common/enums.js file for valid values -SALJIC_FORCE_CRAWL=Non-zero value will force crawler to crawl all pages without stopping when known real estate is found \ No newline at end of file +SALJIC_FORCE_CRAWL=Non-zero value will force crawler to crawl all pages without stopping when known real estate is found From d436d4a37bd73a4dad5f25e47b685d1183aaf120 Mon Sep 17 00:00:00 2001 From: Naida Vatric Date: Sat, 22 Feb 2020 22:15:27 +0100 Subject: [PATCH 7/9] Added Scraper API option. --- app/config/appConfig.js | 7 ++++++- app/helpers/fetchWrapper.js | 12 ++++++++++-- development.env | 4 ++++ 3 files changed, 20 insertions(+), 3 deletions(-) diff --git a/app/config/appConfig.js b/app/config/appConfig.js index 411e6ff..2843248 100644 --- a/app/config/appConfig.js +++ b/app/config/appConfig.js @@ -45,6 +45,9 @@ const USER_AGENT = process.env.USER_AGENT || "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36"; +const USE_SCRAPER_API = process.env.USE_SCRAPER_API || 0; //Default not to use? 
+const SCRAPER_API_KEY = process.env.SCRAPER_API_KEY || ""; + module.exports = { APP_PORT, APP_URL, @@ -59,5 +62,7 @@ module.exports = { STAGING, CHECK_UP_DAYS, PROSTOR_LOGIN, - USER_AGENT + USER_AGENT, + USE_SCRAPER_API, + SCRAPER_API_KEY }; diff --git a/app/helpers/fetchWrapper.js b/app/helpers/fetchWrapper.js index 1d9b198..74e33c6 100644 --- a/app/helpers/fetchWrapper.js +++ b/app/helpers/fetchWrapper.js @@ -1,5 +1,9 @@ const nodeFetch = require("node-fetch"); -const { USER_AGENT } = require("../config/appConfig"); +const { + USER_AGENT, + USE_SCRAPER_API, + SCRAPER_API_KEY +} = require("../config/appConfig"); const fetch = async (url, options = {}) => { const newOptions = Object.assign({}, options); @@ -7,7 +11,11 @@ const fetch = async (url, options = {}) => { newOptions["headers"] = {}; } newOptions["headers"]["User-Agent"] = USER_AGENT; - return nodeFetch(url, newOptions); + const newUrl = USE_SCRAPER_API + ? `http://api.scraperapi.com/?api_key=${SCRAPER_API_KEY}&url=${url}` + : url; + + return nodeFetch(newUrl, newOptions); }; module.exports = fetch; diff --git a/development.env b/development.env index bc1cc92..42844a6 100644 --- a/development.env +++ b/development.env @@ -22,6 +22,10 @@ GA_ID=Google Analytics ID #=============== GOOGLE MAPS =============# API_MAP_KEY=(your-key-here) +#=============== SCRAPER API SUPPORT =============# +USE_SCRAPER_API= To turn it on (1) or off (0) +SCRAPER_API_KEY= Key for Scraper api + #=============== AWS SDK EMAIL SETTINGS =======# AWS_KEY_ID=(your-key-here) AWS_SECRET_ACCESS_KEY=(your-key-here) From 148b2ea863178267f97a45d8d125001ac1d35a71 Mon Sep 17 00:00:00 2001 From: Naida Vatric Date: Sun, 23 Feb 2020 16:38:40 +0100 Subject: [PATCH 8/9] Changed default. 
--- app/config/appConfig.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/config/appConfig.js b/app/config/appConfig.js index 2843248..0ff991a 100644 --- a/app/config/appConfig.js +++ b/app/config/appConfig.js @@ -45,7 +45,7 @@ const USER_AGENT = process.env.USER_AGENT || "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36"; -const USE_SCRAPER_API = process.env.USE_SCRAPER_API || 0; //Default not to use? +const USE_SCRAPER_API = process.env.USE_SCRAPER_API || 1; //Default to use const SCRAPER_API_KEY = process.env.SCRAPER_API_KEY || ""; module.exports = { From ce857ddce9c6006fac81cdcc16a19178b3762a9f Mon Sep 17 00:00:00 2001 From: Naida Vatric Date: Sun, 23 Feb 2020 23:11:21 +0100 Subject: [PATCH 9/9] Renamed var. --- app/helpers/fetchWrapper.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/app/helpers/fetchWrapper.js b/app/helpers/fetchWrapper.js index 74e33c6..3af2817 100644 --- a/app/helpers/fetchWrapper.js +++ b/app/helpers/fetchWrapper.js @@ -11,11 +11,11 @@ const fetch = async (url, options = {}) => { newOptions["headers"] = {}; } newOptions["headers"]["User-Agent"] = USER_AGENT; - const newUrl = USE_SCRAPER_API + const urlAdaptedForScraping = USE_SCRAPER_API ? `http://api.scraperapi.com/?api_key=${SCRAPER_API_KEY}&url=${url}` : url; - return nodeFetch(newUrl, newOptions); + return nodeFetch(urlAdaptedForScraping, newOptions); }; module.exports = fetch;