diff --git a/app/config/appConfig.js b/app/config/appConfig.js index 1ed59a3..411e6ff 100644 --- a/app/config/appConfig.js +++ b/app/config/appConfig.js @@ -41,6 +41,10 @@ const PROSTOR_LOGIN = { PASSWORD: process.env.PROSTOR_LOGIN_PASS }; +const USER_AGENT = + process.env.USER_AGENT || + "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36"; + module.exports = { APP_PORT, APP_URL, @@ -54,5 +58,6 @@ module.exports = { API_MAP_KEY, STAGING, CHECK_UP_DAYS, - PROSTOR_LOGIN + PROSTOR_LOGIN, + USER_AGENT }; diff --git a/app/crawler/specificCrawlers/aktido.js b/app/crawler/specificCrawlers/aktido.js index 74bcba7..9755eb7 100644 --- a/app/crawler/specificCrawlers/aktido.js +++ b/app/crawler/specificCrawlers/aktido.js @@ -1,6 +1,6 @@ "use strict"; -const fetch = require("node-fetch"); +const fetch = require("../../helpers/fetchWrapper"); const cheerio = require("cheerio"); const Promise = require("bluebird"); const moment = require("moment-timezone"); diff --git a/app/crawler/specificCrawlers/olx.js b/app/crawler/specificCrawlers/olx.js index ede9734..1e3d63a 100644 --- a/app/crawler/specificCrawlers/olx.js +++ b/app/crawler/specificCrawlers/olx.js @@ -1,6 +1,6 @@ "use strict"; -const fetch = require("node-fetch"); +const fetch = require("../../helpers/fetchWrapper"); const cheerio = require("cheerio"); const Promise = require("bluebird"); const moment = require("moment-timezone"); diff --git a/app/crawler/specificCrawlers/prostor.js b/app/crawler/specificCrawlers/prostor.js index 104d5d0..5690a4b 100644 --- a/app/crawler/specificCrawlers/prostor.js +++ b/app/crawler/specificCrawlers/prostor.js @@ -1,6 +1,6 @@ "use strict"; -const fetch = require("node-fetch"); +const fetch = require("../../helpers/fetchWrapper"); const cheerio = require("cheerio"); const moment = require("moment-timezone"); const FormData = require("form-data"); diff --git a/app/crawler/specificCrawlers/rental.js b/app/crawler/specificCrawlers/rental.js index 39eb1c5..ed37e83 100644 --- a/app/crawler/specificCrawlers/rental.js +++ b/app/crawler/specificCrawlers/rental.js @@ -1,6 +1,6 @@ "use strict"; -const fetch = require("node-fetch"); +const fetch = require("../../helpers/fetchWrapper"); const cheerio = require("cheerio"); const Promise = require("bluebird"); const moment = require("moment-timezone"); @@ -399,7 +399,9 @@ class RentalCrawler { ); if (!publishedDateMoment.isValid()) { throw { - message: `Invalid published date : ${extractedData["re_realEstates_inserted"]}` + message: `Invalid published date : ${ + extractedData["re_realEstates_inserted"] + }` }; } @@ -410,7 +412,9 @@ class RentalCrawler { ); if (!renewedDateMoment.isValid()) { throw { - message: `Invalid renewed date : ${extractedData["re_realEstates_edited"]}` + message: `Invalid renewed date : ${ + extractedData["re_realEstates_edited"] + }` }; } diff --git a/app/crawler/specificCrawlers/saljic.js b/app/crawler/specificCrawlers/saljic.js index 2eef7cc..9914f35 100644 --- a/app/crawler/specificCrawlers/saljic.js +++ b/app/crawler/specificCrawlers/saljic.js @@ -1,6 +1,6 @@ "use strict"; -const fetch = require("node-fetch"); +const fetch = require("../../helpers/fetchWrapper"); const cheerio = require("cheerio"); const moment = require("moment-timezone"); diff --git a/app/helpers/fetchWrapper.js b/app/helpers/fetchWrapper.js new file mode 100644 index 0000000..1d9b198 --- /dev/null +++ b/app/helpers/fetchWrapper.js @@ -0,0 +1,13 @@ +const nodeFetch = require("node-fetch"); +const { USER_AGENT } = require("../config/appConfig"); + +const fetch = async (url, options = {}) => { + const newOptions = Object.assign({}, options); + if (!newOptions["headers"]) { + newOptions["headers"] = {}; + } + newOptions["headers"]["User-Agent"] = USER_AGENT; + return nodeFetch(url, newOptions); +}; + +module.exports = fetch; diff --git a/app/models/searchRequest.js b/app/models/searchRequest.js index 0d4997f..8ef9b90 100644 --- a/app/models/searchRequest.js +++ b/app/models/searchRequest.js @@ -15,15 +15,7 @@ module.exports = (sequelize, DataTypes) => { allowNull: false, defaultValue: { type: "Polygon", - coordinates: [ - [ - [0, 0], - [0, 0], - [0, 0], - [0, 0], - [0, 0] - ] - ], + coordinates: [[[0, 0], [0, 0], [0, 0], [0, 0], [0, 0]]], crs: { type: "name", properties: { name: "EPSG:4326" } } } }, diff --git a/app/services/notificationService.js b/app/services/notificationService.js index a005f84..bb649ea 100644 --- a/app/services/notificationService.js +++ b/app/services/notificationService.js @@ -131,7 +131,7 @@ const notifyRequestsWithDailyOption = async () => { }; const checkUpNotify = async () => { - /* const searchRequestsForCheckUp = await findAllRequestsForCheckUp(); + /* const searchRequestsForCheckUp = await findAllRequestsForCheckUp(); const asyncSendEmailActions = []; @@ -144,7 +144,7 @@ const checkUpNotify = async () => { asyncSendEmailActions.push(sendEmailPromise); sendEmailPromise.catch(err => console.log("[Email Sending Failed]", err)); } - await Promise.all(asyncSendEmailActions); */ + await Promise.all(asyncSendEmailActions); */ }; module.exports = { diff --git a/development.env b/development.env index 683fe65..bc1cc92 100644 --- a/development.env +++ b/development.env @@ -10,6 +10,7 @@ APP_BASE_URL=base url for the app ENVIRONMENT=Variable to denote development, staging and production +USER_AGENT=User agent header to send in fetch requests MAX_REAL_ESTATES_IN_EMAIL=Max number of real estates that will be shown in email, others will be truncated and URL with full list will be shwon MAX_REAL_ESTATES_IN_FIRST_EMAIL=Max number of real estates that will be shown in first (welcome) email @@ -69,4 +70,4 @@ AKTIDO_FORCE_CRAWL=Non-zero value will force crawler to crawl all pages without SALJIC_MAX_RESULTS_PER_PAGE=For Saljic crawler, this represents how many ads are crawled at once SALJIC_CRAWLER_AD_TYPE=enum name of what type of ads should be crawled, check common/enums.js file for valid values SALJIC_CRAWLER_AD_CATEGORIES=comma separated list of enum names of categories to be included, check common/enums.js file for valid values -SALJIC_FORCE_CRAWL=Non-zero value will force crawler to crawl all pages without stopping when known real estate is found \ No newline at end of file +SALJIC_FORCE_CRAWL=Non-zero value will force crawler to crawl all pages without stopping when known real estate is found