Compare commits

...

12 Commits

Author SHA1 Message Date
Naida Vatric
ce857ddce9 Renamed var. 2020-02-23 23:11:21 +01:00
Naida Vatric
148b2ea863 Changed default. 2020-02-23 16:38:40 +01:00
Naida Vatric
d436d4a37b Added Scraper API option. 2020-02-22 22:15:27 +01:00
Bilal Catic
6791a509d0 make user agent header configurable through env variable 2020-02-20 21:07:16 +01:00
Bilal Catic
edc6e2bbf7 Merge branch 'create-fetch-wrapper-with-user-agent' into 'master'
Create fetch wrapper with user agent

See merge request saburly/marketalarm/web!98
2020-02-20 19:58:32 +00:00
Bilal Catic
4f230020d7 use fetch wrapper instead of node-fetch 2020-02-20 19:49:29 +01:00
Bilal Catic
f62a7200c7 create fetch wrapper with mandatory user agent header 2020-02-20 19:47:30 +01:00
Bilal Catic
cff7cc2e9c apply prettier 2020-02-20 19:46:39 +01:00
Naida Vatric
df2a962d0f Merge branch 'prostor-vip-ads-fix' into 'master'
Prostor VIP ads fixed.

See merge request saburly/marketalarm/web!94
2020-02-17 14:44:58 +00:00
Naida Vatric
be4508ebea Merge branch 'include-incomplete-ads-inverse' into 'master'
Default true for include incomplete ads.

See merge request saburly/marketalarm/web!96
2020-02-17 14:44:35 +00:00
Naida Vatric
81fa3f046d Default true for include incomplete ads. 2020-02-15 00:52:06 +01:00
Naida Vatric
5bdc8e149a Prostor VIP ads fixed. 2020-02-14 22:41:51 +01:00
13 changed files with 89 additions and 39 deletions

View File

@@ -41,6 +41,13 @@ const PROSTOR_LOGIN = {
PASSWORD: process.env.PROSTOR_LOGIN_PASS PASSWORD: process.env.PROSTOR_LOGIN_PASS
}; };
// User-Agent header value sent with outgoing crawler requests.
// Can be overridden through the USER_AGENT environment variable.
const USER_AGENT =
  process.env.USER_AGENT ||
  "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36";
// Scraper API toggle: 1 = on, 0 = off. Defaults to on when unset or empty.
// Environment variables are always strings, so the raw value "0" would be
// truthy; converting to a number makes "0" actually switch the feature off.
const USE_SCRAPER_API = Number(process.env.USE_SCRAPER_API || 1);
// API key for Scraper API; empty string when not configured.
const SCRAPER_API_KEY = process.env.SCRAPER_API_KEY || "";
module.exports = { module.exports = {
APP_PORT, APP_PORT,
APP_URL, APP_URL,
@@ -54,5 +61,8 @@ module.exports = {
API_MAP_KEY, API_MAP_KEY,
STAGING, STAGING,
CHECK_UP_DAYS, CHECK_UP_DAYS,
PROSTOR_LOGIN PROSTOR_LOGIN,
USER_AGENT,
USE_SCRAPER_API,
SCRAPER_API_KEY
}; };

View File

@@ -1,6 +1,6 @@
"use strict"; "use strict";
const fetch = require("node-fetch"); const fetch = require("../../helpers/fetchWrapper");
const cheerio = require("cheerio"); const cheerio = require("cheerio");
const Promise = require("bluebird"); const Promise = require("bluebird");
const moment = require("moment-timezone"); const moment = require("moment-timezone");

View File

@@ -1,6 +1,6 @@
"use strict"; "use strict";
const fetch = require("node-fetch"); const fetch = require("../../helpers/fetchWrapper");
const cheerio = require("cheerio"); const cheerio = require("cheerio");
const Promise = require("bluebird"); const Promise = require("bluebird");
const moment = require("moment-timezone"); const moment = require("moment-timezone");

View File

@@ -1,6 +1,6 @@
"use strict"; "use strict";
const fetch = require("node-fetch"); const fetch = require("../../helpers/fetchWrapper");
const cheerio = require("cheerio"); const cheerio = require("cheerio");
const moment = require("moment-timezone"); const moment = require("moment-timezone");
const FormData = require("form-data"); const FormData = require("form-data");
@@ -191,13 +191,7 @@ class ProstorCrawler {
const { lat, lng, property_name, price, size, link, status } = realEstate; const { lat, lng, property_name, price, size, link, status } = realEstate;
//Status information is given already in realestate list //Status information is given already in realestate list
//For VIP Ads status ='' canot be used, but no VIP ads are crawled const adStatus = ProstorCrawler.getStatusId(status);
//We will make "fake" vip ad for RE that have size=55
//It is weird because yesterday it said 'VIP ponuda' ???
const adStatus =
size === "55"
? ProstorCrawler.getStatusId("VIP ponuda")
: ProstorCrawler.getStatusId(status);
const url = `https://prostor.ba${link}`; const url = `https://prostor.ba${link}`;

View File

@@ -1,6 +1,6 @@
"use strict"; "use strict";
const fetch = require("node-fetch"); const fetch = require("../../helpers/fetchWrapper");
const cheerio = require("cheerio"); const cheerio = require("cheerio");
const Promise = require("bluebird"); const Promise = require("bluebird");
const moment = require("moment-timezone"); const moment = require("moment-timezone");
@@ -399,7 +399,9 @@ class RentalCrawler {
); );
if (!publishedDateMoment.isValid()) { if (!publishedDateMoment.isValid()) {
throw { throw {
message: `Invalid published date : ${extractedData["re_realEstates_inserted"]}` message: `Invalid published date : ${
extractedData["re_realEstates_inserted"]
}`
}; };
} }
@@ -410,7 +412,9 @@ class RentalCrawler {
); );
if (!renewedDateMoment.isValid()) { if (!renewedDateMoment.isValid()) {
throw { throw {
message: `Invalid renewed date : ${extractedData["re_realEstates_edited"]}` message: `Invalid renewed date : ${
extractedData["re_realEstates_edited"]
}`
}; };
} }

View File

@@ -1,6 +1,6 @@
"use strict"; "use strict";
const fetch = require("node-fetch"); const fetch = require("../../helpers/fetchWrapper");
const cheerio = require("cheerio"); const cheerio = require("cheerio");
const moment = require("moment-timezone"); const moment = require("moment-timezone");

View File

@@ -332,10 +332,14 @@ const findRealEstatesForSearchRequest = async (searchRequest, maxResults) => {
}; };
} }
//When includeIncompleteAds are not defined - null it will consider it true
const order = [["updatedAt", "desc"]]; const order = [["updatedAt", "desc"]];
return db.RealEstate.findAll({ return db.RealEstate.findAll({
where: includeIncompleteAds ? queryIncludeIncomplete : query, where:
includeIncompleteAds || includeIncompleteAds == null
? queryIncludeIncomplete
: query,
limit: maxResults, limit: maxResults,
order order
}); });

View File

@@ -157,7 +157,7 @@ const findSearchRequestsForRealEstate = async realEstate => {
} else { } else {
// If real estate dont have defined number of rooms ex. null // If real estate dont have defined number of rooms ex. null
//It returns requests that didn't choose number of rooms - also null //It returns requests that didn't choose number of rooms - also null
//Or ones that picked some values but also picked to includeIncomplete ads //Or ones that picked some values but also picked to includeIncomplete ads (or default)
numberOfRoomsQuery = { numberOfRoomsQuery = {
[Op.or]: [ [Op.or]: [
{ {
@@ -176,7 +176,10 @@ const findSearchRequestsForRealEstate = async realEstate => {
}, },
{ {
includeIncompleteAds: { includeIncompleteAds: {
[Op.eq]: true [Op.or]: {
[Op.eq]: true,
[Op.is]: null
}
} }
} }
] ]
@@ -226,7 +229,10 @@ const findSearchRequestsForRealEstate = async realEstate => {
}, },
{ {
includeIncompleteAds: { includeIncompleteAds: {
[Op.eq]: true [Op.or]: {
[Op.eq]: true,
[Op.is]: null
}
} }
} }
] ]
@@ -275,7 +281,10 @@ const findSearchRequestsForRealEstate = async realEstate => {
}, },
{ {
includeIncompleteAds: { includeIncompleteAds: {
[Op.eq]: true [Op.or]: {
[Op.eq]: true,
[Op.is]: null
}
} }
} }
] ]
@@ -313,7 +322,10 @@ const findSearchRequestsForRealEstate = async realEstate => {
}, },
{ {
includeIncompleteAds: { includeIncompleteAds: {
[Op.eq]: true [Op.or]: {
[Op.eq]: true,
[Op.is]: null
}
} }
} }
] ]
@@ -347,7 +359,10 @@ const findSearchRequestsForRealEstate = async realEstate => {
}, },
{ {
includeIncompleteAds: { includeIncompleteAds: {
[Op.eq]: true [Op.or]: {
[Op.eq]: true,
[Op.is]: null
}
} }
} }
] ]
@@ -381,7 +396,10 @@ const findSearchRequestsForRealEstate = async realEstate => {
}, },
{ {
includeIncompleteAds: { includeIncompleteAds: {
[Op.eq]: true [Op.or]: {
[Op.eq]: true,
[Op.is]: null
}
} }
} }
] ]
@@ -423,10 +441,13 @@ const findSearchRequestsForRealEstate = async realEstate => {
[Op.eq]: "ANY" [Op.eq]: "ANY"
}; };
} }
//Tag to check if incomplete ads are accepted in query //Tag to check if incomplete ads are accepted in query which is default
if (checkForIncompleteWanted) { if (checkForIncompleteWanted) {
query.includeIncompleteAds = { query.includeIncompleteAds = {
[Op.eq]: true [Op.or]: {
[Op.eq]: true,
[Op.is]: null
}
}; };
} }

View File

@@ -0,0 +1,21 @@
const nodeFetch = require("node-fetch");
const {
USER_AGENT,
USE_SCRAPER_API,
SCRAPER_API_KEY
} = require("../config/appConfig");
/**
 * Wrapper around node-fetch that always sends the configured User-Agent
 * header and, when the Scraper API option is enabled, routes the request
 * through the Scraper API endpoint.
 *
 * @param {string} url - Address of the page to fetch.
 * @param {Object} [options={}] - Options passed on to node-fetch. Not mutated.
 * @returns {Promise} Whatever node-fetch returns for the (possibly proxied) request.
 */
const fetch = async (url, options = {}) => {
  const newOptions = Object.assign({}, options);
  // Object.assign above is a shallow copy, so the headers object must be
  // copied as well; otherwise the caller's own headers object would be
  // modified. Like the original, this assumes headers is a plain object.
  newOptions["headers"] = Object.assign({}, newOptions["headers"], {
    "User-Agent": USER_AGENT
  });

  // The flag may arrive as a raw environment string, where "0" is truthy;
  // treat "0" explicitly as "off" so the documented switch works.
  const scraperApiEnabled =
    Boolean(USE_SCRAPER_API) && String(USE_SCRAPER_API).trim() !== "0";

  // The target URL is a query-string value here, so it has to be encoded;
  // otherwise its own "&", "?" or "#" would be read as part of the
  // Scraper API request instead of the target address.
  const urlAdaptedForScraping = scraperApiEnabled
    ? `http://api.scraperapi.com/?api_key=${encodeURIComponent(
        SCRAPER_API_KEY
      )}&url=${encodeURIComponent(url)}`
    : url;

  return nodeFetch(urlAdaptedForScraping, newOptions);
};
module.exports = fetch;

View File

@@ -15,15 +15,7 @@ module.exports = (sequelize, DataTypes) => {
allowNull: false, allowNull: false,
defaultValue: { defaultValue: {
type: "Polygon", type: "Polygon",
coordinates: [ coordinates: [[[0, 0], [0, 0], [0, 0], [0, 0], [0, 0]]],
[
[0, 0],
[0, 0],
[0, 0],
[0, 0],
[0, 0]
]
],
crs: { type: "name", properties: { name: "EPSG:4326" } } crs: { type: "name", properties: { name: "EPSG:4326" } }
} }
}, },

View File

@@ -131,7 +131,7 @@ const notifyRequestsWithDailyOption = async () => {
}; };
const checkUpNotify = async () => { const checkUpNotify = async () => {
/* const searchRequestsForCheckUp = await findAllRequestsForCheckUp(); /* const searchRequestsForCheckUp = await findAllRequestsForCheckUp();
const asyncSendEmailActions = []; const asyncSendEmailActions = [];

View File

@@ -61,9 +61,8 @@
<p class="distinguished"> <p class="distinguished">
<label class="checkbox-label"> <label class="checkbox-label">
<input type="checkbox" class="filled-in" name="includeIncompleteAds" <input type="checkbox" class="filled-in" name="includeIncompleteAds"
<% if (includeIncompleteAds) { %>
checked checked
<% } %>> >
<span>Uključi i oglase bez potpunih informacija</span> <span>Uključi i oglase bez potpunih informacija</span>
</label> </label>
</p> </p>

View File

@@ -10,6 +10,7 @@ APP_BASE_URL=base url for the app
ENVIRONMENT=Variable to denote development, staging and production ENVIRONMENT=Variable to denote development, staging and production
USER_AGENT=User agent header to send in fetch requests
MAX_REAL_ESTATES_IN_EMAIL=Max number of real estates that will be shown in email, others will be truncated and URL with full list will be shown MAX_REAL_ESTATES_IN_EMAIL=Max number of real estates that will be shown in email, others will be truncated and URL with full list will be shown
MAX_REAL_ESTATES_IN_FIRST_EMAIL=Max number of real estates that will be shown in first (welcome) email MAX_REAL_ESTATES_IN_FIRST_EMAIL=Max number of real estates that will be shown in first (welcome) email
@@ -21,6 +22,10 @@ GA_ID=Google Analytics ID
#=============== GOOGLE MAPS =============# #=============== GOOGLE MAPS =============#
API_MAP_KEY=(your-key-here) API_MAP_KEY=(your-key-here)
#=============== SCRAPER API SUPPORT =============#
USE_SCRAPER_API=Set to 1 to turn Scraper API on or 0 to turn it off (default: 1)
SCRAPER_API_KEY= Key for Scraper api
#=============== AWS SDK EMAIL SETTINGS =======# #=============== AWS SDK EMAIL SETTINGS =======#
AWS_KEY_ID=(your-key-here) AWS_KEY_ID=(your-key-here)
AWS_SECRET_ACCESS_KEY=(your-key-here) AWS_SECRET_ACCESS_KEY=(your-key-here)