Compare commits

..

2 Commits

Author SHA1 Message Date
Naida Vatric
f56cd5b549 More elegant scrape of lat and long. 2020-02-17 21:55:24 +01:00
Naida Vatric
addd8c1344 Saljic crawler changed substring call. 2020-02-14 23:42:19 +01:00
17 changed files with 82 additions and 114 deletions

View File

@@ -9,14 +9,14 @@ const APP_URL =
? process.env.APP_URL || "http://market-alarm" ? process.env.APP_URL || "http://market-alarm"
: process.env.APP_URL || `${APP_BASE_URL}:${APP_PORT}`; : process.env.APP_URL || `${APP_BASE_URL}:${APP_PORT}`;
const USE_KIVI_ENVIRONMENT_TAG = process.env.KIVI_ENVIRONMENT !== "production"; const STAGING = process.env.ENVIRONMENT !== "production";
const DEFAULT_TIMEZONE = "Europe/Sarajevo"; const DEFAULT_TIMEZONE = "Europe/Sarajevo";
const CRAWLER_INTERVAL = parseInt(process.env.CRAWLER_INTERVAL) || 60; const CRAWLER_INTERVAL = parseInt(process.env.CRAWLER_INTERVAL) || 60;
const STOP_CRAWLER = !!parseInt(process.env.STOP_CRAWLER); const STOP_CRAWLER = !!parseInt(process.env.STOP_CRAWLER);
const NO_CHECK_UP_DAYS = parseInt(process.env.NO_CHECK_UP_DAYS) || 10; const CHECK_UP_DAYS = parseInt(process.env.CHECK_UP_DAYS) || 10;
const AWS_EMAIL_CONFIG = { const AWS_EMAIL_CONFIG = {
REGION: process.env.AWS_REGION || "", REGION: process.env.AWS_REGION || "",
@@ -34,17 +34,13 @@ const MAX_REAL_ESTATES_IN_FIRST_EMAIL =
const PRINT_CRAWLER_DEBUG = process.env.PRINT_CRAWLER_DEBUG_INFO || 0; const PRINT_CRAWLER_DEBUG = process.env.PRINT_CRAWLER_DEBUG_INFO || 0;
const GOOGLE_MAP_KEY = process.env.GOOGLE_MAP_KEY || ""; const API_MAP_KEY = process.env.API_MAP_KEY || "";
const PROSTOR_LOGIN = { const PROSTOR_LOGIN = {
EMAIL: process.env.PROSTOR_LOGIN_EMAIL, EMAIL: process.env.PROSTOR_LOGIN_EMAIL,
PASSWORD: process.env.PROSTOR_LOGIN_PASS PASSWORD: process.env.PROSTOR_LOGIN_PASS
}; };
const USER_AGENT =
process.env.USER_AGENT ||
"Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36";
module.exports = { module.exports = {
APP_PORT, APP_PORT,
APP_URL, APP_URL,
@@ -55,9 +51,8 @@ module.exports = {
MAX_REAL_ESTATES_IN_EMAIL, MAX_REAL_ESTATES_IN_EMAIL,
MAX_REAL_ESTATES_IN_FIRST_EMAIL, MAX_REAL_ESTATES_IN_FIRST_EMAIL,
PRINT_CRAWLER_DEBUG, PRINT_CRAWLER_DEBUG,
GOOGLE_MAP_KEY, API_MAP_KEY,
USE_KIVI_ENVIRONMENT_TAG, STAGING,
NO_CHECK_UP_DAYS, CHECK_UP_DAYS,
PROSTOR_LOGIN, PROSTOR_LOGIN
USER_AGENT
}; };

View File

@@ -17,15 +17,15 @@ const getLocation = async (req, res) => {
return; return;
} }
const selectedArea = searchRequest.areaToSearch; const selectedArea = searchRequest.areaToSearch;
const southWest = selectedArea.coordinates[0][3]; const sw = selectedArea.coordinates[0][3];
const northEast = selectedArea.coordinates[0][1]; const ne = selectedArea.coordinates[0][1];
if (sw[0] && ne[0]) { if (sw[0] && ne[0]) {
selectedLatLngBounds = { selectedLatLngBounds = {
swLat: southWest[1], swLat: sw[1],
swLng: southWest[0], swLng: sw[0],
neLat: northEast[1], neLat: ne[1],
neLng: northEast[0] neLng: ne[0]
}; };
boundsSelected = true; boundsSelected = true;
} }

View File

@@ -1,6 +1,6 @@
"use strict"; "use strict";
const fetch = require("../../helpers/fetchWrapper"); const fetch = require("node-fetch");
const cheerio = require("cheerio"); const cheerio = require("cheerio");
const Promise = require("bluebird"); const Promise = require("bluebird");
const moment = require("moment-timezone"); const moment = require("moment-timezone");

View File

@@ -1,6 +1,6 @@
"use strict"; "use strict";
const fetch = require("../../helpers/fetchWrapper"); const fetch = require("node-fetch");
const cheerio = require("cheerio"); const cheerio = require("cheerio");
const Promise = require("bluebird"); const Promise = require("bluebird");
const moment = require("moment-timezone"); const moment = require("moment-timezone");

View File

@@ -1,6 +1,6 @@
"use strict"; "use strict";
const fetch = require("../../helpers/fetchWrapper"); const fetch = require("node-fetch");
const cheerio = require("cheerio"); const cheerio = require("cheerio");
const moment = require("moment-timezone"); const moment = require("moment-timezone");
const FormData = require("form-data"); const FormData = require("form-data");
@@ -191,7 +191,13 @@ class ProstorCrawler {
const { lat, lng, property_name, price, size, link, status } = realEstate; const { lat, lng, property_name, price, size, link, status } = realEstate;
//Status information is given already in realestate list //Status information is given already in realestate list
const adStatus = ProstorCrawler.getStatusId(status); //For VIP Ads status ='' canot be used, but no VIP ads are crawled
//We will make "fake" vip ad for RE that have size=55
//It is weird because yesterday it said 'VIP ponuda' ???
const adStatus =
size === "55"
? ProstorCrawler.getStatusId("VIP ponuda")
: ProstorCrawler.getStatusId(status);
const url = `https://prostor.ba${link}`; const url = `https://prostor.ba${link}`;

View File

@@ -1,6 +1,6 @@
"use strict"; "use strict";
const fetch = require("../../helpers/fetchWrapper"); const fetch = require("node-fetch");
const cheerio = require("cheerio"); const cheerio = require("cheerio");
const Promise = require("bluebird"); const Promise = require("bluebird");
const moment = require("moment-timezone"); const moment = require("moment-timezone");
@@ -399,9 +399,7 @@ class RentalCrawler {
); );
if (!publishedDateMoment.isValid()) { if (!publishedDateMoment.isValid()) {
throw { throw {
message: `Invalid published date : ${ message: `Invalid published date : ${extractedData["re_realEstates_inserted"]}`
extractedData["re_realEstates_inserted"]
}`
}; };
} }
@@ -412,9 +410,7 @@ class RentalCrawler {
); );
if (!renewedDateMoment.isValid()) { if (!renewedDateMoment.isValid()) {
throw { throw {
message: `Invalid renewed date : ${ message: `Invalid renewed date : ${extractedData["re_realEstates_edited"]}`
extractedData["re_realEstates_edited"]
}`
}; };
} }

View File

@@ -1,6 +1,6 @@
"use strict"; "use strict";
const fetch = require("../../helpers/fetchWrapper"); const fetch = require("node-fetch");
const cheerio = require("cheerio"); const cheerio = require("cheerio");
const moment = require("moment-timezone"); const moment = require("moment-timezone");
@@ -218,7 +218,7 @@ class SaljicCrawler {
} }
async scrapeAd(url, adType) { async scrapeAd(url, adType) {
// console.log("[SALJIC] Scraping : ", url); console.log("[SALJIC] Scraping : ", url);
try { try {
const adPageSource = await fetch(url); const adPageSource = await fetch(url);
const body = await adPageSource.text(); const body = await adPageSource.text();
@@ -227,7 +227,9 @@ class SaljicCrawler {
// No information for status ex. PRODAN // No information for status ex. PRODAN
const status = AD_STATUS.STATUS_NORMAL; const status = AD_STATUS.STATUS_NORMAL;
//Extracting agency ID from url //Extracting agency ID from url
const agencyObjectId = parseInt(url.substring(46, url.length)); const agencyObjectId = url
? parseInt(url.substring(46, url.length))
: null;
//Extracting main properties //Extracting main properties
const propertySelectors = { const propertySelectors = {
@@ -272,14 +274,10 @@ class SaljicCrawler {
.trim(); .trim();
const latAndLongSrc = $(propertySelectors.latAndLong).attr("src"); const latAndLongSrc = $(propertySelectors.latAndLong).attr("src");
const latText = latAndLongSrc.substring( const tmpLatLong = latAndLongSrc.split("marker=")[1];
latAndLongSrc.indexOf("marker=") + 7, const latText = tmpLatLong.split("%2C")[0];
latAndLongSrc.indexOf("%2C", latAndLongSrc.indexOf("marker=")) const longText = tmpLatLong.split("%2C")[1];
);
const longText = latAndLongSrc.substring(
latAndLongSrc.indexOf("%2C", latAndLongSrc.indexOf("marker=")) + 3,
latAndLongSrc.length
);
const locationLat = parseFloat(latText) || null; const locationLat = parseFloat(latText) || null;
const locationLong = parseFloat(longText) || null; const locationLong = parseFloat(longText) || null;
@@ -328,11 +326,10 @@ class SaljicCrawler {
let numberOfViewsKivi = null; let numberOfViewsKivi = null;
let streetNumber = 0; let streetNumber = 0;
let adStatus = status; let adStatus = status;
let shortDescription = descriptions.substring( let shortDescription = descriptions
0, ? descriptions.substring(0, descriptions.indexOf("."))
descriptions.indexOf(".") : "";
); let longDescription = descriptions || "";
let longDescription = descriptions;
//Extracting data - Glavne karakteristike //Extracting data - Glavne karakteristike
let mainFieldIndex = 1; let mainFieldIndex = 1;
do { do {
@@ -343,10 +340,14 @@ class SaljicCrawler {
.replace(/[\n\r\t]/gm, "") .replace(/[\n\r\t]/gm, "")
.trim(); .trim();
const mainFieldTitle = mainField.substring(0, mainField.indexOf(" ")); const mainFieldTitle = mainField
? mainField.substring(0, mainField.indexOf(" "))
: "";
const mainFieldValue = mainField const mainFieldValue = mainField
.substring(mainField.indexOf(" "), mainField.length) ? mainField
.trim(); .substring(mainField.indexOf(" "), mainField.length)
.trim()
: "";
switch (mainFieldTitle) { switch (mainFieldTitle) {
case "Površina": case "Površina":

View File

@@ -332,14 +332,10 @@ const findRealEstatesForSearchRequest = async (searchRequest, maxResults) => {
}; };
} }
//When includeIncompleteAds are not defined - null it will consider it true
const order = [["updatedAt", "desc"]]; const order = [["updatedAt", "desc"]];
return db.RealEstate.findAll({ return db.RealEstate.findAll({
where: where: includeIncompleteAds ? queryIncludeIncomplete : query,
includeIncompleteAds || includeIncompleteAds == null
? queryIncludeIncomplete
: query,
limit: maxResults, limit: maxResults,
order order
}); });

View File

@@ -157,7 +157,7 @@ const findSearchRequestsForRealEstate = async realEstate => {
} else { } else {
// If the real estate doesn't have a defined number of rooms, e.g. null // If the real estate doesn't have a defined number of rooms, e.g. null
//It returns requests that didn't choose number of rooms - also null //It returns requests that didn't choose number of rooms - also null
//Or ones that picked some values but also picked to includeIncomplete ads (or default) //Or ones that picked some values but also picked to includeIncomplete ads
numberOfRoomsQuery = { numberOfRoomsQuery = {
[Op.or]: [ [Op.or]: [
{ {
@@ -176,10 +176,7 @@ const findSearchRequestsForRealEstate = async realEstate => {
}, },
{ {
includeIncompleteAds: { includeIncompleteAds: {
[Op.or]: { [Op.eq]: true
[Op.eq]: true,
[Op.is]: null
}
} }
} }
] ]
@@ -229,10 +226,7 @@ const findSearchRequestsForRealEstate = async realEstate => {
}, },
{ {
includeIncompleteAds: { includeIncompleteAds: {
[Op.or]: { [Op.eq]: true
[Op.eq]: true,
[Op.is]: null
}
} }
} }
] ]
@@ -281,10 +275,7 @@ const findSearchRequestsForRealEstate = async realEstate => {
}, },
{ {
includeIncompleteAds: { includeIncompleteAds: {
[Op.or]: { [Op.eq]: true
[Op.eq]: true,
[Op.is]: null
}
} }
} }
] ]
@@ -322,10 +313,7 @@ const findSearchRequestsForRealEstate = async realEstate => {
}, },
{ {
includeIncompleteAds: { includeIncompleteAds: {
[Op.or]: { [Op.eq]: true
[Op.eq]: true,
[Op.is]: null
}
} }
} }
] ]
@@ -359,10 +347,7 @@ const findSearchRequestsForRealEstate = async realEstate => {
}, },
{ {
includeIncompleteAds: { includeIncompleteAds: {
[Op.or]: { [Op.eq]: true
[Op.eq]: true,
[Op.is]: null
}
} }
} }
] ]
@@ -396,10 +381,7 @@ const findSearchRequestsForRealEstate = async realEstate => {
}, },
{ {
includeIncompleteAds: { includeIncompleteAds: {
[Op.or]: { [Op.eq]: true
[Op.eq]: true,
[Op.is]: null
}
} }
} }
] ]
@@ -441,13 +423,10 @@ const findSearchRequestsForRealEstate = async realEstate => {
[Op.eq]: "ANY" [Op.eq]: "ANY"
}; };
} }
//Tag to check if incomplete ads are accepted in query which is default //Tag to check if incomplete ads are accepted in query
if (checkForIncompleteWanted) { if (checkForIncompleteWanted) {
query.includeIncompleteAds = { query.includeIncompleteAds = {
[Op.or]: { [Op.eq]: true
[Op.eq]: true,
[Op.is]: null
}
}; };
} }

View File

@@ -2,7 +2,7 @@
const db = require("../../models/index"); const db = require("../../models/index");
const sequelize = require("sequelize"); const sequelize = require("sequelize");
const Op = sequelize.Op; const Op = sequelize.Op;
const { NO_CHECK_UP_DAYS } = require("../../config/appConfig"); const { CHECK_UP_DAYS } = require("../../config/appConfig");
const findRealEstatesForSearchRequest = async searchRequestId => { const findRealEstatesForSearchRequest = async searchRequestId => {
const query = { const query = {
@@ -45,9 +45,9 @@ const findNotNotifiedMatches = async () => {
}; };
const findAllRequestsForCheckUp = async () => { const findAllRequestsForCheckUp = async () => {
//First we find IDs of search request that don't need to be emailed for check up - to EXCLUDE //First we find IDs of search request that don't need to be emailed for check up - to EXCLUDE
//The ones that received notification for real estate NO_CHECK_UP_DAYS days from now //The ones that received notification for real estate CHECK_UP_DAYS days from now
const date = new Date(); const date = new Date();
const checkUpDate = date.getDate() - NO_CHECK_UP_DAYS; const checkUpDate = date.getDate() - CHECK_UP_DAYS;
date.setDate(checkUpDate); date.setDate(checkUpDate);
const dateQuery = { const dateQuery = {
createdAt: { createdAt: {

View File

@@ -3,12 +3,12 @@
const { const {
MAX_REAL_ESTATES_IN_EMAIL, MAX_REAL_ESTATES_IN_EMAIL,
APP_URL, APP_URL,
USE_KIVI_ENVIRONMENT_TAG STAGING
} = require("../config/appConfig"); } = require("../config/appConfig");
const { AD_CATEGORY, AD_TYPE, EMAIL_FREQUENCY } = require("../common/enums"); const { AD_CATEGORY, AD_TYPE, EMAIL_FREQUENCY } = require("../common/enums");
//Tag to recognize staging from development if needed //Tag to recognize staging from development
const stagingTag = USE_KIVI_ENVIRONMENT_TAG ? "[STAGING] " : ""; const stagingTag = STAGING ? "[STAGING] " : "";
const generateEmailFooter = (searchRequestId, emailFrequencyTitle) => { const generateEmailFooter = (searchRequestId, emailFrequencyTitle) => {
return ` <div>Trenutno ste prijavljeni da obavještenja o novim nekretninama primate <strong>${emailFrequencyTitle.toLowerCase()} </strong>.</div> return ` <div>Trenutno ste prijavljeni da obavještenja o novim nekretninama primate <strong>${emailFrequencyTitle.toLowerCase()} </strong>.</div>

View File

@@ -1,13 +0,0 @@
const nodeFetch = require("node-fetch");
const { USER_AGENT } = require("../config/appConfig");
const fetch = async (url, options = {}) => {
const newOptions = Object.assign({}, options);
if (!newOptions["headers"]) {
newOptions["headers"] = {};
}
newOptions["headers"]["User-Agent"] = USER_AGENT;
return nodeFetch(url, newOptions);
};
module.exports = fetch;

View File

@@ -15,7 +15,15 @@ module.exports = (sequelize, DataTypes) => {
allowNull: false, allowNull: false,
defaultValue: { defaultValue: {
type: "Polygon", type: "Polygon",
coordinates: [[[0, 0], [0, 0], [0, 0], [0, 0], [0, 0]]], coordinates: [
[
[0, 0],
[0, 0],
[0, 0],
[0, 0],
[0, 0]
]
],
crs: { type: "name", properties: { name: "EPSG:4326" } } crs: { type: "name", properties: { name: "EPSG:4326" } }
} }
}, },

View File

@@ -1,7 +1,7 @@
"use strict"; "use strict";
const { USE_KIVI_ENVIRONMENT_TAG } = require("../config/appConfig"); const { STAGING } = require("../config/appConfig");
const stagingTag = USE_KIVI_ENVIRONMENT_TAG ? "[STAGING] " : ""; const stagingTag = STAGING ? "[STAGING] " : "";
const { const {
matchRealEstates, matchRealEstates,
@@ -131,7 +131,7 @@ const notifyRequestsWithDailyOption = async () => {
}; };
const checkUpNotify = async () => { const checkUpNotify = async () => {
/* const searchRequestsForCheckUp = await findAllRequestsForCheckUp(); /* const searchRequestsForCheckUp = await findAllRequestsForCheckUp();
const asyncSendEmailActions = []; const asyncSendEmailActions = [];

View File

@@ -61,8 +61,9 @@
<p class="distinguished"> <p class="distinguished">
<label class="checkbox-label"> <label class="checkbox-label">
<input type="checkbox" class="filled-in" name="includeIncompleteAds" <input type="checkbox" class="filled-in" name="includeIncompleteAds"
<% if (includeIncompleteAds) { %>
checked checked
> <% } %>>
<span>Uključi i oglase bez potpunih informacija</span> <span>Uključi i oglase bez potpunih informacija</span>
</label> </label>
</p> </p>

View File

@@ -217,7 +217,7 @@
}); });
</script> </script>
<script <script
src="https://maps.googleapis.com/maps/api/js?key=<%= process.env.GOOGLE_MAP_KEY %>&language=bs&libraries=places&callback=initMap" src="https://maps.googleapis.com/maps/api/js?key=<%= process.env.API_MAP_KEY %>&language=bs&libraries=places&callback=initMap"
async async
defer defer
></script> ></script>

View File

@@ -8,19 +8,18 @@ SEQUELIZE_LOGGING=0- no sequelize logging, 1- log to the console
PORT=Port for the app, defaults to 5000 PORT=Port for the app, defaults to 5000
APP_BASE_URL=base url for the app APP_BASE_URL=base url for the app
KIVI_ENVIRONMENT=Variable to denote development, staging and production ENVIRONMENT=Variable to denote development, staging and production
USER_AGENT=User agent header to send in fetch requests
MAX_REAL_ESTATES_IN_EMAIL=Max number of real estates that will be shown in email, others will be truncated and URL with full list will be shown MAX_REAL_ESTATES_IN_EMAIL=Max number of real estates that will be shown in email, others will be truncated and URL with full list will be shown
MAX_REAL_ESTATES_IN_FIRST_EMAIL=Max number of real estates that will be shown in first (welcome) email MAX_REAL_ESTATES_IN_FIRST_EMAIL=Max number of real estates that will be shown in first (welcome) email
NO_CHECK_UP_DAYS=Check up email is sent after this number of days without notification CHECK_UP_DAYS=Check up email is sent after this number of days without notification
#=============== GOOGLE ANALYTICS =============# #=============== GOOGLE ANALYTICS =============#
GA_ID=Google Analytics ID GA_ID=Google Analytics ID
#=============== GOOGLE MAPS =============# #=============== GOOGLE MAPS =============#
GOOGLE_MAP_KEY=(your-key-here) API_MAP_KEY=(your-key-here)
#=============== AWS SDK EMAIL SETTINGS =======# #=============== AWS SDK EMAIL SETTINGS =======#
AWS_KEY_ID=(your-key-here) AWS_KEY_ID=(your-key-here)