Compare commits

...

23 Commits

Author SHA1 Message Date
Naida Vatric
d7fcb2a278 Merge branch 'master' after user-agent change into email-density 2020-02-21 14:26:33 +01:00
Naida Vatric
6bad24d735 New query for search req search. 2020-02-21 14:25:10 +01:00
Bilal Catic
6791a509d0 make user agent header configurable through env variable 2020-02-20 21:07:16 +01:00
Bilal Catic
edc6e2bbf7 Merge branch 'create-fetch-wrapper-with-user-agent' into 'master'
Create fetch wrapper with user agent

See merge request saburly/marketalarm/web!98
2020-02-20 19:58:32 +00:00
Bilal Catic
4f230020d7 use fetch wrapper instead of node-fetch 2020-02-20 19:49:29 +01:00
Bilal Catic
f62a7200c7 create fetch wrapper with mandatory user agent header 2020-02-20 19:47:30 +01:00
Bilal Catic
cff7cc2e9c apply prettier 2020-02-20 19:46:39 +01:00
Naida Vatric
7302edceec Changed queries logic again. 2020-02-18 15:04:26 +01:00
Naida Vatric
bd33a6b80e Logs for query check. 2020-02-17 23:24:55 +01:00
Naida Vatric
df2a962d0f Merge branch 'prostor-vip-ads-fix' into 'master'
Prostor VIP ads fixed.

See merge request saburly/marketalarm/web!94
2020-02-17 14:44:58 +00:00
Naida Vatric
be4508ebea Merge branch 'include-incomplete-ads-inverse' into 'master'
Default true for include incomplete ads.

See merge request saburly/marketalarm/web!96
2020-02-17 14:44:35 +00:00
Naida Vatric
81fa3f046d Default true for include incomplete ads. 2020-02-15 00:52:06 +01:00
Naida Vatric
5bdc8e149a Prostor VIP ads fixed. 2020-02-14 22:41:51 +01:00
Senad Uka
fc7fe3c0b3 Notificaton service disabled 2020-02-14 15:07:42 +01:00
Naida Vatric
b3007123a5 Merge branch 'rename-settings-var' into 'master'
Rename settings var

See merge request saburly/marketalarm/web!93
2020-02-10 20:17:08 +00:00
Naida Vatric
f7d4a9cd07 Renamed settings var to describe purpose. 2020-02-10 21:15:28 +01:00
Naida Vatric
ab6812889a Merge branch 'fixing-saljic-bugs' into 'master'
Fixing saljic bugs

See merge request saburly/marketalarm/web!92
2020-02-09 18:11:00 +00:00
Naida Vatric
b82134e280 Fixed saljic bug for heroku. 2020-02-09 19:09:00 +01:00
Naida Vatric
be378883c8 Just another fix try. 2020-02-08 00:47:00 +01:00
Naida Vatric
8a87b9e253 Another fix. 2020-02-08 00:27:26 +01:00
Naida Vatric
43bc23b164 Another fix. Defined more var. 2020-02-07 22:27:01 +01:00
Naida Vatric
fc6351af46 Added columns and logs for types. 2020-02-07 22:12:53 +01:00
Naida Vatric
6267b2cab4 Merge branch 'staging-tag-to-checkup-email' into 'master'
Added staging tag to checkup email. Email footer bug fixed.

See merge request saburly/marketalarm/web!91
2020-02-06 21:43:56 +00:00
13 changed files with 105 additions and 179 deletions

View File

@@ -9,7 +9,7 @@ const APP_URL =
? process.env.APP_URL || "http://market-alarm"
: process.env.APP_URL || `${APP_BASE_URL}:${APP_PORT}`;
const STAGING = process.env.SETTINGS !== "production";
const STAGING = process.env.ENVIRONMENT !== "production";
const DEFAULT_TIMEZONE = "Europe/Sarajevo";
@@ -41,6 +41,10 @@ const PROSTOR_LOGIN = {
PASSWORD: process.env.PROSTOR_LOGIN_PASS
};
const USER_AGENT =
process.env.USER_AGENT ||
"Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36";
module.exports = {
APP_PORT,
APP_URL,
@@ -54,6 +58,6 @@ module.exports = {
API_MAP_KEY,
STAGING,
CHECK_UP_DAYS,
PROSTOR_LOGIN
PROSTOR_LOGIN,
USER_AGENT
};

View File

@@ -1,6 +1,6 @@
"use strict";
const fetch = require("node-fetch");
const fetch = require("../../helpers/fetchWrapper");
const cheerio = require("cheerio");
const Promise = require("bluebird");
const moment = require("moment-timezone");

View File

@@ -1,6 +1,6 @@
"use strict";
const fetch = require("node-fetch");
const fetch = require("../../helpers/fetchWrapper");
const cheerio = require("cheerio");
const Promise = require("bluebird");
const moment = require("moment-timezone");

View File

@@ -1,6 +1,6 @@
"use strict";
const fetch = require("node-fetch");
const fetch = require("../../helpers/fetchWrapper");
const cheerio = require("cheerio");
const moment = require("moment-timezone");
const FormData = require("form-data");
@@ -191,13 +191,7 @@ class ProstorCrawler {
const { lat, lng, property_name, price, size, link, status } = realEstate;
//Status information is given already in realestate list
//For VIP Ads status ='' canot be used, but no VIP ads are crawled
//We will make "fake" vip ad for RE that have size=55
//It is weird because yesterday it said 'VIP ponuda' ???
const adStatus =
size === "55"
? ProstorCrawler.getStatusId("VIP ponuda")
: ProstorCrawler.getStatusId(status);
const adStatus = ProstorCrawler.getStatusId(status);
const url = `https://prostor.ba${link}`;

View File

@@ -1,6 +1,6 @@
"use strict";
const fetch = require("node-fetch");
const fetch = require("../../helpers/fetchWrapper");
const cheerio = require("cheerio");
const Promise = require("bluebird");
const moment = require("moment-timezone");
@@ -399,7 +399,9 @@ class RentalCrawler {
);
if (!publishedDateMoment.isValid()) {
throw {
message: `Invalid published date : ${extractedData["re_realEstates_inserted"]}`
message: `Invalid published date : ${
extractedData["re_realEstates_inserted"]
}`
};
}
@@ -410,7 +412,9 @@ class RentalCrawler {
);
if (!renewedDateMoment.isValid()) {
throw {
message: `Invalid renewed date : ${extractedData["re_realEstates_edited"]}`
message: `Invalid renewed date : ${
extractedData["re_realEstates_edited"]
}`
};
}

View File

@@ -1,6 +1,6 @@
"use strict";
const fetch = require("node-fetch");
const fetch = require("../../helpers/fetchWrapper");
const cheerio = require("cheerio");
const moment = require("moment-timezone");
@@ -268,6 +268,7 @@ class SaljicCrawler {
const descriptions = $(propertySelectors.descriptions)
.text()
.replace(/\"/g, "")
.trim();
const latAndLongSrc = $(propertySelectors.latAndLong).attr("src");
@@ -283,8 +284,8 @@ class SaljicCrawler {
const locationLong = parseFloat(longText) || null;
//====== DETAIL INFORMATION FIELDS ==========
let area,
gardenSize,
let area = null,
gardenSize = null,
numberOfRooms = null,
numberOfFloors = null,
floor = null,
@@ -315,6 +316,7 @@ class SaljicCrawler {
includingBills = null,
animalsAllowed = null,
pool = null,
exchange = null,
urbanPlanPermit = null,
buildingPermit = null,
utilityConnection = null,
@@ -323,7 +325,14 @@ class SaljicCrawler {
let renewedDate = null;
let realEstateType;
let numberOfViewsAgency = null;
let numberOfViewsKivi = null;
let streetNumber = 0;
let adStatus = status;
let shortDescription = descriptions.substring(
0,
descriptions.indexOf(".")
);
let longDescription = descriptions;
//Extracting data - Glavne karakteristike
let mainFieldIndex = 1;
do {
@@ -482,29 +491,37 @@ class SaljicCrawler {
renewedDate = new Date();
}
const originAgencyName = AD_AGENCY.SALJIC;
const locality = "";
const municipality = "";
const city = "";
const region = "";
const entity = "";
const country = "";
const data = {
url,
agencyObjectId,
originAgencyName: AD_AGENCY.SALJIC,
originAgencyName,
realEstateType,
adType,
title,
price,
area,
gardenSize,
shortDescription: descriptions.substring(0, descriptions.indexOf(".")),
longDescription: descriptions,
streetNumber: 0,
shortDescription,
longDescription,
streetNumber,
streetName,
locality: "",
municipality: "",
city: "",
region: "",
entity: "",
country: "",
locality,
municipality,
city,
region,
entity,
country,
locationLat,
locationLong,
adStatus: status,
adStatus,
publishedDate,
renewedDate,
numberOfRooms,
@@ -537,12 +554,15 @@ class SaljicCrawler {
includingBills,
animalsAllowed,
pool,
exchange,
urbanPlanPermit,
buildingPermit,
utilityConnection,
distanceToRiver,
numberOfViewsAgency
numberOfViewsAgency,
numberOfViewsKivi
};
return data;
} catch (e) {
console.error("Exception caught: " + e.message, "\r\nURL:", url);

View File

@@ -332,10 +332,14 @@ const findRealEstatesForSearchRequest = async (searchRequest, maxResults) => {
};
}
//When includeIncompleteAds are not defined - null it will consider it true
const order = [["updatedAt", "desc"]];
return db.RealEstate.findAll({
where: includeIncompleteAds ? queryIncludeIncomplete : query,
where:
includeIncompleteAds || includeIncompleteAds == null
? queryIncludeIncomplete
: query,
limit: maxResults,
order
});

View File

@@ -156,30 +156,11 @@ const findSearchRequestsForRealEstate = async realEstate => {
};
} else {
// If real estate dont have defined number of rooms ex. null
//It returns requests that didn't choose number of rooms - also null
//Or ones that picked some values but also picked to includeIncomplete ads
//It returns all search requests except for ones that don't want incomplete ads
numberOfRoomsQuery = {
[Op.or]: [
{
[Op.and]: [
{
numberOfRoomsMin: {
[Op.is]: null
}
},
{
numberOfRoomsMax: {
[Op.is]: null
}
}
]
},
{
includeIncompleteAds: {
[Op.eq]: true
}
}
]
includeIncompleteAds: {
[Op.ne]: false
}
};
}
}
@@ -209,27 +190,9 @@ const findSearchRequestsForRealEstate = async realEstate => {
};
} else {
numberOfFloorsQuery = {
[Op.or]: [
{
[Op.and]: [
{
numberOfFloorsMin: {
[Op.is]: null
}
},
{
numberOfFloorsMax: {
[Op.is]: null
}
}
]
},
{
includeIncompleteAds: {
[Op.eq]: true
}
}
]
includeIncompleteAds: {
[Op.ne]: false
}
};
}
}
@@ -258,27 +221,9 @@ const findSearchRequestsForRealEstate = async realEstate => {
};
} else {
floorQuery = {
[Op.or]: [
{
[Op.and]: [
{
floorMin: {
[Op.is]: null
}
},
{
floorMax: {
[Op.is]: null
}
}
]
},
{
includeIncompleteAds: {
[Op.eq]: true
}
}
]
includeIncompleteAds: {
[Op.ne]: false
}
};
}
}
@@ -287,7 +232,7 @@ const findSearchRequestsForRealEstate = async realEstate => {
//If user dont check checkbox for ex. elevator it does not mean he only wants no elevator
//If real estate characteristic =true find all req, one that wants charachertistic or dont care - dont need query
//If real estate characteristic = false, find all req except for ones that want characteristic to be true
//If real estate characteristic = null, dont know if true or false, find req that dont care or want char and want incomplete ads
//If real estate characteristic = null, dont know if true or false, find all req except ones that dont want incomplete ads
let balconyQuery = {};
if (realEstateTypeObject.hasBalconyProp && balcony !== true) {
if (balcony === false) {
@@ -298,27 +243,9 @@ const findSearchRequestsForRealEstate = async realEstate => {
};
} else if (balcony === null) {
balconyQuery = {
[Op.or]: [
{
balcony: {
[Op.ne]: true
}
},
{
[Op.and]: [
{
balcony: {
[Op.eq]: true
}
},
{
includeIncompleteAds: {
[Op.eq]: true
}
}
]
}
]
includeIncompleteAds: {
[Op.ne]: false
}
};
}
}
@@ -332,27 +259,9 @@ const findSearchRequestsForRealEstate = async realEstate => {
};
} else if (newBuilding === null) {
newBuildingQuery = {
[Op.or]: [
{
newBuilding: {
[Op.ne]: true
}
},
{
[Op.and]: [
{
newBuilding: {
[Op.eq]: true
}
},
{
includeIncompleteAds: {
[Op.eq]: true
}
}
]
}
]
includeIncompleteAds: {
[Op.ne]: false
}
};
}
}
@@ -366,30 +275,13 @@ const findSearchRequestsForRealEstate = async realEstate => {
};
} else if (elevator === null) {
elevatorQuery = {
[Op.or]: [
{
elevator: {
[Op.ne]: true
}
},
{
[Op.and]: [
{
elevator: {
[Op.eq]: true
}
},
{
includeIncompleteAds: {
[Op.eq]: true
}
}
]
}
]
includeIncompleteAds: {
[Op.ne]: false
}
};
}
}
//General query consists of each individual query
const query = {
adType,
@@ -423,10 +315,13 @@ const findSearchRequestsForRealEstate = async realEstate => {
[Op.eq]: "ANY"
};
}
//Tag to check if incomplete ads are accepted in query
//Tag to check if incomplete ads are accepted in query which is default
if (checkForIncompleteWanted) {
query.includeIncompleteAds = {
[Op.eq]: true
[Op.or]: {
[Op.eq]: true,
[Op.is]: null
}
};
}

View File

@@ -0,0 +1,13 @@
const nodeFetch = require("node-fetch");
const { USER_AGENT } = require("../config/appConfig");
/**
 * Builds a fresh headers value carrying the mandatory User-Agent, leaving the
 * caller's headers untouched.
 *
 * @param {Object|Array|undefined|null} headers - Headers supplied by the caller:
 *   a plain object, an array of [name, value] pairs, or a Headers-like instance
 *   (anything exposing a `set` method).
 * @returns {Object|Array} A new value of the same shape with "User-Agent" set
 *   to USER_AGENT.
 */
const withUserAgent = headers => {
  if (headers && typeof headers.set === "function") {
    // Headers-like instance: clone through its own constructor so the caller's
    // instance is not modified, then go through the header API.
    const cloned = new headers.constructor(headers);
    cloned.set("User-Agent", USER_AGENT);
    return cloned;
  }

  // Header names are case-insensitive; drop any caller-supplied variant so it
  // does not end up next to the enforced one.
  const isUserAgent = name => String(name).toLowerCase() === "user-agent";

  if (Array.isArray(headers)) {
    return headers
      .filter(pair => !(pair && isUserAgent(pair[0])))
      .concat([["User-Agent", USER_AGENT]]);
  }

  const copy = {};
  for (const name of Object.keys(headers || {})) {
    if (!isUserAgent(name)) {
      copy[name] = headers[name];
    }
  }
  copy["User-Agent"] = USER_AGENT;
  return copy;
};

/**
 * Wrapper around node-fetch that always sends the configured User-Agent.
 *
 * The options object and its headers are copied, so objects passed in by the
 * caller (which may be shared between requests) are never mutated.
 *
 * @param {string|Object} url - Forwarded unchanged to node-fetch.
 * @param {Object} [options={}] - Forwarded to node-fetch with `headers` replaced
 *   by a copy that contains the mandatory User-Agent.
 * @returns {Promise<*>} Whatever node-fetch resolves with.
 */
const fetch = async (url, options = {}) => {
  const newOptions = Object.assign({}, options);
  newOptions.headers = withUserAgent(newOptions.headers);
  return nodeFetch(url, newOptions);
};
module.exports = fetch;

View File

@@ -15,15 +15,7 @@ module.exports = (sequelize, DataTypes) => {
allowNull: false,
defaultValue: {
type: "Polygon",
coordinates: [
[
[0, 0],
[0, 0],
[0, 0],
[0, 0],
[0, 0]
]
],
coordinates: [[[0, 0], [0, 0], [0, 0], [0, 0], [0, 0]]],
crs: { type: "name", properties: { name: "EPSG:4326" } }
}
},

View File

@@ -131,7 +131,7 @@ const notifyRequestsWithDailyOption = async () => {
};
const checkUpNotify = async () => {
const searchRequestsForCheckUp = await findAllRequestsForCheckUp();
/* const searchRequestsForCheckUp = await findAllRequestsForCheckUp();
const asyncSendEmailActions = [];
@@ -144,7 +144,7 @@ const checkUpNotify = async () => {
asyncSendEmailActions.push(sendEmailPromise);
sendEmailPromise.catch(err => console.log("[Email Sending Failed]", err));
}
await Promise.all(asyncSendEmailActions);
await Promise.all(asyncSendEmailActions); */
};
module.exports = {

View File

@@ -61,9 +61,8 @@
<p class="distinguished">
<label class="checkbox-label">
<input type="checkbox" class="filled-in" name="includeIncompleteAds"
<% if (includeIncompleteAds) { %>
checked
<% } %>>
>
<span>Uključi i oglase bez potpunih informacija</span>
</label>
</p>

View File

@@ -8,8 +8,9 @@ SEQUELIZE_LOGGING=0- no sequelize logging, 1- log to the console
PORT=Port for the app, defaults to 5000
APP_BASE_URL=base url for the app
SETTINGS=Variable to denote development, staging and production
ENVIRONMENT=Variable to denote development, staging and production
USER_AGENT=User agent header to send in fetch requests
MAX_REAL_ESTATES_IN_EMAIL=Max number of real estates that will be shown in email, others will be truncated and URL with full list will be shown
MAX_REAL_ESTATES_IN_FIRST_EMAIL=Max number of real estates that will be shown in first (welcome) email
@@ -69,4 +70,4 @@ AKTIDO_FORCE_CRAWL=Non-zero value will force crawler to crawl all pages without
SALJIC_MAX_RESULTS_PER_PAGE=For Saljic crawler, this represents how many ads are crawled at once
SALJIC_CRAWLER_AD_TYPE=enum name of what type of ads should be crawled, check common/enums.js file for valid values
SALJIC_CRAWLER_AD_CATEGORIES=comma separated list of enum names of categories to be included, check common/enums.js file for valid values
SALJIC_FORCE_CRAWL=Non-zero value will force crawler to crawl all pages without stopping when known real estate is found
SALJIC_FORCE_CRAWL=Non-zero value will force crawler to crawl all pages without stopping when known real estate is found