Compare commits
23 Commits
staging-ta
...
email-dens
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
d7fcb2a278 | ||
|
|
6bad24d735 | ||
|
|
6791a509d0 | ||
|
|
edc6e2bbf7 | ||
|
|
4f230020d7 | ||
|
|
f62a7200c7 | ||
|
|
cff7cc2e9c | ||
|
|
7302edceec | ||
|
|
bd33a6b80e | ||
|
|
df2a962d0f | ||
|
|
be4508ebea | ||
|
|
81fa3f046d | ||
|
|
5bdc8e149a | ||
|
|
fc7fe3c0b3 | ||
|
|
b3007123a5 | ||
|
|
f7d4a9cd07 | ||
|
|
ab6812889a | ||
|
|
b82134e280 | ||
|
|
be378883c8 | ||
|
|
8a87b9e253 | ||
|
|
43bc23b164 | ||
|
|
fc6351af46 | ||
|
|
6267b2cab4 |
@@ -9,7 +9,7 @@ const APP_URL =
|
||||
? process.env.APP_URL || "http://market-alarm"
|
||||
: process.env.APP_URL || `${APP_BASE_URL}:${APP_PORT}`;
|
||||
|
||||
const STAGING = process.env.SETTINGS !== "production";
|
||||
const STAGING = process.env.ENVIRONMENT !== "production";
|
||||
|
||||
const DEFAULT_TIMEZONE = "Europe/Sarajevo";
|
||||
|
||||
@@ -41,6 +41,10 @@ const PROSTOR_LOGIN = {
|
||||
PASSWORD: process.env.PROSTOR_LOGIN_PASS
|
||||
};
|
||||
|
||||
const USER_AGENT =
|
||||
process.env.USER_AGENT ||
|
||||
"Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36";
|
||||
|
||||
module.exports = {
|
||||
APP_PORT,
|
||||
APP_URL,
|
||||
@@ -54,6 +58,6 @@ module.exports = {
|
||||
API_MAP_KEY,
|
||||
STAGING,
|
||||
CHECK_UP_DAYS,
|
||||
PROSTOR_LOGIN
|
||||
|
||||
PROSTOR_LOGIN,
|
||||
USER_AGENT
|
||||
};
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
"use strict";
|
||||
|
||||
const fetch = require("node-fetch");
|
||||
const fetch = require("../../helpers/fetchWrapper");
|
||||
const cheerio = require("cheerio");
|
||||
const Promise = require("bluebird");
|
||||
const moment = require("moment-timezone");
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
"use strict";
|
||||
|
||||
const fetch = require("node-fetch");
|
||||
const fetch = require("../../helpers/fetchWrapper");
|
||||
const cheerio = require("cheerio");
|
||||
const Promise = require("bluebird");
|
||||
const moment = require("moment-timezone");
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
"use strict";
|
||||
|
||||
const fetch = require("node-fetch");
|
||||
const fetch = require("../../helpers/fetchWrapper");
|
||||
const cheerio = require("cheerio");
|
||||
const moment = require("moment-timezone");
|
||||
const FormData = require("form-data");
|
||||
@@ -191,13 +191,7 @@ class ProstorCrawler {
|
||||
const { lat, lng, property_name, price, size, link, status } = realEstate;
|
||||
|
||||
//Status information is given already in realestate list
|
||||
//For VIP Ads status ='' canot be used, but no VIP ads are crawled
|
||||
//We will make "fake" vip ad for RE that have size=55
|
||||
//It is weird because yesterday it said 'VIP ponuda' ???
|
||||
const adStatus =
|
||||
size === "55"
|
||||
? ProstorCrawler.getStatusId("VIP ponuda")
|
||||
: ProstorCrawler.getStatusId(status);
|
||||
const adStatus = ProstorCrawler.getStatusId(status);
|
||||
|
||||
const url = `https://prostor.ba${link}`;
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
"use strict";
|
||||
|
||||
const fetch = require("node-fetch");
|
||||
const fetch = require("../../helpers/fetchWrapper");
|
||||
const cheerio = require("cheerio");
|
||||
const Promise = require("bluebird");
|
||||
const moment = require("moment-timezone");
|
||||
@@ -399,7 +399,9 @@ class RentalCrawler {
|
||||
);
|
||||
if (!publishedDateMoment.isValid()) {
|
||||
throw {
|
||||
message: `Invalid published date : ${extractedData["re_realEstates_inserted"]}`
|
||||
message: `Invalid published date : ${
|
||||
extractedData["re_realEstates_inserted"]
|
||||
}`
|
||||
};
|
||||
}
|
||||
|
||||
@@ -410,7 +412,9 @@ class RentalCrawler {
|
||||
);
|
||||
if (!renewedDateMoment.isValid()) {
|
||||
throw {
|
||||
message: `Invalid renewed date : ${extractedData["re_realEstates_edited"]}`
|
||||
message: `Invalid renewed date : ${
|
||||
extractedData["re_realEstates_edited"]
|
||||
}`
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
"use strict";
|
||||
|
||||
const fetch = require("node-fetch");
|
||||
const fetch = require("../../helpers/fetchWrapper");
|
||||
const cheerio = require("cheerio");
|
||||
const moment = require("moment-timezone");
|
||||
|
||||
@@ -268,6 +268,7 @@ class SaljicCrawler {
|
||||
|
||||
const descriptions = $(propertySelectors.descriptions)
|
||||
.text()
|
||||
.replace(/\"/g, "")
|
||||
.trim();
|
||||
|
||||
const latAndLongSrc = $(propertySelectors.latAndLong).attr("src");
|
||||
@@ -283,8 +284,8 @@ class SaljicCrawler {
|
||||
const locationLong = parseFloat(longText) || null;
|
||||
|
||||
//====== DETAIL INFORMATION FIELDS ==========
|
||||
let area,
|
||||
gardenSize,
|
||||
let area = null,
|
||||
gardenSize = null,
|
||||
numberOfRooms = null,
|
||||
numberOfFloors = null,
|
||||
floor = null,
|
||||
@@ -315,6 +316,7 @@ class SaljicCrawler {
|
||||
includingBills = null,
|
||||
animalsAllowed = null,
|
||||
pool = null,
|
||||
exchange = null,
|
||||
urbanPlanPermit = null,
|
||||
buildingPermit = null,
|
||||
utilityConnection = null,
|
||||
@@ -323,7 +325,14 @@ class SaljicCrawler {
|
||||
let renewedDate = null;
|
||||
let realEstateType;
|
||||
let numberOfViewsAgency = null;
|
||||
|
||||
let numberOfViewsKivi = null;
|
||||
let streetNumber = 0;
|
||||
let adStatus = status;
|
||||
let shortDescription = descriptions.substring(
|
||||
0,
|
||||
descriptions.indexOf(".")
|
||||
);
|
||||
let longDescription = descriptions;
|
||||
//Extracting data - Glavne karakteristike
|
||||
let mainFieldIndex = 1;
|
||||
do {
|
||||
@@ -482,29 +491,37 @@ class SaljicCrawler {
|
||||
renewedDate = new Date();
|
||||
}
|
||||
|
||||
const originAgencyName = AD_AGENCY.SALJIC;
|
||||
const locality = "";
|
||||
const municipality = "";
|
||||
const city = "";
|
||||
const region = "";
|
||||
const entity = "";
|
||||
const country = "";
|
||||
|
||||
const data = {
|
||||
url,
|
||||
agencyObjectId,
|
||||
originAgencyName: AD_AGENCY.SALJIC,
|
||||
originAgencyName,
|
||||
realEstateType,
|
||||
adType,
|
||||
title,
|
||||
price,
|
||||
area,
|
||||
gardenSize,
|
||||
shortDescription: descriptions.substring(0, descriptions.indexOf(".")),
|
||||
longDescription: descriptions,
|
||||
streetNumber: 0,
|
||||
shortDescription,
|
||||
longDescription,
|
||||
streetNumber,
|
||||
streetName,
|
||||
locality: "",
|
||||
municipality: "",
|
||||
city: "",
|
||||
region: "",
|
||||
entity: "",
|
||||
country: "",
|
||||
locality,
|
||||
municipality,
|
||||
city,
|
||||
region,
|
||||
entity,
|
||||
country,
|
||||
locationLat,
|
||||
locationLong,
|
||||
adStatus: status,
|
||||
adStatus,
|
||||
publishedDate,
|
||||
renewedDate,
|
||||
numberOfRooms,
|
||||
@@ -537,12 +554,15 @@ class SaljicCrawler {
|
||||
includingBills,
|
||||
animalsAllowed,
|
||||
pool,
|
||||
exchange,
|
||||
urbanPlanPermit,
|
||||
buildingPermit,
|
||||
utilityConnection,
|
||||
distanceToRiver,
|
||||
numberOfViewsAgency
|
||||
numberOfViewsAgency,
|
||||
numberOfViewsKivi
|
||||
};
|
||||
|
||||
return data;
|
||||
} catch (e) {
|
||||
console.error("Exception caught: " + e.message, "\r\nURL:", url);
|
||||
|
||||
@@ -332,10 +332,14 @@ const findRealEstatesForSearchRequest = async (searchRequest, maxResults) => {
|
||||
};
|
||||
}
|
||||
|
||||
//When includeIncompleteAds are not defined - null it will consider it true
|
||||
const order = [["updatedAt", "desc"]];
|
||||
|
||||
return db.RealEstate.findAll({
|
||||
where: includeIncompleteAds ? queryIncludeIncomplete : query,
|
||||
where:
|
||||
includeIncompleteAds || includeIncompleteAds == null
|
||||
? queryIncludeIncomplete
|
||||
: query,
|
||||
limit: maxResults,
|
||||
order
|
||||
});
|
||||
|
||||
@@ -156,30 +156,11 @@ const findSearchRequestsForRealEstate = async realEstate => {
|
||||
};
|
||||
} else {
|
||||
// If real estate dont have defined number of rooms ex. null
|
||||
//It returns requests that didn't choose number of rooms - also null
|
||||
//Or ones that picked some values but also picked to includeIncomplete ads
|
||||
//It returns all search requests except for ones that don't want incomplete ads
|
||||
numberOfRoomsQuery = {
|
||||
[Op.or]: [
|
||||
{
|
||||
[Op.and]: [
|
||||
{
|
||||
numberOfRoomsMin: {
|
||||
[Op.is]: null
|
||||
}
|
||||
},
|
||||
{
|
||||
numberOfRoomsMax: {
|
||||
[Op.is]: null
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
includeIncompleteAds: {
|
||||
[Op.eq]: true
|
||||
}
|
||||
}
|
||||
]
|
||||
includeIncompleteAds: {
|
||||
[Op.ne]: false
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
@@ -209,27 +190,9 @@ const findSearchRequestsForRealEstate = async realEstate => {
|
||||
};
|
||||
} else {
|
||||
numberOfFloorsQuery = {
|
||||
[Op.or]: [
|
||||
{
|
||||
[Op.and]: [
|
||||
{
|
||||
numberOfFloorsMin: {
|
||||
[Op.is]: null
|
||||
}
|
||||
},
|
||||
{
|
||||
numberOfFloorsMax: {
|
||||
[Op.is]: null
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
includeIncompleteAds: {
|
||||
[Op.eq]: true
|
||||
}
|
||||
}
|
||||
]
|
||||
includeIncompleteAds: {
|
||||
[Op.ne]: false
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
@@ -258,27 +221,9 @@ const findSearchRequestsForRealEstate = async realEstate => {
|
||||
};
|
||||
} else {
|
||||
floorQuery = {
|
||||
[Op.or]: [
|
||||
{
|
||||
[Op.and]: [
|
||||
{
|
||||
floorMin: {
|
||||
[Op.is]: null
|
||||
}
|
||||
},
|
||||
{
|
||||
floorMax: {
|
||||
[Op.is]: null
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
includeIncompleteAds: {
|
||||
[Op.eq]: true
|
||||
}
|
||||
}
|
||||
]
|
||||
includeIncompleteAds: {
|
||||
[Op.ne]: false
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
@@ -287,7 +232,7 @@ const findSearchRequestsForRealEstate = async realEstate => {
|
||||
//If user dont check checkbox for ex. elevator it does not mean he only wants no elevator
|
||||
//If real estate characteristic = true, find all requests (ones that want the characteristic or don't care) - no query needed
|
||||
//If real estate characteristic = false, find all requests except for ones that want the characteristic to be true
|
||||
//If real estate characteristic = null, dont know if true or false, find req that dont care or want char and want incomplete ads
|
||||
//If real estate characteristic = null, dont know if true or false, find all req except ones that dont want incomplete ads
|
||||
let balconyQuery = {};
|
||||
if (realEstateTypeObject.hasBalconyProp && balcony !== true) {
|
||||
if (balcony === false) {
|
||||
@@ -298,27 +243,9 @@ const findSearchRequestsForRealEstate = async realEstate => {
|
||||
};
|
||||
} else if (balcony === null) {
|
||||
balconyQuery = {
|
||||
[Op.or]: [
|
||||
{
|
||||
balcony: {
|
||||
[Op.ne]: true
|
||||
}
|
||||
},
|
||||
{
|
||||
[Op.and]: [
|
||||
{
|
||||
balcony: {
|
||||
[Op.eq]: true
|
||||
}
|
||||
},
|
||||
{
|
||||
includeIncompleteAds: {
|
||||
[Op.eq]: true
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
includeIncompleteAds: {
|
||||
[Op.ne]: false
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
@@ -332,27 +259,9 @@ const findSearchRequestsForRealEstate = async realEstate => {
|
||||
};
|
||||
} else if (newBuilding === null) {
|
||||
newBuildingQuery = {
|
||||
[Op.or]: [
|
||||
{
|
||||
newBuilding: {
|
||||
[Op.ne]: true
|
||||
}
|
||||
},
|
||||
{
|
||||
[Op.and]: [
|
||||
{
|
||||
newBuilding: {
|
||||
[Op.eq]: true
|
||||
}
|
||||
},
|
||||
{
|
||||
includeIncompleteAds: {
|
||||
[Op.eq]: true
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
includeIncompleteAds: {
|
||||
[Op.ne]: false
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
@@ -366,30 +275,13 @@ const findSearchRequestsForRealEstate = async realEstate => {
|
||||
};
|
||||
} else if (elevator === null) {
|
||||
elevatorQuery = {
|
||||
[Op.or]: [
|
||||
{
|
||||
elevator: {
|
||||
[Op.ne]: true
|
||||
}
|
||||
},
|
||||
{
|
||||
[Op.and]: [
|
||||
{
|
||||
elevator: {
|
||||
[Op.eq]: true
|
||||
}
|
||||
},
|
||||
{
|
||||
includeIncompleteAds: {
|
||||
[Op.eq]: true
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
includeIncompleteAds: {
|
||||
[Op.ne]: false
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
//General query consists of each individual query
|
||||
const query = {
|
||||
adType,
|
||||
@@ -423,10 +315,13 @@ const findSearchRequestsForRealEstate = async realEstate => {
|
||||
[Op.eq]: "ANY"
|
||||
};
|
||||
}
|
||||
//Tag to check if incomplete ads are accepted in query
|
||||
//Tag to check if incomplete ads are accepted in query which is default
|
||||
if (checkForIncompleteWanted) {
|
||||
query.includeIncompleteAds = {
|
||||
[Op.eq]: true
|
||||
[Op.or]: {
|
||||
[Op.eq]: true,
|
||||
[Op.is]: null
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
13
app/helpers/fetchWrapper.js
Normal file
13
app/helpers/fetchWrapper.js
Normal file
@@ -0,0 +1,13 @@
|
||||
const nodeFetch = require("node-fetch");
const { USER_AGENT } = require("../config/appConfig");

/**
 * Drop-in replacement for node-fetch that forces the configured User-Agent
 * header onto every outgoing request.
 *
 * Any User-Agent supplied by the caller is overwritten with USER_AGENT; all
 * other options and headers are forwarded to node-fetch unchanged.
 *
 * @param {*} url - Request target, passed to node-fetch as-is.
 * @param {Object} [options={}] - node-fetch options. Neither this object nor
 *   its `headers` value is modified.
 * @returns {Promise} The promise returned by node-fetch.
 */
const fetch = async (url, options = {}) => {
  // Build a fresh Headers object instead of writing into options.headers:
  // Object.assign only copies one level deep, so assigning into the caller's
  // headers would leak the User-Agent into an object the caller may reuse.
  // Going through Headers also covers callers that pass a Headers instance,
  // where a plain property assignment would not set a header at all.
  const callerHeaders = options ? options.headers : undefined;
  const headers = new nodeFetch.Headers(callerHeaders || undefined);
  headers.set("User-Agent", USER_AGENT);

  return nodeFetch(url, Object.assign({}, options, { headers }));
};

module.exports = fetch;
|
||||
@@ -15,15 +15,7 @@ module.exports = (sequelize, DataTypes) => {
|
||||
allowNull: false,
|
||||
defaultValue: {
|
||||
type: "Polygon",
|
||||
coordinates: [
|
||||
[
|
||||
[0, 0],
|
||||
[0, 0],
|
||||
[0, 0],
|
||||
[0, 0],
|
||||
[0, 0]
|
||||
]
|
||||
],
|
||||
coordinates: [[[0, 0], [0, 0], [0, 0], [0, 0], [0, 0]]],
|
||||
crs: { type: "name", properties: { name: "EPSG:4326" } }
|
||||
}
|
||||
},
|
||||
|
||||
@@ -131,7 +131,7 @@ const notifyRequestsWithDailyOption = async () => {
|
||||
};
|
||||
|
||||
const checkUpNotify = async () => {
|
||||
const searchRequestsForCheckUp = await findAllRequestsForCheckUp();
|
||||
/* const searchRequestsForCheckUp = await findAllRequestsForCheckUp();
|
||||
|
||||
const asyncSendEmailActions = [];
|
||||
|
||||
@@ -144,7 +144,7 @@ const checkUpNotify = async () => {
|
||||
asyncSendEmailActions.push(sendEmailPromise);
|
||||
sendEmailPromise.catch(err => console.log("[Email Sending Failed]", err));
|
||||
}
|
||||
await Promise.all(asyncSendEmailActions);
|
||||
await Promise.all(asyncSendEmailActions); */
|
||||
};
|
||||
|
||||
module.exports = {
|
||||
|
||||
@@ -61,9 +61,8 @@
|
||||
<p class="distinguished">
|
||||
<label class="checkbox-label">
|
||||
<input type="checkbox" class="filled-in" name="includeIncompleteAds"
|
||||
<% if (includeIncompleteAds) { %>
|
||||
checked
|
||||
<% } %>>
|
||||
>
|
||||
<span>Uključi i oglase bez potpunih informacija</span>
|
||||
</label>
|
||||
</p>
|
||||
|
||||
@@ -8,8 +8,9 @@ SEQUELIZE_LOGGING=0- no sequelize logging, 1- log to the console
|
||||
PORT=Port for the app, defaults to 5000
|
||||
APP_BASE_URL=base url for the app
|
||||
|
||||
SETTINGS=Variable to denote development, staging and production
|
||||
ENVIRONMENT=Variable to denote development, staging and production
|
||||
|
||||
USER_AGENT=User agent header to send in fetch requests
|
||||
|
||||
MAX_REAL_ESTATES_IN_EMAIL=Max number of real estates that will be shown in email, others will be truncated and URL with full list will be shown
|
||||
MAX_REAL_ESTATES_IN_FIRST_EMAIL=Max number of real estates that will be shown in first (welcome) email
|
||||
@@ -69,4 +70,4 @@ AKTIDO_FORCE_CRAWL=Non-zero value will force crawler to crawl all pages without
|
||||
SALJIC_MAX_RESULTS_PER_PAGE=For Saljic crawler, this represents how many ads are crawled at once
|
||||
SALJIC_CRAWLER_AD_TYPE=enum name of what type of ads should be crawled, check common/enums.js file for valid values
|
||||
SALJIC_CRAWLER_AD_CATEGORIES=comma separated list of enum names of categories to be included, check common/enums.js file for valid values
|
||||
SALJIC_FORCE_CRAWL=Non-zero value will force crawler to crawl all pages without stopping when known real estate is found
|
||||
SALJIC_FORCE_CRAWL=Non-zero value will force crawler to crawl all pages without stopping when known real estate is found
|
||||
|
||||
Reference in New Issue
Block a user