Compare commits

...

12 Commits

Author SHA1 Message Date
Naida Vatric
f56cd5b549 More elegant scrape of lat and long. 2020-02-17 21:55:24 +01:00
Naida Vatric
addd8c1344 Saljic crawler changed substring call. 2020-02-14 23:42:19 +01:00
Senad Uka
fc7fe3c0b3 Notification service disabled 2020-02-14 15:07:42 +01:00
Naida Vatric
b3007123a5 Merge branch 'rename-settings-var' into 'master'
Rename settings var

See merge request saburly/marketalarm/web!93
2020-02-10 20:17:08 +00:00
Naida Vatric
f7d4a9cd07 Renamed settings var to describe purpose. 2020-02-10 21:15:28 +01:00
Naida Vatric
ab6812889a Merge branch 'fixing-saljic-bugs' into 'master'
Fixing saljic bugs

See merge request saburly/marketalarm/web!92
2020-02-09 18:11:00 +00:00
Naida Vatric
b82134e280 Fixed saljic bug for heroku. 2020-02-09 19:09:00 +01:00
Naida Vatric
be378883c8 Just another fix try. 2020-02-08 00:47:00 +01:00
Naida Vatric
8a87b9e253 Another fix. 2020-02-08 00:27:26 +01:00
Naida Vatric
43bc23b164 Another fix. Defined more var. 2020-02-07 22:27:01 +01:00
Naida Vatric
fc6351af46 Added columns and logs for types. 2020-02-07 22:12:53 +01:00
Naida Vatric
6267b2cab4 Merge branch 'staging-tag-to-checkup-email' into 'master'
Added staging tag to checkup email. Email footer bug fixed.

See merge request saburly/marketalarm/web!91
2020-02-06 21:43:56 +00:00
4 changed files with 53 additions and 33 deletions

View File

@@ -9,7 +9,7 @@ const APP_URL =
? process.env.APP_URL || "http://market-alarm"
: process.env.APP_URL || `${APP_BASE_URL}:${APP_PORT}`;
const STAGING = process.env.SETTINGS !== "production";
const STAGING = process.env.ENVIRONMENT !== "production";
const DEFAULT_TIMEZONE = "Europe/Sarajevo";
@@ -55,5 +55,4 @@ module.exports = {
STAGING,
CHECK_UP_DAYS,
PROSTOR_LOGIN
};

View File

@@ -218,7 +218,7 @@ class SaljicCrawler {
}
async scrapeAd(url, adType) {
// console.log("[SALJIC] Scraping : ", url);
console.log("[SALJIC] Scraping : ", url);
try {
const adPageSource = await fetch(url);
const body = await adPageSource.text();
@@ -227,7 +227,9 @@ class SaljicCrawler {
// No information for status ex. PRODAN
const status = AD_STATUS.STATUS_NORMAL;
//Extracting agency ID from url
const agencyObjectId = parseInt(url.substring(46, url.length));
const agencyObjectId = url
? parseInt(url.substring(46, url.length))
: null;
//Extracting main properties
const propertySelectors = {
@@ -268,23 +270,20 @@ class SaljicCrawler {
const descriptions = $(propertySelectors.descriptions)
.text()
.replace(/\"/g, "")
.trim();
const latAndLongSrc = $(propertySelectors.latAndLong).attr("src");
const latText = latAndLongSrc.substring(
latAndLongSrc.indexOf("marker=") + 7,
latAndLongSrc.indexOf("%2C", latAndLongSrc.indexOf("marker="))
);
const longText = latAndLongSrc.substring(
latAndLongSrc.indexOf("%2C", latAndLongSrc.indexOf("marker=")) + 3,
latAndLongSrc.length
);
const tmpLatLong = latAndLongSrc.split("marker=")[1];
const latText = tmpLatLong.split("%2C")[0];
const longText = tmpLatLong.split("%2C")[1];
const locationLat = parseFloat(latText) || null;
const locationLong = parseFloat(longText) || null;
//====== DETAIL INFORMATION FIELDS ==========
let area,
gardenSize,
let area = null,
gardenSize = null,
numberOfRooms = null,
numberOfFloors = null,
floor = null,
@@ -315,6 +314,7 @@ class SaljicCrawler {
includingBills = null,
animalsAllowed = null,
pool = null,
exchange = null,
urbanPlanPermit = null,
buildingPermit = null,
utilityConnection = null,
@@ -323,7 +323,13 @@ class SaljicCrawler {
let renewedDate = null;
let realEstateType;
let numberOfViewsAgency = null;
let numberOfViewsKivi = null;
let streetNumber = 0;
let adStatus = status;
let shortDescription = descriptions
? descriptions.substring(0, descriptions.indexOf("."))
: "";
let longDescription = descriptions || "";
//Extracting data - Glavne karakteristike
let mainFieldIndex = 1;
do {
@@ -334,10 +340,14 @@ class SaljicCrawler {
.replace(/[\n\r\t]/gm, "")
.trim();
const mainFieldTitle = mainField.substring(0, mainField.indexOf(" "));
const mainFieldTitle = mainField
? mainField.substring(0, mainField.indexOf(" "))
: "";
const mainFieldValue = mainField
.substring(mainField.indexOf(" "), mainField.length)
.trim();
? mainField
.substring(mainField.indexOf(" "), mainField.length)
.trim()
: "";
switch (mainFieldTitle) {
case "Površina":
@@ -482,29 +492,37 @@ class SaljicCrawler {
renewedDate = new Date();
}
const originAgencyName = AD_AGENCY.SALJIC;
const locality = "";
const municipality = "";
const city = "";
const region = "";
const entity = "";
const country = "";
const data = {
url,
agencyObjectId,
originAgencyName: AD_AGENCY.SALJIC,
originAgencyName,
realEstateType,
adType,
title,
price,
area,
gardenSize,
shortDescription: descriptions.substring(0, descriptions.indexOf(".")),
longDescription: descriptions,
streetNumber: 0,
shortDescription,
longDescription,
streetNumber,
streetName,
locality: "",
municipality: "",
city: "",
region: "",
entity: "",
country: "",
locality,
municipality,
city,
region,
entity,
country,
locationLat,
locationLong,
adStatus: status,
adStatus,
publishedDate,
renewedDate,
numberOfRooms,
@@ -537,12 +555,15 @@ class SaljicCrawler {
includingBills,
animalsAllowed,
pool,
exchange,
urbanPlanPermit,
buildingPermit,
utilityConnection,
distanceToRiver,
numberOfViewsAgency
numberOfViewsAgency,
numberOfViewsKivi
};
return data;
} catch (e) {
console.error("Exception caught: " + e.message, "\r\nURL:", url);

View File

@@ -131,7 +131,7 @@ const notifyRequestsWithDailyOption = async () => {
};
const checkUpNotify = async () => {
const searchRequestsForCheckUp = await findAllRequestsForCheckUp();
/* const searchRequestsForCheckUp = await findAllRequestsForCheckUp();
const asyncSendEmailActions = [];
@@ -144,7 +144,7 @@ const checkUpNotify = async () => {
asyncSendEmailActions.push(sendEmailPromise);
sendEmailPromise.catch(err => console.log("[Email Sending Failed]", err));
}
await Promise.all(asyncSendEmailActions);
await Promise.all(asyncSendEmailActions); */
};
module.exports = {

View File

@@ -8,7 +8,7 @@ SEQUELIZE_LOGGING=0- no sequelize logging, 1- log to the console
PORT=Port for the app, defaults to 5000
APP_BASE_URL=base url for the app
SETTINGS=Variable to denote development, staging and production
ENVIRONMENT=Variable to denote development, staging and production
MAX_REAL_ESTATES_IN_EMAIL=Max number of real estates that will be shown in email; others will be truncated and a URL with the full list will be shown