Compare commits

...

52 Commits

Author SHA1 Message Date
Naida Vatric
5d792846ae Update docker file, readme and setup script 2019-12-05 22:46:50 +01:00
Bilal Catic
ab8373651e update garage price slider options 2019-11-18 19:05:00 +01:00
Bilal Catic
ade09f6f15 change sale and rent action title 2019-11-18 18:56:34 +01:00
Bilal Catic
e4edc24cad Merge branch 'replace-front-page-next-button' into 'master'
select ad type on welcome page; update css

See merge request saburly/marketalarm/web!73
2019-11-18 14:49:07 +00:00
Bilal Catic
44565d2f89 select ad type on welcome page; update css 2019-11-18 10:48:41 +01:00
Bilal Catic
860014662a Merge branch 'add-more-real-estate-filters-to-crawler' into 'master'
Add more real estate filters to crawler

See merge request saburly/marketalarm/web!72
2019-11-14 13:58:57 +00:00
Bilal Catic
af42d2c448 improve OLX ad status detection 2019-11-14 08:47:48 +01:00
Bilal Catic
5148f88a62 improve Rental and Aktido ad status detection 2019-11-14 08:31:57 +01:00
Bilal Catic
a7cd75653d improve OLX ad status detection 2019-11-14 08:04:58 +01:00
Bilal Catic
168b2186e7 add more fields to the Prostor real estates crawler 2019-11-14 07:23:23 +01:00
Bilal Catic
1e68d640e2 add RENTED enum status 2019-11-14 07:22:54 +01:00
Bilal Catic
c13857bc09 add additional fields to the Prostor crawler 2019-11-14 02:09:42 +01:00
Bilal Catic
618dcd217e update ENV variables template file 2019-11-14 02:09:22 +01:00
Bilal Catic
3b3e2eda07 refactor Prostor crawler 2019-11-13 16:54:16 +01:00
Bilal Catic
ae93d2f03d update ENV variable description 2019-11-13 16:52:55 +01:00
Bilal Catic
a63671959b improve real estate properties detection for Rental 2019-11-12 22:53:16 +01:00
Bilal Catic
b6d68db3a3 improve real estate properties detection for aktido 2019-11-12 21:39:28 +01:00
Bilal Catic
c91e56c46e add additional real estate fields for Aktido crawler 2019-11-11 19:34:43 +01:00
Bilal Catic
e871550ba6 add two more heating types for Rental crawler 2019-11-11 18:46:01 +01:00
Bilal Catic
debdd01b28 add new fields to the Rental crawler 2019-11-11 17:15:46 +01:00
Bilal Catic
9e10800b02 add new heating type ENUM 2019-11-11 17:15:14 +01:00
Bilal Catic
cb9bb9e566 add rental scraper test script 2019-11-11 03:34:15 +01:00
Bilal Catic
b6024af2cb add new fields for OLX crawler 2019-11-08 17:05:51 +01:00
Bilal Catic
50514aaf03 add new ENUMS for real estate properties 2019-11-08 16:40:15 +01:00
Bilal Catic
9ba41dd7f7 add columns for update on duplicate real estate 2019-11-08 16:39:37 +01:00
Bilal Catic
02f5b97e80 add migration for new real estate fields; update real estate model 2019-11-08 16:27:55 +01:00
Bilal Catic
7242e233e3 Merge branch 'replace-frontend-arrow-functions-with-old-style-function' into 'master'
replace arrow functions on frontend with old style function

See merge request saburly/marketalarm/web!71
2019-11-08 13:11:56 +00:00
Bilal Catic
a77730cc5f replace arrow function with old style function 2019-11-08 14:07:00 +01:00
Bilal Catic
90db3025b5 delete obsolete range file 2019-11-08 14:06:22 +01:00
Bilal Catic
8a95409606 stop using spread operator 2019-11-08 14:03:04 +01:00
Bilal Catic
c2ffc906ea replace arrow functions on frontend with old style function 2019-11-08 13:52:55 +01:00
Bilal Catic
43747eb942 add deletedEmail field to the SearchRequest model 2019-11-05 18:33:31 +01:00
Bilal Catic
d07d0a3453 Merge branch 'move-unsubscribed-email-to-different-column' into 'master'
move email to different column on unsubscribe action

See merge request saburly/marketalarm/web!70
2019-11-05 13:20:41 +00:00
Bilal Catic
c87a1fc8a8 move email to different column on unsubscribe action 2019-11-05 14:19:56 +01:00
Bilal Catic
91cda0ff0f Merge branch 'prevent-sending-emails-to-unsubscribed-users' into 'master'
check if user is subscribed before sending email

See merge request saburly/marketalarm/web!69
2019-11-05 06:54:59 +00:00
Bilal Catic
310448dcb8 check if user is subscribed before sending email 2019-11-05 07:54:23 +01:00
Bilal Catic
8ea44f5fc7 Merge branch 'add-email-frequency-option' into 'master'
Add email frequency option

See merge request saburly/marketalarm/web!68
2019-11-04 13:28:44 +00:00
Bilal Catic
4d5571b1d8 improve email notification copy; add different copy for daily email 2019-11-04 14:28:11 +01:00
Bilal Catic
2be013de1f add npm script for sending daily notifications 2019-11-04 11:03:43 +01:00
Bilal Catic
23e319da5e add method for sending daily notifications 2019-11-04 11:02:26 +01:00
Bilal Catic
a120dfc4a3 add condition for sending emails based on email frequency property 2019-11-04 11:01:47 +01:00
Bilal Catic
5b2961d992 add db helper for searching not notified search request matches 2019-11-04 10:59:51 +01:00
Bilal Catic
b6bc67e442 add search request association to the Search Request Match model 2019-11-04 10:57:54 +01:00
Bilal Catic
46dbe40891 add missing ENV variables to env file template 2019-11-02 00:07:01 +01:00
Bilal Catic
7cc9550031 save/load selected email frequency on query review step 2019-11-01 19:21:37 +01:00
Bilal Catic
1117592f4c add migration and update model to include email frequency in search req 2019-11-01 11:43:08 +01:00
Bilal Catic
a0449f7ffd implement segmented select control without JS 2019-11-01 08:53:50 +01:00
Bilal Catic
e3e0ddd508 stop logging scrape action for Rental crawler 2019-11-01 01:02:45 +01:00
Bilal Catic
2e3ddbac95 fix request ad type bug 2019-11-01 00:01:02 +01:00
Bilal Catic
5433a71859 Merge branch 'add-padding-for-page-content' into 'master'
Add padding for page content

See merge request saburly/marketalarm/web!67
2019-10-31 18:17:25 +00:00
Bilal Catic
6aff0d221b move query review table from top 2019-10-31 19:16:57 +01:00
Bilal Catic
5bc0d4f8c2 Merge branch 'implement-renting-option-frontend' into 'master'
Implement renting option frontend

See merge request saburly/marketalarm/web!65
2019-10-31 18:12:42 +00:00
37 changed files with 2501 additions and 407 deletions

View File

@@ -3,6 +3,7 @@ FROM postgres:11.3
ENV POSTGIS_MAJOR 2.4
RUN apt-get update \
&& apt-get --assume-yes install postgresql-11-postgis-2.5-scripts\
&& apt-get --assume-yes install software-properties-common postgis\
&& rm -rf /var/lib/apt/lists/

View File

@@ -4,6 +4,8 @@ The purpose of this project is to build a web application that enables subscribi
## Setup
* Before setup, confirm that Docker is installed by running `docker --version`. If it is not, install it from the official site.
### Setup with npm commands
1. Install packages
@@ -24,7 +26,7 @@ this will create and run postgres image and then execute migrations
`docker build -t marketalerts .`
2. Run postgres image with
`docker run --name pg_marketalerts -d -p 5432:5432 marketalerts`
`docker run -e POSTGRES_USER=docker -e POSTGRES_PASSWORD=docker -e POSTGRES_DB=marketalerts --name pg_marketalerts -d -p 5432:5432 marketalerts`
3. Install packages
`npm install`
@@ -41,3 +43,4 @@ this will create and run postgres image and then execute migrations
- AWS SES credentials are handled with env variables
- Notification emails are sent in batches of 50, by using SES templates
- Make sure that you are using different templates for different environments

View File

@@ -58,13 +58,23 @@ const GARAGE_SIZE_SLIDER_OPTIONS = {
connect: true
};
const GARAGE_PRICE_SLIDER_OPTIONS = {
const GARAGE_PRICE_SLIDER_OPTIONS_SALE = {
start: [2000, 10000],
range: {
min: [0],
max: [100000]
max: [60000]
},
step: 500,
step: 200,
connect: true
};
// Price slider settings for garages when the ad type is rent; assigned to
// `priceSliderOptionsRent` of the GARAGE entry in AD_CATEGORY.
// NOTE(review): the option names (start / range / step / connect) look like a
// range-slider widget configuration where `start` holds the two initial handle
// values — confirm against the frontend slider library. The price unit is not
// visible here.
const GARAGE_PRICE_SLIDER_OPTIONS_RENT = {
start: [50, 150],
range: {
min: [0],
max: [1000]
},
step: 10,
connect: true
};
@@ -72,12 +82,12 @@ const AD_TYPE = {
AD_TYPE_SALE: {
id: 1,
stringId: "SALE",
title: "Prodaja"
title: "Kupi"
},
AD_TYPE_RENT: {
id: 2,
stringId: "RENT",
title: "Najam"
title: "Unajmi"
},
AD_TYPE_REQUEST: {
id: 3,
@@ -135,8 +145,8 @@ const AD_CATEGORY = {
id: "GARAGE",
title: "Garaža",
hasGardenSize: false,
priceSliderOptionsSale: PRICE_SLIDER_OPTIONS_SALE,
priceSliderOptionsRent: PRICE_SLIDER_OPTIONS_RENT,
priceSliderOptionsSale: GARAGE_PRICE_SLIDER_OPTIONS_SALE,
priceSliderOptionsRent: GARAGE_PRICE_SLIDER_OPTIONS_RENT,
sizeSliderOptions: GARAGE_SIZE_SLIDER_OPTIONS
},
COTTAGE: {
@@ -156,7 +166,8 @@ const AD_STATUS = {
STATUS_SOLD: 3,
STATUS_DELETED: 4,
STATUS_URGENT: 5,
STATUS_DISCOUNTED: 6
STATUS_DISCOUNTED: 6,
STATUS_RENTED: 7
};
const AD_AGENCY = {
@@ -174,10 +185,100 @@ const CRAWLER_AD_TYPE = {
ONLY_REQUEST: 4
};
// Email notification frequency choices for a search request.
// - `id`: numeric value; the query review controller compares it with the
//   parsed `req.body.emailFrequency` form field.
// - `stringId`: value the controller writes to `searchRequest.emailFrequency`.
// - `title`: human-readable label (presumably rendered by the queryReview
//   view, which receives this whole object — confirm in the template).
const EMAIL_FREQUENCY = {
ASAP: {
id: 1,
stringId: "ASAP",
title: "Odmah"
},
DAILY: {
id: 2,
stringId: "DAILY",
title: "Jednom dnevno"
}
};
// Heating types that can be attached to a real estate record.
// Each entry is stored under its own `id`, so the table below lists only
// [id, title] pairs and the keyed object is derived from them.
const HEATING_TYPE = [
  ["NO_HEATING", "Nije uvedeno"],
  ["ELECTRICITY", "Struja"],
  ["GAS", "Plin"],
  ["WOOD", "Drva"],
  ["CENTRAL_CITY", "Centralno (gradsko)"],
  ["CENTRAL_BOILER", "Centralno (kotlovnica)"],
  ["CENTRAL_GAS", "Centralno (plin)"],
  ["HEAT_PUMP", "Toplotna pumpa"],
  ["OTHER", "Drugo"]
].reduce((entriesById, [id, title]) => {
  entriesById[id] = { id, title };
  return entriesById;
}, {});
// Access road surface types that can be attached to a real estate record.
// Each entry is stored under its own `id`; the keyed object is derived from
// the [id, title] pairs below.
const ACCESS_ROAD_TYPE = [
  ["ASPHALT", "Asfalt"],
  ["CONCRETE", "Beton"],
  ["MACADAM", "Makadam"],
  ["OTHER", "Drugo"]
].reduce((entriesById, [id, title]) => {
  entriesById[id] = { id, title };
  return entriesById;
}, {});
// Furnishing levels that can be attached to a real estate record.
// Each entry is stored under its own `id`; the keyed object is derived from
// the [id, title] pairs below.
const FURNISHING_TYPE = [
  ["NOT_FURNISHED", "Nenamješten"],
  ["HALF_FURNISHED", "Polunamješten"],
  ["FURNISHED", "Namješten"]
].reduce((entriesById, [id, title]) => {
  entriesById[id] = { id, title };
  return entriesById;
}, {});
module.exports = {
AD_TYPE,
AD_CATEGORY,
AD_STATUS,
AD_AGENCY,
CRAWLER_AD_TYPE
CRAWLER_AD_TYPE,
EMAIL_FREQUENCY,
HEATING_TYPE,
ACCESS_ROAD_TYPE,
FURNISHING_TYPE
};

View File

@@ -3,9 +3,9 @@ const { isValidEmail } = require("../helpers/email");
const {
notifyForNewSearchRequest
} = require("../services/notificationService");
const { AD_CATEGORY, AD_TYPE } = require("../common/enums");
const { AD_CATEGORY, AD_TYPE, EMAIL_FREQUENCY } = require("../common/enums");
const getQueryReviewData = searchRequest => {
const getQueryReviewTableData = searchRequest => {
const {
id,
adType,
@@ -87,15 +87,26 @@ const getQueryReview = async (req, res) => {
const title = "Da li je ovo to što ste tražili ?";
const nextStep = req.query.nextStep;
const error = req.query.error;
const queryReviewData = getQueryReviewData(searchRequest);
const queryReviewTableData = getQueryReviewTableData(searchRequest);
const email = searchRequest.email;
let selectedEmailFrequency;
switch (searchRequest.emailFrequency) {
case EMAIL_FREQUENCY.ASAP.stringId:
selectedEmailFrequency = EMAIL_FREQUENCY.ASAP.id;
break;
case EMAIL_FREQUENCY.DAILY.stringId:
selectedEmailFrequency = EMAIL_FREQUENCY.DAILY.id;
break;
}
res.render("queryReview", {
nextStep,
queryReviewData,
queryReviewTableData,
title,
email,
error
selectedEmailFrequency,
error,
EMAIL_FREQUENCY
});
};
@@ -107,17 +118,26 @@ const postQueryReview = async (req, res) => {
}
const nextStep = req.query.nextStep || "/ponovo";
const emailFrequency =
parseInt(req.body.emailFrequency) || EMAIL_FREQUENCY.ASAP.id;
const emailInput = req.body.email;
const emailConfirmInput = req.body.confirmEmail;
const title = "Da li je ovo to što ste tražili ?";
const queryReviewData = getQueryReviewData(searchRequest);
const queryReviewTableData = getQueryReviewTableData(searchRequest);
let emailFrequencyStringId = EMAIL_FREQUENCY.ASAP.stringId;
if (emailFrequency === EMAIL_FREQUENCY.DAILY.id) {
emailFrequencyStringId = EMAIL_FREQUENCY.DAILY.stringId;
}
searchRequest.emailFrequency = emailFrequencyStringId;
if (emailInput !== emailConfirmInput) {
const error = "Greška ! Unešeni emailovi nisu isti";
res.render("queryReview", {
error,
title,
queryReviewData,
queryReviewTableData,
email: ""
});
return;
@@ -128,7 +148,7 @@ const postQueryReview = async (req, res) => {
res.render("queryReview", {
error,
title,
queryReviewData,
queryReviewTableData,
email: ""
});
return;
@@ -147,7 +167,7 @@ const postQueryReview = async (req, res) => {
res.render("queryReview", {
error,
title,
queryReviewData,
queryReviewTableData,
email: ""
});
return;
@@ -164,7 +184,7 @@ const postQueryReview = async (req, res) => {
res.render("queryReview", {
error,
title,
queryReviewData,
queryReviewTableData,
email: ""
});
return;

View File

@@ -10,6 +10,8 @@ const getUnsubscribe = async (req, res) => {
}
searchRequest.subscribed = false;
searchRequest.deletedEmail = searchRequest.email;
searchRequest.email = "";
await searchRequest.save();
res.render("unsubscribe", { nextStep: "/vrstanekretnine", title });

View File

@@ -1,7 +1,42 @@
const { createSearchRequest } = require("../helpers/db/searchRequest");
const { AD_TYPE, AD_CATEGORY } = require("../common/enums");
const getWelcome = (req, res) => {
res.render("welcome", { nextStep: "/vrstanekretnine", title: false });
res.render("welcome", {
title: false,
AD_TYPE
});
};
/**
 * Handles the welcome form submission: creates a search request for the
 * chosen ad type and redirects to the next step for that request.
 *
 * The submitted `adType` is matched against the RENT and SALE ids; anything
 * else (missing, unparsable, unknown) is treated as SALE. The new search
 * request is always created with the FLAT real estate type.
 *
 * If creating the search request fails, the error is logged and the user is
 * redirected back to "/".
 *
 * @param {object} req - request; `req.body.adType` carries the selected ad type id.
 * @param {object} res - response; only `res.redirect` is used.
 */
const postWelcome = async (req, res) => {
  const requestedAdTypeId = parseInt(req.body.adType);

  let adTypeStringId;
  switch (requestedAdTypeId) {
    case AD_TYPE.AD_TYPE_RENT.id:
      adTypeStringId = AD_TYPE.AD_TYPE_RENT.stringId;
      break;
    case AD_TYPE.AD_TYPE_SALE.id:
    default:
      adTypeStringId = AD_TYPE.AD_TYPE_SALE.stringId;
      break;
  }

  // Resolves to the URL the user should be sent to, never rejects.
  const resolveRedirectTarget = async () => {
    try {
      const createdRequest = await createSearchRequest({
        adType: adTypeStringId,
        realEstateType: AD_CATEGORY.FLAT.id
      });
      return `/vrstanekretnine/${createdRequest.id}`;
    } catch (failure) {
      console.log(failure);
      return `/`;
    }
  };

  res.redirect(await resolveRedirectTarget());
};
module.exports = {
getWelcome
getWelcome,
postWelcome
};

View File

@@ -29,5 +29,6 @@ module.exports = {
PROSTOR_CRAWLER_AD_CATEGORIES: transformedProstorCrawlerAdCategories,
PROSTOR_IGNORED_USERNAMES: prostorIgnoredUsernames || [],
PROSTOR_DELAY_BETWEEN_PAGES:
parseInt(process.env.PROSTOR_DELAY_BETWEEN_PAGES) || 1000
parseInt(process.env.PROSTOR_DELAY_BETWEEN_PAGES) || 1000,
PROSTOR_FORCE_CRAWL: !!parseInt(process.env.PROSTOR_FORCE_CRAWL)
};

View File

@@ -11,7 +11,10 @@ const {
AD_CATEGORY,
AD_AGENCY,
AD_STATUS,
CRAWLER_AD_TYPE
CRAWLER_AD_TYPE,
HEATING_TYPE,
ACCESS_ROAD_TYPE,
FURNISHING_TYPE
} = require("../../common/enums");
const {
@@ -219,6 +222,7 @@ class AktidoCrawler {
throw { message: "Can't find ad data JSON" };
}
let adStatus = AD_STATUS.STATUS_NORMAL;
const aktidoId = extractedData["re_realEstates_id"];
const adCategory = this.getKiviCategoryIdFromAktidoId(
parseInt(extractedData["re_types_id"])
@@ -237,6 +241,181 @@ class AktidoCrawler {
};
}
const descriptionIds = extractedData["re_descriptions_id"]
.split(",")
.map(stringNumber => parseInt(stringNumber));
if (!Array.isArray(descriptionIds)) {
throw {
message:
'Expected array od descriptions but "re_descriptions_id" not found !'
};
}
const spaceIds = extractedData["re_spaces_id"]
.split(",")
.map(stringNumber => parseInt(stringNumber));
if (!Array.isArray(spaceIds)) {
throw {
message: 'Expected array od spaces but "re_spaces_id" not found !'
};
}
const infrastructureIds = extractedData["re_infrastructure_id"]
.split(",")
.map(stringNumber => parseInt(stringNumber));
if (!Array.isArray(infrastructureIds)) {
throw {
message:
'Expected array od infrastructures but "re_infrastructure_id" not found !'
};
}
const floorNoIds = extractedData["re_floorNO_id"]
.split(",")
.map(stringNumber => parseInt(stringNumber));
if (!Array.isArray(floorNoIds)) {
throw {
message:
'Expected array od infrastructures but "re_floorNO_id" not found !'
};
}
// counting floor enums
// for (let i = 1; i < 10; i++) {
// const floorEnumsTitle = $(
// `body > div.container-fluid > div.container > div:nth-child(2) > div.col-xs-12.col-sm-12.col-md-12.col-lg-9.content-main > div:nth-child(1) > div > div > div.col-xs-12.col-sm-4.box-details > div.body > p:nth-child(${i}) > span:nth-child(1)`
// )
// .text()
// .trim();
// if (floorEnumsTitle === "Spratnost:") {
// const floorEnumsValue = $(
// `body > div.container-fluid > div.container > div:nth-child(2) > div.col-xs-12.col-sm-12.col-md-12.col-lg-9.content-main > div:nth-child(1) > div > div > div.col-xs-12.col-sm-4.box-details > div.body > p:nth-child(${i}) > span:nth-child(2)`
// )
// .text()
// .trim()
// .split(",");
//
// console.log("==========");
// floorNoIds.forEach((id, index) => {
// console.log("\t", id, " = ", floorEnumsValue[index]);
// });
// break;
// }
// }
// enumerating infrastructure - relation between id and infrastructure title
// let found = false;
// let infrastructureDescriptions = {};
// for (let i = 1; i < 5; i++) {
// found = false;
// for (let j = 1; j < 10; j++) {
// const infrastructureTitle = $(
// `#b2 > div > div:nth-child(${i}) > div > ul > li:nth-child(${j}) > strong`
// )
// .text()
// .trim();
// if (infrastructureTitle === "Osnovna infrastruktura:") {
// found = true;
//
// const infrastructureValues = $(
// `#b2 > div > div:nth-child(${i}) > div > ul > li:nth-child(${j}) > div`
// )
// .text()
// .trim()
// .split(",");
//
// infrastructureIds.forEach((id, index) => {
// infrastructureDescriptions[id] = infrastructureValues[index];
// });
// }
// }
// if (found) {
// break;
// }
// }
const realEstatePropertiesFromDescriptions = this.getPropertiesFromDescriptions(
descriptionIds
);
const realEstatePropertiesFromSpaces = this.getPropertiesFromSpaces(
spaceIds
);
const realEstatePropertiesFromInfrastructure = this.getPropertiesFromInfrastructure(
infrastructureIds
);
if (extractedData["adm_realEstates_discount"] === "1") {
adStatus = AD_STATUS.STATUS_DISCOUNTED;
}
let numberOfRooms =
parseInt(extractedData["re_realEstates_roomsNO"]) +
parseInt(extractedData["re_realEstates_bedroomNO"]) || null,
numberOfFloors =
parseInt(extractedData["re_realEstates_floorsNO"]) ||
this.getNumberOfFloorsFromFloorId(extractedData["re_floorNO_id"]),
floor =
parseInt(extractedData["re_realEstates_floorNO"]) ||
this.getFloorNumberFromFloorId(extractedData["re_floorNO_id"]),
accessRoadType = realEstatePropertiesFromDescriptions.accessRoadType,
heatingType =
this.getHeatingTypeId(extractedData["re_heating_id"]) || null,
furnishingType = realEstatePropertiesFromDescriptions.furnishingType,
balcony =
realEstatePropertiesFromDescriptions.balcony ||
realEstatePropertiesFromSpaces.balcony,
newBuilding = extractedData["op_realEstates_newBuilding"]
? extractedData["op_realEstates_newBuilding"] === "1"
: null,
elevator = realEstatePropertiesFromDescriptions.elevator,
water =
realEstatePropertiesFromDescriptions.water ||
realEstatePropertiesFromInfrastructure.water,
electricity =
realEstatePropertiesFromDescriptions.electricity ||
realEstatePropertiesFromInfrastructure.electricity,
drainageSystem =
realEstatePropertiesFromInfrastructure.drainageSystem,
registeredInZkBooks =
extractedData["op_realEstates_ownerPermit"] === 1 || null,
recentlyAdapted = null,
parking =
realEstatePropertiesFromDescriptions.parking ||
realEstatePropertiesFromSpaces.parking,
garage = realEstatePropertiesFromSpaces.garage,
gas = realEstatePropertiesFromInfrastructure.gas,
antiTheftDoor = realEstatePropertiesFromDescriptions.antiTheftDoor,
airCondition = realEstatePropertiesFromDescriptions.airCondition,
phoneConnection =
realEstatePropertiesFromInfrastructure.phoneConnection,
cableTV = realEstatePropertiesFromInfrastructure.cableTV,
internet = realEstatePropertiesFromInfrastructure.internet,
basementAttic = realEstatePropertiesFromSpaces.basementAttic,
storeRoom = realEstatePropertiesFromSpaces.storeRoom,
videoSurveillance =
realEstatePropertiesFromDescriptions.videoSurveillance ||
realEstatePropertiesFromInfrastructure.videoSurveillance,
alarm = realEstatePropertiesFromDescriptions.alarm,
suitableForStudents = null,
includingBills =
extractedData["op_realEstates_utilitiesIncluded"] === "1" || null,
animalsAllowed = null,
pool = realEstatePropertiesFromDescriptions.pool,
urbanPlanPermit =
extractedData["op_realEstates_locationPermit"] === "1" ||
realEstatePropertiesFromDescriptions.urbanPlanPermit,
buildingPermit =
extractedData["op_realEstates_buildingPermit"] === "1" || null,
utilityConnection =
realEstatePropertiesFromDescriptions.utilityConnection,
distanceToRiver = null,
numberOfViewsAgency = null;
const title = extractedData["re_realEstates_portalName"];
const extractedPrice = parseFloat(
extractedData["re_realEstates_price"]
@@ -277,8 +456,6 @@ class AktidoCrawler {
};
}
const adStatus = AD_STATUS.STATUS_NORMAL;
const data = {
url,
agencyObjectId: aktidoId,
@@ -303,7 +480,42 @@ class AktidoCrawler {
locationLong,
adStatus,
publishedDate: publishedDateMoment.toISOString(),
renewedDate: renewedDateMoment.toISOString()
renewedDate: renewedDateMoment.toISOString(),
numberOfRooms,
numberOfFloors,
floor,
accessRoadType,
heatingType,
furnishingType,
balcony,
newBuilding,
elevator,
water,
electricity,
drainageSystem,
registeredInZkBooks,
recentlyAdapted,
parking,
garage,
gas,
antiTheftDoor,
airCondition,
phoneConnection,
cableTV,
internet,
basementAttic,
storeRoom,
videoSurveillance,
alarm,
suitableForStudents,
includingBills,
animalsAllowed,
pool,
urbanPlanPermit,
buildingPermit,
utilityConnection,
distanceToRiver,
numberOfViewsAgency
};
return data;
@@ -350,6 +562,270 @@ class AktidoCrawler {
}
}
getPropertiesFromDescriptions(descriptionIds) {
const result = {
accessRoadType: null,
furnishingType: null,
balcony: null,
elevator: null,
parking: null,
antiTheftDoor: null,
airCondition: null,
videoSurveillance: null,
alarm: null,
pool: null,
urbanPlanPermit: null,
utilityConnection: null,
water: null,
electricity: null
};
for (const descriptionId of descriptionIds) {
switch (descriptionId) {
case 16:
result.furnishingType = FURNISHING_TYPE.NOT_FURNISHED.id;
break;
case 17:
result.furnishingType = FURNISHING_TYPE.HALF_FURNISHED.id;
break;
case 1:
case 28:
result.furnishingType = FURNISHING_TYPE.FURNISHED.id;
break;
case 14:
result.elevator = true;
break;
case 39:
result.electricity = true;
break;
case 40:
result.water = true;
break;
case 41:
case 58:
result.accessRoadType = ACCESS_ROAD_TYPE.ASPHALT.id;
break;
case 26:
result.balcony = true;
break;
case 62:
result.parking = true;
break;
case 3:
result.antiTheftDoor = true;
break;
case 2:
case 21:
result.airCondition = true;
break;
case 4:
result.alarm = true;
break;
case 55:
result.videoSurveillance = true;
break;
case 9:
result.pool = true;
break;
case 60:
result.urbanPlanPermit = true;
break;
case 38:
result.utilityConnection = true;
break;
}
}
return result;
}
getPropertiesFromSpaces(spaceIds) {
const result = {
balcony: null,
parking: null,
garage: null,
basementAttic: null,
storeRoom: null
};
for (const spaceId of spaceIds) {
switch (spaceId) {
case 36:
case 12:
result.parking = true;
break;
case 1:
case 2:
case 3:
result.balcony = true;
break;
case 4:
case 30:
result.garage = true;
break;
case 9:
case 10:
result.storeRoom = true;
break;
case 18:
case 34:
case 37:
case 27:
result.basementAttic = true;
break;
}
}
return result;
}
getHeatingTypeId(heatingRentalId) {
// heatingRentalId can have multiple values, like: "1, 2, 3", parseInt will take first integer value
const heatingId = parseInt(heatingRentalId);
switch (heatingId) {
case 27:
case 16:
return HEATING_TYPE.GAS.id;
case 4:
return HEATING_TYPE.CENTRAL_GAS.id;
case 3:
case 23:
case 6:
case 7:
case 8:
case 9:
case 10:
return HEATING_TYPE.CENTRAL_BOILER.id;
case 2:
case 13:
case 30:
case 17:
case 29:
case 31:
return HEATING_TYPE.ELECTRICITY.id;
case 24:
case 25:
case 12:
return HEATING_TYPE.CENTRAL_CITY.id;
case 26:
case 21:
case 20:
return HEATING_TYPE.WOOD.id;
case 28:
case 19:
return HEATING_TYPE.HEAT_PUMP.id;
case 14:
case 32:
return HEATING_TYPE.OTHER.id;
default:
return null;
}
}
getPropertiesFromInfrastructure(infrastructureIds) {
const result = {
electricity: null,
water: null,
gas: null,
drainageSystem: null,
phoneConnection: null,
internet: null,
videoSurveillance: null,
cableTV: null
};
for (const infrastructureId of infrastructureIds) {
switch (infrastructureId) {
case 1:
result.electricity = true;
break;
case 2:
result.water = true;
break;
case 4:
result.gas = true;
break;
case 5:
result.drainageSystem = true;
break;
case 7:
case 8:
result.phoneConnection = true;
break;
case 10:
result.internet = true;
break;
case 11:
result.cableTV = true;
break;
case 16:
case 17:
result.videoSurveillance = true;
break;
}
}
return result;
}
getFloorNumberFromFloorId(floorsIdText) {
// floorIdText can be array of numbers, separated by comma or number
// just extracting floor number from first element
const floorsId = floorsIdText.split(",");
if (floorsId.length === 0) {
return null;
}
const firstFloorId = parseInt(floorsId[0]);
// 1 pod
// 2 sut
// 3 raz
// 4 pri
// 5 vpri
// 6 prv
// 7 dru
// 8 tre
// 9 čet
// 10 man
// 11
// 12 pot
// 13 vpot
// 14 tav
// 15 pet
const floorNumber = [
-1,
-1,
0,
0,
1,
1,
2,
3,
4,
null,
null,
null,
null,
null,
5
];
return floorNumber[firstFloorId - 1] || null;
}
getNumberOfFloorsFromFloorId(floorsIdText) {
// floorIdText can be array of numbers, separated by comma or number
const floorIds = floorsIdText.split(",");
if (floorIds.length === 0) {
return null;
}
return floorIds.length;
}
async sleep(ms) {
return new Promise(resolve => setTimeout(resolve, ms));
}
@@ -362,7 +838,7 @@ class AktidoCrawler {
// }
//For now, we use only Postgres saver, so ...
return await savers[0].save(results);
return savers[0].save(results);
//so that we can use some sequelize options and information when data is inserted
}
}

View File

@@ -10,7 +10,10 @@ const {
AD_CATEGORY,
AD_AGENCY,
AD_STATUS,
CRAWLER_AD_TYPE
CRAWLER_AD_TYPE,
HEATING_TYPE,
FURNISHING_TYPE,
ACCESS_ROAD_TYPE
} = require("../../common/enums");
const {
@@ -271,6 +274,7 @@ class OlxCrawler {
//====== OTHER AD INFORMATION ===============
let adType = null;
let olxId = null;
let numberOfViewsAgency = null;
let otherInformationDivId;
//We need to locate DIV ID where other information are stored
@@ -293,6 +297,7 @@ class OlxCrawler {
const olxIdFieldSelector = `#artikal_glavni_div > div.artikal_lijevo > div:nth-child(${otherInformationDivId}) > div:nth-child(4)`;
const publishedDateValueSelector = `#artikal_glavni_div > div.artikal_lijevo > div:nth-child(${otherInformationDivId}) > div:nth-child(3) > div.df2.neanimiraj > time`;
const numberOfViewsAgencyValueSelector = `#artikal_glavni_div > div.artikal_lijevo > div:nth-child(${otherInformationDivId}) > div:nth-child(6) > div.df2`;
const renewedDateFullValueSelector = `#artikal_glavni_div > div.artikal_lijevo > div.op.ob.pop`;
const publishedDate = $(publishedDateValueSelector)
@@ -331,60 +336,7 @@ class OlxCrawler {
)
.text()
.trim();
const olxIdFieldTitle = $(`${olxIdFieldSelector} > div.df1`)
.text()
.trim();
olxId = $(`${olxIdFieldSelector} > div.df2`)
.text()
.trim();
if (olxIdFieldTitle !== "OLX ID") {
throw { message: "Cannot find correct OLX ID" };
}
//===========================================
//====== DETAIL INFORMATION FIELDS ==========
let area = null;
let gardenSize = null;
let fieldIndex = 1;
do {
const fieldSelector = `#dodatnapolja1 > div:nth-child(${fieldIndex})`;
const fieldTitleSelector = `${fieldSelector} > div.df1`;
const fieldValueSelector = `${fieldSelector} > div.df2`;
const fieldTitle = $(fieldTitleSelector)
.text()
.trim();
const fieldValue = $(fieldValueSelector)
.text()
.trim();
switch (fieldTitle) {
case "Kvadrata":
area = fieldValue;
break;
case "Okućnica (kvadratura)":
gardenSize = fieldValue;
break;
}
if (++fieldIndex === OLX_ENUMS.MAX_DETAIL_FIELDS || fieldTitle === "") {
break;
}
} while (true);
//===========================================
//====== UNUSED FIELDS FOR NOW ==============
const time = $("time").attr("datetime");
const numberOfViews = $(
"#artikal_glavni_div > div.artikal_lijevo > div:nth-child(18) > div:nth-child(6) > div.df2"
)
.text()
.trim();
//===========================================
//=========================================
const parsedCategory = this.getAdCategoryId(category);
if (!parsedCategory) {
throw { message: `Unknown ad category [${category}]` };
@@ -395,6 +347,221 @@ class OlxCrawler {
throw { message: "Unknown ad type" };
}
const olxIdFieldTitle = $(`${olxIdFieldSelector} > div.df1`)
.text()
.trim();
olxId = $(`${olxIdFieldSelector} > div.df2`)
.text()
.trim();
numberOfViewsAgency = parseInt(
$(numberOfViewsAgencyValueSelector)
.text()
.trim()
);
if (olxIdFieldTitle !== "OLX ID") {
throw { message: "Cannot find correct OLX ID" };
}
//===========================================
//====== DETAIL INFORMATION FIELDS ==========
let area,
gardenSize,
numberOfRooms = null,
numberOfFloors = null,
floor = null,
accessRoadType = null,
heatingType = null,
furnishingType = null,
balcony = null,
newBuilding = null,
elevator = null,
water = null,
electricity = null,
drainageSystem = null,
registeredInZkBooks = null,
recentlyAdapted = null,
parking = null,
garage = null,
gas = null,
antiTheftDoor = null,
airCondition = null,
phoneConnection = null,
cableTV = null,
internet = null,
basementAttic = null,
storeRoom = null,
videoSurveillance = null,
alarm = null,
suitableForStudents = null,
includingBills = null,
animalsAllowed = null,
pool = null,
urbanPlanPermit = null,
buildingPermit = null,
utilityConnection = null,
distanceToRiver = null;
let fieldIndex = 1;
do {
const fieldSelector = `#dodatnapolja1 > div:nth-child(${fieldIndex})`;
const fieldTitleSelector = `${fieldSelector} > div.df1`;
const fieldValueSelector = `${fieldSelector} > div.df2`;
const fieldTitle = $(fieldTitleSelector)
.text()
.trim()
.toLowerCase();
const fieldValue = $(fieldValueSelector)
.text()
.trim()
.toLowerCase();
switch (fieldTitle) {
case "kvadrata":
area = fieldValue;
break;
case "okućnica (kvadratura)":
gardenSize = fieldValue;
break;
case "broj soba":
numberOfRooms = this.parseNumberOfRooms(fieldValue, parsedCategory);
break;
case "broj prostorija":
numberOfRooms = this.parseNumberOfRooms(fieldValue, parsedCategory);
break;
case "broj spratova":
numberOfFloors = this.parseNumberOfFloors(
fieldValue,
parsedCategory
);
break;
case "sprat":
floor = this.parseFloorNumber(fieldValue, parsedCategory);
break;
case "vrsta grijanja":
heatingType = this.getHeatingTypeId(fieldValue);
break;
case "namješten?":
furnishingType = this.getFurnishingTypeId(fieldValue);
break;
case "namješten":
furnishingType = FURNISHING_TYPE.FURNISHED.id;
break;
case "namještena":
furnishingType = FURNISHING_TYPE.FURNISHED.id;
break;
case "voda":
water = true;
break;
case "struja":
electricity = true;
break;
case "kanalizacija":
drainageSystem = fieldValue !== "nema";
break;
case "godina izgradnje":
newBuilding = newBuilding || fieldValue === "novogradnja";
break;
case "kućni ljubimci":
animalsAllowed = fieldValue === "da";
break;
case "uknjiženo / zk":
registeredInZkBooks = true;
break;
case "uknjiženo (zk)":
registeredInZkBooks = true;
break;
case "novogradnja":
newBuilding = true;
break;
case "nedavno adaptiran":
recentlyAdapted = true;
break;
case "nedavno adaptirana":
recentlyAdapted = true;
break;
case "balkon":
balcony = true;
break;
case "lift":
elevator = true;
break;
case "parking":
parking = true;
break;
case "garaža":
garage = true;
break;
case "plin":
gas = true;
break;
case "blindirana vrata":
antiTheftDoor = true;
break;
case "klima":
airCondition = true;
break;
case "telefonski priključak":
phoneConnection = true;
break;
case "kablovska tv":
cableTV = true;
break;
case "internet":
internet = true;
break;
case "podrum/tavan":
basementAttic = true;
break;
case "ostava/špajz":
storeRoom = true;
break;
case "video nadzor":
videoSurveillance = true;
break;
case "alarm":
alarm = true;
break;
case "za studente":
suitableForStudents = true;
break;
case "uključen trošak režija":
includingBills = true;
break;
case "građevinska dozvola":
buildingPermit = true;
break;
case "komunalni priključak":
utilityConnection = true;
break;
case "urbanistička dozvola":
urbanPlanPermit = true;
break;
case "udaljenost od rijeke (m)":
distanceToRiver = parseInt(fieldValue) || null;
break;
case "prilaz":
accessRoadType = this.getAccessRoadTypeId(fieldValue);
break;
case "bazen":
pool = true;
break;
case "iznajmljeno":
status = AD_STATUS.STATUS_RENTED;
break;
default:
// console.log(fieldTitle, " = ", fieldValue);
break;
}
if (++fieldIndex === OLX_ENUMS.MAX_DETAIL_FIELDS || fieldTitle === "") {
break;
}
} while (true);
//===========================================
//=========================================
const parsedArea = this.parseArea(area) || null;
const parsedGardenSize = this.parseArea(gardenSize) || null;
const parsedPrice = this.parsePrice(price) || null;
@@ -409,6 +576,13 @@ class OlxCrawler {
locationLong = parseFloat(locationLatLngMatches[2]) || null;
}
if (
title.indexOf("[PRODANO]") !== -1 ||
title.indexOf("[ZAVRŠENO]") !== -1
) {
status = AD_STATUS.STATUS_SOLD;
}
const data = {
url,
agencyObjectId: olxId,
@@ -439,7 +613,42 @@ class OlxCrawler {
locationLong,
adStatus: status,
publishedDate: publishedDateMoment.toISOString(),
renewedDate: renewedDateMoment.toISOString()
renewedDate: renewedDateMoment.toISOString(),
numberOfRooms,
numberOfFloors,
floor,
accessRoadType,
heatingType,
furnishingType,
balcony,
newBuilding,
elevator,
water,
electricity,
drainageSystem,
registeredInZkBooks,
recentlyAdapted,
parking,
garage,
gas,
antiTheftDoor,
airCondition,
phoneConnection,
cableTV,
internet,
basementAttic,
storeRoom,
videoSurveillance,
alarm,
suitableForStudents,
includingBills,
animalsAllowed,
pool,
urbanPlanPermit,
buildingPermit,
utilityConnection,
distanceToRiver,
numberOfViewsAgency
};
return data;
@@ -479,12 +688,70 @@ class OlxCrawler {
case "Izdavanje":
return AD_TYPE.AD_TYPE_RENT.stringId;
case "Potražnja":
return AD_TYPE.AD_TYPE_RENT.stringId;
return AD_TYPE.AD_TYPE_REQUEST.stringId;
default:
return undefined;
}
}
getHeatingTypeId(heatingTypeText) {
switch (heatingTypeText) {
case "struja":
return HEATING_TYPE.ELECTRICITY.id;
case "plin":
return HEATING_TYPE.GAS.id;
case "drva":
return HEATING_TYPE.WOOD.id;
case "centralno (gradsko)":
return HEATING_TYPE.CENTRAL_CITY.id;
case "centralno (kotlovnica)":
return HEATING_TYPE.CENTRAL_BOILER.id;
case "centralno (plin)":
return HEATING_TYPE.CENTRAL_GAS.id;
case "nije uvedeno":
return HEATING_TYPE.NO_HEATING.id;
case "ostalo":
return HEATING_TYPE.OTHER.id;
case "drugo":
return HEATING_TYPE.OTHER.id;
default:
console.log("grijanje = NEPOZNATO [", heatingTypeText, "]");
return null;
}
}
getFurnishingTypeId(furnishingTypeText) {
switch (furnishingTypeText) {
case "namješten":
return FURNISHING_TYPE.FURNISHED.id;
case "polunamješten":
return FURNISHING_TYPE.HALF_FURNISHED.id;
case "nenamješten":
return FURNISHING_TYPE.NOT_FURNISHED.id;
case "":
return FURNISHING_TYPE.FURNISHED.id;
default:
console.log("namješten = NEPOZNATO [", furnishingTypeText, "]");
return null;
}
}
getAccessRoadTypeId(accessRoadTypeText) {
switch (accessRoadTypeText) {
case "asfalt":
return ACCESS_ROAD_TYPE.ASPHALT.id;
case "beton":
return ACCESS_ROAD_TYPE.CONCRETE.id;
case "makadam":
return ACCESS_ROAD_TYPE.MACADAM.id;
case "ostalo":
return ACCESS_ROAD_TYPE.OTHER.id;
default:
console.log("pristup = NEPOZNATO [", accessRoadTypeText, "]");
return null;
}
}
parseArea(areaText) {
if (!areaText) {
return NaN;
@@ -505,56 +772,100 @@ class OlxCrawler {
return parseFloat(formattedPriceText);
}
parseRenewedDate(renewedDateText) {
const currentMoment = moment.tz(DEFAULT_TIMEZONE);
if (renewedDateText.includes("Prije mjesec dana")) {
return currentMoment.add(-1, "month");
}
if (renewedDateText.includes("Jučer")) {
return currentMoment.add(-1, "day");
}
if (renewedDateText.includes("Prije sat")) {
return currentMoment.add(-1, "hour");
}
if (renewedDateText.includes("dan")) {
// format for this case should be "Prije N dana" or "Prije N dan"
const dateParts = renewedDateText.split(" ");
if (dateParts[0] === "Prije") {
const numberOfDays = parseInt(dateParts[1]);
return currentMoment.add(-1 * numberOfDays, "days");
} else {
return undefined;
parseNumberOfRooms(numberOfRoomsText, categoryId) {
if (categoryId === AD_CATEGORY.FLAT.id) {
switch (numberOfRoomsText) {
case "garsonjera":
return 0;
case "jednosoban (1)":
return 1;
case "jednoiposoban (1.5)":
return 1.5;
case "dvosoban (2)":
return 2;
case "trosoban (3)":
return 3;
case "četverosoban (4)":
return 4;
case "petosoban i više":
return 5;
default:
console.log(
"broj soba [stan] = NEPOZNATO [",
numberOfRoomsText,
", ",
categoryId,
"]"
);
return null;
}
}
if (renewedDateText.includes("sat")) {
const dateParts = renewedDateText.split(" ");
const parsedHours =
dateParts && dateParts.length > 2 ? parseInt(dateParts[1]) : undefined;
if (!parsedHours) {
return undefined;
}
return currentMoment.add(-1 * parsedHours, "hours");
if (
categoryId === AD_CATEGORY.HOUSE.id ||
categoryId === AD_CATEGORY.COTTAGE.id ||
categoryId === AD_CATEGORY.APARTMENT.id ||
categoryId === AD_CATEGORY.OFFICE.id
) {
return parseInt(numberOfRoomsText) || null;
}
const todayVariations = ["min", "sekund", "maloprije"];
for (const todayVariation of todayVariations) {
if (renewedDateText.includes(todayVariation)) {
return currentMoment;
}
console.log("broj soba = NEPOZNATO [", numberOfRoomsText, "]");
return null;
}
parseNumberOfFloors(numberOfFloorsText, categoryId) {
if (
categoryId === AD_CATEGORY.HOUSE.id ||
categoryId === AD_CATEGORY.COTTAGE.id
) {
return parseInt(numberOfFloorsText) || null;
}
const renewedDateMoment = moment.tz(
renewedDateText,
OLX_ENUMS.OLX_RENEWED_DATE_FORMAT,
DEFAULT_TIMEZONE
);
if (categoryId === AD_CATEGORY.OFFICE.id) {
if (
numberOfFloorsText === "suteren" ||
numberOfFloorsText === "prizemlje"
) {
return 0;
}
if (numberOfFloorsText === "6+") {
return 7;
}
return parseInt(numberOfFloorsText) || null;
}
return renewedDateMoment.isValid() ? renewedDateMoment : undefined;
console.log("broj spratova = NEPOZNATO [", numberOfFloorsText, "]");
return null;
}
parseFloorNumber(floorText, categoryId) {
if (
categoryId === AD_CATEGORY.FLAT.id ||
categoryId === AD_CATEGORY.APARTMENT.id
) {
if (
floorText === "suteren" ||
floorText === "prizemlje" ||
floorText === "visoko prizemlje"
) {
return 0;
}
return parseInt(floorText) || null;
}
if (categoryId === AD_CATEGORY.OFFICE.id) {
if (floorText === "zaseban objekat") {
return null;
}
if (floorText === "prizemlje" || floorText === "visoko prizemlje") {
return 0;
}
return parseInt(floorText) || null;
}
console.log("sprat = NEPOZNATO [", floorText, "]");
return null;
}
async sleep(ms) {
@@ -569,7 +880,7 @@ class OlxCrawler {
// }
//For now, we use only Postgres saver, so ...
return await savers[0].save(results);
return savers[0].save(results);
//so that we can use some sequelize options and information when data is inserted
}
}

View File

@@ -2,16 +2,23 @@
const fetch = require("node-fetch");
const cheerio = require("cheerio");
const moment = require("moment-timezone");
const {
AD_TYPE,
AD_CATEGORY,
AD_AGENCY,
AD_STATUS,
CRAWLER_AD_TYPE
CRAWLER_AD_TYPE,
FURNISHING_TYPE,
HEATING_TYPE
} = require("../../common/enums");
const { PRINT_CRAWLER_DEBUG } = require("../../config/appConfig");
const {
PRINT_CRAWLER_DEBUG,
DEFAULT_TIMEZONE
} = require("../../config/appConfig");
const { PROSTOR_FORCE_CRAWL } = require("../specificConfigs/prostor");
const PROSTOR_ENUMS = {
PROSTOR_AD_TYPE: {
@@ -48,37 +55,359 @@ class ProstorCrawler {
this.crawlerAdTypes = crawlerAdTypes;
this.crawlerAdCategories = crawlerAdCategories;
this.maxResultsPerPage = maxResultsPerPage;
this.delayBetweenPages = delayBetweenPages;
}
async crawl() {
const crawlAdCategories = this.crawlerAdCategories;
const newRealEstates = [];
if (crawlAdCategories) {
const indexGenerators = [];
for (const adCategory of crawlAdCategories) {
const urlAdTypePart =
PROSTOR_ENUMS.PROSTOR_AD_TYPE[this.crawlerAdTypes];
const urlCategoryPart = PROSTOR_ENUMS.PROSTOR_AD_CATEGORY[adCategory];
if (urlAdTypePart !== undefined && urlCategoryPart !== undefined) {
const urlPageToCrawl = `${this.baseUrl}?remove_sold=1${urlAdTypePart}${urlCategoryPart}`;
const singleCategoryResults = await this.extractRealEstates(
urlPageToCrawl
);
indexGenerators.push(this.categoryIndexer(adCategory));
}
const resultsSubset = singleCategoryResults.slice(
0,
this.maxResultsPerPage
);
const saveResults = await this.saveCrawledResults(resultsSubset);
const { newRecords } = saveResults;
newRealEstates.push(...newRecords);
let done = false;
while (!done) {
const categoryIndexerPromises = [];
const generatorsToRemove = [];
for (const indexGenerator of indexGenerators) {
categoryIndexerPromises.push(indexGenerator.next());
generatorsToRemove.push(false);
}
const singlePageResults = await Promise.all(categoryIndexerPromises);
const entries = singlePageResults.entries();
for (const [index, { value: singlePageResult }] of entries) {
if (singlePageResult) {
const saveResults = await this.saveCrawledResults(singlePageResult);
const { newRecords } = saveResults;
newRealEstates.push(...newRecords);
if (
Array.isArray(newRecords) &&
newRecords.length === 0 &&
!PROSTOR_FORCE_CRAWL
) {
generatorsToRemove[index] = true;
}
} else {
//Generator returned undefined, remove this generator from array
generatorsToRemove[index] = true;
// console.log("Generator ", index + 1, "has no more pages");
}
}
// console.log("Generators state : ", generatorsToRemove);
for (let i = generatorsToRemove.length - 1; i >= 0; i--) {
if (generatorsToRemove[i]) {
// console.log("\tRemove generator ", i + 1);
indexGenerators.splice(i, 1);
}
}
if (indexGenerators.length === 0) {
done = true;
}
await this.sleep(this.delayBetweenPages);
}
}
return newRealEstates;
}
async *categoryIndexer(adCategory) {
const urlAdTypePart = PROSTOR_ENUMS.PROSTOR_AD_TYPE[this.crawlerAdTypes];
const urlCategoryPart = PROSTOR_ENUMS.PROSTOR_AD_CATEGORY[adCategory];
if (urlAdTypePart !== undefined && urlCategoryPart !== undefined) {
const urlPageToCrawl = `${this.baseUrl}?remove_sold=0${urlAdTypePart}${urlCategoryPart}`;
const listOfAllRealEstates = await this.extractRealEstates(
urlPageToCrawl
);
let elementToStartIndexFrom = 0;
while (true) {
const realEstatesForSinglePage = listOfAllRealEstates.slice(
elementToStartIndexFrom,
elementToStartIndexFrom + this.maxResultsPerPage
);
if (realEstatesForSinglePage.length > 0) {
elementToStartIndexFrom += realEstatesForSinglePage.length;
const singlePageResults = await this.indexSinglePage(
realEstatesForSinglePage
);
const filteredSinglePageResults = singlePageResults.filter(
singleResult => !!singleResult
);
if (
Array.isArray(filteredSinglePageResults) &&
filteredSinglePageResults.length > 0
) {
yield filteredSinglePageResults;
} else {
return undefined;
}
} else {
return undefined;
}
}
} else {
return undefined;
}
}
async indexSinglePage(realEstatesList) {
const asyncActions = [];
for (const realEstate of realEstatesList) {
asyncActions.push(this.scrapeAd(realEstate));
}
try {
return await Promise.all(asyncActions);
} catch (e) {
console.log(
"[PROSTOR] Error crawling ads : ",
e.message || "UNKNOWN ERROR"
);
return [];
}
}
async scrapeAd(realEstate) {
const { lat, lng, property_name, price, size, link, status } = realEstate;
const url = `https://prostor.ba${link}`;
// console.log("[PROSTOR] Scraping : ", url);
try {
const adPageSource = await fetch(url);
const body = await adPageSource.text();
const $ = cheerio.load(body);
// link contains part of the URL in the format of : /prodaja/stan/stup/9556
// general form is : /actionType/realEstateType/location/realEstateID
// linkParts contains : ['', 'actionType', 'realEstateType', 'location', 'realEstateID']
const linkParts = link.split("/");
const adType = ProstorCrawler.getAdTypeId(linkParts[1]);
const realEstateType = ProstorCrawler.getAdCategoryId(linkParts[2]);
const prostorId = linkParts[4];
if (!adType || !realEstateType || !prostorId) {
return null;
}
const allDataSelector =
"body > div > div.container-fluid > div > div.column-right > table > tbody";
const realEstateProperties = {};
$(allDataSelector)
.find("p")
.each((i, element) => {
const propertyElement = $(element)
.text()
.split(":")
.map(text => text.trim().toLowerCase());
const propertyTitle = propertyElement[0];
realEstateProperties[propertyTitle] = propertyElement[1];
});
$(allDataSelector)
.find("div.mb-2")
.each((i, element) => {
const propertyElement = $(element)
.text()
.trim()
.toLowerCase();
realEstateProperties[propertyElement] = true;
});
if (JSON.stringify(realEstateProperties) === JSON.stringify({})) {
return null;
}
let numberOfRooms =
parseFloat(realEstateProperties["broj soba"]) +
parseFloat(realEstateProperties["broj spavaćih soba"]) || null,
numberOfFloors = null,
floor = null,
accessRoadType = null,
heatingType = ProstorCrawler.getHeatingTypeId(realEstateProperties),
furnishingType = null,
balcony =
realEstateProperties["balkon"] ||
realEstateProperties["terasa"] ||
realEstateProperties["lođa"] ||
null,
newBuilding = linkParts[1] === "novogradnja",
elevator = realEstateProperties["lift"] || null,
water = realEstateProperties["voda"] || null,
electricity = realEstateProperties["električna energija"] || null,
drainageSystem = realEstateProperties["kanalizacija"] || null,
registeredInZkBooks = null,
recentlyAdapted = null,
parking = realEstateProperties["parking"] || null,
garage = realEstateProperties["garaža"] || null,
gas = realEstateProperties["plin"] || null,
antiTheftDoor = realEstateProperties["blindo vrata"] || null,
airCondition = realEstateProperties["klima"] || null,
phoneConnection = realEstateProperties["telefon"] || null,
cableTV = realEstateProperties["kablovksa tv"] || null,
internet =
realEstateProperties["internet"] ||
realEstateProperties["adsl"] ||
null,
basementAttic = realEstateProperties["podrum"] || null,
storeRoom = realEstateProperties["ostava"] || null,
videoSurveillance = realEstateProperties["video nadzor"],
alarm = realEstateProperties["alarm"] || null,
suitableForStudents = null,
includingBills = null,
animalsAllowed = null,
pool = realEstateProperties["bazen"] || null,
urbanPlanPermit = null,
buildingPermit = null,
utilityConnection = null,
distanceToRiver = null,
numberOfViewsAgency = null;
// Floor versions (there are possibly more versions) :
// Sprat: 3/3
// Sprat: 1 - 2/2
// Sprat: Pr - 7/7
// Sprat: -2/0
// If there are two parts, that represents more real estates are sold
// numberOfFloors is contained in second part, after / sign
const floorsArray = realEstateProperties["sprat"].split(" - ");
let floorText = "";
if (floorsArray.length === 1) {
const floorDescription = floorsArray[0].split("/");
numberOfFloors = parseInt(floorDescription[1]) || null;
floorText = floorDescription[0];
floor = Math.round(parseFloat(floorText));
} else if (floorsArray.length === 2) {
const floorDescription = floorsArray[1].split("/");
numberOfFloors = parseInt(floorDescription[1]) || null;
floorText = floorsArray[0];
floor = Math.round(parseFloat(floorText));
} else {
// This is something strange
}
if (isNaN(floor)) {
// It was textual representation of floor, like "Pr", "Su" or similar
switch (floorText) {
case "pr":
floor = 0;
break;
case "su":
floor = -1;
break;
default:
console.log(
"[PROSTOR] Unknown textual representation of floor : ",
floorText
);
floor = null;
}
}
if (realEstateProperties["namješteno"]) {
furnishingType = FURNISHING_TYPE.FURNISHED.id;
} else if (realEstateProperties["polunamješteno"]) {
furnishingType = FURNISHING_TYPE.HALF_FURNISHED.id;
} else {
furnishingType = FURNISHING_TYPE.NOT_FURNISHED.id;
}
const adStatus = ProstorCrawler.getStatusId(status);
const title = property_name;
const parsedPrice = parseFloat(price.replace(/\./g, "")) || null;
const parsedArea = parseFloat(size);
const gardenSize = null;
const longDescription = null;
const data = {
url,
agencyObjectId: prostorId,
originAgencyName: AD_AGENCY.PROSTOR,
realEstateType,
adType,
title,
price: parsedPrice,
area: parsedArea,
gardenSize,
shortDescription: "",
longDescription: longDescription,
streetNumber: 0,
streetName: realEstateProperties["adresa"],
locality: "",
municipality: "",
city: "",
region: "",
entity: "",
country: "",
locationLat: lat,
locationLong: lng,
adStatus,
numberOfRooms,
numberOfFloors,
floor,
accessRoadType,
heatingType,
furnishingType,
balcony,
newBuilding,
elevator,
water,
electricity,
drainageSystem,
registeredInZkBooks,
recentlyAdapted,
parking,
garage,
gas,
antiTheftDoor,
airCondition,
phoneConnection,
cableTV,
internet,
basementAttic,
storeRoom,
videoSurveillance,
alarm,
suitableForStudents,
includingBills,
animalsAllowed,
pool,
urbanPlanPermit,
buildingPermit,
utilityConnection,
distanceToRiver,
numberOfViewsAgency
};
return data;
} catch (e) {
console.error(
"[PROSTOR] Exception caught: " + e.message,
"\r\nURL:",
url
);
return null;
}
}
async extractRealEstates(url) {
if (PRINT_CRAWLER_DEBUG) {
console.log("[PROSTOR] Index page : ", url);
@@ -115,18 +444,19 @@ class ProstorCrawler {
const jsonData = scriptData.substring(23, jsonEndIndex) + "]";
const realEstates = JSON.parse(jsonData);
const transformedRealEstates = [];
for (const realEstate of realEstates) {
const transformedRealEstate = ProstorCrawler.transformRealEstateData(
realEstate
);
if (transformedRealEstate) {
transformedRealEstates.push(transformedRealEstate);
}
}
return transformedRealEstates;
// const transformedRealEstates = [];
//
// for (const realEstate of realEstates) {
// const transformedRealEstate = ProstorCrawler.transformRealEstateData(
// realEstate
// );
// if (transformedRealEstate) {
// transformedRealEstates.push(transformedRealEstate);
// }
// }
//
// return transformedRealEstates;
return realEstates;
} else {
throw {
message: "Something is wrong with JSON data or data is moved"
@@ -134,73 +464,15 @@ class ProstorCrawler {
}
} catch (e) {
console.log(e);
throw { message: "Can't find ad data JSON" };
throw e;
}
}
} catch (e) {
console.error("[PROSTOR] Exception caught:", e.message);
return [];
}
}
static transformRealEstateData(realEstateData) {
try {
const { lat, lng, property_name, price, size, link } = realEstateData;
// link contains part of the URL in the format of : /prodaja/stan/stup/9556
// general form is : /actionType/realEstateType/location/realEstateID
// linkParts contains : ['', 'actionType', 'realEstateType', 'location', 'realEstateID']
const linkParts = link.split("/");
const adType = ProstorCrawler.getAdTypeId(linkParts[1]);
const realEstateType = ProstorCrawler.getAdCategoryId(linkParts[2]);
const prostorId = linkParts[4];
const url = `https://prostor.ba${link}`;
if (!adType || !realEstateType || !prostorId) {
return null;
}
const adStatus = AD_STATUS.STATUS_NORMAL;
const parsedPrice = parseFloat(price.replace(/\./g, "")) || null;
const parsedArea = parseFloat(size);
const data = {
url,
agencyObjectId: prostorId,
originAgencyName: AD_AGENCY.PROSTOR,
realEstateType,
adType,
title: property_name,
price: parsedPrice,
area: parsedArea,
gardenSize: null,
shortDescription: "",
longDescription: "",
streetNumber: 0,
streetName: "",
locality: "",
municipality: "",
city: "",
region: "",
entity: "",
country: "",
locationLat: lat,
locationLong: lng,
adStatus,
publishedDate: null,
renewedDate: null
};
return data;
} catch (e) {
console.error(
"[PROSTOR] Exception caught: " + e.message,
"\r\nURL:",
url
"[PROSTOR] Exception caught:",
e.message || "UNKNOWN MESSAGE"
);
return null;
return [];
}
}
@@ -231,11 +503,61 @@ class ProstorCrawler {
return AD_TYPE.AD_TYPE_SALE.stringId;
case "najam":
return AD_TYPE.AD_TYPE_RENT.stringId;
case "novogradnja":
return AD_TYPE.AD_TYPE_SALE.stringId;
default:
return undefined;
}
}
static getHeatingTypeId(realEstateProperties) {
const realEstatePropertiesKeys = Object.keys(realEstateProperties);
for (const property of realEstatePropertiesKeys) {
switch (property) {
case "centralno toplane":
return HEATING_TYPE.CENTRAL_CITY.id;
case "etažno plinsko":
return HEATING_TYPE.CENTRAL_GAS.id;
case "termo blok":
case "podno grijanje":
return HEATING_TYPE.OTHER.id;
case "etažno električno":
case "konvektori":
return HEATING_TYPE.ELECTRICITY.id;
case "plinske peći":
return HEATING_TYPE.GAS.id;
case "vlastita kotlovnica":
return HEATING_TYPE.CENTRAL_BOILER.id;
case "toplotna pumpa":
return HEATING_TYPE.HEAT_PUMP.id;
case "kamin":
return HEATING_TYPE.WOOD.id;
default:
//console.log("[PROSTOR] Nepoznato >>> [", property, "]");
}
}
}
static getStatusId(statusText) {
switch (statusText) {
case "":
return AD_STATUS.STATUS_NORMAL;
case "Rezervisano":
return AD_STATUS.STATUS_RESERVED;
case "Prodano":
return AD_STATUS.STATUS_SOLD;
case "Iznajmljeno":
return AD_STATUS.STATUS_RENTED;
default:
console.log("[PROSTOR] Unknown AD_STATUS : [", statusText, "]");
return AD_STATUS.STATUS_NORMAL;
}
}
async sleep(ms) {
return new Promise(resolve => setTimeout(resolve, ms));
}
async saveCrawledResults(results) {
const savers = this.savers;
@@ -244,7 +566,7 @@ class ProstorCrawler {
// }
//For now, we use only Postgres saver, so ...
return await savers[0].save(results);
return savers[0].save(results);
//so that we can use some sequelize options and information when data is inserted
}
}

View File

@@ -11,7 +11,10 @@ const {
AD_CATEGORY,
AD_AGENCY,
AD_STATUS,
CRAWLER_AD_TYPE
CRAWLER_AD_TYPE,
HEATING_TYPE,
ACCESS_ROAD_TYPE,
FURNISHING_TYPE
} = require("../../common/enums");
const {
@@ -193,7 +196,7 @@ class RentalCrawler {
}
async scrapeAd(url) {
console.log("[RENTAL] Scraping : ", url);
// console.log("[RENTAL] Scraping : ", url);
try {
const adPageSource = await fetch(url);
const body = await adPageSource.text();
@@ -219,6 +222,7 @@ class RentalCrawler {
throw { message: "Can't find ad data JSON" };
}
let adStatus = AD_STATUS.STATUS_NORMAL;
const rentalId = extractedData["re_realEstates_id"];
const adCategory = this.getKiviCategoryIdFromRentalId(
parseInt(extractedData["re_types_id"])
@@ -237,6 +241,141 @@ class RentalCrawler {
};
}
const descriptionIds = extractedData["re_descriptions_id"]
.split(",")
.map(stringNumber => parseInt(stringNumber));
if (!Array.isArray(descriptionIds)) {
throw {
message:
'Expected array od descriptions but "re_descriptions_id" not found !'
};
}
const spaceIds = extractedData["re_spaces_id"]
.split(",")
.map(stringNumber => parseInt(stringNumber));
if (!Array.isArray(spaceIds)) {
throw {
message: 'Expected array od spaces but "re_spaces_id" not found !'
};
}
const infrastructureIds = extractedData["re_infrastructure_id"]
.split(",")
.map(stringNumber => parseInt(stringNumber));
if (!Array.isArray(infrastructureIds)) {
throw {
message:
'Expected array od infrastructures but "re_infrastructure_id" not found !'
};
}
const floorNoIds = extractedData["re_floorNO_id"]
.split(",")
.map(stringNumber => parseInt(stringNumber));
if (!Array.isArray(floorNoIds)) {
throw {
message:
'Expected array od infrastructures but "re_floorNO_id" not found !'
};
}
const numberOfViewsAgencySelector = $(
"body > div > div.container > div.row.content-top > div.col-xs-12.col-sm-12.col-md-9 > div > div.box-viewcount"
);
// number of views is written as : "Broj pregledavanja: NNN"
const numberOfViewsAgencyFullText = numberOfViewsAgencySelector
.text()
.trim();
const numberOfViewsAgencyParts = numberOfViewsAgencyFullText.split(":");
const realEstatePropertiesFromDescriptions = this.getPropertiesFromDescriptions(
descriptionIds
);
const realEstatePropertiesFromSpaces = this.getPropertiesFromSpaces(
spaceIds
);
const realEstatePropertiesFromInfrastructure = this.getPropertiesFromInfrastructure(
infrastructureIds
);
if (extractedData["adm_realEstates_discount"] === "1") {
adStatus = AD_STATUS.STATUS_DISCOUNTED;
}
let numberOfRooms =
parseInt(extractedData["re_realEstates_roomsNO"]) +
parseInt(extractedData["re_realEstates_bedroomNO"]) || null,
numberOfFloors =
parseInt(extractedData["re_realEstates_floorsNO"]) ||
this.getNumberOfFloorsFromFloorId(extractedData["re_floorNO_id"]),
floor =
parseInt(extractedData["re_realEstates_floorNO"]) ||
this.getFloorNumberFromFloorId(extractedData["re_floorNO_id"]),
accessRoadType = realEstatePropertiesFromDescriptions.accessRoadType,
heatingType =
this.getHeatingTypeId(extractedData["re_heating_id"]) || null,
furnishingType = realEstatePropertiesFromDescriptions.furnishingType,
balcony =
realEstatePropertiesFromDescriptions.balcony ||
realEstatePropertiesFromSpaces.balcony,
newBuilding = extractedData["op_realEstates_newBuilding"]
? extractedData["op_realEstates_newBuilding"] === "1"
: null,
elevator = realEstatePropertiesFromDescriptions.elevator,
water =
realEstatePropertiesFromDescriptions.water ||
realEstatePropertiesFromInfrastructure.water,
electricity =
realEstatePropertiesFromDescriptions.electricity ||
realEstatePropertiesFromInfrastructure.electricity,
drainageSystem =
realEstatePropertiesFromInfrastructure.drainageSystem,
registeredInZkBooks =
extractedData["op_realEstates_ownerPermit"] === 1 || null,
recentlyAdapted = null,
parking =
realEstatePropertiesFromDescriptions.parking ||
realEstatePropertiesFromSpaces.parking,
garage = realEstatePropertiesFromSpaces.garage,
gas = realEstatePropertiesFromInfrastructure.gas,
antiTheftDoor = realEstatePropertiesFromDescriptions.antiTheftDoor,
airCondition = realEstatePropertiesFromDescriptions.airCondition,
phoneConnection =
realEstatePropertiesFromInfrastructure.phoneConnection,
cableTV = realEstatePropertiesFromInfrastructure.cableTV,
internet = realEstatePropertiesFromInfrastructure.internet,
basementAttic = realEstatePropertiesFromSpaces.basementAttic,
storeRoom = realEstatePropertiesFromSpaces.storeRoom,
videoSurveillance =
realEstatePropertiesFromDescriptions.videoSurveillance ||
realEstatePropertiesFromInfrastructure.videoSurveillance,
alarm = realEstatePropertiesFromDescriptions.alarm,
suitableForStudents = null,
includingBills =
extractedData["op_realEstates_utilitiesIncluded"] === "1" || null,
animalsAllowed = null,
pool = realEstatePropertiesFromDescriptions.pool,
urbanPlanPermit =
extractedData["op_realEstates_locationPermit"] === "1" ||
realEstatePropertiesFromDescriptions.urbanPlanPermit,
buildingPermit =
extractedData["op_realEstates_buildingPermit"] === "1" || null,
utilityConnection =
realEstatePropertiesFromDescriptions.utilityConnection,
distanceToRiver = null,
numberOfViewsAgency =
numberOfViewsAgencyParts.length > 1
? parseInt(numberOfViewsAgencyParts[1])
: null;
const title = extractedData["re_realEstates_portalName"];
const extractedPrice = parseFloat(
extractedData["re_realEstates_price"]
@@ -277,8 +416,6 @@ class RentalCrawler {
};
}
const adStatus = AD_STATUS.STATUS_NORMAL;
const data = {
url,
agencyObjectId: rentalId,
@@ -303,7 +440,42 @@ class RentalCrawler {
locationLong,
adStatus,
publishedDate: publishedDateMoment.toISOString(),
renewedDate: renewedDateMoment.toISOString()
renewedDate: renewedDateMoment.toISOString(),
numberOfRooms,
numberOfFloors,
floor,
accessRoadType,
heatingType,
furnishingType,
balcony,
newBuilding,
elevator,
water,
electricity,
drainageSystem,
registeredInZkBooks,
recentlyAdapted,
parking,
garage,
gas,
antiTheftDoor,
airCondition,
phoneConnection,
cableTV,
internet,
basementAttic,
storeRoom,
videoSurveillance,
alarm,
suitableForStudents,
includingBills,
animalsAllowed,
pool,
urbanPlanPermit,
buildingPermit,
utilityConnection,
distanceToRiver,
numberOfViewsAgency
};
return data;
@@ -350,6 +522,270 @@ class RentalCrawler {
}
}
getPropertiesFromDescriptions(descriptionIds) {
const result = {
accessRoadType: null,
furnishingType: null,
balcony: null,
elevator: null,
parking: null,
antiTheftDoor: null,
airCondition: null,
videoSurveillance: null,
alarm: null,
pool: null,
urbanPlanPermit: null,
utilityConnection: null,
water: null,
electricity: null
};
for (const descriptionId of descriptionIds) {
switch (descriptionId) {
case 16:
result.furnishingType = FURNISHING_TYPE.NOT_FURNISHED.id;
break;
case 17:
result.furnishingType = FURNISHING_TYPE.HALF_FURNISHED.id;
break;
case 1:
case 28:
result.furnishingType = FURNISHING_TYPE.FURNISHED.id;
break;
case 14:
result.elevator = true;
break;
case 39:
result.electricity = true;
break;
case 40:
result.water = true;
break;
case 41:
case 58:
result.accessRoadType = ACCESS_ROAD_TYPE.ASPHALT.id;
break;
case 26:
result.balcony = true;
break;
case 62:
result.parking = true;
break;
case 3:
result.antiTheftDoor = true;
break;
case 2:
case 21:
result.airCondition = true;
break;
case 4:
result.alarm = true;
break;
case 55:
result.videoSurveillance = true;
break;
case 9:
result.pool = true;
break;
case 60:
result.urbanPlanPermit = true;
break;
case 38:
result.utilityConnection = true;
break;
}
}
return result;
}
getPropertiesFromSpaces(spaceIds) {
const result = {
balcony: null,
parking: null,
garage: null,
basementAttic: null,
storeRoom: null
};
for (const spaceId of spaceIds) {
switch (spaceId) {
case 36:
case 12:
result.parking = true;
break;
case 1:
case 2:
case 3:
result.balcony = true;
break;
case 4:
case 30:
result.garage = true;
break;
case 9:
case 10:
result.storeRoom = true;
break;
case 18:
case 34:
case 37:
case 27:
result.basementAttic = true;
break;
}
}
return result;
}
getHeatingTypeId(heatingRentalId) {
// heatingRentalId can have multiple values, like: "1, 2, 3", parseInt will take first integer value
const heatingId = parseInt(heatingRentalId);
switch (heatingId) {
case 27:
case 16:
return HEATING_TYPE.GAS.id;
case 4:
return HEATING_TYPE.CENTRAL_GAS.id;
case 3:
case 23:
case 6:
case 7:
case 8:
case 9:
case 10:
return HEATING_TYPE.CENTRAL_BOILER.id;
case 2:
case 13:
case 30:
case 17:
case 29:
case 31:
return HEATING_TYPE.ELECTRICITY.id;
case 24:
case 25:
case 12:
return HEATING_TYPE.CENTRAL_CITY.id;
case 26:
case 21:
case 20:
return HEATING_TYPE.WOOD.id;
case 28:
case 19:
return HEATING_TYPE.HEAT_PUMP.id;
case 14:
case 32:
return HEATING_TYPE.OTHER.id;
default:
return null;
}
}
getPropertiesFromInfrastructure(infrastructureIds) {
const result = {
electricity: null,
water: null,
gas: null,
drainageSystem: null,
phoneConnection: null,
internet: null,
videoSurveillance: null,
cableTV: null
};
for (const infrastructureId of infrastructureIds) {
switch (infrastructureId) {
case 1:
result.electricity = true;
break;
case 2:
result.water = true;
break;
case 4:
result.gas = true;
break;
case 5:
result.drainageSystem = true;
break;
case 7:
case 8:
result.phoneConnection = true;
break;
case 10:
result.internet = true;
break;
case 11:
result.cableTV = true;
break;
case 16:
case 17:
result.videoSurveillance = true;
break;
}
}
return result;
}
getFloorNumberFromFloorId(floorsIdText) {
// floorIdText can be array of numbers, separated by comma or number
// just extracting floor number from first element
const floorsId = floorsIdText.split(",");
if (floorsId.length === 0) {
return null;
}
const firstFloorId = parseInt(floorsId[0]);
// 1 pod
// 2 sut
// 3 raz
// 4 pri
// 5 vpri
// 6 prv
// 7 dru
// 8 tre
// 9 čet
// 10 man
// 11
// 12 pot
// 13 vpot
// 14 tav
// 15 pet
const floorNumber = [
-1,
-1,
0,
0,
1,
1,
2,
3,
4,
null,
null,
null,
null,
null,
5
];
return floorNumber[firstFloorId - 1] || null;
}
getNumberOfFloorsFromFloorId(floorsIdText) {
// floorIdText can be array of numbers, separated by comma or number
const floorIds = floorsIdText.split(",");
if (floorIds.length === 0) {
return null;
}
return floorIds.length;
}
async sleep(ms) {
return new Promise(resolve => setTimeout(resolve, ms));
}
@@ -362,7 +798,7 @@ class RentalCrawler {
// }
//For now, we use only Postgres saver, so ...
return await savers[0].save(results);
return savers[0].save(results);
//so that we can use some sequelize options and information when data is inserted
}
}

View File

@@ -26,7 +26,42 @@ const bulkUpsertRealEstates = async realEstateData => {
"gardenSize",
"adStatus",
"updatedAt",
"renewedDate"
"renewedDate",
"numberOfRooms",
"numberOfFloors",
"floor",
"accessRoadType",
"heatingType",
"furnishingType",
"balcony",
"newBuilding",
"elevator",
"water",
"electricity",
"drainageSystem",
"registeredInZkBooks",
"recentlyAdapted",
"parking",
"garage",
"gas",
"antiTheftDoor",
"airCondition",
"phoneConnection",
"cableTV",
"internet",
"basementAttic",
"storeRoom",
"videoSurveillance",
"alarm",
"suitableForStudents",
"includingBills",
"animalsAllowed",
"pool",
"urbanPlanPermit",
"buildingPermit",
"utilityConnection",
"distanceToRiver",
"numberOfViewsAgency"
];
const order = [["updatedAt", "desc"]];

View File

@@ -24,6 +24,23 @@ const findRealEstatesForSearchRequest = async searchRequestId => {
return matchingRealEstates;
};
/**
 * Loads every search-request match that has not been notified yet, together
 * with its associated search requests and real estates.
 *
 * @returns {Promise<*>} Whatever `db.SearchRequestMatch.findAll` resolves to
 *   for `notified: false` with both associations included.
 */
const findNotNotifiedMatches = async () => {
  const { SearchRequest, RealEstate, SearchRequestMatch } = db;

  return SearchRequestMatch.findAll({
    where: { notified: false },
    include: [
      { model: SearchRequest, as: "searchRequests" },
      { model: RealEstate, as: "realEstates" }
    ]
  });
};
const addMatches = async matchingRecords => {
return await db.SearchRequestMatch.bulkCreate(matchingRecords, {
ignoreDuplicates: true
@@ -32,5 +49,6 @@ const addMatches = async matchingRecords => {
module.exports = {
findRealEstatesForSearchRequest,
addMatches
addMatches,
findNotNotifiedMatches
};

View File

@@ -20,7 +20,11 @@ const generateRealEstateLinks = realEstates => {
return realEstateLinks;
};
const generateNotificationEmail = (realEstates, searchRequestId) => {
const generateNotificationEmail = (
realEstates,
searchRequestId,
dailyNotification = false
) => {
const truncateList = realEstates.length > MAX_REAL_ESTATES_IN_EMAIL;
const realEstatesToShow = truncateList
? realEstates.slice(0, MAX_REAL_ESTATES_IN_EMAIL)
@@ -30,9 +34,20 @@ const generateNotificationEmail = (realEstates, searchRequestId) => {
const realEstateLinks = generateRealEstateLinks(realEstatesToShow);
const moreRealEstates = `<div>Kompletan spisak nekretnina možete pogledati na <a href="${allRealEstatesLink}">listi nekretnina</a><div>`;
const emailFooter = generateEmailFooter(searchRequestId);
const asapMessageBody =
realEstates.length > 1
? "Pronašli smo nekretnine koje odgovaraju Vašoj pretrazi"
: "Pronašli smo nekretninu koja odgovara Vašoj pretrazi";
const dailyMessageBody =
realEstates.length > 1
? "U posljednja 24h objavljene su sljedeće nekretnine koje odgovaraju uslovima Vaše pretrage"
: "U posljednja 24h objavljena je sljedeća nekretnina koja odgovara uslovima Vaše pretrage";
const messageBody = dailyNotification ? dailyMessageBody : asapMessageBody;
return `<h3>Zdravo</h3>
<h4>Pronašli smo nekretnine koje odgovaraju Vašoj pretrazi</h4>
<h4>${messageBody}</h4>
<div>
${realEstateLinks}
<div/>

View File

@@ -0,0 +1,15 @@
"use strict";
const { EMAIL_FREQUENCY } = require("../common/enums");
module.exports = {
up: (queryInterface, Sequelize) => {
return queryInterface.addColumn("SearchRequests", "emailFrequency", {
type: Sequelize.TEXT,
defaultValue: EMAIL_FREQUENCY.ASAP.stringId
});
},
down: (queryInterface, Sequelize) => {
return queryInterface.removeColumn("SearchRequests", "emailFrequency");
}
};

View File

@@ -0,0 +1,13 @@
"use strict";
module.exports = {
up: (queryInterface, Sequelize) => {
return queryInterface.addColumn("SearchRequests", "deletedEmail", {
type: Sequelize.TEXT
});
},
down: (queryInterface, Sequelize) => {
return queryInterface.removeColumn("SearchRequests", "deletedEmail");
}
};

View File

@@ -0,0 +1,163 @@
"use strict";
module.exports = {
up: (queryInterface, Sequelize) => {
return Promise.all([
queryInterface.addColumn("RealEstates", "numberOfRooms", {
type: Sequelize.REAL
}),
queryInterface.addColumn("RealEstates", "numberOfFloors", {
type: Sequelize.INTEGER
}),
queryInterface.addColumn("RealEstates", "floor", {
type: Sequelize.INTEGER
}),
queryInterface.addColumn("RealEstates", "accessRoadType", {
type: Sequelize.TEXT
}),
queryInterface.addColumn("RealEstates", "heatingType", {
type: Sequelize.TEXT
}),
queryInterface.addColumn("RealEstates", "furnishingType", {
type: Sequelize.TEXT
}),
queryInterface.addColumn("RealEstates", "balcony", {
type: Sequelize.BOOLEAN
}),
queryInterface.addColumn("RealEstates", "newBuilding", {
type: Sequelize.BOOLEAN
}),
queryInterface.addColumn("RealEstates", "elevator", {
type: Sequelize.BOOLEAN
}),
queryInterface.addColumn("RealEstates", "water", {
type: Sequelize.BOOLEAN
}),
queryInterface.addColumn("RealEstates", "electricity", {
type: Sequelize.BOOLEAN
}),
queryInterface.addColumn("RealEstates", "drainageSystem", {
type: Sequelize.BOOLEAN
}),
queryInterface.addColumn("RealEstates", "registeredInZkBooks", {
type: Sequelize.BOOLEAN
}),
queryInterface.addColumn("RealEstates", "recentlyAdapted", {
type: Sequelize.BOOLEAN
}),
queryInterface.addColumn("RealEstates", "parking", {
type: Sequelize.BOOLEAN
}),
queryInterface.addColumn("RealEstates", "garage", {
type: Sequelize.BOOLEAN
}),
queryInterface.addColumn("RealEstates", "gas", {
type: Sequelize.BOOLEAN
}),
queryInterface.addColumn("RealEstates", "antiTheftDoor", {
type: Sequelize.BOOLEAN
}),
queryInterface.addColumn("RealEstates", "airCondition", {
type: Sequelize.BOOLEAN
}),
queryInterface.addColumn("RealEstates", "phoneConnection", {
type: Sequelize.BOOLEAN
}),
queryInterface.addColumn("RealEstates", "cableTV", {
type: Sequelize.BOOLEAN
}),
queryInterface.addColumn("RealEstates", "internet", {
type: Sequelize.BOOLEAN
}),
queryInterface.addColumn("RealEstates", "basementAttic", {
type: Sequelize.BOOLEAN
}),
queryInterface.addColumn("RealEstates", "storeRoom", {
type: Sequelize.BOOLEAN
}),
queryInterface.addColumn("RealEstates", "videoSurveillance", {
type: Sequelize.BOOLEAN
}),
queryInterface.addColumn("RealEstates", "alarm", {
type: Sequelize.BOOLEAN
}),
queryInterface.addColumn("RealEstates", "suitableForStudents", {
type: Sequelize.BOOLEAN
}),
queryInterface.addColumn("RealEstates", "includingBills", {
type: Sequelize.BOOLEAN
}),
queryInterface.addColumn("RealEstates", "animalsAllowed", {
type: Sequelize.BOOLEAN
}),
queryInterface.addColumn("RealEstates", "pool", {
type: Sequelize.BOOLEAN
}),
queryInterface.addColumn("RealEstates", "exchange", {
type: Sequelize.BOOLEAN
}),
queryInterface.addColumn("RealEstates", "urbanPlanPermit", {
type: Sequelize.BOOLEAN
}),
queryInterface.addColumn("RealEstates", "buildingPermit", {
type: Sequelize.BOOLEAN
}),
queryInterface.addColumn("RealEstates", "utilityConnection", {
type: Sequelize.BOOLEAN
}),
queryInterface.addColumn("RealEstates", "distanceToRiver", {
type: Sequelize.INTEGER
}),
queryInterface.addColumn("RealEstates", "numberOfViewsAgency", {
type: Sequelize.INTEGER,
defaultValue: 0
}),
queryInterface.addColumn("RealEstates", "numberOfViewsKivi", {
type: Sequelize.INTEGER,
defaultValue: 0
})
]);
},
down: (queryInterface, Sequelize) => {
return Promise.all([
queryInterface.removeColumn("RealEstates", "numberOfRooms"),
queryInterface.removeColumn("RealEstates", "numberOfFloors"),
queryInterface.removeColumn("RealEstates", "floor"),
queryInterface.removeColumn("RealEstates", "accessRoadType"),
queryInterface.removeColumn("RealEstates", "heatingType"),
queryInterface.removeColumn("RealEstates", "furnishingType"),
queryInterface.removeColumn("RealEstates", "balcony"),
queryInterface.removeColumn("RealEstates", "newBuilding"),
queryInterface.removeColumn("RealEstates", "elevator"),
queryInterface.removeColumn("RealEstates", "water"),
queryInterface.removeColumn("RealEstates", "electricity"),
queryInterface.removeColumn("RealEstates", "drainageSystem"),
queryInterface.removeColumn("RealEstates", "registeredInZkBooks"),
queryInterface.removeColumn("RealEstates", "recentlyAdapted"),
queryInterface.removeColumn("RealEstates", "parking"),
queryInterface.removeColumn("RealEstates", "garage"),
queryInterface.removeColumn("RealEstates", "gas"),
queryInterface.removeColumn("RealEstates", "antiTheftDoor"),
queryInterface.removeColumn("RealEstates", "airCondition"),
queryInterface.removeColumn("RealEstates", "phoneConnection"),
queryInterface.removeColumn("RealEstates", "cableTV"),
queryInterface.removeColumn("RealEstates", "internet"),
queryInterface.removeColumn("RealEstates", "basementAttic"),
queryInterface.removeColumn("RealEstates", "storeRoom"),
queryInterface.removeColumn("RealEstates", "videoSurveillance"),
queryInterface.removeColumn("RealEstates", "alarm"),
queryInterface.removeColumn("RealEstates", "suitableForStudents"),
queryInterface.removeColumn("RealEstates", "includingBills"),
queryInterface.removeColumn("RealEstates", "animalsAllowed"),
queryInterface.removeColumn("RealEstates", "pool"),
queryInterface.removeColumn("RealEstates", "exchange"),
queryInterface.removeColumn("RealEstates", "urbanPlanPermit"),
queryInterface.removeColumn("RealEstates", "buildingPermit"),
queryInterface.removeColumn("RealEstates", "utilityConnection"),
queryInterface.removeColumn("RealEstates", "distanceToRiver"),
queryInterface.removeColumn("RealEstates", "numberOfViewsAgency"),
queryInterface.removeColumn("RealEstates", "numberOfViewsKivi")
]);
}
};

View File

@@ -48,7 +48,44 @@ module.exports = (sequelize, DataTypes) => {
longDescription: DataTypes.TEXT,
adStatus: DataTypes.INTEGER,
publishedDate: DataTypes.DATE,
renewedDate: DataTypes.DATE
renewedDate: DataTypes.DATE,
numberOfRooms: DataTypes.INTEGER,
numberOfFloors: DataTypes.INTEGER,
floor: DataTypes.INTEGER,
accessRoadType: DataTypes.TEXT,
heatingType: DataTypes.TEXT,
furnishingType: DataTypes.TEXT,
balcony: DataTypes.BOOLEAN,
newBuilding: DataTypes.BOOLEAN,
elevator: DataTypes.BOOLEAN,
water: DataTypes.BOOLEAN,
electricity: DataTypes.BOOLEAN,
drainageSystem: DataTypes.BOOLEAN,
registeredInZkBooks: DataTypes.BOOLEAN,
recentlyAdapted: DataTypes.BOOLEAN,
parking: DataTypes.BOOLEAN,
garage: DataTypes.BOOLEAN,
gas: DataTypes.BOOLEAN,
antiTheftDoor: DataTypes.BOOLEAN,
airCondition: DataTypes.BOOLEAN,
phoneConnection: DataTypes.BOOLEAN,
cableTV: DataTypes.BOOLEAN,
internet: DataTypes.BOOLEAN,
basementAttic: DataTypes.BOOLEAN,
storeRoom: DataTypes.BOOLEAN,
videoSurveillance: DataTypes.BOOLEAN,
alarm: DataTypes.BOOLEAN,
suitableForStudents: DataTypes.BOOLEAN,
includingBills: DataTypes.BOOLEAN,
animalsAllowed: DataTypes.BOOLEAN,
pool: DataTypes.BOOLEAN,
exchange: DataTypes.BOOLEAN,
urbanPlanPermit: DataTypes.BOOLEAN,
buildingPermit: DataTypes.BOOLEAN,
utilityConnection: DataTypes.BOOLEAN,
distanceToRiver: DataTypes.INTEGER,
numberOfViewsAgency: DataTypes.INTEGER,
numberOfViewsKivi: DataTypes.INTEGER
});
return RealEstate;

View File

@@ -1,6 +1,6 @@
"use strict";
const { AD_TYPE } = require("../common/enums");
const { AD_TYPE, EMAIL_FREQUENCY } = require("../common/enums");
module.exports = (sequelize, DataTypes) => {
const SearchRequest = sequelize.define("SearchRequest", {
@@ -61,6 +61,14 @@ module.exports = (sequelize, DataTypes) => {
type: DataTypes.BOOLEAN,
defaultValue: false,
allowNull: false
},
emailFrequency: {
type: DataTypes.TEXT,
defaultValue: EMAIL_FREQUENCY.ASAP.stringId,
allowNull: false
},
deletedEmail: {
type: DataTypes.TEXT
}
});

View File

@@ -44,6 +44,12 @@ module.exports = (sequelize, DataTypes) => {
);
SearchRequestMatch.associate = models => {
SearchRequestMatch.hasMany(models.SearchRequest, {
foreignKey: "id",
sourceKey: "searchRequestId",
targetKey: "id",
as: "searchRequests"
});
SearchRequestMatch.hasMany(models.RealEstate, {
foreignKey: "id",
as: "realEstates"

View File

@@ -0,0 +1,8 @@
"use strict";
const {
  notifyRequestsWithDailyOption
} = require("../services/notificationService");

// Script entry point: runs the daily notification job once.
// A failure is logged and reported through a non-zero exit code instead of
// being left as an unhandled promise rejection.
(async () => {
  try {
    await notifyRequestsWithDailyOption();
  } catch (err) {
    console.error("[Daily Notification Failed]", err);
    // exitCode (rather than process.exit) lets pending I/O finish first
    process.exitCode = 1;
  }
})();

View File

@@ -102,3 +102,11 @@ h3 {
border-radius: 4px !important;
text-align: center;
}
/* Text color for links rendered as collection items. */
.collection a.collection-item {
color: #02adba;
}
/* Hover state for collection links that are not active:
   the same color as above (2, 173, 186) at 20% opacity as background. */
.collection a.collection-item:not(.active):hover {
background-color: rgba(2, 173, 186, 0.2);
}

View File

@@ -1,14 +1,18 @@
.ui-segment {
.segmented {
color: #02adba;
border: 1px solid #02adba;
border-radius: 4px;
display: inline-block;
}
.ui-segment span.option.active {
.segmented label {
color: #02adba;
}
.segmented input:checked + .label {
background-color: #02adba;
color: white;
}
.ui-segment span.option {
[type="radio"]:not(:checked) + span,
[type="radio"]:checked + span {
padding-left: 30px;
padding-right: 30px;
height: 35px;
@@ -21,9 +25,14 @@
border-right: 1px solid #02adba;
}
.ui-segment span.option:last-child {
.segmented :last-child .label {
border-right: none;
}
.segment-select {
.segmented input {
display: none;
}
span.label:before,
span.label:after {
display: none;
}

View File

@@ -2,7 +2,7 @@
const express = require("express");
const welcome = require("../controllers/welcome").getWelcome;
const { getWelcome, postWelcome } = require("../controllers/welcome");
const {
getRealEstateTypes,
postRealEstateTypes
@@ -20,7 +20,8 @@ const { getFilters, postFilters } = require("../controllers/realEstateFilters");
const router = express.Router();
router.get("/", welcome);
router.get("/", getWelcome);
router.post("/", postWelcome);
router.get("/vrstanekretnine/:searchRequestId", getRealEstateTypes);
router.get("/vrstanekretnine", getRealEstateTypes);

View File

@@ -8,6 +8,7 @@ const {
generateNewSearchRequestEmail,
generateEmailSubject
} = require("../helpers/emailContentGenerator");
const { findNotNotifiedMatches } = require("../helpers/db/searchRequestMatch");
const { sendEmail } = require("../services/emailService");
const notifyForNewRealEstates = async newRealEstates => {
@@ -29,34 +30,87 @@ const notifyForNewSearchRequest = async searchRequest => {
await sendEmail(email, "Kivi - novi zahtjev za pretragu", emailContent);
};
const notifyMatches = async matches => {
const notifyMatches = async (matches, dailyNotification = false) => {
const searchRequestsToNotify = Object.keys(matches);
const asyncSendEmailActions = [];
for (const id of searchRequestsToNotify) {
const { searchRequest } = matches[id];
const { email } = searchRequest;
const allMatchingRealEstates = matches[id].realEstates || [];
if (allMatchingRealEstates.length > 0) {
const emailContent = generateNotificationEmail(
allMatchingRealEstates,
id
);
const emailSubject = generateEmailSubject(
allMatchingRealEstates.length,
allMatchingRealEstates[0].title
);
const { searchRequest, notifyNow } = matches[id];
const { email, subscribed } = searchRequest;
if (notifyNow && subscribed) {
const allMatchingRealEstates = matches[id].realEstates || [];
if (allMatchingRealEstates.length > 0) {
const emailContent = generateNotificationEmail(
allMatchingRealEstates,
id,
dailyNotification
);
const emailSubject = generateEmailSubject(
allMatchingRealEstates.length,
allMatchingRealEstates[0].title
);
const sendEmailPromise = sendEmail(email, emailSubject, emailContent);
asyncSendEmailActions.push(sendEmailPromise);
sendEmailPromise.catch(err => console.log("[Email Sending Failed]", err));
const sendEmailPromise = sendEmail(email, emailSubject, emailContent);
asyncSendEmailActions.push(sendEmailPromise);
sendEmailPromise.catch(err =>
console.log("[Email Sending Failed]", err)
);
}
}
}
await Promise.all(asyncSendEmailActions);
};
/**
 * Groups all not-yet-notified search request matches by search request,
 * flags each grouped match as notified, and hands the groups to
 * `notifyMatches` as a daily notification.
 *
 * The returned promise settles only after the emails have been handed off
 * AND every `notified` flag has been saved, so a failed save rejects the
 * call instead of being silently lost.
 *
 * @returns {Promise<void>}
 */
const notifyRequestsWithDailyOption = async () => {
  const notNotifiedSearchRequestMatches = await findNotNotifiedMatches();
  const matches = {};
  // save() promises are collected so they can be awaited below
  const pendingSaves = [];

  for (const searchRequestMatch of notNotifiedSearchRequestMatches) {
    const { searchRequests, realEstates } = searchRequestMatch;

    if (!Array.isArray(searchRequests) || searchRequests.length !== 1) {
      // Something is wrong with this match
      // (search request not found for specified search request id)
      // OR
      // there are multiple search requests with the same ID (this should never be the case!)
      // TODO: Maybe if association is defined better, this will be automatically only one object instead of array
      continue;
    }

    if (!Array.isArray(realEstates) || realEstates.length !== 1) {
      // Something is wrong with this match
      // (real estate not found for specified real estate id)
      // OR
      // there are multiple real estates with the same ID (this should never be the case!)
      // TODO: Maybe if association is defined better, this will be automatically only one object instead of array
      continue;
    }

    const searchRequest = searchRequests[0];
    const realEstate = realEstates[0];
    const searchRequestId = searchRequest.id;

    if (!matches[searchRequestId]) {
      matches[searchRequestId] = {
        searchRequest,
        realEstates: [],
        // records loaded here are always due, so notify immediately
        notifyNow: true
      };
    }
    matches[searchRequestId].realEstates.push(realEstate);

    searchRequestMatch.notified = true;
    pendingSaves.push(searchRequestMatch.save());
  }

  // Sending and saving still run concurrently, but both are now awaited
  await Promise.all([notifyMatches(matches, true), ...pendingSaves]);
};
module.exports = {
notifyForNewRealEstates,
notifyForNewSearchRequest
notifyForNewSearchRequest,
notifyRequestsWithDailyOption
};

View File

@@ -6,6 +6,7 @@ const {
const { findRealEstatesForSearchRequest } = require("../helpers/db/realEstate");
const { addMatches } = require("../helpers/db/searchRequestMatch");
const { MAX_REAL_ESTATES_IN_FIRST_EMAIL } = require("../config/appConfig");
const { EMAIL_FREQUENCY } = require("../common/enums");
const matchRealEstates = async realEstates => {
if (Array.isArray(realEstates)) {
@@ -18,18 +19,19 @@ const matchRealEstates = async realEstates => {
searchRequestsPromise.then(searchRequests => {
for (const searchRequest of searchRequests) {
const { id } = searchRequest;
const { id, emailFrequency } = searchRequest;
if (!matches[id]) {
matches[id] = {
searchRequest,
realEstates: []
realEstates: [],
notifyNow: emailFrequency === EMAIL_FREQUENCY.ASAP.stringId
};
}
matches[id].realEstates.push(realEstate);
matchingRecords.push({
searchRequestId: searchRequest.id,
realEstateId: realEstate.id,
notified: false
notified: emailFrequency === EMAIL_FREQUENCY.ASAP.stringId
});
}
});
@@ -62,7 +64,7 @@ const matchSearchRequest = async searchRequest => {
matchingRecords.push({
searchRequestId,
realEstateId: realEstate.id,
notified: false
notified: true
});
}

View File

@@ -42,7 +42,7 @@
function locateMe() {
if (navigator.geolocation) {
const onLocationSuccess = (position) => {
function onLocationSuccess (position) {
const coordinates = position && position.coords ? position.coords : null;
if (coordinates){
const longitude = coordinates.longitude || null;
@@ -53,7 +53,7 @@
map.setZoom(16);
}
}
};
}
navigator.geolocation.getCurrentPosition(onLocationSuccess);
}
@@ -162,8 +162,8 @@
input.attachEvent = addEventListenerWrapper
}
$(document).ready(() => {
$("#submit").click(() => {
$(document).ready(function() {
$("#submit").click(function() {
const mapBounds = map.getBounds();
$("#north").val(mapBounds.getNorthEast().lat());
@@ -178,4 +178,4 @@
});
</script>
<script src="https://maps.googleapis.com/maps/api/js?key=AIzaSyAna8ohfV2HBMcxGk_29vqxU5Z_bDickqg&language=bs&libraries=places&callback=initMap" async
defer></script>
defer></script>

View File

@@ -1,53 +0,0 @@
<form method="POST" id="form-range">
<div class="row center-align no-ui-slider centered-element-small" id="slider"></div>
<div class="col s6 push-s3 centered-element-small">
<a id="btnsubmit" href="#" class="next-center-button waves-effect waves-light btn">
Dalje
</a>
</div>
<input type="hidden" name="from" id="from" />
<input type="hidden" name="to" id="to" />
</form>
<script>
$(document).ready(() => {
var slider = document.getElementById('slider');
const unitFormat = wNumb({
decimals: 3,
thousand: '.',
suffix: '<%= unit %>'
})
noUiSlider.create(slider, {
start: [<%= rangeFrom.value %>, <%= rangeTo.value %>],
connect: true,
tooltips: true,
step: <%= rangeFrom.step %>,
range: {
'min': <%= rangeFrom.min %>,
'max': <%= rangeTo.max %>
},
format: unitFormat
});
$("#btnsubmit").click(() => {
const sliderValues = slider.noUiSlider.get();
$("#from").val(unitFormat.from(sliderValues[0]));
$("#to").val(unitFormat.from(sliderValues[1]));
$("#form-range").submit();
// });
});
});
</script>

View File

@@ -1,7 +1,8 @@
<br><br>
<form method="POST" id="form-queryreview">
<div class="row center-align">
<ul class="collection with-header">
<% for(const stepData of queryReviewData) { %>
<% for(const stepData of queryReviewTableData) { %>
<li class="collection-item" >
<div id="<%= stepData.id %>" ><%= stepData.title || '-' %>
<a href="<%= stepData.url %>" class="kivi-color secondary-content">
@@ -12,6 +13,26 @@
<% } %>
</ul>
</div>
<div class="row center-align">
<h6>Slanje obavještenja</h6>
<span class="segmented">
<label>
<input type="radio" name="emailFrequency" value="<%= EMAIL_FREQUENCY.ASAP.id %>"
<% if (selectedEmailFrequency === EMAIL_FREQUENCY.ASAP.id) { %>
checked
<% } %>>
<span class="label"><%= EMAIL_FREQUENCY.ASAP.title %></span>
</label>
<label>
<input type="radio" name="emailFrequency" value="<%= EMAIL_FREQUENCY.DAILY.id %>"
<% if (selectedEmailFrequency === EMAIL_FREQUENCY.DAILY.id) { %>
checked
<% } %>>
<span class="label"><%= EMAIL_FREQUENCY.DAILY.title %></span>
</label>
</span>
</div>
<div class="row center-align">
<div class="col">
<input id="email" name="email" type="email" placeholder="vas.email@mail.com" <% if (email) { %>value="<%= email %>" <% } %> required size="250" />
@@ -46,8 +67,9 @@
</form>
<script>
$(document).ready( () => {
$("#submit").click( () => {
$(document).ready(function() {
$("#submit").click(function() {
const simpleEmailRegex = /^(([^<>()\[\]\\.,;:\s@"]+(\.[^<>()\[\]\\.,;:\s@"]+)*)|(".+"))@((\[[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}])|(([a-zA-Z\-0-9]+\.)+[a-zA-Z]{2,}))$/;
const email = $("#email").val();
const confirmEmail = $("#confirmEmail").val();
@@ -61,7 +83,7 @@
$("#submit").attr("disabled", true);
$("#form-queryreview").submit();
}else{
$("#error-label-email").text("Greška ! Unešeni emailovi nisu isti");
$("#error-label-email").text("Greška ! Unešeni email nije validan");
}
});
});

View File

@@ -66,19 +66,19 @@
</form>
<script>
$(document).ready(() => {
const priceSliderOptions = {...<%- priceSliderOptions %>};
const sizeSliderOptions = {...<%- sizeSliderOptions %>};
$(document).ready(function() {
const priceSliderOptions = <%- priceSliderOptions %>;
const sizeSliderOptions = <%- sizeSliderOptions %>;
const priceStep = priceSliderOptions.step;
const sizeStep = sizeSliderOptions.step;
delete priceSliderOptions.step;
delete sizeSliderOptions.step;
const updatePriceInputs = (values, handle, unencoded) => {
function updatePriceInputs(values, handle, unencoded) {
$("#priceMin").val(Math.round(unencoded[0]/priceStep)*priceStep);
$("#priceMax").val(Math.round(unencoded[1]/priceStep)*priceStep);
}
const updateSizeInputs = (values, handle, unencoded) => {
function updateSizeInputs(values, handle, unencoded) {
$("#sizeMin").val(Math.round(unencoded[0]/sizeStep)*sizeStep);
$("#sizeMax").val(Math.round(unencoded[1]/sizeStep)*sizeStep);
}
@@ -92,7 +92,7 @@
priceSliderObject.on('slide', updatePriceInputs);
sizeSliderObject.on('slide', updateSizeInputs);
const priceMinChangeHandler = (element) => {
function priceMinChangeHandler(element) {
if (element && element.currentTarget && element.currentTarget.value){
const currentValues = priceSliderObject.get();
const newValue = element.currentTarget.value;
@@ -101,7 +101,7 @@
$("#priceMin").val(Math.round(priceSliderObject.get()[0]));
}
}
const priceMaxChangeHandler = (element) => {
function priceMaxChangeHandler(element) {
if (element && element.currentTarget && element.currentTarget.value){
const newValue = element.currentTarget.value;
priceSliderObject.set([null, newValue]);
@@ -113,7 +113,7 @@
$("#priceMin").change(priceMinChangeHandler);
$("#priceMax").change(priceMaxChangeHandler);
const sizeMinChangeHandler = (element) => {
function sizeMinChangeHandler(element) {
if (element && element.currentTarget && element.currentTarget.value){
const currentValues = sizeSliderObject.get();
const newValue = element.currentTarget.value;
@@ -122,7 +122,7 @@
$("#sizeMin").val(Math.round(sizeSliderObject.get()[0]));
}
}
const sizeMaxChangeHandler = (element) => {
function sizeMaxChangeHandler(element) {
if (element && element.currentTarget && element.currentTarget.value){
const newValue = element.currentTarget.value;
sizeSliderObject.set([null, newValue]);
@@ -135,11 +135,11 @@
$("#sizeMax").change(sizeMaxChangeHandler);
<% if(hasGardenSize) { %>
const gardenSizeSliderOptions = {...<%- gardenSizeSliderOptions %>};
const gardenSizeSliderOptions = <%- gardenSizeSliderOptions %>;
const gardenSizeStep = gardenSizeSliderOptions.step;
delete gardenSizeSliderOptions.step;
const updateGardenSizeInputs = (values, handle, unencoded) => {
function updateGardenSizeInputs(values, handle, unencoded) {
$("#gardenSizeMin").val(Math.round(unencoded[0]/gardenSizeStep)*gardenSizeStep);
$("#gardenSizeMax").val(Math.round(unencoded[1]/gardenSizeStep)*gardenSizeStep);
}
@@ -147,7 +147,7 @@
const gardenSizeSlider = document.getElementById("gardenSizeFilter");
const gardenSizeSliderObject = noUiSlider.create(gardenSizeSlider, gardenSizeSliderOptions);
gardenSizeSliderObject.on('slide', updateGardenSizeInputs);
const gardenSizeMinChangeHandler = (element) => {
function gardenSizeMinChangeHandler(element) {
if (element && element.currentTarget && element.currentTarget.value){
const currentValues = gardenSizeSliderObject.get();
const newValue = element.currentTarget.value;
@@ -156,7 +156,7 @@
$("#gardenSizeMin").val(Math.round(gardenSizeSliderObject.get()[0]));
}
}
const gardenSizeMaxChangeHandler = (element) => {
function gardenSizeMaxChangeHandler(element) {
if (element && element.currentTarget && element.currentTarget.value){
const newValue = element.currentTarget.value;
gardenSizeSliderObject.set([null, newValue]);
@@ -169,7 +169,7 @@
$("#gardenSizeMax").change("step", gardenSizeMaxChangeHandler);
<% } %>
$("#submit").click(() => {
$("#submit").click(function() {
const priceFilterValues = priceSlider.noUiSlider.get();
$("#priceFilterMin").val(priceFilterValues[0]);
$("#priceFilterMax").val(priceFilterValues[1]);

View File

@@ -3,18 +3,23 @@
<div class="center-align">
<div class="row">
<select class="segment-select" id="adType" name="adType">
<option value="<%= AD_TYPE.AD_TYPE_SALE.id %>"
<% if (selectedAdType === AD_TYPE.AD_TYPE_SALE.id) { %>
selected="selected"
<% } %>
><%= AD_TYPE.AD_TYPE_SALE.title %></option>
<option value="<%= AD_TYPE.AD_TYPE_RENT.id %>"
<% if (selectedAdType === AD_TYPE.AD_TYPE_RENT.id) { %>
selected="selected"
<% } %>
><%= AD_TYPE.AD_TYPE_RENT.title %></option>
</select>
<span class="segmented">
<label>
<input type="radio" name="adType" value="<%= AD_TYPE.AD_TYPE_SALE.id %>"
<% if (selectedAdType === AD_TYPE.AD_TYPE_SALE.id) { %>
checked
<% } %>>
<span class="label"><%= AD_TYPE.AD_TYPE_SALE.title %></span>
</label>
<label>
<input type="radio" name="adType" value="<%= AD_TYPE.AD_TYPE_RENT.id %>"
<% if (selectedAdType === AD_TYPE.AD_TYPE_RENT.id) { %>
checked
<% } %>>
<span class="label"><%= AD_TYPE.AD_TYPE_RENT.title %></span>
</label>
</span>
</div>
<br>
@@ -37,45 +42,6 @@
</form>
<script>
(function($) {
$.fn.extend({
Segment: function() {
$(this).each(function() {
const self = $(this);
const onchange = self.attr('onchange');
const wrapper = $("<div>", { class: "ui-segment" });
$(this)
.find("option")
.each(function() {
const option = $("<span>", {
class: "option",
onclick: onchange,
text: $(this).text(),
value: $(this).val(),
});
if ($(this).is(":selected")) {
option.addClass("active");
}
wrapper.append(option);
});
wrapper.find("span.option").click(function (){
wrapper.find("span.option").removeClass("active");
$(this).addClass("active");
self.val($(this).attr('value'));
});
$(this).after(wrapper);
$(this).hide();
});
}
});
})(jQuery);
$(document).ready(() => {
$(".segment-select").Segment();
});
function saveAndSubmit(id) {
$("#realEstateType").val(id);
$("#realEstateTypeSelection > a").attr("onclick", "");

View File

@@ -20,7 +20,7 @@
</h6>
</div>
<script>
window.onload = () => {
window.onload = function() {
document.getElementById('realEstateUrl').click();
}
</script>

View File

@@ -1,4 +1,3 @@
<!-- -->
<br><br>
<div class="row center-align">
<img src="assets/images/logo.svg" alt="kivi logo" width="160">
@@ -8,8 +7,26 @@
<div> Na vaš email. </div>
<div> BESPLATNO </div>
</div>
<div class="row center-align">
<div class="col s6 push-s3">
<a href="<%= nextStep %>" class="welcome-center-button btn">Javi mi</a>
<form method="POST" name="welcomeForm">
<div class="row center-align">
<div class="col s5 m4 l3 push-s1 push-m2 push-l3">
<a href="#" onclick="saleClick()" class="welcome-center-button btn">Kupi</a>
</div>
<div class="col s5 m4 l3 push-s1 push-m2 push-l3">
<a href="#" onclick="rentClick()" class="welcome-center-button btn">Unajmi</a>
</div>
</div>
</div>
<input type="hidden" id="adType" name="adType">
</form>
<script>
// Stores the SALE ad type id in the hidden #adType input, then submits welcomeForm.
function saleClick() {
  const adTypeInput = $("#adType");
  adTypeInput.val("<%= AD_TYPE.AD_TYPE_SALE.id %>");
  const welcomeForm = document.welcomeForm;
  welcomeForm.submit();
}
// Stores the RENT ad type id in the hidden #adType input, then submits welcomeForm.
function rentClick() {
  const adTypeInput = $("#adType");
  adTypeInput.val("<%= AD_TYPE.AD_TYPE_RENT.id %>");
  const welcomeForm = document.welcomeForm;
  welcomeForm.submit();
}
</script>

View File

@@ -31,6 +31,7 @@ OLX_CRAWLER_AD_TYPE=enum name of what type of ads should be crawled, check commo
OLX_CRAWLER_AD_CATEGORIES=comma separated list of enum names of categories to be included, check common/enums.js file for valid values
OLX_IGNORED_USERNAMES=comma separated list of usernames to ignore
OLX_DELAY_BETWEEN_PAGES=time in milliseconds to wait before indexing next page
OLX_FORCE_CRAWL=Non-zero value will force crawler to crawl all pages without stopping when known real estate is found
#==RENTAL==
RENTAL_MAX_PAGES=Restrict crawler to this number of pages
RENTAL_MAX_RESULTS_PER_PAGE=Only this number or less results from one page will be scraped and saved
@@ -38,13 +39,15 @@ RENTAL_CRAWLER_AD_TYPE=enum name of what type of ads should be crawled, check co
RENTAL_CRAWLER_AD_CATEGORIES=comma separated list of enum names of categories to be included, check common/enums.js file for valid values
RENTAL_IGNORED_USERNAMES=!!! This is not used for rental crawler !!!
RENTAL_DELAY_BETWEEN_PAGES=time in milliseconds to wait before indexing next page
RENTAL_FORCE_CRAWL=Non-zero value will force crawler to crawl all pages without stopping when known real estate is found
#==PROSTOR==
PROSTOR_MAX_PAGES=!!! This is not used for prostor crawler !!!
PROSTOR_MAX_RESULTS_PER_PAGE=For Prostor crawler, this represents MAX RESULTS in total
PROSTOR_MAX_RESULTS_PER_PAGE=For Prostor crawler, this represents how many ads are crawled at once
PROSTOR_CRAWLER_AD_TYPE=enum name of what type of ads should be crawled, check common/enums.js file for valid values
PROSTOR_CRAWLER_AD_CATEGORIES=comma separated list of enum names of categories to be included, check common/enums.js file for valid values
PROSTOR_IGNORED_USERNAMES=!!! This is not used for prostor crawler !!!
PROSTOR_DELAY_BETWEEN_PAGES=!!! This is not used for prostor crawler !!!
PROSTOR_FORCE_CRAWL=Non-zero value will force crawler to crawl all pages without stopping when known real estate is found
#==AKTIDO==
AKTIDO_MAX_PAGES=Restrict crawler to this number of pages
AKTIDO_MAX_RESULTS_PER_PAGE=Only this number or less results from one page will be scraped and saved
@@ -52,3 +55,4 @@ AKTIDO_CRAWLER_AD_TYPE=enum name of what type of ads should be crawled, check co
AKTIDO_CRAWLER_AD_CATEGORIES=comma separated list of enum names of categories to be included, check common/enums.js file for valid values
AKTIDO_IGNORED_USERNAMES=!!! This is not used for aktido crawler !!!
AKTIDO_DELAY_BETWEEN_PAGES=time in milliseconds to wait before indexing next page
AKTIDO_FORCE_CRAWL=Non-zero value will force crawler to crawl all pages without stopping when known real estate is found

41
package-lock.json generated
View File

@@ -1396,7 +1396,8 @@
"ansi-regex": {
"version": "2.1.1",
"bundled": true,
"dev": true
"dev": true,
"optional": true
},
"aproba": {
"version": "1.2.0",
@@ -1417,12 +1418,14 @@
"balanced-match": {
"version": "1.0.0",
"bundled": true,
"dev": true
"dev": true,
"optional": true
},
"brace-expansion": {
"version": "1.1.11",
"bundled": true,
"dev": true,
"optional": true,
"requires": {
"balanced-match": "^1.0.0",
"concat-map": "0.0.1"
@@ -1437,17 +1440,20 @@
"code-point-at": {
"version": "1.1.0",
"bundled": true,
"dev": true
"dev": true,
"optional": true
},
"concat-map": {
"version": "0.0.1",
"bundled": true,
"dev": true
"dev": true,
"optional": true
},
"console-control-strings": {
"version": "1.1.0",
"bundled": true,
"dev": true
"dev": true,
"optional": true
},
"core-util-is": {
"version": "1.0.2",
@@ -1564,7 +1570,8 @@
"inherits": {
"version": "2.0.3",
"bundled": true,
"dev": true
"dev": true,
"optional": true
},
"ini": {
"version": "1.3.5",
@@ -1576,6 +1583,7 @@
"version": "1.0.0",
"bundled": true,
"dev": true,
"optional": true,
"requires": {
"number-is-nan": "^1.0.0"
}
@@ -1590,6 +1598,7 @@
"version": "3.0.4",
"bundled": true,
"dev": true,
"optional": true,
"requires": {
"brace-expansion": "^1.1.7"
}
@@ -1597,12 +1606,14 @@
"minimist": {
"version": "0.0.8",
"bundled": true,
"dev": true
"dev": true,
"optional": true
},
"minipass": {
"version": "2.3.5",
"bundled": true,
"dev": true,
"optional": true,
"requires": {
"safe-buffer": "^5.1.2",
"yallist": "^3.0.0"
@@ -1621,6 +1632,7 @@
"version": "0.5.1",
"bundled": true,
"dev": true,
"optional": true,
"requires": {
"minimist": "0.0.8"
}
@@ -1701,7 +1713,8 @@
"number-is-nan": {
"version": "1.0.1",
"bundled": true,
"dev": true
"dev": true,
"optional": true
},
"object-assign": {
"version": "4.1.1",
@@ -1713,6 +1726,7 @@
"version": "1.4.0",
"bundled": true,
"dev": true,
"optional": true,
"requires": {
"wrappy": "1"
}
@@ -1798,7 +1812,8 @@
"safe-buffer": {
"version": "5.1.2",
"bundled": true,
"dev": true
"dev": true,
"optional": true
},
"safer-buffer": {
"version": "2.1.2",
@@ -1834,6 +1849,7 @@
"version": "1.0.2",
"bundled": true,
"dev": true,
"optional": true,
"requires": {
"code-point-at": "^1.0.0",
"is-fullwidth-code-point": "^1.0.0",
@@ -1853,6 +1869,7 @@
"version": "3.0.1",
"bundled": true,
"dev": true,
"optional": true,
"requires": {
"ansi-regex": "^2.0.0"
}
@@ -1896,12 +1913,14 @@
"wrappy": {
"version": "1.0.2",
"bundled": true,
"dev": true
"dev": true,
"optional": true
},
"yallist": {
"version": "3.0.3",
"bundled": true,
"dev": true
"dev": true,
"optional": true
}
}
},

View File

@@ -8,12 +8,14 @@
"start": "node ./index.js",
"start-mon": "nodemon ./index.js",
"migrate": "cd app && npx sequelize db:migrate",
"setup": "docker build -t marketalerts . && docker run -e POSTGRES_USER=docker -e POSTGRES_PASSWORD=docker -e POSTGRES_DB=marketalerts --name pg_marketalerts -d -p 5432:5432 marketalerts && sleep 4 && npm run migrate",
"setup": "docker build -t marketalerts . && docker run -e POSTGRES_USER=docker -e POSTGRES_PASSWORD=docker -e POSTGRES_DB=marketalerts --name pg_marketalerts -d -p 5432:5432 marketalerts && sleep 10 && npm run migrate",
"docker-start": "docker start pg_marketalerts",
"docker-stop": "docker stop pg_marketalerts",
"crawl": "cd app/crawler && node npmCrawl.js",
"daily-notify": "cd app/npmScripts && node npmDailyNotify.js",
"test-search": "cd test && node searchTest.js",
"test-olx-scraper": "cd test && node olxScrapeTest.js"
"test-olx-scraper": "cd test && node olxScrapeTest.js",
"test-rental-scraper": "cd test && node rentalScrapeTest.js"
},
"repository": {
"type": "git",

17
test/rentalScrapeTest.js Normal file
View File

@@ -0,0 +1,17 @@
"use strict";
const rentalCrawler = require("../app/crawler/specificCrawlers/rental");
const urlToScrape = process.argv[2] || undefined;
if (urlToScrape) {
const crawler = new rentalCrawler();
(async () => {
const data = await crawler.scrapeAd(urlToScrape);
console.log(data);
})();
} else {
console.log("No URL to scrape. Use like this : ");
console.log("npm run test-olx-scraper -- URL_TO_SCRAPE");
}