Compare commits

...

11 Commits

Author SHA1 Message Date
Naida Vatric
d117383802 Tested both ways for realestate and search req filters. 2020-01-17 22:58:22 +01:00
Naida Vatric
870b71a3c7 WIP Changed all logic for searchRequest. 2020-01-17 01:54:06 +01:00
Naida Vatric
4fd4018bf6 Merge branch 'sliders-formating' of gitlab.com:saburly/marketalarm/web into add-even-more-filters 2020-01-14 23:19:51 +01:00
Naida Vatric
259799144e Merge branch 'rental-crawler-fix' into 'master'
Rental crawler fix

See merge request saburly/marketalarm/web!80
2020-01-06 23:12:52 +00:00
Naida Vatric
bc73d4159d Merge branch 'master' into 'rental-crawler-fix'
# Conflicts:
#   .gitignore
2020-01-06 23:12:40 +00:00
Naida Vatric
37ad32fe76 Merge branch 'edit-location-start' into 'master'
Edit location start

See merge request saburly/marketalarm/web!79
2020-01-06 23:10:16 +00:00
Naida Vatric
94875a0fa3 Merge branch 'add-currency-to-price-filters' into 'master'
Add currency to price filters

See merge request saburly/marketalarm/web!78
2020-01-06 23:09:40 +00:00
Naida Vatric
0c2d218d29 Changed floor numbers and basement-attic tag. 2020-01-02 00:10:31 +01:00
Naida Vatric
fed2dc00dc Changed number of rooms. 2019-12-29 23:42:39 +01:00
Naida Vatric
d5d3a1f306 Changed accesRoadType logic 2019-12-26 23:30:05 +01:00
Naida Vatric
42ff1f762f Changed to avoid falsy values and not defined realestate parametrs. 2019-12-21 02:20:26 +01:00
5 changed files with 423 additions and 111 deletions

2
.gitignore vendored
View File

@@ -2,4 +2,4 @@ node_modules/
.env
.idea/
.eslintrc
.vscode/
.vscode/

View File

@@ -312,7 +312,7 @@ class RentalCrawler {
let numberOfRooms =
parseInt(extractedData["re_realEstates_roomsNO"]) +
parseInt(extractedData["re_realEstates_bedroomNO"]) || null,
parseInt(extractedData["re_realEstates_bedNO"]) || null,
numberOfFloors =
parseInt(extractedData["re_realEstates_floorsNO"]) ||
this.getNumberOfFloorsFromFloorId(extractedData["re_floorNO_id"]),
@@ -352,7 +352,9 @@ class RentalCrawler {
realEstatePropertiesFromInfrastructure.phoneConnection,
cableTV = realEstatePropertiesFromInfrastructure.cableTV,
internet = realEstatePropertiesFromInfrastructure.internet,
basementAttic = realEstatePropertiesFromSpaces.basementAttic,
basementAttic =
realEstatePropertiesFromSpaces.basementAttic ||
this.checkBasemAtticFromFloors(extractedData["re_floorNO_id"]),
storeRoom = realEstatePropertiesFromSpaces.storeRoom,
videoSurveillance =
realEstatePropertiesFromDescriptions.videoSurveillance ||
@@ -397,9 +399,7 @@ class RentalCrawler {
);
if (!publishedDateMoment.isValid()) {
throw {
message: `Invalid published date : ${
extractedData["re_realEstates_inserted"]
}`
message: `Invalid published date : ${extractedData["re_realEstates_inserted"]}`
};
}
@@ -410,9 +410,7 @@ class RentalCrawler {
);
if (!renewedDateMoment.isValid()) {
throw {
message: `Invalid renewed date : ${
extractedData["re_realEstates_edited"]
}`
message: `Invalid renewed date : ${extractedData["re_realEstates_edited"]}`
};
}
@@ -782,8 +780,42 @@ class RentalCrawler {
if (floorIds.length === 0) {
return null;
}
let noOfFloors = floorIds.length;
// Floors of 'suteren', 'podrum', 'tavan' and 'potkrovlje' are not counted
floorIds.forEach(id => {
if (
parseInt(id) === 1 ||
parseInt(id) === 2 ||
parseInt(id) === 12 ||
parseInt(id) === 14
) {
noOfFloors--;
}
});
return noOfFloors;
}
return floorIds.length;
checkBasemAtticFromFloors(floorsIdText) {
// floorIdText can be array of numbers, separated by comma or number
const floorIds = floorsIdText.split(",");
let check = false;
if (floorIds.length === 0) {
check = false;
}
//If floors 'suteren', 'podrum', 'tavan' and 'potkrovlje' exists then tag for basement-attic is true
floorIds.forEach(id => {
if (
parseInt(id) === 1 ||
parseInt(id) === 2 ||
parseInt(id) === 12 ||
parseInt(id) === 14
) {
check = true;
}
});
return check;
}
async sleep(ms) {

View File

@@ -2,6 +2,8 @@
const db = require("../../models/index");
const sequelize = require("sequelize");
const Op = sequelize.Op;
const { AD_CATEGORY } = require("../../common/enums");
const bulkUpsertRealEstates = async realEstateData => {
try {
const fieldsToUpdateIfDuplicate = [
@@ -102,6 +104,9 @@ const findRealEstatesForSearchRequest = async (searchRequest, maxResults) => {
accessRoadType
} = searchRequest;
//Needed for defining which attribute should exist or not
const realEstateTypeObject = AD_CATEGORY[realEstateType];
const longitudeColumn = sequelize.col("locationLong");
const latitudeColumn = sequelize.col("locationLat");
@@ -175,8 +180,13 @@ const findRealEstatesForSearchRequest = async (searchRequest, maxResults) => {
[Op.and]: geoSearchQueryPart
};
//Every other attribute is checked separately and included in query only if it is defined
if (gardenSizeMax && gardenSizeMin) {
//Every other attribute is checked separately and included in query only if it is defined for real estate type
if (
realEstateTypeObject.hasGardenSize &&
gardenSizeMax != null &&
gardenSizeMin != null
) {
query.gardenSize = {
[Op.lte]: gardenSizeMax,
[Op.gte]: gardenSizeMin
@@ -192,7 +202,11 @@ const findRealEstatesForSearchRequest = async (searchRequest, maxResults) => {
};
}
if (numberOfRoomsMin && numberOfRoomsMax) {
if (
realEstateTypeObject.hasNumberOfRoom &&
numberOfRoomsMin != null &&
numberOfRoomsMax != null
) {
query.numberOfRooms = {
[Op.lte]: numberOfRoomsMax,
[Op.gte]: numberOfRoomsMin
@@ -208,7 +222,11 @@ const findRealEstatesForSearchRequest = async (searchRequest, maxResults) => {
};
}
if (numberOfFloorsMin && numberOfFloorsMax) {
if (
realEstateTypeObject.hasNumberOfFloors &&
numberOfFloorsMin != null &&
numberOfFloorsMax != null
) {
query.numberOfFloors = {
[Op.lte]: numberOfFloorsMax,
[Op.gte]: numberOfFloorsMin
@@ -224,7 +242,11 @@ const findRealEstatesForSearchRequest = async (searchRequest, maxResults) => {
};
}
if (floorMin && floorMax) {
if (
realEstateTypeObject.hasFloorProp &&
floorMin != null &&
floorMax != null
) {
query.floor = {
[Op.lte]: floorMax,
[Op.gte]: floorMin
@@ -239,8 +261,10 @@ const findRealEstatesForSearchRequest = async (searchRequest, maxResults) => {
}
};
}
if (balcony) {
//Logic for balcony, newBuilding and elevator from the user's side:
//if the checkbox is checked (true), the characteristic must be true;
//if it is not checked, the user doesn't care - it can be null, false or true
if (realEstateTypeObject.hasBalconyProp && balcony === true) {
query.balcony = {
[Op.eq]: balcony
};
@@ -252,7 +276,7 @@ const findRealEstatesForSearchRequest = async (searchRequest, maxResults) => {
};
}
if (newBuilding) {
if (realEstateTypeObject.hasNewBuildingProp && newBuilding === true) {
query.newBuilding = {
[Op.eq]: newBuilding
};
@@ -264,7 +288,7 @@ const findRealEstatesForSearchRequest = async (searchRequest, maxResults) => {
};
}
if (elevator) {
if (realEstateTypeObject.hasElevatorProp && elevator === true) {
query.elevator = {
[Op.eq]: elevator
};
@@ -275,7 +299,8 @@ const findRealEstatesForSearchRequest = async (searchRequest, maxResults) => {
}
};
}
//If the user wants 'ANY' access road type, it is not included in the query -
//every road type, including null values, is returned
if (accessRoadType !== "ANY") {
query.accessRoadType = {
[Op.eq]: accessRoadType

View File

@@ -49,128 +49,383 @@ const findSearchRequestsForRealEstate = async realEstate => {
const geoSearchQueryPart = sequelize.where(contains, true);
//General query contains only attributes that are defined for every RealEstate - not null
const query = {
adType,
realEstateType,
subscribed: true,
[Op.and]: geoSearchQueryPart
};
//Needed for defining which attribute should exist or not
const realEstateTypeObject = AD_CATEGORY[realEstateType];
//Needed to decide on including incomplete RealEstates data
// ?? Needed to decide on including incomplete RealEstates data
let checkForIncompleteWanted = false;
//Attributes are checked separately and included in query only if defined
//Price and area should be defined for every property
//Attributes are checked separately to make different query parts
if (price) {
query.priceMin = {
[Op.lte]: price
};
query.priceMax = {
[Op.gte]: price
//If price is null it will be excluded from query - it will show properties with null price values
//User always defines price and area (sliders) - not null in search req
let priceQuery = {};
if (price != null) {
priceQuery = {
[Op.and]: [
{
priceMin: {
[Op.lte]: price
}
},
{
priceMax: {
[Op.gte]: price
}
}
]
};
}
if (area) {
query.sizeMin = {
[Op.lte]: area
};
query.sizeMax = {
[Op.gte]: area
let areaQuery = {};
if (area != null) {
areaQuery = {
[Op.and]: [
{
sizeMin: {
[Op.lte]: area
}
},
{
sizeMax: {
[Op.gte]: area
}
}
]
};
} else {
checkForIncompleteWanted = true;
}
//Other attributes can be defined or not depending on RealEstate type
if (gardenSize) {
query.gardenSizeMin = {
[Op.lte]: gardenSize
};
query.gardenSizeMax = {
[Op.gte]: gardenSize
};
} else if (realEstateTypeObject.hasGardenSize) {
checkForIncompleteWanted = true;
//we check what to include in query based on real estate type object
let gardenSizeQuery = {};
if (realEstateTypeObject.hasGardenSize) {
if (gardenSize != null) {
gardenSizeQuery = {
[Op.and]: [
{
gardenSizeMin: {
[Op.lte]: gardenSize
}
},
{
gardenSizeMax: {
[Op.gte]: gardenSize
}
}
]
};
} else {
checkForIncompleteWanted = true;
}
}
if (numberOfRooms) {
query.numberOfRoomsMin = {
[Op.lte]: numberOfRooms
};
query.numberOfRoomsMax = {
[Op.gte]: numberOfRooms
};
} else if (realEstateTypeObject.hasNumberOfRoom) {
checkForIncompleteWanted = true;
let numberOfRoomsQuery = {};
if (realEstateTypeObject.hasNumberOfRoom) {
if (numberOfRooms != null) {
//If the real estate has a defined number of rooms (e.g. 3), return requests
//that accept 3 rooms or that have no room range defined (null),
//e.g. because the user didn't choose advanced filters at all
numberOfRoomsQuery = {
[Op.and]: [
{
numberOfRoomsMin: {
[Op.or]: {
[Op.lte]: numberOfRooms,
[Op.is]: null
}
}
},
{
numberOfRoomsMax: {
[Op.or]: {
[Op.gte]: numberOfRooms,
[Op.is]: null
}
}
}
]
};
} else {
//If the real estate has no defined number of rooms (null),
//return requests that didn't choose a number of rooms (also null)
//or ones that picked some values but also chose to include incomplete ads
numberOfRoomsQuery = {
[Op.or]: [
{
[Op.and]: [
{
numberOfRoomsMin: {
[Op.is]: null
}
},
{
numberOfRoomsMax: {
[Op.is]: null
}
}
]
},
{
includeIncompleteAds: {
[Op.eq]: true
}
}
]
};
}
}
//Same logic for number of Floors and floors
let numberOfFloorsQuery = {};
if (realEstateTypeObject.hasNumberOfFloors) {
if (numberOfFloors != null) {
numberOfFloorsQuery = {
[Op.and]: [
{
numberOfFloorsMin: {
[Op.or]: {
[Op.lte]: numberOfFloors,
[Op.is]: null
}
}
},
{
numberOfFloorsMax: {
[Op.or]: {
[Op.gte]: numberOfFloors,
[Op.is]: null
}
}
}
]
};
} else {
numberOfFloorsQuery = {
[Op.or]: [
{
[Op.and]: [
{
numberOfFloorsMin: {
[Op.is]: null
}
},
{
numberOfFloorsMax: {
[Op.is]: null
}
}
]
},
{
includeIncompleteAds: {
[Op.eq]: true
}
}
]
};
}
}
let floorQuery = {};
if (realEstateTypeObject.hasFloorProp) {
if (floor != null) {
floorQuery = {
[Op.and]: [
{
floorMin: {
[Op.or]: {
[Op.lte]: floor,
[Op.is]: null
}
}
},
{
floorMax: {
[Op.or]: {
[Op.gte]: floor,
[Op.is]: null
}
}
}
]
};
} else {
floorQuery = {
[Op.or]: [
{
[Op.and]: [
{
floorMin: {
[Op.is]: null
}
},
{
floorMax: {
[Op.is]: null
}
}
]
},
{
includeIncompleteAds: {
[Op.eq]: true
}
}
]
};
}
}
if (numberOfFloors) {
query.numberOfFloorsMin = {
[Op.lte]: numberOfFloors
};
query.numberOfFloorsMax = {
[Op.gte]: numberOfFloors
};
} else if (realEstateTypeObject.hasNumberOfFloors) {
checkForIncompleteWanted = true;
//Logic for balcony, newBuilding and elevator
//If the user doesn't check a checkbox (e.g. elevator), it does not mean they only want real estates without an elevator
//If the real estate characteristic is true: find all requests, both those that want the characteristic and those that don't care - no query part needed
//If the real estate characteristic is false: find all requests except the ones that want the characteristic to be true
//If the real estate characteristic is null (unknown whether true or false): find requests that don't care, or that want the characteristic and also accept incomplete ads
let balconyQuery = {};
if (realEstateTypeObject.hasBalconyProp && balcony !== true) {
if (balcony === false) {
balconyQuery = {
balcony: {
[Op.ne]: true
}
};
} else if (balcony === null) {
balconyQuery = {
[Op.or]: [
{
balcony: {
[Op.ne]: true
}
},
{
[Op.and]: [
{
balcony: {
[Op.eq]: true
}
},
{
includeIncompleteAds: {
[Op.eq]: true
}
}
]
}
]
};
}
}
if (floor) {
query.floorMin = {
[Op.lte]: floor
};
query.floorMax = {
[Op.gte]: floor
};
} else if (realEstateTypeObject.hasFloorProp) {
checkForIncompleteWanted = true;
let newBuildingQuery = {};
if (realEstateTypeObject.hasNewBuildingProp && newBuilding !== true) {
if (newBuilding === false) {
newBuildingQuery = {
newBuilding: {
[Op.ne]: true
}
};
} else if (newBuilding === null) {
newBuildingQuery = {
[Op.or]: [
{
newBuilding: {
[Op.ne]: true
}
},
{
[Op.and]: [
{
newBuilding: {
[Op.eq]: true
}
},
{
includeIncompleteAds: {
[Op.eq]: true
}
}
]
}
]
};
}
}
let elevatorQuery = {};
if (realEstateTypeObject.hasElevatorProp && elevator !== true) {
if (elevator === false) {
elevatorQuery = {
elevator: {
[Op.ne]: true
}
};
} else if (elevator === null) {
elevatorQuery = {
[Op.or]: [
{
elevator: {
[Op.ne]: true
}
},
{
[Op.and]: [
{
elevator: {
[Op.eq]: true
}
},
{
includeIncompleteAds: {
[Op.eq]: true
}
}
]
}
]
};
}
}
//General query consists of each individual query
const query = {
adType,
realEstateType,
subscribed: true,
[Op.and]: [
geoSearchQueryPart,
priceQuery,
areaQuery,
gardenSizeQuery,
numberOfRoomsQuery,
numberOfFloorsQuery,
floorQuery,
balconyQuery,
newBuildingQuery,
elevatorQuery
]
};
if (accessRoadType) {
//AccessRoadType is defined - should exists for each ad and estate type
if (accessRoadType != null) {
query.accessRoadType = {
[Op.or]: {
[Op.eq]: "ANY",
[Op.like]: "ANY",
[Op.eq]: accessRoadType
}
};
} else if (realEstateTypeObject.hasAccesRoadType) {
checkForIncompleteWanted = true;
}
if (balcony) {
query.balcony = {
[Op.eq]: balcony
} else {
//Real estates with a null access road type are matched only to user requests that asked for the ANY access road type
query.accessRoadType = {
[Op.eq]: "ANY"
};
} else if (realEstateTypeObject.hasBalconyProp) {
checkForIncompleteWanted = true;
}
if (newBuilding) {
query.newBuilding = {
[Op.eq]: newBuilding
};
} else if (realEstateTypeObject.hasNewBuildingProp) {
checkForIncompleteWanted = true;
}
if (elevator) {
query.elevator = {
[Op.eq]: elevator
};
} else if (realEstateTypeObject.hasElevatorProp) {
checkForIncompleteWanted = true;
}
//If one of the attributes that exists for property type is null
//we include in query to check if incomplete real estates are accepted
//Tag to check if incomplete ads are accepted in query
if (checkForIncompleteWanted) {
query.includeIncompleteAds = {
[Op.eq]: true
};
}
return await db.SearchRequest.findAll({ where: query });
return await db.SearchRequest.findAll({
where: query
});
};
module.exports = {

View File

@@ -13,5 +13,5 @@ if (urlToScrape) {
})();
} else {
console.log("No URL to scrape. Use like this : ");
console.log("npm run test-olx-scraper -- URL_TO_SCRAPE");
console.log("npm run test-rental-scraper -- URL_TO_SCRAPE");
}