From a63671959b7588f37620271235ba84f6c6f47c23 Mon Sep 17 00:00:00 2001
From: Bilal Catic
Date: Tue, 12 Nov 2019 22:53:16 +0100
Subject: [PATCH] improve real estate properties detection for Rental

---
 app/crawler/specificCrawlers/rental.js | 184 +++++++++++++++++++++++--
 1 file changed, 170 insertions(+), 14 deletions(-)

diff --git a/app/crawler/specificCrawlers/rental.js b/app/crawler/specificCrawlers/rental.js
index de9618e..020fa10 100644
--- a/app/crawler/specificCrawlers/rental.js
+++ b/app/crawler/specificCrawlers/rental.js
@@ -218,7 +218,6 @@ class RentalCrawler {
       const jsonData = scriptElement[0].children[0].data.substring(20);
       const parsedJsonData = JSON.parse(jsonData);
       extractedData = parsedJsonData[0];
-      // console.log(extractedData);
     } catch (e) {
       throw { message: "Can't find ad data JSON" };
     }
@@ -262,6 +261,28 @@ class RentalCrawler {
       };
     }
 
+    // Check that each id field exists before splitting it; Array.isArray()
+    // on the result of .split().map() can never fail. String() also covers
+    // a single id that was decoded as a number instead of text.
+    const rawInfrastructureIds = extractedData["re_infrastructure_id"];
+    if (rawInfrastructureIds == null) {
+      throw {
+        message:
+          'Expected array od infrastructures but "re_infrastructure_id" not found !'
+      };
+    }
+
+    const infrastructureIds = String(rawInfrastructureIds)
+      .split(",")
+      .map(stringNumber => parseInt(stringNumber, 10));
+
+    const rawFloorNoIds = extractedData["re_floorNO_id"];
+    if (rawFloorNoIds == null) {
+      throw {
+        message: 'Expected array od floor ids but "re_floorNO_id" not found !'
+      };
+    }
+
     const numberOfViewsAgencySelector = $(
       "body > div > div.container > div.row.content-top > div.col-xs-12.col-sm-12.col-md-9 > div > div.box-viewcount"
     );
@@ -280,12 +301,19 @@ class RentalCrawler {
       spaceIds
     );
 
+    const realEstatePropertiesFromInfrastructure = this.getPropertiesFromInfrastructure(
+      infrastructureIds
+    );
+
     let numberOfRooms =
         parseInt(extractedData["re_realEstates_roomsNO"]) +
           parseInt(extractedData["re_realEstates_bedroomNO"]) || null,
       numberOfFloors =
-        parseInt(extractedData["re_realEstates_floorsNO"]) || null, // Check this for HOUSE
-      floor = parseInt(extractedData["re_realEstates_floorNO"]) || null,
+        parseInt(extractedData["re_realEstates_floorsNO"]) ||
+        this.getNumberOfFloorsFromFloorId(extractedData["re_floorNO_id"]),
+      floor =
+        parseInt(extractedData["re_realEstates_floorNO"]) ||
+        this.getFloorNumberFromFloorId(extractedData["re_floorNO_id"]),
       accessRoadType = realEstatePropertiesFromDescriptions.accessRoadType,
       heatingType =
         this.getHeatingTypeId(extractedData["re_heating_id"]) || null,
@@ -297,33 +325,45 @@ class RentalCrawler {
         ? extractedData["op_realEstates_newBuilding"] === "1"
         : null,
       elevator = realEstatePropertiesFromDescriptions.elevator,
-      water = realEstatePropertiesFromDescriptions.water,
-      electricity = realEstatePropertiesFromDescriptions.electricity,
-      drainageSystem = null,
-      registeredInZkBooks = null,
+      water =
+        realEstatePropertiesFromDescriptions.water ||
+        realEstatePropertiesFromInfrastructure.water,
+      electricity =
+        realEstatePropertiesFromDescriptions.electricity ||
+        realEstatePropertiesFromInfrastructure.electricity,
+      drainageSystem =
+        realEstatePropertiesFromInfrastructure.drainageSystem,
+      // parseInt accepts both "1" and 1; a bare `=== 1` never matches "1".
+      registeredInZkBooks =
+        parseInt(extractedData["op_realEstates_ownerPermit"], 10) === 1 || null,
       recentlyAdapted = null,
       parking =
         realEstatePropertiesFromDescriptions.parking ||
         realEstatePropertiesFromSpaces.parking,
       garage = realEstatePropertiesFromSpaces.garage,
-      gas = null,
+      gas = realEstatePropertiesFromInfrastructure.gas,
       antiTheftDoor = realEstatePropertiesFromDescriptions.antiTheftDoor,
       airCondition = realEstatePropertiesFromDescriptions.airCondition,
-      phoneConnection = null,
-      cableTV = null,
-      internet = null,
+      phoneConnection =
+        realEstatePropertiesFromInfrastructure.phoneConnection,
+      cableTV = realEstatePropertiesFromInfrastructure.cableTV,
+      internet = realEstatePropertiesFromInfrastructure.internet,
       basementAttic = realEstatePropertiesFromSpaces.basementAttic,
       storeRoom = realEstatePropertiesFromSpaces.storeRoom,
       videoSurveillance =
-        realEstatePropertiesFromDescriptions.videoSurveillance,
+        realEstatePropertiesFromDescriptions.videoSurveillance ||
+        realEstatePropertiesFromInfrastructure.videoSurveillance,
       alarm = realEstatePropertiesFromDescriptions.alarm,
       suitableForStudents = null,
-      includingBills = null,
+      includingBills =
+        extractedData["op_realEstates_utilitiesIncluded"] === "1" || null,
       animalsAllowed = null,
       pool = realEstatePropertiesFromDescriptions.pool,
       urbanPlanPermit =
+        extractedData["op_realEstates_locationPermit"] === "1" ||
         realEstatePropertiesFromDescriptions.urbanPlanPermit,
-      buildingPermit = null,
+      buildingPermit =
+        extractedData["op_realEstates_buildingPermit"] === "1" || null,
       utilityConnection =
         realEstatePropertiesFromDescriptions.utilityConnection,
       distanceToRiver = null,
@@ -640,6 +680,122 @@ class RentalCrawler {
     }
   }
 
+  /**
+   * Derives boolean utility flags from the listing's infrastructure ids.
+   *
+   * @param {number[]} infrastructureIds parsed "re_infrastructure_id" values;
+   *   ids with no case below (NaN included) are ignored
+   * @returns {Object} one key per supported utility: true when a matching id
+   *   is present, null otherwise
+   */
+  getPropertiesFromInfrastructure(infrastructureIds) {
+    const result = {
+      electricity: null,
+      water: null,
+      gas: null,
+      drainageSystem: null,
+      phoneConnection: null,
+      internet: null,
+      videoSurveillance: null,
+      cableTV: null
+    };
+
+    for (const infrastructureId of infrastructureIds) {
+      switch (infrastructureId) {
+        case 1:
+          result.electricity = true;
+          break;
+        case 2:
+          result.water = true;
+          break;
+        case 4:
+          result.gas = true;
+          break;
+        case 5:
+          result.drainageSystem = true;
+          break;
+        case 7:
+        case 8:
+          result.phoneConnection = true;
+          break;
+        case 10:
+          result.internet = true;
+          break;
+        case 11:
+          result.cableTV = true;
+          break;
+        case 16:
+        case 17:
+          result.videoSurveillance = true;
+          break;
+      }
+    }
+
+    return result;
+  }
+
+  /**
+   * Maps the first id of "re_floorNO_id" to a floor number.
+   *
+   * @param {string|number} floorsIdText one floor id or a comma-separated
+   *   list of ids; only the first id is used
+   * @returns {number|null} the floor number from the lookup table, or null
+   *   when the id is missing, not in the table or mapped to null
+   */
+  getFloorNumberFromFloorId(floorsIdText) {
+    if (floorsIdText == null) {
+      return null;
+    }
+
+    const firstFloorId = parseInt(String(floorsIdText).split(",")[0], 10);
+
+    // Floor number for each floor id, indexed by id - 1. The trailing notes
+    // are the site's label abbreviations recorded for each id.
+    const floorNumber = [
+      -1, // 1 pod
+      -1, // 2 sut
+      0, // 3 raz
+      0, // 4 pri
+      1, // 5 vpri
+      1, // 6 prv
+      2, // 7 dru
+      3, // 8 tre
+      4, // 9 čet
+      null, // 10 man
+      null, // 11
+      null, // 12 pot
+      null, // 13 vpot
+      null, // 14 tav
+      5 // 15 pet
+    ];
+
+    // Explicit undefined check instead of `|| null`, which turned the table's
+    // 0 entries into null.
+    const floor = floorNumber[firstFloorId - 1];
+    return floor === undefined ? null : floor;
+  }
+
+  /**
+   * Counts the floor ids listed in "re_floorNO_id".
+   *
+   * @param {string|number} floorsIdText one floor id or a comma-separated
+   *   list of ids
+   * @returns {number|null} number of non-blank ids, or null when there are none
+   */
+  getNumberOfFloorsFromFloorId(floorsIdText) {
+    if (floorsIdText == null) {
+      return null;
+    }
+
+    // split() never returns an empty array ("" gives [""]), so blank entries
+    // are dropped before counting.
+    const floorIds = String(floorsIdText)
+      .split(",")
+      .filter(floorId => floorId.trim() !== "");
+
+    return floorIds.length || null;
+  }
+
   async sleep(ms) {
     return new Promise(resolve => setTimeout(resolve, ms));
   }