From b6d68db3a3b1c23eb7a9609a6e283cef23507117 Mon Sep 17 00:00:00 2001
From: Bilal Catic
Date: Tue, 12 Nov 2019 21:39:28 +0100
Subject: [PATCH] improve real estate properties detection for aktido

---
 app/crawler/specificCrawlers/aktido.js | 255 +++++++++++++++++++++++--
 1 file changed, 241 insertions(+), 14 deletions(-)

diff --git a/app/crawler/specificCrawlers/aktido.js b/app/crawler/specificCrawlers/aktido.js
index d8fb517..2445566 100644
--- a/app/crawler/specificCrawlers/aktido.js
+++ b/app/crawler/specificCrawlers/aktido.js
@@ -261,6 +261,84 @@ class AktidoCrawler {
           };
         }
 
+        // Guard before splitting: calling .split() on a missing value would
+        // throw a TypeError before the descriptive error could be raised.
+        if (typeof extractedData["re_infrastructure_id"] !== "string") {
+          throw {
+            message:
+              'Expected array od infrastructures but "re_infrastructure_id" not found !'
+          };
+        }
+
+        const infrastructureIds = extractedData["re_infrastructure_id"]
+          .split(",")
+          .map(stringNumber => parseInt(stringNumber, 10));
+
+        if (typeof extractedData["re_floorNO_id"] !== "string") {
+          throw {
+            message:
+              'Expected array od floors but "re_floorNO_id" not found !'
+          };
+        }
+
+        const floorNoIds = extractedData["re_floorNO_id"]
+          .split(",")
+          .map(stringNumber => parseInt(stringNumber, 10));
+
+        // counting floor enums
+        // for (let i = 1; i < 10; i++) {
+        //   const floorEnumsTitle = $(
+        //     `body > div.container-fluid > div.container > div:nth-child(2) > div.col-xs-12.col-sm-12.col-md-12.col-lg-9.content-main > div:nth-child(1) > div > div > div.col-xs-12.col-sm-4.box-details > div.body > p:nth-child(${i}) > span:nth-child(1)`
+        //   )
+        //     .text()
+        //     .trim();
+        //   if (floorEnumsTitle === "Spratnost:") {
+        //     const floorEnumsValue = $(
+        //       `body > div.container-fluid > div.container > div:nth-child(2) > div.col-xs-12.col-sm-12.col-md-12.col-lg-9.content-main > div:nth-child(1) > div > div > div.col-xs-12.col-sm-4.box-details > div.body > p:nth-child(${i}) > span:nth-child(2)`
+        //     )
+        //       .text()
+        //       .trim()
+        //       .split(",");
+        //
+        //     console.log("==========");
+        //     floorNoIds.forEach((id, index) => {
+        //       console.log("\t", id, " = ", floorEnumsValue[index]);
+        //     });
+        //     break;
+        //   }
+        // }
+
+        // enumerating infrastructure - relation between id and infrastructure title
+        // let found = false;
+        // let infrastructureDescriptions = {};
+        // for (let i = 1; i < 5; i++) {
+        //   found = false;
+        //   for (let j = 1; j < 10; j++) {
+        //     const infrastructureTitle = $(
+        //       `#b2 > div > div:nth-child(${i}) > div > ul > li:nth-child(${j}) > strong`
+        //     )
+        //       .text()
+        //       .trim();
+        //     if (infrastructureTitle === "Osnovna infrastruktura:") {
+        //       found = true;
+        //
+        //       const infrastructureValues = $(
+        //         `#b2 > div > div:nth-child(${i}) > div > ul > li:nth-child(${j}) > div`
+        //       )
+        //         .text()
+        //         .trim()
+        //         .split(",");
+        //
+        //       infrastructureIds.forEach((id, index) => {
+        //         infrastructureDescriptions[id] = infrastructureValues[index];
+        //       });
+        //     }
+        //   }
+        //   if (found) {
+        //     break;
+        //   }
+        // }
+
         const realEstatePropertiesFromDescriptions = this.getPropertiesFromDescriptions(
           descriptionIds
         );
@@ -268,12 +346,19 @@ class AktidoCrawler {
           spaceIds
         );
 
+        const realEstatePropertiesFromInfrastructure = this.getPropertiesFromInfrastructure(
+          infrastructureIds
+        );
+
         let numberOfRooms =
             parseInt(extractedData["re_realEstates_roomsNO"]) +
               parseInt(extractedData["re_realEstates_bedroomNO"]) || null,
           numberOfFloors =
-            parseInt(extractedData["re_realEstates_floorsNO"]) || null, // Check this for HOUSE
-          floor = parseInt(extractedData["re_realEstates_floorNO"]) || null,
+            parseInt(extractedData["re_realEstates_floorsNO"], 10) ||
+            this.getNumberOfFloorsFromFloorId(extractedData["re_floorNO_id"]),
+          floor =
+            parseInt(extractedData["re_realEstates_floorNO"], 10) ||
+            this.getFloorNumberFromFloorId(extractedData["re_floorNO_id"]),
           accessRoadType = realEstatePropertiesFromDescriptions.accessRoadType,
           heatingType =
             this.getHeatingTypeId(extractedData["re_heating_id"]) || null,
@@ -285,33 +370,44 @@ class AktidoCrawler {
               ? extractedData["op_realEstates_newBuilding"] === "1"
               : null,
           elevator = realEstatePropertiesFromDescriptions.elevator,
-          water = realEstatePropertiesFromDescriptions.water,
-          electricity = realEstatePropertiesFromDescriptions.electricity,
-          drainageSystem = null,
-          registeredInZkBooks = null,
+          water =
+            realEstatePropertiesFromDescriptions.water ||
+            realEstatePropertiesFromInfrastructure.water,
+          electricity =
+            realEstatePropertiesFromDescriptions.electricity ||
+            realEstatePropertiesFromInfrastructure.electricity,
+          drainageSystem =
+            realEstatePropertiesFromInfrastructure.drainageSystem,
+          registeredInZkBooks =
+            extractedData["op_realEstates_ownerPermit"] === "1" || null,
           recentlyAdapted = null,
           parking =
             realEstatePropertiesFromDescriptions.parking ||
             realEstatePropertiesFromSpaces.parking,
           garage = realEstatePropertiesFromSpaces.garage,
-          gas = null,
+          gas = realEstatePropertiesFromInfrastructure.gas,
           antiTheftDoor = realEstatePropertiesFromDescriptions.antiTheftDoor,
           airCondition = realEstatePropertiesFromDescriptions.airCondition,
-          phoneConnection = null,
-          cableTV = null,
-          internet = null,
+          phoneConnection =
+            realEstatePropertiesFromInfrastructure.phoneConnection,
+          cableTV = realEstatePropertiesFromInfrastructure.cableTV,
+          internet = realEstatePropertiesFromInfrastructure.internet,
           basementAttic = realEstatePropertiesFromSpaces.basementAttic,
           storeRoom = realEstatePropertiesFromSpaces.storeRoom,
           videoSurveillance =
-            realEstatePropertiesFromDescriptions.videoSurveillance,
+            realEstatePropertiesFromDescriptions.videoSurveillance ||
+            realEstatePropertiesFromInfrastructure.videoSurveillance,
           alarm = realEstatePropertiesFromDescriptions.alarm,
           suitableForStudents = null,
-          includingBills = null,
+          includingBills =
+            extractedData["op_realEstates_utilitiesIncluded"] === "1" || null,
           animalsAllowed = null,
           pool = realEstatePropertiesFromDescriptions.pool,
           urbanPlanPermit =
+            extractedData["op_realEstates_locationPermit"] === "1" ||
             realEstatePropertiesFromDescriptions.urbanPlanPermit,
-          buildingPermit = null,
+          buildingPermit =
+            extractedData["op_realEstates_buildingPermit"] === "1" || null,
           utilityConnection =
             realEstatePropertiesFromDescriptions.utilityConnection,
           distanceToRiver = null,
@@ -625,6 +721,137 @@ class AktidoCrawler {
     }
   }
 
+  /**
+   * Maps Aktido infrastructure ids to boolean real-estate properties.
+   *
+   * @param {number[]} infrastructureIds Parsed "re_infrastructure_id" values;
+   *   ids without a matching case (including NaN) are ignored.
+   * @returns {Object} Object whose flags are true when a matching id is
+   *   present and null otherwise.
+   */
+  getPropertiesFromInfrastructure(infrastructureIds) {
+    const result = {
+      electricity: null,
+      water: null,
+      gas: null,
+      drainageSystem: null,
+      phoneConnection: null,
+      internet: null,
+      videoSurveillance: null,
+      cableTV: null
+    };
+
+    for (const infrastructureId of infrastructureIds) {
+      switch (infrastructureId) {
+        case 1:
+          result.electricity = true;
+          break;
+        case 2:
+          result.water = true;
+          break;
+        case 4:
+          result.gas = true;
+          break;
+        case 5:
+          result.drainageSystem = true;
+          break;
+        case 7:
+        case 8:
+          result.phoneConnection = true;
+          break;
+        case 10:
+          result.internet = true;
+          break;
+        case 11:
+          result.cableTV = true;
+          break;
+        case 16:
+        case 17:
+          result.videoSurveillance = true;
+          break;
+      }
+    }
+
+    return result;
+  }
+
+  /**
+   * Looks up the floor number for the first id of "re_floorNO_id".
+   *
+   * @param {string|number} floorsIdText Single floor id or comma separated
+   *   list of floor ids; only the first id is used.
+   * @returns {?number} Floor number from the lookup table (can be negative
+   *   or 0), or null when the value is missing, not numeric, or the id has
+   *   no floor number assigned.
+   */
+  getFloorNumberFromFloorId(floorsIdText) {
+    if (floorsIdText === undefined || floorsIdText === null) {
+      return null;
+    }
+
+    const firstFloorId = parseInt(String(floorsIdText).split(",")[0], 10);
+
+    // The table below is indexed by (floor id - 1); ids and their labels:
+    // 1 pod
+    // 2 sut
+    // 3 raz
+    // 4 pri
+    // 5 vpri
+    // 6 prv
+    // 7 dru
+    // 8 tre
+    // 9 čet
+    // 10 man
+    // 11
+    // 12 pot
+    // 13 vpot
+    // 14 tav
+    // 15 pet
+    const floorNumber = [
+      -1,
+      -1,
+      0,
+      0,
+      1,
+      1,
+      2,
+      3,
+      4,
+      null,
+      null,
+      null,
+      null,
+      null,
+      5
+    ];
+
+    // Compare against undefined instead of using "|| null": 0 is a valid
+    // floor number and must not be collapsed to null.
+    const floor = floorNumber[firstFloorId - 1];
+    return floor === undefined ? null : floor;
+  }
+
+  /**
+   * Counts the floor ids contained in "re_floorNO_id".
+   *
+   * @param {string|number} floorsIdText Single floor id or comma separated
+   *   list of floor ids.
+   * @returns {?number} Number of non-empty ids, or null when there are none.
+   */
+  getNumberOfFloorsFromFloorId(floorsIdText) {
+    if (floorsIdText === undefined || floorsIdText === null) {
+      return null;
+    }
+
+    // "".split(",") returns [""], so blank entries are filtered out before
+    // counting; otherwise an empty value would be reported as one floor.
+    const floorIds = String(floorsIdText)
+      .split(",")
+      .filter(floorId => floorId.trim() !== "");
+
+    return floorIds.length === 0 ? null : floorIds.length;
+  }
+
   async sleep(ms) {
     return new Promise(resolve => setTimeout(resolve, ms));
   }
@@ -637,7 +864,7 @@ class AktidoCrawler {
     // }
 
     //For now, we use only Postgres saver, so ...
-    return await savers[0].save(results);
+    return savers[0].save(results);
     //so that we can use some sequelize options and information when data is inserted
   }
 }