From debdd01b2866d71eb75c09da8c1a7b9ec4481091 Mon Sep 17 00:00:00 2001 From: Bilal Catic Date: Mon, 11 Nov 2019 17:15:46 +0100 Subject: [PATCH] add new fields to the Rental crawler --- app/crawler/specificCrawlers/rental.js | 294 ++++++++++++++++++++++++- 1 file changed, 291 insertions(+), 3 deletions(-) diff --git a/app/crawler/specificCrawlers/rental.js b/app/crawler/specificCrawlers/rental.js index 668cff2..38a2f5e 100644 --- a/app/crawler/specificCrawlers/rental.js +++ b/app/crawler/specificCrawlers/rental.js @@ -11,7 +11,10 @@ const { AD_CATEGORY, AD_AGENCY, AD_STATUS, - CRAWLER_AD_TYPE + CRAWLER_AD_TYPE, + HEATING_TYPE, + ACCESS_ROAD_TYPE, + FURNISHING_TYPE } = require("../../common/enums"); const { @@ -215,6 +218,7 @@ class RentalCrawler { const jsonData = scriptElement[0].children[0].data.substring(20); const parsedJsonData = JSON.parse(jsonData); extractedData = parsedJsonData[0]; + // console.log(extractedData); } catch (e) { throw { message: "Can't find ad data JSON" }; } @@ -237,6 +241,97 @@ class RentalCrawler { }; } + const descriptionIds = extractedData["re_descriptions_id"] + .split(",") + .map(stringNumber => parseInt(stringNumber)); + + if (!Array.isArray(descriptionIds)) { + throw { + message: + 'Expected array od descriptions but "re_descriptions_id" not found !' + }; + } + + const spaceIds = extractedData["re_spaces_id"] + .split(",") + .map(stringNumber => parseInt(stringNumber)); + + if (!Array.isArray(spaceIds)) { + throw { + message: 'Expected array od spaces but "re_spaces_id" not found !' + }; + } + + const numberOfViewsAgencySelector = $( + "body > div > div.container > div.row.content-top > div.col-xs-12.col-sm-12.col-md-9 > div > div.box-viewcount" + ); + + // number of views is written as : "Broj pregledavanja: NNN" + const numberOfViewsAgencyFullText = numberOfViewsAgencySelector + .text() + .trim(); + + const numberOfViewsAgencyParts = numberOfViewsAgencyFullText.split(":"); + + const realEstatePropertiesFromDescriptions = this.getPropertiesFromDescriptions( + descriptionIds + ); + const realEstatePropertiesFromSpaces = this.getPropertiesFromSpaces( + spaceIds + ); + + let numberOfRooms = + parseInt(extractedData["re_realEstates_roomsNO"]) + + parseInt(extractedData["re_realEstates_bedroomNO"]) || null, + numberOfFloors = + parseInt(extractedData["re_realEstates_floorsNO"]) || null, // Check this for HOUSE + floor = parseInt(extractedData["re_realEstates_floorNO"]) || null, + accessRoadType = realEstatePropertiesFromDescriptions.accessRoadType, + heatingType = + this.getHeatingTypeId(extractedData["re_heating_id"]) || null, + furnishingType = realEstatePropertiesFromDescriptions.furnishingType, + balcony = + realEstatePropertiesFromDescriptions.balcony || + realEstatePropertiesFromSpaces.balcony, + newBuilding = extractedData["op_realEstates_newBuilding"] + ? extractedData["op_realEstates_newBuilding"] === "1" + : null, + elevator = realEstatePropertiesFromDescriptions.elevator, + water = realEstatePropertiesFromDescriptions.water, + electricity = realEstatePropertiesFromDescriptions.electricity, + drainageSystem = null, + registeredInZkBooks = null, + recentlyAdapted = null, + parking = + realEstatePropertiesFromDescriptions.parking || + realEstatePropertiesFromSpaces.parking, + garage = realEstatePropertiesFromSpaces.garage, + gas = null, + antiTheftDoor = realEstatePropertiesFromDescriptions.antiTheftDoor, + airCondition = realEstatePropertiesFromDescriptions.airCondition, + phoneConnection = null, + cableTV = null, + internet = null, + basementAttic = realEstatePropertiesFromSpaces.basementAttic, + storeRoom = realEstatePropertiesFromSpaces.storeRoom, + videoSurveillance = + realEstatePropertiesFromDescriptions.videoSurveillance, + alarm = realEstatePropertiesFromDescriptions.alarm, + suitableForStudents = null, + includingBills = null, + animalsAllowed = null, + pool = realEstatePropertiesFromDescriptions.pool, + urbanPlanPermit = + realEstatePropertiesFromDescriptions.urbanPlanPermit, + buildingPermit = null, + utilityConnection = + realEstatePropertiesFromDescriptions.utilityConnection, + distanceToRiver = null, + numberOfViewsAgency = + numberOfViewsAgencyParts.length > 1 + ? parseInt(numberOfViewsAgencyParts[1]) + : null; + const title = extractedData["re_realEstates_portalName"]; const extractedPrice = parseFloat( extractedData["re_realEstates_price"] @@ -303,7 +398,42 @@ class RentalCrawler { locationLong, adStatus, publishedDate: publishedDateMoment.toISOString(), - renewedDate: renewedDateMoment.toISOString() + renewedDate: renewedDateMoment.toISOString(), + numberOfRooms, + numberOfFloors, + floor, + accessRoadType, + heatingType, + furnishingType, + balcony, + newBuilding, + elevator, + water, + electricity, + drainageSystem, + registeredInZkBooks, + recentlyAdapted, + parking, + garage, + gas, + antiTheftDoor, + airCondition, + phoneConnection, + cableTV, + internet, + basementAttic, + storeRoom, + videoSurveillance, + alarm, + suitableForStudents, + includingBills, + animalsAllowed, + pool, + urbanPlanPermit, + buildingPermit, + utilityConnection, + distanceToRiver, + numberOfViewsAgency }; return data; @@ -350,6 +480,164 @@ class RentalCrawler { } } + getPropertiesFromDescriptions(descriptionIds) { + const result = { + accessRoadType: null, + furnishingType: null, + balcony: null, + elevator: null, + parking: null, + antiTheftDoor: null, + airCondition: null, + videoSurveillance: null, + alarm: null, + pool: null, + urbanPlanPermit: null, + utilityConnection: null, + water: null, + electricity: null + }; + + for (const descriptionId of descriptionIds) { + switch (descriptionId) { + case 16: + result.furnishingType = FURNISHING_TYPE.NOT_FURNISHED.id; + break; + case 17: + result.furnishingType = FURNISHING_TYPE.HALF_FURNISHED.id; + break; + case 1: + case 28: + result.furnishingType = FURNISHING_TYPE.FURNISHED.id; + break; + case 14: + result.elevator = true; + break; + case 39: + result.electricity = true; + break; + case 40: + result.water = true; + break; + case 41: + case 58: + result.accessRoadType = ACCESS_ROAD_TYPE.ASPHALT.id; + break; + case 26: + result.balcony = true; + break; + case 62: + result.parking = true; + break; + case 3: + result.antiTheftDoor = true; + break; + case 2: + case 21: + result.airCondition = true; + break; + case 4: + result.alarm = true; + break; + case 55: + result.videoSurveillance = true; + break; + case 9: + result.pool = true; + break; + case 60: + result.urbanPlanPermit = true; + break; + case 38: + result.utilityConnection = true; + break; + } + } + + return result; + } + + getPropertiesFromSpaces(spaceIds) { + const result = { + balcony: null, + parking: null, + garage: null, + basementAttic: null, + storeRoom: null + }; + + for (const spaceId of spaceIds) { + switch (spaceId) { + case 36: + case 12: + result.parking = true; + break; + case 1: + case 2: + case 3: + result.balcony = true; + break; + case 4: + case 30: + result.garage = true; + break; + case 9: + case 10: + result.storeRoom = true; + break; + case 18: + case 34: + case 37: + case 27: + result.basementAttic = true; + break; + } + } + + return result; + } + + getHeatingTypeId(heatingRentalId) { + // heatingRentalId can have multiple values, like: "1, 2, 3", parseInt will take first integer value + const heatingId = parseInt(heatingRentalId); + switch (heatingId) { + case 27: + case 16: + return HEATING_TYPE.GAS.id; + case 4: + return HEATING_TYPE.CENTRAL_GAS.id; + case 3: + case 23: + case 7: + case 8: + case 9: + case 10: + return HEATING_TYPE.CENTRAL_BOILER.id; + case 2: + case 13: + case 30: + case 17: + case 29: + case 31: + return HEATING_TYPE.ELECTRICITY.id; + case 24: + case 25: + return HEATING_TYPE.CENTRAL_CITY.id; + case 26: + case 21: + case 20: + return HEATING_TYPE.WOOD.id; + case 28: + case 19: + return HEATING_TYPE.HEAT_PUMP.id; + case 14: + case 32: + return HEATING_TYPE.OTHER.id; + default: + return null; + } + } + async sleep(ms) { return new Promise(resolve => setTimeout(resolve, ms)); } @@ -362,7 +650,7 @@ class RentalCrawler { // } //For now, we use only Postgres saver, so ... - return await savers[0].save(results); + return savers[0].save(results); //so that we can use some sequelize options and information when data is inserted } }