add new fields to the Rental crawler

This commit is contained in:
Bilal Catic
2019-11-11 17:15:46 +01:00
parent 9e10800b02
commit debdd01b28

View File

@@ -11,7 +11,10 @@ const {
AD_CATEGORY,
AD_AGENCY,
AD_STATUS,
CRAWLER_AD_TYPE
CRAWLER_AD_TYPE,
HEATING_TYPE,
ACCESS_ROAD_TYPE,
FURNISHING_TYPE
} = require("../../common/enums");
const {
@@ -215,6 +218,7 @@ class RentalCrawler {
const jsonData = scriptElement[0].children[0].data.substring(20);
const parsedJsonData = JSON.parse(jsonData);
extractedData = parsedJsonData[0];
// console.log(extractedData);
} catch (e) {
throw { message: "Can't find ad data JSON" };
}
@@ -237,6 +241,97 @@ class RentalCrawler {
};
}
const descriptionIds = extractedData["re_descriptions_id"]
.split(",")
.map(stringNumber => parseInt(stringNumber));
if (!Array.isArray(descriptionIds)) {
throw {
message:
'Expected array od descriptions but "re_descriptions_id" not found !'
};
}
const spaceIds = extractedData["re_spaces_id"]
.split(",")
.map(stringNumber => parseInt(stringNumber));
if (!Array.isArray(spaceIds)) {
throw {
message: 'Expected array od spaces but "re_spaces_id" not found !'
};
}
const numberOfViewsAgencySelector = $(
"body > div > div.container > div.row.content-top > div.col-xs-12.col-sm-12.col-md-9 > div > div.box-viewcount"
);
// number of views is written as : "Broj pregledavanja: NNN"
const numberOfViewsAgencyFullText = numberOfViewsAgencySelector
.text()
.trim();
const numberOfViewsAgencyParts = numberOfViewsAgencyFullText.split(":");
const realEstatePropertiesFromDescriptions = this.getPropertiesFromDescriptions(
descriptionIds
);
const realEstatePropertiesFromSpaces = this.getPropertiesFromSpaces(
spaceIds
);
let numberOfRooms =
parseInt(extractedData["re_realEstates_roomsNO"]) +
parseInt(extractedData["re_realEstates_bedroomNO"]) || null,
numberOfFloors =
parseInt(extractedData["re_realEstates_floorsNO"]) || null, // Check this for HOUSE
floor = parseInt(extractedData["re_realEstates_floorNO"]) || null,
accessRoadType = realEstatePropertiesFromDescriptions.accessRoadType,
heatingType =
this.getHeatingTypeId(extractedData["re_heating_id"]) || null,
furnishingType = realEstatePropertiesFromDescriptions.furnishingType,
balcony =
realEstatePropertiesFromDescriptions.balcony ||
realEstatePropertiesFromSpaces.balcony,
newBuilding = extractedData["op_realEstates_newBuilding"]
? extractedData["op_realEstates_newBuilding"] === "1"
: null,
elevator = realEstatePropertiesFromDescriptions.elevator,
water = realEstatePropertiesFromDescriptions.water,
electricity = realEstatePropertiesFromDescriptions.electricity,
drainageSystem = null,
registeredInZkBooks = null,
recentlyAdapted = null,
parking =
realEstatePropertiesFromDescriptions.parking ||
realEstatePropertiesFromSpaces.parking,
garage = realEstatePropertiesFromSpaces.garage,
gas = null,
antiTheftDoor = realEstatePropertiesFromDescriptions.antiTheftDoor,
airCondition = realEstatePropertiesFromDescriptions.airCondition,
phoneConnection = null,
cableTV = null,
internet = null,
basementAttic = realEstatePropertiesFromSpaces.basementAttic,
storeRoom = realEstatePropertiesFromSpaces.storeRoom,
videoSurveillance =
realEstatePropertiesFromDescriptions.videoSurveillance,
alarm = realEstatePropertiesFromDescriptions.alarm,
suitableForStudents = null,
includingBills = null,
animalsAllowed = null,
pool = realEstatePropertiesFromDescriptions.pool,
urbanPlanPermit =
realEstatePropertiesFromDescriptions.urbanPlanPermit,
buildingPermit = null,
utilityConnection =
realEstatePropertiesFromDescriptions.utilityConnection,
distanceToRiver = null,
numberOfViewsAgency =
numberOfViewsAgencyParts.length > 1
? parseInt(numberOfViewsAgencyParts[1])
: null;
const title = extractedData["re_realEstates_portalName"];
const extractedPrice = parseFloat(
extractedData["re_realEstates_price"]
@@ -303,7 +398,42 @@ class RentalCrawler {
locationLong,
adStatus,
publishedDate: publishedDateMoment.toISOString(),
renewedDate: renewedDateMoment.toISOString()
renewedDate: renewedDateMoment.toISOString(),
numberOfRooms,
numberOfFloors,
floor,
accessRoadType,
heatingType,
furnishingType,
balcony,
newBuilding,
elevator,
water,
electricity,
drainageSystem,
registeredInZkBooks,
recentlyAdapted,
parking,
garage,
gas,
antiTheftDoor,
airCondition,
phoneConnection,
cableTV,
internet,
basementAttic,
storeRoom,
videoSurveillance,
alarm,
suitableForStudents,
includingBills,
animalsAllowed,
pool,
urbanPlanPermit,
buildingPermit,
utilityConnection,
distanceToRiver,
numberOfViewsAgency
};
return data;
@@ -350,6 +480,164 @@ class RentalCrawler {
}
}
getPropertiesFromDescriptions(descriptionIds) {
const result = {
accessRoadType: null,
furnishingType: null,
balcony: null,
elevator: null,
parking: null,
antiTheftDoor: null,
airCondition: null,
videoSurveillance: null,
alarm: null,
pool: null,
urbanPlanPermit: null,
utilityConnection: null,
water: null,
electricity: null
};
for (const descriptionId of descriptionIds) {
switch (descriptionId) {
case 16:
result.furnishingType = FURNISHING_TYPE.NOT_FURNISHED.id;
break;
case 17:
result.furnishingType = FURNISHING_TYPE.HALF_FURNISHED.id;
break;
case 1:
case 28:
result.furnishingType = FURNISHING_TYPE.FURNISHED.id;
break;
case 14:
result.elevator = true;
break;
case 39:
result.electricity = true;
break;
case 40:
result.water = true;
break;
case 41:
case 58:
result.accessRoadType = ACCESS_ROAD_TYPE.ASPHALT.id;
break;
case 26:
result.balcony = true;
break;
case 62:
result.parking = true;
break;
case 3:
result.antiTheftDoor = true;
break;
case 2:
case 21:
result.airCondition = true;
break;
case 4:
result.alarm = true;
break;
case 55:
result.videoSurveillance = true;
break;
case 9:
result.pool = true;
break;
case 60:
result.urbanPlanPermit = true;
break;
case 38:
result.utilityConnection = true;
break;
}
}
return result;
}
getPropertiesFromSpaces(spaceIds) {
const result = {
balcony: null,
parking: null,
garage: null,
basementAttic: null,
storeRoom: null
};
for (const spaceId of spaceIds) {
switch (spaceId) {
case 36:
case 12:
result.parking = true;
break;
case 1:
case 2:
case 3:
result.balcony = true;
break;
case 4:
case 30:
result.garage = true;
break;
case 9:
case 10:
result.storeRoom = true;
break;
case 18:
case 34:
case 37:
case 27:
result.basementAttic = true;
break;
}
}
return result;
}
getHeatingTypeId(heatingRentalId) {
// heatingRentalId can have multiple values, like: "1, 2, 3", parseInt will take first integer value
const heatingId = parseInt(heatingRentalId);
switch (heatingId) {
case 27:
case 16:
return HEATING_TYPE.GAS.id;
case 4:
return HEATING_TYPE.CENTRAL_GAS.id;
case 3:
case 23:
case 7:
case 8:
case 9:
case 10:
return HEATING_TYPE.CENTRAL_BOILER.id;
case 2:
case 13:
case 30:
case 17:
case 29:
case 31:
return HEATING_TYPE.ELECTRICITY.id;
case 24:
case 25:
return HEATING_TYPE.CENTRAL_CITY.id;
case 26:
case 21:
case 20:
return HEATING_TYPE.WOOD.id;
case 28:
case 19:
return HEATING_TYPE.HEAT_PUMP.id;
case 14:
case 32:
return HEATING_TYPE.OTHER.id;
default:
return null;
}
}
async sleep(ms) {
return new Promise(resolve => setTimeout(resolve, ms));
}
@@ -362,7 +650,7 @@ class RentalCrawler {
// }
//For now, we use only Postgres saver, so ...
return await savers[0].save(results);
return savers[0].save(results);
//so that we can use some sequelize options and information when data is inserted
}
}