add new fields to the Rental crawler
This commit is contained in:
@@ -11,7 +11,10 @@ const {
|
||||
AD_CATEGORY,
|
||||
AD_AGENCY,
|
||||
AD_STATUS,
|
||||
CRAWLER_AD_TYPE
|
||||
CRAWLER_AD_TYPE,
|
||||
HEATING_TYPE,
|
||||
ACCESS_ROAD_TYPE,
|
||||
FURNISHING_TYPE
|
||||
} = require("../../common/enums");
|
||||
|
||||
const {
|
||||
@@ -215,6 +218,7 @@ class RentalCrawler {
|
||||
const jsonData = scriptElement[0].children[0].data.substring(20);
|
||||
const parsedJsonData = JSON.parse(jsonData);
|
||||
extractedData = parsedJsonData[0];
|
||||
// console.log(extractedData);
|
||||
} catch (e) {
|
||||
throw { message: "Can't find ad data JSON" };
|
||||
}
|
||||
@@ -237,6 +241,97 @@ class RentalCrawler {
|
||||
};
|
||||
}
|
||||
|
||||
const descriptionIds = extractedData["re_descriptions_id"]
|
||||
.split(",")
|
||||
.map(stringNumber => parseInt(stringNumber));
|
||||
|
||||
if (!Array.isArray(descriptionIds)) {
|
||||
throw {
|
||||
message:
|
||||
'Expected array od descriptions but "re_descriptions_id" not found !'
|
||||
};
|
||||
}
|
||||
|
||||
const spaceIds = extractedData["re_spaces_id"]
|
||||
.split(",")
|
||||
.map(stringNumber => parseInt(stringNumber));
|
||||
|
||||
if (!Array.isArray(spaceIds)) {
|
||||
throw {
|
||||
message: 'Expected array od spaces but "re_spaces_id" not found !'
|
||||
};
|
||||
}
|
||||
|
||||
const numberOfViewsAgencySelector = $(
|
||||
"body > div > div.container > div.row.content-top > div.col-xs-12.col-sm-12.col-md-9 > div > div.box-viewcount"
|
||||
);
|
||||
|
||||
// number of views is written as : "Broj pregledavanja: NNN"
|
||||
const numberOfViewsAgencyFullText = numberOfViewsAgencySelector
|
||||
.text()
|
||||
.trim();
|
||||
|
||||
const numberOfViewsAgencyParts = numberOfViewsAgencyFullText.split(":");
|
||||
|
||||
const realEstatePropertiesFromDescriptions = this.getPropertiesFromDescriptions(
|
||||
descriptionIds
|
||||
);
|
||||
const realEstatePropertiesFromSpaces = this.getPropertiesFromSpaces(
|
||||
spaceIds
|
||||
);
|
||||
|
||||
let numberOfRooms =
|
||||
parseInt(extractedData["re_realEstates_roomsNO"]) +
|
||||
parseInt(extractedData["re_realEstates_bedroomNO"]) || null,
|
||||
numberOfFloors =
|
||||
parseInt(extractedData["re_realEstates_floorsNO"]) || null, // Check this for HOUSE
|
||||
floor = parseInt(extractedData["re_realEstates_floorNO"]) || null,
|
||||
accessRoadType = realEstatePropertiesFromDescriptions.accessRoadType,
|
||||
heatingType =
|
||||
this.getHeatingTypeId(extractedData["re_heating_id"]) || null,
|
||||
furnishingType = realEstatePropertiesFromDescriptions.furnishingType,
|
||||
balcony =
|
||||
realEstatePropertiesFromDescriptions.balcony ||
|
||||
realEstatePropertiesFromSpaces.balcony,
|
||||
newBuilding = extractedData["op_realEstates_newBuilding"]
|
||||
? extractedData["op_realEstates_newBuilding"] === "1"
|
||||
: null,
|
||||
elevator = realEstatePropertiesFromDescriptions.elevator,
|
||||
water = realEstatePropertiesFromDescriptions.water,
|
||||
electricity = realEstatePropertiesFromDescriptions.electricity,
|
||||
drainageSystem = null,
|
||||
registeredInZkBooks = null,
|
||||
recentlyAdapted = null,
|
||||
parking =
|
||||
realEstatePropertiesFromDescriptions.parking ||
|
||||
realEstatePropertiesFromSpaces.parking,
|
||||
garage = realEstatePropertiesFromSpaces.garage,
|
||||
gas = null,
|
||||
antiTheftDoor = realEstatePropertiesFromDescriptions.antiTheftDoor,
|
||||
airCondition = realEstatePropertiesFromDescriptions.airCondition,
|
||||
phoneConnection = null,
|
||||
cableTV = null,
|
||||
internet = null,
|
||||
basementAttic = realEstatePropertiesFromSpaces.basementAttic,
|
||||
storeRoom = realEstatePropertiesFromSpaces.storeRoom,
|
||||
videoSurveillance =
|
||||
realEstatePropertiesFromDescriptions.videoSurveillance,
|
||||
alarm = realEstatePropertiesFromDescriptions.alarm,
|
||||
suitableForStudents = null,
|
||||
includingBills = null,
|
||||
animalsAllowed = null,
|
||||
pool = realEstatePropertiesFromDescriptions.pool,
|
||||
urbanPlanPermit =
|
||||
realEstatePropertiesFromDescriptions.urbanPlanPermit,
|
||||
buildingPermit = null,
|
||||
utilityConnection =
|
||||
realEstatePropertiesFromDescriptions.utilityConnection,
|
||||
distanceToRiver = null,
|
||||
numberOfViewsAgency =
|
||||
numberOfViewsAgencyParts.length > 1
|
||||
? parseInt(numberOfViewsAgencyParts[1])
|
||||
: null;
|
||||
|
||||
const title = extractedData["re_realEstates_portalName"];
|
||||
const extractedPrice = parseFloat(
|
||||
extractedData["re_realEstates_price"]
|
||||
@@ -303,7 +398,42 @@ class RentalCrawler {
|
||||
locationLong,
|
||||
adStatus,
|
||||
publishedDate: publishedDateMoment.toISOString(),
|
||||
renewedDate: renewedDateMoment.toISOString()
|
||||
renewedDate: renewedDateMoment.toISOString(),
|
||||
numberOfRooms,
|
||||
numberOfFloors,
|
||||
floor,
|
||||
accessRoadType,
|
||||
heatingType,
|
||||
furnishingType,
|
||||
balcony,
|
||||
newBuilding,
|
||||
elevator,
|
||||
water,
|
||||
electricity,
|
||||
drainageSystem,
|
||||
registeredInZkBooks,
|
||||
recentlyAdapted,
|
||||
parking,
|
||||
garage,
|
||||
gas,
|
||||
antiTheftDoor,
|
||||
airCondition,
|
||||
phoneConnection,
|
||||
cableTV,
|
||||
internet,
|
||||
basementAttic,
|
||||
storeRoom,
|
||||
videoSurveillance,
|
||||
alarm,
|
||||
suitableForStudents,
|
||||
includingBills,
|
||||
animalsAllowed,
|
||||
pool,
|
||||
urbanPlanPermit,
|
||||
buildingPermit,
|
||||
utilityConnection,
|
||||
distanceToRiver,
|
||||
numberOfViewsAgency
|
||||
};
|
||||
|
||||
return data;
|
||||
@@ -350,6 +480,164 @@ class RentalCrawler {
|
||||
}
|
||||
}
|
||||
|
||||
getPropertiesFromDescriptions(descriptionIds) {
|
||||
const result = {
|
||||
accessRoadType: null,
|
||||
furnishingType: null,
|
||||
balcony: null,
|
||||
elevator: null,
|
||||
parking: null,
|
||||
antiTheftDoor: null,
|
||||
airCondition: null,
|
||||
videoSurveillance: null,
|
||||
alarm: null,
|
||||
pool: null,
|
||||
urbanPlanPermit: null,
|
||||
utilityConnection: null,
|
||||
water: null,
|
||||
electricity: null
|
||||
};
|
||||
|
||||
for (const descriptionId of descriptionIds) {
|
||||
switch (descriptionId) {
|
||||
case 16:
|
||||
result.furnishingType = FURNISHING_TYPE.NOT_FURNISHED.id;
|
||||
break;
|
||||
case 17:
|
||||
result.furnishingType = FURNISHING_TYPE.HALF_FURNISHED.id;
|
||||
break;
|
||||
case 1:
|
||||
case 28:
|
||||
result.furnishingType = FURNISHING_TYPE.FURNISHED.id;
|
||||
break;
|
||||
case 14:
|
||||
result.elevator = true;
|
||||
break;
|
||||
case 39:
|
||||
result.electricity = true;
|
||||
break;
|
||||
case 40:
|
||||
result.water = true;
|
||||
break;
|
||||
case 41:
|
||||
case 58:
|
||||
result.accessRoadType = ACCESS_ROAD_TYPE.ASPHALT.id;
|
||||
break;
|
||||
case 26:
|
||||
result.balcony = true;
|
||||
break;
|
||||
case 62:
|
||||
result.parking = true;
|
||||
break;
|
||||
case 3:
|
||||
result.antiTheftDoor = true;
|
||||
break;
|
||||
case 2:
|
||||
case 21:
|
||||
result.airCondition = true;
|
||||
break;
|
||||
case 4:
|
||||
result.alarm = true;
|
||||
break;
|
||||
case 55:
|
||||
result.videoSurveillance = true;
|
||||
break;
|
||||
case 9:
|
||||
result.pool = true;
|
||||
break;
|
||||
case 60:
|
||||
result.urbanPlanPermit = true;
|
||||
break;
|
||||
case 38:
|
||||
result.utilityConnection = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
getPropertiesFromSpaces(spaceIds) {
|
||||
const result = {
|
||||
balcony: null,
|
||||
parking: null,
|
||||
garage: null,
|
||||
basementAttic: null,
|
||||
storeRoom: null
|
||||
};
|
||||
|
||||
for (const spaceId of spaceIds) {
|
||||
switch (spaceId) {
|
||||
case 36:
|
||||
case 12:
|
||||
result.parking = true;
|
||||
break;
|
||||
case 1:
|
||||
case 2:
|
||||
case 3:
|
||||
result.balcony = true;
|
||||
break;
|
||||
case 4:
|
||||
case 30:
|
||||
result.garage = true;
|
||||
break;
|
||||
case 9:
|
||||
case 10:
|
||||
result.storeRoom = true;
|
||||
break;
|
||||
case 18:
|
||||
case 34:
|
||||
case 37:
|
||||
case 27:
|
||||
result.basementAttic = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
getHeatingTypeId(heatingRentalId) {
|
||||
// heatingRentalId can have multiple values, like: "1, 2, 3", parseInt will take first integer value
|
||||
const heatingId = parseInt(heatingRentalId);
|
||||
switch (heatingId) {
|
||||
case 27:
|
||||
case 16:
|
||||
return HEATING_TYPE.GAS.id;
|
||||
case 4:
|
||||
return HEATING_TYPE.CENTRAL_GAS.id;
|
||||
case 3:
|
||||
case 23:
|
||||
case 7:
|
||||
case 8:
|
||||
case 9:
|
||||
case 10:
|
||||
return HEATING_TYPE.CENTRAL_BOILER.id;
|
||||
case 2:
|
||||
case 13:
|
||||
case 30:
|
||||
case 17:
|
||||
case 29:
|
||||
case 31:
|
||||
return HEATING_TYPE.ELECTRICITY.id;
|
||||
case 24:
|
||||
case 25:
|
||||
return HEATING_TYPE.CENTRAL_CITY.id;
|
||||
case 26:
|
||||
case 21:
|
||||
case 20:
|
||||
return HEATING_TYPE.WOOD.id;
|
||||
case 28:
|
||||
case 19:
|
||||
return HEATING_TYPE.HEAT_PUMP.id;
|
||||
case 14:
|
||||
case 32:
|
||||
return HEATING_TYPE.OTHER.id;
|
||||
default:
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
async sleep(ms) {
|
||||
return new Promise(resolve => setTimeout(resolve, ms));
|
||||
}
|
||||
@@ -362,7 +650,7 @@ class RentalCrawler {
|
||||
// }
|
||||
|
||||
//For now, we use only Postgres saver, so ...
|
||||
return await savers[0].save(results);
|
||||
return savers[0].save(results);
|
||||
//so that we can use some sequelize options and information when data is inserted
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user