Add more fields to the Prostor real estate crawler
This commit is contained in:
@@ -9,7 +9,9 @@ const {
|
||||
AD_CATEGORY,
|
||||
AD_AGENCY,
|
||||
AD_STATUS,
|
||||
CRAWLER_AD_TYPE
|
||||
CRAWLER_AD_TYPE,
|
||||
FURNISHING_TYPE,
|
||||
HEATING_TYPE
|
||||
} = require("../../common/enums");
|
||||
|
||||
const {
|
||||
@@ -121,7 +123,7 @@ class ProstorCrawler {
|
||||
const urlAdTypePart = PROSTOR_ENUMS.PROSTOR_AD_TYPE[this.crawlerAdTypes];
|
||||
const urlCategoryPart = PROSTOR_ENUMS.PROSTOR_AD_CATEGORY[adCategory];
|
||||
if (urlAdTypePart !== undefined && urlCategoryPart !== undefined) {
|
||||
const urlPageToCrawl = `${this.baseUrl}?remove_sold=1${urlAdTypePart}${urlCategoryPart}`;
|
||||
const urlPageToCrawl = `${this.baseUrl}?remove_sold=0${urlAdTypePart}${urlCategoryPart}`;
|
||||
const listOfAllRealEstates = await this.extractRealEstates(
|
||||
urlPageToCrawl
|
||||
);
|
||||
@@ -179,7 +181,7 @@ class ProstorCrawler {
|
||||
}
|
||||
|
||||
async scrapeAd(realEstate) {
|
||||
const { lat, lng, property_name, price, size, link } = realEstate;
|
||||
const { lat, lng, property_name, price, size, link, status } = realEstate;
|
||||
const url = `https://prostor.ba${link}`;
|
||||
// console.log("[PROSTOR] Scraping : ", url);
|
||||
try {
|
||||
@@ -198,16 +200,6 @@ class ProstorCrawler {
|
||||
const prostorId = linkParts[4];
|
||||
|
||||
if (!adType || !realEstateType || !prostorId) {
|
||||
console.log(
|
||||
"adType: ",
|
||||
adType,
|
||||
" reType: ",
|
||||
realEstateType,
|
||||
" prostorId: ",
|
||||
prostorId,
|
||||
"url: ",
|
||||
url
|
||||
);
|
||||
return null;
|
||||
}
|
||||
|
||||
@@ -218,52 +210,70 @@ class ProstorCrawler {
|
||||
|
||||
$(allDataSelector)
|
||||
.find("p")
|
||||
.each((i, elem) => {
|
||||
const propertyElement = $(elem)
|
||||
.each((i, element) => {
|
||||
const propertyElement = $(element)
|
||||
.text()
|
||||
.split(":")
|
||||
.map(text => text.trim());
|
||||
.map(text => text.trim().toLowerCase());
|
||||
|
||||
const propertyTitle = propertyElement[0];
|
||||
realEstateProperties[propertyTitle] = propertyElement[1];
|
||||
});
|
||||
|
||||
$(allDataSelector)
|
||||
.find("div.mb-2")
|
||||
.each((i, element) => {
|
||||
const propertyElement = $(element)
|
||||
.text()
|
||||
.trim()
|
||||
.toLowerCase();
|
||||
|
||||
realEstateProperties[propertyElement] = true;
|
||||
});
|
||||
|
||||
if (JSON.stringify(realEstateProperties) === JSON.stringify({})) {
|
||||
return null;
|
||||
}
|
||||
|
||||
let numberOfRooms =
|
||||
parseFloat(realEstateProperties["Broj soba"]) +
|
||||
parseFloat(realEstateProperties["Broj spavaćih soba"]) || null,
|
||||
parseFloat(realEstateProperties["broj soba"]) +
|
||||
parseFloat(realEstateProperties["broj spavaćih soba"]) || null,
|
||||
numberOfFloors = null,
|
||||
floor = null,
|
||||
accessRoadType = null,
|
||||
heatingType = null,
|
||||
heatingType = ProstorCrawler.getHeatingTypeId(realEstateProperties),
|
||||
furnishingType = null,
|
||||
balcony = null,
|
||||
balcony =
|
||||
realEstateProperties["balkon"] ||
|
||||
realEstateProperties["terasa"] ||
|
||||
realEstateProperties["lođa"] ||
|
||||
null,
|
||||
newBuilding = linkParts[1] === "novogradnja",
|
||||
elevator = null,
|
||||
water = null,
|
||||
electricity = null,
|
||||
drainageSystem = null,
|
||||
elevator = realEstateProperties["lift"] || null,
|
||||
water = realEstateProperties["voda"] || null,
|
||||
electricity = realEstateProperties["električna energija"] || null,
|
||||
drainageSystem = realEstateProperties["kanalizacija"] || null,
|
||||
registeredInZkBooks = null,
|
||||
recentlyAdapted = null,
|
||||
parking = null,
|
||||
garage = null,
|
||||
gas = null,
|
||||
antiTheftDoor = null,
|
||||
airCondition = null,
|
||||
phoneConnection = null,
|
||||
cableTV = null,
|
||||
internet = null,
|
||||
basementAttic = null,
|
||||
storeRoom = null,
|
||||
videoSurveillance = null,
|
||||
alarm = null,
|
||||
parking = realEstateProperties["parking"] || null,
|
||||
garage = realEstateProperties["garaža"] || null,
|
||||
gas = realEstateProperties["plin"] || null,
|
||||
antiTheftDoor = realEstateProperties["blindo vrata"] || null,
|
||||
airCondition = realEstateProperties["klima"] || null,
|
||||
phoneConnection = realEstateProperties["telefon"] || null,
|
||||
cableTV = realEstateProperties["kablovksa tv"] || null,
|
||||
internet =
|
||||
realEstateProperties["internet"] ||
|
||||
realEstateProperties["adsl"] ||
|
||||
null,
|
||||
basementAttic = realEstateProperties["podrum"] || null,
|
||||
storeRoom = realEstateProperties["ostava"] || null,
|
||||
videoSurveillance = realEstateProperties["video nadzor"],
|
||||
alarm = realEstateProperties["alarm"] || null,
|
||||
suitableForStudents = null,
|
||||
includingBills = null,
|
||||
animalsAllowed = null,
|
||||
pool = null,
|
||||
pool = realEstateProperties["bazen"] || null,
|
||||
urbanPlanPermit = null,
|
||||
buildingPermit = null,
|
||||
utilityConnection = null,
|
||||
@@ -278,7 +288,7 @@ class ProstorCrawler {
|
||||
// If there are two parts, that represents more real estates are sold
|
||||
// numberOfFloors is contained in second part, after / sign
|
||||
|
||||
const floorsArray = realEstateProperties["Sprat"].split(" - ");
|
||||
const floorsArray = realEstateProperties["sprat"].split(" - ");
|
||||
let floorText = "";
|
||||
if (floorsArray.length === 1) {
|
||||
const floorDescription = floorsArray[0].split("/");
|
||||
@@ -296,7 +306,7 @@ class ProstorCrawler {
|
||||
|
||||
if (isNaN(floor)) {
|
||||
// It was textual representation of floor, like "Pr", "Su" or similar
|
||||
switch (floorText.toLowerCase()) {
|
||||
switch (floorText) {
|
||||
case "pr":
|
||||
floor = 0;
|
||||
break;
|
||||
@@ -312,7 +322,15 @@ class ProstorCrawler {
|
||||
}
|
||||
}
|
||||
|
||||
const adStatus = AD_STATUS.STATUS_NORMAL;
|
||||
if (realEstateProperties["namješteno"]) {
|
||||
furnishingType = FURNISHING_TYPE.FURNISHED.id;
|
||||
} else if (realEstateProperties["polunamješteno"]) {
|
||||
furnishingType = FURNISHING_TYPE.HALF_FURNISHED.id;
|
||||
} else {
|
||||
furnishingType = FURNISHING_TYPE.NOT_FURNISHED.id;
|
||||
}
|
||||
|
||||
const adStatus = ProstorCrawler.getStatusId(status);
|
||||
const title = property_name;
|
||||
const parsedPrice = parseFloat(price.replace(/\./g, "")) || null;
|
||||
const parsedArea = parseFloat(size);
|
||||
@@ -332,7 +350,7 @@ class ProstorCrawler {
|
||||
shortDescription: "",
|
||||
longDescription: longDescription,
|
||||
streetNumber: 0,
|
||||
streetName: realEstateProperties["Adresa"],
|
||||
streetName: realEstateProperties["adresa"],
|
||||
locality: "",
|
||||
municipality: "",
|
||||
city: "",
|
||||
@@ -492,6 +510,50 @@ class ProstorCrawler {
|
||||
}
|
||||
}
|
||||
|
||||
static getHeatingTypeId(realEstateProperties) {
|
||||
const realEstatePropertiesKeys = Object.keys(realEstateProperties);
|
||||
for (const property of realEstatePropertiesKeys) {
|
||||
switch (property) {
|
||||
case "centralno toplane":
|
||||
return HEATING_TYPE.CENTRAL_CITY.id;
|
||||
case "etažno plinsko":
|
||||
return HEATING_TYPE.CENTRAL_GAS.id;
|
||||
case "termo blok":
|
||||
case "podno grijanje":
|
||||
return HEATING_TYPE.OTHER.id;
|
||||
case "etažno električno":
|
||||
case "konvektori":
|
||||
return HEATING_TYPE.ELECTRICITY.id;
|
||||
case "plinske peći":
|
||||
return HEATING_TYPE.GAS.id;
|
||||
case "vlastita kotlovnica":
|
||||
return HEATING_TYPE.CENTRAL_BOILER.id;
|
||||
case "toplotna pumpa":
|
||||
return HEATING_TYPE.HEAT_PUMP.id;
|
||||
case "kamin":
|
||||
return HEATING_TYPE.WOOD.id;
|
||||
default:
|
||||
//console.log("[PROSTOR] Nepoznato >>> [", property, "]");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static getStatusId(statusText) {
|
||||
switch (statusText) {
|
||||
case "":
|
||||
return AD_STATUS.STATUS_NORMAL;
|
||||
case "Rezervisano":
|
||||
return AD_STATUS.STATUS_RESERVED;
|
||||
case "Prodano":
|
||||
return AD_STATUS.STATUS_SOLD;
|
||||
case "Iznajmljeno":
|
||||
return AD_STATUS.STATUS_RENTED;
|
||||
default:
|
||||
console.log("[PROSTOR] Unknown AD_STATUS : [", statusText, "]");
|
||||
return AD_STATUS.STATUS_NORMAL;
|
||||
}
|
||||
}
|
||||
|
||||
async sleep(ms) {
|
||||
return new Promise(resolve => setTimeout(resolve, ms));
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user