ProstorCrawler: read ad status, heating type, furnishing and amenities
from the scraped prostor.ba listing data.

Review notes for this revision of the patch:
  * Line breaks and indentation were rebuilt from a flattened copy of the
    patch. Context-line whitespace may differ from the target file; if it
    does, apply with `git apply --ignore-whitespace`.
  * cableTV now matches the label "kablovska tv" in addition to the
    previously used spelling "kablovksa tv".
  * videoSurveillance falls back to null like every other amenity flag.
  * getHeatingTypeId returns null (the former default of heatingType) when
    no property key names a known heating type.
  * Hunk headers were recomputed for the added lines. The post-image hash
    on the `index` line was NOT recomputed.

diff --git a/app/crawler/specificCrawlers/prostor.js b/app/crawler/specificCrawlers/prostor.js
index 7737591..cb1d3f9 100644
--- a/app/crawler/specificCrawlers/prostor.js
+++ b/app/crawler/specificCrawlers/prostor.js
@@ -9,7 +9,9 @@ const {
   AD_CATEGORY,
   AD_AGENCY,
   AD_STATUS,
-  CRAWLER_AD_TYPE
+  CRAWLER_AD_TYPE,
+  FURNISHING_TYPE,
+  HEATING_TYPE
 } = require("../../common/enums");
 
 const {
@@ -121,7 +123,7 @@ class ProstorCrawler {
       const urlAdTypePart = PROSTOR_ENUMS.PROSTOR_AD_TYPE[this.crawlerAdTypes];
       const urlCategoryPart = PROSTOR_ENUMS.PROSTOR_AD_CATEGORY[adCategory];
       if (urlAdTypePart !== undefined && urlCategoryPart !== undefined) {
-        const urlPageToCrawl = `${this.baseUrl}?remove_sold=1${urlAdTypePart}${urlCategoryPart}`;
+        const urlPageToCrawl = `${this.baseUrl}?remove_sold=0${urlAdTypePart}${urlCategoryPart}`;
         const listOfAllRealEstates = await this.extractRealEstates(
           urlPageToCrawl
         );
@@ -179,7 +181,7 @@ class ProstorCrawler {
   }
 
   async scrapeAd(realEstate) {
-    const { lat, lng, property_name, price, size, link } = realEstate;
+    const { lat, lng, property_name, price, size, link, status } = realEstate;
     const url = `https://prostor.ba${link}`;
     // console.log("[PROSTOR] Scraping : ", url);
     try {
@@ -198,16 +200,6 @@ class ProstorCrawler {
       const prostorId = linkParts[4];
 
       if (!adType || !realEstateType || !prostorId) {
-        console.log(
-          "adType: ",
-          adType,
-          " reType: ",
-          realEstateType,
-          " prostorId: ",
-          prostorId,
-          "url: ",
-          url
-        );
         return null;
       }
 
@@ -218,52 +210,74 @@ class ProstorCrawler {
 
       $(allDataSelector)
         .find("p")
-        .each((i, elem) => {
-          const propertyElement = $(elem)
+        .each((i, element) => {
+          const propertyElement = $(element)
             .text()
             .split(":")
-            .map(text => text.trim());
+            .map(text => text.trim().toLowerCase());
 
           const propertyTitle = propertyElement[0];
           realEstateProperties[propertyTitle] = propertyElement[1];
         });
 
+      $(allDataSelector)
+        .find("div.mb-2")
+        .each((i, element) => {
+          const propertyElement = $(element)
+            .text()
+            .trim()
+            .toLowerCase();
+
+          realEstateProperties[propertyElement] = true;
+        });
+
       if (JSON.stringify(realEstateProperties) === JSON.stringify({})) {
         return null;
       }
 
       let numberOfRooms =
-          parseFloat(realEstateProperties["Broj soba"]) +
-            parseFloat(realEstateProperties["Broj spavaćih soba"]) || null,
+          parseFloat(realEstateProperties["broj soba"]) +
+            parseFloat(realEstateProperties["broj spavaćih soba"]) || null,
         numberOfFloors = null,
         floor = null,
         accessRoadType = null,
-        heatingType = null,
+        heatingType = ProstorCrawler.getHeatingTypeId(realEstateProperties),
         furnishingType = null,
-        balcony = null,
+        balcony =
+          realEstateProperties["balkon"] ||
+          realEstateProperties["terasa"] ||
+          realEstateProperties["lođa"] ||
+          null,
         newBuilding = linkParts[1] === "novogradnja",
-        elevator = null,
-        water = null,
-        electricity = null,
-        drainageSystem = null,
+        elevator = realEstateProperties["lift"] || null,
+        water = realEstateProperties["voda"] || null,
+        electricity = realEstateProperties["električna energija"] || null,
+        drainageSystem = realEstateProperties["kanalizacija"] || null,
         registeredInZkBooks = null,
         recentlyAdapted = null,
-        parking = null,
-        garage = null,
-        gas = null,
-        antiTheftDoor = null,
-        airCondition = null,
-        phoneConnection = null,
-        cableTV = null,
-        internet = null,
-        basementAttic = null,
-        storeRoom = null,
-        videoSurveillance = null,
-        alarm = null,
+        parking = realEstateProperties["parking"] || null,
+        garage = realEstateProperties["garaža"] || null,
+        gas = realEstateProperties["plin"] || null,
+        antiTheftDoor = realEstateProperties["blindo vrata"] || null,
+        airCondition = realEstateProperties["klima"] || null,
+        phoneConnection = realEstateProperties["telefon"] || null,
+        // Match the correctly spelled label as well as the earlier spelling.
+        cableTV =
+          realEstateProperties["kablovska tv"] ||
+          realEstateProperties["kablovksa tv"] ||
+          null,
+        internet =
+          realEstateProperties["internet"] ||
+          realEstateProperties["adsl"] ||
+          null,
+        basementAttic = realEstateProperties["podrum"] || null,
+        storeRoom = realEstateProperties["ostava"] || null,
+        videoSurveillance = realEstateProperties["video nadzor"] || null,
+        alarm = realEstateProperties["alarm"] || null,
         suitableForStudents = null,
         includingBills = null,
         animalsAllowed = null,
-        pool = null,
+        pool = realEstateProperties["bazen"] || null,
         urbanPlanPermit = null,
         buildingPermit = null,
         utilityConnection = null,
@@ -278,7 +292,7 @@ class ProstorCrawler {
 
       // If there are two parts, that represents more real estates are sold
       // numberOfFloors is contained in second part, after / sign
-      const floorsArray = realEstateProperties["Sprat"].split(" - ");
+      const floorsArray = realEstateProperties["sprat"].split(" - ");
       let floorText = "";
       if (floorsArray.length === 1) {
         const floorDescription = floorsArray[0].split("/");
@@ -296,7 +310,7 @@ class ProstorCrawler {
 
       if (isNaN(floor)) {
         // It was textual representation of floor, like "Pr", "Su" or similar
-        switch (floorText.toLowerCase()) {
+        switch (floorText) {
           case "pr":
             floor = 0;
             break;
@@ -312,7 +326,15 @@ class ProstorCrawler {
         }
       }
 
-      const adStatus = AD_STATUS.STATUS_NORMAL;
+      if (realEstateProperties["namješteno"]) {
+        furnishingType = FURNISHING_TYPE.FURNISHED.id;
+      } else if (realEstateProperties["polunamješteno"]) {
+        furnishingType = FURNISHING_TYPE.HALF_FURNISHED.id;
+      } else {
+        furnishingType = FURNISHING_TYPE.NOT_FURNISHED.id;
+      }
+
+      const adStatus = ProstorCrawler.getStatusId(status);
       const title = property_name;
       const parsedPrice = parseFloat(price.replace(/\./g, "")) || null;
       const parsedArea = parseFloat(size);
@@ -332,7 +354,7 @@ class ProstorCrawler {
         shortDescription: "",
         longDescription: longDescription,
         streetNumber: 0,
-        streetName: realEstateProperties["Adresa"],
+        streetName: realEstateProperties["adresa"],
         locality: "",
         municipality: "",
         city: "",
@@ -492,6 +514,65 @@ class ProstorCrawler {
     }
   }
 
+  /**
+   * Returns the id of the first heating type whose label appears among the
+   * keys of the scraped properties.
+   *
+   * @param {Object} realEstateProperties scraped properties keyed by label
+   * @returns {*} the matching HEATING_TYPE id, or null when no key matches
+   */
+  static getHeatingTypeId(realEstateProperties) {
+    const realEstatePropertiesKeys = Object.keys(realEstateProperties);
+    for (const property of realEstatePropertiesKeys) {
+      switch (property) {
+        case "centralno toplane":
+          return HEATING_TYPE.CENTRAL_CITY.id;
+        case "etažno plinsko":
+          return HEATING_TYPE.CENTRAL_GAS.id;
+        case "termo blok":
+        case "podno grijanje":
+          return HEATING_TYPE.OTHER.id;
+        case "etažno električno":
+        case "konvektori":
+          return HEATING_TYPE.ELECTRICITY.id;
+        case "plinske peći":
+          return HEATING_TYPE.GAS.id;
+        case "vlastita kotlovnica":
+          return HEATING_TYPE.CENTRAL_BOILER.id;
+        case "toplotna pumpa":
+          return HEATING_TYPE.HEAT_PUMP.id;
+        case "kamin":
+          return HEATING_TYPE.WOOD.id;
+        default:
+        // Not a heating label; continue with the next key.
+      }
+    }
+    // No heating label found: keep the null that heatingType defaulted to.
+    return null;
+  }
+
+  /**
+   * Translates the status text of a listing into an AD_STATUS value.
+   *
+   * @param {string} statusText status text taken from the listing entry
+   * @returns {*} the matching AD_STATUS value; STATUS_NORMAL for "" or unknown
+   */
+  static getStatusId(statusText) {
+    switch (statusText) {
+      case "":
+        return AD_STATUS.STATUS_NORMAL;
+      case "Rezervisano":
+        return AD_STATUS.STATUS_RESERVED;
+      case "Prodano":
+        return AD_STATUS.STATUS_SOLD;
+      case "Iznajmljeno":
+        return AD_STATUS.STATUS_RENTED;
+      default:
+        console.log("[PROSTOR] Unknown AD_STATUS : [", statusText, "]");
+        return AD_STATUS.STATUS_NORMAL;
+    }
+  }
+
   async sleep(ms) {
     return new Promise(resolve => setTimeout(resolve, ms));
   }