diff --git a/app/crawler/specificCrawlers/saljic.js b/app/crawler/specificCrawlers/saljic.js index 0e912fc..4156727 100644 --- a/app/crawler/specificCrawlers/saljic.js +++ b/app/crawler/specificCrawlers/saljic.js @@ -67,7 +67,7 @@ class SaljicCrawler { indexGenerators.push(this.categoryIndexer(adCategory)); } // - console.log(indexGenerators); + //console.log(indexGenerators); // let done = false; while (!done) { @@ -120,247 +120,490 @@ class SaljicCrawler { } async *categoryIndexer(adCategory) { + let pageToIndex = 1; + const urlAdTypePart = SALJIC_ENUMS.SALJIC_AD_TYPE[this.crawlerAdTypes]; const urlCategoryPart = SALJIC_ENUMS.SALJIC_AD_CATEGORY[adCategory]; if (urlAdTypePart !== undefined && urlCategoryPart !== undefined) { - const urlPageToCrawl = `${this.baseUrl}?order_by=${urlAdTypePart}${urlCategoryPart}`; - const listOfAllRealEstates = await this.extractRealEstates( - urlPageToCrawl - ); - - let elementToStartIndexFrom = 0; while (true) { - const realEstatesForSinglePage = listOfAllRealEstates.slice( - elementToStartIndexFrom, - elementToStartIndexFrom + this.maxResultsPerPage + const urlPagePart = pageToIndex === 1 ? "" : (pageToIndex - 1) * 2 * 11; + const urlPageToCrawl = `${this.baseUrl}?order_by=${urlAdTypePart}${urlCategoryPart}&per_page=${urlPagePart}`; + + const singlePageResults = await this.indexSinglePage( + urlPageToCrawl, + this.maxResultsPerPage ); - if (realEstatesForSinglePage.length > 0) { - elementToStartIndexFrom += realEstatesForSinglePage.length; - - const singlePageResults = await this.indexSinglePage( - realEstatesForSinglePage - ); - - const filteredSinglePageResults = singlePageResults.filter( - singleResult => !!singleResult - ); - - if ( - Array.isArray(filteredSinglePageResults) && - filteredSinglePageResults.length > 0 - ) { - yield filteredSinglePageResults; - } else { - return undefined; - } + if (Array.isArray(singlePageResults) && singlePageResults.length > 0) { + yield singlePageResults; } else { return undefined; } + + ++pageToIndex; + if (pageToIndex === this.maxPages) { + return undefined; + } } } else { return undefined; } } - async indexSinglePage(realEstatesList) { - const asyncActions = []; - for (const realEstate of realEstatesList) { - asyncActions.push(this.scrapeAd(realEstate)); + async indexSinglePage(url, maxResultsPerPage) { + if (PRINT_CRAWLER_DEBUG) { + console.log("[SALJIC] Index page : ", url); } try { - return await Promise.all(asyncActions); + const res = await fetch(url); + const body = await res.text(); + const $ = cheerio.load(body); + let hrefs = []; + + $("#shop") + .find(".product") + .each((i, elem) => { + const href = $(elem) + .find("a") + .first() + .attr("href"); + if (href) { + hrefs.push(href); + } + }); + + //Converting to absolute URLs + const hrefsAbs = hrefs.map(link => { + return "https://www.saljicnekretnine.ba" + link; + }); + + let actualNoOfResults = + hrefsAbs.length <= maxResultsPerPage + ? hrefsAbs.length + : maxResultsPerPage; + + const asyncScraping = []; + for (let i = 0; i < actualNoOfResults; i++) { + asyncScraping.push(this.scrapeAd(hrefsAbs[i])); + } + + const scrapedData = await Promise.all(asyncScraping); + const filteredScrapedData = scrapedData.filter(adData => !!adData); + return filteredScrapedData; } catch (e) { - console.log( - "[SALJIC] Error crawling ads : ", - e.message || "UNKNOWN ERROR" - ); + console.error("[SALJIC] Exception caught:" + e); return []; } } - async scrapeAd(realEstate) { - const { lat, lng, property_name, price, size, link, status } = realEstate; - const url = `https://www.saljicnekretnine.ba/v2/${link}`; - // console.log("[SALJIC] Scraping : ", url); + async scrapeAd(url) { + console.log("[SALJIC] Scraping : ", url); try { const adPageSource = await fetch(url); const body = await adPageSource.text(); const $ = cheerio.load(body); - // ?? Ovo se mora promijeniti - // link contains part of the URL in the format of : /prodaja/stan/stup/9556 - // general form is : /actionType/realEstateType/location/realEstateID - // linkParts contains : ['', 'actionType', 'realEstateType', 'location', 'realEstateID'] - const linkParts = link.split("/"); + // ??? treba li nesto za status + let status = AD_STATUS.STATUS_NORMAL; - const adType = SaljicCrawler.getAdTypeId(linkParts[1]); - const realEstateType = SaljicCrawler.getAdCategoryId(linkParts[2]); - const prostorId = linkParts[4]; + const propertySelectors = { + title: + "div.content-wrap > div.container > div.col-md-8.nobottommargin > div.single-post > div.entry > div.entry-title > h2", + price: + "div.content-wrap > div.container > div.col-md-8.nobottommargin > div.single-post > div.entry > div.topmargin-sm.single-product > div.product > div.product-price > ins", + streetName: + "div.content-wrap > div.container > div.col-md-8.nobottommargin > div.single-post > div.entry > div.entry-content.topmargin > p", - if (!adType || !realEstateType || !prostorId) { - return null; - } + descriptions: + "div.content-wrap > div.container > div.col-md-8.nobottommargin > div.single-post > div.entry > div.entry-content.topmargin > div.toggle.toggle-bg > div.togglec >p:nth-child(1)", + latAndLong: + "div.content-wrap > div.container > div.col-md-8.nobottommargin > div.single-post > div.entry > div.entry-content.topmargin > div.gmap.bottommargin > iframe" + }; + const title = $(propertySelectors.title) + .text() + .replace(/(\r\n|\n|\r)/gm, "") + .replace(/ {1,}/g, " ") + .trim(); - const allDataSelector = - "body > div > div.container-fluid > div > div.column-right > table > tbody"; - - const realEstateProperties = {}; - - $(allDataSelector) - .find("p") - .each((i, element) => { - const propertyElement = $(element) - .text() - .split(":") - .map(text => text.trim().toLowerCase()); - - const propertyTitle = propertyElement[0]; - realEstateProperties[propertyTitle] = propertyElement[1]; - }); - - $(allDataSelector) - .find("div.mb-2") - .each((i, element) => { - const propertyElement = $(element) - .text() - .trim() - .toLowerCase(); - - realEstateProperties[propertyElement] = true; - }); - - if (JSON.stringify(realEstateProperties) === JSON.stringify({})) { - return null; - } - - let numberOfRooms = - parseFloat(realEstateProperties["broj soba"]) + - parseFloat(realEstateProperties["broj spavaćih soba"]) || null, - numberOfFloors = null, - floor = null, - accessRoadType = null, - heatingType = ProstorCrawler.getHeatingTypeId(realEstateProperties), - furnishingType = null, - balcony = - realEstateProperties["balkon"] || - realEstateProperties["terasa"] || - realEstateProperties["lođa"] || - null, - newBuilding = linkParts[1] === "novogradnja", - elevator = realEstateProperties["lift"] || null, - water = realEstateProperties["voda"] || null, - electricity = realEstateProperties["električna energija"] || null, - drainageSystem = realEstateProperties["kanalizacija"] || null, - registeredInZkBooks = null, - recentlyAdapted = null, - parking = realEstateProperties["parking"] || null, - garage = realEstateProperties["garaža"] || null, - gas = realEstateProperties["plin"] || null, - antiTheftDoor = realEstateProperties["blindo vrata"] || null, - airCondition = realEstateProperties["klima"] || null, - phoneConnection = realEstateProperties["telefon"] || null, - cableTV = realEstateProperties["kablovksa tv"] || null, - internet = - realEstateProperties["internet"] || - realEstateProperties["adsl"] || - null, - basementAttic = realEstateProperties["podrum"] || null, - storeRoom = realEstateProperties["ostava"] || null, - videoSurveillance = realEstateProperties["video nadzor"], - alarm = realEstateProperties["alarm"] || null, - suitableForStudents = null, - includingBills = null, - animalsAllowed = null, - pool = realEstateProperties["bazen"] || null, - urbanPlanPermit = null, - buildingPermit = null, - utilityConnection = null, - distanceToRiver = null, - numberOfViewsAgency = null; - - // Floor versions (there are possibly more versions) : - // Sprat: 3/3 - // Sprat: 1 - 2/2 - // Sprat: Pr - 7/7 - // Sprat: -2/0 - // If there are two parts, that represents more real estates are sold - // numberOfFloors is contained in second part, after / sign - - const floorsArray = realEstateProperties["sprat"].split(" - "); - let floorText = ""; - if (floorsArray.length === 1) { - const floorDescription = floorsArray[0].split("/"); - numberOfFloors = parseInt(floorDescription[1]) || null; - floorText = floorDescription[0]; - floor = Math.round(parseFloat(floorText)); - } else if (floorsArray.length === 2) { - const floorDescription = floorsArray[1].split("/"); - numberOfFloors = parseInt(floorDescription[1]) || null; - floorText = floorsArray[0]; - floor = Math.round(parseFloat(floorText)); - } else { - // This is something strange - } - - if (isNaN(floor)) { - // It was textual representation of floor, like "Pr", "Su" or similar - switch (floorText) { - case "pr": - floor = 0; - break; - case "su": - floor = -1; - break; - default: - console.log( - "[SALJIC] Unknown textual representation of floor : ", - floorText + console.log("Title:", title); + const priceText = $(propertySelectors.price) + .text() + .replace(/(\r\n|\n|\r)/gm, "") + .replace(/ {1,}/g, " ") + .trim(); + const price = + priceText === "CIJENA NA UPIT" + ? null + : parseFloat( + priceText.substring(8, priceText.length - 3).replace(",", "") ); - floor = null; + + console.log("Price:", price); + + const streetName = $(propertySelectors.streetName) + .text() + .replace(/(\r\n|\n|\r)/gm, "") + .trim(); + console.log("Street:", streetName); + + const descriptions = $(propertySelectors.descriptions) + .text() + .trim(); + console.log("Description:", descriptions); + + const latAndLongSrc = $(propertySelectors.latAndLong).attr("src"); + const latText = latAndLongSrc.substring( + latAndLongSrc.indexOf("marker=") + 7, + latAndLongSrc.indexOf("%2C", latAndLongSrc.indexOf("marker=")) + ); + const longText = latAndLongSrc.substring( + latAndLongSrc.indexOf("%2C", latAndLongSrc.indexOf("marker=")) + 3, + latAndLongSrc.length + ); + const locationLat = parseFloat(latText) || null; + const locationLong = parseFloat(longText) || null; + console.log("Lat:", locationLat); + console.log("Long:", locationLong); + + //const category = $(propertySelectors.category) + //.text() + //.trim(); + + //====== OTHER AD INFORMATION =============== + let adType = null; + let olxId = null; + let numberOfViewsAgency = null; + + let otherInformationDivId; + //We need to locate DIV ID where other information are stored + for (let possibleId = 10; possibleId <= 20; possibleId++) { + const adTypeFieldTitle = $( + `#artikal_glavni_div > div.artikal_lijevo > div:nth-child(${possibleId}) > div:nth-child(2) > div.df1` + ) + .text() + .trim(); + + if (adTypeFieldTitle === "Vrsta oglasa") { + otherInformationDivId = possibleId; + break; } } - if (realEstateProperties["namješteno"]) { - furnishingType = FURNISHING_TYPE.FURNISHED.id; - } else if (realEstateProperties["polunamješteno"]) { - furnishingType = FURNISHING_TYPE.HALF_FURNISHED.id; - } else { - furnishingType = FURNISHING_TYPE.NOT_FURNISHED.id; + if (!otherInformationDivId) { + throw { message: "Other information DIV could not be found" }; } - const adStatus = SaljicCrawler.getStatusId(status); - const title = property_name; - const parsedPrice = parseFloat(price.replace(/\./g, "")) || null; - const parsedArea = parseFloat(size); - const gardenSize = null; - const longDescription = null; + const olxIdFieldSelector = `#artikal_glavni_div > div.artikal_lijevo > div:nth-child(${otherInformationDivId}) > div:nth-child(4)`; + const publishedDateValueSelector = `#artikal_glavni_div > div.artikal_lijevo > div:nth-child(${otherInformationDivId}) > div:nth-child(3) > div.df2.neanimiraj > time`; + const numberOfViewsAgencyValueSelector = `#artikal_glavni_div > div.artikal_lijevo > div:nth-child(${otherInformationDivId}) > div:nth-child(6) > div.df2`; + const renewedDateFullValueSelector = `#artikal_glavni_div > div.artikal_lijevo > div.op.ob.pop`; + + const publishedDate = $(publishedDateValueSelector) + .text() + .trim(); + + const publishedDateMoment = moment.tz( + publishedDate, + OLX_ENUMS.OLX_PUBLISHED_DATE_FORMAT, + DEFAULT_TIMEZONE + ); + + if (!publishedDateMoment.isValid()) { + throw { message: "Invalid published date ! Check parsing format" }; + } + + const renewedDate = $(renewedDateFullValueSelector) + .data("content") + .trim(); + + const renewedDateMoment = moment.tz( + renewedDate, + OLX_ENUMS.OLX_RENEWED_DATE_FORMAT, + DEFAULT_TIMEZONE + ); + + if (!renewedDateMoment) { + throw { + message: + "Invalid renewed date ! Check how parser parsed renewed date text" + }; + } + + adType = $( + `#artikal_glavni_div > div.artikal_lijevo > div:nth-child(${otherInformationDivId}) > div:nth-child(2) > div.df2` + ) + .text() + .trim(); + + const parsedCategory = this.getAdCategoryId(category); + if (!parsedCategory) { + throw { message: `Unknown ad category [${category}]` }; + } + + const parsedAdType = this.getAdTypeId(adType); + if (!parsedAdType) { + throw { message: "Unknown ad type" }; + } + + const olxIdFieldTitle = $(`${olxIdFieldSelector} > div.df1`) + .text() + .trim(); + olxId = $(`${olxIdFieldSelector} > div.df2`) + .text() + .trim(); + numberOfViewsAgency = parseInt( + $(numberOfViewsAgencyValueSelector) + .text() + .trim() + ); + + if (olxIdFieldTitle !== "OLX ID") { + throw { message: "Cannot find correct OLX ID" }; + } + //=========================================== + + //====== DETAIL INFORMATION FIELDS ========== + let area, + gardenSize, + numberOfRooms = null, + numberOfFloors = null, + floor = null, + accessRoadType = null, + heatingType = null, + furnishingType = null, + balcony = null, + newBuilding = null, + elevator = null, + water = null, + electricity = null, + drainageSystem = null, + registeredInZkBooks = null, + recentlyAdapted = null, + parking = null, + garage = null, + gas = null, + antiTheftDoor = null, + airCondition = null, + phoneConnection = null, + cableTV = null, + internet = null, + basementAttic = null, + storeRoom = null, + videoSurveillance = null, + alarm = null, + suitableForStudents = null, + includingBills = null, + animalsAllowed = null, + pool = null, + urbanPlanPermit = null, + buildingPermit = null, + utilityConnection = null, + distanceToRiver = null; + + let fieldIndex = 1; + do { + const fieldSelector = `#dodatnapolja1 > div:nth-child(${fieldIndex})`; + const fieldTitleSelector = `${fieldSelector} > div.df1`; + const fieldValueSelector = `${fieldSelector} > div.df2`; + + const fieldTitle = $(fieldTitleSelector) + .text() + .trim() + .toLowerCase(); + const fieldValue = $(fieldValueSelector) + .text() + .trim() + .toLowerCase(); + + switch (fieldTitle) { + case "kvadrata": + area = fieldValue; + break; + case "okućnica (kvadratura)": + gardenSize = fieldValue; + break; + case "broj soba": + numberOfRooms = this.parseNumberOfRooms(fieldValue, parsedCategory); + break; + case "broj prostorija": + numberOfRooms = this.parseNumberOfRooms(fieldValue, parsedCategory); + break; + case "broj spratova": + numberOfFloors = this.parseNumberOfFloors( + fieldValue, + parsedCategory + ); + break; + case "sprat": + floor = this.parseFloorNumber(fieldValue, parsedCategory); + break; + case "vrsta grijanja": + heatingType = this.getHeatingTypeId(fieldValue); + break; + case "namješten?": + furnishingType = this.getFurnishingTypeId(fieldValue); + break; + case "namješten": + furnishingType = FURNISHING_TYPE.FURNISHED.id; + break; + case "namještena": + furnishingType = FURNISHING_TYPE.FURNISHED.id; + break; + case "voda": + water = true; + break; + case "struja": + electricity = true; + break; + case "kanalizacija": + drainageSystem = fieldValue !== "nema"; + break; + case "godina izgradnje": + newBuilding = newBuilding || fieldValue === "novogradnja"; + break; + case "kućni ljubimci": + animalsAllowed = fieldValue === "da"; + break; + case "uknjiženo / zk": + registeredInZkBooks = true; + break; + case "uknjiženo (zk)": + registeredInZkBooks = true; + break; + case "novogradnja": + newBuilding = true; + break; + case "nedavno adaptiran": + recentlyAdapted = true; + break; + case "nedavno adaptirana": + recentlyAdapted = true; + break; + case "balkon": + balcony = true; + break; + case "lift": + elevator = true; + break; + case "parking": + parking = true; + break; + case "garaža": + garage = true; + break; + case "plin": + gas = true; + break; + case "blindirana vrata": + antiTheftDoor = true; + break; + case "klima": + airCondition = true; + break; + case "telefonski priključak": + phoneConnection = true; + break; + case "kablovska tv": + cableTV = true; + break; + case "internet": + internet = true; + break; + case "podrum/tavan": + basementAttic = true; + break; + case "ostava/špajz": + storeRoom = true; + break; + case "video nadzor": + videoSurveillance = true; + break; + case "alarm": + alarm = true; + break; + case "za studente": + suitableForStudents = true; + break; + case "uključen trošak režija": + includingBills = true; + break; + case "građevinska dozvola": + buildingPermit = true; + break; + case "komunalni priključak": + utilityConnection = true; + break; + case "urbanistička dozvola": + urbanPlanPermit = true; + break; + case "udaljenost od rijeke (m)": + distanceToRiver = parseInt(fieldValue) || null; + break; + case "prilaz": + accessRoadType = this.getAccessRoadTypeId(fieldValue); + break; + case "bazen": + pool = true; + break; + case "iznajmljeno": + status = AD_STATUS.STATUS_RENTED; + break; + default: + // console.log(fieldTitle, " = ", fieldValue); + break; + } + + if (++fieldIndex === OLX_ENUMS.MAX_DETAIL_FIELDS || fieldTitle === "") { + break; + } + } while (true); + //=========================================== + + //========================================= + const parsedArea = this.parseArea(area) || null; + const parsedGardenSize = this.parseArea(gardenSize) || null; + const parsedPrice = this.parsePrice(price) || null; + + if ( + title.indexOf("[PRODANO]") !== -1 || + title.indexOf("[ZAVRŠENO]") !== -1 + ) { + status = AD_STATUS.STATUS_SOLD; + } const data = { url, - agencyObjectId: prostorId, - originAgencyName: AD_AGENCY.SALJIC, - realEstateType, - adType, + agencyObjectId: olxId, + originAgencyName: AD_AGENCY.OLX, + realEstateType: parsedCategory, + adType: parsedAdType, title, price: parsedPrice, area: parsedArea, - gardenSize, - shortDescription: "", - longDescription: longDescription, + gardenSize: parsedGardenSize, + shortDescription: descriptions + .first() + .text() + .trim(), + longDescription: descriptions + .last() + .text() + .trim(), streetNumber: 0, - streetName: realEstateProperties["adresa"], + streetName: "", locality: "", municipality: "", city: "", region: "", entity: "", country: "", - locationLat: lat, - locationLong: lng, - adStatus, + locationLat, + locationLong, + adStatus: status, + publishedDate: publishedDateMoment.toISOString(), + renewedDate: renewedDateMoment.toISOString(), numberOfRooms, numberOfFloors, floor, @@ -400,159 +643,219 @@ class SaljicCrawler { return data; } catch (e) { - console.error("[SALJIC] Exception caught: " + e.message, "\r\nURL:", url); - return null; - } - } - - async extractRealEstates(url) { - if (PRINT_CRAWLER_DEBUG) { - console.log("[SALJIC] Index page : ", url); - } - - try { - const res = await fetch(url); - const body = await res.text(); - const $ = cheerio.load(body); - console.log("SALJIC: $", $); - - const scriptElement = $( - "body > div > div.container-fluid > script:nth-child(7)" - ); - // - //console.log(scriptElement[0]); - //console.log(scriptElement[0].children); - if ( - scriptElement[0] && - scriptElement[0].children && - scriptElement[0].children[0] && - scriptElement[0].children[0].data - ) { - const scriptData = scriptElement[0].children[0].data; - // - console.log(scriptData); - try { - // script element data contains JS code and we need to extract only data for realEstates - // data string starts with : var map; var markers = [{"r ... - // so we remove first 23 characters - // - // real estate JSON data ends with ...}, ]; map = new... - // so we need to find index of that substring to know where to stop - // we will NOT include trailing comma because it breaks JSON parse, so we have to close ] bracket manually - - const jsonEndIndex = scriptData.indexOf(", ]; map = new"); - if (jsonEndIndex > -1) { - const jsonData = scriptData.substring(23, jsonEndIndex) + "]"; - const realEstates = JSON.parse(jsonData); - - // const transformedRealEstates = []; - // - // for (const realEstate of realEstates) { - // const transformedRealEstate = SaljicCrawler.transformRealEstateData( - // realEstate - // ); - // if (transformedRealEstate) { - // transformedRealEstates.push(transformedRealEstate); - // } - // } - // - // return transformedRealEstates; - return realEstates; - } else { - throw { - message: "Something is wrong with JSON data or data is moved" - }; - } - } catch (e) { - console.log(e); - throw e; - } - } - } catch (e) { - console.error( - "[SALJIC] Exception caught:", - e.message || "UNKNOWN MESSAGE" - ); - return []; + console.error("Exception caught: " + e.message, "\r\nURL:", url); } + return null; } //======= HELPER FUNCTIONS ============= - static getAdCategoryId(categoryText) { + getAdCategoryId(categoryText) { switch (categoryText) { - case "stan": + case "Stanovi": return AD_CATEGORY.FLAT.id; - case "kuca": - return AD_CATEGORY.HOUSE.id; - case "apartman": - return AD_CATEGORY.APARTMENT.id; - case "poslovni-prostor": - return AD_CATEGORY.OFFICE.id; - case "garaza": - return AD_CATEGORY.GARAGE.id; - case "zemljiste": + case "Zemljišta": return AD_CATEGORY.LAND.id; + case "Kuće": + return AD_CATEGORY.HOUSE.id; + case "Poslovni prostori": + return AD_CATEGORY.OFFICE.id; + case "Apartmani": + return AD_CATEGORY.APARTMENT.id; + case "Garaže": + return AD_CATEGORY.GARAGE.id; + case "Vikendice": + return AD_CATEGORY.COTTAGE.id; default: return undefined; } } - static getAdTypeId(adTypeText) { + getAdTypeId(adTypeText) { switch (adTypeText) { - case "prodaja": + case "Prodaja": return AD_TYPE.AD_TYPE_SALE.stringId; - case "najam": + case "Izdavanje": return AD_TYPE.AD_TYPE_RENT.stringId; - case "novogradnja": - return AD_TYPE.AD_TYPE_SALE.stringId; + case "Potražnja": + return AD_TYPE.AD_TYPE_REQUEST.stringId; default: return undefined; } } - static getHeatingTypeId(realEstateProperties) { - const realEstatePropertiesKeys = Object.keys(realEstateProperties); - for (const property of realEstatePropertiesKeys) { - switch (property) { - case "centralno toplane": - return HEATING_TYPE.CENTRAL_CITY.id; - case "etažno plinsko": - return HEATING_TYPE.CENTRAL_GAS.id; - case "termo blok": - case "podno grijanje": - return HEATING_TYPE.OTHER.id; - case "etažno električno": - case "konvektori": - return HEATING_TYPE.ELECTRICITY.id; - case "plinske peći": - return HEATING_TYPE.GAS.id; - case "vlastita kotlovnica": - return HEATING_TYPE.CENTRAL_BOILER.id; - case "toplotna pumpa": - return HEATING_TYPE.HEAT_PUMP.id; - case "kamin": - return HEATING_TYPE.WOOD.id; + getHeatingTypeId(heatingTypeText) { + switch (heatingTypeText) { + case "struja": + return HEATING_TYPE.ELECTRICITY.id; + case "plin": + return HEATING_TYPE.GAS.id; + case "drva": + return HEATING_TYPE.WOOD.id; + case "centralno (gradsko)": + return HEATING_TYPE.CENTRAL_CITY.id; + case "centralno (kotlovnica)": + return HEATING_TYPE.CENTRAL_BOILER.id; + case "centralno (plin)": + return HEATING_TYPE.CENTRAL_GAS.id; + case "nije uvedeno": + return HEATING_TYPE.NO_HEATING.id; + case "ostalo": + return HEATING_TYPE.OTHER.id; + case "drugo": + return HEATING_TYPE.OTHER.id; + default: + console.log("grijanje = NEPOZNATO [", heatingTypeText, "]"); + return null; + } + } + + getFurnishingTypeId(furnishingTypeText) { + switch (furnishingTypeText) { + case "namješten": + return FURNISHING_TYPE.FURNISHED.id; + case "polunamješten": + return FURNISHING_TYPE.HALF_FURNISHED.id; + case "nenamješten": + return FURNISHING_TYPE.NOT_FURNISHED.id; + case "": + return FURNISHING_TYPE.FURNISHED.id; + default: + console.log("namješten = NEPOZNATO [", furnishingTypeText, "]"); + return null; + } + } + + getAccessRoadTypeId(accessRoadTypeText) { + switch (accessRoadTypeText) { + case "asfalt": + return ACCESS_ROAD_TYPE.ASPHALT.id; + case "beton": + return ACCESS_ROAD_TYPE.CONCRETE.id; + case "makadam": + return ACCESS_ROAD_TYPE.MACADAM.id; + case "ostalo": + return ACCESS_ROAD_TYPE.OTHER.id; + default: + console.log("pristup = NEPOZNATO [", accessRoadTypeText, "]"); + return null; + } + } + + parseArea(areaText) { + if (!areaText) { + return NaN; + } + const removeDotsExceptLastOneRegex = /[.](?=.*[.])/g; + const textWithOnlyOneDecimalDot = areaText + .replace(",", ".") + .replace(removeDotsExceptLastOneRegex, ""); + + return parseFloat(textWithOnlyOneDecimalDot); + } + + parsePrice(priceText) { + if (!priceText) { + return NaN; + } + const formattedPriceText = priceText.replace(".", "").replace(",", "."); + return parseFloat(formattedPriceText); + } + + parseNumberOfRooms(numberOfRoomsText, categoryId) { + if (categoryId === AD_CATEGORY.FLAT.id) { + switch (numberOfRoomsText) { + case "garsonjera": + return 0; + case "jednosoban (1)": + return 1; + case "jednoiposoban (1.5)": + return 1.5; + case "dvosoban (2)": + return 2; + case "trosoban (3)": + return 3; + case "četverosoban (4)": + return 4; + case "petosoban i više": + return 5; default: - //console.log("[SALJIC] Nepoznato >>> [", property, "]"); + console.log( + "broj soba [stan] = NEPOZNATO [", + numberOfRoomsText, + ", ", + categoryId, + "]" + ); + return null; } } + + if ( + categoryId === AD_CATEGORY.HOUSE.id || + categoryId === AD_CATEGORY.COTTAGE.id || + categoryId === AD_CATEGORY.APARTMENT.id || + categoryId === AD_CATEGORY.OFFICE.id + ) { + return parseInt(numberOfRoomsText) || null; + } + + console.log("broj soba = NEPOZNATO [", numberOfRoomsText, "]"); + return null; } - static getStatusId(statusText) { - switch (statusText) { - case "": - return AD_STATUS.STATUS_NORMAL; - case "Rezervisano": - return AD_STATUS.STATUS_RESERVED; - case "Prodano": - return AD_STATUS.STATUS_SOLD; - case "Iznajmljeno": - return AD_STATUS.STATUS_RENTED; - default: - console.log("[SALJIC] Unknown AD_STATUS : [", statusText, "]"); - return AD_STATUS.STATUS_NORMAL; + parseNumberOfFloors(numberOfFloorsText, categoryId) { + if ( + categoryId === AD_CATEGORY.HOUSE.id || + categoryId === AD_CATEGORY.COTTAGE.id + ) { + return parseInt(numberOfFloorsText) || null; } + + if (categoryId === AD_CATEGORY.OFFICE.id) { + if ( + numberOfFloorsText === "suteren" || + numberOfFloorsText === "prizemlje" + ) { + return 0; + } + if (numberOfFloorsText === "6+") { + return 7; + } + return parseInt(numberOfFloorsText) || null; + } + + console.log("broj spratova = NEPOZNATO [", numberOfFloorsText, "]"); + return null; + } + + parseFloorNumber(floorText, categoryId) { + if ( + categoryId === AD_CATEGORY.FLAT.id || + categoryId === AD_CATEGORY.APARTMENT.id + ) { + if ( + floorText === "suteren" || + floorText === "prizemlje" || + floorText === "visoko prizemlje" + ) { + return 0; + } + return parseInt(floorText) || null; + } + + if (categoryId === AD_CATEGORY.OFFICE.id) { + if (floorText === "zaseban objekat") { + return null; + } + if (floorText === "prizemlje" || floorText === "visoko prizemlje") { + return 0; + } + return parseInt(floorText) || null; + } + + console.log("sprat = NEPOZNATO [", floorText, "]"); + return null; } async sleep(ms) {