847 lines
24 KiB
JavaScript
847 lines
24 KiB
JavaScript
"use strict";
|
|
|
|
const fetch = require("../../helpers/fetchWrapper");
|
|
const cheerio = require("cheerio");
|
|
const Promise = require("bluebird");
|
|
const moment = require("moment-timezone");
|
|
const htmlToText = require("html-to-text");
|
|
|
|
const {
|
|
AD_TYPE,
|
|
AD_CATEGORY,
|
|
AD_AGENCY,
|
|
AD_STATUS,
|
|
CRAWLER_AD_TYPE,
|
|
HEATING_TYPE,
|
|
ACCESS_ROAD_TYPE,
|
|
FURNISHING_TYPE
|
|
} = require("../../common/enums");
|
|
|
|
const {
|
|
DEFAULT_TIMEZONE,
|
|
PRINT_CRAWLER_DEBUG
|
|
} = require("../../config/appConfig");
|
|
|
|
/**
 * Site-specific lookup tables for rental.ba.
 *
 * - RENTAL_AD_TYPE / RENTAL_AD_CATEGORY hold the URL path segments that
 *   `categoryIndexer` appends to the search base URL.
 * - The two date formats are handed to `moment.tz` in `scrapeAd` to parse the
 *   "inserted" and "edited" timestamps of an ad.
 */
const RENTAL_ENUMS = {
  // Keyed by the crawler-level ad-type filter.
  RENTAL_AD_TYPE: {
    [CRAWLER_AD_TYPE.ALL]: "/prodaja-1/najam-2",
    [CRAWLER_AD_TYPE.ONLY_SELL]: "/prodaja-1",
    [CRAWLER_AD_TYPE.ONLY_RENT]: "/najam-2"
  },

  // Keyed by the internal category id; ALL adds no segment to the URL.
  RENTAL_AD_CATEGORY: {
    [AD_CATEGORY.ALL.id]: "",
    [AD_CATEGORY.FLAT.id]: "/tip-2",
    [AD_CATEGORY.HOUSE.id]: "/tip-1",
    [AD_CATEGORY.LAND.id]: "/tip-5",
    [AD_CATEGORY.OFFICE.id]: "/tip-4",
    [AD_CATEGORY.APARTMENT.id]: "/tip-3",
    [AD_CATEGORY.GARAGE.id]: "/tip-6"
    // COTTAGE is deliberately left unmapped:
    //[AD_CATEGORY.COTTAGE.id]: ""
  },

  RENTAL_PUBLISHED_DATE_FORMAT: "YYYY-MM-DD HH:mm:ss",
  // NOTE(review): the "u" presumably matches a literal separator in the
  // site's "edited" timestamp — confirm against a live payload.
  RENTAL_RENEWED_DATE_FORMAT: "YYYY-MM-DD u HH:mm:ss"
};
|
|
|
|
const { RENTAL_FORCE_CRAWL } = require("../specificConfigs/rental");
|
|
|
|
/**
 * Crawler for real-estate ads listed on rental.ba.
 *
 * It walks the paginated search results of every configured category, scrapes
 * each ad page into a flat record and passes every page of records to the
 * configured saver.
 */
class RentalCrawler {
  /**
   * @param {Array} savers - Persistence back-ends; only the first one is used
   *   (see `saveCrawledResults`).
   * @param {*} crawlerAdTypes - Key into `RENTAL_ENUMS.RENTAL_AD_TYPE`.
   * @param {Array} crawlerAdCategories - Categories to crawl. Each entry may be
   *   a key of `RENTAL_ENUMS.RENTAL_AD_CATEGORY` or an object carrying that key
   *   in its `id` property.
   * @param {number} maxPages - Maximum number of result pages per category.
   * @param {number} maxResultsPerPage - Maximum number of ads scraped per page.
   * @param {Array} ignoredUsernames - Stored on the instance; nothing in this
   *   class filters by it.
   * @param {number} delayBetweenPages - Pause in milliseconds between rounds.
   */
  constructor(
    savers = [],
    crawlerAdTypes = CRAWLER_AD_TYPE.ALL,
    crawlerAdCategories = [AD_CATEGORY.FLAT, AD_CATEGORY.HOUSE],
    maxPages = 1000,
    maxResultsPerPage = 100,
    ignoredUsernames = [],
    delayBetweenPages = 1000
  ) {
    this.savers = savers;
    this.baseUrl = "https://www.rental.ba/pretraga/sortiraj-date_DESC";
    this.crawlerAdTypes = crawlerAdTypes;
    this.crawlerAdCategories = crawlerAdCategories;
    this.maxPages = maxPages;
    this.maxResultsPerPage = maxResultsPerPage;
    // Previously accepted but dropped; kept so callers can read it back.
    this.ignoredUsernames = ignoredUsernames;
    this.delayBetweenPages = delayBetweenPages;
  }

  /**
   * Crawls all configured categories in lock-step, one page per category per
   * round, and saves each page as soon as it is scraped.
   *
   * A category stops being crawled when its indexer is exhausted or when a
   * saved page produced no new records (unless `RENTAL_FORCE_CRAWL` is set).
   *
   * @returns {Promise<Array>} All `newRecords` reported by the saver.
   */
  async crawl() {
    const newRealEstates = [];
    const crawlAdCategories = this.crawlerAdCategories;

    if (!crawlAdCategories) {
      return newRealEstates;
    }

    let indexGenerators = [];
    for (const adCategory of crawlAdCategories) {
      indexGenerators.push(this.categoryIndexer(adCategory));
    }

    while (indexGenerators.length > 0) {
      const singlePageResults = await Promise.all(
        indexGenerators.map(indexGenerator => indexGenerator.next())
      );
      const finishedIndexes = new Set();

      for (const [index, { value: singlePageResult }] of singlePageResults.entries()) {
        if (!singlePageResult) {
          // Generator returned undefined: it has no more pages.
          finishedIndexes.add(index);
          continue;
        }

        const saveResults = await this.saveCrawledResults(singlePageResult);
        const newRecords = saveResults ? saveResults.newRecords : undefined;

        // A saver that reports no record list must neither crash the crawl
        // nor stop the category.
        if (!Array.isArray(newRecords)) {
          continue;
        }

        newRealEstates.push(...newRecords);

        if (newRecords.length === 0 && !RENTAL_FORCE_CRAWL) {
          finishedIndexes.add(index);
        }
      }

      indexGenerators = indexGenerators.filter(
        (indexGenerator, index) => !finishedIndexes.has(index)
      );

      // No point in waiting when there is nothing left to fetch.
      if (indexGenerators.length > 0) {
        await this.sleep(this.delayBetweenPages);
      }
    }

    return newRealEstates;
  }

  /**
   * Async generator yielding the scraped ads of one category, page by page.
   *
   * Finishes when a page yields no ads, when `maxPages` pages were produced,
   * or immediately when the ad type / category has no URL mapping.
   *
   * @param {*} adCategory - Category key, or an object with that key as `id`.
   */
  async *categoryIndexer(adCategory) {
    const urlAdTypePart = RENTAL_ENUMS.RENTAL_AD_TYPE[this.crawlerAdTypes];

    let urlCategoryPart = RENTAL_ENUMS.RENTAL_AD_CATEGORY[adCategory];
    if (
      urlCategoryPart === undefined &&
      adCategory !== null &&
      typeof adCategory === "object"
    ) {
      // The URL map is keyed by category id, while the constructor default
      // supplies whole category objects.
      urlCategoryPart = RENTAL_ENUMS.RENTAL_AD_CATEGORY[adCategory.id];
    }

    if (urlAdTypePart === undefined || urlCategoryPart === undefined) {
      return undefined;
    }

    for (let pageToIndex = 1; pageToIndex <= this.maxPages; pageToIndex++) {
      const urlPageToCrawl = `${this.baseUrl}${urlAdTypePart}${urlCategoryPart}/stranica-${pageToIndex}`;
      const singlePageResults = await this.indexSinglePage(
        urlPageToCrawl,
        this.maxResultsPerPage
      );

      if (!Array.isArray(singlePageResults) || singlePageResults.length === 0) {
        return undefined;
      }

      yield singlePageResults;
    }

    return undefined;
  }

  /**
   * Fetches one search-result page, collects the ad links on it and scrapes
   * up to `maxResultsPerPage` of them in parallel.
   *
   * @param {string} url - Search-result page URL.
   * @param {number} maxResultsPerPage - Upper bound of ads to scrape.
   * @returns {Promise<Array>} Successfully scraped ads; `[]` on any error.
   */
  async indexSinglePage(url, maxResultsPerPage) {
    if (PRINT_CRAWLER_DEBUG) {
      console.log("[RENTAL] Index page : ", url);
    }

    try {
      const res = await fetch(url, {}, false);
      const body = await res.text();
      const $ = cheerio.load(body);
      const hrefs = [];

      $(
        "body > div > div.container > div.row > div.col-xs-12.col-sm-12.col-md-12.col-lg-9.content-main > div.row.box-items.group-grid-view"
      )
        .find(".pull-right")
        .each((i, elem) => {
          const href = $(elem)
            .find("a")
            .first()
            .attr("href");
          if (href) {
            hrefs.push(href);
          }
        });

      const actualNoOfResults =
        hrefs.length <= maxResultsPerPage ? hrefs.length : maxResultsPerPage;

      const asyncScraping = [];
      for (let i = 0; i < actualNoOfResults; i++) {
        asyncScraping.push(this.scrapeAd(hrefs[i]));
      }

      const scrapedData = await Promise.all(asyncScraping);
      // scrapeAd resolves to null for ads it could not parse.
      return scrapedData.filter(adData => !!adData);
    } catch (e) {
      console.error("[RENTAL] Exception caught:" + e);
      return [];
    }
  }

  /**
   * Fetches a single ad page and converts the JSON blob embedded next to the
   * map widget into a flat ad record.
   *
   * @param {string} url - Ad page URL.
   * @returns {Promise<Object|null>} The ad record, or null when the page has no
   *   embedded JSON or any step fails (failures are logged, never thrown).
   */
  async scrapeAd(url) {
    try {
      const adPageSource = await fetch(url);
      const body = await adPageSource.text();

      if (!body.includes("<html")) {
        throw new Error("Failed to fetch page !");
      }

      const $ = cheerio.load(body);
      const extractedData = this.extractAdJson($);
      if (extractedData === null) {
        console.log("[RENTAL] No JSON data for this ad : ", url);
        return null;
      }

      const rentalId = extractedData["re_realEstates_id"];

      const adCategory = this.getKiviCategoryIdFromRentalId(
        parseInt(extractedData["re_types_id"], 10)
      );
      if (!adCategory) {
        throw new Error(`Invalid category : ${extractedData["re_types_id"]}`);
      }

      const adType = this.getKiviAdTypeFromRentalActionId(
        parseInt(extractedData["re_action_id"], 10)
      );
      if (!adType) {
        throw new Error(`Invalid ad type : ${extractedData["re_action_id"]}`);
      }

      const descriptionIds = this.parseIdList(
        extractedData["re_descriptions_id"],
        'Expected array od descriptions but "re_descriptions_id" not found !'
      );
      const spaceIds = this.parseIdList(
        extractedData["re_spaces_id"],
        'Expected array od spaces but "re_spaces_id" not found !'
      );
      const infrastructureIds = this.parseIdList(
        extractedData["re_infrastructure_id"],
        'Expected array od infrastructures but "re_infrastructure_id" not found !'
      );
      // Presence check only: the floor helpers below re-read the raw text.
      this.parseIdList(
        extractedData["re_floorNO_id"],
        'Expected array od floors but "re_floorNO_id" not found !'
      );
      const floorIdsText = extractedData["re_floorNO_id"];

      // Number of views is written as : "Broj pregledavanja: NNN"
      const numberOfViewsAgencyParts = $(
        "body > div > div.container > div.row.content-top > div.col-xs-12.col-sm-12.col-md-9 > div > div.box-viewcount"
      )
        .text()
        .trim()
        .split(":");
      const parsedViews =
        numberOfViewsAgencyParts.length > 1
          ? parseInt(numberOfViewsAgencyParts[1], 10)
          : NaN;

      const fromDescriptions = this.getPropertiesFromDescriptions(descriptionIds);
      const fromSpaces = this.getPropertiesFromSpaces(spaceIds);
      const fromInfrastructure = this.getPropertiesFromInfrastructure(
        infrastructureIds
      );

      // A count that fails to parse contributes 0 instead of turning the
      // whole sum into NaN (and therefore null).
      const roomsCount =
        parseInt(extractedData["re_realEstates_roomsNO"], 10) || 0;
      const bedsCount = parseInt(extractedData["re_realEstates_bedNO"], 10) || 0;

      const newBuildingRaw = extractedData["op_realEstates_newBuilding"];

      const longDescription = htmlToText.fromString(
        extractedData["re_realEstates_description"]
      );

      const publishedDateMoment = moment.tz(
        extractedData["re_realEstates_inserted"],
        RENTAL_ENUMS.RENTAL_PUBLISHED_DATE_FORMAT,
        DEFAULT_TIMEZONE
      );
      if (!publishedDateMoment.isValid()) {
        throw new Error(
          `Invalid published date : ${extractedData["re_realEstates_inserted"]}`
        );
      }

      const renewedDateMoment = moment.tz(
        extractedData["re_realEstates_edited"],
        RENTAL_ENUMS.RENTAL_RENEWED_DATE_FORMAT,
        DEFAULT_TIMEZONE
      );
      if (!renewedDateMoment.isValid()) {
        throw new Error(
          `Invalid renewed date : ${extractedData["re_realEstates_edited"]}`
        );
      }

      return {
        url,
        agencyObjectId: rentalId,
        originAgencyName: AD_AGENCY.RENTAL,
        realEstateType: adCategory,
        adType,
        title: extractedData["re_realEstates_portalName"],
        // 0 and unparsable prices are both stored as null.
        price: parseFloat(extractedData["re_realEstates_price"]) || null,
        area: parseFloat(extractedData["re_realEstates_area"]),
        gardenSize: parseFloat(extractedData["re_realEstates_fieldArea"]),
        shortDescription: "",
        longDescription,
        streetNumber: 0,
        streetName: "",
        locality: "",
        municipality: "",
        city: "",
        region: "",
        entity: "",
        country: "",
        locationLat: extractedData["re_realEstates_latitude"],
        locationLong: extractedData["re_realEstates_longitude"],
        adStatus: this.isFlagSet(extractedData["adm_realEstates_discount"])
          ? AD_STATUS.STATUS_DISCOUNTED
          : AD_STATUS.STATUS_NORMAL,
        publishedDate: publishedDateMoment.toISOString(),
        renewedDate: renewedDateMoment.toISOString(),
        numberOfRooms: roomsCount + bedsCount || null,
        // Explicit numbers win; 0 / missing falls back to the floor-id list.
        numberOfFloors:
          parseInt(extractedData["re_realEstates_floorsNO"], 10) ||
          this.getNumberOfFloorsFromFloorId(floorIdsText),
        floor:
          parseInt(extractedData["re_realEstates_floorNO"], 10) ||
          this.getFloorNumberFromFloorId(floorIdsText),
        accessRoadType: fromDescriptions.accessRoadType,
        heatingType:
          this.getHeatingTypeId(extractedData["re_heating_id"]) || null,
        furnishingType: fromDescriptions.furnishingType,
        balcony: fromDescriptions.balcony || fromSpaces.balcony,
        newBuilding: newBuildingRaw ? this.isFlagSet(newBuildingRaw) : null,
        elevator: fromDescriptions.elevator,
        water: fromDescriptions.water || fromInfrastructure.water,
        electricity:
          fromDescriptions.electricity || fromInfrastructure.electricity,
        drainageSystem: fromInfrastructure.drainageSystem,
        // Used to compare against the number 1 only, unlike every other flag.
        registeredInZkBooks:
          this.isFlagSet(extractedData["op_realEstates_ownerPermit"]) || null,
        recentlyAdapted: null,
        parking: fromDescriptions.parking || fromSpaces.parking,
        garage: fromSpaces.garage,
        gas: fromInfrastructure.gas,
        antiTheftDoor: fromDescriptions.antiTheftDoor,
        airCondition: fromDescriptions.airCondition,
        phoneConnection: fromInfrastructure.phoneConnection,
        cableTV: fromInfrastructure.cableTV,
        internet: fromInfrastructure.internet,
        basementAttic:
          fromSpaces.basementAttic ||
          this.checkBasemAtticFromFloors(floorIdsText),
        storeRoom: fromSpaces.storeRoom,
        videoSurveillance:
          fromDescriptions.videoSurveillance ||
          fromInfrastructure.videoSurveillance,
        alarm: fromDescriptions.alarm,
        suitableForStudents: null,
        includingBills:
          this.isFlagSet(extractedData["op_realEstates_utilitiesIncluded"]) ||
          null,
        animalsAllowed: null,
        pool: fromDescriptions.pool,
        urbanPlanPermit:
          this.isFlagSet(extractedData["op_realEstates_locationPermit"]) ||
          fromDescriptions.urbanPlanPermit,
        buildingPermit:
          this.isFlagSet(extractedData["op_realEstates_buildingPermit"]) || null,
        utilityConnection: fromDescriptions.utilityConnection,
        distanceToRiver: null,
        numberOfViewsAgency: Number.isNaN(parsedViews) ? null : parsedViews
      };
    } catch (e) {
      console.error("[RENTAL] Exception caught: " + e.message, "\r\nURL:", url);
      return null;
    }
  }

  //======= HELPER FUNCTIONS =============

  /**
   * Pulls the ad JSON out of the inline script that sits next to `.box-map`.
   *
   * @param {Function} $ - Loaded cheerio document.
   * @returns {Object|null} First element of the embedded array, or null when
   *   the page carries no such script.
   * @throws {Error} When the script exists but does not hold parsable ad data.
   */
  extractAdJson($) {
    const scriptElement = $("script", $(".box-map").parent());
    const scriptNode = scriptElement[0];
    const scriptText =
      scriptNode &&
      scriptNode.children &&
      scriptNode.children[0] &&
      scriptNode.children[0].data;

    if (!scriptText) {
      return null;
    }

    let adJson;
    try {
      // Data string starts with : var json_map_data = [{"r ...
      // so that assignment prefix (20 characters) is cut off before parsing.
      adJson = JSON.parse(scriptText.substring("var json_map_data = ".length))[0];
    } catch (e) {
      throw new Error("Can't find ad data JSON");
    }

    if (adJson === null || typeof adJson !== "object") {
      throw new Error("Can't find ad data JSON");
    }
    return adJson;
  }

  /**
   * Parses a comma-separated id list such as "1,14,26".
   *
   * @param {*} rawValue - Field value from the ad JSON.
   * @param {string} missingMessage - Error message used when the field is absent.
   * @returns {number[]} Parsed ids; unparsable parts become NaN.
   * @throws {Error} When `rawValue` is null or undefined.
   */
  parseIdList(rawValue, missingMessage) {
    if (rawValue === undefined || rawValue === null) {
      throw new Error(missingMessage);
    }
    return String(rawValue)
      .split(",")
      .map(stringNumber => parseInt(stringNumber, 10));
  }

  /**
   * @param {*} value - Flag field from the ad JSON.
   * @returns {boolean} True for "1" or 1.
   */
  isFlagSet(value) {
    return value === "1" || value === 1;
  }

  /**
   * @param {number} floorId - Floor id as used in "re_floorNO_id".
   * @returns {boolean} True for the ids this class treats as basement/attic
   *   levels (1 'podrum', 2 'suteren', 12 'potkrovlje', 14 'tavan').
   */
  isBasementOrAtticFloorId(floorId) {
    return floorId === 1 || floorId === 2 || floorId === 12 || floorId === 14;
  }

  /**
   * Maps the site's category id to the internal category id.
   *
   * @param {number} rentalCategoryId
   * @returns {*} Internal category id, or undefined for unknown ids.
   */
  getKiviCategoryIdFromRentalId(rentalCategoryId) {
    switch (rentalCategoryId) {
      case 1:
        return AD_CATEGORY.HOUSE.id;
      case 2:
        return AD_CATEGORY.FLAT.id;
      case 3:
        return AD_CATEGORY.APARTMENT.id;
      case 4:
        return AD_CATEGORY.OFFICE.id;
      case 5:
        return AD_CATEGORY.LAND.id;
      case 6:
        return AD_CATEGORY.GARAGE.id;
      default:
        return undefined;
    }
  }

  /**
   * Maps the site's action id to the internal ad type.
   *
   * @param {number} actionId
   * @returns {*} Internal ad-type string id, or undefined for unknown ids.
   */
  getKiviAdTypeFromRentalActionId(actionId) {
    switch (actionId) {
      case 1:
        return AD_TYPE.AD_TYPE_SALE.stringId;
      case 2:
        return AD_TYPE.AD_TYPE_RENT.stringId;
      default:
        return undefined;
    }
  }

  /**
   * Derives ad properties from the list of "description" ids.
   *
   * @param {number[]} descriptionIds
   * @returns {Object} Property map; entries stay null unless an id sets them.
   */
  getPropertiesFromDescriptions(descriptionIds) {
    const result = {
      accessRoadType: null,
      furnishingType: null,
      balcony: null,
      elevator: null,
      parking: null,
      antiTheftDoor: null,
      airCondition: null,
      videoSurveillance: null,
      alarm: null,
      pool: null,
      urbanPlanPermit: null,
      utilityConnection: null,
      water: null,
      electricity: null
    };

    for (const descriptionId of descriptionIds) {
      switch (descriptionId) {
        case 16:
          result.furnishingType = FURNISHING_TYPE.NOT_FURNISHED.id;
          break;
        case 17:
          result.furnishingType = FURNISHING_TYPE.HALF_FURNISHED.id;
          break;
        case 1:
        case 28:
          result.furnishingType = FURNISHING_TYPE.FURNISHED.id;
          break;
        case 14:
          result.elevator = true;
          break;
        case 39:
          result.electricity = true;
          break;
        case 40:
          result.water = true;
          break;
        case 41:
        case 58:
          result.accessRoadType = ACCESS_ROAD_TYPE.ASPHALT.id;
          break;
        case 26:
          result.balcony = true;
          break;
        case 62:
          result.parking = true;
          break;
        case 3:
          result.antiTheftDoor = true;
          break;
        case 2:
        case 21:
          result.airCondition = true;
          break;
        case 4:
          result.alarm = true;
          break;
        case 55:
          result.videoSurveillance = true;
          break;
        case 9:
          result.pool = true;
          break;
        case 60:
          result.urbanPlanPermit = true;
          break;
        case 38:
          result.utilityConnection = true;
          break;
      }
    }

    return result;
  }

  /**
   * Derives ad properties from the list of "space" ids.
   *
   * @param {number[]} spaceIds
   * @returns {Object} Property map; entries stay null unless an id sets them.
   */
  getPropertiesFromSpaces(spaceIds) {
    const result = {
      balcony: null,
      parking: null,
      garage: null,
      basementAttic: null,
      storeRoom: null
    };

    for (const spaceId of spaceIds) {
      switch (spaceId) {
        case 36:
        case 12:
          result.parking = true;
          break;
        case 1:
        case 2:
        case 3:
          result.balcony = true;
          break;
        case 4:
        case 30:
          result.garage = true;
          break;
        case 9:
        case 10:
          result.storeRoom = true;
          break;
        case 18:
        case 34:
        case 37:
        case 27:
          result.basementAttic = true;
          break;
      }
    }

    return result;
  }

  /**
   * Maps the site's heating id to the internal heating type.
   *
   * @param {*} heatingRentalId - May hold several values, like "1, 2, 3";
   *   parseInt takes the first integer only.
   * @returns {*} Internal heating-type id, or null for unknown ids.
   */
  getHeatingTypeId(heatingRentalId) {
    const heatingId = parseInt(heatingRentalId, 10);
    switch (heatingId) {
      case 27:
      case 16:
        return HEATING_TYPE.GAS.id;
      case 4:
        return HEATING_TYPE.CENTRAL_GAS.id;
      case 3:
      case 23:
      case 6:
      case 7:
      case 8:
      case 9:
      case 10:
        return HEATING_TYPE.CENTRAL_BOILER.id;
      case 2:
      case 13:
      case 30:
      case 17:
      case 29:
      case 31:
        return HEATING_TYPE.ELECTRICITY.id;
      case 24:
      case 25:
      case 12:
        return HEATING_TYPE.CENTRAL_CITY.id;
      case 26:
      case 21:
      case 20:
        return HEATING_TYPE.WOOD.id;
      case 28:
      case 19:
        return HEATING_TYPE.HEAT_PUMP.id;
      case 14:
      case 32:
        return HEATING_TYPE.OTHER.id;
      default:
        return null;
    }
  }

  /**
   * Derives ad properties from the list of "infrastructure" ids.
   *
   * @param {number[]} infrastructureIds
   * @returns {Object} Property map; entries stay null unless an id sets them.
   */
  getPropertiesFromInfrastructure(infrastructureIds) {
    const result = {
      electricity: null,
      water: null,
      gas: null,
      drainageSystem: null,
      phoneConnection: null,
      internet: null,
      videoSurveillance: null,
      cableTV: null
    };

    for (const infrastructureId of infrastructureIds) {
      switch (infrastructureId) {
        case 1:
          result.electricity = true;
          break;
        case 2:
          result.water = true;
          break;
        case 4:
          result.gas = true;
          break;
        case 5:
          result.drainageSystem = true;
          break;
        case 7:
        case 8:
          result.phoneConnection = true;
          break;
        case 10:
          result.internet = true;
          break;
        case 11:
          result.cableTV = true;
          break;
        case 16:
        case 17:
          result.videoSurveillance = true;
          break;
      }
    }

    return result;
  }

  /**
   * Translates the first id of a floor-id list into a floor number.
   *
   * @param {*} floorsIdText - A single id or a comma-separated list of ids.
   * @returns {number|null} Floor number (negative below ground, 0 for ground
   *   level), or null when the id is missing, unknown or has no number.
   */
  getFloorNumberFromFloorId(floorsIdText) {
    if (floorsIdText === undefined || floorsIdText === null) {
      return null;
    }

    // Only the first element is used.
    const firstFloorId = parseInt(String(floorsIdText).split(",")[0], 10);

    // Index = floor id - 1. The labels are the site's abbreviations as noted
    // by the original author.
    const floorNumberById = [
      -1, // 1 pod
      -1, // 2 sut
      0, // 3 raz
      0, // 4 pri
      1, // 5 vpri
      1, // 6 prv
      2, // 7 dru
      3, // 8 tre
      4, // 9 čet
      null, // 10 man
      null, // 11
      null, // 12 pot
      null, // 13 vpot
      null, // 14 tav
      5 // 15 pet
    ];

    const floorNumber = floorNumberById[firstFloorId - 1];
    // Explicit check instead of `|| null`, which swallowed the valid value 0.
    return floorNumber === undefined ? null : floorNumber;
  }

  /**
   * Counts the floors listed in a floor-id list.
   *
   * Floors of 'suteren', 'podrum', 'tavan' and 'potkrovlje' are not counted.
   *
   * @param {*} floorsIdText - A single id or a comma-separated list of ids.
   * @returns {number|null} Number of counted floors, or null when the text
   *   holds no parsable id.
   */
  getNumberOfFloorsFromFloorId(floorsIdText) {
    if (floorsIdText === undefined || floorsIdText === null) {
      return null;
    }

    const floorIds = String(floorsIdText)
      .split(",")
      .map(id => parseInt(id, 10))
      .filter(id => !Number.isNaN(id));

    // "".split(",") gives [""], so emptiness has to be tested after parsing.
    if (floorIds.length === 0) {
      return null;
    }

    return floorIds.filter(id => !this.isBasementOrAtticFloorId(id)).length;
  }

  /**
   * Tells whether a floor-id list mentions a basement or attic level.
   *
   * If floors 'suteren', 'podrum', 'tavan' or 'potkrovlje' exist, the
   * basement-attic tag is true.
   *
   * @param {*} floorsIdText - A single id or a comma-separated list of ids.
   * @returns {boolean}
   */
  checkBasemAtticFromFloors(floorsIdText) {
    if (floorsIdText === undefined || floorsIdText === null) {
      return false;
    }

    return String(floorsIdText)
      .split(",")
      .some(id => this.isBasementOrAtticFloorId(parseInt(id, 10)));
  }

  /**
   * @param {number} ms - Delay in milliseconds.
   * @returns {Promise<void>} Resolves after the delay.
   */
  async sleep(ms) {
    return new Promise(resolve => setTimeout(resolve, ms));
  }

  /**
   * Hands one page of scraped ads to the first configured saver.
   *
   * Only the first saver is used, so that its result can be returned to the
   * caller; `crawl` reads a `newRecords` array from that result.
   *
   * @param {Array} results - Scraped ad records.
   * @returns {Promise<*>} Whatever the saver's `save` resolves to.
   * @throws {Error} When no saver is configured.
   */
  async saveCrawledResults(results) {
    const savers = this.savers;

    if (!savers || !savers[0]) {
      throw new Error("[RENTAL] No saver configured, cannot save crawled results");
    }

    return savers[0].save(results);
  }
}
|
|
|
|
// The crawler class is this module's only export.
module.exports = RentalCrawler;
|