686 lines
20 KiB
JavaScript
686 lines
20 KiB
JavaScript
"use strict";
|
|
|
|
const fetch = require("node-fetch");
const cheerio = require("cheerio");
const moment = require("moment-timezone");

const {
  AD_TYPE,
  AD_CATEGORY,
  AD_AGENCY,
  AD_STATUS,
  CRAWLER_AD_TYPE,
  FURNISHING_TYPE,
  HEATING_TYPE,
  // NOTE(review): used by getAccessRoadTypeId but was never imported; assumed
  // to be exported by common/enums alongside the other enums - confirm.
  ACCESS_ROAD_TYPE
} = require("../../common/enums");

const {
  PRINT_CRAWLER_DEBUG,
  DEFAULT_TIMEZONE
} = require("../../config/appConfig");
const { SALJIC_FORCE_CRAWL } = require("../specificConfigs/saljic");
|
|
|
|
// Query-string fragments appended to the Saljic search URL, looked up by the
// crawler's own enum values.
const SALJIC_ENUMS = {};

// `input_vrsta` filter, keyed by CRAWLER_AD_TYPE value
// (empty = no filter, 1 for ONLY_SELL, 2 for ONLY_RENT).
SALJIC_ENUMS.SALJIC_AD_TYPE = {
  [CRAWLER_AD_TYPE.ALL]: "&input_vrsta=",
  [CRAWLER_AD_TYPE.ONLY_SELL]: "&input_vrsta=1",
  [CRAWLER_AD_TYPE.ONLY_RENT]: "&input_vrsta=2"
};

// `input_kategorija` filter, keyed by AD_CATEGORY id (empty = no filter).
SALJIC_ENUMS.SALJIC_AD_CATEGORY = {
  [AD_CATEGORY.ALL.id]: "&input_kategorija=",
  [AD_CATEGORY.FLAT.id]: "&input_kategorija=15",
  [AD_CATEGORY.HOUSE.id]: "&input_kategorija=9",
  // Site categories 3 and 4 are also building land ("gradjevinsko").
  [AD_CATEGORY.LAND.id]: "&input_kategorija=5",
  [AD_CATEGORY.OFFICE.id]: "&input_kategorija=8",
  [AD_CATEGORY.APARTMENT.id]: "&input_kategorija=1",
  [AD_CATEGORY.GARAGE.id]: "&input_kategorija=2"
  // AD_CATEGORY.COTTAGE has no mapping yet.
};
|
|
|
|
/**
 * Crawler for listings on saljicnekretnine.ba.
 *
 * `crawl()` walks the paginated search results of every configured category,
 * scrapes each linked ad page and hands every page of scraped ads to the first
 * configured saver.
 */
class SaljicCrawler {
  /**
   * @param {Array<{save: Function}>} savers - Persistence back-ends; only the first one is used.
   * @param {*} crawlerAdTypes - A CRAWLER_AD_TYPE value selecting sale/rent/all.
   * @param {Array} crawlerAdCategories - AD_CATEGORY entries (objects with `id`) or bare ids.
   * @param {number} maxPages - Maximum number of result pages fetched per category.
   * @param {number} maxResultsPerPage - Maximum number of ads scraped per result page.
   * @param {string[]} ignoredUsernames - Stored for callers; not applied by this crawler yet.
   * @param {number} delayBetweenPages - Pause in milliseconds between result-page rounds.
   */
  constructor(
    savers = [],
    crawlerAdTypes = CRAWLER_AD_TYPE.ALL,
    crawlerAdCategories = [AD_CATEGORY.FLAT, AD_CATEGORY.HOUSE],
    maxPages = 5000,
    maxResultsPerPage = 5000,
    ignoredUsernames = [],
    delayBetweenPages = 1000
  ) {
    this.savers = savers;
    this.baseUrl = "https://www.saljicnekretnine.ba/v2/nekretnine_search";
    this.crawlerAdTypes = crawlerAdTypes;
    this.crawlerAdCategories = crawlerAdCategories;
    // Previously dropped, which left the page cap in categoryIndexer inert.
    this.maxPages = maxPages;
    this.maxResultsPerPage = maxResultsPerPage;
    this.ignoredUsernames = ignoredUsernames;
    this.delayBetweenPages = delayBetweenPages;
  }

  /**
   * Crawls all configured categories in lock-step, one result page per
   * category per round, saving each page as it arrives.
   *
   * A category stops being crawled when it runs out of pages, or when a saved
   * page produced no new records and SALJIC_FORCE_CRAWL is off.
   *
   * @returns {Promise<Array>} Every record the saver reported as new.
   */
  async crawl() {
    const newRealEstates = [];
    const crawlAdCategories = this.crawlerAdCategories;
    if (!crawlAdCategories) {
      return newRealEstates;
    }

    let indexGenerators = [];
    for (const adCategory of crawlAdCategories) {
      indexGenerators.push(this.categoryIndexer(adCategory));
    }

    while (indexGenerators.length > 0) {
      const singlePageResults = await Promise.all(
        indexGenerators.map(indexGenerator => indexGenerator.next())
      );

      const activeGenerators = [];
      for (const [index, { value: singlePageResult }] of singlePageResults.entries()) {
        if (!singlePageResult) {
          // Generator finished (no more pages): drop it.
          continue;
        }

        const saveResults = await this.saveCrawledResults(singlePageResult);
        // Only trust `newRecords` when it really is an array; spreading
        // anything else used to abort the whole crawl.
        const newRecords =
          saveResults && Array.isArray(saveResults.newRecords)
            ? saveResults.newRecords
            : null;
        if (newRecords !== null) {
          newRealEstates.push(...newRecords);
        }

        const nothingNew = newRecords !== null && newRecords.length === 0;
        if (nothingNew && !SALJIC_FORCE_CRAWL) {
          continue;
        }
        activeGenerators.push(indexGenerators[index]);
      }

      indexGenerators = activeGenerators;
      if (indexGenerators.length > 0) {
        // No point in waiting once there is nothing left to fetch.
        await this.sleep(this.delayBetweenPages);
      }
    }

    return newRealEstates;
  }

  /**
   * Async generator yielding the scraped ads of one search-result page at a
   * time for a single category, until a page comes back empty or `maxPages`
   * pages have been fetched.
   *
   * @param {{id: *}|*} adCategory - AD_CATEGORY entry or bare category id.
   */
  async *categoryIndexer(adCategory) {
    // SALJIC_AD_CATEGORY is keyed by category id, while the constructor
    // default passes whole AD_CATEGORY objects - accept both.
    const adCategoryId = this.resolveCategoryId(adCategory);
    const urlAdTypePart = SALJIC_ENUMS.SALJIC_AD_TYPE[this.crawlerAdTypes];
    const urlCategoryPart = SALJIC_ENUMS.SALJIC_AD_CATEGORY[adCategoryId];
    if (urlAdTypePart === undefined || urlCategoryPart === undefined) {
      return undefined;
    }

    const scrapeContext = {
      adCategoryId,
      adTypeId: this.resolveCrawledAdTypeId()
    };
    const pageLimit =
      typeof this.maxPages === "number" && !Number.isNaN(this.maxPages)
        ? this.maxPages
        : Infinity;

    for (let pageToIndex = 1; pageToIndex <= pageLimit; pageToIndex++) {
      // The site paginates by result offset: 22 results per page, and the
      // first page takes an empty offset.
      const urlPagePart = pageToIndex === 1 ? "" : (pageToIndex - 1) * 2 * 11;
      const urlPageToCrawl = `${this.baseUrl}?order_by=${urlAdTypePart}${urlCategoryPart}&per_page=${urlPagePart}`;

      const singlePageResults = await this.indexSinglePage(
        urlPageToCrawl,
        this.maxResultsPerPage,
        scrapeContext
      );
      if (!Array.isArray(singlePageResults) || singlePageResults.length === 0) {
        return undefined;
      }
      yield singlePageResults;
    }
    return undefined;
  }

  /**
   * Fetches one search-result page and scrapes every ad it links to.
   *
   * @param {string} url - Search-result page URL.
   * @param {number} maxResultsPerPage - Cap on the number of ads scraped from this page.
   * @param {{adCategoryId?: *, adTypeId?: *}} [scrapeContext] - Forwarded to scrapeAd.
   * @returns {Promise<Array<object>>} Scraped ads; empty on any failure.
   */
  async indexSinglePage(url, maxResultsPerPage, scrapeContext = {}) {
    this.debug("[SALJIC] Index page : ", url);

    try {
      const res = await fetch(url);
      if (!res.ok) {
        throw new Error(`HTTP ${res.status} for ${url}`);
      }
      const body = await res.text();
      const $ = cheerio.load(body);

      const adUrls = [];
      $("#shop")
        .find(".product")
        .each((i, elem) => {
          const href = $(elem)
            .find("a")
            .first()
            .attr("href");
          const absoluteUrl = this.toAbsoluteUrl(href);
          if (absoluteUrl) {
            adUrls.push(absoluteUrl);
          }
        });

      const limit = Number.isFinite(maxResultsPerPage)
        ? Math.max(0, maxResultsPerPage)
        : adUrls.length;
      const scrapedData = await Promise.all(
        adUrls.slice(0, limit).map(adUrl => this.scrapeAd(adUrl, scrapeContext))
      );
      // scrapeAd resolves to null for ads it could not parse.
      return scrapedData.filter(adData => !!adData);
    } catch (e) {
      console.error("[SALJIC] Exception caught:" + e);
      return [];
    }
  }

  /**
   * Fetches a single ad page and extracts its fields.
   *
   * @param {string} url - Absolute ad URL.
   * @param {{adCategoryId?: *, adTypeId?: *}} [scrapeContext] - Category / ad
   *   type being crawled; the ad page itself is not parsed for them.
   * @returns {Promise<object|null>} The ad record, or null when scraping failed.
   */
  async scrapeAd(url, scrapeContext = {}) {
    this.debug("[SALJIC] Scraping : ", url);
    try {
      const adPageSource = await fetch(url);
      if (!adPageSource.ok) {
        throw new Error(`HTTP ${adPageSource.status}`);
      }
      const body = await adPageSource.text();
      const $ = cheerio.load(body);

      // TODO: decide whether anything on the page should influence the status.
      const status = AD_STATUS.STATUS_NORMAL;

      const entry =
        "div.content-wrap > div.container > div.col-md-8.nobottommargin > div.single-post > div.entry";
      const content = `${entry} > div.entry-content.topmargin`;
      const propertySelectors = {
        title: `${entry} > div.entry-title > h2`,
        price: `${entry} > div.topmargin-sm.single-product > div.product > div.product-price > ins`,
        streetName: `${content} > p`,
        descriptions: `${content} > div.toggle.toggle-bg > div.togglec > p:nth-child(1)`,
        latAndLong: `${content} > div.gmap.bottommargin > iframe`,
        mainFields: `${content} > div.col-md-12.bottommargin > ul > li.list-group-item`
      };

      const title = this.cleanText($(propertySelectors.title).text());
      const price = this.parseListedPrice(
        this.cleanText($(propertySelectors.price).text())
      );
      const streetName = $(propertySelectors.streetName)
        .text()
        .replace(/(\r\n|\n|\r)/gm, "")
        .trim();
      const descriptions = $(propertySelectors.descriptions)
        .text()
        .trim();
      const { locationLat, locationLong } = this.parseMarkerCoordinates(
        $(propertySelectors.latAndLong).attr("src")
      );

      //====== DETAIL INFORMATION FIELDS ==========
      const details = {
        area: null,
        gardenSize: null,
        publishedDate: null,
        renewedDate: null,
        numberOfRooms: null,
        numberOfFloors: null,
        floor: null,
        accessRoadType: null,
        heatingType: null,
        furnishingType: null,
        balcony: null,
        newBuilding: null,
        elevator: null,
        water: null,
        electricity: null,
        drainageSystem: null,
        registeredInZkBooks: null,
        recentlyAdapted: null,
        parking: null,
        garage: null,
        gas: null,
        antiTheftDoor: null,
        airCondition: null,
        phoneConnection: null,
        cableTV: null,
        internet: null,
        basementAttic: null,
        storeRoom: null,
        videoSurveillance: null,
        alarm: null,
        suitableForStudents: null,
        includingBills: null,
        animalsAllowed: null,
        pool: null,
        urbanPlanPermit: null,
        buildingPermit: null,
        utilityConnection: null,
        distanceToRiver: null
      };

      // Main characteristics ("Glavne karakteristike"): one list item per field.
      $(propertySelectors.mainFields).each((i, elem) => {
        Object.assign(details, this.parseMainField($(elem).text()));
      });

      const data = {
        url,
        // NOTE(review): the ad id is guessed from the URL (last all-digit path
        // segment, else the URL itself) - confirm against the site's URL scheme.
        agencyObjectId: this.extractAgencyObjectId(url),
        // NOTE(review): was AD_AGENCY.OLX (copy-paste from the OLX crawler);
        // assumes the enum defines a SALJIC member - confirm.
        originAgencyName: AD_AGENCY.SALJIC,
        realEstateType:
          scrapeContext.adCategoryId !== undefined
            ? scrapeContext.adCategoryId
            : null,
        adType:
          scrapeContext.adTypeId !== undefined ? scrapeContext.adTypeId : null,
        title,
        price,
        // Only one description paragraph is scraped, so both fields share it.
        shortDescription: descriptions,
        longDescription: descriptions,
        streetNumber: 0,
        streetName,
        locality: "",
        municipality: "",
        city: "",
        region: "",
        entity: "",
        country: "",
        locationLat,
        locationLong,
        adStatus: status,
        ...details,
        numberOfViewsAgency: null
      };

      this.debug("[SALJIC] Scraped : ", data);
      return data;
    } catch (e) {
      console.error("Exception caught: " + e.message, "\r\nURL:", url);
    }
    return null;
  }

  //======= SCRAPING HELPERS =============

  /** Logs only when PRINT_CRAWLER_DEBUG is enabled. */
  debug(...args) {
    if (PRINT_CRAWLER_DEBUG) {
      console.log(...args);
    }
  }

  /** Returns `adCategory.id` for category objects, or the value itself for bare ids. */
  resolveCategoryId(adCategory) {
    return adCategory !== null && typeof adCategory === "object"
      ? adCategory.id
      : adCategory;
  }

  /**
   * Ad type implied by the crawl filter: sale or rent when the crawl is
   * restricted to one of them, otherwise null (unknown).
   */
  resolveCrawledAdTypeId() {
    switch (this.crawlerAdTypes) {
      case CRAWLER_AD_TYPE.ONLY_SELL:
        return AD_TYPE.AD_TYPE_SALE.stringId;
      case CRAWLER_AD_TYPE.ONLY_RENT:
        return AD_TYPE.AD_TYPE_RENT.stringId;
      default:
        return null;
    }
  }

  /** Resolves a (possibly relative) href against the site origin; null when unusable. */
  toAbsoluteUrl(href) {
    if (!href) {
      return null;
    }
    try {
      return new URL(href, "https://www.saljicnekretnine.ba").href;
    } catch (e) {
      return null;
    }
  }

  /** Last all-digit path segment of the ad URL, falling back to the URL itself. */
  extractAgencyObjectId(url) {
    let pathname;
    try {
      pathname = new URL(url).pathname;
    } catch (e) {
      return String(url);
    }
    const numericSegments = pathname
      .split("/")
      .filter(segment => /^\d+$/.test(segment));
    return numericSegments.length > 0
      ? numericSegments[numericSegments.length - 1]
      : String(url);
  }

  /** Removes line breaks, collapses runs of spaces and trims. */
  cleanText(text) {
    return String(text)
      .replace(/(\r\n|\n|\r)/gm, "")
      .replace(/ {1,}/g, " ")
      .trim();
  }

  /** parseFloat that yields null instead of NaN. */
  numberOrNull(text) {
    const parsed = Number.parseFloat(text);
    return Number.isNaN(parsed) ? null : parsed;
  }

  /** Base-10 parseInt that yields null instead of NaN. */
  intOrNull(text) {
    const parsed = Number.parseInt(text, 10);
    return Number.isNaN(parsed) ? null : parsed;
  }

  /**
   * Parses the price shown on an ad page.
   *
   * Commas are treated as thousands separators (all of them are removed, not
   * only the first). "CIJENA NA UPIT" (price on request) and text without a
   * number yield null.
   *
   * @param {string} priceText
   * @returns {number|null}
   */
  parseListedPrice(priceText) {
    const text = String(priceText || "").trim();
    if (text === "" || text === "CIJENA NA UPIT") {
      return null;
    }
    const numericPart = /\d[\d.,]*/.exec(text);
    if (!numericPart) {
      return null;
    }
    return this.numberOrNull(numericPart[0].replace(/,/g, ""));
  }

  /**
   * Extracts `marker=<lat>%2C<long>` from the embedded map URL.
   *
   * @param {string|undefined} mapSrc - `src` of the map iframe, if present.
   * @returns {{locationLat: number|null, locationLong: number|null}}
   */
  parseMarkerCoordinates(mapSrc) {
    const unknown = { locationLat: null, locationLong: null };
    if (typeof mapSrc !== "string") {
      return unknown;
    }
    const match = /marker=([^&]*?)(?:%2C|,)([^&]*)/i.exec(mapSrc);
    if (!match) {
      return unknown;
    }
    return {
      locationLat: this.numberOrNull(match[1]),
      locationLong: this.numberOrNull(match[2])
    };
  }

  /**
   * Parses a day-first date (e.g. "01.02.2020") in DEFAULT_TIMEZONE.
   *
   * @param {string} dateText
   * @returns {string|null} ISO-8601 timestamp, or null when unparseable.
   */
  parseDayFirstDate(dateText) {
    const match = /^(\d{1,2})\D(\d{1,2})\D(\d{4})/.exec(
      String(dateText || "").trim()
    );
    if (!match) {
      return null;
    }
    const [, day, month, year] = match;
    const parsed = moment.tz(
      `${year}-${month}-${day}`,
      "YYYY-M-D",
      DEFAULT_TIMEZONE
    );
    return parsed.isValid() ? parsed.toISOString() : null;
  }

  /**
   * Turns the text of one "main characteristics" list item into the detail
   * fields it describes.
   *
   * The label is matched as a whole-word prefix, so multi-word labels such as
   * "Broj soba" are recognised (cutting at the first space never matched them).
   *
   * @param {string} fieldText - Raw text of the list item.
   * @returns {object} Partial detail object; empty for unknown labels.
   */
  parseMainField(fieldText) {
    const text = String(fieldText || "")
      .replace(/\s+/g, " ")
      .trim();
    const knownLabels = [
      "Površina",
      "Okućnica",
      "Broj soba",
      "Broj spratova",
      "Sprat",
      "Godina renoviranja",
      "Broj parking mjesta",
      "Dostupno od"
    ];
    const label = knownLabels.find(candidate => {
      if (!text.startsWith(candidate)) {
        return false;
      }
      // Reject longer words that merely start with the label.
      const next = text.charAt(candidate.length);
      return next === "" || !/\p{L}/u.test(next);
    });
    if (label === undefined) {
      return {};
    }

    const value = text.slice(label.length).replace(/^[\s:]+/, "");
    switch (label) {
      case "Površina":
        return { area: this.numberOrNull(value) };
      case "Okućnica":
        return { gardenSize: this.numberOrNull(value) };
      case "Broj soba":
        return { numberOfRooms: this.intOrNull(value) };
      case "Broj spratova":
        return { numberOfFloors: this.intOrNull(value) };
      case "Sprat":
        return { floor: this.intOrNull(value) };
      case "Godina renoviranja":
        return { recentlyAdapted: true };
      case "Broj parking mjesta":
        return { parking: true };
      case "Dostupno od":
        // NOTE(review): "Dostupno od" (available from) is stored as the
        // published date, as the original code did - confirm this mapping.
        return { publishedDate: this.parseDayFirstDate(value) };
      default:
        return {};
    }
  }

  //======= HELPER FUNCTIONS =============

  /** Maps a category caption to its AD_CATEGORY id; undefined when unknown. */
  getAdCategoryId(categoryText) {
    switch (categoryText) {
      case "Stanovi":
        return AD_CATEGORY.FLAT.id;
      case "Zemljišta":
        return AD_CATEGORY.LAND.id;
      case "Kuće":
        return AD_CATEGORY.HOUSE.id;
      case "Poslovni prostori":
        return AD_CATEGORY.OFFICE.id;
      case "Apartmani":
        return AD_CATEGORY.APARTMENT.id;
      case "Garaže":
        return AD_CATEGORY.GARAGE.id;
      case "Vikendice":
        return AD_CATEGORY.COTTAGE.id;
      default:
        return undefined;
    }
  }

  /** Maps an ad-type caption to its AD_TYPE stringId; undefined when unknown. */
  getAdTypeId(adTypeText) {
    switch (adTypeText) {
      case "Prodaja":
        return AD_TYPE.AD_TYPE_SALE.stringId;
      case "Izdavanje":
        return AD_TYPE.AD_TYPE_RENT.stringId;
      case "Potražnja":
        return AD_TYPE.AD_TYPE_REQUEST.stringId;
      default:
        return undefined;
    }
  }

  /** Maps a heating caption to its HEATING_TYPE id; logs and returns null when unknown. */
  getHeatingTypeId(heatingTypeText) {
    switch (heatingTypeText) {
      case "struja":
        return HEATING_TYPE.ELECTRICITY.id;
      case "plin":
        return HEATING_TYPE.GAS.id;
      case "drva":
        return HEATING_TYPE.WOOD.id;
      case "centralno (gradsko)":
        return HEATING_TYPE.CENTRAL_CITY.id;
      case "centralno (kotlovnica)":
        return HEATING_TYPE.CENTRAL_BOILER.id;
      case "centralno (plin)":
        return HEATING_TYPE.CENTRAL_GAS.id;
      case "nije uvedeno":
        return HEATING_TYPE.NO_HEATING.id;
      case "ostalo":
      case "drugo":
        return HEATING_TYPE.OTHER.id;
      default:
        console.log("grijanje = NEPOZNATO [", heatingTypeText, "]");
        return null;
    }
  }

  /** Maps a furnishing caption to its FURNISHING_TYPE id; logs and returns null when unknown. */
  getFurnishingTypeId(furnishingTypeText) {
    switch (furnishingTypeText) {
      case "namješten":
        return FURNISHING_TYPE.FURNISHED.id;
      case "polunamješten":
        return FURNISHING_TYPE.HALF_FURNISHED.id;
      case "nenamješten":
        return FURNISHING_TYPE.NOT_FURNISHED.id;
      case "":
        // NOTE(review): an empty caption is treated as furnished - confirm intent.
        return FURNISHING_TYPE.FURNISHED.id;
      default:
        console.log("namješten = NEPOZNATO [", furnishingTypeText, "]");
        return null;
    }
  }

  /** Maps an access-road caption to its ACCESS_ROAD_TYPE id; logs and returns null when unknown. */
  getAccessRoadTypeId(accessRoadTypeText) {
    switch (accessRoadTypeText) {
      case "asfalt":
        return ACCESS_ROAD_TYPE.ASPHALT.id;
      case "beton":
        return ACCESS_ROAD_TYPE.CONCRETE.id;
      case "makadam":
        return ACCESS_ROAD_TYPE.MACADAM.id;
      case "ostalo":
        return ACCESS_ROAD_TYPE.OTHER.id;
      default:
        console.log("pristup = NEPOZNATO [", accessRoadTypeText, "]");
        return null;
    }
  }

  /**
   * Parses an area such as "1.234,5": the first comma becomes the decimal dot
   * and every dot except the last is dropped. Returns NaN for empty input.
   */
  parseArea(areaText) {
    if (!areaText) {
      return NaN;
    }
    const removeDotsExceptLastOneRegex = /[.](?=.*[.])/g;
    const textWithOnlyOneDecimalDot = areaText
      .replace(",", ".")
      .replace(removeDotsExceptLastOneRegex, "");

    return parseFloat(textWithOnlyOneDecimalDot);
  }

  /**
   * Parses a price written with dots as thousands separators and a comma as
   * the decimal mark (e.g. "1.250.000,50"). Returns NaN for empty input.
   */
  parsePrice(priceText) {
    if (!priceText) {
      return NaN;
    }
    // Strip every thousands dot, not just the first one.
    const formattedPriceText = priceText.replace(/\./g, "").replace(",", ".");
    return parseFloat(formattedPriceText);
  }

  /** Converts a room-count caption to a number for the given category; null when unknown. */
  parseNumberOfRooms(numberOfRoomsText, categoryId) {
    if (categoryId === AD_CATEGORY.FLAT.id) {
      switch (numberOfRoomsText) {
        case "garsonjera":
          return 0;
        case "jednosoban (1)":
          return 1;
        case "jednoiposoban (1.5)":
          return 1.5;
        case "dvosoban (2)":
          return 2;
        case "trosoban (3)":
          return 3;
        case "četverosoban (4)":
          return 4;
        case "petosoban i više":
          return 5;
        default:
          console.log(
            "broj soba [stan] = NEPOZNATO [",
            numberOfRoomsText,
            ", ",
            categoryId,
            "]"
          );
          return null;
      }
    }

    if (
      categoryId === AD_CATEGORY.HOUSE.id ||
      categoryId === AD_CATEGORY.COTTAGE.id ||
      categoryId === AD_CATEGORY.APARTMENT.id ||
      categoryId === AD_CATEGORY.OFFICE.id
    ) {
      return Number.parseInt(numberOfRoomsText, 10) || null;
    }

    console.log("broj soba = NEPOZNATO [", numberOfRoomsText, "]");
    return null;
  }

  /** Converts a number-of-floors caption to a number for the given category; null when unknown. */
  parseNumberOfFloors(numberOfFloorsText, categoryId) {
    if (
      categoryId === AD_CATEGORY.HOUSE.id ||
      categoryId === AD_CATEGORY.COTTAGE.id
    ) {
      return Number.parseInt(numberOfFloorsText, 10) || null;
    }

    if (categoryId === AD_CATEGORY.OFFICE.id) {
      if (
        numberOfFloorsText === "suteren" ||
        numberOfFloorsText === "prizemlje"
      ) {
        return 0;
      }
      if (numberOfFloorsText === "6+") {
        return 7;
      }
      return Number.parseInt(numberOfFloorsText, 10) || null;
    }

    console.log("broj spratova = NEPOZNATO [", numberOfFloorsText, "]");
    return null;
  }

  /** Converts a floor caption to a floor number for the given category; null when unknown. */
  parseFloorNumber(floorText, categoryId) {
    if (
      categoryId === AD_CATEGORY.FLAT.id ||
      categoryId === AD_CATEGORY.APARTMENT.id
    ) {
      if (
        floorText === "suteren" ||
        floorText === "prizemlje" ||
        floorText === "visoko prizemlje"
      ) {
        return 0;
      }
      return Number.parseInt(floorText, 10) || null;
    }

    if (categoryId === AD_CATEGORY.OFFICE.id) {
      if (floorText === "zaseban objekat") {
        return null;
      }
      if (floorText === "prizemlje" || floorText === "visoko prizemlje") {
        return 0;
      }
      return Number.parseInt(floorText, 10) || null;
    }

    console.log("sprat = NEPOZNATO [", floorText, "]");
    return null;
  }

  /** Resolves after `ms` milliseconds. */
  async sleep(ms) {
    return new Promise(resolve => setTimeout(resolve, ms));
  }

  /**
   * Persists one page of scraped ads through the first configured saver.
   *
   * Only the first saver is used for now so that its result (e.g. which
   * records were newly inserted) can be returned to the caller. Without any
   * saver the page is treated as entirely new (dry run).
   *
   * @param {Array<object>} results
   * @returns {Promise<{newRecords: Array}>|*} Whatever the saver's `save` returns.
   */
  async saveCrawledResults(results) {
    const primarySaver = this.savers && this.savers[0];
    if (!primarySaver) {
      return { newRecords: results };
    }
    return primarySaver.save(results);
  }
}
|
|
|
|
module.exports = SaljicCrawler;
|