Saljic crawler: guard substring calls against missing values.

This commit is contained in:
Naida Vatric
2020-02-14 23:42:19 +01:00
parent fc7fe3c0b3
commit addd8c1344

View File

@@ -218,7 +218,7 @@ class SaljicCrawler {
}
async scrapeAd(url, adType) {
// console.log("[SALJIC] Scraping : ", url);
console.log("[SALJIC] Scraping : ", url);
try {
const adPageSource = await fetch(url);
const body = await adPageSource.text();
@@ -227,7 +227,9 @@ class SaljicCrawler {
// No information for status ex. PRODAN
const status = AD_STATUS.STATUS_NORMAL;
//Extracting agency ID from url
const agencyObjectId = parseInt(url.substring(46, url.length));
const agencyObjectId = url
? parseInt(url.substring(46, url.length))
: null;
//Extracting main properties
const propertySelectors = {
@@ -272,14 +274,18 @@ class SaljicCrawler {
.trim();
const latAndLongSrc = $(propertySelectors.latAndLong).attr("src");
const latText = latAndLongSrc.substring(
latAndLongSrc.indexOf("marker=") + 7,
latAndLongSrc.indexOf("%2C", latAndLongSrc.indexOf("marker="))
);
const longText = latAndLongSrc.substring(
latAndLongSrc.indexOf("%2C", latAndLongSrc.indexOf("marker=")) + 3,
latAndLongSrc.length
);
const latText = latAndLongSrc
? latAndLongSrc.substring(
latAndLongSrc.indexOf("marker=") + 7,
latAndLongSrc.indexOf("%2C", latAndLongSrc.indexOf("marker="))
)
: "";
const longText = latAndLongSrc
? latAndLongSrc.substring(
latAndLongSrc.indexOf("%2C", latAndLongSrc.indexOf("marker=")) + 3,
latAndLongSrc.length
)
: "";
const locationLat = parseFloat(latText) || null;
const locationLong = parseFloat(longText) || null;
@@ -328,11 +334,10 @@ class SaljicCrawler {
let numberOfViewsKivi = null;
let streetNumber = 0;
let adStatus = status;
let shortDescription = descriptions.substring(
0,
descriptions.indexOf(".")
);
let longDescription = descriptions;
let shortDescription = descriptions
? descriptions.substring(0, descriptions.indexOf("."))
: "";
let longDescription = descriptions || "";
//Extracting data - Glavne karakteristike
let mainFieldIndex = 1;
do {
@@ -343,10 +348,14 @@ class SaljicCrawler {
.replace(/[\n\r\t]/gm, "")
.trim();
const mainFieldTitle = mainField.substring(0, mainField.indexOf(" "));
const mainFieldTitle = mainField
? mainField.substring(0, mainField.indexOf(" "))
: "";
const mainFieldValue = mainField
.substring(mainField.indexOf(" "), mainField.length)
.trim();
? mainField
.substring(mainField.indexOf(" "), mainField.length)
.trim()
: "";
switch (mainFieldTitle) {
case "Površina":