From addd8c134490bad63557a8762a93a8810f230e9c Mon Sep 17 00:00:00 2001 From: Naida Vatric Date: Fri, 14 Feb 2020 23:42:19 +0100 Subject: [PATCH 1/2] Saljic crawler changed substring call. --- app/crawler/specificCrawlers/saljic.js | 45 +++++++++++++++----------- 1 file changed, 27 insertions(+), 18 deletions(-) diff --git a/app/crawler/specificCrawlers/saljic.js b/app/crawler/specificCrawlers/saljic.js index 2eef7cc..b108671 100644 --- a/app/crawler/specificCrawlers/saljic.js +++ b/app/crawler/specificCrawlers/saljic.js @@ -218,7 +218,7 @@ class SaljicCrawler { } async scrapeAd(url, adType) { - // console.log("[SALJIC] Scraping : ", url); + console.log("[SALJIC] Scraping : ", url); try { const adPageSource = await fetch(url); const body = await adPageSource.text(); @@ -227,7 +227,9 @@ class SaljicCrawler { // No information for status ex. PRODAN const status = AD_STATUS.STATUS_NORMAL; //Extracting agency ID from url - const agencyObjectId = parseInt(url.substring(46, url.length)); + const agencyObjectId = url + ? parseInt(url.substring(46, url.length)) + : null; //Extracting main properties const propertySelectors = { @@ -272,14 +274,18 @@ class SaljicCrawler { .trim(); const latAndLongSrc = $(propertySelectors.latAndLong).attr("src"); - const latText = latAndLongSrc.substring( - latAndLongSrc.indexOf("marker=") + 7, - latAndLongSrc.indexOf("%2C", latAndLongSrc.indexOf("marker=")) - ); - const longText = latAndLongSrc.substring( - latAndLongSrc.indexOf("%2C", latAndLongSrc.indexOf("marker=")) + 3, - latAndLongSrc.length - ); + const latText = latAndLongSrc + ? latAndLongSrc.substring( + latAndLongSrc.indexOf("marker=") + 7, + latAndLongSrc.indexOf("%2C", latAndLongSrc.indexOf("marker=")) + ) + : ""; + const longText = latAndLongSrc + ? latAndLongSrc.substring( + latAndLongSrc.indexOf("%2C", latAndLongSrc.indexOf("marker=")) + 3, + latAndLongSrc.length + ) + : ""; const locationLat = parseFloat(latText) || null; const locationLong = parseFloat(longText) || null; @@ -328,11 +334,10 @@ class SaljicCrawler { let numberOfViewsKivi = null; let streetNumber = 0; let adStatus = status; - let shortDescription = descriptions.substring( - 0, - descriptions.indexOf(".") - ); - let longDescription = descriptions; + let shortDescription = descriptions + ? descriptions.substring(0, descriptions.indexOf(".")) + : ""; + let longDescription = descriptions || ""; //Extracting data - Glavne karakteristike let mainFieldIndex = 1; do { @@ -343,10 +348,14 @@ class SaljicCrawler { .replace(/[\n\r\t]/gm, "") .trim(); - const mainFieldTitle = mainField.substring(0, mainField.indexOf(" ")); + const mainFieldTitle = mainField + ? mainField.substring(0, mainField.indexOf(" ")) + : ""; const mainFieldValue = mainField - .substring(mainField.indexOf(" "), mainField.length) - .trim(); + ? mainField + .substring(mainField.indexOf(" "), mainField.length) + .trim() + : ""; switch (mainFieldTitle) { case "Površina": -- 2.47.3 From f56cd5b54967c2d83de8e23dea87d653d35ebaad Mon Sep 17 00:00:00 2001 From: Naida Vatric Date: Mon, 17 Feb 2020 21:55:24 +0100 Subject: [PATCH 2/2] More elegant scrape of lat and long. --- app/crawler/specificCrawlers/saljic.js | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/app/crawler/specificCrawlers/saljic.js b/app/crawler/specificCrawlers/saljic.js index b108671..c8d1f3c 100644 --- a/app/crawler/specificCrawlers/saljic.js +++ b/app/crawler/specificCrawlers/saljic.js @@ -274,18 +274,10 @@ class SaljicCrawler { .trim(); const latAndLongSrc = $(propertySelectors.latAndLong).attr("src"); - const latText = latAndLongSrc - ? latAndLongSrc.substring( - latAndLongSrc.indexOf("marker=") + 7, - latAndLongSrc.indexOf("%2C", latAndLongSrc.indexOf("marker=")) - ) - : ""; - const longText = latAndLongSrc - ? latAndLongSrc.substring( - latAndLongSrc.indexOf("%2C", latAndLongSrc.indexOf("marker=")) + 3, - latAndLongSrc.length - ) - : ""; + const tmpLatLong = latAndLongSrc.split("marker=")[1]; + const latText = tmpLatLong.split("%2C")[0]; + const longText = tmpLatLong.split("%2C")[1]; + const locationLat = parseFloat(latText) || null; const locationLong = parseFloat(longText) || null; -- 2.47.3