From fc6351af46f4a303bb33699d155697e05244c11a Mon Sep 17 00:00:00 2001 From: Naida Vatric Date: Fri, 7 Feb 2020 22:12:53 +0100 Subject: [PATCH 1/5] Added columns and logs for types. --- app/crawler/specificCrawlers/saljic.js | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/app/crawler/specificCrawlers/saljic.js b/app/crawler/specificCrawlers/saljic.js index 267dcde..b3a065c 100644 --- a/app/crawler/specificCrawlers/saljic.js +++ b/app/crawler/specificCrawlers/saljic.js @@ -315,6 +315,7 @@ class SaljicCrawler { includingBills = null, animalsAllowed = null, pool = null, + exchange = null, urbanPlanPermit = null, buildingPermit = null, utilityConnection = null, @@ -323,7 +324,7 @@ class SaljicCrawler { let renewedDate = null; let realEstateType; let numberOfViewsAgency = null; - + let numberOfViewsKivi = null; //Extracting data - Glavne karakteristike let mainFieldIndex = 1; do { @@ -537,12 +538,26 @@ class SaljicCrawler { includingBills, animalsAllowed, pool, + exchange, urbanPlanPermit, buildingPermit, utilityConnection, distanceToRiver, - numberOfViewsAgency + numberOfViewsAgency, + numberOfViewsKivi }; + // + console.log("Type of price:", typeof price); + console.log("Type of area:", typeof area); + console.log("Type of gardenSize:", typeof gardenSize); + console.log("Type of streetNumber:", typeof streetNumber); + console.log("Type of adStatus:", typeof adStatus); + console.log("Type of numberOfRooms:", typeof numberOfRooms); + console.log("Type of numberOfFloors:", typeof numberOfFloors); + console.log("Type of floor:", typeof floor); + console.log("Type of numberOfViewsAgency:", typeof numberOfViewsAgency); + console.log("Type of numberOfViewsKivi:", typeof numberOfViewsKivi); + return data; } catch (e) { console.error("Exception caught: " + e.message, "\r\nURL:", url); From 43bc23b1647a3a603f238dfe3049dc716a963977 Mon Sep 17 00:00:00 2001 From: Naida Vatric Date: Fri, 7 Feb 2020 22:27:01 +0100 Subject: [PATCH 2/5] Another fix. Defined more var. --- app/crawler/specificCrawlers/saljic.js | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/app/crawler/specificCrawlers/saljic.js b/app/crawler/specificCrawlers/saljic.js index b3a065c..7e445d6 100644 --- a/app/crawler/specificCrawlers/saljic.js +++ b/app/crawler/specificCrawlers/saljic.js @@ -283,8 +283,8 @@ class SaljicCrawler { const locationLong = parseFloat(longText) || null; //====== DETAIL INFORMATION FIELDS ========== - let area, - gardenSize, + let area = null, + gardenSize = null, numberOfRooms = null, numberOfFloors = null, floor = null, @@ -325,6 +325,8 @@ class SaljicCrawler { let realEstateType; let numberOfViewsAgency = null; let numberOfViewsKivi = null; + let streetNumber = 0; + let adStatus = status; //Extracting data - Glavne karakteristike let mainFieldIndex = 1; do { @@ -495,7 +497,7 @@ class SaljicCrawler { gardenSize, shortDescription: descriptions.substring(0, descriptions.indexOf(".")), longDescription: descriptions, - streetNumber: 0, + streetNumber, streetName, locality: "", municipality: "", @@ -505,7 +507,7 @@ class SaljicCrawler { country: "", locationLat, locationLong, - adStatus: status, + adStatus, publishedDate, renewedDate, numberOfRooms, From 8a87b9e2539bdaae38e056a8cf46626df2751731 Mon Sep 17 00:00:00 2001 From: Naida Vatric Date: Sat, 8 Feb 2020 00:27:26 +0100 Subject: [PATCH 3/5] Another fix. --- app/crawler/specificCrawlers/saljic.js | 31 ++++++++++++++++++-------- 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/app/crawler/specificCrawlers/saljic.js b/app/crawler/specificCrawlers/saljic.js index 7e445d6..b580c9c 100644 --- a/app/crawler/specificCrawlers/saljic.js +++ b/app/crawler/specificCrawlers/saljic.js @@ -327,6 +327,11 @@ class SaljicCrawler { let numberOfViewsKivi = null; let streetNumber = 0; let adStatus = status; + let shortDescription = descriptions.substring( + 0, + descriptions.indexOf(".") + ); + let longDescription = descriptions; //Extracting data - Glavne karakteristike let mainFieldIndex = 1; do { @@ -485,26 +490,34 @@ class SaljicCrawler { renewedDate = new Date(); } + const originAgencyName = AD_AGENCY.SALJIC; + const locality = ""; + const municipality = ""; + const city = ""; + const region = ""; + const entity = ""; + const country = ""; + const data = { url, agencyObjectId, - originAgencyName: AD_AGENCY.SALJIC, + originAgencyName, realEstateType, adType, title, price, area, gardenSize, - shortDescription: descriptions.substring(0, descriptions.indexOf(".")), - longDescription: descriptions, + shortDescription, + longDescription, streetNumber, streetName, - locality: "", - municipality: "", - city: "", - region: "", - entity: "", - country: "", + locality, + municipality, + city, + region, + entity, + country, locationLat, locationLong, adStatus, From be378883c8cb1e2fd0058c85b1ec06c9fceaaa01 Mon Sep 17 00:00:00 2001 From: Naida Vatric Date: Sat, 8 Feb 2020 00:47:00 +0100 Subject: [PATCH 4/5] Just another fix try. --- app/crawler/specificCrawlers/saljic.js | 1 + 1 file changed, 1 insertion(+) diff --git a/app/crawler/specificCrawlers/saljic.js b/app/crawler/specificCrawlers/saljic.js index b580c9c..66c9192 100644 --- a/app/crawler/specificCrawlers/saljic.js +++ b/app/crawler/specificCrawlers/saljic.js @@ -268,6 +268,7 @@ class SaljicCrawler { const descriptions = $(propertySelectors.descriptions) .text() + .replace(/\"/g, "") .trim(); const latAndLongSrc = $(propertySelectors.latAndLong).attr("src"); From b82134e2800565d298556073d9720a79236080d9 Mon Sep 17 00:00:00 2001 From: Naida Vatric Date: Sun, 9 Feb 2020 19:09:00 +0100 Subject: [PATCH 5/5] Fixed saljic bug for heroku. --- app/crawler/specificCrawlers/saljic.js | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/app/crawler/specificCrawlers/saljic.js b/app/crawler/specificCrawlers/saljic.js index 66c9192..2eef7cc 100644 --- a/app/crawler/specificCrawlers/saljic.js +++ b/app/crawler/specificCrawlers/saljic.js @@ -562,17 +562,6 @@ class SaljicCrawler { numberOfViewsAgency, numberOfViewsKivi }; - // - console.log("Type of price:", typeof price); - console.log("Type of area:", typeof area); - console.log("Type of gardenSize:", typeof gardenSize); - console.log("Type of streetNumber:", typeof streetNumber); - console.log("Type of adStatus:", typeof adStatus); - console.log("Type of numberOfRooms:", typeof numberOfRooms); - console.log("Type of numberOfFloors:", typeof numberOfFloors); - console.log("Type of floor:", typeof floor); - console.log("Type of numberOfViewsAgency:", typeof numberOfViewsAgency); - console.log("Type of numberOfViewsKivi:", typeof numberOfViewsKivi); return data; } catch (e) {