From fed2dc00dc4d81d39b3c4c5fd06bb8ee6517c89f Mon Sep 17 00:00:00 2001 From: Naida Vatric Date: Sun, 29 Dec 2019 23:42:39 +0100 Subject: [PATCH 1/2] Changed number of rooms. --- .gitignore | 2 ++ app/crawler/specificCrawlers/rental.js | 13 ++++++------- test/rentalScrapeTest.js | 2 +- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/.gitignore b/.gitignore index b24fc61..02b0461 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ node_modules/ .env .idea/ +.eslintrc +.vscode/ diff --git a/app/crawler/specificCrawlers/rental.js b/app/crawler/specificCrawlers/rental.js index 8f38dc8..f646b92 100644 --- a/app/crawler/specificCrawlers/rental.js +++ b/app/crawler/specificCrawlers/rental.js @@ -218,6 +218,9 @@ class RentalCrawler { const jsonData = scriptElement[0].children[0].data.substring(20); const parsedJsonData = JSON.parse(jsonData); extractedData = parsedJsonData[0]; + + ////**Trying to fix + console.log("Extracted data: ", extractedData); } catch (e) { throw { message: "Can't find ad data JSON" }; } @@ -312,7 +315,7 @@ class RentalCrawler { let numberOfRooms = parseInt(extractedData["re_realEstates_roomsNO"]) + - parseInt(extractedData["re_realEstates_bedroomNO"]) || null, + parseInt(extractedData["re_realEstates_bedNO"]) || null, numberOfFloors = parseInt(extractedData["re_realEstates_floorsNO"]) || this.getNumberOfFloorsFromFloorId(extractedData["re_floorNO_id"]), @@ -397,9 +400,7 @@ class RentalCrawler { ); if (!publishedDateMoment.isValid()) { throw { - message: `Invalid published date : ${ - extractedData["re_realEstates_inserted"] - }` + message: `Invalid published date : ${extractedData["re_realEstates_inserted"]}` }; } @@ -410,9 +411,7 @@ class RentalCrawler { ); if (!renewedDateMoment.isValid()) { throw { - message: `Invalid renewed date : ${ - extractedData["re_realEstates_edited"] - }` + message: `Invalid renewed date : ${extractedData["re_realEstates_edited"]}` }; } diff --git a/test/rentalScrapeTest.js b/test/rentalScrapeTest.js index 9828f2d..4032c01 100644 --- a/test/rentalScrapeTest.js +++ b/test/rentalScrapeTest.js @@ -13,5 +13,5 @@ if (urlToScrape) { })(); } else { console.log("No URL to scrape. Use like this : "); - console.log("npm run test-olx-scraper -- URL_TO_SCRAPE"); + console.log("npm run test-rental-scraper -- URL_TO_SCRAPE"); } From 0c2d218d29f31c46bf10d06d8caecce83eacb18a Mon Sep 17 00:00:00 2001 From: Naida Vatric Date: Thu, 2 Jan 2020 00:10:31 +0100 Subject: [PATCH 2/2] Changed floor numbers and basement-attic tag. --- app/crawler/specificCrawlers/rental.js | 43 +++++++++++++++++++++++--- 1 file changed, 38 insertions(+), 5 deletions(-) diff --git a/app/crawler/specificCrawlers/rental.js b/app/crawler/specificCrawlers/rental.js index f646b92..39eb1c5 100644 --- a/app/crawler/specificCrawlers/rental.js +++ b/app/crawler/specificCrawlers/rental.js @@ -218,9 +218,6 @@ class RentalCrawler { const jsonData = scriptElement[0].children[0].data.substring(20); const parsedJsonData = JSON.parse(jsonData); extractedData = parsedJsonData[0]; - - ////**Trying to fix - console.log("Extracted data: ", extractedData); } catch (e) { throw { message: "Can't find ad data JSON" }; } @@ -355,7 +352,9 @@ class RentalCrawler { realEstatePropertiesFromInfrastructure.phoneConnection, cableTV = realEstatePropertiesFromInfrastructure.cableTV, internet = realEstatePropertiesFromInfrastructure.internet, - basementAttic = realEstatePropertiesFromSpaces.basementAttic, + basementAttic = + realEstatePropertiesFromSpaces.basementAttic || + this.checkBasemAtticFromFloors(extractedData["re_floorNO_id"]), storeRoom = realEstatePropertiesFromSpaces.storeRoom, videoSurveillance = realEstatePropertiesFromDescriptions.videoSurveillance || @@ -781,8 +780,42 @@ class RentalCrawler { if (floorIds.length === 0) { return null; } + let noOfFloors = floorIds.length; + // Floors of 'suteren', 'podrum', 'tavan' and 'potkrovlje' are not counted + floorIds.forEach(id => { + if ( + parseInt(id) === 1 || + parseInt(id) === 2 || + parseInt(id) === 12 || + parseInt(id) === 14 + ) { + noOfFloors--; + } + }); + return noOfFloors; + } - return floorIds.length; + checkBasemAtticFromFloors(floorsIdText) { + // floorIdText can be array of numbers, separated by comma or number + const floorIds = floorsIdText.split(","); + + let check = false; + + if (floorIds.length === 0) { + check = false; + } + //If floors 'suteren', 'podrum', 'tavan' and 'potkrovlje' exists then tag for basement-attic is true + floorIds.forEach(id => { + if ( + parseInt(id) === 1 || + parseInt(id) === 2 || + parseInt(id) === 12 || + parseInt(id) === 14 + ) { + check = true; + } + }); + return check; } async sleep(ms) {