diff --git a/.gitignore b/.gitignore index d0441a1..02b0461 100644 --- a/.gitignore +++ b/.gitignore @@ -2,4 +2,4 @@ node_modules/ .env .idea/ .eslintrc -.vscode/ \ No newline at end of file +.vscode/ diff --git a/app/crawler/specificCrawlers/rental.js b/app/crawler/specificCrawlers/rental.js index 8f38dc8..39eb1c5 100644 --- a/app/crawler/specificCrawlers/rental.js +++ b/app/crawler/specificCrawlers/rental.js @@ -312,7 +312,7 @@ class RentalCrawler { let numberOfRooms = parseInt(extractedData["re_realEstates_roomsNO"]) + - parseInt(extractedData["re_realEstates_bedroomNO"]) || null, + parseInt(extractedData["re_realEstates_bedNO"]) || null, numberOfFloors = parseInt(extractedData["re_realEstates_floorsNO"]) || this.getNumberOfFloorsFromFloorId(extractedData["re_floorNO_id"]), @@ -352,7 +352,9 @@ class RentalCrawler { realEstatePropertiesFromInfrastructure.phoneConnection, cableTV = realEstatePropertiesFromInfrastructure.cableTV, internet = realEstatePropertiesFromInfrastructure.internet, - basementAttic = realEstatePropertiesFromSpaces.basementAttic, + basementAttic = + realEstatePropertiesFromSpaces.basementAttic || + this.checkBasemAtticFromFloors(extractedData["re_floorNO_id"]), storeRoom = realEstatePropertiesFromSpaces.storeRoom, videoSurveillance = realEstatePropertiesFromDescriptions.videoSurveillance || @@ -397,9 +399,7 @@ class RentalCrawler { ); if (!publishedDateMoment.isValid()) { throw { - message: `Invalid published date : ${ - extractedData["re_realEstates_inserted"] - }` + message: `Invalid published date : ${extractedData["re_realEstates_inserted"]}` }; } @@ -410,9 +410,7 @@ class RentalCrawler { ); if (!renewedDateMoment.isValid()) { throw { - message: `Invalid renewed date : ${ - extractedData["re_realEstates_edited"] - }` + message: `Invalid renewed date : ${extractedData["re_realEstates_edited"]}` }; } @@ -782,8 +780,42 @@ class RentalCrawler { if (floorIds.length === 0) { return null; } + let noOfFloors = floorIds.length; + // Floors of 
'suteren', 'podrum', 'tavan' and 'potkrovlje' are not counted + floorIds.forEach(id => { + if ( + parseInt(id) === 1 || + parseInt(id) === 2 || + parseInt(id) === 12 || + parseInt(id) === 14 + ) { + noOfFloors--; + } + }); + return noOfFloors; + } - return floorIds.length; + checkBasemAtticFromFloors(floorsIdText) { + // floorsIdText can be a comma-separated list of numbers or a single number + const floorIds = floorsIdText.split(","); + + let check = false; + + if (floorIds.length === 0) { + check = false; + } + // If any of the floors 'suteren', 'podrum', 'tavan' or 'potkrovlje' exists, then the tag for basement-attic is true + floorIds.forEach(id => { + if ( + parseInt(id) === 1 || + parseInt(id) === 2 || + parseInt(id) === 12 || + parseInt(id) === 14 + ) { + check = true; + } + }); + return check; } async sleep(ms) { diff --git a/test/rentalScrapeTest.js b/test/rentalScrapeTest.js index 9828f2d..4032c01 100644 --- a/test/rentalScrapeTest.js +++ b/test/rentalScrapeTest.js @@ -13,5 +13,5 @@ if (urlToScrape) { })(); } else { console.log("No URL to scrape. Use like this : "); - console.log("npm run test-olx-scraper -- URL_TO_SCRAPE"); + console.log("npm run test-rental-scraper -- URL_TO_SCRAPE"); }