'use strict';

const fetch = require('node-fetch');
const cheerio = require('cheerio');
const fs = require('fs');
const cloudinary = require('cloudinary');
const FormData = require('form-data');
import {
  AD_TYPE_SALE,
  IGNORED_USERNAMES,
  CATEGORY_FLAT,
  CATEGORY_HOUSE,
  CATEGORY_OFFICE,
  CATEGORY_LAND,
  CATEGORY_APARTMENT,
  CATEGORY_GARAGE
} from '../enums';

// CSS selectors used on a single listing page. They mirror the page layout
// very closely, so they will need updating whenever the site markup changes.
const MAIN_CONTENT_SELECTOR =
  'body > div.container-fluid > div.container > div:nth-child(2) > div.col-xs-12.col-sm-12.col-md-12.col-lg-9.content-main';
const JSON_SCRIPT_SELECTOR = `${MAIN_CONTENT_SELECTOR} > div:nth-child(7) > script`;
const DETAILS_BOX_SELECTOR = `${MAIN_CONTENT_SELECTOR} > div:nth-child(1) > div > div > div.col-xs-12.col-sm-4.box-details`;
const PRICE_SELECTOR = `${DETAILS_BOX_SELECTOR} > div.prices > span.pull-left`;
const PROPERTIES_SELECTOR = `${DETAILS_BOX_SELECTOR} > div.body`;
const CATEGORY_SELECTOR = `${DETAILS_BOX_SELECTOR} > div.title > p`;
const TITLE_SELECTOR =
  'div.container-fluid > div.container > div.row.content-top > div.col-xs-12.col-sm-6.col-md-9 > div.description.pull-left > h1';
const DESCRIPTION_SELECTOR = '#b1 > div > div > div';

// Maps the site's numeric property-type id (`re_types_id`) to our category.
// The site's numbering does not match the numeric values in ../enums, hence
// the explicit table.
const CATEGORY_BY_TYPE_ID = new Map([
  [1, CATEGORY_HOUSE],
  [2, CATEGORY_FLAT],
  [3, CATEGORY_APARTMENT],
  [4, CATEGORY_OFFICE],
  [5, CATEGORY_LAND],
  [6, CATEGORY_GARAGE]
]);

// The site's textual property sub-types, grouped by the category they map to.
const CATEGORY_NAMES = [
  [CATEGORY_HOUSE, [
    'samostojeća', 'dvojna', 'kuća u nizu', 'stambeno-poslovni objekt',
    'prizemnica', 'kuća na moru', 'kuća u izgradnji', 'dvorac',
    'apartmanska kuća', 'porodična kuća', 'vikend kuća', 'luksuzna kuća',
    'kamena', 'vila', 'splav'
  ]],
  [CATEGORY_FLAT, [
    'stan u zgradi', 'stan u kući', 'stan višeetažni',
    'stan višeetažni u kući', 'stan u starijoj zgradi', 'stan u novogradnji',
    'stan u neboderu', 'Korišten stan u novogradnji'
  ]],
  [CATEGORY_APARTMENT, ['apartman na moru', 'apartman u planini']],
  [CATEGORY_GARAGE, ['unutrašnje garažno mjesto', 'unutrašnje parkirno mjesto']],
  [CATEGORY_LAND, [
    'građevinsko', 'građevinsko stambeno', 'zemljište, ostalo', 'odmaralište',
    'oranica', 'šuma', 'livada', 'građevinsko M2', 'građevinsko M1',
    'građevinsko - turističko', 'građevinsko - poslovno', 'otok',
    'poljoprivredno'
  ]],
  [CATEGORY_OFFICE, [
    'lokal', 'ured', 'skladište ili garaža', 'radionica', 'tvornica',
    'restoran', 'sportski centar', 'ordinacija', 'kiosk', 'auto-praonica',
    'poslovna zgrada', 'skladište', 'garaža', 'hotel', 'pansion',
    'apartmanska zgrada', 'trgovina', 'prodajno skladišni',
    'proizvodno skladišni', 'Kancelarije', 'Poslovni prostor'
  ]]
];

/**
 * Normalizes a category label for lookup: trims it, collapses inner
 * whitespace and lower-cases it.
 *
 * @param {string} name - Raw category label.
 * @returns {string} Normalized lookup key.
 */
function normalizeCategoryName(name) {
  return String(name).trim().replace(/\s+/g, ' ').toLowerCase();
}

// Normalized label -> category id, built once from CATEGORY_NAMES.
const CATEGORY_BY_NAME = new Map();
for (const [categoryId, names] of CATEGORY_NAMES) {
  for (const name of names) {
    CATEGORY_BY_NAME.set(normalizeCategoryName(name), categoryId);
  }
}

/**
 * Parses a number printed with '.' / ',' separators and two trailing decimal
 * digits (e.g. "120.000,00 KM" -> 120000).
 *
 * Every separator is stripped (not just the first of each kind) and the
 * result is divided by 100. NOTE(review): this assumes the page always prints
 * exactly two decimals - confirm against live listings.
 *
 * @param {string|undefined} text - Raw text taken from the page.
 * @returns {number} The parsed value, or NaN when the text is missing or not
 *   numeric.
 */
function parseTwoDecimalNumber(text) {
  const raw = text == null ? '' : String(text);
  return parseFloat(raw.replace(/[.,]/g, '')) / 100;
}

/**
 * Interprets a caller-supplied result cap.
 *
 * @param {*} maxResults - Requested cap.
 * @returns {number} The cap, or Infinity when it is not a finite number.
 */
function toResultLimit(maxResults) {
  return Number.isFinite(maxResults) ? maxResults : Infinity;
}

/**
 * Crawler for real-estate sale listings on rental.ba.
 */
export default class RentalCrawler {
  /**
   * @param {number} [fromPage=0] - First result page to crawl.
   * @param {number} [toPage=10] - Last result page to crawl (inclusive).
   * @param {number} [maxResults=1000] - Upper bound on collected listings.
   */
  constructor(fromPage = 0, toPage = 10, maxResults = 1000) {
    console.log("Rental Crawler");
    this.fromPage = fromPage;
    this.toPage = toPage;
    this.maxResults = maxResults;
  }

  /**
   * Downloads one listing page and extracts its data.
   *
   * The embedded JSON object is preferred; when it is absent or cannot be
   * parsed, the fields are scraped from the markup instead.
   *
   * @param {string} url - Absolute URL of the listing.
   * @returns {Promise<Object|null>} The listing record, or null when the page
   *   could not be fetched or processed (the error is logged).
   */
  async indexSingle(url) {
    try {
      const res = await fetch(url);
      if (!res.ok) {
        // Do not scrape error pages as if they were listings.
        throw new Error(`HTTP ${res.status} for ${url}`);
      }
      const $ = cheerio.load(await res.text());

      let fields;
      try {
        fields = this._extractFromJson($);
      } catch (e) {
        // The listing has no usable JSON object: fall back to the selectors.
        fields = this._extractFromMarkup($);
      }

      const images = [];
      $(".img-gallery").contents().each((i, elem) => {
        // The link stored in data-preview also carries the max height and
        // width parameters.
        const preview = $(elem).attr('data-preview');
        if (preview) {
          images.push(preview);
        }
      });

      const { lat, lng } = fields;
      return {
        category: fields.category,
        url,
        title: fields.title,
        price: fields.price,
        size: fields.size,
        rooms: fields.rooms,
        floor: fields.floor,
        address: fields.address,
        adType: AD_TYPE_SALE,
        time: fields.time,
        shortDescription: fields.title,
        longDescription: fields.descriptions,
        lat,
        lng,
        // NOTE(review): stored as [lat, lng]; confirm the consumer does not
        // expect [lng, lat]. Both entries are NaN when there is no map data.
        loc: [parseFloat(lat), parseFloat(lng)],
        images
      };
    } catch (e) {
      console.error('Exception caught: ' + e.message);
    }
    return null;
  }

  /**
   * Reads the listing fields from the JSON object embedded in the page.
   * Throws when the object is missing or malformed, which the caller uses as
   * the signal to fall back to markup scraping.
   *
   * @param {Function} $ - Cheerio root of the listing page.
   * @returns {Object} Extracted fields.
   */
  _extractFromJson($) {
    const scriptText = $(JSON_SCRIPT_SELECTOR).text();
    // Drops a fixed 21-character prefix and the final character around the
    // JSON literal - presumably a `var ... =` prefix and a trailing ';'
    // (TODO confirm against the live page).
    const listing = JSON.parse(scriptText.slice(21, -1));
    return {
      title: listing["re_realEstates_portalName"],
      category: this.getCategoryIdfromNumber(listing["re_types_id"]),
      price: listing["re_realEstates_price"],
      size: listing["re_realEstates_area"],
      rooms: listing["re_realEstates_roomsNO"],
      address: listing["re_realEstates_address"],
      descriptions: listing["re_realEstates_description"],
      floor: listing["re_realEstates_floorNO"],
      time: listing["re_realEstates_inserted"],
      lat: listing["re_realEstates_latitude"],
      lng: listing["re_realEstates_longitude"]
    };
  }

  /**
   * Scrapes the listing fields from the page markup. Used for listings that
   * carry no JSON object; such listings have no map either, so the time and
   * the coordinates stay undefined.
   *
   * @param {Function} $ - Cheerio root of the listing page.
   * @returns {Object} Extracted fields.
   */
  _extractFromMarkup($) {
    // "Label: value" entries of the details box, keyed by label.
    const props = new Map();
    $(PROPERTIES_SELECTOR).contents().each((i, elem) => {
      const text = $(elem).text().trim();
      const colonAt = text.indexOf(':');
      if (colonAt > 0) {
        props.set(text.slice(0, colonAt).trim(), text.slice(colonAt + 1).trim());
      }
    });

    // The heading is comma separated and starts with the category. A category
    // label may itself contain a comma ('zemljište, ostalo'), so the first two
    // parts are tried together before falling back to the first part alone.
    const categoryParts = $(CATEGORY_SELECTOR).text().split(',', 3);
    let category;
    if (categoryParts.length > 2) {
      category = this.getCategoryIdfromText(`${categoryParts[0]},${categoryParts[1]}`);
    }
    if (category === undefined) {
      category = this.getCategoryIdfromText(categoryParts[0]);
    }

    return {
      title: $(TITLE_SELECTOR).text(),
      category,
      price: parseTwoDecimalNumber($(PRICE_SELECTOR).text()),
      size: parseTwoDecimalNumber(props.get('Površina')),
      rooms: props.get('Broj soba'),
      address: props.get('Ulica'),
      descriptions: $(DESCRIPTION_SELECTOR).text(),
      floor: parseInt(props.get('Spratnost'), 10),
      time: undefined,
      lat: undefined,
      lng: undefined
    };
  }

  /**
   * Indexes every listing linked from one page of the sale search results.
   *
   * @param {number} pageNr - Result page number.
   * @param {number} [maxResults=1000] - Maximum number of listings to collect
   *   from this page; a non-finite value means no limit.
   * @returns {Promise<Object|undefined>} Listing records keyed by listing
   *   URL, or undefined when the page itself could not be processed (the
   *   error is logged).
   */
  async indexPage(pageNr, maxResults = 1000) {
    try {
      console.log('Starting to index page: ' + pageNr);
      const url = "http://www.rental.ba/pretraga/prodaja-1/stranica-" + pageNr;

      // The site's search form also posts filter fields (sales, re_types_id
      // for the property type, full_text, re_realEstates_code, price / area /
      // roomsNO / floorNO min-max pairs, re_subTypes_id). None of them is
      // sent: the URL alone selects the sale listings.
      const res = await fetch(url, { method: 'POST' });
      if (!res.ok) {
        throw new Error(`HTTP ${res.status} for ${url}`);
      }
      const $ = cheerio.load(await res.text());

      // A Set drops duplicate links; entries without an anchor are skipped.
      const hrefs = new Set();
      $('.middle').each((i, elem) => {
        const href = $(elem).find("a").first().attr('href');
        if (href) {
          hrefs.add(href);
        }
      });

      const limit = toResultLimit(maxResults);
      const results = {};
      let collected = 0;
      for (const href of hrefs) {
        if (collected >= limit) {
          break;
        }
        console.log(`indexing: ${href}`);
        const singleData = await this.indexSingle(href);
        if (singleData) {
          results[href] = singleData;
          collected += 1;
        }
        // Throttle requests to the site.
        await this.sleep(500);
      }
      return results;
    } catch (e) {
      console.error('Exception caught:' + e);
    }
    return undefined;
  }

  /**
   * Translates the site's numeric property-type id into a category constant.
   *
   * @param {number|string} category - Site type id; numeric strings are
   *   accepted because the value comes from parsed JSON.
   * @returns {*} The matching CATEGORY_* constant, or undefined when the id
   *   is unknown.
   */
  getCategoryIdfromNumber(category) {
    const typeId = typeof category === 'string' ? Number(category.trim()) : category;
    return CATEGORY_BY_TYPE_ID.get(typeId);
  }

  /**
   * Translates the site's textual property sub-type into a category constant.
   * Matching ignores letter case and surrounding / repeated whitespace.
   *
   * @param {string} category - Sub-type label as printed on the page.
   * @returns {*} The matching CATEGORY_* constant, or undefined when the
   *   label is unknown.
   */
  getCategoryIdfromText(category) {
    if (category == null) {
      return undefined;
    }
    return CATEGORY_BY_NAME.get(normalizeCategoryName(category));
  }

  /**
   * Resolves after the given delay.
   *
   * @param {number} ms - Delay in milliseconds.
   * @returns {Promise<void>}
   */
  sleep(ms) {
    return new Promise(resolve => setTimeout(resolve, ms));
  }

  /**
   * Indexes a range of result pages one after another.
   *
   * @param {number} start - First page number.
   * @param {number} end - Last page number (inclusive).
   * @param {number} [maxResults=1000] - Maximum total number of listings to
   *   collect; a non-finite value means no limit.
   * @returns {Promise<Object>} Listing records keyed by listing URL.
   */
  async indexPages(start, end, maxResults = 1000) {
    const limit = toResultLimit(maxResults);
    const results = {};
    for (let page = start; page <= end; page++) {
      const remaining = limit - Object.keys(results).length;
      if (remaining <= 0) {
        break;
      }
      // indexPage yields undefined on failure; Object.assign ignores that.
      const pageResults = await this.indexPage(page, remaining);
      Object.assign(results, pageResults);
      // Longer pause between result pages.
      await this.sleep(5000);
    }
    return results;
  }

  /**
   * Crawls the page range and result cap given to the constructor.
   *
   * @returns {Promise<Object>} Listing records keyed by listing URL.
   */
  async crawl() {
    return this.indexPages(this.fromPage, this.toPage, this.maxResults);
  }
}