new structure; code polish
This commit is contained in:
@@ -1,5 +1,3 @@
|
||||
Početna stranica : http://www.rental.ba/pretraga/prodaja-1/stranica-1
|
||||
|
||||
kategorije :
|
||||
|
||||
kuća = 1
|
||||
|
||||
@@ -1,16 +0,0 @@
|
||||
export const AD_TYPE_SALE = 1;
|
||||
export const AD_TYPE_RENT = 2;
|
||||
|
||||
export const IGNORED_USERNAMES = ['rental']
|
||||
|
||||
export const CATEGORY_FLAT = 0;
|
||||
export const CATEGORY_HOUSE = 1;
|
||||
export const CATEGORY_OFFICE = 2;
|
||||
export const CATEGORY_LAND = 3;
|
||||
export const CATEGORY_APARTMENT = 4;
|
||||
export const CATEGORY_GARAGE = 5;
|
||||
|
||||
export const STATUS_NORMAL = 0;
|
||||
export const STATUS_RESERVED = 1;
|
||||
export const STATUS_SOLD = 2;
|
||||
|
||||
@@ -1,35 +0,0 @@
|
||||
{
|
||||
"name": "stan",
|
||||
"version": "1.0.0",
|
||||
"description": "",
|
||||
"main": "index.js",
|
||||
"dependencies": {
|
||||
"babel": "^6.5.2",
|
||||
"babel-core": "^6.18.2",
|
||||
"babel-loader": "^6.2.7",
|
||||
"babel-plugin-transform-async-to-generator": "^6.16.0",
|
||||
"babel-polyfill": "^6.16.0",
|
||||
"babel-preset-es2015": "^6.18.0",
|
||||
"cheerio": "^0.22.0",
|
||||
"cloudinary": "^1.8.0",
|
||||
"dotenv": "^2.0.0",
|
||||
"fetch": "^1.1.0",
|
||||
"form-data": "^2.1.4",
|
||||
"json-loader": "^0.5.4",
|
||||
"mongodb": "^2.2.11",
|
||||
"node-fetch": "^1.6.3",
|
||||
"source-map-support": "^0.4.6",
|
||||
"twilio": "^2.11.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
"babel-preset-es2015": "^6.24.1",
|
||||
"webpack": "^1.13.3"
|
||||
},
|
||||
"scripts": {
|
||||
"dev": "webpack",
|
||||
"prod": "webpack -p",
|
||||
"test": "echo \"Error: no test specified\" && exit 1"
|
||||
},
|
||||
"author": "",
|
||||
"license": "ISC"
|
||||
}
|
||||
@@ -1,176 +0,0 @@
|
||||
http://www.rental.ba/pretraga
|
||||
|
||||
sales=1
|
||||
re_types_id=
|
||||
full_text=
|
||||
re_realEstates_code=
|
||||
re_realEstates_price_max=
|
||||
re_realEstates_price_min=
|
||||
re_realEstates_area_max=
|
||||
re_realEstates_area_min=
|
||||
re_realEstates_roomsNO_min=
|
||||
re_realEstates_roomsNO_max=
|
||||
re_realEstate_floorNO_min=
|
||||
re_realEstate_floorNO_max=
|
||||
re_subTypes_id=1
|
||||
search_count=1
|
||||
|
||||
<script>var json_map_data = [{
|
||||
|
||||
"re_realEstates_id":"1084",
|
||||
"re_realEstates_code":"1084-1",
|
||||
"re_realEstates_parent":"0",
|
||||
"ag_agents_id":"1",
|
||||
"re_types_id":"2",
|
||||
"re_subTypes_id":"1",
|
||||
"re_action_id":"1",
|
||||
"re_status_id":"1",
|
||||
"loc_countries_id":"4",
|
||||
"loc_counties_id":"400008",
|
||||
"loc_islands_id":"0",
|
||||
"loc_cities_id":"400076",
|
||||
"loc_cityAreas_id":"400071",
|
||||
"loc_quarters_id":"400128",
|
||||
"re_realEstates_address":"Adema Bu\u0107e ",
|
||||
"re_realEstates_addressNO":null,
|
||||
"op_realEstates_addressSync":"1",
|
||||
"pr_projects_id":"0",
|
||||
"pr_buildings_id":"0",
|
||||
"re_realEstates_name":"",
|
||||
"re_realEstates_longitude":"18.364784424420122",
|
||||
"re_realEstates_latitude":"43.85508550138114",
|
||||
"op_realEstates_gmapSync":"1",
|
||||
"re_realEstates_price":"127000",
|
||||
"re_realEstates_priceM2":"2116.67",
|
||||
"re_realEstates_priceOld":"",
|
||||
"re_realEstates_priceCustomer":null,
|
||||
"op_realEstates_priceOnRequest":"0",
|
||||
"op_realEstates_priceWithoutTaxes":"0",
|
||||
"op_realEstates_utilitiesIncluded":"0",
|
||||
"re_realEstates_reservePrice":"0",
|
||||
"re_realEstates_repurchase":null,
|
||||
"re_realEstates_area":"60",
|
||||
"re_realEstates_infield":"0",
|
||||
"re_realEstates_warehouseArea":"0",
|
||||
"re_realEstates_constructionQuotient":"0",
|
||||
"re_realEstates_constructionFloors":"0",
|
||||
"re_realEstates_grossDevelopedArea":"0",
|
||||
"re_realEstates_parkingNO":"0",
|
||||
"re_realEstates_bedNO":"0",
|
||||
"re_realEstates_bathroomNO":"1",
|
||||
"re_realEstates_roomsNO":"2",
|
||||
"op_realEstates_openSpace":"0",
|
||||
"op_realEstates_roomApartment":"0",
|
||||
"re_realEstates_flatsNO":"0",
|
||||
"re_realEstates_floorNO":"3",
|
||||
"re_realEstates_floorsNO":"8",
|
||||
"re_realEstates_ceilingHeight":"",
|
||||
"re_realEstates_actualAge":"2010",
|
||||
"re_realEstates_movingIn":"",
|
||||
"re_realEstates_LRcartridge":null,
|
||||
"re_realEstates_LRsubCartridge":null,
|
||||
"re_realEstates_LRlotNo":null,
|
||||
"re_realEstates_LRcounties":null,
|
||||
"op_realEstates_newBuilding":"0",
|
||||
"op_realEstates_buildingPermit":"0",
|
||||
"op_realEstates_locationPermit":"0",
|
||||
"op_realEstates_inspectionCertificat":"0",
|
||||
"re_realEstates_landRegisterNotification":null,
|
||||
"op_realEstates_seafront":"0",
|
||||
"di_realEstates_transportation":"5",
|
||||
"di_realEstates_seaDistance":"",
|
||||
"di_realEstates_center":"",
|
||||
"i18n_id":"57",
|
||||
"re_access_id":"9",
|
||||
"re_agencyCommission_id":"0",
|
||||
"re_categories_id":"0",
|
||||
"re_descriptions_id":"2,14,1,28,26,3,33,19",
|
||||
"re_descriptions_area":null,
|
||||
"re_energyEfficiency_id":"0",
|
||||
"re_floorNO_id":"",
|
||||
"re_heating_id":"4",
|
||||
"re_infrastructure_id":"9,12,16,11,5,4,1,8,2",
|
||||
"re_joinery_id":"6",
|
||||
"re_orientation_id":"1",
|
||||
"re_ownerships_id":"0",
|
||||
"re_proofs_id":"0",
|
||||
"re_propertyCondition_id":"8",
|
||||
"re_registryStatus_id":"0",
|
||||
"re_spaces_id":"1,4,2",
|
||||
"re_spaces_values":"a:3:{i:1;s:0:\"\";i:4;s:2:\"22\";i:2;s:0:\"\";}",
|
||||
"re_transportation_id":"",
|
||||
"re_realEstates_priority":"0",
|
||||
"cp_realEstates_name":null,
|
||||
"cp_realEstates_sex":null,
|
||||
"cp_realEstates_phone":null,
|
||||
"cp_realEstates_phone2":null,
|
||||
"cp_realEstates_email":null,
|
||||
"cl_clients_buyer_id":null,
|
||||
"cl_transactionRiskLevel_id":null,
|
||||
"re_realEstates_contractualPrice":null,
|
||||
"re_realEstates_commission":null,
|
||||
"re_realEstates_conclusionPlace":null,
|
||||
"re_realEstates_conclusionDate":null,
|
||||
"re_realEstates_downPayment":null,
|
||||
"re_realEstates_downPaymentDate":null,
|
||||
"re_realEstates_payoffDate":null,
|
||||
"adm_realEstates_discount":"0",
|
||||
"adm_realEstates_discountDate":"0000-00-00",
|
||||
"op_realEstates_web":"0",
|
||||
"op_realEstates_specialOffer":"1",
|
||||
"op_realEstates_bestBuy":"1",
|
||||
"re_portals_id":null,
|
||||
"re_realEstates_rentedTo":null,
|
||||
"re_realEstates_recived":null,
|
||||
"re_realEstates_inserted":"2017-10-11 14:13:41",
|
||||
"re_realEstates_edited":"2017-10-11 14:26:34",
|
||||
"re_realEstates_reservedDate":null,
|
||||
"re_realEstates_reservedDeadline":null,
|
||||
"re_realEstates_resource":null,
|
||||
"re_realEstates_contractNO":null,
|
||||
"re_position_id":"0",
|
||||
"re_realEstates_old_id":null,
|
||||
"re_realEstates_yearAdapted":null,
|
||||
"op_realEstates_shortLease":"0",
|
||||
"re_advertisement_text":null,
|
||||
"re_advertisement_published_date":null,
|
||||
"re_advertisement_status_id":null,
|
||||
"re_advertisement_spotted_date":null,
|
||||
"re_realEstates_commission2":null,
|
||||
"re_realEstates_commission_suffix_id":null,
|
||||
"re_realEstates_commission2_suffix_id":null,
|
||||
"re_realEstates_fieldArea":"0",
|
||||
"re_roomType_id":"0",
|
||||
"op_realEstates_ownerPermit":"0",
|
||||
"re_realEstates_eop_note":null,
|
||||
"re_realEstates_print_ads_description":null,
|
||||
"cl_profile_id":null,
|
||||
"cl_clients_id":null,
|
||||
"re_photos_name":"1084\/1084_1_1507724744.jpg",
|
||||
"op_realEstates_lux":"0",
|
||||
"loc_cities_name":"Sarajevo",
|
||||
"loc_countries_name":"Bosna i Hercegovina",
|
||||
"loc_counties_name":"Sarajevo",
|
||||
"loc_cityAreas_name":"Sarajevo \u2013 Novo Sarajevo",
|
||||
"loc_quarters_name":"Bu\u0107a Potok",
|
||||
"loc_islands_name":null,
|
||||
"re_action_priority":"1",
|
||||
"i18n_id1":"22133",
|
||||
"re_types_priority":"1",
|
||||
"i18n_id2":"2",
|
||||
"i18n_id3":"8",
|
||||
"re_subTypes_oldID":"101",
|
||||
"re_subTypes_priority":"4",
|
||||
"i18n_id4":"57",
|
||||
"i18n_id5":"125",
|
||||
"re_subTypes_name":"stan u zgradi",
|
||||
"re_subTypes_shortName":"s. u zgradi",
|
||||
"re_realEstates_description":"<p style="text-align: justify;">Vrlo, vrlo dobar stan! U Tibri u Bu\u0107a Potoku, pored Binga, s pogledom prema dje\u010dijem igrali&scaron;tu, ovaj fantasti\u010dni dovosban stan ispuni\u0107e sva o\u010dekivnaja manje porodice ili para koji \u017eeli miran \u017eivot u kvalitetnoj novogradnji u porodi\u010dnom naselju.<\/p>\r\n<p style="text-align: justify;">Knji\u017ene povr&scaron;ine 59,42 m&sup2;, stan se sastoji od dnevne sobe s trpezarijom, kuhinje, spava\u0107e sobe, kupatila, dva balkona, hodnika, te ostave. Sve je vrlo kvalitetno ura\u0111eno i odr\u017eavano pa\u017enjom dobrog doma\u0107ina. Stan je opremljen vrhunskom kuhinjom s aparatima, a prodajemo ga sa svim stvarima koje vidite na slikama, izuzev TV aparata.&nbsp;<\/p>\r\n<p style="text-align: justify;">Dva balkona daju poseban &scaron;arm, a optimalna spratnost omogu\u0107it \u0107e vam u\u017eivanje u punoj intimi. Zgrada je opremljena liftom.<\/p>\r\n<p style="text-align: justify;">Prostrana gara\u017ea povr&scaron;ine 22,36 m&sup2; je povezana liftom sa stanom. Mogu\u0107e je kupiti i stan bez gara\u017ee.<\/p>\r\n<p style="text-align: justify;">Za pogledati!<\/p>\r\n<p style="text-align: justify;">Mirza - 061 188 252&nbsp;<\/p>",
|
||||
"re_realEstates_portalName":"East 17",
|
||||
"re_types_name":"stan",
|
||||
"re_types_plural":"stanovi",
|
||||
"re_types_shortName":"S",
|
||||
"re_action_name":"prodaja",
|
||||
"re_action_shortName":"P",
|
||||
"re_propertyCondition_name":"Ure\u0111eno i odr\u017eavano"
|
||||
}]
|
||||
@@ -12,7 +12,7 @@ import {
|
||||
CATEGORY_HOUSE,
|
||||
CATEGORY_OFFICE,
|
||||
CATEGORY_LAND
|
||||
} from '../enums';
|
||||
} from '../../common/enums';
|
||||
|
||||
export default class OlxCrawler {
|
||||
|
||||
|
||||
@@ -1,150 +1,158 @@
|
||||
'use strict'
|
||||
'use strict';
|
||||
|
||||
let fetch = require('node-fetch');
|
||||
let cheerio = require('cheerio');
|
||||
let fs = require('fs');
|
||||
let cloudinary = require('cloudinary');
|
||||
let FormData = require('form-data');
|
||||
let fetch = require ('node-fetch');
|
||||
let cheerio = require ('cheerio');
|
||||
let fs = require ('fs');
|
||||
let cloudinary = require ('cloudinary');
|
||||
let FormData = require ('form-data');
|
||||
|
||||
import {
|
||||
AD_TYPE_SALE,
|
||||
|
||||
IGNORED_USERNAMES,
|
||||
|
||||
CATEGORY_FLAT,
|
||||
CATEGORY_HOUSE,
|
||||
CATEGORY_OFFICE,
|
||||
CATEGORY_LAND,
|
||||
|
||||
STATUS_NORMAL,
|
||||
STATUS_RESERVED,
|
||||
STATUS_SOLD
|
||||
} from '../enums';
|
||||
STATUS_SOLD,
|
||||
} from '../../common/enums';
|
||||
|
||||
export default class ProstorCrawler {
|
||||
|
||||
constructor(fromPage = 0, toPage = 10, maxResults = 1000) {
|
||||
constructor (fromPage = 0, toPage = 10, maxResults = 1000) {
|
||||
this.fromPage = fromPage;
|
||||
this.toPage = toPage;
|
||||
this.maxResults = maxResults;
|
||||
}
|
||||
|
||||
async indexSingle(url) {
|
||||
async indexSingle (url) {
|
||||
try {
|
||||
const res = await fetch (url);
|
||||
const body = await res.text ();
|
||||
const $ = cheerio.load (body);
|
||||
|
||||
const res = await fetch(url);
|
||||
const body = await res.text();
|
||||
const $ = cheerio.load(body);
|
||||
const title = $ (
|
||||
'#nav_center_sub > div.content_area_1_left > div:nth-child(1) > h1'
|
||||
).text ();
|
||||
|
||||
const title = $('#nav_center_sub > div.content_area_1_left > div:nth-child(1) > h1').text();
|
||||
const category = $ (
|
||||
'#nav_center_sub > div.content_area_1_left > div.bottom10 > div.content_lr_in_show > div:nth-child(3) > div:nth-child(4) > div.size_rs > span'
|
||||
).text ();
|
||||
|
||||
const category = $('#nav_center_sub > div.content_area_1_left > div.bottom10 > div.content_lr_in_show > div:nth-child(3) > div:nth-child(4) > div.size_rs > span').text();
|
||||
const price = $ (
|
||||
'#nav_center_sub > div.content_area_1_left > div.bottom10 > div.content_lr_in_show > div:nth-child(1) > div.size_rs > strong'
|
||||
).text ();
|
||||
|
||||
const size = $ (
|
||||
'#nav_center_sub > div.content_area_1_left > div.bottom10 > div.content_lr_in_show > div:nth-child(4) > div:nth-child(7) > div.size_rs > span'
|
||||
).text ();
|
||||
const rooms = $ (
|
||||
'#nav_center_sub > div.content_area_1_left > div.bottom10 > div.content_lr_in_show > div:nth-child(4) > div:nth-child(2) > div.size_rs > span'
|
||||
).text ();
|
||||
|
||||
const price = $('#nav_center_sub > div.content_area_1_left > div.bottom10 > div.content_lr_in_show > div:nth-child(1) > div.size_rs > strong').text();
|
||||
|
||||
const size = $('#nav_center_sub > div.content_area_1_left > div.bottom10 > div.content_lr_in_show > div:nth-child(4) > div:nth-child(7) > div.size_rs > span').text();
|
||||
const rooms = $('#nav_center_sub > div.content_area_1_left > div.bottom10 > div.content_lr_in_show > div:nth-child(4) > div:nth-child(2) > div.size_rs > span').text();
|
||||
|
||||
const address = $('#nav_center_sub > div.content_area_1_left > div.bottom10 > div.content_lr_in_show > div:nth-child(3) > div:nth-child(3) > div.size_rs > span').text();
|
||||
const address = $ (
|
||||
'#nav_center_sub > div.content_area_1_left > div.bottom10 > div.content_lr_in_show > div:nth-child(3) > div:nth-child(3) > div.size_rs > span'
|
||||
).text ();
|
||||
|
||||
//const location = $('#artikal_glavni_div > div.artikal_lijevo > div.op.pop.mobile-lokacija').attr('data-content');
|
||||
|
||||
//const adType = $('#artikal_glavni_div > div.artikal_lijevo > div:nth-child(15) > div:nth-child(2) > div.df2').text();
|
||||
|
||||
const time = $('#nav_center_sub > div.content_area_1_right > div.bottom_d > div > strong:nth-child(1)').text();
|
||||
const time = $ (
|
||||
'#nav_center_sub > div.content_area_1_right > div.bottom_d > div > strong:nth-child(1)'
|
||||
).text ();
|
||||
|
||||
//const olxId = $('#artikal_glavni_div > div.artikal_lijevo > div:nth-child(15) > div:nth-child(4) > div.df2').text();
|
||||
|
||||
const descriptions = $('#nav_center_sub > div.content_area_1_left > div.bottom10 > div.content_ll_in_show > div:nth-child(1)').text();
|
||||
|
||||
|
||||
const floor = $('#nav_center_sub > div.content_area_1_left > div.bottom10 > div.content_lr_in_show > div:nth-child(4) > div:nth-child(6) > div.size_rs').text();
|
||||
const descriptions = $ (
|
||||
'#nav_center_sub > div.content_area_1_left > div.bottom10 > div.content_ll_in_show > div:nth-child(1)'
|
||||
).text ();
|
||||
|
||||
const floor = $ (
|
||||
'#nav_center_sub > div.content_area_1_left > div.bottom10 > div.content_lr_in_show > div:nth-child(4) > div:nth-child(6) > div.size_rs'
|
||||
).text ();
|
||||
|
||||
const latLngRe = /marker=([0-9]+\.[0-9]+)\,\s*([0-9]+\.[0-9]+)/g;
|
||||
|
||||
var has_map = false;
|
||||
|
||||
var tmpTitle = title.toUpperCase();
|
||||
var hasMap = false;
|
||||
|
||||
var tmpTitle = title.toUpperCase ();
|
||||
|
||||
var status = STATUS_NORMAL;
|
||||
if (tmpTitle.indexOf("PRODANO") !== -1) status = STATUS_SOLD;
|
||||
if (tmpTitle.indexOf("REZERVISANO") !== -1) status = STATUS_RESERVED;
|
||||
|
||||
if (tmpTitle.indexOf ('PRODANO') !== -1) status = STATUS_SOLD;
|
||||
if (tmpTitle.indexOf ('REZERVISANO') !== -1) status = STATUS_RESERVED;
|
||||
|
||||
//const latLngRe = /LatLng\(([0-9]+\.[0-9]+)\,\s+([0-9]+\.[0-9]+)\)/g;
|
||||
|
||||
const matches = latLngRe.exec(body);
|
||||
let lng = '',
|
||||
lat = '';
|
||||
has_map = false;
|
||||
const matches = latLngRe.exec (body);
|
||||
let lng = '', lat = '';
|
||||
hasMap = false;
|
||||
if (matches && matches.length >= 3) {
|
||||
lat = matches[1];
|
||||
lng = matches[2];
|
||||
has_map = true;
|
||||
hasMap = true;
|
||||
}
|
||||
|
||||
//console.log({
|
||||
//lat,
|
||||
//lng,
|
||||
//floor,
|
||||
//descriptions,
|
||||
//time,
|
||||
//price,
|
||||
//size,
|
||||
//category,
|
||||
//title
|
||||
//lat,
|
||||
//lng,
|
||||
//floor,
|
||||
//descriptions,
|
||||
//time,
|
||||
//price,
|
||||
//size,
|
||||
//category,
|
||||
//title
|
||||
//});
|
||||
|
||||
//const imgRe = /href":("[^"]*")/g;
|
||||
|
||||
const images = [];
|
||||
|
||||
|
||||
//const imgMatches = body.match(imgRe);
|
||||
|
||||
const parseRooms = (rooms) => parseInt([...rooms].filter(c => !isNaN(c)).filter(c => c.trim()).join())
|
||||
const parsePrice = (price) => parseFloat(price.replace(".", ""))
|
||||
const parseRooms = rooms =>
|
||||
parseInt (
|
||||
[...rooms].filter (c => !isNaN (c)).filter (c => c.trim ()).join ()
|
||||
);
|
||||
const parsePrice = price => parseFloat (price.replace ('.', ''));
|
||||
|
||||
|
||||
$('.fancybox').each((i, elem) => {
|
||||
const img = $(elem).attr('href');
|
||||
images.push(img);
|
||||
$ ('.fancybox').each ((i, elem) => {
|
||||
const img = $ (elem).attr ('href');
|
||||
images.push (img);
|
||||
});
|
||||
|
||||
//for (let i = 0; imgMatches && i < imgMatches.length; i++) {
|
||||
//let img = imgMatches[i].replace("href\":", "")
|
||||
//img = img.replace("\"", "");
|
||||
//img = img.replace("\"", "");
|
||||
//images.push(img);
|
||||
//let img = imgMatches[i].replace("href\":", "")
|
||||
//img = img.replace("\"", "");
|
||||
//img = img.replace("\"", "");
|
||||
//images.push(img);
|
||||
//}
|
||||
|
||||
//const uploadPromises = images.map(img => {
|
||||
//return cloudinary.uploader.upload(img);
|
||||
//return cloudinary.uploader.upload(img);
|
||||
//});
|
||||
|
||||
//const uploadResults = await Promise.all(uploadPromises);
|
||||
//const cloudinaryImages = uploadResults.map(ur => ur.url);
|
||||
|
||||
|
||||
const parsedPrice = parsePrice(price);
|
||||
const parsedPrice = parsePrice (price);
|
||||
let parsedRooms;
|
||||
|
||||
if (rooms === 'Garsonjera') {
|
||||
parsedRooms = 0;
|
||||
} else {
|
||||
parsedRooms = parseRooms(rooms);
|
||||
parsedRooms = parseRooms (rooms);
|
||||
}
|
||||
|
||||
const data = {
|
||||
category: this.getCategoryId(category),
|
||||
category: this.getCategoryId (category),
|
||||
url,
|
||||
title,
|
||||
price: isNaN(parsedPrice) ? price : parsedPrice,
|
||||
size: parseFloat(size),
|
||||
price: isNaN (parsedPrice) ? price : parsedPrice,
|
||||
size: parseFloat (size),
|
||||
rooms: parsedRooms,
|
||||
floor: parseInt(floor),
|
||||
floor: parseInt (floor),
|
||||
address,
|
||||
adType: AD_TYPE_SALE,
|
||||
time,
|
||||
@@ -152,69 +160,68 @@ export default class ProstorCrawler {
|
||||
longDescription: descriptions,
|
||||
lat,
|
||||
lng,
|
||||
loc: [parseFloat(lat), parseFloat(lng)],
|
||||
has_map,
|
||||
loc: [parseFloat (lat), parseFloat (lng)],
|
||||
hasMap,
|
||||
status,
|
||||
//images: cloudinaryImages
|
||||
images
|
||||
images,
|
||||
};
|
||||
console.log(data);
|
||||
console.log (data);
|
||||
|
||||
return data;
|
||||
} catch (e) {
|
||||
console.error('Exception caught: ' + e.message);
|
||||
console.error ('Exception caught: ' + e.message);
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
async indexPage(pageNr, maxResults = 1000) {
|
||||
async indexPage (pageNr, maxResults = 1000) {
|
||||
try {
|
||||
|
||||
console.log('Starting to index page: ' + pageNr);
|
||||
console.log ('Starting to index page: ' + pageNr);
|
||||
const url = `http://prostor.ba/index.php`;
|
||||
|
||||
const data = new FormData();
|
||||
data.append('sortCombo', 'e.date_create DESC');
|
||||
data.append('command', '');
|
||||
data.append('action', 'show');
|
||||
data.append('page', pageNr);
|
||||
data.append('param', 'ponuda.inc.php');
|
||||
data.append('checkNO', 0);
|
||||
data.append('order', 'e.date_create DESC');
|
||||
data.append('reset', 0);
|
||||
data.append('estate_action', 1);
|
||||
data.append('Itemid', 785);
|
||||
const data = new FormData ();
|
||||
data.append ('sortCombo', 'e.date_create DESC');
|
||||
data.append ('command', '');
|
||||
data.append ('action', 'show');
|
||||
data.append ('page', pageNr);
|
||||
data.append ('param', 'ponuda.inc.php');
|
||||
data.append ('checkNO', 0);
|
||||
data.append ('order', 'e.date_create DESC');
|
||||
data.append ('reset', 0);
|
||||
data.append ('estate_action', 1);
|
||||
data.append ('Itemid', 785);
|
||||
|
||||
const res = await fetch(url, {
|
||||
const res = await fetch (url, {
|
||||
method: 'POST',
|
||||
body: data
|
||||
body: data,
|
||||
});
|
||||
const body = await res.text();
|
||||
const $ = cheerio.load(body);
|
||||
const body = await res.text ();
|
||||
const $ = cheerio.load (body);
|
||||
|
||||
const hrefs = [];
|
||||
$('.nekret_box').each((i, elem) => {
|
||||
const href = $(elem).find("a").first().attr('href');
|
||||
hrefs.push(`http://prostor.ba/${href}`);
|
||||
$ ('.nekret_box').each ((i, elem) => {
|
||||
const href = $ (elem).find ('a').first ().attr ('href');
|
||||
hrefs.push (`http://prostor.ba/${href}`);
|
||||
});
|
||||
|
||||
const results = {};
|
||||
for (const href of hrefs) {
|
||||
console.log(`indexing: ${href}`);
|
||||
console.log (`indexing: ${href}`);
|
||||
|
||||
const singleData = await this.indexSingle(href);
|
||||
const singleData = await this.indexSingle (href);
|
||||
|
||||
if (singleData) {
|
||||
results[href] = singleData;
|
||||
}
|
||||
|
||||
await this.sleep(500);
|
||||
await this.sleep (500);
|
||||
}
|
||||
|
||||
return results;
|
||||
} catch (e) {
|
||||
console.error('Exception caught:' + e);
|
||||
console.error ('Exception caught:' + e);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -230,22 +237,26 @@ export default class ProstorCrawler {
|
||||
}
|
||||
}
|
||||
|
||||
async sleep(ms) {
|
||||
return new Promise(resolve => setTimeout(resolve, ms));
|
||||
async sleep (ms) {
|
||||
return new Promise (resolve => setTimeout (resolve, ms));
|
||||
}
|
||||
|
||||
async indexPages(start, end, maxResults = 1000) {
|
||||
async indexPages (start, end, maxResults = 1000) {
|
||||
let results = {};
|
||||
for (let i = start; i <= end; i++) {
|
||||
let result = await this.indexPage(i, maxResults);
|
||||
Object.assign(results, result)
|
||||
await this.sleep(5000);
|
||||
let result = await this.indexPage (i, maxResults);
|
||||
Object.assign (results, result);
|
||||
await this.sleep (5000);
|
||||
}
|
||||
return results;
|
||||
}
|
||||
|
||||
async crawl() {
|
||||
let results = await this.indexPages(this.fromPage, this.toPage, this.maxResults);
|
||||
async crawl () {
|
||||
let results = await this.indexPages (
|
||||
this.fromPage,
|
||||
this.toPage,
|
||||
this.maxResults
|
||||
);
|
||||
return results;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,45 +1,39 @@
|
||||
'use strict'
|
||||
'use strict';
|
||||
|
||||
let fetch = require('node-fetch');
|
||||
let cheerio = require('cheerio');
|
||||
let fs = require('fs');
|
||||
let cloudinary = require('cloudinary');
|
||||
let FormData = require('form-data');
|
||||
let fetch = require ('node-fetch');
|
||||
let cheerio = require ('cheerio');
|
||||
let fs = require ('fs');
|
||||
let cloudinary = require ('cloudinary');
|
||||
let FormData = require ('form-data');
|
||||
|
||||
import {
|
||||
AD_TYPE_SALE,
|
||||
|
||||
IGNORED_USERNAMES,
|
||||
|
||||
CATEGORY_FLAT,
|
||||
CATEGORY_HOUSE,
|
||||
CATEGORY_OFFICE,
|
||||
CATEGORY_LAND,
|
||||
CATEGORY_APARTMENT,
|
||||
CATEGORY_GARAGE,
|
||||
|
||||
STATUS_NORMAL,
|
||||
STATUS_RESERVED,
|
||||
STATUS_SOLD
|
||||
} from '../enums';
|
||||
STATUS_SOLD,
|
||||
} from '../../common/enums';
|
||||
|
||||
export default class RentalCrawler {
|
||||
|
||||
constructor(fromPage = 0, toPage = 10, maxResults = 1000) {
|
||||
|
||||
console.log("Rental Crawler");
|
||||
constructor (fromPage = 0, toPage = 10, maxResults = 1000) {
|
||||
console.log ('Rental Crawler');
|
||||
|
||||
this.fromPage = fromPage;
|
||||
this.toPage = toPage;
|
||||
this.maxResults = maxResults;
|
||||
}
|
||||
|
||||
async indexSingle(url) {
|
||||
async indexSingle (url) {
|
||||
try {
|
||||
|
||||
const res = await fetch(url);
|
||||
const body = await res.text();
|
||||
const $ = cheerio.load(body);
|
||||
const res = await fetch (url);
|
||||
const body = await res.text ();
|
||||
const $ = cheerio.load (body);
|
||||
|
||||
var title;
|
||||
var category;
|
||||
@@ -50,90 +44,119 @@ export default class RentalCrawler {
|
||||
var descriptions;
|
||||
var floor;
|
||||
var floor;
|
||||
var time;
|
||||
var time;
|
||||
var lat;
|
||||
var lng;
|
||||
var has_map;
|
||||
var hasMap;
|
||||
var status;
|
||||
|
||||
//No JSON string -> No map
|
||||
try{
|
||||
let complete_data;
|
||||
let data_json_string;
|
||||
let data_json;
|
||||
try {
|
||||
let completeData;
|
||||
let dataJsonString;
|
||||
let dataJson;
|
||||
|
||||
const start_n = 5;
|
||||
const last_n = 15;
|
||||
const startN = 5;
|
||||
const lastN = 15;
|
||||
|
||||
for (let i=start_n;i<=last_n;i++){
|
||||
try{
|
||||
complete_data = $('body > div.container-fluid > div.container > div:nth-child(2) > div.col-xs-12.col-sm-12.col-md-12.col-lg-9.content-main > div:nth-child('+i+') > script').text();
|
||||
data_json_string = complete_data.slice(21,-1);
|
||||
data_json = JSON.parse(data_json_string);
|
||||
for (let i = startN; i <= lastN; i++) {
|
||||
try {
|
||||
completeData = $ (
|
||||
'body > div.container-fluid > div.container > div:nth-child(2) > div.col-xs-12.col-sm-12.col-md-12.col-lg-9.content-main > div:nth-child(' +
|
||||
i +
|
||||
') > script'
|
||||
).text ();
|
||||
dataJsonString = completeData.slice (21, -1);
|
||||
dataJson = JSON.parse (dataJsonString);
|
||||
break;
|
||||
}catch(e){
|
||||
console.log("No JSON string");
|
||||
if (i===last_n) throw(e);
|
||||
} catch (e) {
|
||||
console.log ('No JSON string');
|
||||
if (i === lastN) throw e;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
title = data_json["re_realEstates_portalName"];
|
||||
category = this.getCategoryIdfromNumber(parseInt(data_json["re_types_id"])); //categories from JSON string doesn't match categories in ENUMS
|
||||
price = parseFloat(data_json["re_realEstates_price"]);
|
||||
size = parseFloat(data_json["re_realEstates_area"]);
|
||||
rooms = parseInt(data_json["re_realEstates_roomsNO"]);
|
||||
address = data_json["re_realEstates_address"];
|
||||
//descriptions = data_json["re_realEstates_description"];
|
||||
floor = parseInt(data_json["re_realEstates_floorNO"]);
|
||||
|
||||
let time_array = data_json["re_realEstates_inserted"].slice(0,data_json["re_realEstates_inserted"].indexOf(' ')).split('-');
|
||||
time = time_array[2]+'.'+time_array[1]+'.'+time_array[0];
|
||||
|
||||
lat = data_json["re_realEstates_latitude"];
|
||||
lng = data_json["re_realEstates_longitude"];
|
||||
has_map = true;
|
||||
}catch(e){
|
||||
console.log("error : " + e);
|
||||
title = dataJson['re_realEstates_portalName'];
|
||||
category = this.getCategoryIdfromNumber (
|
||||
parseInt (dataJson['re_types_id'])
|
||||
); //categories from JSON string doesn't match categories in ENUMS
|
||||
price = parseFloat (dataJson['re_realEstates_price']);
|
||||
size = parseFloat (dataJson['re_realEstates_area']);
|
||||
rooms = parseInt (dataJson['re_realEstates_roomsNO']);
|
||||
address = dataJson['re_realEstates_address'];
|
||||
//descriptions = dataJson["re_realEstates_description"];
|
||||
floor = parseInt (dataJson['re_realEstates_floorNO']);
|
||||
|
||||
let timeArray = dataJson['re_realEstates_inserted']
|
||||
.slice (0, dataJson['re_realEstates_inserted'].indexOf (' '))
|
||||
.split ('-');
|
||||
time = timeArray[2] + '.' + timeArray[1] + '.' + timeArray[0];
|
||||
|
||||
lat = dataJson['re_realEstates_latitude'];
|
||||
lng = dataJson['re_realEstates_longitude'];
|
||||
hasMap = true;
|
||||
} catch (e) {
|
||||
console.log ('error : ' + e);
|
||||
//This ad has no JSON string, informations should be retrieved using HTML selectors
|
||||
time=undefined;
|
||||
lat=0;
|
||||
lng=0;
|
||||
has_map = false;
|
||||
time = undefined;
|
||||
lat = 0;
|
||||
lng = 0;
|
||||
hasMap = false;
|
||||
|
||||
price = (parseFloat($('body > div.container-fluid > div.container > div:nth-child(2) > div.col-xs-12.col-sm-12.col-md-12.col-lg-9.content-main > div:nth-child(1) > div > div > div.col-xs-12.col-sm-4.box-details > div.prices > span.pull-left').text().replace(',','').replace('.','')))/100;
|
||||
price =
|
||||
parseFloat (
|
||||
$ (
|
||||
'body > div.container-fluid > div.container > div:nth-child(2) > div.col-xs-12.col-sm-12.col-md-12.col-lg-9.content-main > div:nth-child(1) > div > div > div.col-xs-12.col-sm-4.box-details > div.prices > span.pull-left'
|
||||
)
|
||||
.text ()
|
||||
.replace (',', '')
|
||||
.replace ('.', '')
|
||||
) / 100;
|
||||
|
||||
const props_list = {};
|
||||
const propsList = {};
|
||||
|
||||
$('body > div.container-fluid > div.container > div:nth-child(2) > div.col-xs-12.col-sm-12.col-md-12.col-lg-9.content-main > div:nth-child(1) > div > div > div.col-xs-12.col-sm-4.box-details > div.body').contents().map((i,elem)=>{
|
||||
const entry = $(elem).text().trim().split(':');
|
||||
if (entry[0]) props_list[entry[0]]=entry[1];
|
||||
});
|
||||
$ (
|
||||
'body > div.container-fluid > div.container > div:nth-child(2) > div.col-xs-12.col-sm-12.col-md-12.col-lg-9.content-main > div:nth-child(1) > div > div > div.col-xs-12.col-sm-4.box-details > div.body'
|
||||
)
|
||||
.contents ()
|
||||
.map ((i, elem) => {
|
||||
const entry = $ (elem).text ().trim ().split (':');
|
||||
if (entry[0]) propsList[entry[0]] = entry[1];
|
||||
});
|
||||
|
||||
address = props_list['Ulica'];
|
||||
size = parseFloat((props_list['Površina']).replace(',','').replace('.',''))/100;
|
||||
rooms = parseInt(props_list['Broj soba']);
|
||||
floor = parseInt(props_list['Spratnost']);
|
||||
address = propsList['Ulica'];
|
||||
size =
|
||||
parseFloat (
|
||||
propsList['Površina'].replace (',', '').replace ('.', '')
|
||||
) / 100;
|
||||
rooms = parseInt (propsList['Broj soba']);
|
||||
floor = parseInt (propsList['Spratnost']);
|
||||
|
||||
title = $('div.container-fluid > div.container > div.row.content-top > div.col-xs-12.col-sm-6.col-md-9 > div.description.pull-left > h1').text();
|
||||
descriptions = $('#b1 > div > div > div').text();
|
||||
title = $ (
|
||||
'div.container-fluid > div.container > div.row.content-top > div.col-xs-12.col-sm-6.col-md-9 > div.description.pull-left > h1'
|
||||
).text ();
|
||||
descriptions = $ ('#b1 > div > div > div').text ();
|
||||
|
||||
const full_category = $('body > div.container-fluid > div.container > div:nth-child(2) > div.col-xs-12.col-sm-12.col-md-12.col-lg-9.content-main > div:nth-child(1) > div > div > div.col-xs-12.col-sm-4.box-details > div.title > p').text().split(',',3);
|
||||
|
||||
category = (full_category.size > 2) ?
|
||||
this.getCategoryIdfromText(full_category[0]+full_category[1]) :
|
||||
this.getCategoryIdfromText(full_category[0]);
|
||||
const fullCategory = $ (
|
||||
'body > div.container-fluid > div.container > div:nth-child(2) > div.col-xs-12.col-sm-12.col-md-12.col-lg-9.content-main > div:nth-child(1) > div > div > div.col-xs-12.col-sm-4.box-details > div.title > p'
|
||||
)
|
||||
.text ()
|
||||
.split (',', 3);
|
||||
|
||||
category = fullCategory.size > 2
|
||||
? this.getCategoryIdfromText (fullCategory[0] + fullCategory[1])
|
||||
: this.getCategoryIdfromText (fullCategory[0]);
|
||||
}
|
||||
|
||||
descriptions = $('#b1 > div > div > div').text();
|
||||
status = this.getStatusIdFromText($('#a1 > div.box-badges > div').text());
|
||||
|
||||
|
||||
descriptions = $ ('#b1 > div > div > div').text ();
|
||||
status = this.getStatusIdFromText (
|
||||
$ ('#a1 > div.box-badges > div').text ()
|
||||
);
|
||||
|
||||
const images = [];
|
||||
|
||||
$(".img-gallery").contents().map((i,elem)=>{
|
||||
const tmp =$(elem).attr('data-preview');
|
||||
if(tmp) images.push(tmp);
|
||||
|
||||
$ ('.img-gallery').contents ().map ((i, elem) => {
|
||||
const tmp = $ (elem).attr ('data-preview');
|
||||
if (tmp) images.push (tmp);
|
||||
});
|
||||
|
||||
const data = {
|
||||
@@ -151,28 +174,26 @@ export default class RentalCrawler {
|
||||
longDescription: descriptions,
|
||||
lat,
|
||||
lng,
|
||||
loc: [parseFloat(lat), parseFloat(lng)],
|
||||
has_map,
|
||||
loc: [parseFloat (lat), parseFloat (lng)],
|
||||
hasMap,
|
||||
status,
|
||||
//images: cloudinaryImages
|
||||
images
|
||||
images,
|
||||
};
|
||||
|
||||
return data;
|
||||
|
||||
} catch (e) {
|
||||
console.error('Exception caught: ' + e.message);
|
||||
console.error ('Exception caught: ' + e.message);
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
async indexPage(pageNr, maxResults = 1000) {
|
||||
async indexPage (pageNr, maxResults = 1000) {
|
||||
try {
|
||||
console.log ('Starting to index page: ' + pageNr);
|
||||
|
||||
console.log('Starting to index page: ' + pageNr);
|
||||
|
||||
const url = "http://www.rental.ba/pretraga/prodaja-1/stranica-" + pageNr;
|
||||
const url = 'http://www.rental.ba/pretraga/prodaja-1/stranica-' + pageNr;
|
||||
|
||||
/*
|
||||
const data = new FormData();
|
||||
@@ -191,145 +212,213 @@ export default class RentalCrawler {
|
||||
data.append('re_subTypes_id', 1);
|
||||
*/
|
||||
|
||||
const res = await fetch(url, {
|
||||
method: 'POST'
|
||||
const res = await fetch (url, {
|
||||
method: 'POST',
|
||||
//body: data
|
||||
});
|
||||
const body = await res.text();
|
||||
const $ = cheerio.load(body);
|
||||
const body = await res.text ();
|
||||
const $ = cheerio.load (body);
|
||||
|
||||
const hrefs = [];
|
||||
$('.middle').each((i, elem) => {
|
||||
const href = $(elem).find("a").first().attr('href');
|
||||
hrefs.push(href);
|
||||
$ ('.middle').each ((i, elem) => {
|
||||
const href = $ (elem).find ('a').first ().attr ('href');
|
||||
hrefs.push (href);
|
||||
});
|
||||
|
||||
const results = {};
|
||||
for (const href of hrefs) {
|
||||
console.log(`indexing: ${href}`);
|
||||
console.log (`indexing: ${href}`);
|
||||
|
||||
const singleData = await this.indexSingle(href);
|
||||
const singleData = await this.indexSingle (href);
|
||||
|
||||
if (singleData) {
|
||||
results[href] = singleData;
|
||||
}
|
||||
|
||||
await this.sleep(500);
|
||||
await this.sleep (500);
|
||||
}
|
||||
|
||||
return results;
|
||||
|
||||
} catch (e) {
|
||||
console.error('Exception caught:' + e);
|
||||
console.error ('Exception caught:' + e);
|
||||
}
|
||||
}
|
||||
|
||||
getCategoryIdfromNumber(category){
|
||||
switch(category){
|
||||
case (1): return CATEGORY_HOUSE; break;
|
||||
case (2): return CATEGORY_FLAT; break;
|
||||
case (3): return CATEGORY_APARTMENT; break;
|
||||
case (4): return CATEGORY_OFFICE; break;
|
||||
case (5): return CATEGORY_LAND; break;
|
||||
case (6): return CATEGORY_GARAGE; break;
|
||||
getCategoryIdfromNumber (category) {
|
||||
switch (category) {
|
||||
case 1:
|
||||
return CATEGORY_HOUSE;
|
||||
case 2:
|
||||
return CATEGORY_FLAT;
|
||||
case 3:
|
||||
return CATEGORY_APARTMENT;
|
||||
case 4:
|
||||
return CATEGORY_OFFICE;
|
||||
case 5:
|
||||
return CATEGORY_LAND;
|
||||
case 6:
|
||||
return CATEGORY_GARAGE;
|
||||
}
|
||||
}
|
||||
|
||||
getCategoryIdfromText (category) {
|
||||
switch(category){
|
||||
case ('samostojeća'): return CATEGORY_HOUSE
|
||||
case ('dvojna'): return CATEGORY_HOUSE
|
||||
case ('kuća u nizu'): return CATEGORY_HOUSE
|
||||
case ('stambeno-poslovni objekt'): return CATEGORY_HOUSE
|
||||
case ('prizemnica'): return CATEGORY_HOUSE
|
||||
case ('kuća na moru'): return CATEGORY_HOUSE
|
||||
case ('kuća u izgradnji'): return CATEGORY_HOUSE
|
||||
case ('dvorac'): return CATEGORY_HOUSE
|
||||
case ('apartmanska kuća'): return CATEGORY_HOUSE
|
||||
case ('porodična kuća'): return CATEGORY_HOUSE
|
||||
case ('vikend kuća'): return CATEGORY_HOUSE
|
||||
case ('luksuzna kuća'): return CATEGORY_HOUSE
|
||||
case ('kamena'): return CATEGORY_HOUSE
|
||||
case ('vila'): return CATEGORY_HOUSE
|
||||
case ('splav'): return CATEGORY_HOUSE
|
||||
switch (category) {
|
||||
case 'samostojeća':
|
||||
return CATEGORY_HOUSE;
|
||||
case 'dvojna':
|
||||
return CATEGORY_HOUSE;
|
||||
case 'kuća u nizu':
|
||||
return CATEGORY_HOUSE;
|
||||
case 'stambeno-poslovni objekt':
|
||||
return CATEGORY_HOUSE;
|
||||
case 'prizemnica':
|
||||
return CATEGORY_HOUSE;
|
||||
case 'kuća na moru':
|
||||
return CATEGORY_HOUSE;
|
||||
case 'kuća u izgradnji':
|
||||
return CATEGORY_HOUSE;
|
||||
case 'dvorac':
|
||||
return CATEGORY_HOUSE;
|
||||
case 'apartmanska kuća':
|
||||
return CATEGORY_HOUSE;
|
||||
case 'porodična kuća':
|
||||
return CATEGORY_HOUSE;
|
||||
case 'vikend kuća':
|
||||
return CATEGORY_HOUSE;
|
||||
case 'luksuzna kuća':
|
||||
return CATEGORY_HOUSE;
|
||||
case 'kamena':
|
||||
return CATEGORY_HOUSE;
|
||||
case 'vila':
|
||||
return CATEGORY_HOUSE;
|
||||
case 'splav':
|
||||
return CATEGORY_HOUSE;
|
||||
|
||||
case ('stan u zgradi'): return CATEGORY_FLAT
|
||||
case ('stan u kući'): return CATEGORY_FLAT
|
||||
case ('stan višeetažni'): return CATEGORY_FLAT
|
||||
case ('stan višeetažni u kući'): return CATEGORY_FLAT
|
||||
case ('stan u starijoj zgradi'): return CATEGORY_FLAT
|
||||
case ('stan u novogradnji'): return CATEGORY_FLAT
|
||||
case ('stan u neboderu'): return CATEGORY_FLAT
|
||||
case ('Korišten stan u novogradnji'): return CATEGORY_FLAT
|
||||
case 'stan u zgradi':
|
||||
return CATEGORY_FLAT;
|
||||
case 'stan u kući':
|
||||
return CATEGORY_FLAT;
|
||||
case 'stan višeetažni':
|
||||
return CATEGORY_FLAT;
|
||||
case 'stan višeetažni u kući':
|
||||
return CATEGORY_FLAT;
|
||||
case 'stan u starijoj zgradi':
|
||||
return CATEGORY_FLAT;
|
||||
case 'stan u novogradnji':
|
||||
return CATEGORY_FLAT;
|
||||
case 'stan u neboderu':
|
||||
return CATEGORY_FLAT;
|
||||
case 'Korišten stan u novogradnji':
|
||||
return CATEGORY_FLAT;
|
||||
|
||||
case ('apartman na moru'): return CATEGORY_APARTMENT
|
||||
case ('apartman u planini'): return CATEGORY_APARTMENT
|
||||
|
||||
case ('unutrašnje garažno mjesto'): return CATEGORY_GARAGE
|
||||
case ('unutrašnje parkirno mjesto'): return CATEGORY_GARAGE
|
||||
case 'apartman na moru':
|
||||
return CATEGORY_APARTMENT;
|
||||
case 'apartman u planini':
|
||||
return CATEGORY_APARTMENT;
|
||||
|
||||
case ('građevinsko'): return CATEGORY_LAND
|
||||
case ('građevinsko stambeno'): return CATEGORY_LAND
|
||||
case ('zemljište, ostalo'): return CATEGORY_LAND
|
||||
case ('odmaralište'): return CATEGORY_LAND
|
||||
case ('oranica'): return CATEGORY_LAND
|
||||
case ('šuma'): return CATEGORY_LAND
|
||||
case ('livada'): return CATEGORY_LAND
|
||||
case ('građevinsko M2'): return CATEGORY_LAND
|
||||
case ('građevinsko M1'): return CATEGORY_LAND
|
||||
case ('građevinsko - turističko'): return CATEGORY_LAND
|
||||
case ('građevinsko - poslovno'): return CATEGORY_LAND
|
||||
case ('otok'): return CATEGORY_LAND
|
||||
case ('poljoprivredno'): return CATEGORY_LAND
|
||||
|
||||
|
||||
case ('lokal'): return CATEGORY_OFFICE
|
||||
case ('ured'): return CATEGORY_OFFICE
|
||||
case ('skladište ili garaža'): return CATEGORY_OFFICE
|
||||
case ('radionica'): return CATEGORY_OFFICE
|
||||
case ('tvornica'): return CATEGORY_OFFICE
|
||||
case ('restoran'): return CATEGORY_OFFICE
|
||||
case ('sportski centar'): return CATEGORY_OFFICE
|
||||
case ('ordinacija'): return CATEGORY_OFFICE
|
||||
case ('kiosk'): return CATEGORY_OFFICE
|
||||
case ('auto-praonica'): return CATEGORY_OFFICE
|
||||
case ('poslovna zgrada'): return CATEGORY_OFFICE
|
||||
case ('skladište'): return CATEGORY_OFFICE
|
||||
case ('garaža'): return CATEGORY_OFFICE
|
||||
case ('hotel'): return CATEGORY_OFFICE
|
||||
case ('pansion'): return CATEGORY_OFFICE
|
||||
case ('apartmanska zgrada'): return CATEGORY_OFFICE
|
||||
case ('trgovina'): return CATEGORY_OFFICE
|
||||
case ('prodajno skladišni'): return CATEGORY_OFFICE
|
||||
case ('proizvodno skladišni'): return CATEGORY_OFFICE
|
||||
case ('Kancelarije'): return CATEGORY_OFFICE
|
||||
case ('Poslovni prostor'): return CATEGORY_OFFICE
|
||||
|
||||
case 'unutrašnje garažno mjesto':
|
||||
return CATEGORY_GARAGE;
|
||||
case 'unutrašnje parkirno mjesto':
|
||||
return CATEGORY_GARAGE;
|
||||
|
||||
case 'građevinsko':
|
||||
return CATEGORY_LAND;
|
||||
case 'građevinsko stambeno':
|
||||
return CATEGORY_LAND;
|
||||
case 'zemljište, ostalo':
|
||||
return CATEGORY_LAND;
|
||||
case 'odmaralište':
|
||||
return CATEGORY_LAND;
|
||||
case 'oranica':
|
||||
return CATEGORY_LAND;
|
||||
case 'šuma':
|
||||
return CATEGORY_LAND;
|
||||
case 'livada':
|
||||
return CATEGORY_LAND;
|
||||
case 'građevinsko M2':
|
||||
return CATEGORY_LAND;
|
||||
case 'građevinsko M1':
|
||||
return CATEGORY_LAND;
|
||||
case 'građevinsko - turističko':
|
||||
return CATEGORY_LAND;
|
||||
case 'građevinsko - poslovno':
|
||||
return CATEGORY_LAND;
|
||||
case 'otok':
|
||||
return CATEGORY_LAND;
|
||||
case 'poljoprivredno':
|
||||
return CATEGORY_LAND;
|
||||
|
||||
case 'lokal':
|
||||
return CATEGORY_OFFICE;
|
||||
case 'ured':
|
||||
return CATEGORY_OFFICE;
|
||||
case 'skladište ili garaža':
|
||||
return CATEGORY_OFFICE;
|
||||
case 'radionica':
|
||||
return CATEGORY_OFFICE;
|
||||
case 'tvornica':
|
||||
return CATEGORY_OFFICE;
|
||||
case 'restoran':
|
||||
return CATEGORY_OFFICE;
|
||||
case 'sportski centar':
|
||||
return CATEGORY_OFFICE;
|
||||
case 'ordinacija':
|
||||
return CATEGORY_OFFICE;
|
||||
case 'kiosk':
|
||||
return CATEGORY_OFFICE;
|
||||
case 'auto-praonica':
|
||||
return CATEGORY_OFFICE;
|
||||
case 'poslovna zgrada':
|
||||
return CATEGORY_OFFICE;
|
||||
case 'skladište':
|
||||
return CATEGORY_OFFICE;
|
||||
case 'garaža':
|
||||
return CATEGORY_OFFICE;
|
||||
case 'hotel':
|
||||
return CATEGORY_OFFICE;
|
||||
case 'pansion':
|
||||
return CATEGORY_OFFICE;
|
||||
case 'apartmanska zgrada':
|
||||
return CATEGORY_OFFICE;
|
||||
case 'trgovina':
|
||||
return CATEGORY_OFFICE;
|
||||
case 'prodajno skladišni':
|
||||
return CATEGORY_OFFICE;
|
||||
case 'proizvodno skladišni':
|
||||
return CATEGORY_OFFICE;
|
||||
case 'Kancelarije':
|
||||
return CATEGORY_OFFICE;
|
||||
case 'Poslovni prostor':
|
||||
return CATEGORY_OFFICE;
|
||||
}
|
||||
}
|
||||
|
||||
getStatusIdFromText(status){
|
||||
getStatusIdFromText (status) {
|
||||
if (status === 'Prodato') return STATUS_SOLD;
|
||||
|
||||
|
||||
return STATUS_NORMAL;
|
||||
}
|
||||
|
||||
async sleep(ms) {
|
||||
return new Promise(resolve => setTimeout(resolve, ms));
|
||||
async sleep (ms) {
|
||||
return new Promise (resolve => setTimeout (resolve, ms));
|
||||
}
|
||||
|
||||
async indexPages(start, end, maxResults = 1000) {
|
||||
async indexPages (start, end, maxResults = 1000) {
|
||||
let results = {};
|
||||
for (let i = start; i <= end; i++) {
|
||||
let result = await this.indexPage(i, maxResults);
|
||||
Object.assign(results, result)
|
||||
await this.sleep(5000);
|
||||
let result = await this.indexPage (i, maxResults);
|
||||
Object.assign (results, result);
|
||||
await this.sleep (5000);
|
||||
}
|
||||
return results;
|
||||
}
|
||||
|
||||
async crawl() {
|
||||
let results = await this.indexPages(this.fromPage, this.toPage, this.maxResults);
|
||||
async crawl () {
|
||||
let results = await this.indexPages (
|
||||
this.fromPage,
|
||||
this.toPage,
|
||||
this.maxResults
|
||||
);
|
||||
return results;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4,17 +4,14 @@ module.exports = {
|
||||
|
||||
output: {
|
||||
path: __dirname + "/build",
|
||||
filename: "crawler.js",
|
||||
devtool: 'source-map'
|
||||
filename: "crawler.js"
|
||||
},
|
||||
module: {
|
||||
|
||||
loaders: [{
|
||||
test: /.js?$/,
|
||||
loader: 'babel-loader',
|
||||
exclude: /node_modules/,
|
||||
presets: ['es2015'],
|
||||
plugins: ['transform-async-to-generator']
|
||||
exclude: /node_modules/
|
||||
}, {
|
||||
test: /.json?$/,
|
||||
loader: 'json-loader',
|
||||
|
||||
2395
crawler/yarn.lock
2395
crawler/yarn.lock
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user