Rental crawler #2
File diff suppressed because it is too large
Load Diff
16
backend/enums.js
Normal file
16
backend/enums.js
Normal file
@@ -0,0 +1,16 @@
|
||||
/**
 * Shared enum-like constants used by the API and the crawlers.
 *
 * The numeric values are persisted with each listing and used in database
 * queries, so they must stay stable once data has been stored.
 */

// Kind of advertisement: the property is either offered for sale or for rent.
export const AD_TYPE_SALE = 1;
export const AD_TYPE_RENT = 2;

// Usernames the crawlers import alongside the other enums.
// NOTE(review): how this list is applied is not visible from this file — confirm in the crawlers.
export const IGNORED_USERNAMES = ['rental'];

// Property categories stored in a listing's `category` field.
export const CATEGORY_FLAT = 0;
export const CATEGORY_HOUSE = 1;
export const CATEGORY_OFFICE = 2;
export const CATEGORY_LAND = 3;
export const CATEGORY_APARTMENT = 4;
export const CATEGORY_GARAGE = 5;

// Availability of a listing, stored in its `status` field. The listing search
// filters on STATUS_NORMAL, so sold and reserved properties are not returned.
export const STATUS_NORMAL = 0;
export const STATUS_RESERVED = 1;
export const STATUS_SOLD = 2;
|
||||
|
||||
1441
backend/package-lock.json
generated
Normal file
1441
backend/package-lock.json
generated
Normal file
File diff suppressed because it is too large
Load Diff
@@ -4,6 +4,12 @@ import distanceInWordsToNow from 'date-fns/distance_in_words_to_now';
|
||||
import parseDate from 'date-fns/format';
|
||||
import moment from 'moment';
|
||||
|
||||
import {
|
||||
STATUS_NORMAL,
|
||||
STATUS_RESERVED,
|
||||
STATUS_SOLD
|
||||
} from "./enums";
|
||||
|
||||
var hr = require('date-fns/locale/hr');
|
||||
|
||||
var MongoClient = require('mongodb').MongoClient;
|
||||
@@ -78,6 +84,7 @@ router.get('/search/listings/:id', async (req, res, next) => {
|
||||
|
||||
router.get('/search/listings', async (req, res, next) => {
|
||||
try {
|
||||
console.log("Search listings");
|
||||
const bounds = req.query.bounds || '';
|
||||
const minPrice = req.query.minPrice;
|
||||
const maxPrice = req.query.maxPrice;
|
||||
@@ -93,6 +100,22 @@ router.get('/search/listings', async (req, res, next) => {
|
||||
const properties = db.collection('listings');
|
||||
let query = {};
|
||||
|
||||
|
||||
//Get only ads with location
|
||||
query = Object.assign(query, {
|
||||
has_map: true
|
||||
});
|
||||
|
||||
//AND
|
||||
|
||||
//Do not show sold or reserved properties
|
||||
query = Object.assign(query, {
|
||||
status: STATUS_NORMAL
|
||||
});
|
||||
|
||||
//AND
|
||||
|
||||
//Show ads that fall inside visible map
|
||||
if (bounds) {
|
||||
const [lat1, lng1, lat2, lng2] = bounds.split(',').map(parseFloat)
|
||||
const box = [[lat1, lng1], [lat2, lng2]];
|
||||
@@ -106,12 +129,18 @@ router.get('/search/listings', async (req, res, next) => {
|
||||
});
|
||||
}
|
||||
|
||||
//AND
|
||||
|
||||
//Show only selected type of ads (selling or renting)
|
||||
if (adType) {
|
||||
query = Object.assign(query, {
|
||||
adType: parseInt(adType)
|
||||
});
|
||||
}
|
||||
|
||||
//AND
|
||||
|
||||
//Match price
|
||||
if (minPrice || maxPrice) {
|
||||
const price = {}
|
||||
if (minPrice) {
|
||||
@@ -127,25 +156,37 @@ router.get('/search/listings', async (req, res, next) => {
|
||||
});
|
||||
}
|
||||
|
||||
const and = [];
|
||||
//AND
|
||||
|
||||
//Match number of rooms
|
||||
if (rooms) {
|
||||
const room_count = [];
|
||||
let four_plus = false;
|
||||
|
||||
const allRooms = rooms.split(',');
|
||||
const or = allRooms.map(val => {
|
||||
if (val === '4+') {
|
||||
return {
|
||||
rooms: {
|
||||
"$gte": 4
|
||||
}
|
||||
}
|
||||
allRooms.map((val)=>{
|
||||
if (parseInt(val)!==4){
|
||||
room_count.push(parseInt(val));
|
||||
}else{
|
||||
four_plus=true;
|
||||
}
|
||||
return {
|
||||
rooms: parseFloat(val)
|
||||
};
|
||||
});
|
||||
|
||||
and.push({ "$or": or });
|
||||
if (four_plus){
|
||||
query = Object.assign(query,{
|
||||
rooms: {'$gte' : 4}
|
||||
});
|
||||
}else{
|
||||
query = Object.assign(query,{
|
||||
rooms: {'$in' : room_count}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
//AND
|
||||
|
||||
|
||||
//Match size
|
||||
if (minSize || maxSize) {
|
||||
const size = {}
|
||||
if (minSize) {
|
||||
@@ -161,21 +202,21 @@ router.get('/search/listings', async (req, res, next) => {
|
||||
});
|
||||
}
|
||||
|
||||
//AND
|
||||
|
||||
//Match category
|
||||
if (category) {
|
||||
const allCategories = category.split(',');
|
||||
const or = allCategories.map(val => {
|
||||
return {
|
||||
category: parseInt(val)
|
||||
};
|
||||
const category_count = [];
|
||||
|
||||
const allCategories = category.split(',').map(val => {
|
||||
category_count.push(parseInt(val));
|
||||
});
|
||||
|
||||
and.push({ "$or": or });
|
||||
}
|
||||
|
||||
if (and.length > 0) {
|
||||
query = Object.assign(query, {
|
||||
"$and": and
|
||||
category: {'$in' : category_count}
|
||||
});
|
||||
|
||||
|
||||
}
|
||||
|
||||
console.log('QUERY: ', query);
|
||||
|
||||
@@ -21,7 +21,7 @@ install(); // for source maps to work
|
||||
|
||||
let crawlers = [
|
||||
//new OlxCrawler(process.env.OLX_FROM_PAGE, process.env.OLX_TO_PAGE, process.env.OLX_MAX_RESULTS),
|
||||
//new ProstorCrawler(parseInt(process.env.PROSTOR_FROM_PAGE), parseInt(process.env.PROSTOR_TO_PAGE), parseInt(process.env.PROSTOR_MAX_RESULTS)),
|
||||
new ProstorCrawler(parseInt(process.env.PROSTOR_FROM_PAGE), parseInt(process.env.PROSTOR_TO_PAGE), parseInt(process.env.PROSTOR_MAX_RESULTS)),
|
||||
new RentalCrawler(parseInt(process.env.RENTAL_FROM_PAGE), parseInt(process.env.RENTAL_TO_PAGE), parseInt(process.env.RENTAL_MAX_RESULTS))
|
||||
];
|
||||
|
||||
|
||||
@@ -8,3 +8,5 @@ apartman = 3
|
||||
poslovni prostor = 4
|
||||
zemljište = 5
|
||||
garaža = 6
|
||||
|
||||
Datum spremiti u formatu dan.mjesec.godina, u polje "time"
|
||||
|
||||
@@ -10,3 +10,7 @@ export const CATEGORY_LAND = 3;
|
||||
export const CATEGORY_APARTMENT = 4;
|
||||
export const CATEGORY_GARAGE = 5;
|
||||
|
||||
export const STATUS_NORMAL = 0;
|
||||
export const STATUS_RESERVED = 1;
|
||||
export const STATUS_SOLD = 2;
|
||||
|
||||
|
||||
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@@ -8,11 +8,17 @@ let FormData = require('form-data');
|
||||
|
||||
import {
|
||||
AD_TYPE_SALE,
|
||||
|
||||
IGNORED_USERNAMES,
|
||||
|
||||
CATEGORY_FLAT,
|
||||
CATEGORY_HOUSE,
|
||||
CATEGORY_OFFICE,
|
||||
CATEGORY_LAND
|
||||
CATEGORY_LAND,
|
||||
|
||||
STATUS_NORMAL,
|
||||
STATUS_RESERVED,
|
||||
STATUS_SOLD
|
||||
} from '../enums';
|
||||
|
||||
export default class ProstorCrawler {
|
||||
@@ -58,14 +64,25 @@ export default class ProstorCrawler {
|
||||
|
||||
const latLngRe = /marker=([0-9]+\.[0-9]+)\,\s*([0-9]+\.[0-9]+)/g;
|
||||
|
||||
var has_map = false;
|
||||
|
||||
var tmpTitle = title.toUpperCase();
|
||||
|
||||
var status = STATUS_NORMAL;
|
||||
if (tmpTitle.indexOf("PRODANO") !== -1) status = STATUS_SOLD;
|
||||
if (tmpTitle.indexOf("REZERVISANO") !== -1) status = STATUS_RESERVED;
|
||||
|
||||
|
||||
//const latLngRe = /LatLng\(([0-9]+\.[0-9]+)\,\s+([0-9]+\.[0-9]+)\)/g;
|
||||
|
||||
const matches = latLngRe.exec(body);
|
||||
let lng = '',
|
||||
lat = '';
|
||||
has_map = false;
|
||||
if (matches && matches.length >= 3) {
|
||||
lat = matches[1];
|
||||
lng = matches[2];
|
||||
has_map = true;
|
||||
}
|
||||
|
||||
//console.log({
|
||||
@@ -136,6 +153,8 @@ export default class ProstorCrawler {
|
||||
lat,
|
||||
lng,
|
||||
loc: [parseFloat(lat), parseFloat(lng)],
|
||||
has_map,
|
||||
status,
|
||||
//images: cloudinaryImages
|
||||
images
|
||||
};
|
||||
|
||||
@@ -8,13 +8,19 @@ let FormData = require('form-data');
|
||||
|
||||
import {
|
||||
AD_TYPE_SALE,
|
||||
|
||||
IGNORED_USERNAMES,
|
||||
|
||||
CATEGORY_FLAT,
|
||||
CATEGORY_HOUSE,
|
||||
CATEGORY_OFFICE,
|
||||
CATEGORY_LAND,
|
||||
CATEGORY_APARTMENT,
|
||||
CATEGORY_GARAGE
|
||||
CATEGORY_GARAGE,
|
||||
|
||||
STATUS_NORMAL,
|
||||
STATUS_RESERVED,
|
||||
STATUS_SOLD
|
||||
} from '../enums';
|
||||
|
||||
export default class RentalCrawler {
|
||||
@@ -44,36 +50,55 @@ export default class RentalCrawler {
|
||||
var descriptions;
|
||||
var floor;
|
||||
var floor;
|
||||
var time;
|
||||
var time;
|
||||
var lat;
|
||||
var lng;
|
||||
var has_map;
|
||||
var status;
|
||||
|
||||
//Oglas koji nema JSON objekat, nema ni mape
|
||||
//No JSON string -> No map
|
||||
try{
|
||||
const complete_data = $('body > div.container-fluid > div.container > div:nth-child(2) > div.col-xs-12.col-sm-12.col-md-12.col-lg-9.content-main > div:nth-child(7) > script').text();
|
||||
const data_json_string = complete_data.slice(21,-1);
|
||||
const data_json = JSON.parse(data_json_string);
|
||||
let complete_data;
|
||||
let data_json_string;
|
||||
let data_json;
|
||||
|
||||
const start_n = 5;
|
||||
const last_n = 15;
|
||||
|
||||
for (let i=start_n;i<=last_n;i++){
|
||||
try{
|
||||
complete_data = $('body > div.container-fluid > div.container > div:nth-child(2) > div.col-xs-12.col-sm-12.col-md-12.col-lg-9.content-main > div:nth-child('+i+') > script').text();
|
||||
data_json_string = complete_data.slice(21,-1);
|
||||
data_json = JSON.parse(data_json_string);
|
||||
break;
|
||||
}catch(e){
|
||||
console.log("No JSON string");
|
||||
if (i===last_n) throw(e);
|
||||
}
|
||||
}
|
||||
|
||||
title = data_json["re_realEstates_portalName"];
|
||||
category = this.getCategoryIdfromNumber(data_json["re_types_id"]); //kategorije ne odgovaraju brojevima u Enums !!!
|
||||
price = data_json["re_realEstates_price"];
|
||||
size = data_json["re_realEstates_area"];
|
||||
rooms = data_json["re_realEstates_roomsNO"];
|
||||
category = this.getCategoryIdfromNumber(parseInt(data_json["re_types_id"])); //categories from JSON string doesn't match categories in ENUMS
|
||||
price = parseFloat(data_json["re_realEstates_price"]);
|
||||
size = parseFloat(data_json["re_realEstates_area"]);
|
||||
rooms = parseInt(data_json["re_realEstates_roomsNO"]);
|
||||
address = data_json["re_realEstates_address"];
|
||||
descriptions = data_json["re_realEstates_description"];
|
||||
floor = data_json["re_realEstates_floorNO"];
|
||||
time = data_json["re_realEstates_inserted"];
|
||||
lat = data_json["re_realEstates_latitude"];
|
||||
lng = data_json["re_realEstates_longitude"];
|
||||
//descriptions = data_json["re_realEstates_description"];
|
||||
floor = parseInt(data_json["re_realEstates_floorNO"]);
|
||||
|
||||
let time_array = data_json["re_realEstates_inserted"].slice(0,data_json["re_realEstates_inserted"].indexOf(' ')).split('-');
|
||||
time = time_array[2]+'.'+time_array[1]+'.'+time_array[0];
|
||||
|
||||
time = data_json["re_realEstates_inserted"];
|
||||
lat = data_json["re_realEstates_latitude"];
|
||||
lng = data_json["re_realEstates_longitude"];
|
||||
has_map = true;
|
||||
}catch(e){
|
||||
//oglas nema JSON objekat, informacije izvući preko selektora
|
||||
console.log("error : " + e);
|
||||
//This ad has no JSON string; the information should be retrieved using HTML selectors
|
||||
time=undefined;
|
||||
lat=undefined;
|
||||
lng=undefined;
|
||||
lat=0;
|
||||
lng=0;
|
||||
has_map = false;
|
||||
|
||||
price = (parseFloat($('body > div.container-fluid > div.container > div:nth-child(2) > div.col-xs-12.col-sm-12.col-md-12.col-lg-9.content-main > div:nth-child(1) > div > div > div.col-xs-12.col-sm-4.box-details > div.prices > span.pull-left').text().replace(',','').replace('.','')))/100;
|
||||
|
||||
@@ -86,23 +111,27 @@ export default class RentalCrawler {
|
||||
|
||||
address = props_list['Ulica'];
|
||||
size = parseFloat((props_list['Površina']).replace(',','').replace('.',''))/100;
|
||||
rooms = props_list['Broj soba'];
|
||||
rooms = parseInt(props_list['Broj soba']);
|
||||
floor = parseInt(props_list['Spratnost']);
|
||||
|
||||
title = $('div.container-fluid > div.container > div.row.content-top > div.col-xs-12.col-sm-6.col-md-9 > div.description.pull-left > h1').text();
|
||||
descriptions = $('#b1 > div > div > div').text();
|
||||
|
||||
const full_category = $('body > div.container-fluid > div.container > div:nth-child(2) > div.col-xs-12.col-sm-12.col-md-12.col-lg-9.content-main > div:nth-child(1) > div > div > div.col-xs-12.col-sm-4.box-details > div.title > p').text().split(',',3);
|
||||
|
||||
category = (full_category.size > 2) ?
|
||||
this.getCategoryIdfromText(full_category[0]+full_category[1]) :
|
||||
this.getCategoryIdfromText(full_category[0]);
|
||||
|
||||
}
|
||||
|
||||
descriptions = $('#b1 > div > div > div').text();
|
||||
status = this.getStatusIdFromText($('#a1 > div.box-badges > div').text());
|
||||
|
||||
|
||||
const images = [];
|
||||
|
||||
$(".img-gallery").contents().map((i,elem)=>{
|
||||
//u linku sadržanom u tmp stoje i parametri za max visinu i širinu
|
||||
const tmp =$(elem).attr('data-preview');
|
||||
if(tmp) images.push(tmp);
|
||||
});
|
||||
@@ -123,12 +152,12 @@ export default class RentalCrawler {
|
||||
lat,
|
||||
lng,
|
||||
loc: [parseFloat(lat), parseFloat(lng)],
|
||||
has_map,
|
||||
status,
|
||||
//images: cloudinaryImages
|
||||
images
|
||||
};
|
||||
|
||||
console.log(data);
|
||||
|
||||
return data;
|
||||
|
||||
} catch (e) {
|
||||
@@ -197,12 +226,12 @@ export default class RentalCrawler {
|
||||
|
||||
getCategoryIdfromNumber(category){
|
||||
switch(category){
|
||||
case (1): return CATEGORY_HOUSE;
|
||||
case (2): return CATEGORY_FLAT;
|
||||
case (3): return CATEGORY_APARTMENT;
|
||||
case (4): return CATEGORY_OFFICE;
|
||||
case (5): return CATEGORY_LAND;
|
||||
case (6): return CATEGORY_GARAGE;
|
||||
case (1): return CATEGORY_HOUSE; break;
|
||||
case (2): return CATEGORY_FLAT; break;
|
||||
case (3): return CATEGORY_APARTMENT; break;
|
||||
case (4): return CATEGORY_OFFICE; break;
|
||||
case (5): return CATEGORY_LAND; break;
|
||||
case (6): return CATEGORY_GARAGE; break;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -279,6 +308,12 @@ export default class RentalCrawler {
|
||||
}
|
||||
}
|
||||
|
||||
getStatusIdFromText(status){
|
||||
if (status === 'Prodato') return STATUS_SOLD;
|
||||
|
||||
return STATUS_NORMAL;
|
||||
}
|
||||
|
||||
async sleep(ms) {
|
||||
return new Promise(resolve => setTimeout(resolve, ms));
|
||||
}
|
||||
|
||||
4601
web/package-lock.json
generated
Normal file
4601
web/package-lock.json
generated
Normal file
File diff suppressed because it is too large
Load Diff
@@ -1,5 +1,5 @@
|
||||
import React from 'react'
|
||||
import Gallery from './gallery'
|
||||
import Gallery from './Gallery'
|
||||
import {formatPrice, formatRooms, formatFloor} from '../lib/helpers'
|
||||
import ContactModal from './ContactModal';
|
||||
|
||||
|
||||
@@ -21,7 +21,8 @@ class Main extends React.Component {
|
||||
sort: 'relevance',
|
||||
filters: {
|
||||
rooms: {},
|
||||
category: {}
|
||||
category: {},
|
||||
status : {}
|
||||
},
|
||||
mobileView: 'MAP',
|
||||
contact: {
|
||||
|
||||
Reference in New Issue
Block a user