crawler upgrade, server upgrade

This commit is contained in:
GotPPay
2017-10-30 22:54:56 +01:00
parent 039e34237d
commit a63c108259
14 changed files with 8757 additions and 2517 deletions

File diff suppressed because it is too large Load Diff

16
backend/enums.js Normal file
View File

@@ -0,0 +1,16 @@
// Shared numeric identifiers used by the crawlers and the search API.
// The values end up in stored listing documents and in query filters, so
// changing a number here changes what existing data means.

// Ad type: whether a listing is offered for sale or for rent.
export const AD_TYPE_SALE = 1;
export const AD_TYPE_RENT = 2;

// Usernames to ignore while crawling.
// NOTE(review): the code that consumes this list is not visible here — confirm usage.
export const IGNORED_USERNAMES = ['rental'];

// Property category stored on a listing. Source sites use their own numbering,
// so each crawler translates the site value into one of these.
export const CATEGORY_FLAT = 0;
export const CATEGORY_HOUSE = 1;
export const CATEGORY_OFFICE = 2;
export const CATEGORY_LAND = 3;
export const CATEGORY_APARTMENT = 4;
export const CATEGORY_GARAGE = 5;

// Availability of a listing. The search endpoint only returns STATUS_NORMAL.
export const STATUS_NORMAL = 0;
export const STATUS_RESERVED = 1;
export const STATUS_SOLD = 2;

1441
backend/package-lock.json generated Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -4,6 +4,12 @@ import distanceInWordsToNow from 'date-fns/distance_in_words_to_now';
import parseDate from 'date-fns/format'; import parseDate from 'date-fns/format';
import moment from 'moment'; import moment from 'moment';
import {
STATUS_NORMAL,
STATUS_RESERVED,
STATUS_SOLD
} from "./enums";
var hr = require('date-fns/locale/hr'); var hr = require('date-fns/locale/hr');
var MongoClient = require('mongodb').MongoClient; var MongoClient = require('mongodb').MongoClient;
@@ -78,6 +84,7 @@ router.get('/search/listings/:id', async (req, res, next) => {
router.get('/search/listings', async (req, res, next) => { router.get('/search/listings', async (req, res, next) => {
try { try {
console.log("Search listings");
const bounds = req.query.bounds || ''; const bounds = req.query.bounds || '';
const minPrice = req.query.minPrice; const minPrice = req.query.minPrice;
const maxPrice = req.query.maxPrice; const maxPrice = req.query.maxPrice;
@@ -93,6 +100,22 @@ router.get('/search/listings', async (req, res, next) => {
const properties = db.collection('listings'); const properties = db.collection('listings');
let query = {}; let query = {};
//Get only ads with location
query = Object.assign(query, {
has_map: true
});
//AND
//Do not show sold or reserved properties
query = Object.assign(query, {
status: STATUS_NORMAL
});
//AND
//Show ads that fall inside visible map
if (bounds) { if (bounds) {
const [lat1, lng1, lat2, lng2] = bounds.split(',').map(parseFloat) const [lat1, lng1, lat2, lng2] = bounds.split(',').map(parseFloat)
const box = [[lat1, lng1], [lat2, lng2]]; const box = [[lat1, lng1], [lat2, lng2]];
@@ -106,12 +129,18 @@ router.get('/search/listings', async (req, res, next) => {
}); });
} }
//AND
//Show only selected type of ads (selling or renting)
if (adType) { if (adType) {
query = Object.assign(query, { query = Object.assign(query, {
adType: parseInt(adType) adType: parseInt(adType)
}); });
} }
//AND
//Match price
if (minPrice || maxPrice) { if (minPrice || maxPrice) {
const price = {} const price = {}
if (minPrice) { if (minPrice) {
@@ -127,25 +156,37 @@ router.get('/search/listings', async (req, res, next) => {
}); });
} }
const and = []; //AND
//Match number of rooms
if (rooms) { if (rooms) {
const room_count = [];
let four_plus = false;
const allRooms = rooms.split(','); const allRooms = rooms.split(',');
const or = allRooms.map(val => { allRooms.map((val)=>{
if (val === '4+') { if (parseInt(val)!==4){
return { room_count.push(parseInt(val));
rooms: { }else{
"$gte": 4 four_plus=true;
}
}
} }
return {
rooms: parseFloat(val)
};
}); });
and.push({ "$or": or }); if (four_plus){
query = Object.assign(query,{
rooms: {'$gte' : 4}
});
}else{
query = Object.assign(query,{
rooms: {'$in' : room_count}
});
}
} }
//AND
//Match size
if (minSize || maxSize) { if (minSize || maxSize) {
const size = {} const size = {}
if (minSize) { if (minSize) {
@@ -161,21 +202,21 @@ router.get('/search/listings', async (req, res, next) => {
}); });
} }
//AND
//Match category
if (category) { if (category) {
const allCategories = category.split(','); const category_count = [];
const or = allCategories.map(val => {
return { const allCategories = category.split(',').map(val => {
category: parseInt(val) category_count.push(parseInt(val));
};
}); });
and.push({ "$or": or });
}
if (and.length > 0) {
query = Object.assign(query, { query = Object.assign(query, {
"$and": and category: {'$in' : category_count}
}); });
} }
console.log('QUERY: ', query); console.log('QUERY: ', query);

View File

@@ -21,7 +21,7 @@ install(); // for source maps to work
let crawlers = [ let crawlers = [
//new OlxCrawler(process.env.OLX_FROM_PAGE, process.env.OLX_TO_PAGE, process.env.OLX_MAX_RESULTS), //new OlxCrawler(process.env.OLX_FROM_PAGE, process.env.OLX_TO_PAGE, process.env.OLX_MAX_RESULTS),
//new ProstorCrawler(parseInt(process.env.PROSTOR_FROM_PAGE), parseInt(process.env.PROSTOR_TO_PAGE), parseInt(process.env.PROSTOR_MAX_RESULTS)), new ProstorCrawler(parseInt(process.env.PROSTOR_FROM_PAGE), parseInt(process.env.PROSTOR_TO_PAGE), parseInt(process.env.PROSTOR_MAX_RESULTS)),
new RentalCrawler(parseInt(process.env.RENTAL_FROM_PAGE), parseInt(process.env.RENTAL_TO_PAGE), parseInt(process.env.RENTAL_MAX_RESULTS)) new RentalCrawler(parseInt(process.env.RENTAL_FROM_PAGE), parseInt(process.env.RENTAL_TO_PAGE), parseInt(process.env.RENTAL_MAX_RESULTS))
]; ];

View File

@@ -8,3 +8,5 @@ apartman = 3
poslovni prostor = 4 poslovni prostor = 4
zemljište = 5 zemljište = 5
garaža = 6 garaža = 6
Datum spremiti u formatu dan.mjesec.godina, u polje "time"

View File

@@ -10,3 +10,7 @@ export const CATEGORY_LAND = 3;
export const CATEGORY_APARTMENT = 4; export const CATEGORY_APARTMENT = 4;
export const CATEGORY_GARAGE = 5; export const CATEGORY_GARAGE = 5;
export const STATUS_NORMAL = 0;
export const STATUS_RESERVED = 1;
export const STATUS_SOLD = 2;

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -8,11 +8,17 @@ let FormData = require('form-data');
import { import {
AD_TYPE_SALE, AD_TYPE_SALE,
IGNORED_USERNAMES, IGNORED_USERNAMES,
CATEGORY_FLAT, CATEGORY_FLAT,
CATEGORY_HOUSE, CATEGORY_HOUSE,
CATEGORY_OFFICE, CATEGORY_OFFICE,
CATEGORY_LAND CATEGORY_LAND,
STATUS_NORMAL,
STATUS_RESERVED,
STATUS_SOLD
} from '../enums'; } from '../enums';
export default class ProstorCrawler { export default class ProstorCrawler {
@@ -58,14 +64,25 @@ export default class ProstorCrawler {
const latLngRe = /marker=([0-9]+\.[0-9]+)\,\s*([0-9]+\.[0-9]+)/g; const latLngRe = /marker=([0-9]+\.[0-9]+)\,\s*([0-9]+\.[0-9]+)/g;
var has_map = false;
var tmpTitle = title.toUpperCase();
var status = STATUS_NORMAL;
if (tmpTitle.indexOf("PRODANO") !== -1) status = STATUS_SOLD;
if (tmpTitle.indexOf("REZERVISANO") !== -1) status = STATUS_RESERVED;
//const latLngRe = /LatLng\(([0-9]+\.[0-9]+)\,\s+([0-9]+\.[0-9]+)\)/g; //const latLngRe = /LatLng\(([0-9]+\.[0-9]+)\,\s+([0-9]+\.[0-9]+)\)/g;
const matches = latLngRe.exec(body); const matches = latLngRe.exec(body);
let lng = '', let lng = '',
lat = ''; lat = '';
has_map = false;
if (matches && matches.length >= 3) { if (matches && matches.length >= 3) {
lat = matches[1]; lat = matches[1];
lng = matches[2]; lng = matches[2];
has_map = true;
} }
//console.log({ //console.log({
@@ -136,6 +153,8 @@ export default class ProstorCrawler {
lat, lat,
lng, lng,
loc: [parseFloat(lat), parseFloat(lng)], loc: [parseFloat(lat), parseFloat(lng)],
has_map,
status,
//images: cloudinaryImages //images: cloudinaryImages
images images
}; };

View File

@@ -8,13 +8,19 @@ let FormData = require('form-data');
import { import {
AD_TYPE_SALE, AD_TYPE_SALE,
IGNORED_USERNAMES, IGNORED_USERNAMES,
CATEGORY_FLAT, CATEGORY_FLAT,
CATEGORY_HOUSE, CATEGORY_HOUSE,
CATEGORY_OFFICE, CATEGORY_OFFICE,
CATEGORY_LAND, CATEGORY_LAND,
CATEGORY_APARTMENT, CATEGORY_APARTMENT,
CATEGORY_GARAGE CATEGORY_GARAGE,
STATUS_NORMAL,
STATUS_RESERVED,
STATUS_SOLD
} from '../enums'; } from '../enums';
export default class RentalCrawler { export default class RentalCrawler {
@@ -44,36 +50,55 @@ export default class RentalCrawler {
var descriptions; var descriptions;
var floor; var floor;
var floor; var floor;
var time; var time;
var lat; var lat;
var lng; var lng;
var has_map;
var status;
//Oglas koji nema JSON objekat, nema ni mape //No JSON string -> No map
try{ try{
const complete_data = $('body > div.container-fluid > div.container > div:nth-child(2) > div.col-xs-12.col-sm-12.col-md-12.col-lg-9.content-main > div:nth-child(7) > script').text(); let complete_data;
const data_json_string = complete_data.slice(21,-1); let data_json_string;
const data_json = JSON.parse(data_json_string); let data_json;
const start_n = 5;
const last_n = 15;
for (let i=start_n;i<=last_n;i++){
try{
complete_data = $('body > div.container-fluid > div.container > div:nth-child(2) > div.col-xs-12.col-sm-12.col-md-12.col-lg-9.content-main > div:nth-child('+i+') > script').text();
data_json_string = complete_data.slice(21,-1);
data_json = JSON.parse(data_json_string);
break;
}catch(e){
console.log("No JSON string");
if (i===last_n) throw(e);
}
}
title = data_json["re_realEstates_portalName"]; title = data_json["re_realEstates_portalName"];
category = this.getCategoryIdfromNumber(data_json["re_types_id"]); //kategorije ne odgovaraju brojevima u Enums !!! category = this.getCategoryIdfromNumber(parseInt(data_json["re_types_id"])); //categories from JSON string doesn't match categories in ENUMS
price = data_json["re_realEstates_price"]; price = parseFloat(data_json["re_realEstates_price"]);
size = data_json["re_realEstates_area"]; size = parseFloat(data_json["re_realEstates_area"]);
rooms = data_json["re_realEstates_roomsNO"]; rooms = parseInt(data_json["re_realEstates_roomsNO"]);
address = data_json["re_realEstates_address"]; address = data_json["re_realEstates_address"];
descriptions = data_json["re_realEstates_description"]; //descriptions = data_json["re_realEstates_description"];
floor = data_json["re_realEstates_floorNO"]; floor = parseInt(data_json["re_realEstates_floorNO"]);
time = data_json["re_realEstates_inserted"];
lat = data_json["re_realEstates_latitude"]; let time_array = data_json["re_realEstates_inserted"].slice(0,data_json["re_realEstates_inserted"].indexOf(' ')).split('-');
lng = data_json["re_realEstates_longitude"]; time = time_array[2]+'.'+time_array[1]+'.'+time_array[0];
time = data_json["re_realEstates_inserted"];
lat = data_json["re_realEstates_latitude"]; lat = data_json["re_realEstates_latitude"];
lng = data_json["re_realEstates_longitude"]; lng = data_json["re_realEstates_longitude"];
has_map = true;
}catch(e){ }catch(e){
//oglas nema JSON objekat, informacije izvući preko selektora console.log("error : " + e);
//This ad has no JSON string; its information must be retrieved using HTML selectors
time=undefined; time=undefined;
lat=undefined; lat=0;
lng=undefined; lng=0;
has_map = false;
price = (parseFloat($('body > div.container-fluid > div.container > div:nth-child(2) > div.col-xs-12.col-sm-12.col-md-12.col-lg-9.content-main > div:nth-child(1) > div > div > div.col-xs-12.col-sm-4.box-details > div.prices > span.pull-left').text().replace(',','').replace('.','')))/100; price = (parseFloat($('body > div.container-fluid > div.container > div:nth-child(2) > div.col-xs-12.col-sm-12.col-md-12.col-lg-9.content-main > div:nth-child(1) > div > div > div.col-xs-12.col-sm-4.box-details > div.prices > span.pull-left').text().replace(',','').replace('.','')))/100;
@@ -86,23 +111,27 @@ export default class RentalCrawler {
address = props_list['Ulica']; address = props_list['Ulica'];
size = parseFloat((props_list['Površina']).replace(',','').replace('.',''))/100; size = parseFloat((props_list['Površina']).replace(',','').replace('.',''))/100;
rooms = props_list['Broj soba']; rooms = parseInt(props_list['Broj soba']);
floor = parseInt(props_list['Spratnost']); floor = parseInt(props_list['Spratnost']);
title = $('div.container-fluid > div.container > div.row.content-top > div.col-xs-12.col-sm-6.col-md-9 > div.description.pull-left > h1').text(); title = $('div.container-fluid > div.container > div.row.content-top > div.col-xs-12.col-sm-6.col-md-9 > div.description.pull-left > h1').text();
descriptions = $('#b1 > div > div > div').text(); descriptions = $('#b1 > div > div > div').text();
const full_category = $('body > div.container-fluid > div.container > div:nth-child(2) > div.col-xs-12.col-sm-12.col-md-12.col-lg-9.content-main > div:nth-child(1) > div > div > div.col-xs-12.col-sm-4.box-details > div.title > p').text().split(',',3); const full_category = $('body > div.container-fluid > div.container > div:nth-child(2) > div.col-xs-12.col-sm-12.col-md-12.col-lg-9.content-main > div:nth-child(1) > div > div > div.col-xs-12.col-sm-4.box-details > div.title > p').text().split(',',3);
category = (full_category.size > 2) ? category = (full_category.size > 2) ?
this.getCategoryIdfromText(full_category[0]+full_category[1]) : this.getCategoryIdfromText(full_category[0]+full_category[1]) :
this.getCategoryIdfromText(full_category[0]); this.getCategoryIdfromText(full_category[0]);
} }
descriptions = $('#b1 > div > div > div').text();
status = this.getStatusIdFromText($('#a1 > div.box-badges > div').text());
const images = []; const images = [];
$(".img-gallery").contents().map((i,elem)=>{ $(".img-gallery").contents().map((i,elem)=>{
//u linku sadržanom u tmp stoje i parametri za max visinu i širinu
const tmp =$(elem).attr('data-preview'); const tmp =$(elem).attr('data-preview');
if(tmp) images.push(tmp); if(tmp) images.push(tmp);
}); });
@@ -123,12 +152,12 @@ export default class RentalCrawler {
lat, lat,
lng, lng,
loc: [parseFloat(lat), parseFloat(lng)], loc: [parseFloat(lat), parseFloat(lng)],
has_map,
status,
//images: cloudinaryImages //images: cloudinaryImages
images images
}; };
console.log(data);
return data; return data;
} catch (e) { } catch (e) {
@@ -197,12 +226,12 @@ export default class RentalCrawler {
getCategoryIdfromNumber(category){ getCategoryIdfromNumber(category){
switch(category){ switch(category){
case (1): return CATEGORY_HOUSE; case (1): return CATEGORY_HOUSE; break;
case (2): return CATEGORY_FLAT; case (2): return CATEGORY_FLAT; break;
case (3): return CATEGORY_APARTMENT; case (3): return CATEGORY_APARTMENT; break;
case (4): return CATEGORY_OFFICE; case (4): return CATEGORY_OFFICE; break;
case (5): return CATEGORY_LAND; case (5): return CATEGORY_LAND; break;
case (6): return CATEGORY_GARAGE; case (6): return CATEGORY_GARAGE; break;
} }
} }
@@ -279,6 +308,12 @@ export default class RentalCrawler {
} }
} }
getStatusIdFromText(status){
if (status === 'Prodato') return STATUS_SOLD;
return STATUS_NORMAL;
}
async sleep(ms) { async sleep(ms) {
return new Promise(resolve => setTimeout(resolve, ms)); return new Promise(resolve => setTimeout(resolve, ms));
} }

4601
web/package-lock.json generated Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -1,5 +1,5 @@
import React from 'react' import React from 'react'
import Gallery from './gallery' import Gallery from './Gallery'
import {formatPrice, formatRooms, formatFloor} from '../lib/helpers' import {formatPrice, formatRooms, formatFloor} from '../lib/helpers'
import ContactModal from './ContactModal'; import ContactModal from './ContactModal';

View File

@@ -21,7 +21,8 @@ class Main extends React.Component {
sort: 'relevance', sort: 'relevance',
filters: { filters: {
rooms: {}, rooms: {},
category: {} category: {},
status : {}
}, },
mobileView: 'MAP', mobileView: 'MAP',
contact: { contact: {