8 Commits

Author SHA1 Message Date
Bilal
aeba6fdc2f Merge pull request #2 from edazdarevic/rental-crawler
Rental crawler
2017-11-02 21:37:01 +01:00
GotPPay
7a5f7242ac new structure; code polish 2017-10-31 20:20:09 +01:00
GotPPay
a63c108259 crawler upgrade, server upgrade 2017-10-30 22:54:56 +01:00
GotPPay
039e34237d . 2017-10-16 12:51:37 +02:00
GotPPay
5d90e5efcb Ispravljen bug pozivanja nepostojeće funkcije 2017-10-16 12:42:18 +02:00
GotPPay
1743171cfd Rental crawler 2017-10-16 11:36:21 +02:00
GotPPay
b2787ebda5 Rental crawler 2017-10-16 11:34:26 +02:00
Edin Dazdarevic
aea928fdef Update README.md 2017-06-26 01:11:49 +02:00
26 changed files with 8661 additions and 6357 deletions

1
.gitignore vendored
View File

@@ -1,4 +1,5 @@
node_modules node_modules
.DS_Store .DS_Store
crawler/build crawler/build
backend/build
npm-debug.log npm-debug.log

View File

@@ -1,28 +1,37 @@
# kivi.ba
Kivi je najbolji nacin da nadjete svoj novi dom.
## Getting started
### Web
Dragi developeru, potrebno je da uradis sljedece:
1. cd web
2. yarn install
3. npm run dev
4. visit http://localhost:8080
5. profit!
Ukljucen je webpack hot module replacement + webpack-dev-server tako da se sve izmjene (osim CSS-a) odmah vide jer se browser sam reloada.
### Crawler ## 1. Cloning repo
Trenutno postoji samo jedan crawler a to je `olx.js` `git clone git@github.com:edazdarevic/kivi.git`
1. cd crawler `cd kivi`
2. npm run dev
3. node build/crawler.js
4. profit!
## 2. Start MongoDB
## 3. Build crawler and crawl some data
`cd crawler`
`npm install`
`webpack`
`PROSTOR_FROM_PAGE=1 PROSTOR_TO_PAGE=10 MONGO_URL=mongodb://localhost:27017/kivi CLOUDINARY_URL=cloudinary://<api_key>:<api_secret>@<cloud_name> node build/crawler.js`
## 4. Start backend server
`cd backend`
`npm install`
`webpack & webpack & node build/server.js`
## 5. Start front-end dev server
`cd web`
`npm install`
`npm run dev`
## 6. Visit http://localhost:8080

View File

@@ -1,3 +0,0 @@
{
"presets": ["es2015", "es2017"],
}

File diff suppressed because it is too large Load Diff

View File

@@ -1,26 +0,0 @@
{
"name": "backend",
"version": "1.0.0",
"description": "",
"main": "server.js",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1",
"start": "node server.js"
},
"author": "",
"license": "ISC",
"dependencies": {
"babel-core": "^6.24.0",
"babel-loader": "^6.4.1",
"babel-polyfill": "^6.23.0",
"babel-preset-es2015": "^6.24.0",
"babel-preset-es2017": "^6.22.0",
"body-parser": "^1.17.1",
"cookie-parser": "^1.4.3",
"date-fns": "^1.28.2",
"express": "^4.15.2",
"isomorphic-fetch": "^2.2.1",
"moment": "^2.18.1",
"mongodb": "^2.2.25"
}
}

View File

@@ -1,83 +1,85 @@
import express from 'express' import express from 'express';
import bodyParser from 'body-parser'; import bodyParser from 'body-parser';
import distanceInWordsToNow from 'date-fns/distance_in_words_to_now'; import distanceInWordsToNow from 'date-fns/distance_in_words_to_now';
import parseDate from 'date-fns/format'; import parseDate from 'date-fns/format';
import moment from 'moment'; import moment from 'moment';
var hr = require('date-fns/locale/hr'); import {STATUS_NORMAL, STATUS_RESERVED, STATUS_SOLD} from '../common/enums';
var MongoClient = require('mongodb').MongoClient; var hr = require ('date-fns/locale/hr');
var ObjectID = require('mongodb').ObjectID;
var MongoClient = require ('mongodb').MongoClient;
var ObjectID = require ('mongodb').ObjectID;
var url = 'mongodb://localhost:27017/kivi'; var url = 'mongodb://localhost:27017/kivi';
require("babel-polyfill"); require ('babel-polyfill');
const router = express.Router({mergeParams: true}) const router = express.Router ({mergeParams: true});
const PORT = process.env.PORT || 3001; const PORT = process.env.PORT || 3001;
const AGENTURA_KEY = process.env.AGENTURA_KEY || '1somethingverysecret'; const AGENTURA_KEY = process.env.AGENTURA_KEY || '1somethingverysecret';
let db; let db;
router.post('/contact/:listingId', async (req, res, next) => { router.post ('/contact/:listingId', async (req, res, next) => {
try { try {
const listingId = req.params.listingId; const listingId = req.params.listingId;
const body = req.body; const body = req.body;
const contactRequests = db.collection('contact_requests'); const contactRequests = db.collection ('contact_requests');
if (!body.email) { if (!body.email) {
res.status(422); res.status (422);
res.end('Email is required'); res.end ('Email is required');
return return;
} }
if (!body.name) { if (!body.name) {
res.status(422); res.status (422);
res.end('Name is required'); res.end ('Name is required');
return return;
} }
const result = await contactRequests.insertOne({ const result = await contactRequests.insertOne ({
name : body.name, name: body.name,
email : body.email, email: body.email,
listingId, listingId,
message : body.message, message: body.message,
phone : body.phone, phone: body.phone,
alert : body.alert alert: body.alert,
}); });
res.status(200); res.status (200);
res.end(); res.end ();
} catch (e) { } catch (e) {
console.log('error:', e); console.log ('error:', e);
next(e); next (e);
} }
}); });
router.get('/search/listings/:id', async (req, res, next) => { router.get ('/search/listings/:id', async (req, res, next) => {
try { try {
const id = req.params.id; const id = req.params.id;
const listings = db.collection('listings'); const listings = db.collection ('listings');
const listing = await listings.findOne({_id: new ObjectID(id)}); const listing = await listings.findOne ({_id: new ObjectID (id)});
if (listing) { if (listing) {
res.json(listing); res.json (listing);
} else { } else {
res.status(404); res.status (404);
} }
res.end(); res.end ();
} catch (e) { } catch (e) {
console.log('error:', e); console.log ('error:', e);
next(e); next (e);
} }
}); });
router.get('/search/listings', async (req, res, next) => { router.get ('/search/listings', async (req, res, next) => {
try { try {
console.log ('Search listings');
const bounds = req.query.bounds || ''; const bounds = req.query.bounds || '';
const minPrice = req.query.minPrice; const minPrice = req.query.minPrice;
const maxPrice = req.query.maxPrice; const maxPrice = req.query.maxPrice;
@@ -90,98 +92,128 @@ router.get('/search/listings', async (req, res, next) => {
const page = req.query.page || 0; const page = req.query.page || 0;
const pins = req.query.pins || false; const pins = req.query.pins || false;
const properties = db.collection('listings'); const properties = db.collection ('listings');
let query = {}; let query = {};
//Get only ads with location
query = Object.assign (query, {
has_map: true,
});
//AND
//Do not show sold or reserved properity
query = Object.assign (query, {
status: STATUS_NORMAL,
});
//AND
//Show ads that fall inside visible map
if (bounds) { if (bounds) {
const [lat1, lng1, lat2, lng2] = bounds.split(',').map(parseFloat) const [lat1, lng1, lat2, lng2] = bounds.split (',').map (parseFloat);
const box = [[lat1, lng1], [lat2, lng2]]; const box = [[lat1, lng1], [lat2, lng2]];
query = Object.assign(query, { query = Object.assign (query, {
loc: { loc: {
"$geoWithin": { $geoWithin: {
"$box": box $box: box,
} },
} },
}); });
} }
//AND
//Show only selected type of ads (selling or renting)
if (adType) { if (adType) {
query = Object.assign(query, { query = Object.assign (query, {
adType: parseInt(adType) adType: parseInt (adType),
}); });
} }
//AND
//Match price
if (minPrice || maxPrice) { if (minPrice || maxPrice) {
const price = {} const price = {};
if (minPrice) { if (minPrice) {
price["$gte"] = parseFloat(minPrice); price['$gte'] = parseFloat (minPrice);
} }
if (maxPrice) { if (maxPrice) {
price["$lte"] = parseFloat(maxPrice); price['$lte'] = parseFloat (maxPrice);
} }
query = Object.assign(query, { query = Object.assign (query, {
price price,
}); });
} }
const and = []; //AND
//Match number of rooms
if (rooms) { if (rooms) {
const allRooms = rooms.split(','); const roomCount = [];
const or = allRooms.map(val => { let fourPlus = false;
if (val === '4+') {
return { const allRooms = rooms.split (',');
rooms: { allRooms.map (val => {
"$gte": 4 if (parseInt (val) !== 4) {
roomCount.push (parseInt (val));
} else {
fourPlus = true;
} }
}
}
return {
rooms: parseFloat(val)
};
}); });
and.push({ "$or": or }); if (fourPlus) {
query = Object.assign (query, {
rooms: {$gte: 4},
});
} else {
query = Object.assign (query, {
rooms: {$in: roomCount},
});
}
} }
//AND
//Match size
if (minSize || maxSize) { if (minSize || maxSize) {
const size = {} const size = {};
if (minSize) { if (minSize) {
size["$gte"] = parseFloat(minSize); size['$gte'] = parseFloat (minSize);
} }
if (maxSize) { if (maxSize) {
size["$lte"] = parseFloat(maxSize); size['$lte'] = parseFloat (maxSize);
} }
query = Object.assign(query, { query = Object.assign (query, {
size size,
}); });
} }
//AND
//Match category
if (category) { if (category) {
const allCategories = category.split(','); const categoryCount = [];
const or = allCategories.map(val => {
return { const allCategories = category.split (',').map (val => {
category: parseInt(val) categoryCount.push (parseInt (val));
};
}); });
and.push({ "$or": or }); query = Object.assign (query, {
} category: {$in: categoryCount},
if (and.length > 0) {
query = Object.assign(query, {
"$and": and
}); });
} }
console.log('QUERY: ', query); console.log ('QUERY: ', query);
const cnt = await properties.find(query).count(); const cnt = await properties.find (query).count ();
res.header('X-Total-Count', cnt); res.header ('X-Total-Count', cnt);
const getSort = () => { const getSort = () => {
if (sort === 'price-min') { if (sort === 'price-min') {
@@ -196,79 +228,80 @@ router.get('/search/listings', async (req, res, next) => {
} }
}; };
let all = properties.find(query, { let all = properties.find (query, {
//"sort": [['field1','asc'], ['field2','desc']] //"sort": [['field1','asc'], ['field2','desc']]
"sort": getSort() sort: getSort (),
}); });
const isPins = pins === "true"; const isPins = pins === 'true';
if (!isPins) { if (!isPins) {
all = await all.skip(20 * page).limit(20).toArray(); all = await all.skip (20 * page).limit (20).toArray ();
} else { } else {
all = await all.toArray(); all = await all.toArray ();
} }
if (all.length > 0) { if (all.length > 0) {
res.header('X-Last-Record-Id', [...all].pop()._id); res.header ('X-Last-Record-Id', [...all].pop ()._id);
} }
if (isPins) { if (isPins) {
res.json(all.map(val => { res.json (
all.map (val => {
return { return {
_id: val._id, _id: val._id,
loc: val.loc loc: val.loc,
} };
})); })
);
} else { } else {
res.json (
res.json(all.map(({_id, all.map (({_id, address, images, price, rooms, size, time}) => ({
address,
images,
price,
rooms,
size,
time
}) => ({
_id, _id,
address, address,
images: [images[0]], images: [images[0]],
price, price,
rooms, rooms,
size, size,
time: distanceInWordsToNow( time: distanceInWordsToNow (moment (time, 'DD.MM.YYYY'), {
moment(time, 'DD.MM.YYYY'), locale: hr,
{locale: hr} }),
), realTime: time,
realTime: time }))
}))); );
} }
res.end(); res.end ();
} catch (e) { } catch (e) {
console.log('error:', e); console.log ('error:', e);
next(e); next (e);
} }
}); });
const app = express ();
app.use (bodyParser.json ());
const app = express() app.use (function (req, res, next) {
app.use(bodyParser.json()); res.header ('Access-Control-Allow-Origin', '*');
res.header (
app.use(function(req, res, next) { 'Access-Control-Allow-Headers',
res.header("Access-Control-Allow-Origin", "*"); 'Origin, X-Requested-With, Content-Type, Accept, X-Last-Record-Id, X-Total-Count'
res.header("Access-Control-Allow-Headers", "Origin, X-Requested-With, Content-Type, Accept, X-Last-Record-Id, X-Total-Count"); );
res.header("Access-Control-Expose-Headers", "X-Last-Record-Id, X-Total-Count"); res.header (
res.header("Access-Control-Allow-Methods", "GET, POST, OPTIONS"); 'Access-Control-Expose-Headers',
res.header('Access-Control-Allow-Credentials', 'true'); 'X-Last-Record-Id, X-Total-Count'
next(); );
res.header ('Access-Control-Allow-Methods', 'GET, POST, OPTIONS');
res.header ('Access-Control-Allow-Credentials', 'true');
next ();
}); });
app.use('/api', router); app.use ('/api', router);
MongoClient.connect(url).then((database) => { MongoClient.connect (url).then (database => {
db = database; db = database;
db.collection('listings').createIndex({loc: "2d"}); db.collection ('listings').createIndex ({loc: '2d'});
app.listen(PORT, () => console.log('Express server running at localhost: ' + PORT)); app.listen (PORT, () =>
console.log ('Express server running at localhost: ' + PORT)
);
}); });

View File

@@ -7,7 +7,7 @@ module.exports = {
filename: 'build/server.js' filename: 'build/server.js'
}, },
target: 'node', target: 'node',
externals: fs.readdirSync(path.resolve(__dirname, 'node_modules')).reduce((ext, mod) => { externals: fs.readdirSync(path.resolve(__dirname, '../node_modules')).reduce((ext, mod) => {
ext[mod] = 'commonjs ' + mod ext[mod] = 'commonjs ' + mod
return ext return ext
}, {}), }, {}),

File diff suppressed because it is too large Load Diff

3
common/.babelrc Normal file
View File

@@ -0,0 +1,3 @@
{
"presets": ["es2015", "react", "stage-3"]
}

View File

@@ -7,4 +7,10 @@ export const CATEGORY_FLAT = 0;
export const CATEGORY_HOUSE = 1; export const CATEGORY_HOUSE = 1;
export const CATEGORY_OFFICE = 2; export const CATEGORY_OFFICE = 2;
export const CATEGORY_LAND = 3; export const CATEGORY_LAND = 3;
export const CATEGORY_APARTMENT = 4;
export const CATEGORY_GARAGE = 5;
export const STATUS_NORMAL = 0;
export const STATUS_RESERVED = 1;
export const STATUS_SOLD = 2;

View File

@@ -14,13 +14,15 @@ import {
import 'dotenv/config'; import 'dotenv/config';
import OlxCrawler from './specific/olx'; import OlxCrawler from './specific/olx';
import ProstorCrawler from './specific/prostor'; import ProstorCrawler from './specific/prostor';
import RentalCrawler from './specific/rental';
import MongoSaver from './savers/mongo' import MongoSaver from './savers/mongo'
install(); // for source maps to work install(); // for source maps to work
let crawlers = [ let crawlers = [
//new OlxCrawler(process.env.OLX_FROM_PAGE, process.env.OLX_TO_PAGE, process.env.OLX_MAX_RESULTS), //new OlxCrawler(process.env.OLX_FROM_PAGE, process.env.OLX_TO_PAGE, process.env.OLX_MAX_RESULTS),
new ProstorCrawler(parseInt(process.env.PROSTOR_FROM_PAGE), parseInt(process.env.PROSTOR_TO_PAGE), parseInt(process.env.PROSTOR_MAX_RESULTS)) new ProstorCrawler(parseInt(process.env.PROSTOR_FROM_PAGE), parseInt(process.env.PROSTOR_TO_PAGE), parseInt(process.env.PROSTOR_MAX_RESULTS)),
new RentalCrawler(parseInt(process.env.RENTAL_FROM_PAGE), parseInt(process.env.RENTAL_TO_PAGE), parseInt(process.env.RENTAL_MAX_RESULTS))
]; ];
let savers = [ let savers = [

10
crawler/detalji Normal file
View File

@@ -0,0 +1,10 @@
kategorije :
kuća = 1
stan = 2
apartman = 3
poslovni prostor = 4
zemljište = 5
garaža = 6
Datum spremiti u formatu dan.mjesec.godina, u polje "time"

View File

@@ -1,34 +0,0 @@
{
"name": "stan",
"version": "1.0.0",
"description": "",
"main": "index.js",
"dependencies": {
"babel": "^6.5.2",
"babel-core": "^6.18.2",
"babel-loader": "^6.2.7",
"babel-plugin-transform-async-to-generator": "^6.16.0",
"babel-polyfill": "^6.16.0",
"babel-preset-es2015": "^6.18.0",
"cheerio": "^0.22.0",
"cloudinary": "^1.8.0",
"dotenv": "^2.0.0",
"fetch": "^1.1.0",
"form-data": "^2.1.4",
"json-loader": "^0.5.4",
"mongodb": "^2.2.11",
"node-fetch": "^1.6.3",
"source-map-support": "^0.4.6",
"twilio": "^2.11.0"
},
"devDependencies": {
"webpack": "^1.13.3"
},
"scripts": {
"dev": "webpack",
"prod": "webpack -p",
"test": "echo \"Error: no test specified\" && exit 1"
},
"author": "",
"license": "ISC"
}

View File

@@ -12,7 +12,7 @@ import {
CATEGORY_HOUSE, CATEGORY_HOUSE,
CATEGORY_OFFICE, CATEGORY_OFFICE,
CATEGORY_LAND CATEGORY_LAND
} from '../enums'; } from '../../common/enums';
export default class OlxCrawler { export default class OlxCrawler {

View File

@@ -1,10 +1,10 @@
'use strict' 'use strict';
let fetch = require('node-fetch'); let fetch = require ('node-fetch');
let cheerio = require('cheerio'); let cheerio = require ('cheerio');
let fs = require('fs'); let fs = require ('fs');
let cloudinary = require('cloudinary'); let cloudinary = require ('cloudinary');
let FormData = require('form-data'); let FormData = require ('form-data');
import { import {
AD_TYPE_SALE, AD_TYPE_SALE,
@@ -12,60 +12,85 @@ import {
CATEGORY_FLAT, CATEGORY_FLAT,
CATEGORY_HOUSE, CATEGORY_HOUSE,
CATEGORY_OFFICE, CATEGORY_OFFICE,
CATEGORY_LAND CATEGORY_LAND,
} from '../enums'; STATUS_NORMAL,
STATUS_RESERVED,
STATUS_SOLD,
} from '../../common/enums';
export default class ProstorCrawler { export default class ProstorCrawler {
constructor (fromPage = 0, toPage = 10, maxResults = 1000) {
constructor(fromPage = 0, toPage = 10, maxResults = 1000) {
this.fromPage = fromPage; this.fromPage = fromPage;
this.toPage = toPage; this.toPage = toPage;
this.maxResults = maxResults; this.maxResults = maxResults;
} }
async indexSingle(url) { async indexSingle (url) {
try { try {
const res = await fetch (url);
const body = await res.text ();
const $ = cheerio.load (body);
const res = await fetch(url); const title = $ (
const body = await res.text(); '#nav_center_sub > div.content_area_1_left > div:nth-child(1) > h1'
const $ = cheerio.load(body); ).text ();
const title = $('#nav_center_sub > div.content_area_1_left > div:nth-child(1) > h1').text(); const category = $ (
'#nav_center_sub > div.content_area_1_left > div.bottom10 > div.content_lr_in_show > div:nth-child(3) > div:nth-child(4) > div.size_rs > span'
).text ();
const category = $('#nav_center_sub > div.content_area_1_left > div.bottom10 > div.content_lr_in_show > div:nth-child(3) > div:nth-child(4) > div.size_rs > span').text(); const price = $ (
'#nav_center_sub > div.content_area_1_left > div.bottom10 > div.content_lr_in_show > div:nth-child(1) > div.size_rs > strong'
).text ();
const size = $ (
'#nav_center_sub > div.content_area_1_left > div.bottom10 > div.content_lr_in_show > div:nth-child(4) > div:nth-child(7) > div.size_rs > span'
).text ();
const rooms = $ (
'#nav_center_sub > div.content_area_1_left > div.bottom10 > div.content_lr_in_show > div:nth-child(4) > div:nth-child(2) > div.size_rs > span'
).text ();
const price = $('#nav_center_sub > div.content_area_1_left > div.bottom10 > div.content_lr_in_show > div:nth-child(1) > div.size_rs > strong').text(); const address = $ (
'#nav_center_sub > div.content_area_1_left > div.bottom10 > div.content_lr_in_show > div:nth-child(3) > div:nth-child(3) > div.size_rs > span'
const size = $('#nav_center_sub > div.content_area_1_left > div.bottom10 > div.content_lr_in_show > div:nth-child(4) > div:nth-child(7) > div.size_rs > span').text(); ).text ();
const rooms = $('#nav_center_sub > div.content_area_1_left > div.bottom10 > div.content_lr_in_show > div:nth-child(4) > div:nth-child(2) > div.size_rs > span').text();
const address = $('#nav_center_sub > div.content_area_1_left > div.bottom10 > div.content_lr_in_show > div:nth-child(3) > div:nth-child(3) > div.size_rs > span').text();
//const location = $('#artikal_glavni_div > div.artikal_lijevo > div.op.pop.mobile-lokacija').attr('data-content'); //const location = $('#artikal_glavni_div > div.artikal_lijevo > div.op.pop.mobile-lokacija').attr('data-content');
//const adType = $('#artikal_glavni_div > div.artikal_lijevo > div:nth-child(15) > div:nth-child(2) > div.df2').text(); //const adType = $('#artikal_glavni_div > div.artikal_lijevo > div:nth-child(15) > div:nth-child(2) > div.df2').text();
const time = $('#nav_center_sub > div.content_area_1_right > div.bottom_d > div > strong:nth-child(1)').text(); const time = $ (
'#nav_center_sub > div.content_area_1_right > div.bottom_d > div > strong:nth-child(1)'
).text ();
//const olxId = $('#artikal_glavni_div > div.artikal_lijevo > div:nth-child(15) > div:nth-child(4) > div.df2').text(); //const olxId = $('#artikal_glavni_div > div.artikal_lijevo > div:nth-child(15) > div:nth-child(4) > div.df2').text();
const descriptions = $('#nav_center_sub > div.content_area_1_left > div.bottom10 > div.content_ll_in_show > div:nth-child(1)').text(); const descriptions = $ (
'#nav_center_sub > div.content_area_1_left > div.bottom10 > div.content_ll_in_show > div:nth-child(1)'
).text ();
const floor = $('#nav_center_sub > div.content_area_1_left > div.bottom10 > div.content_lr_in_show > div:nth-child(4) > div:nth-child(6) > div.size_rs').text();
const floor = $ (
'#nav_center_sub > div.content_area_1_left > div.bottom10 > div.content_lr_in_show > div:nth-child(4) > div:nth-child(6) > div.size_rs'
).text ();
const latLngRe = /marker=([0-9]+\.[0-9]+)\,\s*([0-9]+\.[0-9]+)/g; const latLngRe = /marker=([0-9]+\.[0-9]+)\,\s*([0-9]+\.[0-9]+)/g;
var hasMap = false;
var tmpTitle = title.toUpperCase ();
var status = STATUS_NORMAL;
if (tmpTitle.indexOf ('PRODANO') !== -1) status = STATUS_SOLD;
if (tmpTitle.indexOf ('REZERVISANO') !== -1) status = STATUS_RESERVED;
//const latLngRe = /LatLng\(([0-9]+\.[0-9]+)\,\s+([0-9]+\.[0-9]+)\)/g; //const latLngRe = /LatLng\(([0-9]+\.[0-9]+)\,\s+([0-9]+\.[0-9]+)\)/g;
const matches = latLngRe.exec(body); const matches = latLngRe.exec (body);
let lng = '', let lng = '', lat = '';
lat = ''; hasMap = false;
if (matches && matches.length >= 3) { if (matches && matches.length >= 3) {
lat = matches[1]; lat = matches[1];
lng = matches[2]; lng = matches[2];
hasMap = true;
} }
//console.log({ //console.log({
@@ -84,16 +109,17 @@ export default class ProstorCrawler {
const images = []; const images = [];
//const imgMatches = body.match(imgRe); //const imgMatches = body.match(imgRe);
const parseRooms = (rooms) => parseInt([...rooms].filter(c => !isNaN(c)).filter(c => c.trim()).join()) const parseRooms = rooms =>
const parsePrice = (price) => parseFloat(price.replace(".", "")) parseInt (
[...rooms].filter (c => !isNaN (c)).filter (c => c.trim ()).join ()
);
const parsePrice = price => parseFloat (price.replace ('.', ''));
$ ('.fancybox').each ((i, elem) => {
$('.fancybox').each((i, elem) => { const img = $ (elem).attr ('href');
const img = $(elem).attr('href'); images.push (img);
images.push(img);
}); });
//for (let i = 0; imgMatches && i < imgMatches.length; i++) { //for (let i = 0; imgMatches && i < imgMatches.length; i++) {
@@ -110,24 +136,23 @@ export default class ProstorCrawler {
//const uploadResults = await Promise.all(uploadPromises); //const uploadResults = await Promise.all(uploadPromises);
//const cloudinaryImages = uploadResults.map(ur => ur.url); //const cloudinaryImages = uploadResults.map(ur => ur.url);
const parsedPrice = parsePrice (price);
const parsedPrice = parsePrice(price);
let parsedRooms; let parsedRooms;
if (rooms === 'Garsonjera') { if (rooms === 'Garsonjera') {
parsedRooms = 0; parsedRooms = 0;
} else { } else {
parsedRooms = parseRooms(rooms); parsedRooms = parseRooms (rooms);
} }
const data = { const data = {
category: this.getCategoryId(category), category: this.getCategoryId (category),
url, url,
title, title,
price: isNaN(parsedPrice) ? price : parsedPrice, price: isNaN (parsedPrice) ? price : parsedPrice,
size: parseFloat(size), size: parseFloat (size),
rooms: parsedRooms, rooms: parsedRooms,
floor: parseInt(floor), floor: parseInt (floor),
address, address,
adType: AD_TYPE_SALE, adType: AD_TYPE_SALE,
time, time,
@@ -135,67 +160,68 @@ export default class ProstorCrawler {
longDescription: descriptions, longDescription: descriptions,
lat, lat,
lng, lng,
loc: [parseFloat(lat), parseFloat(lng)], loc: [parseFloat (lat), parseFloat (lng)],
hasMap,
status,
//images: cloudinaryImages //images: cloudinaryImages
images images,
}; };
console.log(data); console.log (data);
return data; return data;
} catch (e) { } catch (e) {
console.error('Exception caught: ' + e.message); console.error ('Exception caught: ' + e.message);
} }
return null; return null;
} }
async indexPage(pageNr, maxResults = 1000) { async indexPage (pageNr, maxResults = 1000) {
try { try {
console.log ('Starting to index page: ' + pageNr);
console.log('Starting to index page: ' + pageNr);
const url = `http://prostor.ba/index.php`; const url = `http://prostor.ba/index.php`;
const data = new FormData(); const data = new FormData ();
data.append('sortCombo', 'e.date_create DESC'); data.append ('sortCombo', 'e.date_create DESC');
data.append('command', ''); data.append ('command', '');
data.append('action', 'show'); data.append ('action', 'show');
data.append('page', pageNr); data.append ('page', pageNr);
data.append('param', 'ponuda.inc.php'); data.append ('param', 'ponuda.inc.php');
data.append('checkNO', 0); data.append ('checkNO', 0);
data.append('order', 'e.date_create DESC'); data.append ('order', 'e.date_create DESC');
data.append('reset', 0); data.append ('reset', 0);
data.append('estate_action', 1); data.append ('estate_action', 1);
data.append('Itemid', 785); data.append ('Itemid', 785);
const res = await fetch(url, { const res = await fetch (url, {
method: 'POST', method: 'POST',
body: data body: data,
}); });
const body = await res.text(); const body = await res.text ();
const $ = cheerio.load(body); const $ = cheerio.load (body);
const hrefs = []; const hrefs = [];
$('.nekret_box').each((i, elem) => { $ ('.nekret_box').each ((i, elem) => {
const href = $(elem).find("a").first().attr('href'); const href = $ (elem).find ('a').first ().attr ('href');
hrefs.push(`http://prostor.ba/${href}`); hrefs.push (`http://prostor.ba/${href}`);
}); });
const results = {}; const results = {};
for (const href of hrefs) { for (const href of hrefs) {
console.log(`indexing: ${href}`); console.log (`indexing: ${href}`);
const singleData = await this.indexSingle(href); const singleData = await this.indexSingle (href);
if (singleData) { if (singleData) {
results[href] = singleData; results[href] = singleData;
} }
await this.sleep(500); await this.sleep (500);
} }
return results; return results;
} catch (e) { } catch (e) {
console.error('Exception caught:' + e); console.error ('Exception caught:' + e);
} }
} }
@@ -211,22 +237,26 @@ export default class ProstorCrawler {
} }
} }
async sleep(ms) { async sleep (ms) {
return new Promise(resolve => setTimeout(resolve, ms)); return new Promise (resolve => setTimeout (resolve, ms));
} }
async indexPages(start, end, maxResults = 1000) { async indexPages (start, end, maxResults = 1000) {
let results = {}; let results = {};
for (let i = start; i <= end; i++) { for (let i = start; i <= end; i++) {
let result = await this.indexPage(i, maxResults); let result = await this.indexPage (i, maxResults);
Object.assign(results, result) Object.assign (results, result);
await this.sleep(5000); await this.sleep (5000);
} }
return results; return results;
} }
async crawl() { async crawl () {
let results = await this.indexPages(this.fromPage, this.toPage, this.maxResults); let results = await this.indexPages (
this.fromPage,
this.toPage,
this.maxResults
);
return results; return results;
} }
} }

424
crawler/specific/rental.js Normal file
View File

@@ -0,0 +1,424 @@
'use strict';
let fetch = require ('node-fetch');
let cheerio = require ('cheerio');
let fs = require ('fs');
let cloudinary = require ('cloudinary');
let FormData = require ('form-data');
import {
AD_TYPE_SALE,
IGNORED_USERNAMES,
CATEGORY_FLAT,
CATEGORY_HOUSE,
CATEGORY_OFFICE,
CATEGORY_LAND,
CATEGORY_APARTMENT,
CATEGORY_GARAGE,
STATUS_NORMAL,
STATUS_RESERVED,
STATUS_SOLD,
} from '../../common/enums';
export default class RentalCrawler {
constructor (fromPage = 0, toPage = 10, maxResults = 1000) {
console.log ('Rental Crawler');
this.fromPage = fromPage;
this.toPage = toPage;
this.maxResults = maxResults;
}
async indexSingle (url) {
try {
const res = await fetch (url);
const body = await res.text ();
const $ = cheerio.load (body);
var title;
var category;
var price;
var size;
var rooms;
var address;
var descriptions;
var floor;
var floor;
var time;
var lat;
var lng;
var hasMap;
var status;
//No JSON string -> No map
try {
let completeData;
let dataJsonString;
let dataJson;
const startN = 5;
const lastN = 15;
for (let i = startN; i <= lastN; i++) {
try {
completeData = $ (
'body > div.container-fluid > div.container > div:nth-child(2) > div.col-xs-12.col-sm-12.col-md-12.col-lg-9.content-main > div:nth-child(' +
i +
') > script'
).text ();
dataJsonString = completeData.slice (21, -1);
dataJson = JSON.parse (dataJsonString);
break;
} catch (e) {
console.log ('No JSON string');
if (i === lastN) throw e;
}
}
title = dataJson['re_realEstates_portalName'];
category = this.getCategoryIdfromNumber (
parseInt (dataJson['re_types_id'])
); //categories from JSON string doesn't match categories in ENUMS
price = parseFloat (dataJson['re_realEstates_price']);
size = parseFloat (dataJson['re_realEstates_area']);
rooms = parseInt (dataJson['re_realEstates_roomsNO']);
address = dataJson['re_realEstates_address'];
//descriptions = dataJson["re_realEstates_description"];
floor = parseInt (dataJson['re_realEstates_floorNO']);
let timeArray = dataJson['re_realEstates_inserted']
.slice (0, dataJson['re_realEstates_inserted'].indexOf (' '))
.split ('-');
time = timeArray[2] + '.' + timeArray[1] + '.' + timeArray[0];
lat = dataJson['re_realEstates_latitude'];
lng = dataJson['re_realEstates_longitude'];
hasMap = true;
} catch (e) {
console.log ('error : ' + e);
//This ad has no JSON string, informations should be retrieved using HTML selectors
time = undefined;
lat = 0;
lng = 0;
hasMap = false;
price =
parseFloat (
$ (
'body > div.container-fluid > div.container > div:nth-child(2) > div.col-xs-12.col-sm-12.col-md-12.col-lg-9.content-main > div:nth-child(1) > div > div > div.col-xs-12.col-sm-4.box-details > div.prices > span.pull-left'
)
.text ()
.replace (',', '')
.replace ('.', '')
) / 100;
const propsList = {};
$ (
'body > div.container-fluid > div.container > div:nth-child(2) > div.col-xs-12.col-sm-12.col-md-12.col-lg-9.content-main > div:nth-child(1) > div > div > div.col-xs-12.col-sm-4.box-details > div.body'
)
.contents ()
.map ((i, elem) => {
const entry = $ (elem).text ().trim ().split (':');
if (entry[0]) propsList[entry[0]] = entry[1];
});
address = propsList['Ulica'];
size =
parseFloat (
propsList['Površina'].replace (',', '').replace ('.', '')
) / 100;
rooms = parseInt (propsList['Broj soba']);
floor = parseInt (propsList['Spratnost']);
title = $ (
'div.container-fluid > div.container > div.row.content-top > div.col-xs-12.col-sm-6.col-md-9 > div.description.pull-left > h1'
).text ();
descriptions = $ ('#b1 > div > div > div').text ();
const fullCategory = $ (
'body > div.container-fluid > div.container > div:nth-child(2) > div.col-xs-12.col-sm-12.col-md-12.col-lg-9.content-main > div:nth-child(1) > div > div > div.col-xs-12.col-sm-4.box-details > div.title > p'
)
.text ()
.split (',', 3);
category = fullCategory.size > 2
? this.getCategoryIdfromText (fullCategory[0] + fullCategory[1])
: this.getCategoryIdfromText (fullCategory[0]);
}
descriptions = $ ('#b1 > div > div > div').text ();
status = this.getStatusIdFromText (
$ ('#a1 > div.box-badges > div').text ()
);
const images = [];
$ ('.img-gallery').contents ().map ((i, elem) => {
const tmp = $ (elem).attr ('data-preview');
if (tmp) images.push (tmp);
});
const data = {
category,
url,
title,
price,
size,
rooms,
floor,
address,
adType: AD_TYPE_SALE,
time,
shortDescription: title,
longDescription: descriptions,
lat,
lng,
loc: [parseFloat (lat), parseFloat (lng)],
hasMap,
status,
//images: cloudinaryImages
images,
};
return data;
} catch (e) {
console.error ('Exception caught: ' + e.message);
}
return null;
}
async indexPage (pageNr, maxResults = 1000) {
try {
console.log ('Starting to index page: ' + pageNr);
const url = 'http://www.rental.ba/pretraga/prodaja-1/stranica-' + pageNr;
/*
const data = new FormData();
data.append('sales', 1); // Mislim da ovo definiše oglase tipa prodaje
data.append('re_types_id', ''); //odnosi se na tip nekretnine (kuća, stan, apartman,...)
data.append('full_text', '');
data.append('re_realEstates_code', '');
data.append('re_realEstates_price_max', '');
data.append('re_realEstates_price_min', '');
data.append('re_realEstates_area_min', '');
data.append('re_realEstates_area_max', '');
data.append('re_realEstates_roomsNO_min', '');
data.append('re_realEstates_roomsNO_max', '');
data.append('re_realEstates_floorNO_min', '');
data.append('re_realEstates_floorNO_max', '');
data.append('re_subTypes_id', 1);
*/
const res = await fetch (url, {
method: 'POST',
//body: data
});
const body = await res.text ();
const $ = cheerio.load (body);
const hrefs = [];
$ ('.middle').each ((i, elem) => {
const href = $ (elem).find ('a').first ().attr ('href');
hrefs.push (href);
});
const results = {};
for (const href of hrefs) {
console.log (`indexing: ${href}`);
const singleData = await this.indexSingle (href);
if (singleData) {
results[href] = singleData;
}
await this.sleep (500);
}
return results;
} catch (e) {
console.error ('Exception caught:' + e);
}
}
getCategoryIdfromNumber (category) {
switch (category) {
case 1:
return CATEGORY_HOUSE;
case 2:
return CATEGORY_FLAT;
case 3:
return CATEGORY_APARTMENT;
case 4:
return CATEGORY_OFFICE;
case 5:
return CATEGORY_LAND;
case 6:
return CATEGORY_GARAGE;
}
}
getCategoryIdfromText (category) {
switch (category) {
case 'samostojeća':
return CATEGORY_HOUSE;
case 'dvojna':
return CATEGORY_HOUSE;
case 'kuća u nizu':
return CATEGORY_HOUSE;
case 'stambeno-poslovni objekt':
return CATEGORY_HOUSE;
case 'prizemnica':
return CATEGORY_HOUSE;
case 'kuća na moru':
return CATEGORY_HOUSE;
case 'kuća u izgradnji':
return CATEGORY_HOUSE;
case 'dvorac':
return CATEGORY_HOUSE;
case 'apartmanska kuća':
return CATEGORY_HOUSE;
case 'porodična kuća':
return CATEGORY_HOUSE;
case 'vikend kuća':
return CATEGORY_HOUSE;
case 'luksuzna kuća':
return CATEGORY_HOUSE;
case 'kamena':
return CATEGORY_HOUSE;
case 'vila':
return CATEGORY_HOUSE;
case 'splav':
return CATEGORY_HOUSE;
case 'stan u zgradi':
return CATEGORY_FLAT;
case 'stan u kući':
return CATEGORY_FLAT;
case 'stan višeetažni':
return CATEGORY_FLAT;
case 'stan višeetažni u kući':
return CATEGORY_FLAT;
case 'stan u starijoj zgradi':
return CATEGORY_FLAT;
case 'stan u novogradnji':
return CATEGORY_FLAT;
case 'stan u neboderu':
return CATEGORY_FLAT;
case 'Korišten stan u novogradnji':
return CATEGORY_FLAT;
case 'apartman na moru':
return CATEGORY_APARTMENT;
case 'apartman u planini':
return CATEGORY_APARTMENT;
case 'unutrašnje garažno mjesto':
return CATEGORY_GARAGE;
case 'unutrašnje parkirno mjesto':
return CATEGORY_GARAGE;
case 'građevinsko':
return CATEGORY_LAND;
case 'građevinsko stambeno':
return CATEGORY_LAND;
case 'zemljište, ostalo':
return CATEGORY_LAND;
case 'odmaralište':
return CATEGORY_LAND;
case 'oranica':
return CATEGORY_LAND;
case 'šuma':
return CATEGORY_LAND;
case 'livada':
return CATEGORY_LAND;
case 'građevinsko M2':
return CATEGORY_LAND;
case 'građevinsko M1':
return CATEGORY_LAND;
case 'građevinsko - turističko':
return CATEGORY_LAND;
case 'građevinsko - poslovno':
return CATEGORY_LAND;
case 'otok':
return CATEGORY_LAND;
case 'poljoprivredno':
return CATEGORY_LAND;
case 'lokal':
return CATEGORY_OFFICE;
case 'ured':
return CATEGORY_OFFICE;
case 'skladište ili garaža':
return CATEGORY_OFFICE;
case 'radionica':
return CATEGORY_OFFICE;
case 'tvornica':
return CATEGORY_OFFICE;
case 'restoran':
return CATEGORY_OFFICE;
case 'sportski centar':
return CATEGORY_OFFICE;
case 'ordinacija':
return CATEGORY_OFFICE;
case 'kiosk':
return CATEGORY_OFFICE;
case 'auto-praonica':
return CATEGORY_OFFICE;
case 'poslovna zgrada':
return CATEGORY_OFFICE;
case 'skladište':
return CATEGORY_OFFICE;
case 'garaža':
return CATEGORY_OFFICE;
case 'hotel':
return CATEGORY_OFFICE;
case 'pansion':
return CATEGORY_OFFICE;
case 'apartmanska zgrada':
return CATEGORY_OFFICE;
case 'trgovina':
return CATEGORY_OFFICE;
case 'prodajno skladišni':
return CATEGORY_OFFICE;
case 'proizvodno skladišni':
return CATEGORY_OFFICE;
case 'Kancelarije':
return CATEGORY_OFFICE;
case 'Poslovni prostor':
return CATEGORY_OFFICE;
}
}
getStatusIdFromText (status) {
if (status === 'Prodato') return STATUS_SOLD;
return STATUS_NORMAL;
}
async sleep (ms) {
return new Promise (resolve => setTimeout (resolve, ms));
}
async indexPages (start, end, maxResults = 1000) {
let results = {};
for (let i = start; i <= end; i++) {
let result = await this.indexPage (i, maxResults);
Object.assign (results, result);
await this.sleep (5000);
}
return results;
}
async crawl () {
let results = await this.indexPages (
this.fromPage,
this.toPage,
this.maxResults
);
return results;
}
}

View File

@@ -4,17 +4,14 @@ module.exports = {
output: { output: {
path: __dirname + "/build", path: __dirname + "/build",
filename: "crawler.js", filename: "crawler.js"
devtool: 'source-map'
}, },
module: { module: {
loaders: [{ loaders: [{
test: /.js?$/, test: /.js?$/,
loader: 'babel-loader', loader: 'babel-loader',
exclude: /node_modules/, exclude: /node_modules/
presets: ['es2015'],
plugins: ['transform-async-to-generator']
}, { }, {
test: /.json?$/, test: /.json?$/,
loader: 'json-loader', loader: 'json-loader',

File diff suppressed because it is too large Load Diff

5276
package-lock.json generated Normal file

File diff suppressed because it is too large Load Diff

53
package.json Normal file
View File

@@ -0,0 +1,53 @@
{
"name": "kivi",
"version": "1.0.0",
"description": "",
"main": "",
"scripts": {
"web:dev": "webpack-dev-server --content-base ./web/dist --config ./web/webpack.config --hot --inline --host 0.0.0.0",
"test": "echo \"Error: no test specified\" && exit 1",
"format": "prettier-standard 'src/**/*.js'"
},
"author": "",
"license": "ISC",
"dependencies": {
"babel-core": "^6.24.0",
"babel": "^6.5.2",
"babel-plugin-transform-async-to-generator": "^6.16.0",
"babel-loader": "^6.4.1",
"babel-polyfill": "^6.23.0",
"babel-preset-es2015": "^6.24.0",
"babel-preset-es2017": "^6.22.0",
"body-parser": "^1.17.1",
"cookie-parser": "^1.4.3",
"date-fns": "^1.28.2",
"express": "^4.15.2",
"isomorphic-fetch": "^2.2.1",
"moment": "^2.18.1",
"mongodb": "^2.2.25",
"cheerio": "^0.22.0",
"cloudinary": "^1.8.0",
"dotenv": "^2.0.0",
"fetch": "^1.1.0",
"form-data": "^2.1.4",
"json-loader": "^0.5.4",
"source-map-support": "^0.4.6",
"twilio": "^2.11.0",
"babel-preset-stage-3": "^6.22.0",
"lodash.clonedeep": "^4.5.0",
"lodash.merge": "^4.6.0",
"react": "^15.3.2",
"react-dom": "^15.3.2"
},
"devDependencies": {
"babel-core": "^6.18.2",
"babel-loader": "^6.2.7",
"babel-preset-react": "^6.16.0",
"eslint": "^3.19.0",
"prettier": "^0.22.0",
"prettier-standard": "^3.0.1",
"webpack": "1.13.3",
"webpack-dev-server": "^1.16.2",
"babel-preset-es2015": "^6.24.1"
}
}

View File

@@ -1,31 +0,0 @@
{
"name": "web",
"version": "1.0.0",
"description": "",
"main": "index.js",
"scripts": {
"dev": "webpack-dev-server --content-base ./dist --hot --inline --host 0.0.0.0",
"test": "echo \"Error: no test specified\" && exit 1",
"format": "prettier-standard 'src/**/*.js'"
},
"author": "",
"license": "ISC",
"dependencies": {
"babel-preset-stage-3": "^6.22.0",
"lodash.clonedeep": "^4.5.0",
"lodash.merge": "^4.6.0",
"react": "^15.3.2",
"react-dom": "^15.3.2"
},
"devDependencies": {
"babel-core": "^6.18.2",
"babel-loader": "^6.2.7",
"babel-preset-es2015": "^6.18.0",
"babel-preset-react": "^6.16.0",
"eslint": "^3.19.0",
"prettier": "^0.22.0",
"prettier-standard": "^3.0.1",
"webpack": "^1.13.3",
"webpack-dev-server": "^1.16.2"
}
}

View File

@@ -5,7 +5,7 @@ import {
CATEGORY_HOUSE, CATEGORY_HOUSE,
CATEGORY_OFFICE, CATEGORY_OFFICE,
CATEGORY_LAND CATEGORY_LAND
} from "../../../crawler/enums"; } from "../../../common/enums";
export default class Filters extends React.Component { export default class Filters extends React.Component {
onCloseClick(e) { onCloseClick(e) {

View File

@@ -1,5 +1,5 @@
import React from 'react' import React from 'react'
import Gallery from './gallery' import Gallery from './Gallery'
import {formatPrice, formatRooms, formatFloor} from '../lib/helpers' import {formatPrice, formatRooms, formatFloor} from '../lib/helpers'
import ContactModal from './ContactModal'; import ContactModal from './ContactModal';

View File

@@ -21,7 +21,8 @@ class Main extends React.Component {
sort: 'relevance', sort: 'relevance',
filters: { filters: {
rooms: {}, rooms: {},
category: {} category: {},
status : {}
}, },
mobileView: 'MAP', mobileView: 'MAP',
contact: { contact: {

View File

@@ -1,5 +1,5 @@
module.exports = { module.exports = {
entry: ["./src/index.js"], entry: [__dirname + "/src/index.js"],
output: { output: {
path: __dirname + "/dist", path: __dirname + "/dist",
filename: "app.bundle.js", filename: "app.bundle.js",

View File

@@ -1,6 +0,0 @@
# THIS IS AN AUTOGENERATED FILE. DO NOT EDIT THIS FILE DIRECTLY.
# yarn lockfile v1
lodash:
version "4.17.4"
resolved "https://registry.yarnpkg.com/lodash/-/lodash-4.17.4.tgz#78203a4d1c328ae1d86dca6460e369b57f4055ae"