Compare commits
8 Commits
mobile-sec
...
master
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
aeba6fdc2f | ||
|
|
7a5f7242ac | ||
|
|
a63c108259 | ||
|
|
039e34237d | ||
|
|
5d90e5efcb | ||
|
|
1743171cfd | ||
|
|
b2787ebda5 | ||
|
|
aea928fdef |
1
.gitignore
vendored
1
.gitignore
vendored
@@ -1,4 +1,5 @@
|
|||||||
node_modules
|
node_modules
|
||||||
.DS_Store
|
.DS_Store
|
||||||
crawler/build
|
crawler/build
|
||||||
|
backend/build
|
||||||
npm-debug.log
|
npm-debug.log
|
||||||
|
|||||||
55
README.md
55
README.md
@@ -1,28 +1,37 @@
|
|||||||
# kivi.ba
|
|
||||||
|
|
||||||
Kivi je najbolji nacin da nadjete svoj novi dom.
|
|
||||||
|
|
||||||
## Getting started
|
|
||||||
|
|
||||||
### Web
|
|
||||||
|
|
||||||
Dragi developeru, potrebno je da uradis sljedece:
|
|
||||||
|
|
||||||
1. cd web
|
|
||||||
2. yarn install
|
|
||||||
3. npm run dev
|
|
||||||
4. visit http://localhost:8080
|
|
||||||
5. profit!
|
|
||||||
|
|
||||||
Ukljucen je webpack hot module replacement + webpack-dev-server tako da se sve izmjene (osim CSS-a) odmah vide jer se browser sam reloada.
|
|
||||||
|
|
||||||
|
|
||||||
### Crawler
|
## 1. Cloning repo
|
||||||
|
|
||||||
Trenutno postoji samo jedan crawler a to je `olx.js`
|
`git clone git@github.com:edazdarevic/kivi.git`
|
||||||
|
|
||||||
1. cd crawler
|
`cd kivi`
|
||||||
2. npm run dev
|
|
||||||
3. node build/crawler.js
|
|
||||||
4. profit!
|
|
||||||
|
|
||||||
|
## 2. Start MongoDB
|
||||||
|
|
||||||
|
## 3. Build crawler and crawl some data
|
||||||
|
|
||||||
|
`cd crawler`
|
||||||
|
|
||||||
|
`npm install`
|
||||||
|
|
||||||
|
`webpack`
|
||||||
|
|
||||||
|
`PROSTOR_FROM_PAGE=1 PROSTOR_TO_PAGE=10 MONGO_URL=mongodb://localhost:27017/kivi CLOUDINARY_URL=cloudinary://845665345722369:Nw7KYvLs0xkzt6BmE-d_LU6H2LY@kivi node build/crawler.js`
|
||||||
|
|
||||||
|
## 4. Start backend server
|
||||||
|
|
||||||
|
`cd backend`
|
||||||
|
|
||||||
|
`npm install`
|
||||||
|
|
||||||
|
`webpack & webpack & node build/server.js`
|
||||||
|
|
||||||
|
## 5. Start front-end dev server
|
||||||
|
|
||||||
|
`cd web`
|
||||||
|
|
||||||
|
`npm install`
|
||||||
|
|
||||||
|
`npm run dev`
|
||||||
|
|
||||||
|
## 6. Visit http://localhost:8080
|
||||||
|
|||||||
@@ -1,3 +0,0 @@
|
|||||||
{
|
|
||||||
"presets": ["es2015", "es2017"],
|
|
||||||
}
|
|
||||||
File diff suppressed because it is too large
Load Diff
@@ -1,26 +0,0 @@
|
|||||||
{
|
|
||||||
"name": "backend",
|
|
||||||
"version": "1.0.0",
|
|
||||||
"description": "",
|
|
||||||
"main": "server.js",
|
|
||||||
"scripts": {
|
|
||||||
"test": "echo \"Error: no test specified\" && exit 1",
|
|
||||||
"start": "node server.js"
|
|
||||||
},
|
|
||||||
"author": "",
|
|
||||||
"license": "ISC",
|
|
||||||
"dependencies": {
|
|
||||||
"babel-core": "^6.24.0",
|
|
||||||
"babel-loader": "^6.4.1",
|
|
||||||
"babel-polyfill": "^6.23.0",
|
|
||||||
"babel-preset-es2015": "^6.24.0",
|
|
||||||
"babel-preset-es2017": "^6.22.0",
|
|
||||||
"body-parser": "^1.17.1",
|
|
||||||
"cookie-parser": "^1.4.3",
|
|
||||||
"date-fns": "^1.28.2",
|
|
||||||
"express": "^4.15.2",
|
|
||||||
"isomorphic-fetch": "^2.2.1",
|
|
||||||
"moment": "^2.18.1",
|
|
||||||
"mongodb": "^2.2.25"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,83 +1,85 @@
|
|||||||
import express from 'express'
|
import express from 'express';
|
||||||
import bodyParser from 'body-parser';
|
import bodyParser from 'body-parser';
|
||||||
import distanceInWordsToNow from 'date-fns/distance_in_words_to_now';
|
import distanceInWordsToNow from 'date-fns/distance_in_words_to_now';
|
||||||
import parseDate from 'date-fns/format';
|
import parseDate from 'date-fns/format';
|
||||||
import moment from 'moment';
|
import moment from 'moment';
|
||||||
|
|
||||||
var hr = require('date-fns/locale/hr');
|
import {STATUS_NORMAL, STATUS_RESERVED, STATUS_SOLD} from '../common/enums';
|
||||||
|
|
||||||
var MongoClient = require('mongodb').MongoClient;
|
var hr = require ('date-fns/locale/hr');
|
||||||
var ObjectID = require('mongodb').ObjectID;
|
|
||||||
|
var MongoClient = require ('mongodb').MongoClient;
|
||||||
|
var ObjectID = require ('mongodb').ObjectID;
|
||||||
|
|
||||||
var url = 'mongodb://localhost:27017/kivi';
|
var url = 'mongodb://localhost:27017/kivi';
|
||||||
|
|
||||||
require("babel-polyfill");
|
require ('babel-polyfill');
|
||||||
|
|
||||||
const router = express.Router({mergeParams: true})
|
const router = express.Router ({mergeParams: true});
|
||||||
|
|
||||||
const PORT = process.env.PORT || 3001;
|
const PORT = process.env.PORT || 3001;
|
||||||
const AGENTURA_KEY = process.env.AGENTURA_KEY || '1somethingverysecret';
|
const AGENTURA_KEY = process.env.AGENTURA_KEY || '1somethingverysecret';
|
||||||
|
|
||||||
let db;
|
let db;
|
||||||
|
|
||||||
router.post('/contact/:listingId', async (req, res, next) => {
|
router.post ('/contact/:listingId', async (req, res, next) => {
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const listingId = req.params.listingId;
|
const listingId = req.params.listingId;
|
||||||
const body = req.body;
|
const body = req.body;
|
||||||
|
|
||||||
const contactRequests = db.collection('contact_requests');
|
const contactRequests = db.collection ('contact_requests');
|
||||||
|
|
||||||
if (!body.email) {
|
if (!body.email) {
|
||||||
res.status(422);
|
res.status (422);
|
||||||
res.end('Email is required');
|
res.end ('Email is required');
|
||||||
return
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!body.name) {
|
if (!body.name) {
|
||||||
res.status(422);
|
res.status (422);
|
||||||
res.end('Name is required');
|
res.end ('Name is required');
|
||||||
return
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
const result = await contactRequests.insertOne({
|
const result = await contactRequests.insertOne ({
|
||||||
name : body.name,
|
name: body.name,
|
||||||
email : body.email,
|
email: body.email,
|
||||||
listingId,
|
listingId,
|
||||||
message : body.message,
|
message: body.message,
|
||||||
phone : body.phone,
|
phone: body.phone,
|
||||||
alert : body.alert
|
alert: body.alert,
|
||||||
});
|
});
|
||||||
|
|
||||||
res.status(200);
|
res.status (200);
|
||||||
res.end();
|
res.end ();
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
console.log('error:', e);
|
console.log ('error:', e);
|
||||||
next(e);
|
next (e);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
router.get('/search/listings/:id', async (req, res, next) => {
|
router.get ('/search/listings/:id', async (req, res, next) => {
|
||||||
try {
|
try {
|
||||||
const id = req.params.id;
|
const id = req.params.id;
|
||||||
|
|
||||||
const listings = db.collection('listings');
|
const listings = db.collection ('listings');
|
||||||
const listing = await listings.findOne({_id: new ObjectID(id)});
|
const listing = await listings.findOne ({_id: new ObjectID (id)});
|
||||||
if (listing) {
|
if (listing) {
|
||||||
res.json(listing);
|
res.json (listing);
|
||||||
} else {
|
} else {
|
||||||
res.status(404);
|
res.status (404);
|
||||||
}
|
}
|
||||||
|
|
||||||
res.end();
|
res.end ();
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
console.log('error:', e);
|
console.log ('error:', e);
|
||||||
next(e);
|
next (e);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
router.get('/search/listings', async (req, res, next) => {
|
router.get ('/search/listings', async (req, res, next) => {
|
||||||
try {
|
try {
|
||||||
|
console.log ('Search listings');
|
||||||
const bounds = req.query.bounds || '';
|
const bounds = req.query.bounds || '';
|
||||||
const minPrice = req.query.minPrice;
|
const minPrice = req.query.minPrice;
|
||||||
const maxPrice = req.query.maxPrice;
|
const maxPrice = req.query.maxPrice;
|
||||||
@@ -90,98 +92,128 @@ router.get('/search/listings', async (req, res, next) => {
|
|||||||
const page = req.query.page || 0;
|
const page = req.query.page || 0;
|
||||||
const pins = req.query.pins || false;
|
const pins = req.query.pins || false;
|
||||||
|
|
||||||
const properties = db.collection('listings');
|
const properties = db.collection ('listings');
|
||||||
let query = {};
|
let query = {};
|
||||||
|
|
||||||
|
//Get only ads with location
|
||||||
|
query = Object.assign (query, {
|
||||||
|
has_map: true,
|
||||||
|
});
|
||||||
|
|
||||||
|
//AND
|
||||||
|
|
||||||
|
//Do not show sold or reserved property
|
||||||
|
query = Object.assign (query, {
|
||||||
|
status: STATUS_NORMAL,
|
||||||
|
});
|
||||||
|
|
||||||
|
//AND
|
||||||
|
|
||||||
|
//Show ads that fall inside visible map
|
||||||
if (bounds) {
|
if (bounds) {
|
||||||
const [lat1, lng1, lat2, lng2] = bounds.split(',').map(parseFloat)
|
const [lat1, lng1, lat2, lng2] = bounds.split (',').map (parseFloat);
|
||||||
const box = [[lat1, lng1], [lat2, lng2]];
|
const box = [[lat1, lng1], [lat2, lng2]];
|
||||||
|
|
||||||
query = Object.assign(query, {
|
query = Object.assign (query, {
|
||||||
loc: {
|
loc: {
|
||||||
"$geoWithin": {
|
$geoWithin: {
|
||||||
"$box": box
|
$box: box,
|
||||||
}
|
},
|
||||||
}
|
},
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//AND
|
||||||
|
|
||||||
|
//Show only selected type of ads (selling or renting)
|
||||||
if (adType) {
|
if (adType) {
|
||||||
query = Object.assign(query, {
|
query = Object.assign (query, {
|
||||||
adType: parseInt(adType)
|
adType: parseInt (adType),
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//AND
|
||||||
|
|
||||||
|
//Match price
|
||||||
if (minPrice || maxPrice) {
|
if (minPrice || maxPrice) {
|
||||||
const price = {}
|
const price = {};
|
||||||
if (minPrice) {
|
if (minPrice) {
|
||||||
price["$gte"] = parseFloat(minPrice);
|
price['$gte'] = parseFloat (minPrice);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (maxPrice) {
|
if (maxPrice) {
|
||||||
price["$lte"] = parseFloat(maxPrice);
|
price['$lte'] = parseFloat (maxPrice);
|
||||||
}
|
}
|
||||||
|
|
||||||
query = Object.assign(query, {
|
query = Object.assign (query, {
|
||||||
price
|
price,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
const and = [];
|
//AND
|
||||||
|
|
||||||
|
//Match number of rooms
|
||||||
if (rooms) {
|
if (rooms) {
|
||||||
const allRooms = rooms.split(',');
|
const roomCount = [];
|
||||||
const or = allRooms.map(val => {
|
let fourPlus = false;
|
||||||
if (val === '4+') {
|
|
||||||
return {
|
const allRooms = rooms.split (',');
|
||||||
rooms: {
|
allRooms.map (val => {
|
||||||
"$gte": 4
|
if (parseInt (val) !== 4) {
|
||||||
|
roomCount.push (parseInt (val));
|
||||||
|
} else {
|
||||||
|
fourPlus = true;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
}
|
|
||||||
return {
|
|
||||||
rooms: parseFloat(val)
|
|
||||||
};
|
|
||||||
});
|
});
|
||||||
|
|
||||||
and.push({ "$or": or });
|
if (fourPlus) {
|
||||||
|
query = Object.assign (query, {
|
||||||
|
rooms: {$gte: 4},
|
||||||
|
});
|
||||||
|
} else {
|
||||||
|
query = Object.assign (query, {
|
||||||
|
rooms: {$in: roomCount},
|
||||||
|
});
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//AND
|
||||||
|
|
||||||
|
//Match size
|
||||||
if (minSize || maxSize) {
|
if (minSize || maxSize) {
|
||||||
const size = {}
|
const size = {};
|
||||||
if (minSize) {
|
if (minSize) {
|
||||||
size["$gte"] = parseFloat(minSize);
|
size['$gte'] = parseFloat (minSize);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (maxSize) {
|
if (maxSize) {
|
||||||
size["$lte"] = parseFloat(maxSize);
|
size['$lte'] = parseFloat (maxSize);
|
||||||
}
|
}
|
||||||
|
|
||||||
query = Object.assign(query, {
|
query = Object.assign (query, {
|
||||||
size
|
size,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//AND
|
||||||
|
|
||||||
|
//Match category
|
||||||
if (category) {
|
if (category) {
|
||||||
const allCategories = category.split(',');
|
const categoryCount = [];
|
||||||
const or = allCategories.map(val => {
|
|
||||||
return {
|
const allCategories = category.split (',').map (val => {
|
||||||
category: parseInt(val)
|
categoryCount.push (parseInt (val));
|
||||||
};
|
|
||||||
});
|
});
|
||||||
|
|
||||||
and.push({ "$or": or });
|
query = Object.assign (query, {
|
||||||
}
|
category: {$in: categoryCount},
|
||||||
|
|
||||||
if (and.length > 0) {
|
|
||||||
query = Object.assign(query, {
|
|
||||||
"$and": and
|
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
console.log('QUERY: ', query);
|
console.log ('QUERY: ', query);
|
||||||
const cnt = await properties.find(query).count();
|
const cnt = await properties.find (query).count ();
|
||||||
|
|
||||||
res.header('X-Total-Count', cnt);
|
res.header ('X-Total-Count', cnt);
|
||||||
|
|
||||||
const getSort = () => {
|
const getSort = () => {
|
||||||
if (sort === 'price-min') {
|
if (sort === 'price-min') {
|
||||||
@@ -196,79 +228,80 @@ router.get('/search/listings', async (req, res, next) => {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
let all = properties.find(query, {
|
let all = properties.find (query, {
|
||||||
//"sort": [['field1','asc'], ['field2','desc']]
|
//"sort": [['field1','asc'], ['field2','desc']]
|
||||||
"sort": getSort()
|
sort: getSort (),
|
||||||
});
|
});
|
||||||
|
|
||||||
const isPins = pins === "true";
|
const isPins = pins === 'true';
|
||||||
|
|
||||||
if (!isPins) {
|
if (!isPins) {
|
||||||
all = await all.skip(20 * page).limit(20).toArray();
|
all = await all.skip (20 * page).limit (20).toArray ();
|
||||||
} else {
|
} else {
|
||||||
all = await all.toArray();
|
all = await all.toArray ();
|
||||||
}
|
}
|
||||||
|
|
||||||
if (all.length > 0) {
|
if (all.length > 0) {
|
||||||
res.header('X-Last-Record-Id', [...all].pop()._id);
|
res.header ('X-Last-Record-Id', [...all].pop ()._id);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (isPins) {
|
if (isPins) {
|
||||||
res.json(all.map(val => {
|
res.json (
|
||||||
|
all.map (val => {
|
||||||
return {
|
return {
|
||||||
_id: val._id,
|
_id: val._id,
|
||||||
loc: val.loc
|
loc: val.loc,
|
||||||
}
|
};
|
||||||
}));
|
})
|
||||||
|
);
|
||||||
} else {
|
} else {
|
||||||
|
res.json (
|
||||||
res.json(all.map(({_id,
|
all.map (({_id, address, images, price, rooms, size, time}) => ({
|
||||||
address,
|
|
||||||
images,
|
|
||||||
price,
|
|
||||||
rooms,
|
|
||||||
size,
|
|
||||||
time
|
|
||||||
}) => ({
|
|
||||||
_id,
|
_id,
|
||||||
address,
|
address,
|
||||||
images: [images[0]],
|
images: [images[0]],
|
||||||
price,
|
price,
|
||||||
rooms,
|
rooms,
|
||||||
size,
|
size,
|
||||||
time: distanceInWordsToNow(
|
time: distanceInWordsToNow (moment (time, 'DD.MM.YYYY'), {
|
||||||
moment(time, 'DD.MM.YYYY'),
|
locale: hr,
|
||||||
{locale: hr}
|
}),
|
||||||
),
|
realTime: time,
|
||||||
realTime: time
|
}))
|
||||||
})));
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
res.end();
|
res.end ();
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
console.log('error:', e);
|
console.log ('error:', e);
|
||||||
next(e);
|
next (e);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
const app = express ();
|
||||||
|
app.use (bodyParser.json ());
|
||||||
|
|
||||||
const app = express()
|
app.use (function (req, res, next) {
|
||||||
app.use(bodyParser.json());
|
res.header ('Access-Control-Allow-Origin', '*');
|
||||||
|
res.header (
|
||||||
app.use(function(req, res, next) {
|
'Access-Control-Allow-Headers',
|
||||||
res.header("Access-Control-Allow-Origin", "*");
|
'Origin, X-Requested-With, Content-Type, Accept, X-Last-Record-Id, X-Total-Count'
|
||||||
res.header("Access-Control-Allow-Headers", "Origin, X-Requested-With, Content-Type, Accept, X-Last-Record-Id, X-Total-Count");
|
);
|
||||||
res.header("Access-Control-Expose-Headers", "X-Last-Record-Id, X-Total-Count");
|
res.header (
|
||||||
res.header("Access-Control-Allow-Methods", "GET, POST, OPTIONS");
|
'Access-Control-Expose-Headers',
|
||||||
res.header('Access-Control-Allow-Credentials', 'true');
|
'X-Last-Record-Id, X-Total-Count'
|
||||||
next();
|
);
|
||||||
|
res.header ('Access-Control-Allow-Methods', 'GET, POST, OPTIONS');
|
||||||
|
res.header ('Access-Control-Allow-Credentials', 'true');
|
||||||
|
next ();
|
||||||
});
|
});
|
||||||
|
|
||||||
app.use('/api', router);
|
app.use ('/api', router);
|
||||||
|
|
||||||
MongoClient.connect(url).then((database) => {
|
MongoClient.connect (url).then (database => {
|
||||||
db = database;
|
db = database;
|
||||||
db.collection('listings').createIndex({loc: "2d"});
|
db.collection ('listings').createIndex ({loc: '2d'});
|
||||||
app.listen(PORT, () => console.log('Express server running at localhost: ' + PORT));
|
app.listen (PORT, () =>
|
||||||
|
console.log ('Express server running at localhost: ' + PORT)
|
||||||
|
);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|||||||
@@ -7,7 +7,7 @@ module.exports = {
|
|||||||
filename: 'build/server.js'
|
filename: 'build/server.js'
|
||||||
},
|
},
|
||||||
target: 'node',
|
target: 'node',
|
||||||
externals: fs.readdirSync(path.resolve(__dirname, 'node_modules')).reduce((ext, mod) => {
|
externals: fs.readdirSync(path.resolve(__dirname, '../node_modules')).reduce((ext, mod) => {
|
||||||
ext[mod] = 'commonjs ' + mod
|
ext[mod] = 'commonjs ' + mod
|
||||||
return ext
|
return ext
|
||||||
}, {}),
|
}, {}),
|
||||||
|
|||||||
1128
backend/yarn.lock
1128
backend/yarn.lock
File diff suppressed because it is too large
Load Diff
3
common/.babelrc
Normal file
3
common/.babelrc
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
{
|
||||||
|
"presets": ["es2015", "react", "stage-3"]
|
||||||
|
}
|
||||||
@@ -7,4 +7,10 @@ export const CATEGORY_FLAT = 0;
|
|||||||
export const CATEGORY_HOUSE = 1;
|
export const CATEGORY_HOUSE = 1;
|
||||||
export const CATEGORY_OFFICE = 2;
|
export const CATEGORY_OFFICE = 2;
|
||||||
export const CATEGORY_LAND = 3;
|
export const CATEGORY_LAND = 3;
|
||||||
|
export const CATEGORY_APARTMENT = 4;
|
||||||
|
export const CATEGORY_GARAGE = 5;
|
||||||
|
|
||||||
|
export const STATUS_NORMAL = 0;
|
||||||
|
export const STATUS_RESERVED = 1;
|
||||||
|
export const STATUS_SOLD = 2;
|
||||||
|
|
||||||
@@ -14,13 +14,15 @@ import {
|
|||||||
import 'dotenv/config';
|
import 'dotenv/config';
|
||||||
import OlxCrawler from './specific/olx';
|
import OlxCrawler from './specific/olx';
|
||||||
import ProstorCrawler from './specific/prostor';
|
import ProstorCrawler from './specific/prostor';
|
||||||
|
import RentalCrawler from './specific/rental';
|
||||||
import MongoSaver from './savers/mongo'
|
import MongoSaver from './savers/mongo'
|
||||||
|
|
||||||
install(); // for source maps to work
|
install(); // for source maps to work
|
||||||
|
|
||||||
let crawlers = [
|
let crawlers = [
|
||||||
//new OlxCrawler(process.env.OLX_FROM_PAGE, process.env.OLX_TO_PAGE, process.env.OLX_MAX_RESULTS),
|
//new OlxCrawler(process.env.OLX_FROM_PAGE, process.env.OLX_TO_PAGE, process.env.OLX_MAX_RESULTS),
|
||||||
new ProstorCrawler(parseInt(process.env.PROSTOR_FROM_PAGE), parseInt(process.env.PROSTOR_TO_PAGE), parseInt(process.env.PROSTOR_MAX_RESULTS))
|
new ProstorCrawler(parseInt(process.env.PROSTOR_FROM_PAGE), parseInt(process.env.PROSTOR_TO_PAGE), parseInt(process.env.PROSTOR_MAX_RESULTS)),
|
||||||
|
new RentalCrawler(parseInt(process.env.RENTAL_FROM_PAGE), parseInt(process.env.RENTAL_TO_PAGE), parseInt(process.env.RENTAL_MAX_RESULTS))
|
||||||
];
|
];
|
||||||
|
|
||||||
let savers = [
|
let savers = [
|
||||||
|
|||||||
10
crawler/detalji
Normal file
10
crawler/detalji
Normal file
@@ -0,0 +1,10 @@
|
|||||||
|
kategorije :
|
||||||
|
|
||||||
|
kuća = 1
|
||||||
|
stan = 2
|
||||||
|
apartman = 3
|
||||||
|
poslovni prostor = 4
|
||||||
|
zemljište = 5
|
||||||
|
garaža = 6
|
||||||
|
|
||||||
|
Datum spremiti u formatu dan.mjesec.godina, u polje "time"
|
||||||
@@ -1,34 +0,0 @@
|
|||||||
{
|
|
||||||
"name": "stan",
|
|
||||||
"version": "1.0.0",
|
|
||||||
"description": "",
|
|
||||||
"main": "index.js",
|
|
||||||
"dependencies": {
|
|
||||||
"babel": "^6.5.2",
|
|
||||||
"babel-core": "^6.18.2",
|
|
||||||
"babel-loader": "^6.2.7",
|
|
||||||
"babel-plugin-transform-async-to-generator": "^6.16.0",
|
|
||||||
"babel-polyfill": "^6.16.0",
|
|
||||||
"babel-preset-es2015": "^6.18.0",
|
|
||||||
"cheerio": "^0.22.0",
|
|
||||||
"cloudinary": "^1.8.0",
|
|
||||||
"dotenv": "^2.0.0",
|
|
||||||
"fetch": "^1.1.0",
|
|
||||||
"form-data": "^2.1.4",
|
|
||||||
"json-loader": "^0.5.4",
|
|
||||||
"mongodb": "^2.2.11",
|
|
||||||
"node-fetch": "^1.6.3",
|
|
||||||
"source-map-support": "^0.4.6",
|
|
||||||
"twilio": "^2.11.0"
|
|
||||||
},
|
|
||||||
"devDependencies": {
|
|
||||||
"webpack": "^1.13.3"
|
|
||||||
},
|
|
||||||
"scripts": {
|
|
||||||
"dev": "webpack",
|
|
||||||
"prod": "webpack -p",
|
|
||||||
"test": "echo \"Error: no test specified\" && exit 1"
|
|
||||||
},
|
|
||||||
"author": "",
|
|
||||||
"license": "ISC"
|
|
||||||
}
|
|
||||||
@@ -12,7 +12,7 @@ import {
|
|||||||
CATEGORY_HOUSE,
|
CATEGORY_HOUSE,
|
||||||
CATEGORY_OFFICE,
|
CATEGORY_OFFICE,
|
||||||
CATEGORY_LAND
|
CATEGORY_LAND
|
||||||
} from '../enums';
|
} from '../../common/enums';
|
||||||
|
|
||||||
export default class OlxCrawler {
|
export default class OlxCrawler {
|
||||||
|
|
||||||
|
|||||||
@@ -1,10 +1,10 @@
|
|||||||
'use strict'
|
'use strict';
|
||||||
|
|
||||||
let fetch = require('node-fetch');
|
let fetch = require ('node-fetch');
|
||||||
let cheerio = require('cheerio');
|
let cheerio = require ('cheerio');
|
||||||
let fs = require('fs');
|
let fs = require ('fs');
|
||||||
let cloudinary = require('cloudinary');
|
let cloudinary = require ('cloudinary');
|
||||||
let FormData = require('form-data');
|
let FormData = require ('form-data');
|
||||||
|
|
||||||
import {
|
import {
|
||||||
AD_TYPE_SALE,
|
AD_TYPE_SALE,
|
||||||
@@ -12,60 +12,85 @@ import {
|
|||||||
CATEGORY_FLAT,
|
CATEGORY_FLAT,
|
||||||
CATEGORY_HOUSE,
|
CATEGORY_HOUSE,
|
||||||
CATEGORY_OFFICE,
|
CATEGORY_OFFICE,
|
||||||
CATEGORY_LAND
|
CATEGORY_LAND,
|
||||||
} from '../enums';
|
STATUS_NORMAL,
|
||||||
|
STATUS_RESERVED,
|
||||||
|
STATUS_SOLD,
|
||||||
|
} from '../../common/enums';
|
||||||
|
|
||||||
export default class ProstorCrawler {
|
export default class ProstorCrawler {
|
||||||
|
constructor (fromPage = 0, toPage = 10, maxResults = 1000) {
|
||||||
constructor(fromPage = 0, toPage = 10, maxResults = 1000) {
|
|
||||||
this.fromPage = fromPage;
|
this.fromPage = fromPage;
|
||||||
this.toPage = toPage;
|
this.toPage = toPage;
|
||||||
this.maxResults = maxResults;
|
this.maxResults = maxResults;
|
||||||
}
|
}
|
||||||
|
|
||||||
async indexSingle(url) {
|
async indexSingle (url) {
|
||||||
try {
|
try {
|
||||||
|
const res = await fetch (url);
|
||||||
|
const body = await res.text ();
|
||||||
|
const $ = cheerio.load (body);
|
||||||
|
|
||||||
const res = await fetch(url);
|
const title = $ (
|
||||||
const body = await res.text();
|
'#nav_center_sub > div.content_area_1_left > div:nth-child(1) > h1'
|
||||||
const $ = cheerio.load(body);
|
).text ();
|
||||||
|
|
||||||
const title = $('#nav_center_sub > div.content_area_1_left > div:nth-child(1) > h1').text();
|
const category = $ (
|
||||||
|
'#nav_center_sub > div.content_area_1_left > div.bottom10 > div.content_lr_in_show > div:nth-child(3) > div:nth-child(4) > div.size_rs > span'
|
||||||
|
).text ();
|
||||||
|
|
||||||
const category = $('#nav_center_sub > div.content_area_1_left > div.bottom10 > div.content_lr_in_show > div:nth-child(3) > div:nth-child(4) > div.size_rs > span').text();
|
const price = $ (
|
||||||
|
'#nav_center_sub > div.content_area_1_left > div.bottom10 > div.content_lr_in_show > div:nth-child(1) > div.size_rs > strong'
|
||||||
|
).text ();
|
||||||
|
|
||||||
|
const size = $ (
|
||||||
|
'#nav_center_sub > div.content_area_1_left > div.bottom10 > div.content_lr_in_show > div:nth-child(4) > div:nth-child(7) > div.size_rs > span'
|
||||||
|
).text ();
|
||||||
|
const rooms = $ (
|
||||||
|
'#nav_center_sub > div.content_area_1_left > div.bottom10 > div.content_lr_in_show > div:nth-child(4) > div:nth-child(2) > div.size_rs > span'
|
||||||
|
).text ();
|
||||||
|
|
||||||
const price = $('#nav_center_sub > div.content_area_1_left > div.bottom10 > div.content_lr_in_show > div:nth-child(1) > div.size_rs > strong').text();
|
const address = $ (
|
||||||
|
'#nav_center_sub > div.content_area_1_left > div.bottom10 > div.content_lr_in_show > div:nth-child(3) > div:nth-child(3) > div.size_rs > span'
|
||||||
const size = $('#nav_center_sub > div.content_area_1_left > div.bottom10 > div.content_lr_in_show > div:nth-child(4) > div:nth-child(7) > div.size_rs > span').text();
|
).text ();
|
||||||
const rooms = $('#nav_center_sub > div.content_area_1_left > div.bottom10 > div.content_lr_in_show > div:nth-child(4) > div:nth-child(2) > div.size_rs > span').text();
|
|
||||||
|
|
||||||
const address = $('#nav_center_sub > div.content_area_1_left > div.bottom10 > div.content_lr_in_show > div:nth-child(3) > div:nth-child(3) > div.size_rs > span').text();
|
|
||||||
|
|
||||||
//const location = $('#artikal_glavni_div > div.artikal_lijevo > div.op.pop.mobile-lokacija').attr('data-content');
|
//const location = $('#artikal_glavni_div > div.artikal_lijevo > div.op.pop.mobile-lokacija').attr('data-content');
|
||||||
|
|
||||||
//const adType = $('#artikal_glavni_div > div.artikal_lijevo > div:nth-child(15) > div:nth-child(2) > div.df2').text();
|
//const adType = $('#artikal_glavni_div > div.artikal_lijevo > div:nth-child(15) > div:nth-child(2) > div.df2').text();
|
||||||
|
|
||||||
const time = $('#nav_center_sub > div.content_area_1_right > div.bottom_d > div > strong:nth-child(1)').text();
|
const time = $ (
|
||||||
|
'#nav_center_sub > div.content_area_1_right > div.bottom_d > div > strong:nth-child(1)'
|
||||||
|
).text ();
|
||||||
|
|
||||||
//const olxId = $('#artikal_glavni_div > div.artikal_lijevo > div:nth-child(15) > div:nth-child(4) > div.df2').text();
|
//const olxId = $('#artikal_glavni_div > div.artikal_lijevo > div:nth-child(15) > div:nth-child(4) > div.df2').text();
|
||||||
|
|
||||||
const descriptions = $('#nav_center_sub > div.content_area_1_left > div.bottom10 > div.content_ll_in_show > div:nth-child(1)').text();
|
const descriptions = $ (
|
||||||
|
'#nav_center_sub > div.content_area_1_left > div.bottom10 > div.content_ll_in_show > div:nth-child(1)'
|
||||||
|
).text ();
|
||||||
const floor = $('#nav_center_sub > div.content_area_1_left > div.bottom10 > div.content_lr_in_show > div:nth-child(4) > div:nth-child(6) > div.size_rs').text();
|
|
||||||
|
|
||||||
|
const floor = $ (
|
||||||
|
'#nav_center_sub > div.content_area_1_left > div.bottom10 > div.content_lr_in_show > div:nth-child(4) > div:nth-child(6) > div.size_rs'
|
||||||
|
).text ();
|
||||||
|
|
||||||
const latLngRe = /marker=([0-9]+\.[0-9]+)\,\s*([0-9]+\.[0-9]+)/g;
|
const latLngRe = /marker=([0-9]+\.[0-9]+)\,\s*([0-9]+\.[0-9]+)/g;
|
||||||
|
|
||||||
|
var hasMap = false;
|
||||||
|
|
||||||
|
var tmpTitle = title.toUpperCase ();
|
||||||
|
|
||||||
|
var status = STATUS_NORMAL;
|
||||||
|
if (tmpTitle.indexOf ('PRODANO') !== -1) status = STATUS_SOLD;
|
||||||
|
if (tmpTitle.indexOf ('REZERVISANO') !== -1) status = STATUS_RESERVED;
|
||||||
|
|
||||||
//const latLngRe = /LatLng\(([0-9]+\.[0-9]+)\,\s+([0-9]+\.[0-9]+)\)/g;
|
//const latLngRe = /LatLng\(([0-9]+\.[0-9]+)\,\s+([0-9]+\.[0-9]+)\)/g;
|
||||||
|
|
||||||
const matches = latLngRe.exec(body);
|
const matches = latLngRe.exec (body);
|
||||||
let lng = '',
|
let lng = '', lat = '';
|
||||||
lat = '';
|
hasMap = false;
|
||||||
if (matches && matches.length >= 3) {
|
if (matches && matches.length >= 3) {
|
||||||
lat = matches[1];
|
lat = matches[1];
|
||||||
lng = matches[2];
|
lng = matches[2];
|
||||||
|
hasMap = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
//console.log({
|
//console.log({
|
||||||
@@ -84,16 +109,17 @@ export default class ProstorCrawler {
|
|||||||
|
|
||||||
const images = [];
|
const images = [];
|
||||||
|
|
||||||
|
|
||||||
//const imgMatches = body.match(imgRe);
|
//const imgMatches = body.match(imgRe);
|
||||||
|
|
||||||
const parseRooms = (rooms) => parseInt([...rooms].filter(c => !isNaN(c)).filter(c => c.trim()).join())
|
const parseRooms = rooms =>
|
||||||
const parsePrice = (price) => parseFloat(price.replace(".", ""))
|
parseInt (
|
||||||
|
[...rooms].filter (c => !isNaN (c)).filter (c => c.trim ()).join ()
|
||||||
|
);
|
||||||
|
const parsePrice = price => parseFloat (price.replace ('.', ''));
|
||||||
|
|
||||||
|
$ ('.fancybox').each ((i, elem) => {
|
||||||
$('.fancybox').each((i, elem) => {
|
const img = $ (elem).attr ('href');
|
||||||
const img = $(elem).attr('href');
|
images.push (img);
|
||||||
images.push(img);
|
|
||||||
});
|
});
|
||||||
|
|
||||||
//for (let i = 0; imgMatches && i < imgMatches.length; i++) {
|
//for (let i = 0; imgMatches && i < imgMatches.length; i++) {
|
||||||
@@ -110,24 +136,23 @@ export default class ProstorCrawler {
|
|||||||
//const uploadResults = await Promise.all(uploadPromises);
|
//const uploadResults = await Promise.all(uploadPromises);
|
||||||
//const cloudinaryImages = uploadResults.map(ur => ur.url);
|
//const cloudinaryImages = uploadResults.map(ur => ur.url);
|
||||||
|
|
||||||
|
const parsedPrice = parsePrice (price);
|
||||||
const parsedPrice = parsePrice(price);
|
|
||||||
let parsedRooms;
|
let parsedRooms;
|
||||||
|
|
||||||
if (rooms === 'Garsonjera') {
|
if (rooms === 'Garsonjera') {
|
||||||
parsedRooms = 0;
|
parsedRooms = 0;
|
||||||
} else {
|
} else {
|
||||||
parsedRooms = parseRooms(rooms);
|
parsedRooms = parseRooms (rooms);
|
||||||
}
|
}
|
||||||
|
|
||||||
const data = {
|
const data = {
|
||||||
category: this.getCategoryId(category),
|
category: this.getCategoryId (category),
|
||||||
url,
|
url,
|
||||||
title,
|
title,
|
||||||
price: isNaN(parsedPrice) ? price : parsedPrice,
|
price: isNaN (parsedPrice) ? price : parsedPrice,
|
||||||
size: parseFloat(size),
|
size: parseFloat (size),
|
||||||
rooms: parsedRooms,
|
rooms: parsedRooms,
|
||||||
floor: parseInt(floor),
|
floor: parseInt (floor),
|
||||||
address,
|
address,
|
||||||
adType: AD_TYPE_SALE,
|
adType: AD_TYPE_SALE,
|
||||||
time,
|
time,
|
||||||
@@ -135,67 +160,68 @@ export default class ProstorCrawler {
|
|||||||
longDescription: descriptions,
|
longDescription: descriptions,
|
||||||
lat,
|
lat,
|
||||||
lng,
|
lng,
|
||||||
loc: [parseFloat(lat), parseFloat(lng)],
|
loc: [parseFloat (lat), parseFloat (lng)],
|
||||||
|
hasMap,
|
||||||
|
status,
|
||||||
//images: cloudinaryImages
|
//images: cloudinaryImages
|
||||||
images
|
images,
|
||||||
};
|
};
|
||||||
console.log(data);
|
console.log (data);
|
||||||
|
|
||||||
return data;
|
return data;
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
console.error('Exception caught: ' + e.message);
|
console.error ('Exception caught: ' + e.message);
|
||||||
}
|
}
|
||||||
|
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
async indexPage(pageNr, maxResults = 1000) {
|
async indexPage (pageNr, maxResults = 1000) {
|
||||||
try {
|
try {
|
||||||
|
console.log ('Starting to index page: ' + pageNr);
|
||||||
console.log('Starting to index page: ' + pageNr);
|
|
||||||
const url = `http://prostor.ba/index.php`;
|
const url = `http://prostor.ba/index.php`;
|
||||||
|
|
||||||
const data = new FormData();
|
const data = new FormData ();
|
||||||
data.append('sortCombo', 'e.date_create DESC');
|
data.append ('sortCombo', 'e.date_create DESC');
|
||||||
data.append('command', '');
|
data.append ('command', '');
|
||||||
data.append('action', 'show');
|
data.append ('action', 'show');
|
||||||
data.append('page', pageNr);
|
data.append ('page', pageNr);
|
||||||
data.append('param', 'ponuda.inc.php');
|
data.append ('param', 'ponuda.inc.php');
|
||||||
data.append('checkNO', 0);
|
data.append ('checkNO', 0);
|
||||||
data.append('order', 'e.date_create DESC');
|
data.append ('order', 'e.date_create DESC');
|
||||||
data.append('reset', 0);
|
data.append ('reset', 0);
|
||||||
data.append('estate_action', 1);
|
data.append ('estate_action', 1);
|
||||||
data.append('Itemid', 785);
|
data.append ('Itemid', 785);
|
||||||
|
|
||||||
const res = await fetch(url, {
|
const res = await fetch (url, {
|
||||||
method: 'POST',
|
method: 'POST',
|
||||||
body: data
|
body: data,
|
||||||
});
|
});
|
||||||
const body = await res.text();
|
const body = await res.text ();
|
||||||
const $ = cheerio.load(body);
|
const $ = cheerio.load (body);
|
||||||
|
|
||||||
const hrefs = [];
|
const hrefs = [];
|
||||||
$('.nekret_box').each((i, elem) => {
|
$ ('.nekret_box').each ((i, elem) => {
|
||||||
const href = $(elem).find("a").first().attr('href');
|
const href = $ (elem).find ('a').first ().attr ('href');
|
||||||
hrefs.push(`http://prostor.ba/${href}`);
|
hrefs.push (`http://prostor.ba/${href}`);
|
||||||
});
|
});
|
||||||
|
|
||||||
const results = {};
|
const results = {};
|
||||||
for (const href of hrefs) {
|
for (const href of hrefs) {
|
||||||
console.log(`indexing: ${href}`);
|
console.log (`indexing: ${href}`);
|
||||||
|
|
||||||
const singleData = await this.indexSingle(href);
|
const singleData = await this.indexSingle (href);
|
||||||
|
|
||||||
if (singleData) {
|
if (singleData) {
|
||||||
results[href] = singleData;
|
results[href] = singleData;
|
||||||
}
|
}
|
||||||
|
|
||||||
await this.sleep(500);
|
await this.sleep (500);
|
||||||
}
|
}
|
||||||
|
|
||||||
return results;
|
return results;
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
console.error('Exception caught:' + e);
|
console.error ('Exception caught:' + e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -211,22 +237,26 @@ export default class ProstorCrawler {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async sleep(ms) {
|
async sleep (ms) {
|
||||||
return new Promise(resolve => setTimeout(resolve, ms));
|
return new Promise (resolve => setTimeout (resolve, ms));
|
||||||
}
|
}
|
||||||
|
|
||||||
async indexPages(start, end, maxResults = 1000) {
|
async indexPages (start, end, maxResults = 1000) {
|
||||||
let results = {};
|
let results = {};
|
||||||
for (let i = start; i <= end; i++) {
|
for (let i = start; i <= end; i++) {
|
||||||
let result = await this.indexPage(i, maxResults);
|
let result = await this.indexPage (i, maxResults);
|
||||||
Object.assign(results, result)
|
Object.assign (results, result);
|
||||||
await this.sleep(5000);
|
await this.sleep (5000);
|
||||||
}
|
}
|
||||||
return results;
|
return results;
|
||||||
}
|
}
|
||||||
|
|
||||||
async crawl() {
|
async crawl () {
|
||||||
let results = await this.indexPages(this.fromPage, this.toPage, this.maxResults);
|
let results = await this.indexPages (
|
||||||
|
this.fromPage,
|
||||||
|
this.toPage,
|
||||||
|
this.maxResults
|
||||||
|
);
|
||||||
return results;
|
return results;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
424
crawler/specific/rental.js
Normal file
424
crawler/specific/rental.js
Normal file
@@ -0,0 +1,424 @@
|
|||||||
|
'use strict';
|
||||||
|
|
||||||
|
let fetch = require ('node-fetch');
|
||||||
|
let cheerio = require ('cheerio');
|
||||||
|
let fs = require ('fs');
|
||||||
|
let cloudinary = require ('cloudinary');
|
||||||
|
let FormData = require ('form-data');
|
||||||
|
|
||||||
|
import {
|
||||||
|
AD_TYPE_SALE,
|
||||||
|
IGNORED_USERNAMES,
|
||||||
|
CATEGORY_FLAT,
|
||||||
|
CATEGORY_HOUSE,
|
||||||
|
CATEGORY_OFFICE,
|
||||||
|
CATEGORY_LAND,
|
||||||
|
CATEGORY_APARTMENT,
|
||||||
|
CATEGORY_GARAGE,
|
||||||
|
STATUS_NORMAL,
|
||||||
|
STATUS_RESERVED,
|
||||||
|
STATUS_SOLD,
|
||||||
|
} from '../../common/enums';
|
||||||
|
|
||||||
|
export default class RentalCrawler {
|
||||||
|
constructor (fromPage = 0, toPage = 10, maxResults = 1000) {
|
||||||
|
console.log ('Rental Crawler');
|
||||||
|
|
||||||
|
this.fromPage = fromPage;
|
||||||
|
this.toPage = toPage;
|
||||||
|
this.maxResults = maxResults;
|
||||||
|
}
|
||||||
|
|
||||||
|
async indexSingle (url) {
|
||||||
|
try {
|
||||||
|
const res = await fetch (url);
|
||||||
|
const body = await res.text ();
|
||||||
|
const $ = cheerio.load (body);
|
||||||
|
|
||||||
|
var title;
|
||||||
|
var category;
|
||||||
|
var price;
|
||||||
|
var size;
|
||||||
|
var rooms;
|
||||||
|
var address;
|
||||||
|
var descriptions;
|
||||||
|
var floor;
|
||||||
|
var floor;
|
||||||
|
var time;
|
||||||
|
var lat;
|
||||||
|
var lng;
|
||||||
|
var hasMap;
|
||||||
|
var status;
|
||||||
|
|
||||||
|
//No JSON string -> No map
|
||||||
|
try {
|
||||||
|
let completeData;
|
||||||
|
let dataJsonString;
|
||||||
|
let dataJson;
|
||||||
|
|
||||||
|
const startN = 5;
|
||||||
|
const lastN = 15;
|
||||||
|
|
||||||
|
for (let i = startN; i <= lastN; i++) {
|
||||||
|
try {
|
||||||
|
completeData = $ (
|
||||||
|
'body > div.container-fluid > div.container > div:nth-child(2) > div.col-xs-12.col-sm-12.col-md-12.col-lg-9.content-main > div:nth-child(' +
|
||||||
|
i +
|
||||||
|
') > script'
|
||||||
|
).text ();
|
||||||
|
dataJsonString = completeData.slice (21, -1);
|
||||||
|
dataJson = JSON.parse (dataJsonString);
|
||||||
|
break;
|
||||||
|
} catch (e) {
|
||||||
|
console.log ('No JSON string');
|
||||||
|
if (i === lastN) throw e;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
title = dataJson['re_realEstates_portalName'];
|
||||||
|
category = this.getCategoryIdfromNumber (
|
||||||
|
parseInt (dataJson['re_types_id'])
|
||||||
|
); //categories from JSON string doesn't match categories in ENUMS
|
||||||
|
price = parseFloat (dataJson['re_realEstates_price']);
|
||||||
|
size = parseFloat (dataJson['re_realEstates_area']);
|
||||||
|
rooms = parseInt (dataJson['re_realEstates_roomsNO']);
|
||||||
|
address = dataJson['re_realEstates_address'];
|
||||||
|
//descriptions = dataJson["re_realEstates_description"];
|
||||||
|
floor = parseInt (dataJson['re_realEstates_floorNO']);
|
||||||
|
|
||||||
|
let timeArray = dataJson['re_realEstates_inserted']
|
||||||
|
.slice (0, dataJson['re_realEstates_inserted'].indexOf (' '))
|
||||||
|
.split ('-');
|
||||||
|
time = timeArray[2] + '.' + timeArray[1] + '.' + timeArray[0];
|
||||||
|
|
||||||
|
lat = dataJson['re_realEstates_latitude'];
|
||||||
|
lng = dataJson['re_realEstates_longitude'];
|
||||||
|
hasMap = true;
|
||||||
|
} catch (e) {
|
||||||
|
console.log ('error : ' + e);
|
||||||
|
//This ad has no JSON string, informations should be retrieved using HTML selectors
|
||||||
|
time = undefined;
|
||||||
|
lat = 0;
|
||||||
|
lng = 0;
|
||||||
|
hasMap = false;
|
||||||
|
|
||||||
|
price =
|
||||||
|
parseFloat (
|
||||||
|
$ (
|
||||||
|
'body > div.container-fluid > div.container > div:nth-child(2) > div.col-xs-12.col-sm-12.col-md-12.col-lg-9.content-main > div:nth-child(1) > div > div > div.col-xs-12.col-sm-4.box-details > div.prices > span.pull-left'
|
||||||
|
)
|
||||||
|
.text ()
|
||||||
|
.replace (',', '')
|
||||||
|
.replace ('.', '')
|
||||||
|
) / 100;
|
||||||
|
|
||||||
|
const propsList = {};
|
||||||
|
|
||||||
|
$ (
|
||||||
|
'body > div.container-fluid > div.container > div:nth-child(2) > div.col-xs-12.col-sm-12.col-md-12.col-lg-9.content-main > div:nth-child(1) > div > div > div.col-xs-12.col-sm-4.box-details > div.body'
|
||||||
|
)
|
||||||
|
.contents ()
|
||||||
|
.map ((i, elem) => {
|
||||||
|
const entry = $ (elem).text ().trim ().split (':');
|
||||||
|
if (entry[0]) propsList[entry[0]] = entry[1];
|
||||||
|
});
|
||||||
|
|
||||||
|
address = propsList['Ulica'];
|
||||||
|
size =
|
||||||
|
parseFloat (
|
||||||
|
propsList['Površina'].replace (',', '').replace ('.', '')
|
||||||
|
) / 100;
|
||||||
|
rooms = parseInt (propsList['Broj soba']);
|
||||||
|
floor = parseInt (propsList['Spratnost']);
|
||||||
|
|
||||||
|
title = $ (
|
||||||
|
'div.container-fluid > div.container > div.row.content-top > div.col-xs-12.col-sm-6.col-md-9 > div.description.pull-left > h1'
|
||||||
|
).text ();
|
||||||
|
descriptions = $ ('#b1 > div > div > div').text ();
|
||||||
|
|
||||||
|
const fullCategory = $ (
|
||||||
|
'body > div.container-fluid > div.container > div:nth-child(2) > div.col-xs-12.col-sm-12.col-md-12.col-lg-9.content-main > div:nth-child(1) > div > div > div.col-xs-12.col-sm-4.box-details > div.title > p'
|
||||||
|
)
|
||||||
|
.text ()
|
||||||
|
.split (',', 3);
|
||||||
|
|
||||||
|
category = fullCategory.size > 2
|
||||||
|
? this.getCategoryIdfromText (fullCategory[0] + fullCategory[1])
|
||||||
|
: this.getCategoryIdfromText (fullCategory[0]);
|
||||||
|
}
|
||||||
|
|
||||||
|
descriptions = $ ('#b1 > div > div > div').text ();
|
||||||
|
status = this.getStatusIdFromText (
|
||||||
|
$ ('#a1 > div.box-badges > div').text ()
|
||||||
|
);
|
||||||
|
|
||||||
|
const images = [];
|
||||||
|
|
||||||
|
$ ('.img-gallery').contents ().map ((i, elem) => {
|
||||||
|
const tmp = $ (elem).attr ('data-preview');
|
||||||
|
if (tmp) images.push (tmp);
|
||||||
|
});
|
||||||
|
|
||||||
|
const data = {
|
||||||
|
category,
|
||||||
|
url,
|
||||||
|
title,
|
||||||
|
price,
|
||||||
|
size,
|
||||||
|
rooms,
|
||||||
|
floor,
|
||||||
|
address,
|
||||||
|
adType: AD_TYPE_SALE,
|
||||||
|
time,
|
||||||
|
shortDescription: title,
|
||||||
|
longDescription: descriptions,
|
||||||
|
lat,
|
||||||
|
lng,
|
||||||
|
loc: [parseFloat (lat), parseFloat (lng)],
|
||||||
|
hasMap,
|
||||||
|
status,
|
||||||
|
//images: cloudinaryImages
|
||||||
|
images,
|
||||||
|
};
|
||||||
|
|
||||||
|
return data;
|
||||||
|
} catch (e) {
|
||||||
|
console.error ('Exception caught: ' + e.message);
|
||||||
|
}
|
||||||
|
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
async indexPage (pageNr, maxResults = 1000) {
|
||||||
|
try {
|
||||||
|
console.log ('Starting to index page: ' + pageNr);
|
||||||
|
|
||||||
|
const url = 'http://www.rental.ba/pretraga/prodaja-1/stranica-' + pageNr;
|
||||||
|
|
||||||
|
/*
|
||||||
|
const data = new FormData();
|
||||||
|
data.append('sales', 1); // Mislim da ovo definiše oglase tipa prodaje
|
||||||
|
data.append('re_types_id', ''); //odnosi se na tip nekretnine (kuća, stan, apartman,...)
|
||||||
|
data.append('full_text', '');
|
||||||
|
data.append('re_realEstates_code', '');
|
||||||
|
data.append('re_realEstates_price_max', '');
|
||||||
|
data.append('re_realEstates_price_min', '');
|
||||||
|
data.append('re_realEstates_area_min', '');
|
||||||
|
data.append('re_realEstates_area_max', '');
|
||||||
|
data.append('re_realEstates_roomsNO_min', '');
|
||||||
|
data.append('re_realEstates_roomsNO_max', '');
|
||||||
|
data.append('re_realEstates_floorNO_min', '');
|
||||||
|
data.append('re_realEstates_floorNO_max', '');
|
||||||
|
data.append('re_subTypes_id', 1);
|
||||||
|
*/
|
||||||
|
|
||||||
|
const res = await fetch (url, {
|
||||||
|
method: 'POST',
|
||||||
|
//body: data
|
||||||
|
});
|
||||||
|
const body = await res.text ();
|
||||||
|
const $ = cheerio.load (body);
|
||||||
|
|
||||||
|
const hrefs = [];
|
||||||
|
$ ('.middle').each ((i, elem) => {
|
||||||
|
const href = $ (elem).find ('a').first ().attr ('href');
|
||||||
|
hrefs.push (href);
|
||||||
|
});
|
||||||
|
|
||||||
|
const results = {};
|
||||||
|
for (const href of hrefs) {
|
||||||
|
console.log (`indexing: ${href}`);
|
||||||
|
|
||||||
|
const singleData = await this.indexSingle (href);
|
||||||
|
|
||||||
|
if (singleData) {
|
||||||
|
results[href] = singleData;
|
||||||
|
}
|
||||||
|
|
||||||
|
await this.sleep (500);
|
||||||
|
}
|
||||||
|
|
||||||
|
return results;
|
||||||
|
} catch (e) {
|
||||||
|
console.error ('Exception caught:' + e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
getCategoryIdfromNumber (category) {
|
||||||
|
switch (category) {
|
||||||
|
case 1:
|
||||||
|
return CATEGORY_HOUSE;
|
||||||
|
case 2:
|
||||||
|
return CATEGORY_FLAT;
|
||||||
|
case 3:
|
||||||
|
return CATEGORY_APARTMENT;
|
||||||
|
case 4:
|
||||||
|
return CATEGORY_OFFICE;
|
||||||
|
case 5:
|
||||||
|
return CATEGORY_LAND;
|
||||||
|
case 6:
|
||||||
|
return CATEGORY_GARAGE;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
getCategoryIdfromText (category) {
|
||||||
|
switch (category) {
|
||||||
|
case 'samostojeća':
|
||||||
|
return CATEGORY_HOUSE;
|
||||||
|
case 'dvojna':
|
||||||
|
return CATEGORY_HOUSE;
|
||||||
|
case 'kuća u nizu':
|
||||||
|
return CATEGORY_HOUSE;
|
||||||
|
case 'stambeno-poslovni objekt':
|
||||||
|
return CATEGORY_HOUSE;
|
||||||
|
case 'prizemnica':
|
||||||
|
return CATEGORY_HOUSE;
|
||||||
|
case 'kuća na moru':
|
||||||
|
return CATEGORY_HOUSE;
|
||||||
|
case 'kuća u izgradnji':
|
||||||
|
return CATEGORY_HOUSE;
|
||||||
|
case 'dvorac':
|
||||||
|
return CATEGORY_HOUSE;
|
||||||
|
case 'apartmanska kuća':
|
||||||
|
return CATEGORY_HOUSE;
|
||||||
|
case 'porodična kuća':
|
||||||
|
return CATEGORY_HOUSE;
|
||||||
|
case 'vikend kuća':
|
||||||
|
return CATEGORY_HOUSE;
|
||||||
|
case 'luksuzna kuća':
|
||||||
|
return CATEGORY_HOUSE;
|
||||||
|
case 'kamena':
|
||||||
|
return CATEGORY_HOUSE;
|
||||||
|
case 'vila':
|
||||||
|
return CATEGORY_HOUSE;
|
||||||
|
case 'splav':
|
||||||
|
return CATEGORY_HOUSE;
|
||||||
|
|
||||||
|
case 'stan u zgradi':
|
||||||
|
return CATEGORY_FLAT;
|
||||||
|
case 'stan u kući':
|
||||||
|
return CATEGORY_FLAT;
|
||||||
|
case 'stan višeetažni':
|
||||||
|
return CATEGORY_FLAT;
|
||||||
|
case 'stan višeetažni u kući':
|
||||||
|
return CATEGORY_FLAT;
|
||||||
|
case 'stan u starijoj zgradi':
|
||||||
|
return CATEGORY_FLAT;
|
||||||
|
case 'stan u novogradnji':
|
||||||
|
return CATEGORY_FLAT;
|
||||||
|
case 'stan u neboderu':
|
||||||
|
return CATEGORY_FLAT;
|
||||||
|
case 'Korišten stan u novogradnji':
|
||||||
|
return CATEGORY_FLAT;
|
||||||
|
|
||||||
|
case 'apartman na moru':
|
||||||
|
return CATEGORY_APARTMENT;
|
||||||
|
case 'apartman u planini':
|
||||||
|
return CATEGORY_APARTMENT;
|
||||||
|
|
||||||
|
case 'unutrašnje garažno mjesto':
|
||||||
|
return CATEGORY_GARAGE;
|
||||||
|
case 'unutrašnje parkirno mjesto':
|
||||||
|
return CATEGORY_GARAGE;
|
||||||
|
|
||||||
|
case 'građevinsko':
|
||||||
|
return CATEGORY_LAND;
|
||||||
|
case 'građevinsko stambeno':
|
||||||
|
return CATEGORY_LAND;
|
||||||
|
case 'zemljište, ostalo':
|
||||||
|
return CATEGORY_LAND;
|
||||||
|
case 'odmaralište':
|
||||||
|
return CATEGORY_LAND;
|
||||||
|
case 'oranica':
|
||||||
|
return CATEGORY_LAND;
|
||||||
|
case 'šuma':
|
||||||
|
return CATEGORY_LAND;
|
||||||
|
case 'livada':
|
||||||
|
return CATEGORY_LAND;
|
||||||
|
case 'građevinsko M2':
|
||||||
|
return CATEGORY_LAND;
|
||||||
|
case 'građevinsko M1':
|
||||||
|
return CATEGORY_LAND;
|
||||||
|
case 'građevinsko - turističko':
|
||||||
|
return CATEGORY_LAND;
|
||||||
|
case 'građevinsko - poslovno':
|
||||||
|
return CATEGORY_LAND;
|
||||||
|
case 'otok':
|
||||||
|
return CATEGORY_LAND;
|
||||||
|
case 'poljoprivredno':
|
||||||
|
return CATEGORY_LAND;
|
||||||
|
|
||||||
|
case 'lokal':
|
||||||
|
return CATEGORY_OFFICE;
|
||||||
|
case 'ured':
|
||||||
|
return CATEGORY_OFFICE;
|
||||||
|
case 'skladište ili garaža':
|
||||||
|
return CATEGORY_OFFICE;
|
||||||
|
case 'radionica':
|
||||||
|
return CATEGORY_OFFICE;
|
||||||
|
case 'tvornica':
|
||||||
|
return CATEGORY_OFFICE;
|
||||||
|
case 'restoran':
|
||||||
|
return CATEGORY_OFFICE;
|
||||||
|
case 'sportski centar':
|
||||||
|
return CATEGORY_OFFICE;
|
||||||
|
case 'ordinacija':
|
||||||
|
return CATEGORY_OFFICE;
|
||||||
|
case 'kiosk':
|
||||||
|
return CATEGORY_OFFICE;
|
||||||
|
case 'auto-praonica':
|
||||||
|
return CATEGORY_OFFICE;
|
||||||
|
case 'poslovna zgrada':
|
||||||
|
return CATEGORY_OFFICE;
|
||||||
|
case 'skladište':
|
||||||
|
return CATEGORY_OFFICE;
|
||||||
|
case 'garaža':
|
||||||
|
return CATEGORY_OFFICE;
|
||||||
|
case 'hotel':
|
||||||
|
return CATEGORY_OFFICE;
|
||||||
|
case 'pansion':
|
||||||
|
return CATEGORY_OFFICE;
|
||||||
|
case 'apartmanska zgrada':
|
||||||
|
return CATEGORY_OFFICE;
|
||||||
|
case 'trgovina':
|
||||||
|
return CATEGORY_OFFICE;
|
||||||
|
case 'prodajno skladišni':
|
||||||
|
return CATEGORY_OFFICE;
|
||||||
|
case 'proizvodno skladišni':
|
||||||
|
return CATEGORY_OFFICE;
|
||||||
|
case 'Kancelarije':
|
||||||
|
return CATEGORY_OFFICE;
|
||||||
|
case 'Poslovni prostor':
|
||||||
|
return CATEGORY_OFFICE;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
getStatusIdFromText (status) {
|
||||||
|
if (status === 'Prodato') return STATUS_SOLD;
|
||||||
|
|
||||||
|
return STATUS_NORMAL;
|
||||||
|
}
|
||||||
|
|
||||||
|
async sleep (ms) {
|
||||||
|
return new Promise (resolve => setTimeout (resolve, ms));
|
||||||
|
}
|
||||||
|
|
||||||
|
async indexPages (start, end, maxResults = 1000) {
|
||||||
|
let results = {};
|
||||||
|
for (let i = start; i <= end; i++) {
|
||||||
|
let result = await this.indexPage (i, maxResults);
|
||||||
|
Object.assign (results, result);
|
||||||
|
await this.sleep (5000);
|
||||||
|
}
|
||||||
|
return results;
|
||||||
|
}
|
||||||
|
|
||||||
|
async crawl () {
|
||||||
|
let results = await this.indexPages (
|
||||||
|
this.fromPage,
|
||||||
|
this.toPage,
|
||||||
|
this.maxResults
|
||||||
|
);
|
||||||
|
return results;
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -4,17 +4,14 @@ module.exports = {
|
|||||||
|
|
||||||
output: {
|
output: {
|
||||||
path: __dirname + "/build",
|
path: __dirname + "/build",
|
||||||
filename: "crawler.js",
|
filename: "crawler.js"
|
||||||
devtool: 'source-map'
|
|
||||||
},
|
},
|
||||||
module: {
|
module: {
|
||||||
|
|
||||||
loaders: [{
|
loaders: [{
|
||||||
test: /.js?$/,
|
test: /.js?$/,
|
||||||
loader: 'babel-loader',
|
loader: 'babel-loader',
|
||||||
exclude: /node_modules/,
|
exclude: /node_modules/
|
||||||
presets: ['es2015'],
|
|
||||||
plugins: ['transform-async-to-generator']
|
|
||||||
}, {
|
}, {
|
||||||
test: /.json?$/,
|
test: /.json?$/,
|
||||||
loader: 'json-loader',
|
loader: 'json-loader',
|
||||||
|
|||||||
2395
crawler/yarn.lock
2395
crawler/yarn.lock
File diff suppressed because it is too large
Load Diff
5276
package-lock.json
generated
Normal file
5276
package-lock.json
generated
Normal file
File diff suppressed because it is too large
Load Diff
53
package.json
Normal file
53
package.json
Normal file
@@ -0,0 +1,53 @@
|
|||||||
|
{
|
||||||
|
"name": "kivi",
|
||||||
|
"version": "1.0.0",
|
||||||
|
"description": "",
|
||||||
|
"main": "",
|
||||||
|
"scripts": {
|
||||||
|
"web:dev": "webpack-dev-server --content-base ./web/dist --config ./web/webpack.config --hot --inline --host 0.0.0.0",
|
||||||
|
"test": "echo \"Error: no test specified\" && exit 1",
|
||||||
|
"format": "prettier-standard 'src/**/*.js'"
|
||||||
|
},
|
||||||
|
"author": "",
|
||||||
|
"license": "ISC",
|
||||||
|
"dependencies": {
|
||||||
|
"babel-core": "^6.24.0",
|
||||||
|
"babel": "^6.5.2",
|
||||||
|
"babel-plugin-transform-async-to-generator": "^6.16.0",
|
||||||
|
"babel-loader": "^6.4.1",
|
||||||
|
"babel-polyfill": "^6.23.0",
|
||||||
|
"babel-preset-es2015": "^6.24.0",
|
||||||
|
"babel-preset-es2017": "^6.22.0",
|
||||||
|
"body-parser": "^1.17.1",
|
||||||
|
"cookie-parser": "^1.4.3",
|
||||||
|
"date-fns": "^1.28.2",
|
||||||
|
"express": "^4.15.2",
|
||||||
|
"isomorphic-fetch": "^2.2.1",
|
||||||
|
"moment": "^2.18.1",
|
||||||
|
"mongodb": "^2.2.25",
|
||||||
|
"cheerio": "^0.22.0",
|
||||||
|
"cloudinary": "^1.8.0",
|
||||||
|
"dotenv": "^2.0.0",
|
||||||
|
"fetch": "^1.1.0",
|
||||||
|
"form-data": "^2.1.4",
|
||||||
|
"json-loader": "^0.5.4",
|
||||||
|
"source-map-support": "^0.4.6",
|
||||||
|
"twilio": "^2.11.0",
|
||||||
|
"babel-preset-stage-3": "^6.22.0",
|
||||||
|
"lodash.clonedeep": "^4.5.0",
|
||||||
|
"lodash.merge": "^4.6.0",
|
||||||
|
"react": "^15.3.2",
|
||||||
|
"react-dom": "^15.3.2"
|
||||||
|
},
|
||||||
|
"devDependencies": {
|
||||||
|
"babel-core": "^6.18.2",
|
||||||
|
"babel-loader": "^6.2.7",
|
||||||
|
"babel-preset-react": "^6.16.0",
|
||||||
|
"eslint": "^3.19.0",
|
||||||
|
"prettier": "^0.22.0",
|
||||||
|
"prettier-standard": "^3.0.1",
|
||||||
|
"webpack": "1.13.3",
|
||||||
|
"webpack-dev-server": "^1.16.2",
|
||||||
|
"babel-preset-es2015": "^6.24.1"
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -1,31 +0,0 @@
|
|||||||
{
|
|
||||||
"name": "web",
|
|
||||||
"version": "1.0.0",
|
|
||||||
"description": "",
|
|
||||||
"main": "index.js",
|
|
||||||
"scripts": {
|
|
||||||
"dev": "webpack-dev-server --content-base ./dist --hot --inline --host 0.0.0.0",
|
|
||||||
"test": "echo \"Error: no test specified\" && exit 1",
|
|
||||||
"format": "prettier-standard 'src/**/*.js'"
|
|
||||||
},
|
|
||||||
"author": "",
|
|
||||||
"license": "ISC",
|
|
||||||
"dependencies": {
|
|
||||||
"babel-preset-stage-3": "^6.22.0",
|
|
||||||
"lodash.clonedeep": "^4.5.0",
|
|
||||||
"lodash.merge": "^4.6.0",
|
|
||||||
"react": "^15.3.2",
|
|
||||||
"react-dom": "^15.3.2"
|
|
||||||
},
|
|
||||||
"devDependencies": {
|
|
||||||
"babel-core": "^6.18.2",
|
|
||||||
"babel-loader": "^6.2.7",
|
|
||||||
"babel-preset-es2015": "^6.18.0",
|
|
||||||
"babel-preset-react": "^6.16.0",
|
|
||||||
"eslint": "^3.19.0",
|
|
||||||
"prettier": "^0.22.0",
|
|
||||||
"prettier-standard": "^3.0.1",
|
|
||||||
"webpack": "^1.13.3",
|
|
||||||
"webpack-dev-server": "^1.16.2"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -5,7 +5,7 @@ import {
|
|||||||
CATEGORY_HOUSE,
|
CATEGORY_HOUSE,
|
||||||
CATEGORY_OFFICE,
|
CATEGORY_OFFICE,
|
||||||
CATEGORY_LAND
|
CATEGORY_LAND
|
||||||
} from "../../../crawler/enums";
|
} from "../../../common/enums";
|
||||||
|
|
||||||
export default class Filters extends React.Component {
|
export default class Filters extends React.Component {
|
||||||
onCloseClick(e) {
|
onCloseClick(e) {
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
import React from 'react'
|
import React from 'react'
|
||||||
import Gallery from './gallery'
|
import Gallery from './Gallery'
|
||||||
import {formatPrice, formatRooms, formatFloor} from '../lib/helpers'
|
import {formatPrice, formatRooms, formatFloor} from '../lib/helpers'
|
||||||
import ContactModal from './ContactModal';
|
import ContactModal from './ContactModal';
|
||||||
|
|
||||||
|
|||||||
@@ -21,7 +21,8 @@ class Main extends React.Component {
|
|||||||
sort: 'relevance',
|
sort: 'relevance',
|
||||||
filters: {
|
filters: {
|
||||||
rooms: {},
|
rooms: {},
|
||||||
category: {}
|
category: {},
|
||||||
|
status : {}
|
||||||
},
|
},
|
||||||
mobileView: 'MAP',
|
mobileView: 'MAP',
|
||||||
contact: {
|
contact: {
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
module.exports = {
|
module.exports = {
|
||||||
entry: ["./src/index.js"],
|
entry: [__dirname + "/src/index.js"],
|
||||||
output: {
|
output: {
|
||||||
path: __dirname + "/dist",
|
path: __dirname + "/dist",
|
||||||
filename: "app.bundle.js",
|
filename: "app.bundle.js",
|
||||||
|
|||||||
Reference in New Issue
Block a user