Correctly parse rooms, size, price etc.
This commit is contained in:
@@ -61,7 +61,7 @@
|
|||||||
function _asyncToGenerator(fn) { return function () { var gen = fn.apply(this, arguments); return new Promise(function (resolve, reject) { function step(key, arg) { try { var info = gen[key](arg); var value = info.value; } catch (error) { reject(error); return; } if (info.done) { resolve(value); } else { return Promise.resolve(value).then(function (value) { step("next", value); }, function (err) { step("throw", err); }); } } return step("next"); }); }; }
|
function _asyncToGenerator(fn) { return function () { var gen = fn.apply(this, arguments); return new Promise(function (resolve, reject) { function step(key, arg) { try { var info = gen[key](arg); var value = info.value; } catch (error) { reject(error); return; } if (info.done) { resolve(value); } else { return Promise.resolve(value).then(function (value) { step("next", value); }, function (err) { step("throw", err); }); } } return step("next"); }); }; }
|
||||||
|
|
||||||
var MongoClient = __webpack_require__(3).MongoClient;
|
var MongoClient = __webpack_require__(3).MongoClient;
|
||||||
var url = 'mongodb://localhost:27017/example';
|
var url = 'mongodb://localhost:27017/kivi';
|
||||||
|
|
||||||
__webpack_require__(4);
|
__webpack_require__(4);
|
||||||
|
|
||||||
@@ -70,13 +70,16 @@
|
|||||||
var PORT = process.env.PORT || 3001;
|
var PORT = process.env.PORT || 3001;
|
||||||
var AGENTURA_KEY = process.env.AGENTURA_KEY || '1somethingverysecret';
|
var AGENTURA_KEY = process.env.AGENTURA_KEY || '1somethingverysecret';
|
||||||
|
|
||||||
|
var db = void 0;
|
||||||
|
//Monogo = await MongoClient.connect(url);
|
||||||
|
|
||||||
// TODO:
|
// TODO:
|
||||||
// db.results.ensureIndex({loc:"2d"})
|
// db.results.ensureIndex({loc:"2d"})
|
||||||
//collection.ensureIndex("username",callback)
|
//collection.ensureIndex("username",callback)
|
||||||
|
|
||||||
router.get('/search', function () {
|
router.get('/search', function () {
|
||||||
var _ref = _asyncToGenerator(regeneratorRuntime.mark(function _callee(req, res, next) {
|
var _ref = _asyncToGenerator(regeneratorRuntime.mark(function _callee(req, res, next) {
|
||||||
var bounds, db, properties, query, _bounds$split$map, _bounds$split$map2, lat1, lng1, lat2, lng2, box, all;
|
var bounds, minPrice, maxPrice, minSize, maxSize, rooms, adType, properties, query, _bounds$split$map, _bounds$split$map2, lat1, lng1, lat2, lng2, box, all;
|
||||||
|
|
||||||
return regeneratorRuntime.wrap(function _callee$(_context) {
|
return regeneratorRuntime.wrap(function _callee$(_context) {
|
||||||
while (1) {
|
while (1) {
|
||||||
@@ -84,12 +87,13 @@
|
|||||||
case 0:
|
case 0:
|
||||||
_context.prev = 0;
|
_context.prev = 0;
|
||||||
bounds = req.query.bounds || '';
|
bounds = req.query.bounds || '';
|
||||||
_context.next = 4;
|
minPrice = req.query.minPrice;
|
||||||
return MongoClient.connect(url);
|
maxPrice = req.query.maxPrice;
|
||||||
|
minSize = req.query.minSize;
|
||||||
case 4:
|
maxSize = req.query.maxSize;
|
||||||
db = _context.sent;
|
rooms = req.query.rooms;
|
||||||
properties = db.collection('results');
|
adType = req.query.adType;
|
||||||
|
properties = db.collection('listings');
|
||||||
query = {};
|
query = {};
|
||||||
|
|
||||||
|
|
||||||
@@ -107,35 +111,83 @@
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
_context.next = 10;
|
if (adType) {
|
||||||
|
query = Object.assign(query, {
|
||||||
|
adType: parseInt(adType)
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
if (minPrice) {
|
||||||
|
query = Object.assign(query, {
|
||||||
|
price: {
|
||||||
|
"$gte": parseFloat(minPrice),
|
||||||
|
"$ne": -1
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
if (maxPrice) {
|
||||||
|
query = Object.assign(query, {
|
||||||
|
price: {
|
||||||
|
"$lte": parseFloat(maxPrice),
|
||||||
|
"$ne": -1
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
if (rooms === "4+") {
|
||||||
|
query = Object.assign(query, {
|
||||||
|
rooms: {
|
||||||
|
"$gte": 4
|
||||||
|
}
|
||||||
|
});
|
||||||
|
} else if (rooms) {
|
||||||
|
query = Object.assign(query, {
|
||||||
|
rooms: parseFloat(rooms)
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
if (minSize) {
|
||||||
|
query = Object.assign(query, {
|
||||||
|
size: {
|
||||||
|
"$gte": parseFloat(minSize)
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
if (maxSize) {
|
||||||
|
query = Object.assign(query, {
|
||||||
|
size: {
|
||||||
|
"$lte": parseFloat(maxSize)
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
_context.next = 19;
|
||||||
return properties.find(query).toArray();
|
return properties.find(query).toArray();
|
||||||
|
|
||||||
case 10:
|
case 19:
|
||||||
all = _context.sent;
|
all = _context.sent;
|
||||||
|
|
||||||
|
|
||||||
res.json(all);
|
res.json(all);
|
||||||
res.end();
|
res.end();
|
||||||
_context.next = 15;
|
_context.next = 28;
|
||||||
return db.close();
|
|
||||||
|
|
||||||
case 15:
|
|
||||||
_context.next = 21;
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 17:
|
case 24:
|
||||||
_context.prev = 17;
|
_context.prev = 24;
|
||||||
_context.t0 = _context['catch'](0);
|
_context.t0 = _context['catch'](0);
|
||||||
|
|
||||||
console.log('error:', _context.t0);
|
console.log('error:', _context.t0);
|
||||||
next(_context.t0);
|
next(_context.t0);
|
||||||
|
|
||||||
case 21:
|
case 28:
|
||||||
case 'end':
|
case 'end':
|
||||||
return _context.stop();
|
return _context.stop();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}, _callee, undefined, [[0, 17]]);
|
}, _callee, undefined, [[0, 24]]);
|
||||||
}));
|
}));
|
||||||
|
|
||||||
return function (_x, _x2, _x3) {
|
return function (_x, _x2, _x3) {
|
||||||
@@ -155,8 +207,13 @@
|
|||||||
});
|
});
|
||||||
|
|
||||||
app.use('/api', router);
|
app.use('/api', router);
|
||||||
app.listen(PORT, function () {
|
|
||||||
return console.log('Express server running at localhost: ' + PORT);
|
MongoClient.connect(url).then(function (database) {
|
||||||
|
db = database;
|
||||||
|
db.collection('listings').createIndex({ loc: "2d" });
|
||||||
|
app.listen(PORT, function () {
|
||||||
|
return console.log('Express server running at localhost: ' + PORT);
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
/***/ },
|
/***/ },
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
import express from 'express'
|
import express from 'express'
|
||||||
import bodyParser from 'body-parser';
|
import bodyParser from 'body-parser';
|
||||||
var MongoClient = require('mongodb').MongoClient;
|
var MongoClient = require('mongodb').MongoClient;
|
||||||
var url = 'mongodb://localhost:27017/example';
|
var url = 'mongodb://localhost:27017/kivi';
|
||||||
|
|
||||||
require("babel-polyfill");
|
require("babel-polyfill");
|
||||||
|
|
||||||
@@ -10,6 +10,9 @@ const router = express.Router({mergeParams: true})
|
|||||||
const PORT = process.env.PORT || 3001;
|
const PORT = process.env.PORT || 3001;
|
||||||
const AGENTURA_KEY = process.env.AGENTURA_KEY || '1somethingverysecret';
|
const AGENTURA_KEY = process.env.AGENTURA_KEY || '1somethingverysecret';
|
||||||
|
|
||||||
|
let db;
|
||||||
|
//Monogo = await MongoClient.connect(url);
|
||||||
|
|
||||||
// TODO:
|
// TODO:
|
||||||
// db.results.ensureIndex({loc:"2d"})
|
// db.results.ensureIndex({loc:"2d"})
|
||||||
//collection.ensureIndex("username",callback)
|
//collection.ensureIndex("username",callback)
|
||||||
@@ -17,8 +20,13 @@ const AGENTURA_KEY = process.env.AGENTURA_KEY || '1somethingverysecret';
|
|||||||
router.get('/search', async (req, res, next) => {
|
router.get('/search', async (req, res, next) => {
|
||||||
try {
|
try {
|
||||||
const bounds = req.query.bounds || '';
|
const bounds = req.query.bounds || '';
|
||||||
const db = await MongoClient.connect(url);
|
const minPrice = req.query.minPrice;
|
||||||
const properties = db.collection('results');
|
const maxPrice = req.query.maxPrice;
|
||||||
|
const minSize = req.query.minSize;
|
||||||
|
const maxSize = req.query.maxSize;
|
||||||
|
const rooms = req.query.rooms;
|
||||||
|
const adType = req.query.adType;
|
||||||
|
const properties = db.collection('listings');
|
||||||
let query = {};
|
let query = {};
|
||||||
|
|
||||||
if (bounds) {
|
if (bounds) {
|
||||||
@@ -34,11 +42,62 @@ router.get('/search', async (req, res, next) => {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (adType) {
|
||||||
|
query = Object.assign(query, {
|
||||||
|
adType: parseInt(adType)
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
if (minPrice) {
|
||||||
|
query = Object.assign(query, {
|
||||||
|
price: {
|
||||||
|
"$gte": parseFloat(minPrice),
|
||||||
|
"$ne": -1
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
if (maxPrice) {
|
||||||
|
query = Object.assign(query, {
|
||||||
|
price: {
|
||||||
|
"$lte": parseFloat(maxPrice),
|
||||||
|
"$ne": -1
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
if (rooms === "4+") {
|
||||||
|
query = Object.assign(query, {
|
||||||
|
rooms: {
|
||||||
|
"$gte": 4
|
||||||
|
}
|
||||||
|
})
|
||||||
|
} else if (rooms) {
|
||||||
|
query = Object.assign(query, {
|
||||||
|
rooms: parseFloat(rooms)
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
if (minSize) {
|
||||||
|
query = Object.assign(query, {
|
||||||
|
size: {
|
||||||
|
"$gte": parseFloat(minSize)
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
if (maxSize) {
|
||||||
|
query = Object.assign(query, {
|
||||||
|
size: {
|
||||||
|
"$lte": parseFloat(maxSize)
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
const all = await properties.find(query).toArray();
|
const all = await properties.find(query).toArray();
|
||||||
|
|
||||||
res.json(all);
|
res.json(all);
|
||||||
res.end();
|
res.end();
|
||||||
await db.close();
|
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
console.log('error:', e);
|
console.log('error:', e);
|
||||||
next(e);
|
next(e);
|
||||||
@@ -58,5 +117,10 @@ app.use(function(req, res, next) {
|
|||||||
});
|
});
|
||||||
|
|
||||||
app.use('/api', router);
|
app.use('/api', router);
|
||||||
app.listen(PORT, () => console.log('Express server running at localhost: ' + PORT));
|
|
||||||
|
// Open the single shared MongoDB connection first and only start listening
// once `db` is assigned, so request handlers never see an undefined `db`.
MongoClient.connect(url).then((database) => {
  db = database;

  // Index creation runs in the background; a failure is logged instead of
  // surfacing as an unhandled promise rejection.
  db.collection('listings').createIndex({loc: "2d"})
    .catch((err) => console.log('error: could not create index on listings.loc', err));

  app.listen(PORT, () => console.log('Express server running at localhost: ' + PORT));
}).catch((err) => {
  // Without a database the API cannot serve anything: report why and stop,
  // rather than leaving a process that never starts listening.
  console.log('error: could not connect to MongoDB at ' + url, err);
  process.exit(1);
});
|
||||||
|
|
||||||
|
|||||||
5
crawler/enums.js
Normal file
5
crawler/enums.js
Normal file
@@ -0,0 +1,5 @@
|
|||||||
|
// Numeric `adType` code for listings that are offered for sale.
export const AD_TYPE_SALE = 1;
|
||||||
|
// Numeric `adType` code for rental listings.
export const AD_TYPE_RENT = 2;
|
||||||
|
|
||||||
|
// Seller usernames whose ads are skipped. Callers lower-case the scraped
// username before the lookup, so every entry here must be lower-case.
export const IGNORED_USERNAMES = ['rental']
|
||||||
|
|
||||||
@@ -17,7 +17,7 @@ export default class MongoSaver {
|
|||||||
} else {
|
} else {
|
||||||
console.log('Connection established to', this.url);
|
console.log('Connection established to', this.url);
|
||||||
saver.db = db;
|
saver.db = db;
|
||||||
saver.collection = db.collection('results');
|
saver.collection = db.collection('listings');
|
||||||
saver.ready = true;
|
saver.ready = true;
|
||||||
resolve();
|
resolve();
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -3,6 +3,7 @@
|
|||||||
let fetch = require('node-fetch');
|
let fetch = require('node-fetch');
|
||||||
let cheerio = require('cheerio');
|
let cheerio = require('cheerio');
|
||||||
let fs = require('fs');
|
let fs = require('fs');
|
||||||
|
import {AD_TYPE_SALE, IGNORED_USERNAMES} from '../enums';
|
||||||
|
|
||||||
export default class OlxCrawler {
|
export default class OlxCrawler {
|
||||||
|
|
||||||
@@ -18,6 +19,12 @@ export default class OlxCrawler {
|
|||||||
const body = await res.text();
|
const body = await res.text();
|
||||||
const $ = cheerio.load(body);
|
const $ = cheerio.load(body);
|
||||||
|
|
||||||
|
const username = $('#lg > div.desno2.profil > div:nth-child(2) > div.vrsta1.vrsta_desno > a > div.username > span').text();
|
||||||
|
|
||||||
|
if (IGNORED_USERNAMES.includes((username || '').toLowerCase())) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
const title = $('#naslovartikla').text();
|
const title = $('#naslovartikla').text();
|
||||||
const price = $('#pc > p:nth-child(2)').text();
|
const price = $('#pc > p:nth-child(2)').text();
|
||||||
const size = $('#dodatnapolja1 > div:nth-child(1) > div.df2').text();
|
const size = $('#dodatnapolja1 > div:nth-child(1) > div.df2').text();
|
||||||
@@ -30,6 +37,7 @@ export default class OlxCrawler {
|
|||||||
const olxId = $('#artikal_glavni_div > div.artikal_lijevo > div:nth-child(15) > div:nth-child(4) > div.df2').text();
|
const olxId = $('#artikal_glavni_div > div.artikal_lijevo > div:nth-child(15) > div:nth-child(4) > div.df2').text();
|
||||||
|
|
||||||
const descriptions = $('.artikal_detaljniopis_tekst');
|
const descriptions = $('.artikal_detaljniopis_tekst');
|
||||||
|
const floor = $('#dodatnapolja1').find(':contains(Sprat)').last().nextAll().text();
|
||||||
const latLngRe = /LatLng\(([0-9]+\.[0-9]+)\,\s+([0-9]+\.[0-9]+)\)/g;
|
const latLngRe = /LatLng\(([0-9]+\.[0-9]+)\,\s+([0-9]+\.[0-9]+)\)/g;
|
||||||
const imgRe = /href":("[^"]*")/g;
|
const imgRe = /href":("[^"]*")/g;
|
||||||
const matches = latLngRe.exec(body);
|
const matches = latLngRe.exec(body);
|
||||||
@@ -39,6 +47,10 @@ export default class OlxCrawler {
|
|||||||
const images = [];
|
const images = [];
|
||||||
const imgMatches = body.match(imgRe);
|
const imgMatches = body.match(imgRe);
|
||||||
|
|
||||||
|
// Extract the room count from scraped text as an integer, e.g. "abc (3)" -> 3.
// Uses the first contiguous run of digits, so multi-digit counts stay intact
// ("10" -> 10); a fractional part is dropped ("1.5" -> 1).
// Returns NaN when the text contains no digits (or is null/undefined).
// Why not filter chars and join(): join() defaults to "," and parseInt stops
// at the first comma, which truncated "10" to 1.
const parseRooms = (rooms) => {
  const digits = /\d+/.exec(rooms == null ? '' : String(rooms));
  return digits ? parseInt(digits[0], 10) : NaN;
};
|
||||||
|
// Parse scraped price text into a number. Dots are treated as thousands
// separators, so every one of them is stripped before parsing
// ("1.250.000 KM" -> 1250000). A string pattern in replace() only removes the
// first dot, hence the global regex. Trailing text such as a currency label
// is ignored by parseFloat; text that does not start with a number gives NaN.
const parsePrice = (price) => parseFloat(price.replace(/\./g, ""));
|
||||||
|
|
||||||
|
|
||||||
for (let i = 0; imgMatches && i < imgMatches.length; i++) {
|
for (let i = 0; imgMatches && i < imgMatches.length; i++) {
|
||||||
let img = imgMatches[i].replace("href\":", "")
|
let img = imgMatches[i].replace("href\":", "")
|
||||||
img = img.replace("\"", "");
|
img = img.replace("\"", "");
|
||||||
@@ -54,14 +66,14 @@ export default class OlxCrawler {
|
|||||||
const data = {
|
const data = {
|
||||||
url,
|
url,
|
||||||
title,
|
title,
|
||||||
price,
|
price: parsePrice(price) || -1,
|
||||||
size,
|
size: parseFloat(size),
|
||||||
rooms,
|
rooms: parseRooms(rooms),
|
||||||
|
floor: parseInt(floor),
|
||||||
address,
|
address,
|
||||||
location,
|
location,
|
||||||
adType,
|
adType: AD_TYPE_SALE,
|
||||||
time,
|
time,
|
||||||
olxId,
|
|
||||||
shortDescription: descriptions.first().text(),
|
shortDescription: descriptions.first().text(),
|
||||||
longDescription: descriptions.last().text(),
|
longDescription: descriptions.last().text(),
|
||||||
lat,
|
lat,
|
||||||
@@ -81,7 +93,7 @@ export default class OlxCrawler {
|
|||||||
async indexPage(pageNr, maxResults = 1000) {
|
async indexPage(pageNr, maxResults = 1000) {
|
||||||
try {
|
try {
|
||||||
console.log('Starting to index page: ' + pageNr);
|
console.log('Starting to index page: ' + pageNr);
|
||||||
const url = `http://www.olx.ba/pretraga?vrsta=samoizdavanje&sort_order=desc&kategorija=23&sort_po=datum&kanton=9&stranica=${pageNr}`;
|
const url = `http://www.olx.ba/pretraga?vrsta=samoprodaja&sort_order=desc&kategorija=23&sort_po=datum&kanton=9&stranica=${pageNr}`;
|
||||||
|
|
||||||
const res = await fetch(url);
|
const res = await fetch(url);
|
||||||
const body = await res.text();
|
const body = await res.text();
|
||||||
|
|||||||
@@ -48,7 +48,7 @@ export default class ListingDetails extends React.Component {
|
|||||||
imageIndex={this.props.imageIndex} />
|
imageIndex={this.props.imageIndex} />
|
||||||
<div className="ld-price-address-box">
|
<div className="ld-price-address-box">
|
||||||
<div className="ld-price">
|
<div className="ld-price">
|
||||||
{listing.price}
|
{listing.price.toLocaleString('bs')} KM
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div className="ld-address">
|
<div className="ld-address">
|
||||||
@@ -60,7 +60,7 @@ export default class ListingDetails extends React.Component {
|
|||||||
<div className="ld-features">
|
<div className="ld-features">
|
||||||
<div className="ld-feature-box">
|
<div className="ld-feature-box">
|
||||||
<i className="fa fa-bed"></i>
|
<i className="fa fa-bed"></i>
|
||||||
{listing.rooms}
|
{listing.rooms} sobe
|
||||||
</div>
|
</div>
|
||||||
<div className="ld-feature-box">
|
<div className="ld-feature-box">
|
||||||
<i className="fa fa-home"></i>
|
<i className="fa fa-home"></i>
|
||||||
@@ -68,11 +68,11 @@ export default class ListingDetails extends React.Component {
|
|||||||
</div>
|
</div>
|
||||||
<div className="ld-feature-box">
|
<div className="ld-feature-box">
|
||||||
<i className="fa fa-home"></i>
|
<i className="fa fa-home"></i>
|
||||||
1. sprat
|
{listing.floor}. sprat
|
||||||
</div>
|
</div>
|
||||||
<div className="ld-feature-box">
|
<div className="ld-feature-box">
|
||||||
<i className="fa fa-home"></i>
|
<i className="fa fa-home"></i>
|
||||||
Balkon
|
--
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
<div className="ld-check-availability">
|
<div className="ld-check-availability">
|
||||||
|
|||||||
@@ -24,8 +24,8 @@ export default class Listings extends React.Component {
|
|||||||
<img src={images[0]} alt=""></img>
|
<img src={images[0]} alt=""></img>
|
||||||
</div>
|
</div>
|
||||||
<div className="pli-details">
|
<div className="pli-details">
|
||||||
<div className="price">{l.price}</div>
|
<div className="price">{l.price.toLocaleString('bs')} KM</div>
|
||||||
<div className="description">{l.rooms ? `${l.rooms}, `: null}{l.size ? `${l.size}m2`: null}</div>
|
<div className="description">{l.rooms ? `${l.rooms} sobe, `: null}{l.size ? `${l.size}m2`: null}</div>
|
||||||
<div className="address">
|
<div className="address">
|
||||||
<div className="street">
|
<div className="street">
|
||||||
{l.address}
|
{l.address}
|
||||||
|
|||||||
Reference in New Issue
Block a user