Correctly parse rooms, size, price etc.
This commit is contained in:
@@ -61,7 +61,7 @@
|
||||
function _asyncToGenerator(fn) { return function () { var gen = fn.apply(this, arguments); return new Promise(function (resolve, reject) { function step(key, arg) { try { var info = gen[key](arg); var value = info.value; } catch (error) { reject(error); return; } if (info.done) { resolve(value); } else { return Promise.resolve(value).then(function (value) { step("next", value); }, function (err) { step("throw", err); }); } } return step("next"); }); }; }
|
||||
|
||||
var MongoClient = __webpack_require__(3).MongoClient;
|
||||
var url = 'mongodb://localhost:27017/example';
|
||||
var url = 'mongodb://localhost:27017/kivi';
|
||||
|
||||
__webpack_require__(4);
|
||||
|
||||
@@ -70,13 +70,16 @@
|
||||
var PORT = process.env.PORT || 3001;
|
||||
var AGENTURA_KEY = process.env.AGENTURA_KEY || '1somethingverysecret';
|
||||
|
||||
var db = void 0;
|
||||
//Monogo = await MongoClient.connect(url);
|
||||
|
||||
// TODO:
|
||||
// db.results.ensureIndex({loc:"2d"})
|
||||
//collection.ensureIndex("username",callback)
|
||||
|
||||
router.get('/search', function () {
|
||||
var _ref = _asyncToGenerator(regeneratorRuntime.mark(function _callee(req, res, next) {
|
||||
var bounds, db, properties, query, _bounds$split$map, _bounds$split$map2, lat1, lng1, lat2, lng2, box, all;
|
||||
var bounds, minPrice, maxPrice, minSize, maxSize, rooms, adType, properties, query, _bounds$split$map, _bounds$split$map2, lat1, lng1, lat2, lng2, box, all;
|
||||
|
||||
return regeneratorRuntime.wrap(function _callee$(_context) {
|
||||
while (1) {
|
||||
@@ -84,12 +87,13 @@
|
||||
case 0:
|
||||
_context.prev = 0;
|
||||
bounds = req.query.bounds || '';
|
||||
_context.next = 4;
|
||||
return MongoClient.connect(url);
|
||||
|
||||
case 4:
|
||||
db = _context.sent;
|
||||
properties = db.collection('results');
|
||||
minPrice = req.query.minPrice;
|
||||
maxPrice = req.query.maxPrice;
|
||||
minSize = req.query.minSize;
|
||||
maxSize = req.query.maxSize;
|
||||
rooms = req.query.rooms;
|
||||
adType = req.query.adType;
|
||||
properties = db.collection('listings');
|
||||
query = {};
|
||||
|
||||
|
||||
@@ -107,35 +111,83 @@
|
||||
});
|
||||
}
|
||||
|
||||
_context.next = 10;
|
||||
if (adType) {
|
||||
query = Object.assign(query, {
|
||||
adType: parseInt(adType)
|
||||
});
|
||||
}
|
||||
|
||||
if (minPrice) {
|
||||
query = Object.assign(query, {
|
||||
price: {
|
||||
"$gte": parseFloat(minPrice),
|
||||
"$ne": -1
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
if (maxPrice) {
|
||||
query = Object.assign(query, {
|
||||
price: {
|
||||
"$lte": parseFloat(maxPrice),
|
||||
"$ne": -1
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
if (rooms === "4+") {
|
||||
query = Object.assign(query, {
|
||||
rooms: {
|
||||
"$gte": 4
|
||||
}
|
||||
});
|
||||
} else if (rooms) {
|
||||
query = Object.assign(query, {
|
||||
rooms: parseFloat(rooms)
|
||||
});
|
||||
}
|
||||
|
||||
if (minSize) {
|
||||
query = Object.assign(query, {
|
||||
size: {
|
||||
"$gte": parseFloat(minSize)
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
if (maxSize) {
|
||||
query = Object.assign(query, {
|
||||
size: {
|
||||
"$lte": parseFloat(maxSize)
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
_context.next = 19;
|
||||
return properties.find(query).toArray();
|
||||
|
||||
case 10:
|
||||
case 19:
|
||||
all = _context.sent;
|
||||
|
||||
|
||||
res.json(all);
|
||||
res.end();
|
||||
_context.next = 15;
|
||||
return db.close();
|
||||
|
||||
case 15:
|
||||
_context.next = 21;
|
||||
_context.next = 28;
|
||||
break;
|
||||
|
||||
case 17:
|
||||
_context.prev = 17;
|
||||
case 24:
|
||||
_context.prev = 24;
|
||||
_context.t0 = _context['catch'](0);
|
||||
|
||||
console.log('error:', _context.t0);
|
||||
next(_context.t0);
|
||||
|
||||
case 21:
|
||||
case 28:
|
||||
case 'end':
|
||||
return _context.stop();
|
||||
}
|
||||
}
|
||||
}, _callee, undefined, [[0, 17]]);
|
||||
}, _callee, undefined, [[0, 24]]);
|
||||
}));
|
||||
|
||||
return function (_x, _x2, _x3) {
|
||||
@@ -155,8 +207,13 @@
|
||||
});
|
||||
|
||||
app.use('/api', router);
|
||||
app.listen(PORT, function () {
|
||||
return console.log('Express server running at localhost: ' + PORT);
|
||||
|
||||
MongoClient.connect(url).then(function (database) {
|
||||
db = database;
|
||||
db.collection('listings').createIndex({ loc: "2d" });
|
||||
app.listen(PORT, function () {
|
||||
return console.log('Express server running at localhost: ' + PORT);
|
||||
});
|
||||
});
|
||||
|
||||
/***/ },
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import express from 'express'
|
||||
import bodyParser from 'body-parser';
|
||||
var MongoClient = require('mongodb').MongoClient;
|
||||
var url = 'mongodb://localhost:27017/example';
|
||||
var url = 'mongodb://localhost:27017/kivi';
|
||||
|
||||
require("babel-polyfill");
|
||||
|
||||
@@ -10,6 +10,9 @@ const router = express.Router({mergeParams: true})
|
||||
const PORT = process.env.PORT || 3001;
|
||||
const AGENTURA_KEY = process.env.AGENTURA_KEY || '1somethingverysecret';
|
||||
|
||||
let db;
|
||||
//Monogo = await MongoClient.connect(url);
|
||||
|
||||
// TODO:
|
||||
// db.results.ensureIndex({loc:"2d"})
|
||||
//collection.ensureIndex("username",callback)
|
||||
@@ -17,8 +20,13 @@ const AGENTURA_KEY = process.env.AGENTURA_KEY || '1somethingverysecret';
|
||||
router.get('/search', async (req, res, next) => {
|
||||
try {
|
||||
const bounds = req.query.bounds || '';
|
||||
const db = await MongoClient.connect(url);
|
||||
const properties = db.collection('results');
|
||||
const minPrice = req.query.minPrice;
|
||||
const maxPrice = req.query.maxPrice;
|
||||
const minSize = req.query.minSize;
|
||||
const maxSize = req.query.maxSize;
|
||||
const rooms = req.query.rooms;
|
||||
const adType = req.query.adType;
|
||||
const properties = db.collection('listings');
|
||||
let query = {};
|
||||
|
||||
if (bounds) {
|
||||
@@ -34,11 +42,62 @@ router.get('/search', async (req, res, next) => {
|
||||
});
|
||||
}
|
||||
|
||||
if (adType) {
|
||||
query = Object.assign(query, {
|
||||
adType: parseInt(adType)
|
||||
});
|
||||
}
|
||||
|
||||
if (minPrice) {
|
||||
query = Object.assign(query, {
|
||||
price: {
|
||||
"$gte": parseFloat(minPrice),
|
||||
"$ne": -1
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
if (maxPrice) {
|
||||
query = Object.assign(query, {
|
||||
price: {
|
||||
"$lte": parseFloat(maxPrice),
|
||||
"$ne": -1
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
if (rooms === "4+") {
|
||||
query = Object.assign(query, {
|
||||
rooms: {
|
||||
"$gte": 4
|
||||
}
|
||||
})
|
||||
} else if (rooms) {
|
||||
query = Object.assign(query, {
|
||||
rooms: parseFloat(rooms)
|
||||
});
|
||||
}
|
||||
|
||||
if (minSize) {
|
||||
query = Object.assign(query, {
|
||||
size: {
|
||||
"$gte": parseFloat(minSize)
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
if (maxSize) {
|
||||
query = Object.assign(query, {
|
||||
size: {
|
||||
"$lte": parseFloat(maxSize)
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
const all = await properties.find(query).toArray();
|
||||
|
||||
res.json(all);
|
||||
res.end();
|
||||
await db.close();
|
||||
} catch (e) {
|
||||
console.log('error:', e);
|
||||
next(e);
|
||||
@@ -58,5 +117,10 @@ app.use(function(req, res, next) {
|
||||
});
|
||||
|
||||
app.use('/api', router);
|
||||
app.listen(PORT, () => console.log('Express server running at localhost: ' + PORT));
|
||||
|
||||
MongoClient.connect(url).then((database) => {
|
||||
db = database;
|
||||
db.collection('listings').createIndex({loc: "2d"});
|
||||
app.listen(PORT, () => console.log('Express server running at localhost: ' + PORT));
|
||||
});
|
||||
|
||||
|
||||
5
crawler/enums.js
Normal file
5
crawler/enums.js
Normal file
@@ -0,0 +1,5 @@
|
||||
export const AD_TYPE_SALE = 1;
|
||||
export const AD_TYPE_RENT = 2;
|
||||
|
||||
export const IGNORED_USERNAMES = ['rental']
|
||||
|
||||
@@ -17,7 +17,7 @@ export default class MongoSaver {
|
||||
} else {
|
||||
console.log('Connection established to', this.url);
|
||||
saver.db = db;
|
||||
saver.collection = db.collection('results');
|
||||
saver.collection = db.collection('listings');
|
||||
saver.ready = true;
|
||||
resolve();
|
||||
}
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
let fetch = require('node-fetch');
|
||||
let cheerio = require('cheerio');
|
||||
let fs = require('fs');
|
||||
import {AD_TYPE_SALE, IGNORED_USERNAMES} from '../enums';
|
||||
|
||||
export default class OlxCrawler {
|
||||
|
||||
@@ -18,6 +19,12 @@ export default class OlxCrawler {
|
||||
const body = await res.text();
|
||||
const $ = cheerio.load(body);
|
||||
|
||||
const username = $('#lg > div.desno2.profil > div:nth-child(2) > div.vrsta1.vrsta_desno > a > div.username > span').text();
|
||||
|
||||
if (IGNORED_USERNAMES.includes((username || '').toLowerCase())) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const title = $('#naslovartikla').text();
|
||||
const price = $('#pc > p:nth-child(2)').text();
|
||||
const size = $('#dodatnapolja1 > div:nth-child(1) > div.df2').text();
|
||||
@@ -30,6 +37,7 @@ export default class OlxCrawler {
|
||||
const olxId = $('#artikal_glavni_div > div.artikal_lijevo > div:nth-child(15) > div:nth-child(4) > div.df2').text();
|
||||
|
||||
const descriptions = $('.artikal_detaljniopis_tekst');
|
||||
const floor = $('#dodatnapolja1').find(':contains(Sprat)').last().nextAll().text();
|
||||
const latLngRe = /LatLng\(([0-9]+\.[0-9]+)\,\s+([0-9]+\.[0-9]+)\)/g;
|
||||
const imgRe = /href":("[^"]*")/g;
|
||||
const matches = latLngRe.exec(body);
|
||||
@@ -39,6 +47,10 @@ export default class OlxCrawler {
|
||||
const images = [];
|
||||
const imgMatches = body.match(imgRe);
|
||||
|
||||
/**
 * Extracts the room count from a room label string.
 *
 * Takes the first run of consecutive digits in the label and parses it as a
 * base-10 integer. The previous implementation joined the individual digit
 * characters with the default "," separator, so a multi-digit value such as
 * "10" became "1,0" and was parsed as 1.
 *
 * Only the leading integer part is used, so a label containing "1.5" still
 * yields 1 (same result as before).
 *
 * @param {string} rooms - Raw room label text.
 * @returns {number} The parsed room count, or NaN when the label has no digits.
 */
const parseRooms = (rooms) => {
  const firstDigitRun = /\d+/.exec(String(rooms));
  return firstDigitRun ? parseInt(firstDigitRun[0], 10) : NaN;
};
|
||||
/**
 * Parses a price string that uses "." as a thousands separator.
 *
 * Every dot is stripped before parsing. The previous implementation called
 * replace(".", "") with a string pattern, which removes only the first dot,
 * so "1.250.000" was parsed as 1250 instead of 1250000.
 *
 * Any trailing non-numeric text (for example a currency suffix) is ignored
 * by parseFloat.
 *
 * @param {string} price - Raw price text.
 * @returns {number} The parsed price, or NaN when the text does not start
 *   with a number (the caller falls back to -1 in that case).
 */
const parsePrice = (price) => parseFloat(price.replace(/\./g, ""));
|
||||
|
||||
|
||||
for (let i = 0; imgMatches && i < imgMatches.length; i++) {
|
||||
let img = imgMatches[i].replace("href\":", "")
|
||||
img = img.replace("\"", "");
|
||||
@@ -54,14 +66,14 @@ export default class OlxCrawler {
|
||||
const data = {
|
||||
url,
|
||||
title,
|
||||
price,
|
||||
size,
|
||||
rooms,
|
||||
price: parsePrice(price) || -1,
|
||||
size: parseFloat(size),
|
||||
rooms: parseRooms(rooms),
|
||||
floor: parseInt(floor),
|
||||
address,
|
||||
location,
|
||||
adType,
|
||||
adType: AD_TYPE_SALE,
|
||||
time,
|
||||
olxId,
|
||||
shortDescription: descriptions.first().text(),
|
||||
longDescription: descriptions.last().text(),
|
||||
lat,
|
||||
@@ -81,7 +93,7 @@ export default class OlxCrawler {
|
||||
async indexPage(pageNr, maxResults = 1000) {
|
||||
try {
|
||||
console.log('Starting to index page: ' + pageNr);
|
||||
const url = `http://www.olx.ba/pretraga?vrsta=samoizdavanje&sort_order=desc&kategorija=23&sort_po=datum&kanton=9&stranica=${pageNr}`;
|
||||
const url = `http://www.olx.ba/pretraga?vrsta=samoprodaja&sort_order=desc&kategorija=23&sort_po=datum&kanton=9&stranica=${pageNr}`;
|
||||
|
||||
const res = await fetch(url);
|
||||
const body = await res.text();
|
||||
|
||||
@@ -48,7 +48,7 @@ export default class ListingDetails extends React.Component {
|
||||
imageIndex={this.props.imageIndex} />
|
||||
<div className="ld-price-address-box">
|
||||
<div className="ld-price">
|
||||
{listing.price}
|
||||
{listing.price.toLocaleString('bs')} KM
|
||||
</div>
|
||||
|
||||
<div className="ld-address">
|
||||
@@ -60,7 +60,7 @@ export default class ListingDetails extends React.Component {
|
||||
<div className="ld-features">
|
||||
<div className="ld-feature-box">
|
||||
<i className="fa fa-bed"></i>
|
||||
{listing.rooms}
|
||||
{listing.rooms} sobe
|
||||
</div>
|
||||
<div className="ld-feature-box">
|
||||
<i className="fa fa-home"></i>
|
||||
@@ -68,11 +68,11 @@ export default class ListingDetails extends React.Component {
|
||||
</div>
|
||||
<div className="ld-feature-box">
|
||||
<i className="fa fa-home"></i>
|
||||
1. sprat
|
||||
{listing.floor}. sprat
|
||||
</div>
|
||||
<div className="ld-feature-box">
|
||||
<i className="fa fa-home"></i>
|
||||
Balkon
|
||||
--
|
||||
</div>
|
||||
</div>
|
||||
<div className="ld-check-availability">
|
||||
|
||||
@@ -24,8 +24,8 @@ export default class Listings extends React.Component {
|
||||
<img src={images[0]} alt=""></img>
|
||||
</div>
|
||||
<div className="pli-details">
|
||||
<div className="price">{l.price}</div>
|
||||
<div className="description">{l.rooms ? `${l.rooms}, `: null}{l.size ? `${l.size}m2`: null}</div>
|
||||
<div className="price">{l.price.toLocaleString('bs')} KM</div>
|
||||
<div className="description">{l.rooms ? `${l.rooms} sobe, `: null}{l.size ? `${l.size}m2`: null}</div>
|
||||
<div className="address">
|
||||
<div className="street">
|
||||
{l.address}
|
||||
|
||||
Reference in New Issue
Block a user