Code refactoring; fixed bug with price parsing.

This commit is contained in:
Nedim Uka
2019-06-24 14:20:31 +02:00
parent 6eba5c2a97
commit 2cf6f6f1ff
3 changed files with 37 additions and 26 deletions

View File

@@ -1,6 +1,6 @@
const fetch = require('node-fetch');
const cheerio = require('cheerio');
const { allRERequest, findPointInsideBoundingBox } = require('../url');
const { allRERequest, findPointInsideBoundingBox } = require('../db/dbHelper');
const { getRealEstateTypeEnum } = require('../enums');
const { getRegion, getMunicipality } = require('../codes')
const Promise = require("bluebird");
@@ -92,9 +92,9 @@ module.exports = class OlxCrawler {
// category: category,
url,
title,
price: isNaN(parsedPrice) ? price : parsedPrice,
price: isNaN(parsedPrice) ? 0 : parsedPrice,
size: parseFloat(size),
gardenSize: parseFloat(gardenSize),
gardenSize: isNaN(parseFloat(gardenSize)) ? parseFloat(gardenSize) : 0,
address,
region,
municipality,
@@ -118,6 +118,7 @@ module.exports = class OlxCrawler {
async indexPage(olxUrl, maxResults = 1000) {
try {
//TODO fix paging
// console.log('Starting to index page: ' + pageNr);
// const url = `http://www.olx.ba/pretraga?vrsta=samoprodaja&sort_order=desc&kategorija=23&sort_po=datum&kanton=9&stranica=${pageNr}`;
@@ -142,7 +143,6 @@ module.exports = class OlxCrawler {
if (singleData) {
results.push(singleData);
}
// await this.sleep(500);
}
return results;
@@ -152,30 +152,35 @@ module.exports = class OlxCrawler {
}
getCategoryId (category) {
if (category === 'Stanovi') {
return 'stan';
} else if (category === 'Vikendice') {
return 'vikendica';
} else if (category === 'Kuće') {
return 'kuca';
}
switch(category) {
case 'Stanovi':
return 'stan';
case 'Vikendice':
return 'vikendica'
case 'Kuće':
return 'kuca';
default:
return '';
}
}
async indexPages(urls, start, end, maxResults = 1000) {
//TODO fix paging
// let results = {};
// for (let i = start; i <= end; i++) {
// let result = await this.indexPage(i, maxResults);
// Object.assign(results, result)
// await this.sleep(5000);
// }
// return results;
let results = [];
for (let url of urls) {
let result = await this.indexPage(url, maxResults);
// Object.assign(results, result)
results.push(result);
// await this.sleep(5000);
}
return results;
}
@@ -188,12 +193,12 @@ module.exports = class OlxCrawler {
let results = await this.indexPages(urls, this.fromPage, this.toPage, this.maxResults);
for (const result of results) {
for (const re1 of result) {
if (re1.lat !== undefined && re1.lat !== null && re1.lat !== "") {
const pointInsideBoundingBox = await findPointInsideBoundingBox([re1.lng, re1.lat]);
for (const finalResult of result) {
if (finalResult.lat !== undefined && finalResult.lat !== null && finalResult.lat !== "") {
const pointInsideBoundingBox = await findPointInsideBoundingBox([finalResult.lng, finalResult.lat]);
if (pointInsideBoundingBox[0].length !== 0) {
filteredResults.push(re1);
filteredResults.push(finalResult);
}
}
}
@@ -216,7 +221,7 @@ module.exports = class OlxCrawler {
const priceMax = "do=" + request.priceMax;
const olxUrl = {
url: "https://www.olx.ba/pretraga?" + realsestateType + "&id=2&stanje=0&vrstapregleda=tabela&sort_order=desc&" + region + "&" + municipality + "&" + priceMin + "&" + priceMax + "&vrsta=samoprodaja&" + sizeMin + "&" + sizeMax,
url: `https://www.olx.ba/pretraga?${realsestateType}&id=2&stanje=0&vrstapregleda=tabela&sort_order=desc&${region}&${municipality}&${priceMin}&${priceMax}&vrsta=samoprodaja&${sizeMin}&${sizeMax}`,
email: request.email
}
console.log(olxUrl.url);

View File

@@ -1,10 +1,15 @@
const db = require('../../models/index');
// const db = require('../../models/index');
/**
 * Loads everything `db.RealEstateRequest.findAll()` returns.
 *
 * TODO: fetch only subscribed real-estate requests.
 *
 * @returns {Promise<*>} whatever `findAll()` resolves to.
 */
const allRERequest = async () => {
  const requests = await db.RealEstateRequest.findAll();
  return requests;
};
/**
 * Selects the "RealEstateRequests" rows whose `bounding_box` contains a point.
 *
 * The point is passed to the database as a bound replacement rather than
 * being concatenated into the SQL text, so coordinate values taken from
 * scraped listings cannot change the statement itself.
 *
 * @param {Array} latLng - two-element array; element 0 is written first and
 *   element 1 second inside the WKT `POINT (x y)`. Presumably longitude then
 *   latitude; confirm against callers.
 * @returns {Promise<*>} whatever `db.sequelize.query` resolves to for this
 *   statement.
 */
const findPointInsideBoundingBox = async (latLng) => {
  const sql =
    'SELECT * FROM "RealEstateRequests" ' +
    'WHERE ST_Contains("RealEstateRequests".bounding_box, ST_GEOMFROMTEXT(:point))';
  // The whole WKT literal is one escaped string parameter.
  const point = `POINT (${latLng[0]} ${latLng[1]})`;
  return await db.sequelize.query(sql, { replacements: { point } });
};
// const bulkInsert = async (reuslts) => {
// db.MarketAlert.bulkCreate({
// })
// }
module.exports = {
allRERequest,
findPointInsideBoundingBox
};

View File

@@ -38,6 +38,7 @@ async function crawlAll() {
})
}
try {
console.log(marketAlerts);
const filteredMarketAlerts = marketAlerts.filter((elem) => !marketAlertsFromDb.find(({ url }) => elem.url === url));
await db.MarketAlert.bulkCreate(filteredMarketAlerts);
process.exit()