Code refactoring, fixed bug with price parsing:
This commit is contained in:
@@ -1,6 +1,6 @@
|
||||
const fetch = require('node-fetch');
|
||||
const cheerio = require('cheerio');
|
||||
const { allRERequest, findPointInsideBoundingBox } = require('../url');
|
||||
const { allRERequest, findPointInsideBoundingBox } = require('../db/dbHelper');
|
||||
const { getRealEstateTypeEnum } = require('../enums');
|
||||
const { getRegion, getMunicipality } = require('../codes')
|
||||
const Promise = require("bluebird");
|
||||
@@ -92,9 +92,9 @@ module.exports = class OlxCrawler {
|
||||
// category: category,
|
||||
url,
|
||||
title,
|
||||
price: isNaN(parsedPrice) ? price : parsedPrice,
|
||||
price: isNaN(parsedPrice) ? 0 : parsedPrice,
|
||||
size: parseFloat(size),
|
||||
gardenSize: parseFloat(gardenSize),
|
||||
gardenSize: isNaN(parseFloat(gardenSize)) ? parseFloat(gardenSize) : 0,
|
||||
address,
|
||||
region,
|
||||
municipality,
|
||||
@@ -118,6 +118,7 @@ module.exports = class OlxCrawler {
|
||||
|
||||
async indexPage(olxUrl, maxResults = 1000) {
|
||||
try {
|
||||
//TODO fix paging
|
||||
// console.log('Starting to index page: ' + pageNr);
|
||||
// const url = `http://www.olx.ba/pretraga?vrsta=samoprodaja&sort_order=desc&kategorija=23&sort_po=datum&kanton=9&stranica=${pageNr}`;
|
||||
|
||||
@@ -142,7 +143,6 @@ module.exports = class OlxCrawler {
|
||||
if (singleData) {
|
||||
results.push(singleData);
|
||||
}
|
||||
// await this.sleep(500);
|
||||
}
|
||||
|
||||
return results;
|
||||
@@ -152,30 +152,35 @@ module.exports = class OlxCrawler {
|
||||
}
|
||||
|
||||
getCategoryId (category) {
|
||||
if (category === 'Stanovi') {
|
||||
return 'stan';
|
||||
} else if (category === 'Vikendice') {
|
||||
return 'vikendica';
|
||||
} else if (category === 'Kuće') {
|
||||
return 'kuca';
|
||||
}
|
||||
|
||||
switch(category) {
|
||||
case 'Stanovi':
|
||||
return 'stan';
|
||||
|
||||
case 'Vikendice':
|
||||
return 'vikendica'
|
||||
|
||||
case 'Kuće':
|
||||
return 'kuca';
|
||||
|
||||
default:
|
||||
return '';
|
||||
}
|
||||
}
|
||||
|
||||
async indexPages(urls, start, end, maxResults = 1000) {
|
||||
//TODO fix paging
|
||||
// let results = {};
|
||||
// for (let i = start; i <= end; i++) {
|
||||
// let result = await this.indexPage(i, maxResults);
|
||||
// Object.assign(results, result)
|
||||
// await this.sleep(5000);
|
||||
// }
|
||||
// return results;
|
||||
|
||||
let results = [];
|
||||
for (let url of urls) {
|
||||
let result = await this.indexPage(url, maxResults);
|
||||
// Object.assign(results, result)
|
||||
results.push(result);
|
||||
// await this.sleep(5000);
|
||||
}
|
||||
return results;
|
||||
}
|
||||
@@ -188,12 +193,12 @@ module.exports = class OlxCrawler {
|
||||
let results = await this.indexPages(urls, this.fromPage, this.toPage, this.maxResults);
|
||||
|
||||
for (const result of results) {
|
||||
for (const re1 of result) {
|
||||
if (re1.lat !== undefined && re1.lat !== null && re1.lat !== "") {
|
||||
const pointInsideBoundingBox = await findPointInsideBoundingBox([re1.lng, re1.lat]);
|
||||
for (const finalResult of result) {
|
||||
if (finalResult.lat !== undefined && finalResult.lat !== null && finalResult.lat !== "") {
|
||||
const pointInsideBoundingBox = await findPointInsideBoundingBox([finalResult.lng, finalResult.lat]);
|
||||
|
||||
if (pointInsideBoundingBox[0].length !== 0) {
|
||||
filteredResults.push(re1);
|
||||
filteredResults.push(finalResult);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -216,7 +221,7 @@ module.exports = class OlxCrawler {
|
||||
const priceMax = "do=" + request.priceMax;
|
||||
|
||||
const olxUrl = {
|
||||
url: "https://www.olx.ba/pretraga?" + realsestateType + "&id=2&stanje=0&vrstapregleda=tabela&sort_order=desc&" + region + "&" + municipality + "&" + priceMin + "&" + priceMax + "&vrsta=samoprodaja&" + sizeMin + "&" + sizeMax,
|
||||
url: `https://www.olx.ba/pretraga?${realsestateType}&id=2&stanje=0&vrstapregleda=tabela&sort_order=desc&${region}&${municipality}&${priceMin}&${priceMax}&vrsta=samoprodaja&${sizeMin}&${sizeMax}`,
|
||||
email: request.email
|
||||
}
|
||||
console.log(olxUrl.url);
|
||||
|
||||
@@ -1,10 +1,15 @@
|
||||
const db = require('../../models/index');
|
||||
|
||||
// const db = require('../../models/index');
|
||||
// TODO: Fetch only subscribed real-estate requests
|
||||
/**
 * Loads every stored real-estate request.
 *
 * @returns {Promise<Array>} Whatever `db.RealEstateRequest.findAll()` resolves to.
 */
const allRERequest = async () => {
  // Stays async so a synchronous failure (e.g. a missing model) surfaces as a rejection.
  return db.RealEstateRequest.findAll();
};
|
||||
|
||||
/**
 * Selects the rows of "RealEstateRequests" whose bounding_box contains a point.
 *
 * Both coordinates are checked to be finite numbers, and the point is passed to
 * the database as a bound replacement instead of being concatenated into the
 * SQL text, so caller-supplied values can never alter the statement.
 *
 * @param {Array<number|string>} latLng - Two coordinates; they are written into
 *   the WKT point in the order given (element 0 first, element 1 second).
 *   Numeric strings are accepted.
 * @returns {Promise<Array>} Whatever `db.sequelize.query` resolves to for the statement.
 * @throws {TypeError} (as a rejection) when a coordinate is missing, empty or not a finite number.
 */
const findPointInsideBoundingBox = async (latLng) => {
  // Accepts finite numbers and non-empty numeric strings; everything else is rejected
  // up front rather than being sent to the database as malformed WKT.
  const toCoordinate = (value) => {
    const parsed = typeof value === 'string' && value.trim() !== '' ? Number(value) : value;
    if (typeof parsed !== 'number' || !Number.isFinite(parsed)) {
      throw new TypeError(`Invalid coordinate: ${String(value)}`);
    }
    return parsed;
  };

  const point = `POINT (${toCoordinate(latLng[0])} ${toCoordinate(latLng[1])})`;

  return db.sequelize.query(
    'SELECT * FROM "RealEstateRequests" WHERE ST_Contains("RealEstateRequests".bounding_box, ST_GEOMFROMTEXT(:point))',
    { replacements: { point } }
  );
};
|
||||
|
||||
// const bulkInsert = async (reuslts) => {
|
||||
// db.MarketAlert.bulkCreate({
|
||||
|
||||
// })
|
||||
|
||||
// }
|
||||
module.exports = {
|
||||
allRERequest,
|
||||
findPointInsideBoundingBox
|
||||
};
|
||||
|
||||
@@ -38,6 +38,7 @@ async function crawlAll() {
|
||||
})
|
||||
}
|
||||
try {
|
||||
console.log(marketAlerts);
|
||||
const filteredMarketAlerts = marketAlerts.filter((elem) => !marketAlertsFromDb.find(({ url }) => elem.url === url));
|
||||
await db.MarketAlert.bulkCreate(filteredMarketAlerts);
|
||||
process.exit()
|
||||
|
||||
Reference in New Issue
Block a user