// Patch summary (unified git diff touching three files):
//  - app/helpers/crawlers/olxClawler.js: imports allRERequest / findPointInsideBoundingBox from
//    '../db/dbHelper' instead of '../url'; a price that fails the isNaN check is now stored as 0
//    instead of the raw price value; getCategoryId changes from an if/else chain to a switch that
//    returns '' by default; the search URL is built with a template literal; commented-out sleep /
//    Object.assign lines are dropped, "TODO fix paging" markers are added, and the loop variable
//    re1 is renamed to finalResult.
//  - app/helpers/db/dbHelper.js: the commented-out stub is replaced by allRERequest (findAll on
//    db.RealEstateRequest) and findPointInsideBoundingBox (raw ST_Contains query), both exported.
//  - app/services/crawlerService.js: logs marketAlerts before filtering against existing rows.
// NOTE(review): the added gardenSize line is `isNaN(parseFloat(gardenSize)) ? parseFloat(gardenSize) : 0`,
//   which yields NaN for unparsable input and 0 for every valid number; the branches look swapped
//   relative to the price line in the same hunk — confirm and flip before merging.
// NOTE(review): findPointInsideBoundingBox concatenates latLng[0] / latLng[1] straight into the SQL
//   text; the values come from crawled listings (finalResult.lng / finalResult.lat), so bound
//   parameters would be safer — confirm against the Sequelize version in use.
// NOTE(review): the new `case 'Vikendice'` return has no trailing semicolon, unlike its siblings.
diff --git a/app/helpers/crawlers/olxClawler.js b/app/helpers/crawlers/olxClawler.js index 365813f..d6b7292 100644 --- a/app/helpers/crawlers/olxClawler.js +++ b/app/helpers/crawlers/olxClawler.js @@ -1,6 +1,6 @@ const fetch = require('node-fetch'); const cheerio = require('cheerio'); -const { allRERequest, findPointInsideBoundingBox } = require('../url'); +const { allRERequest, findPointInsideBoundingBox } = require('../db/dbHelper'); const { getRealEstateTypeEnum } = require('../enums'); const { getRegion, getMunicipality } = require('../codes') const Promise = require("bluebird"); @@ -92,9 +92,9 @@ module.exports = class OlxCrawler { // category: category, url, title, - price: isNaN(parsedPrice) ? price : parsedPrice, + price: isNaN(parsedPrice) ? 0 : parsedPrice, size: parseFloat(size), - gardenSize: parseFloat(gardenSize), + gardenSize: isNaN(parseFloat(gardenSize)) ? parseFloat(gardenSize) : 0, address, region, municipality, @@ -118,6 +118,7 @@ module.exports = class OlxCrawler { async indexPage(olxUrl, maxResults = 1000) { try { + //TODO fix paging // console.log('Starting to index page: ' + pageNr); // const url = `http://www.olx.ba/pretraga?vrsta=samoprodaja&sort_order=desc&kategorija=23&sort_po=datum&kanton=9&stranica=${pageNr}`; @@ -142,7 +143,6 @@ module.exports = class OlxCrawler { if (singleData) { results.push(singleData); } - // await this.sleep(500); } return results; @@ -152,30 +152,35 @@ module.exports = class OlxCrawler { } getCategoryId (category) { - if (category === 'Stanovi') { - return 'stan'; - } else if (category === 'Vikendice') { - return 'vikendica'; - } else if (category === 'Kuće') { - return 'kuca'; - } + + switch(category) { + case 'Stanovi': + return 'stan'; + + case 'Vikendice': + return 'vikendica' + + case 'Kuće': + return 'kuca'; + + default: + return ''; + } } async indexPages(urls, start, end, maxResults = 1000) { + //TODO fix paging // let results = {}; // for (let i = start; i <= end; i++) { // let result = await 
this.indexPage(i, maxResults); // Object.assign(results, result) - // await this.sleep(5000); // } // return results; let results = []; for (let url of urls) { let result = await this.indexPage(url, maxResults); - // Object.assign(results, result) results.push(result); - // await this.sleep(5000); } return results; } @@ -188,12 +193,12 @@ module.exports = class OlxCrawler { let results = await this.indexPages(urls, this.fromPage, this.toPage, this.maxResults); for (const result of results) { - for (const re1 of result) { - if (re1.lat !== undefined && re1.lat !== null && re1.lat !== "") { - const pointInsideBoundingBox = await findPointInsideBoundingBox([re1.lng, re1.lat]); + for (const finalResult of result) { + if (finalResult.lat !== undefined && finalResult.lat !== null && finalResult.lat !== "") { + const pointInsideBoundingBox = await findPointInsideBoundingBox([finalResult.lng, finalResult.lat]); if (pointInsideBoundingBox[0].length !== 0) { - filteredResults.push(re1); + filteredResults.push(finalResult); } } } @@ -216,7 +221,7 @@ module.exports = class OlxCrawler { const priceMax = "do=" + request.priceMax; const olxUrl = { - url: "https://www.olx.ba/pretraga?" 
+ realsestateType + "&id=2&stanje=0&vrstapregleda=tabela&sort_order=desc&" + region + "&" + municipality + "&" + priceMin + "&" + priceMax + "&vrsta=samoprodaja&" + sizeMin + "&" + sizeMax, + url: `https://www.olx.ba/pretraga?${realsestateType}&id=2&stanje=0&vrstapregleda=tabela&sort_order=desc&${region}&${municipality}&${priceMin}&${priceMax}&vrsta=samoprodaja&${sizeMin}&${sizeMax}`, email: request.email } console.log(olxUrl.url); diff --git a/app/helpers/db/dbHelper.js b/app/helpers/db/dbHelper.js index 6c81004..f51638b 100644 --- a/app/helpers/db/dbHelper.js +++ b/app/helpers/db/dbHelper.js @@ -1,10 +1,15 @@ +const db = require('../../models/index'); -// const db = require('../../models/index'); +// TODO Fetch only subscribed realestate requests +const allRERequest = async () => { + return await db.RealEstateRequest.findAll(); +} +const findPointInsideBoundingBox = async (latLng) => { + return await db.sequelize.query("SELECT * FROM \"RealEstateRequests\" WHERE ST_Contains(\"RealEstateRequests\".bounding_box, ST_GEOMFROMTEXT(\'POINT (" + latLng[0] + " " + latLng[1]+ ")\'))"); +} -// const bulkInsert = async (reuslts) => { -// db.MarketAlert.bulkCreate({ - -// }) - -// } \ No newline at end of file +module.exports = { + allRERequest, + findPointInsideBoundingBox +}; diff --git a/app/services/crawlerService.js b/app/services/crawlerService.js index 49ad166..5eb160b 100644 --- a/app/services/crawlerService.js +++ b/app/services/crawlerService.js @@ -38,6 +38,7 @@ async function crawlAll() { }) } try { + console.log(marketAlerts); const filteredMarketAlerts = marketAlerts.filter((elem) => !marketAlertsFromDb.find(({ url }) => elem.url === url)); await db.MarketAlert.bulkCreate(filteredMarketAlerts); process.exit()