Save additional fields; improve async functions with promises

This commit is contained in:
Nedim Uka
2019-06-21 15:14:43 +02:00
parent 3c59292f23
commit 80ff9bcb6b
5 changed files with 165 additions and 35 deletions

View File

@@ -12,7 +12,7 @@ module.exports = class OlxCrawler {
this.maxResults = maxResults;
}
async indexSingle(url) {
async indexSingle(url, email) {
try {
const res = await fetch(url);
const body = await res.text();
@@ -38,7 +38,7 @@ module.exports = class OlxCrawler {
const olxId = $('#artikal_glavni_div > div.artikal_lijevo > div:nth-child(15) > div:nth-child(4) > div.df2').text();
const descriptions = $('.artikal_detaljniopis_tekst');
const floor = $('#dodatnapolja1').find(':contains(Sprat)').last().nextAll().text();
// const floor = $('#dodatnapolja1').find(':contains(Sprat)').last().nextAll().text();
const latLngRe = /LatLng\(([0-9]+\.[0-9]+)\,\s+([0-9]+\.[0-9]+)\)/g;
const imgRe = /href":("[^"]*")/g;
const matches = latLngRe.exec(body);
@@ -73,25 +73,25 @@ module.exports = class OlxCrawler {
}
const parsedPrice = parsePrice(price);
let parsedRooms;
if (rooms === 'Garsonjera') {
parsedRooms = 0;
} else {
parsedRooms = parseRooms(rooms);
}
const locationArray = location.split(",");
const region = locationArray[0];
const municipality = locationArray[1];
console.log(location);
console.log(locationArray);
const data = {
// category: this.getCategoryId(category),
email : email,
olxId: olxId,
category: category,
url,
title,
price: isNaN(parsedPrice) ? price : parsedPrice,
size: parseFloat(size),
rooms: parsedRooms,
floor: parseInt(floor),
address,
location,
region,
municipality,
// adType: AD_TYPE_SALE,
time,
shortDescription: descriptions.first().text(),
@@ -110,12 +110,12 @@ module.exports = class OlxCrawler {
return null;
}
async indexPage(url, maxResults = 1000) {
async indexPage(olxUrl, maxResults = 1000) {
try {
// console.log('Starting to index page: ' + pageNr);
// const url = `http://www.olx.ba/pretraga?vrsta=samoprodaja&sort_order=desc&kategorija=23&sort_po=datum&kanton=9&stranica=${pageNr}`;
const res = await fetch(url);
const res = await fetch(olxUrl.url);
const body = await res.text();
const $ = cheerio.load(body);
const hrefs = [];
@@ -131,7 +131,7 @@ module.exports = class OlxCrawler {
for (let i = 0; i < hrefs.length; i++) {
console.log(`indexing: ${hrefs[i]}`);
const singleData = await this.indexSingle(hrefs[i]);
const singleData = await this.indexSingle(hrefs[i], olxUrl.email);
if (singleData) {
results.push(singleData);
@@ -193,11 +193,13 @@ module.exports = class OlxCrawler {
const pointInsideBoundingBox = await findPointInsideBoundingBox([re1.lng, re1.lat]);
if (pointInsideBoundingBox[0].length !== 0) {
filteredResults.push(result);
filteredResults.push(re1);
}
}
}
}
console.log(filteredResults);
return filteredResults;
}
@@ -213,11 +215,15 @@ module.exports = class OlxCrawler {
const priceMin = "od=" + request.priceMin;
const priceMax = "do=" + request.priceMax;
const olxUrl = "https://www.olx.ba/pretraga?" + realsestateType + "&id=2&stanje=0&vrstapregleda=tabela&sort_order=desc&" + region + "&" + municipality + "&" + priceMin + "&" + priceMax + "&vrsta=samoprodaja&" + sizeMin + "&" + sizeMax
console.log(olxUrl);
const olxUrl = {
url: "https://www.olx.ba/pretraga?" + realsestateType + "&id=2&stanje=0&vrstapregleda=tabela&sort_order=desc&" + region + "&" + municipality + "&" + priceMin + "&" + priceMax + "&vrsta=samoprodaja&" + sizeMin + "&" + sizeMax,
email: request.email
}
console.log(olxUrl.url);
urls.push(olxUrl);
}
return urls;
}
};
};