saving additional fields, improved async functions with promises
This commit is contained in:
@@ -12,7 +12,7 @@ module.exports = class OlxCrawler {
|
||||
this.maxResults = maxResults;
|
||||
}
|
||||
|
||||
async indexSingle(url) {
|
||||
async indexSingle(url, email) {
|
||||
try {
|
||||
const res = await fetch(url);
|
||||
const body = await res.text();
|
||||
@@ -38,7 +38,7 @@ module.exports = class OlxCrawler {
|
||||
const olxId = $('#artikal_glavni_div > div.artikal_lijevo > div:nth-child(15) > div:nth-child(4) > div.df2').text();
|
||||
|
||||
const descriptions = $('.artikal_detaljniopis_tekst');
|
||||
const floor = $('#dodatnapolja1').find(':contains(Sprat)').last().nextAll().text();
|
||||
// const floor = $('#dodatnapolja1').find(':contains(Sprat)').last().nextAll().text();
|
||||
const latLngRe = /LatLng\(([0-9]+\.[0-9]+)\,\s+([0-9]+\.[0-9]+)\)/g;
|
||||
const imgRe = /href":("[^"]*")/g;
|
||||
const matches = latLngRe.exec(body);
|
||||
@@ -73,25 +73,25 @@ module.exports = class OlxCrawler {
|
||||
}
|
||||
|
||||
const parsedPrice = parsePrice(price);
|
||||
let parsedRooms;
|
||||
|
||||
if (rooms === 'Garsonjera') {
|
||||
parsedRooms = 0;
|
||||
} else {
|
||||
parsedRooms = parseRooms(rooms);
|
||||
}
|
||||
const locationArray = location.split(",");
|
||||
const region = locationArray[0];
|
||||
const municipality = locationArray[1];
|
||||
console.log(location);
|
||||
console.log(locationArray);
|
||||
|
||||
const data = {
|
||||
// category: this.getCategoryId(category),
|
||||
email : email,
|
||||
olxId: olxId,
|
||||
category: category,
|
||||
url,
|
||||
title,
|
||||
price: isNaN(parsedPrice) ? price : parsedPrice,
|
||||
size: parseFloat(size),
|
||||
rooms: parsedRooms,
|
||||
floor: parseInt(floor),
|
||||
address,
|
||||
location,
|
||||
region,
|
||||
municipality,
|
||||
// adType: AD_TYPE_SALE,
|
||||
time,
|
||||
shortDescription: descriptions.first().text(),
|
||||
@@ -110,12 +110,12 @@ module.exports = class OlxCrawler {
|
||||
return null;
|
||||
}
|
||||
|
||||
async indexPage(url, maxResults = 1000) {
|
||||
async indexPage(olxUrl, maxResults = 1000) {
|
||||
try {
|
||||
// console.log('Starting to index page: ' + pageNr);
|
||||
// const url = `http://www.olx.ba/pretraga?vrsta=samoprodaja&sort_order=desc&kategorija=23&sort_po=datum&kanton=9&stranica=${pageNr}`;
|
||||
|
||||
const res = await fetch(url);
|
||||
const res = await fetch(olxUrl.url);
|
||||
const body = await res.text();
|
||||
const $ = cheerio.load(body);
|
||||
const hrefs = [];
|
||||
@@ -131,7 +131,7 @@ module.exports = class OlxCrawler {
|
||||
for (let i = 0; i < hrefs.length; i++) {
|
||||
console.log(`indexing: ${hrefs[i]}`);
|
||||
|
||||
const singleData = await this.indexSingle(hrefs[i]);
|
||||
const singleData = await this.indexSingle(hrefs[i], olxUrl.email);
|
||||
|
||||
if (singleData) {
|
||||
results.push(singleData);
|
||||
@@ -193,11 +193,13 @@ module.exports = class OlxCrawler {
|
||||
const pointInsideBoundingBox = await findPointInsideBoundingBox([re1.lng, re1.lat]);
|
||||
|
||||
if (pointInsideBoundingBox[0].length !== 0) {
|
||||
filteredResults.push(result);
|
||||
filteredResults.push(re1);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
console.log(filteredResults);
|
||||
return filteredResults;
|
||||
}
|
||||
|
||||
@@ -213,11 +215,15 @@ module.exports = class OlxCrawler {
|
||||
const priceMin = "od=" + request.priceMin;
|
||||
const priceMax = "do=" + request.priceMax;
|
||||
|
||||
const olxUrl = "https://www.olx.ba/pretraga?" + realsestateType + "&id=2&stanje=0&vrstapregleda=tabela&sort_order=desc&" + region + "&" + municipality + "&" + priceMin + "&" + priceMax + "&vrsta=samoprodaja&" + sizeMin + "&" + sizeMax
|
||||
console.log(olxUrl);
|
||||
const olxUrl = {
|
||||
url: "https://www.olx.ba/pretraga?" + realsestateType + "&id=2&stanje=0&vrstapregleda=tabela&sort_order=desc&" + region + "&" + municipality + "&" + priceMin + "&" + priceMax + "&vrsta=samoprodaja&" + sizeMin + "&" + sizeMax,
|
||||
email: request.email
|
||||
}
|
||||
console.log(olxUrl.url);
|
||||
urls.push(olxUrl);
|
||||
}
|
||||
|
||||
return urls;
|
||||
}
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
Reference in New Issue
Block a user