Added real estate link to bulk email

This commit is contained in:
Nedim Uka
2019-07-12 18:00:02 +02:00
parent 753a09aa36
commit 81c30c36ec
7 changed files with 116 additions and 81 deletions

View File

@@ -7,10 +7,8 @@ const Promise = require("bluebird");
module.exports = class OlxCrawler {
// TODO: figure out the best way to handle paging
constructor(fromPage = 0, toPage = 10, maxResults = 1000) {
this.fromPage = fromPage;
this.toPage = toPage;
this.maxResults = maxResults;
constructor(hrefs = []) {
this.hrefs = hrefs;
}
async indexPages(urls) {
@@ -36,8 +34,7 @@ module.exports = class OlxCrawler {
const urls = this.createRequestUrls(realestateRequests);
let results = await this.indexPages(urls, this.fromPage, this.toPage, this.maxResults);
console.log("Final crawler results");
// console.log(results);
const flatResults = results.flat();
const flatResults = results.flat();
console.log(flatResults);
if (flatResults) {
console.log(flatResults.length);
@@ -48,7 +45,7 @@ module.exports = class OlxCrawler {
if (finalResult.lat !== undefined && finalResult.lat !== null && finalResult.lat !== "") {
const pointInsideBoundingBox = await findPointInsideBoundingBox([finalResult.lng, finalResult.lat], finalResult.email);
if (pointInsideBoundingBox[0].length !== 0) {
finalResult.hasLocation = true
filteredResults.push(finalResult);
@@ -81,7 +78,8 @@ module.exports = class OlxCrawler {
const olxUrl = {
url: `https://www.olx.ba/pretraga?${realsestateType}&id=2&stanje=0&vrstapregleda=tabela&sort_order=desc&${region}&${municipality}&${priceMin}&${priceMax}&vrsta=samoprodaja&${sizeMin}&${sizeMax}&stranica=`,
email: request.email,
uuid: request.uniqueId
uuid: request.uniqueId,
hrefs: this.hrefs
}
console.log(olxUrl.url);
urls.push(olxUrl);
@@ -168,6 +166,7 @@ class Indexer {
}
prepareIndexers(pageNr) {
console.log("Entering prepareIndexers : page nr - " + pageNr);
const indexers = [];
let lastPageNumber;
@@ -177,7 +176,8 @@ class Indexer {
const newOlxUrl = {
url: this.olxUrl.url.replace(/\d+$/, "") + index,
email: this.olxUrl.email,
uuid: this.olxUrl.uuid
uuid: this.olxUrl.uuid,
hrefs: this.olxUrl.hrefs
}
indexers.push(new Indexer(newOlxUrl));
@@ -188,7 +188,8 @@ class Indexer {
const newOlxUrl = {
url: this.olxUrl.url + index,
email: this.olxUrl.email,
uuid: this.olxUrl.uuid
uuid: this.olxUrl.uuid,
hrefs: this.olxUrl.hrefs
}
indexers.push(new Indexer(newOlxUrl));
}
@@ -207,7 +208,8 @@ class Indexer {
const newOlxUrl = {
url: href,
email: results.olxUrl.email,
uuid: results.olxUrl.uuid
uuid: results.olxUrl.uuid,
hrefs: this.olxUrl.hrefs
}
indexers.push(new Indexer(newOlxUrl));
@@ -220,11 +222,11 @@ class Indexer {
if (result !== null && result.hasOwnProperty('hrefs')) {
result.hrefs.forEach(href => {
// console.log(href);
const newOlxUrl = {
url: href,
email: result.olxUrl.email,
uuid: result.olxUrl.uuid
uuid: result.olxUrl.uuid,
hrefs: this.olxUrl.hrefs
}
indexers.push(new Indexer(newOlxUrl));
@@ -279,14 +281,14 @@ class Indexer {
return {}
}
if (global.hrefs) {
// if (global.hrefs) {
if (global.hrefs[this.olxUrl.uuid] && global.hrefs[this.olxUrl.uuid].includes(this.olxUrl.url)) {
if (this.olxUrl.hrefs[this.olxUrl.uuid] && this.olxUrl.hrefs[this.olxUrl.uuid].includes(this.olxUrl.url)) {
console.log("We found duplicate URL");
return null
}
console.log("We found duplicate URL");
return null
}
// }
const res = await fetch(this.olxUrl.url);
const body = await res.text();