Added realestate link to bulk email
This commit is contained in:
@@ -7,10 +7,8 @@ const Promise = require("bluebird");
|
||||
|
||||
module.exports = class OlxCrawler {
|
||||
//TODO figure best way to handle paging
|
||||
constructor(fromPage = 0, toPage = 10, maxResults = 1000) {
|
||||
this.fromPage = fromPage;
|
||||
this.toPage = toPage;
|
||||
this.maxResults = maxResults;
|
||||
constructor(hrefs = []) {
|
||||
this.hrefs = hrefs;
|
||||
}
|
||||
|
||||
async indexPages(urls) {
|
||||
@@ -36,8 +34,7 @@ module.exports = class OlxCrawler {
|
||||
const urls = this.createRequestUrls(realestateRequests);
|
||||
let results = await this.indexPages(urls, this.fromPage, this.toPage, this.maxResults);
|
||||
console.log("Final crawler results");
|
||||
// console.log(results);
|
||||
const flatResults = results.flat();
|
||||
const flatResults = results.flat();
|
||||
console.log(flatResults);
|
||||
if (flatResults) {
|
||||
console.log(flatResults.length);
|
||||
@@ -48,7 +45,7 @@ module.exports = class OlxCrawler {
|
||||
if (finalResult.lat !== undefined && finalResult.lat !== null && finalResult.lat !== "") {
|
||||
const pointInsideBoundingBox = await findPointInsideBoundingBox([finalResult.lng, finalResult.lat], finalResult.email);
|
||||
|
||||
|
||||
|
||||
if (pointInsideBoundingBox[0].length !== 0) {
|
||||
finalResult.hasLocation = true
|
||||
filteredResults.push(finalResult);
|
||||
@@ -81,7 +78,8 @@ module.exports = class OlxCrawler {
|
||||
const olxUrl = {
|
||||
url: `https://www.olx.ba/pretraga?${realsestateType}&id=2&stanje=0&vrstapregleda=tabela&sort_order=desc&${region}&${municipality}&${priceMin}&${priceMax}&vrsta=samoprodaja&${sizeMin}&${sizeMax}&stranica=`,
|
||||
email: request.email,
|
||||
uuid: request.uniqueId
|
||||
uuid: request.uniqueId,
|
||||
hrefs: this.hrefs
|
||||
}
|
||||
console.log(olxUrl.url);
|
||||
urls.push(olxUrl);
|
||||
@@ -168,6 +166,7 @@ class Indexer {
|
||||
}
|
||||
|
||||
prepareIndexers(pageNr) {
|
||||
|
||||
console.log("Entering prepareIndexers : page nr - " + pageNr);
|
||||
const indexers = [];
|
||||
let lastPageNumber;
|
||||
@@ -177,7 +176,8 @@ class Indexer {
|
||||
const newOlxUrl = {
|
||||
url: this.olxUrl.url.replace(/\d+$/, "") + index,
|
||||
email: this.olxUrl.email,
|
||||
uuid: this.olxUrl.uuid
|
||||
uuid: this.olxUrl.uuid,
|
||||
hrefs: this.olxUrl.hrefs
|
||||
}
|
||||
indexers.push(new Indexer(newOlxUrl));
|
||||
|
||||
@@ -188,7 +188,8 @@ class Indexer {
|
||||
const newOlxUrl = {
|
||||
url: this.olxUrl.url + index,
|
||||
email: this.olxUrl.email,
|
||||
uuid: this.olxUrl.uuid
|
||||
uuid: this.olxUrl.uuid,
|
||||
hrefs: this.olxUrl.hrefs
|
||||
}
|
||||
indexers.push(new Indexer(newOlxUrl));
|
||||
}
|
||||
@@ -207,7 +208,8 @@ class Indexer {
|
||||
const newOlxUrl = {
|
||||
url: href,
|
||||
email: results.olxUrl.email,
|
||||
uuid: results.olxUrl.uuid
|
||||
uuid: results.olxUrl.uuid,
|
||||
hrefs: this.olxUrl.hrefs
|
||||
}
|
||||
|
||||
indexers.push(new Indexer(newOlxUrl));
|
||||
@@ -220,11 +222,11 @@ class Indexer {
|
||||
|
||||
if (result !== null && result.hasOwnProperty('hrefs')) {
|
||||
result.hrefs.forEach(href => {
|
||||
// console.log(href);
|
||||
const newOlxUrl = {
|
||||
url: href,
|
||||
email: result.olxUrl.email,
|
||||
uuid: result.olxUrl.uuid
|
||||
uuid: result.olxUrl.uuid,
|
||||
hrefs: this.olxUrl.hrefs
|
||||
}
|
||||
|
||||
indexers.push(new Indexer(newOlxUrl));
|
||||
@@ -279,14 +281,14 @@ class Indexer {
|
||||
return {}
|
||||
}
|
||||
|
||||
if (global.hrefs) {
|
||||
// if (global.hrefs) {
|
||||
|
||||
if (global.hrefs[this.olxUrl.uuid] && global.hrefs[this.olxUrl.uuid].includes(this.olxUrl.url)) {
|
||||
if (this.olxUrl.hrefs[this.olxUrl.uuid] && this.olxUrl.hrefs[this.olxUrl.uuid].includes(this.olxUrl.url)) {
|
||||
|
||||
console.log("We found duplicate URL");
|
||||
return null
|
||||
}
|
||||
console.log("We found duplicate URL");
|
||||
return null
|
||||
}
|
||||
// }
|
||||
|
||||
const res = await fetch(this.olxUrl.url);
|
||||
const body = await res.text();
|
||||
|
||||
Reference in New Issue
Block a user