Changed delay between ads.
This commit is contained in:
@@ -217,6 +217,8 @@ class OlxCrawler {
|
||||
//let numberOfParseErrors = 0;
|
||||
// do {
|
||||
try {
|
||||
await this.sleep(this.delayBetweenAds);
|
||||
|
||||
const adPageSource = await fetch(url);
|
||||
const body = await adPageSource.text();
|
||||
const $ = cheerio.load(body);
|
||||
@@ -694,6 +696,8 @@ class OlxCrawler {
|
||||
console.error("Exception caught: " + e.message, "\r\nURL:", url);
|
||||
}
|
||||
// } while (hasParseErrors && numberOfParseErrors <= 1);
|
||||
await this.sleep(this.delayBetweenAds);
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
|
||||
@@ -84,6 +84,7 @@ class SaljicCrawler {
|
||||
for (const [index, { value: singlePageResult }] of entries) {
|
||||
if (singlePageResult) {
|
||||
const saveResults = await this.saveCrawledResults(singlePageResult);
|
||||
|
||||
const { newRecords } = saveResults;
|
||||
|
||||
newRealEstates.push(...newRecords);
|
||||
@@ -217,8 +218,8 @@ class SaljicCrawler {
|
||||
}
|
||||
}
|
||||
|
||||
-async scrapeAd(url, adType) {
|
||||
-console.log("[SALJIC] Scraping : ", url);
|
||||
+async scrapeAd(url, adTypeAttribute) {
|
||||
+//console.log("[SALJIC] Scraping : ", url);
|
||||
try {
|
||||
const adPageSource = await fetch(url);
|
||||
const body = await adPageSource.text();
|
||||
@@ -234,7 +235,7 @@ class SaljicCrawler {
|
||||
//Extracting main properties
|
||||
const propertySelectors = {
|
||||
title:
|
||||
-"div.content-wrap > div.container > div.col-md-8.nobottommargin > div.single-post > div.entry > div.entry-title > h2",
|
||||
+"div.content-wrap > div.container.clearfix.wpc > div.col-md-8.nobottommargin > div.single-post.nobottommargin > div.entry.clearfix > div.entry-title > h2",
|
||||
price:
|
||||
"div.content-wrap > div.container > div.col-md-8.nobottommargin > div.single-post > div.entry > div.topmargin-sm.single-product > div.product > div.product-price > ins",
|
||||
streetName:
|
||||
@@ -245,6 +246,7 @@ class SaljicCrawler {
|
||||
latAndLong:
|
||||
"div.content-wrap > div.container > div.col-md-8.nobottommargin > div.single-post > div.entry > div.entry-content.topmargin > div.gmap.bottommargin > iframe"
|
||||
};
|
||||
|
||||
const title = $(propertySelectors.title)
|
||||
.text()
|
||||
.replace(/(\r\n|\n|\r)/gm, "")
|
||||
@@ -277,7 +279,8 @@ class SaljicCrawler {
|
||||
let tmpLatLong;
|
||||
let latText;
|
||||
let longText;
|
||||
-if (latAndLongSrc) {
|
||||
|
||||
+if (latAndLongSrc && latAndLongSrc.indexOf("openstreetmap") !== -1) {
|
||||
tmpLatLong = latAndLongSrc.split("marker=")[1];
|
||||
latText = tmpLatLong.split("%2C")[0];
|
||||
longText = tmpLatLong.split("%2C")[1];
|
||||
@@ -330,6 +333,7 @@ class SaljicCrawler {
|
||||
let numberOfViewsKivi = null;
|
||||
let streetNumber = 0;
|
||||
let adStatus = status;
|
||||
+let adType = adTypeAttribute;
|
||||
let shortDescription = descriptions
|
||||
? descriptions.substring(0, descriptions.indexOf("."))
|
||||
: "";
|
||||
@@ -413,6 +417,7 @@ class SaljicCrawler {
|
||||
additionalField.length
|
||||
)
|
||||
.trim();
|
||||
|
||||
realEstateType = this.getAdCategoryId(categoryTmp);
|
||||
} else {
|
||||
switch (additionalField) {
|
||||
@@ -568,10 +573,14 @@ class SaljicCrawler {
|
||||
numberOfViewsKivi
|
||||
};
|
||||
|
||||
await this.sleep(1000);
|
||||
|
||||
return data;
|
||||
} catch (e) {
|
||||
console.error("Exception caught: " + e.message, "\r\nURL:", url);
|
||||
}
|
||||
await this.sleep(1000);
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user