OLX scraper: scrape each ad up to twice (retry once if the first attempt fails).

This commit is contained in:
Naida Vatric
2020-02-28 22:28:39 +01:00
parent df5e38092d
commit 034106d87a

View File

@@ -206,6 +206,9 @@ class OlxCrawler {
async scrapeAd(url) {
console.log("Scraping : ", url);
let hasParseErrors = false;
let numberOfParseErrors = 0;
do {
try {
const adPageSource = await fetch(url);
const body = await adPageSource.text();
@@ -273,7 +276,9 @@ class OlxCrawler {
throw { message: "Can't find normal price" };
}
if (urgentPriceValue && urgentPriceValue.length > 0) {
const priceValues = urgentPriceValue.replace("Cijena", "").split("KM");
const priceValues = urgentPriceValue
.replace("Cijena", "")
.split("KM");
//priceValues will contain values like ["100000", "90000", ...], second element is urgent price
if (priceValues.length > 0) {
if (priceValues[0].trim().indexOf("Hitno") != -1) {
@@ -443,10 +448,16 @@ class OlxCrawler {
gardenSize = fieldValue;
break;
case "broj soba":
numberOfRooms = this.parseNumberOfRooms(fieldValue, parsedCategory);
numberOfRooms = this.parseNumberOfRooms(
fieldValue,
parsedCategory
);
break;
case "broj prostorija":
numberOfRooms = this.parseNumberOfRooms(fieldValue, parsedCategory);
numberOfRooms = this.parseNumberOfRooms(
fieldValue,
parsedCategory
);
break;
case "broj spratova":
numberOfFloors = this.parseNumberOfFloors(
@@ -573,7 +584,10 @@ class OlxCrawler {
break;
}
if (++fieldIndex === OLX_ENUMS.MAX_DETAIL_FIELDS || fieldTitle === "") {
if (
++fieldIndex === OLX_ENUMS.MAX_DETAIL_FIELDS ||
fieldTitle === ""
) {
break;
}
} while (true);
@@ -673,8 +687,11 @@ class OlxCrawler {
return data;
} catch (e) {
hasParseErrors = true;
numberOfParseErrors++;
console.error("Exception caught: " + e.message, "\r\nURL:", url);
}
} while (hasParseErrors && numberOfParseErrors <= 1);
return null;
}