Olx scraper scrapes twice.

This commit is contained in:
Naida Vatric
2020-02-28 22:28:39 +01:00
parent df5e38092d
commit 034106d87a

View File

@@ -206,6 +206,9 @@ class OlxCrawler {
async scrapeAd(url) { async scrapeAd(url) {
console.log("Scraping : ", url); console.log("Scraping : ", url);
let hasParseErrors = false;
let numberOfParseErrors = 0;
do {
try { try {
const adPageSource = await fetch(url); const adPageSource = await fetch(url);
const body = await adPageSource.text(); const body = await adPageSource.text();
@@ -273,7 +276,9 @@ class OlxCrawler {
throw { message: "Can't find normal price" }; throw { message: "Can't find normal price" };
} }
if (urgentPriceValue && urgentPriceValue.length > 0) { if (urgentPriceValue && urgentPriceValue.length > 0) {
const priceValues = urgentPriceValue.replace("Cijena", "").split("KM"); const priceValues = urgentPriceValue
.replace("Cijena", "")
.split("KM");
//priceValues will contain values like ["100000", "90000", ...], second element is urgent price //priceValues will contain values like ["100000", "90000", ...], second element is urgent price
if (priceValues.length > 0) { if (priceValues.length > 0) {
if (priceValues[0].trim().indexOf("Hitno") != -1) { if (priceValues[0].trim().indexOf("Hitno") != -1) {
@@ -443,10 +448,16 @@ class OlxCrawler {
gardenSize = fieldValue; gardenSize = fieldValue;
break; break;
case "broj soba": case "broj soba":
numberOfRooms = this.parseNumberOfRooms(fieldValue, parsedCategory); numberOfRooms = this.parseNumberOfRooms(
fieldValue,
parsedCategory
);
break; break;
case "broj prostorija": case "broj prostorija":
numberOfRooms = this.parseNumberOfRooms(fieldValue, parsedCategory); numberOfRooms = this.parseNumberOfRooms(
fieldValue,
parsedCategory
);
break; break;
case "broj spratova": case "broj spratova":
numberOfFloors = this.parseNumberOfFloors( numberOfFloors = this.parseNumberOfFloors(
@@ -573,7 +584,10 @@ class OlxCrawler {
break; break;
} }
if (++fieldIndex === OLX_ENUMS.MAX_DETAIL_FIELDS || fieldTitle === "") { if (
++fieldIndex === OLX_ENUMS.MAX_DETAIL_FIELDS ||
fieldTitle === ""
) {
break; break;
} }
} while (true); } while (true);
@@ -673,8 +687,11 @@ class OlxCrawler {
return data; return data;
} catch (e) { } catch (e) {
hasParseErrors = true;
numberOfParseErrors++;
console.error("Exception caught: " + e.message, "\r\nURL:", url); console.error("Exception caught: " + e.message, "\r\nURL:", url);
} }
} while (hasParseErrors && numberOfParseErrors <= 1);
return null; return null;
} }