Merge branch 'after-scraper-fix' into 'master'
Olx price parsing changed. See merge request saburly/marketalarm/web!103
This commit was merged in pull request #103.
This commit is contained in:
@@ -205,7 +205,7 @@ class OlxCrawler {
|
|||||||
}
|
}
|
||||||
|
|
||||||
async scrapeAd(url) {
|
async scrapeAd(url) {
|
||||||
// console.log("Scraping : ", url);
|
//console.log("Scraping : ", url);
|
||||||
try {
|
try {
|
||||||
const adPageSource = await fetch(url);
|
const adPageSource = await fetch(url);
|
||||||
const body = await adPageSource.text();
|
const body = await adPageSource.text();
|
||||||
@@ -238,6 +238,8 @@ class OlxCrawler {
|
|||||||
|
|
||||||
//====== PRICE DETECTION AND EXTRACTION =====
|
//====== PRICE DETECTION AND EXTRACTION =====
|
||||||
let price = null;
|
let price = null;
|
||||||
|
let normalPrice = null;
|
||||||
|
let urgentPrice = null;
|
||||||
const normalPriceValue = $("#pc > p:nth-child(2)").text();
|
const normalPriceValue = $("#pc > p:nth-child(2)").text();
|
||||||
const urgentPriceValue = $(
|
const urgentPriceValue = $(
|
||||||
"#artikal_glavni_div > div.artikal_lijevo > div:nth-child(5) > p"
|
"#artikal_glavni_div > div.artikal_lijevo > div:nth-child(5) > p"
|
||||||
@@ -246,7 +248,10 @@ class OlxCrawler {
|
|||||||
.trim();
|
.trim();
|
||||||
|
|
||||||
if (normalPriceValue && normalPriceValue.length > 0) {
|
if (normalPriceValue && normalPriceValue.length > 0) {
|
||||||
price = normalPriceValue;
|
normalPrice = normalPriceValue
|
||||||
|
.replace(/\r\n|\n|\r/gm, "")
|
||||||
|
.replace("KM", "")
|
||||||
|
.trim();
|
||||||
if (
|
if (
|
||||||
$("#pc > p.n")
|
$("#pc > p.n")
|
||||||
.text()
|
.text()
|
||||||
@@ -256,21 +261,26 @@ class OlxCrawler {
|
|||||||
} else {
|
} else {
|
||||||
status = AD_STATUS.STATUS_NORMAL;
|
status = AD_STATUS.STATUS_NORMAL;
|
||||||
}
|
}
|
||||||
} else if (urgentPriceValue && urgentPriceValue.length > 0) {
|
} else {
|
||||||
const priceValues = urgentPriceValue.split("KM");
|
throw { message: "Can't find normal price" };
|
||||||
|
}
|
||||||
|
if (urgentPriceValue && urgentPriceValue.length > 0) {
|
||||||
|
const priceValues = urgentPriceValue.replace("Cijena", "").split("KM");
|
||||||
//priceValues will contain values like ["100000", "90000", ...], second element is urgent price
|
//priceValues will contain values like ["100000", "90000", ...], second element is urgent price
|
||||||
if (priceValues.length > 1) {
|
if (priceValues.length > 0) {
|
||||||
price = priceValues[1].trim();
|
if (priceValues[0].trim().indexOf("Hitno") != -1) {
|
||||||
status = AD_STATUS.STATUS_DISCOUNTED;
|
urgentPrice = priceValues[0].replace("Hitno", "").trim();
|
||||||
|
status = AD_STATUS.STATUS_URGENT;
|
||||||
|
} else {
|
||||||
|
urgentPrice = priceValues[0].trim();
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
throw { message: "Can't find urgent price" };
|
throw { message: "Can't find urgent price" };
|
||||||
}
|
}
|
||||||
} else {
|
|
||||||
throw {
|
|
||||||
message: "Can't find price (it is not normal nor urgent price ?)"
|
|
||||||
};
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
price = status === AD_STATUS.STATUS_URGENT ? urgentPrice : normalPrice;
|
||||||
|
|
||||||
//====== OTHER AD INFORMATION ===============
|
//====== OTHER AD INFORMATION ===============
|
||||||
let adType = null;
|
let adType = null;
|
||||||
let olxId = null;
|
let olxId = null;
|
||||||
@@ -768,6 +778,9 @@ class OlxCrawler {
|
|||||||
if (!priceText) {
|
if (!priceText) {
|
||||||
return NaN;
|
return NaN;
|
||||||
}
|
}
|
||||||
|
if (priceText === "Po dogovoru") {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
const formattedPriceText = priceText.replace(".", "").replace(",", ".");
|
const formattedPriceText = priceText.replace(".", "").replace(",", ".");
|
||||||
return parseFloat(formattedPriceText);
|
return parseFloat(formattedPriceText);
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user