handle failed page fetch; detect discounted price

This commit is contained in:
Bilal
2020-05-18 03:43:49 +02:00
parent 820227827e
commit 65068932ad

View File

@@ -205,13 +205,17 @@ class OlxCrawler {
} }
async scrapeAd(url) { async scrapeAd(url) {
console.log("Scraping : ", url); // console.log("Scraping : ", url);
try { try {
const adPageSource = await fetch(url); const adPageSource = await fetch(url);
const body = await adPageSource.text(); const body = await adPageSource.text();
const $ = cheerio.load(body); const $ = cheerio.load(body);
let status = AD_STATUS.STATUS_NORMAL; let status = AD_STATUS.STATUS_NORMAL;
if (body.indexOf('<html') === -1) {
throw { message: 'Failed to fetch page !' }
}
const propertySelectors = { const propertySelectors = {
username: username:
"#lg > div.desno2.profil > div:nth-child(2) > div.vrsta1.vrsta_desno > a > div.username > span", "#lg > div.desno2.profil > div:nth-child(2) > div.vrsta1.vrsta_desno > a > div.username > span",
@@ -238,56 +242,26 @@ class OlxCrawler {
//====== PRICE DETECTION AND EXTRACTION ===== //====== PRICE DETECTION AND EXTRACTION =====
let price = null; let price = null;
let normalPrice = null;
let urgentPrice = null;
const normalPriceValue = $("#pc > p:nth-child(2)")
.text()
.trim();
const urgentPriceValue = $(
"#artikal_glavni_div > div.artikal_lijevo > div:nth-child(5) > p"
)
.text()
.trim();
//Debug const priceHeader = $("#pc > p.n").text().trim();
//console.log("Title:", title); const priceValue = $("#pc > p:nth-child(2)").text().trim();
//console.log("Url scraped:", url); price = priceValue;
// console.log("Normal price value:", normalPriceValue);
// console.log("Urgent price value:", urgentPriceValue); if (priceHeader.indexOf('Hitn') !== -1) {
// // Urgent price
if (normalPriceValue && normalPriceValue.length > 0) { status = AD_STATUS.STATUS_URGENT;
normalPrice = normalPriceValue
.replace(/\r\n|\n|\r/gm, "")
.replace("KM", "")
.trim();
if (
$("#pc > p.n")
.text()
.indexOf("Hitna") !== -1
) {
status = AD_STATUS.STATUS_URGENT;
} else {
status = AD_STATUS.STATUS_NORMAL;
}
} else {
throw { message: "Can't find normal price" };
}
if (urgentPriceValue && urgentPriceValue.length > 0) {
const priceValues = urgentPriceValue.replace("Cijena", "").split("KM");
//priceValues will contain values like ["100000", "90000", ...], second element is urgent price
if (priceValues.length > 0) {
if (priceValues[0].trim().indexOf("Hitno") != -1) {
urgentPrice = priceValues[0].replace("Hitno", "").trim();
status = AD_STATUS.STATUS_URGENT;
} else {
urgentPrice = priceValues[0].trim();
}
} else {
throw { message: "Can't find urgent price" };
}
} }
price = status === AD_STATUS.STATUS_URGENT ? urgentPrice : normalPrice; const discountPriceTag = $("#artikal_glavni_div > div.artikal_lijevo > p:nth-child(4)").text().trim();
if (discountPriceTag.indexOf('Akcij') !== -1) {
status = AD_STATUS.STATUS_DISCOUNTED;
const discountPriceValues = $("#artikal_glavni_div > div.artikal_lijevo > div:nth-child(5) > p").text().trim();
// discountPriceValues contains a string like "10.000 KM 7.500 KM"
// The first price is the regular one; the second is the currently active (discounted) price
const bothPrices = discountPriceValues.split('KM');
// The currently active price is the second element of the bothPrices array
price = bothPrices[1] ? bothPrices[1].trim() : null;
}
//====== OTHER AD INFORMATION =============== //====== OTHER AD INFORMATION ===============
let adType = null; let adType = null;