handle failed page fetch

This commit is contained in:
Bilal
2020-05-18 03:53:08 +02:00
parent 65068932ad
commit 159fedbc2d
4 changed files with 17 additions and 1 deletions

View File

@@ -202,6 +202,10 @@ class AktidoCrawler {
// Read the body of adPageSource as text and parse it with cheerio.
const body = await adPageSource.text();
const $ = cheerio.load(body);
// Guard: a body that contains no '<html' substring is treated as a failed
// page fetch. The search is case-sensitive, so '<HTML' would not match.
// NOTE(review): this throws a plain object rather than an Error instance, so
// no stack trace is attached — confirm the handler only reads `.message`.
if (body.indexOf('<html') === -1) {
throw { message: 'Failed to fetch page !' }
}
// Select the parent of the ".box-map" element, then the <script> elements
// found inside that parent.
const mapElementParent = $(".box-map").parent();
const scriptElement = $("script", mapElementParent);
if (

View File

@@ -215,6 +215,10 @@ class ProstorCrawler {
// Read the body of adPageSource as text and parse it with cheerio.
const body = await adPageSource.text();
const $ = cheerio.load(body);
// Guard: a body that contains no '<html' substring is treated as a failed
// page fetch. The search is case-sensitive, so '<HTML' would not match.
// NOTE(review): this throws a plain object rather than an Error instance, so
// no stack trace is attached — confirm the handler only reads `.message`.
if (body.indexOf('<html') === -1) {
throw { message: 'Failed to fetch page !' }
}
// link contains part of the URL in the format: /prodaja/stan/stup/9556
// The general form is: /actionType/realEstateType/location/realEstateID
// linkParts contains: ['', 'actionType', 'realEstateType', 'location', 'realEstateID']

View File

@@ -202,6 +202,10 @@ class RentalCrawler {
// Read the body of adPageSource as text and parse it with cheerio.
const body = await adPageSource.text();
const $ = cheerio.load(body);
// Guard: a body that contains no '<html' substring is treated as a failed
// page fetch. The search is case-sensitive, so '<HTML' would not match.
// NOTE(review): this throws a plain object rather than an Error instance, so
// no stack trace is attached — confirm the handler only reads `.message`.
if (body.indexOf('<html') === -1) {
throw { message: 'Failed to fetch page !' }
}
// Select the parent of the ".box-map" element, then the <script> elements
// found inside that parent.
const mapElementParent = $(".box-map").parent();
const scriptElement = $("script", mapElementParent);
if (

View File

@@ -220,12 +220,16 @@ class SaljicCrawler {
}
async scrapeAd(url, adType) {
console.log("[SALJIC] Scraping : ", url);
// console.log("[SALJIC] Scraping : ", url);
try {
// Fetch the ad page at `url`, read the response body as text and parse it
// with cheerio.
const adPageSource = await fetch(url);
const body = await adPageSource.text();
const $ = cheerio.load(body);
// Guard: a body that contains no '<html' substring is treated as a failed
// page fetch. The search is case-sensitive, so '<HTML' would not match.
// NOTE(review): this throws a plain object rather than an Error instance, so
// no stack trace is attached — confirm the enclosing catch only reads `.message`.
if (body.indexOf('<html') === -1) {
throw { message: 'Failed to fetch page !' }
}
// No information for status ex. PRODAN
const status = AD_STATUS.STATUS_NORMAL;
//Extracting agency ID from url