Add timeout to fetch wrapper
This commit is contained in:
@@ -220,6 +220,7 @@ class OlxCrawler {
|
|||||||
const res = await fetch(url);
|
const res = await fetch(url);
|
||||||
logDebug("Got category results for: ", url);
|
logDebug("Got category results for: ", url);
|
||||||
const body = await res.text();
|
const body = await res.text();
|
||||||
|
logDebug("Got category results text for: ", url);
|
||||||
const $ = cheerio.load(body);
|
const $ = cheerio.load(body);
|
||||||
let hrefs = [];
|
let hrefs = [];
|
||||||
|
|
||||||
@@ -260,7 +261,7 @@ class OlxCrawler {
|
|||||||
|
|
||||||
return filteredScrapedData;
|
return filteredScrapedData;
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
console.error("Exception caught:" + e);
|
console.error("Exception caught, index single page: " + e);
|
||||||
return [];
|
return [];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -709,7 +710,7 @@ class OlxCrawler {
|
|||||||
|
|
||||||
return data;
|
return data;
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
console.error("Exception caught: " + e.message, "\r\nURL:", url);
|
console.error("Exception caught scrapeAd : " + e.message, "\r\nURL:", url);
|
||||||
}
|
}
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,4 +1,5 @@
|
|||||||
const nodeFetch = require("node-fetch");
|
const nodeFetch = require("node-fetch");
|
||||||
|
const AbortController = require('abort-controller');
|
||||||
const {
|
const {
|
||||||
USER_AGENT,
|
USER_AGENT,
|
||||||
USE_SCRAPER_API,
|
USE_SCRAPER_API,
|
||||||
@@ -11,10 +12,15 @@ const timeout = (ms) => {
|
|||||||
}
|
}
|
||||||
|
|
||||||
const fetch = async (url, options = {}) => {
|
const fetch = async (url, options = {}) => {
|
||||||
|
const controller = new AbortController();
|
||||||
|
|
||||||
const newOptions = Object.assign({}, options);
|
const newOptions = Object.assign({}, options);
|
||||||
if (!newOptions["headers"]) {
|
if (!newOptions["headers"]) {
|
||||||
newOptions["headers"] = {};
|
newOptions["headers"] = {};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
newOptions.signal = controller.signal;
|
||||||
|
|
||||||
// newOptions["headers"]["User-Agent"] = USER_AGENT;
|
// newOptions["headers"]["User-Agent"] = USER_AGENT;
|
||||||
|
|
||||||
let urlToFetchThroughAPI = Buffer.from(url).toString('base64');
|
let urlToFetchThroughAPI = Buffer.from(url).toString('base64');
|
||||||
@@ -25,7 +31,9 @@ const fetch = async (url, options = {}) => {
|
|||||||
const urlAdaptedForScraping = USE_SCRAPER_API
|
const urlAdaptedForScraping = USE_SCRAPER_API
|
||||||
? `${SCRAPER_API_BASE_URL}?api_key=${SCRAPER_API_KEY}&url=${urlToFetchThroughAPI}`
|
? `${SCRAPER_API_BASE_URL}?api_key=${SCRAPER_API_KEY}&url=${urlToFetchThroughAPI}`
|
||||||
: url;
|
: url;
|
||||||
return nodeFetch(urlAdaptedForScraping, newOptions);
|
const result = nodeFetch(urlAdaptedForScraping, newOptions);
|
||||||
|
const timeoutId = setTimeout(() => controller.abort(), 5000);
|
||||||
|
return result;
|
||||||
};
|
};
|
||||||
|
|
||||||
module.exports = fetch;
|
module.exports = fetch;
|
||||||
|
|||||||
13
package-lock.json
generated
13
package-lock.json
generated
@@ -92,6 +92,14 @@
|
|||||||
"resolved": "https://registry.npmjs.org/abbrev/-/abbrev-1.1.1.tgz",
|
"resolved": "https://registry.npmjs.org/abbrev/-/abbrev-1.1.1.tgz",
|
||||||
"integrity": "sha512-nne9/IiQ/hzIhY6pdDnbBtz7DjPTKrY00P/zvPSm5pOFkl6xuGrGnXn/VtTNNfNtAfZ9/1RtehkszU9qcTii0Q=="
|
"integrity": "sha512-nne9/IiQ/hzIhY6pdDnbBtz7DjPTKrY00P/zvPSm5pOFkl6xuGrGnXn/VtTNNfNtAfZ9/1RtehkszU9qcTii0Q=="
|
||||||
},
|
},
|
||||||
|
"abort-controller": {
|
||||||
|
"version": "3.0.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/abort-controller/-/abort-controller-3.0.0.tgz",
|
||||||
|
"integrity": "sha512-h8lQ8tacZYnR3vNQTgibj+tODHI5/+l06Au2Pcriv/Gmet0eaj4TwWH41sO9wnHDiQsEj19q0drzdWdeAHtweg==",
|
||||||
|
"requires": {
|
||||||
|
"event-target-shim": "^5.0.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
"accepts": {
|
"accepts": {
|
||||||
"version": "1.3.5",
|
"version": "1.3.5",
|
||||||
"resolved": "https://registry.npmjs.org/accepts/-/accepts-1.3.5.tgz",
|
"resolved": "https://registry.npmjs.org/accepts/-/accepts-1.3.5.tgz",
|
||||||
@@ -1087,6 +1095,11 @@
|
|||||||
"es5-ext": "~0.10.14"
|
"es5-ext": "~0.10.14"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"event-target-shim": {
|
||||||
|
"version": "5.0.1",
|
||||||
|
"resolved": "https://registry.npmjs.org/event-target-shim/-/event-target-shim-5.0.1.tgz",
|
||||||
|
"integrity": "sha512-i/2XbnSz/uxRCU6+NdVJgKWDTM427+MqYbkQzD321DuCQJUqOuJKIA0IM2+W2xtYHdKOmZ4dR6fExsd4SXL+WQ=="
|
||||||
|
},
|
||||||
"events": {
|
"events": {
|
||||||
"version": "1.1.1",
|
"version": "1.1.1",
|
||||||
"resolved": "https://registry.npmjs.org/events/-/events-1.1.1.tgz",
|
"resolved": "https://registry.npmjs.org/events/-/events-1.1.1.tgz",
|
||||||
|
|||||||
@@ -32,6 +32,8 @@
|
|||||||
"dependencies": {
|
"dependencies": {
|
||||||
"2checkout-node": "0.0.1",
|
"2checkout-node": "0.0.1",
|
||||||
"@sendgrid/mail": "^6.3.1",
|
"@sendgrid/mail": "^6.3.1",
|
||||||
|
"@supercharge/promise-pool": "^1.3.0",
|
||||||
|
"abort-controller": "^3.0.0",
|
||||||
"aws-sdk": "^2.422.0",
|
"aws-sdk": "^2.422.0",
|
||||||
"bluebird": "^3.5.5",
|
"bluebird": "^3.5.5",
|
||||||
"cheerio": "^1.0.0-rc.2",
|
"cheerio": "^1.0.0-rc.2",
|
||||||
@@ -52,8 +54,7 @@
|
|||||||
"prettier": "^1.19.1",
|
"prettier": "^1.19.1",
|
||||||
"react-step-wizard": "^5.1.0",
|
"react-step-wizard": "^5.1.0",
|
||||||
"sequelize": "^5.18.4",
|
"sequelize": "^5.18.4",
|
||||||
"sequelize-cli": "^5.5.0",
|
"sequelize-cli": "^5.5.0"
|
||||||
"@supercharge/promise-pool": "^1.3.0"
|
|
||||||
},
|
},
|
||||||
"devDependencies": {
|
"devDependencies": {
|
||||||
"nodemon": "^1.19.0"
|
"nodemon": "^1.19.0"
|
||||||
|
|||||||
Reference in New Issue
Block a user