Add caching to fetch wrapper
This commit is contained in:
@@ -159,7 +159,7 @@ class AktidoCrawler {
|
|||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const res = await fetch(url);
|
const res = await fetch(url, {}, false);
|
||||||
const body = await res.text();
|
const body = await res.text();
|
||||||
const $ = cheerio.load(body);
|
const $ = cheerio.load(body);
|
||||||
let hrefs = [];
|
let hrefs = [];
|
||||||
|
|||||||
@@ -217,7 +217,7 @@ class OlxCrawler {
|
|||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const res = await fetch(url);
|
const res = await fetch(url, {}, false);
|
||||||
logDebug("Got category results for: ", url);
|
logDebug("Got category results for: ", url);
|
||||||
const body = await res.text();
|
const body = await res.text();
|
||||||
logDebug("Got category results text for: ", url);
|
logDebug("Got category results text for: ", url);
|
||||||
|
|||||||
@@ -159,7 +159,7 @@ class RentalCrawler {
|
|||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const res = await fetch(url);
|
const res = await fetch(url, {} , false);
|
||||||
const body = await res.text();
|
const body = await res.text();
|
||||||
const $ = cheerio.load(body);
|
const $ = cheerio.load(body);
|
||||||
let hrefs = [];
|
let hrefs = [];
|
||||||
|
|||||||
@@ -160,7 +160,7 @@ class SaljicCrawler {
|
|||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const res = await fetch(url);
|
const res = await fetch(url, {}, false);
|
||||||
const body = await res.text();
|
const body = await res.text();
|
||||||
const $ = cheerio.load(body);
|
const $ = cheerio.load(body);
|
||||||
let hrefs = [];
|
let hrefs = [];
|
||||||
|
|||||||
@@ -1,5 +1,9 @@
|
|||||||
const nodeFetch = require("node-fetch");
|
const nodeFetch = require("node-fetch");
|
||||||
const AbortController = require('abort-controller');
|
const AbortController = require('abort-controller');
|
||||||
|
const FetchCache = require('@sozialhelden/fetch-cache').default;
|
||||||
|
|
||||||
|
console.log("Fc ", FetchCache)
|
||||||
|
|
||||||
const {
|
const {
|
||||||
USER_AGENT,
|
USER_AGENT,
|
||||||
USE_SCRAPER_API,
|
USE_SCRAPER_API,
|
||||||
@@ -12,7 +16,20 @@ const timeout = (ms) => {
|
|||||||
return new Promise(resolve => setTimeout(resolve, ms));
|
return new Promise(resolve => setTimeout(resolve, ms));
|
||||||
}
|
}
|
||||||
|
|
||||||
const fetch = async (url, options = {}) => {
|
const fetchCache = new FetchCache({
|
||||||
|
fetch: nodeFetch,
|
||||||
|
cacheOptions: {
|
||||||
|
// Don't save more than 10000 responses in the cache. Allows infinite responses by default
|
||||||
|
maximalItemCount: 10000,
|
||||||
|
// Which responses should the cache evict first when it's full?
|
||||||
|
evictExceedingItemsBy: 'age', // Valid values: 'lru' or 'age'
|
||||||
|
defaultTTL: 10 * 60 * 1000 // 10 minutes
|
||||||
|
// ...see https://github.com/sozialhelden/hamster-cache for all possible options
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
|
||||||
|
const fetch = async (url, options = {}, useCache = true) => {
|
||||||
const controller = new AbortController();
|
const controller = new AbortController();
|
||||||
|
|
||||||
const newOptions = Object.assign({}, options);
|
const newOptions = Object.assign({}, options);
|
||||||
@@ -32,7 +49,8 @@ const fetch = async (url, options = {}) => {
|
|||||||
const urlAdaptedForScraping = USE_SCRAPER_API
|
const urlAdaptedForScraping = USE_SCRAPER_API
|
||||||
? `${SCRAPER_API_BASE_URL}?api_key=${SCRAPER_API_KEY}&url=${urlToFetchThroughAPI}`
|
? `${SCRAPER_API_BASE_URL}?api_key=${SCRAPER_API_KEY}&url=${urlToFetchThroughAPI}`
|
||||||
: url;
|
: url;
|
||||||
const result = nodeFetch(urlAdaptedForScraping, newOptions);
|
const result = useCache ? fetchCache.fetch(urlAdaptedForScraping, newOptions) : nodeFetch(urlAdaptedForScraping, newOptions);
|
||||||
|
|
||||||
const timeoutId = setTimeout(() => controller.abort(), NODE_FETCH_TIMEOUT_MS);
|
const timeoutId = setTimeout(() => controller.abort(), NODE_FETCH_TIMEOUT_MS);
|
||||||
return result;
|
return result;
|
||||||
};
|
};
|
||||||
|
|||||||
13
package-lock.json
generated
13
package-lock.json
generated
@@ -40,6 +40,19 @@
|
|||||||
"@sendgrid/helpers": "^6.3.0"
|
"@sendgrid/helpers": "^6.3.0"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"@sozialhelden/fetch-cache": {
|
||||||
|
"version": "2.0.1",
|
||||||
|
"resolved": "https://registry.npmjs.org/@sozialhelden/fetch-cache/-/fetch-cache-2.0.1.tgz",
|
||||||
|
"integrity": "sha512-vMlsdT5JQCGjx1fcFxmMNh7ZKppjjsfUAeZEhhNwhEL7GaqbZXsD1OXEyx2IcRa25ZuZtvJSV6Q3rE77VRdLvg==",
|
||||||
|
"requires": {
|
||||||
|
"@sozialhelden/hamster-cache": "^1.0.0"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"@sozialhelden/hamster-cache": {
|
||||||
|
"version": "1.0.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/@sozialhelden/hamster-cache/-/hamster-cache-1.0.0.tgz",
|
||||||
|
"integrity": "sha512-/TEGA8mdMawZp4Yq/GrkL+72YL5EGuSeVXC3pKW12YY1t3C+zCN/HZ0HRp4zWF/e67svXcxuz/B0AEQxEdvi7A=="
|
||||||
|
},
|
||||||
"@supercharge/goodies": {
|
"@supercharge/goodies": {
|
||||||
"version": "1.4.0",
|
"version": "1.4.0",
|
||||||
"resolved": "https://registry.npmjs.org/@supercharge/goodies/-/goodies-1.4.0.tgz",
|
"resolved": "https://registry.npmjs.org/@supercharge/goodies/-/goodies-1.4.0.tgz",
|
||||||
|
|||||||
@@ -32,6 +32,7 @@
|
|||||||
"dependencies": {
|
"dependencies": {
|
||||||
"2checkout-node": "0.0.1",
|
"2checkout-node": "0.0.1",
|
||||||
"@sendgrid/mail": "^6.3.1",
|
"@sendgrid/mail": "^6.3.1",
|
||||||
|
"@sozialhelden/fetch-cache": "^2.0.1",
|
||||||
"@supercharge/promise-pool": "^1.3.0",
|
"@supercharge/promise-pool": "^1.3.0",
|
||||||
"abort-controller": "^3.0.0",
|
"abort-controller": "^3.0.0",
|
||||||
"aws-sdk": "^2.422.0",
|
"aws-sdk": "^2.422.0",
|
||||||
|
|||||||
Reference in New Issue
Block a user