diff --git a/app/crawler/specificCrawlers/aktido.js b/app/crawler/specificCrawlers/aktido.js index ac4dcc6..cd492bc 100644 --- a/app/crawler/specificCrawlers/aktido.js +++ b/app/crawler/specificCrawlers/aktido.js @@ -159,7 +159,7 @@ class AktidoCrawler { } try { - const res = await fetch(url); + const res = await fetch(url, {}, false); const body = await res.text(); const $ = cheerio.load(body); let hrefs = []; diff --git a/app/crawler/specificCrawlers/olx.js b/app/crawler/specificCrawlers/olx.js index 6ee1963..6b56428 100644 --- a/app/crawler/specificCrawlers/olx.js +++ b/app/crawler/specificCrawlers/olx.js @@ -217,7 +217,7 @@ class OlxCrawler { } try { - const res = await fetch(url); + const res = await fetch(url, {}, false); logDebug("Got category results for: ", url); const body = await res.text(); logDebug("Got category results text for: ", url); diff --git a/app/crawler/specificCrawlers/rental.js b/app/crawler/specificCrawlers/rental.js index 292885c..855814b 100644 --- a/app/crawler/specificCrawlers/rental.js +++ b/app/crawler/specificCrawlers/rental.js @@ -159,7 +159,7 @@ class RentalCrawler { } try { - const res = await fetch(url); + const res = await fetch(url, {} , false); const body = await res.text(); const $ = cheerio.load(body); let hrefs = []; diff --git a/app/crawler/specificCrawlers/saljic.js b/app/crawler/specificCrawlers/saljic.js index 44adb20..be1e787 100644 --- a/app/crawler/specificCrawlers/saljic.js +++ b/app/crawler/specificCrawlers/saljic.js @@ -160,7 +160,7 @@ class SaljicCrawler { } try { - const res = await fetch(url); + const res = await fetch(url, {}, false); const body = await res.text(); const $ = cheerio.load(body); let hrefs = []; diff --git a/app/helpers/fetchWrapper.js b/app/helpers/fetchWrapper.js index c2abf6c..91bb58a 100644 --- a/app/helpers/fetchWrapper.js +++ b/app/helpers/fetchWrapper.js @@ -1,5 +1,9 @@ const nodeFetch = require("node-fetch"); const AbortController = require('abort-controller'); +const FetchCache = 
require('@sozialhelden/fetch-cache').default; + +console.log("Fc ", FetchCache) + const { USER_AGENT, USE_SCRAPER_API, @@ -12,7 +16,20 @@ const timeout = (ms) => { return new Promise(resolve => setTimeout(resolve, ms)); } -const fetch = async (url, options = {}) => { +const fetchCache = new FetchCache({ + fetch: nodeFetch, + cacheOptions: { + // Don't save more than 10000 responses in the cache. Allows infinite responses by default + maximalItemCount: 10000, + // Which responses should the cache evict when it's full? + evictExceedingItemsBy: 'age', // Valid values: 'lru' or 'age' + defaultTTL: 10 * 60 * 1000 // 10 minutes + // ...see https://github.com/sozialhelden/hamster-cache for all possible options + }, +}); + + +const fetch = async (url, options = {}, useCache = true) => { const controller = new AbortController(); const newOptions = Object.assign({}, options); @@ -32,7 +49,8 @@ const fetch = async (url, options = {}) => { const urlAdaptedForScraping = USE_SCRAPER_API ? `${SCRAPER_API_BASE_URL}?api_key=${SCRAPER_API_KEY}&url=${urlToFetchThroughAPI}` : url; - const result = nodeFetch(urlAdaptedForScraping, newOptions); + const result = useCache ? 
fetchCache.fetch(urlAdaptedForScraping, newOptions) : nodeFetch(urlAdaptedForScraping, newOptions); + const timeoutId = setTimeout(() => controller.abort(), NODE_FETCH_TIMEOUT_MS); return result; }; diff --git a/package-lock.json b/package-lock.json index 2baa071..39e6ee7 100644 --- a/package-lock.json +++ b/package-lock.json @@ -40,6 +40,19 @@ "@sendgrid/helpers": "^6.3.0" } }, + "@sozialhelden/fetch-cache": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/@sozialhelden/fetch-cache/-/fetch-cache-2.0.1.tgz", + "integrity": "sha512-vMlsdT5JQCGjx1fcFxmMNh7ZKppjjsfUAeZEhhNwhEL7GaqbZXsD1OXEyx2IcRa25ZuZtvJSV6Q3rE77VRdLvg==", + "requires": { + "@sozialhelden/hamster-cache": "^1.0.0" + } + }, + "@sozialhelden/hamster-cache": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/@sozialhelden/hamster-cache/-/hamster-cache-1.0.0.tgz", + "integrity": "sha512-/TEGA8mdMawZp4Yq/GrkL+72YL5EGuSeVXC3pKW12YY1t3C+zCN/HZ0HRp4zWF/e67svXcxuz/B0AEQxEdvi7A==" + }, "@supercharge/goodies": { "version": "1.4.0", "resolved": "https://registry.npmjs.org/@supercharge/goodies/-/goodies-1.4.0.tgz", diff --git a/package.json b/package.json index cd8ca94..36b235e 100644 --- a/package.json +++ b/package.json @@ -32,6 +32,7 @@ "dependencies": { "2checkout-node": "0.0.1", "@sendgrid/mail": "^6.3.1", + "@sozialhelden/fetch-cache": "^2.0.1", "@supercharge/promise-pool": "^1.3.0", "abort-controller": "^3.0.0", "aws-sdk": "^2.422.0",