From 2b1cbcaa4752422887bf0888251669137be1b8f8 Mon Sep 17 00:00:00 2001 From: = Date: Wed, 16 Sep 2020 06:16:49 -0700 Subject: [PATCH] Cleanup after debugging --- Procfile | 2 +- app/config/appConfig.js | 4 +- app/crawler/crawl.js | 73 ++++++++++++++++++------------------- app/helpers/fetchWrapper.js | 5 ++- 4 files changed, 43 insertions(+), 41 deletions(-) diff --git a/Procfile b/Procfile index abdde38..65423b4 100644 --- a/Procfile +++ b/Procfile @@ -1 +1 @@ -web: node --inspect ./index.js +web: node ./index.js diff --git a/app/config/appConfig.js b/app/config/appConfig.js index 7ed2ada..9563ec2 100644 --- a/app/config/appConfig.js +++ b/app/config/appConfig.js @@ -48,6 +48,7 @@ const USER_AGENT = const USE_SCRAPER_API = process.env.USE_SCRAPER_API === undefined ? 1 : parseInt(process.env.USE_SCRAPER_API); const SCRAPER_API_KEY = process.env.SCRAPER_API_KEY || ""; const SCRAPER_API_BASE_URL = process.env.SCRAPER_API_BASE_URL || ""; +const NODE_FETCH_TIMEOUT_MS = process.env.NODE_FETCH_TIMEOUT_MS || 60000 module.exports = { APP_PORT, @@ -66,5 +67,6 @@ module.exports = { USER_AGENT, USE_SCRAPER_API, SCRAPER_API_KEY, - SCRAPER_API_BASE_URL + SCRAPER_API_BASE_URL, + NODE_FETCH_TIMEOUT_MS }; diff --git a/app/crawler/crawl.js b/app/crawler/crawl.js index 9aafc75..e4a66cf 100644 --- a/app/crawler/crawl.js +++ b/app/crawler/crawl.js @@ -33,44 +33,43 @@ async function crawlAll() { OLX_CONFIG.OLX_MAX_RESULTS_PER_PAGE, OLX_CONFIG.OLX_IGNORED_USERNAMES, OLX_CONFIG.OLX_DELAY_BETWEEN_PAGES + ), + new RentalCrawler( + [postgresSaver], + RENTAL_CONFIG.RENTAL_CRAWLER_AD_TYPE, + RENTAL_CONFIG.RENTAL_CRAWLER_AD_CATEGORIES, + RENTAL_CONFIG.RENTAL_MAX_PAGES, + RENTAL_CONFIG.RENTAL_MAX_RESULTS_PER_PAGE, + RENTAL_CONFIG.RENTAL_IGNORED_USERNAMES, + RENTAL_CONFIG.RENTAL_DELAY_BETWEEN_PAGES + ), + new ProstorCrawler( + [postgresSaver], + PROSTOR_CONFIG.PROSTOR_CRAWLER_AD_TYPE, + PROSTOR_CONFIG.PROSTOR_CRAWLER_AD_CATEGORIES, + PROSTOR_CONFIG.PROSTOR_MAX_PAGES, + PROSTOR_CONFIG.PROSTOR_MAX_RESULTS_PER_PAGE, + PROSTOR_CONFIG.PROSTOR_IGNORED_USERNAMES, + PROSTOR_CONFIG.PROSTOR_DELAY_BETWEEN_PAGES + ), + new AktidoCrawler( + [postgresSaver], + AKTIDO_CONFIG.AKTIDO_CRAWLER_AD_TYPE, + AKTIDO_CONFIG.AKTIDO_CRAWLER_AD_CATEGORIES, + AKTIDO_CONFIG.AKTIDO_MAX_PAGES, + AKTIDO_CONFIG.AKTIDO_MAX_RESULTS_PER_PAGE, + AKTIDO_CONFIG.AKTIDO_IGNORED_USERNAMES, + AKTIDO_CONFIG.AKTIDO_DELAY_BETWEEN_PAGES + ), + new SaljicCrawler( + [postgresSaver], + SALJIC_CONFIG.SALJIC_CRAWLER_AD_TYPE, + SALJIC_CONFIG.SALJIC_CRAWLER_AD_CATEGORIES, + SALJIC_CONFIG.SALJIC_MAX_PAGES, + SALJIC_CONFIG.SALJIC_MAX_RESULTS_PER_PAGE, + SALJIC_CONFIG.SALJIC_IGNORED_USERNAMES, + SALJIC_CONFIG.SALJIC_DELAY_BETWEEN_PAGES ) - //new RentalCrawler( - //[postgresSaver], - //RENTAL_CONFIG.RENTAL_CRAWLER_AD_TYPE, - //RENTAL_CONFIG.RENTAL_CRAWLER_AD_CATEGORIES, - //RENTAL_CONFIG.RENTAL_MAX_PAGES, - //RENTAL_CONFIG.RENTAL_MAX_RESULTS_PER_PAGE, - //RENTAL_CONFIG.RENTAL_IGNORED_USERNAMES, - //RENTAL_CONFIG.RENTAL_DELAY_BETWEEN_PAGES - //), - //new ProstorCrawler( - //[postgresSaver], - //PROSTOR_CONFIG.PROSTOR_CRAWLER_AD_TYPE, - //PROSTOR_CONFIG.PROSTOR_CRAWLER_AD_CATEGORIES, - //PROSTOR_CONFIG.PROSTOR_MAX_PAGES, - //PROSTOR_CONFIG.PROSTOR_MAX_RESULTS_PER_PAGE, - //PROSTOR_CONFIG.PROSTOR_IGNORED_USERNAMES, - //PROSTOR_CONFIG.PROSTOR_DELAY_BETWEEN_PAGES - //), - //new AktidoCrawler( - //[postgresSaver], - //AKTIDO_CONFIG.AKTIDO_CRAWLER_AD_TYPE, - //AKTIDO_CONFIG.AKTIDO_CRAWLER_AD_CATEGORIES, - //AKTIDO_CONFIG.AKTIDO_MAX_PAGES, - //AKTIDO_CONFIG.AKTIDO_MAX_RESULTS_PER_PAGE, - //AKTIDO_CONFIG.AKTIDO_IGNORED_USERNAMES, - //AKTIDO_CONFIG.AKTIDO_DELAY_BETWEEN_PAGES - //) - //, - //new SaljicCrawler( - //[postgresSaver], - //SALJIC_CONFIG.SALJIC_CRAWLER_AD_TYPE, - //SALJIC_CONFIG.SALJIC_CRAWLER_AD_CATEGORIES, - //SALJIC_CONFIG.SALJIC_MAX_PAGES, - //SALJIC_CONFIG.SALJIC_MAX_RESULTS_PER_PAGE, - //SALJIC_CONFIG.SALJIC_IGNORED_USERNAMES, - //SALJIC_CONFIG.SALJIC_DELAY_BETWEEN_PAGES - //) ]; const newRealEstates = []; diff --git a/app/helpers/fetchWrapper.js b/app/helpers/fetchWrapper.js index f88b151..c2abf6c 100644 --- a/app/helpers/fetchWrapper.js +++ b/app/helpers/fetchWrapper.js @@ -4,7 +4,8 @@ const { USER_AGENT, USE_SCRAPER_API, SCRAPER_API_KEY, - SCRAPER_API_BASE_URL + SCRAPER_API_BASE_URL, + NODE_FETCH_TIMEOUT_MS } = require("../config/appConfig"); const timeout = (ms) => { @@ -32,7 +33,7 @@ const fetch = async (url, options = {}) => { ? `${SCRAPER_API_BASE_URL}?api_key=${SCRAPER_API_KEY}&url=${urlToFetchThroughAPI}` : url; const result = nodeFetch(urlAdaptedForScraping, newOptions); - const timeoutId = setTimeout(() => controller.abort(), 20000); + const timeoutId = setTimeout(() => controller.abort(), NODE_FETCH_TIMEOUT_MS); return result; };