diff --git a/app/config/appConfig.js b/app/config/appConfig.js
index 8a11650..0ed1e6b 100644
--- a/app/config/appConfig.js
+++ b/app/config/appConfig.js
@@ -28,6 +28,11 @@ const MAX_REAL_ESTATES_IN_EMAIL =
 const MAX_REAL_ESTATES_IN_FIRST_EMAIL =
   parseInt(process.env.MAX_REAL_ESTATES_IN_FIRST_EMAIL) || 5;
 
+// Environment values are strings, so "0" would be truthy if used as-is.
+// Parse to an integer: only a non-zero number enables crawler debug output.
+const PRINT_CRAWLER_DEBUG =
+  parseInt(process.env.PRINT_CRAWLER_DEBUG_INFO, 10) || 0;
+
 module.exports = {
   APP_PORT,
   APP_URL,
@@ -36,5 +41,6 @@ module.exports = {
   STOP_CRAWLER,
   AWS_EMAIL_CONFIG,
   MAX_REAL_ESTATES_IN_EMAIL,
-  MAX_REAL_ESTATES_IN_FIRST_EMAIL
+  MAX_REAL_ESTATES_IN_FIRST_EMAIL,
+  PRINT_CRAWLER_DEBUG
 };
diff --git a/app/crawler/specificCrawlers/olx.js b/app/crawler/specificCrawlers/olx.js
index 526db05..d7176d1 100644
--- a/app/crawler/specificCrawlers/olx.js
+++ b/app/crawler/specificCrawlers/olx.js
@@ -13,7 +13,10 @@ const {
   CRAWLER_AD_TYPE
 } = require("../../common/enums");
 
-const { DEFAULT_TIMEZONE } = require("../../config/appConfig");
+const {
+  DEFAULT_TIMEZONE,
+  PRINT_CRAWLER_DEBUG
+} = require("../../config/appConfig");
 
 const OLX_ENUMS = {
   OLX_AD_TYPE: {
@@ -156,6 +159,10 @@ class OlxCrawler {
   }
 
   async indexSinglePage(url, maxResultsPerPage) {
+    if (PRINT_CRAWLER_DEBUG) {
+      console.log("[OLX] Index page : ", url);
+    }
+
     try {
       const res = await fetch(url);
       const body = await res.text();
diff --git a/app/crawler/specificCrawlers/prostor.js b/app/crawler/specificCrawlers/prostor.js
index 16bcce1..bb3133c 100644
--- a/app/crawler/specificCrawlers/prostor.js
+++ b/app/crawler/specificCrawlers/prostor.js
@@ -11,6 +11,8 @@ const {
   CRAWLER_AD_TYPE
 } = require("../../common/enums");
 
+const { PRINT_CRAWLER_DEBUG } = require("../../config/appConfig");
+
 const PROSTOR_ENUMS = {
   PROSTOR_AD_TYPE: {
     [CRAWLER_AD_TYPE.ALL]: "&action=0",
@@ -78,7 +80,9 @@ class ProstorCrawler {
   }
 
   async extractRealEstates(url) {
-    console.log("[PROSTOR] Index page : ", url);
+    if (PRINT_CRAWLER_DEBUG) {
+      console.log("[PROSTOR] Index page : ", url);
+    }
 
     try {
       const res = await fetch(url);
diff --git a/app/crawler/specificCrawlers/rental.js b/app/crawler/specificCrawlers/rental.js
index a29d772..45e572d 100644
--- a/app/crawler/specificCrawlers/rental.js
+++ b/app/crawler/specificCrawlers/rental.js
@@ -14,7 +14,10 @@ const {
   CRAWLER_AD_TYPE
 } = require("../../common/enums");
 
-const { DEFAULT_TIMEZONE } = require("../../config/appConfig");
+const {
+  DEFAULT_TIMEZONE,
+  PRINT_CRAWLER_DEBUG
+} = require("../../config/appConfig");
 
 const RENTAL_ENUMS = {
   RENTAL_AD_TYPE: {
@@ -159,7 +162,9 @@ class RentalCrawler {
   }
 
   async indexSinglePage(url, maxResultsPerPage) {
-    // console.log("[RENTAL] Index page : ", url);
+    if (PRINT_CRAWLER_DEBUG) {
+      console.log("[RENTAL] Index page : ", url);
+    }
 
     try {
       const res = await fetch(url);
diff --git a/development.env b/development.env
index 6713fcd..0bd2939 100644
--- a/development.env
+++ b/development.env
@@ -23,6 +23,7 @@ SOURCE_EMAIL=info@saburly.com
 #=============== CRAWLER SETTINGS===============#
 CRAWLER_INTERVAL=Interval to run cralwer(s), in seconds
 STOP_CRAWLER=Non-zero value will skip crawler execution
+PRINT_CRAWLER_DEBUG_INFO=Non-zero value will print crawler debugging info to the server console
 #==OLX==
 OLX_MAX_PAGES=Restrict crawler to this number of pages
 OLX_MAX_RESULTS_PER_PAGE=Only this number or less results from one page will be scraped and saved