add delay between pages config variable

This commit is contained in:
Bilal Catic
2019-09-25 08:31:37 +02:00
parent 90bc57edb6
commit f93d0e738f
3 changed files with 4 additions and 3 deletions

View File

@@ -34,7 +34,6 @@ const CRAWLER_AD_TYPE = {
module.exports = {
AD_TYPE,
IGNORED_USERNAMES,
AD_CATEGORY,
AD_STATUS,
AD_AGENCY,

View File

@@ -31,7 +31,8 @@ const OLX_CONFIG = {
parseInt(process.env.OLX_MAX_RESULTS_PER_PAGE) || 50,
OLX_CRAWLER_AD_TYPE: olxCrawlerAdType || CRAWLER_AD_TYPE.NONE,
OLX_CRAWLER_AD_CATEGORIES: transformedCrawlerAdCategories,
OLX_IGNORED_USERNAMES: olxIgnoredUsernames || []
OLX_IGNORED_USERNAMES: olxIgnoredUsernames || [],
OLX_DELAY_BETWEEN_PAGES: parseInt(process.env.OLX_DELAY_BETWEEN_PAGES) || 1000
};
module.exports = {

View File

@@ -20,4 +20,5 @@ OLX_MAX_PAGES=Restrict crawler to this number of pages
OLX_MAX_RESULTS_PER_PAGE=At most this number of results from one page will be scraped and saved
OLX_CRAWLER_AD_TYPE=enum name of what type of ads should be crawled, check common/enums.js file for valid values
OLX_CRAWLER_AD_CATEGORIES=comma separated list of enum names of categories to be included, check common/enums.js file for valid values
OLX_MAX_AGE=[in days] if an ad was last crawled more than this number of days ago, it will be re-crawled
OLX_IGNORED_USERNAMES=comma separated list of usernames to ignore
OLX_DELAY_BETWEEN_PAGES=time in milliseconds to wait before indexing the next page