use function generator to index pages; crawl in parallel

This commit is contained in:
Bilal Catic
2019-09-23 10:46:31 +02:00
parent c4f6c6e1c3
commit 3140fdf0c0
6 changed files with 127 additions and 77 deletions

View File

@@ -16,8 +16,8 @@ SOURCE_EMAIL=info@saburly.com
#=============== CRAWLER SETTINGS===============#
#==OLX==
OLX_START_PAGE=Crawler starts from this page
OLX_END_PAGE=Crawler ends with this page (including this page)
OLX_MAX_PAGES=Restrict crawler to this number of pages
OLX_MAX_RESULTS_PER_PAGE=At most this number of results from each page will be scraped and saved
OLX_CRAWLER_AD_TYPE=enum name of the type of ads to crawl; see common/enums.js for valid values
OLX_CRAWLER_AD_CATEGORIES=comma-separated list of enum names of the categories to include; see common/enums.js for valid values
OLX_MAX_AGE=[in days] if an ad was last crawled more than this number of days ago, it will be re-crawled