From 6fc4218e39db29ca11aa4acc794bbb50904b8206 Mon Sep 17 00:00:00 2001
From: Bilal Catic
Date: Thu, 24 Oct 2019 17:11:12 +0200
Subject: [PATCH] add config files for Prostor agency

---
 app/crawler/crawlerConfig.js           |  4 ++-
 app/crawler/specificConfigs/prostor.js | 43 ++++++++++++++++++++++++++
 development.env                        |  7 +++++
 3 files changed, 53 insertions(+), 1 deletion(-)
 create mode 100644 app/crawler/specificConfigs/prostor.js

diff --git a/app/crawler/crawlerConfig.js b/app/crawler/crawlerConfig.js
index 29c68f2..1818ccb 100644
--- a/app/crawler/crawlerConfig.js
+++ b/app/crawler/crawlerConfig.js
@@ -3,8 +3,10 @@ require("dotenv").config({ path: __dirname + "/./../../.env" });
 
 const OLX_CONFIG = require("./specificConfigs/olx");
 const RENTAL_CONFIG = require("./specificConfigs/rental");
+const PROSTOR_CONFIG = require("./specificConfigs/prostor");
 
 module.exports = {
   OLX_CONFIG,
-  RENTAL_CONFIG
+  RENTAL_CONFIG,
+  PROSTOR_CONFIG
 };
diff --git a/app/crawler/specificConfigs/prostor.js b/app/crawler/specificConfigs/prostor.js
new file mode 100644
index 0000000..cde72d0
--- /dev/null
+++ b/app/crawler/specificConfigs/prostor.js
@@ -0,0 +1,43 @@
+"use strict";
+const { CRAWLER_AD_TYPE, AD_CATEGORY } = require("../../common/enums");
+
+// Ad type to crawl: PROSTOR_CRAWLER_AD_TYPE is looked up as a key of
+// CRAWLER_AD_TYPE; null when the variable is not set.
+const prostorCrawlerAdType =
+  process.env.PROSTOR_CRAWLER_AD_TYPE !== undefined
+    ? CRAWLER_AD_TYPE[process.env.PROSTOR_CRAWLER_AD_TYPE]
+    : null;
+
+// Category names from the comma separated PROSTOR_CRAWLER_AD_CATEGORIES
+// variable (whitespace around each name is trimmed); FLAT and HOUSE when the
+// variable is not set.
+const prostorParsedCrawlerAdCategories =
+  process.env.PROSTOR_CRAWLER_AD_CATEGORIES !== undefined
+    ? process.env.PROSTOR_CRAWLER_AD_CATEGORIES.split(",").map(category =>
+        category.trim()
+      )
+    : ["FLAT", "HOUSE"];
+
+// Hard-coded empty list: no environment variable is read for it.
+const prostorIgnoredUsernames = [];
+
+// Category names mapped to AD_CATEGORY ids; names missing from AD_CATEGORY
+// and falsy ids are dropped.
+const transformedProstorCrawlerAdCategories = prostorParsedCrawlerAdCategories
+  .map(categoryName =>
+    AD_CATEGORY[categoryName] ? AD_CATEGORY[categoryName].id : undefined
+  )
+  .filter(category => !!category);
+
+// Numeric settings are parsed as base 10 integers; a missing, unparsable or
+// zero value falls back to the default on the right of `||`.
+module.exports = {
+  PROSTOR_MAX_PAGES: parseInt(process.env.PROSTOR_MAX_PAGES, 10) || 100,
+  PROSTOR_MAX_RESULTS_PER_PAGE:
+    parseInt(process.env.PROSTOR_MAX_RESULTS_PER_PAGE, 10) || 50,
+  PROSTOR_CRAWLER_AD_TYPE: prostorCrawlerAdType || CRAWLER_AD_TYPE.NONE,
+  PROSTOR_CRAWLER_AD_CATEGORIES: transformedProstorCrawlerAdCategories,
+  PROSTOR_IGNORED_USERNAMES: prostorIgnoredUsernames || [],
+  PROSTOR_DELAY_BETWEEN_PAGES:
+    parseInt(process.env.PROSTOR_DELAY_BETWEEN_PAGES, 10) || 1000
+};
diff --git a/development.env b/development.env
index 656ed15..fd6dd30 100644
--- a/development.env
+++ b/development.env
@@ -37,3 +37,10 @@ RENTAL_CRAWLER_AD_TYPE=enum name of what type of ads should be crawled, check co
 RENTAL_CRAWLER_AD_CATEGORIES=comma separated list of enum names of categories to be included, check common/enums.js file for valid values
 RENTAL_IGNORED_USERNAMES=!!! This is not used for rental crawler !!!
 RENTAL_DELAY_BETWEEN_PAGES=time in miliseconds to wait before indexing next page
+#==PROSTOR==
+PROSTOR_MAX_PAGES=Restrict crawler to this number of pages
+PROSTOR_MAX_RESULTS_PER_PAGE=Only this number or less results from one page will be scraped and saved
+PROSTOR_CRAWLER_AD_TYPE=enum name of what type of ads should be crawled, check common/enums.js file for valid values
+PROSTOR_CRAWLER_AD_CATEGORIES=comma separated list of enum names of categories to be included, check common/enums.js file for valid values
+PROSTOR_IGNORED_USERNAMES=!!! This is not used for prostor crawler !!!
+PROSTOR_DELAY_BETWEEN_PAGES=time in milliseconds to wait before indexing next page