add config files for Prostor agency
This commit is contained in:
@@ -3,8 +3,10 @@ require("dotenv").config({ path: __dirname + "/./../../.env" });
|
||||
|
||||
const OLX_CONFIG = require("./specificConfigs/olx");
|
||||
const RENTAL_CONFIG = require("./specificConfigs/rental");
|
||||
const PROSTOR_CONFIG = require("./specificConfigs/prostor");
|
||||
|
||||
module.exports = {
|
||||
OLX_CONFIG,
|
||||
RENTAL_CONFIG
|
||||
RENTAL_CONFIG,
|
||||
PROSTOR_CONFIG
|
||||
};
|
||||
|
||||
33
app/crawler/specificConfigs/prostor.js
Normal file
33
app/crawler/specificConfigs/prostor.js
Normal file
@@ -0,0 +1,33 @@
|
||||
"use strict";
|
||||
const { CRAWLER_AD_TYPE, AD_CATEGORY } = require("../../common/enums");
|
||||
|
||||
const prostorCrawlerAdType =
|
||||
process.env.PROSTOR_CRAWLER_AD_TYPE !== undefined
|
||||
? CRAWLER_AD_TYPE[process.env.PROSTOR_CRAWLER_AD_TYPE]
|
||||
: null;
|
||||
|
||||
const prostorParsedCrawlerAdCategories =
|
||||
process.env.PROSTOR_CRAWLER_AD_CATEGORIES !== undefined
|
||||
? process.env.PROSTOR_CRAWLER_AD_CATEGORIES.split(",").map(category =>
|
||||
category.trim()
|
||||
)
|
||||
: ["FLAT", "HOUSE"];
|
||||
|
||||
const prostorIgnoredUsernames = [];
|
||||
|
||||
const transformedProstorCrawlerAdCategories = prostorParsedCrawlerAdCategories
|
||||
.map(categoryName =>
|
||||
AD_CATEGORY[categoryName] ? AD_CATEGORY[categoryName].id : undefined
|
||||
)
|
||||
.filter(category => !!category);
|
||||
|
||||
module.exports = {
|
||||
PROSTOR_MAX_PAGES: parseInt(process.env.PROSTOR_MAX_PAGES) || 100,
|
||||
PROSTOR_MAX_RESULTS_PER_PAGE:
|
||||
parseInt(process.env.PROSTOR_MAX_RESULTS_PER_PAGE) || 50,
|
||||
PROSTOR_CRAWLER_AD_TYPE: prostorCrawlerAdType || CRAWLER_AD_TYPE.NONE,
|
||||
PROSTOR_CRAWLER_AD_CATEGORIES: transformedProstorCrawlerAdCategories,
|
||||
PROSTOR_IGNORED_USERNAMES: prostorIgnoredUsernames || [],
|
||||
PROSTOR_DELAY_BETWEEN_PAGES:
|
||||
parseInt(process.env.PROSTOR_DELAY_BETWEEN_PAGES) || 1000
|
||||
};
|
||||
@@ -37,3 +37,10 @@ RENTAL_CRAWLER_AD_TYPE=enum name of what type of ads should be crawled, check co
|
||||
RENTAL_CRAWLER_AD_CATEGORIES=comma separated list of enum names of categories to be included, check common/enums.js file for valid values
|
||||
RENTAL_IGNORED_USERNAMES=!!! This is not used for rental crawler !!!
|
||||
RENTAL_DELAY_BETWEEN_PAGES=time in milliseconds to wait before indexing next page
|
||||
#==PROSTOR==
|
||||
PROSTOR_MAX_PAGES=Restrict crawler to this number of pages
|
||||
PROSTOR_MAX_RESULTS_PER_PAGE=Only this number or less results from one page will be scraped and saved
|
||||
PROSTOR_CRAWLER_AD_TYPE=enum name of what type of ads should be crawled, check common/enums.js file for valid values
|
||||
PROSTOR_CRAWLER_AD_CATEGORIES=comma separated list of enum names of categories to be included, check common/enums.js file for valid values
|
||||
PROSTOR_IGNORED_USERNAMES=!!! This is not used for prostor crawler !!!
|
||||
PROSTOR_DELAY_BETWEEN_PAGES=time in milliseconds to wait before indexing next page
|
||||
|
||||
Reference in New Issue
Block a user