add config files for Prostor agency
This commit is contained in:
@@ -3,8 +3,10 @@ require("dotenv").config({ path: __dirname + "/./../../.env" });
|
|||||||
|
|
||||||
const OLX_CONFIG = require("./specificConfigs/olx");
|
const OLX_CONFIG = require("./specificConfigs/olx");
|
||||||
const RENTAL_CONFIG = require("./specificConfigs/rental");
|
const RENTAL_CONFIG = require("./specificConfigs/rental");
|
||||||
|
const PROSTOR_CONFIG = require("./specificConfigs/prostor");
|
||||||
|
|
||||||
module.exports = {
|
module.exports = {
|
||||||
OLX_CONFIG,
|
OLX_CONFIG,
|
||||||
RENTAL_CONFIG
|
RENTAL_CONFIG,
|
||||||
|
PROSTOR_CONFIG
|
||||||
};
|
};
|
||||||
|
|||||||
33
app/crawler/specificConfigs/prostor.js
Normal file
33
app/crawler/specificConfigs/prostor.js
Normal file
@@ -0,0 +1,33 @@
|
|||||||
|
"use strict";
|
||||||
|
const { CRAWLER_AD_TYPE, AD_CATEGORY } = require("../../common/enums");
|
||||||
|
|
||||||
|
const prostorCrawlerAdType =
|
||||||
|
process.env.PROSTOR_CRAWLER_AD_TYPE !== undefined
|
||||||
|
? CRAWLER_AD_TYPE[process.env.PROSTOR_CRAWLER_AD_TYPE]
|
||||||
|
: null;
|
||||||
|
|
||||||
|
const prostorParsedCrawlerAdCategories =
|
||||||
|
process.env.PROSTOR_CRAWLER_AD_CATEGORIES !== undefined
|
||||||
|
? process.env.PROSTOR_CRAWLER_AD_CATEGORIES.split(",").map(category =>
|
||||||
|
category.trim()
|
||||||
|
)
|
||||||
|
: ["FLAT", "HOUSE"];
|
||||||
|
|
||||||
|
const prostorIgnoredUsernames = [];
|
||||||
|
|
||||||
|
const transformedProstorCrawlerAdCategories = prostorParsedCrawlerAdCategories
|
||||||
|
.map(categoryName =>
|
||||||
|
AD_CATEGORY[categoryName] ? AD_CATEGORY[categoryName].id : undefined
|
||||||
|
)
|
||||||
|
.filter(category => !!category);
|
||||||
|
|
||||||
|
module.exports = {
|
||||||
|
PROSTOR_MAX_PAGES: parseInt(process.env.PROSTOR_MAX_PAGES) || 100,
|
||||||
|
PROSTOR_MAX_RESULTS_PER_PAGE:
|
||||||
|
parseInt(process.env.PROSTOR_MAX_RESULTS_PER_PAGE) || 50,
|
||||||
|
PROSTOR_CRAWLER_AD_TYPE: prostorCrawlerAdType || CRAWLER_AD_TYPE.NONE,
|
||||||
|
PROSTOR_CRAWLER_AD_CATEGORIES: transformedProstorCrawlerAdCategories,
|
||||||
|
PROSTOR_IGNORED_USERNAMES: prostorIgnoredUsernames || [],
|
||||||
|
PROSTOR_DELAY_BETWEEN_PAGES:
|
||||||
|
parseInt(process.env.PROSTOR_DELAY_BETWEEN_PAGES) || 1000
|
||||||
|
};
|
||||||
@@ -37,3 +37,10 @@ RENTAL_CRAWLER_AD_TYPE=enum name of what type of ads should be crawled, check co
|
|||||||
RENTAL_CRAWLER_AD_CATEGORIES=comma separated list of enum names of categories to be included, check common/enums.js file for valid values
|
RENTAL_CRAWLER_AD_CATEGORIES=comma separated list of enum names of categories to be included, check common/enums.js file for valid values
|
||||||
RENTAL_IGNORED_USERNAMES=!!! This is not used for rental crawler !!!
|
RENTAL_IGNORED_USERNAMES=!!! This is not used for rental crawler !!!
|
||||||
RENTAL_DELAY_BETWEEN_PAGES=time in milliseconds to wait before indexing next page
|
RENTAL_DELAY_BETWEEN_PAGES=time in milliseconds to wait before indexing next page
|
||||||
|
#==PROSTOR==
|
||||||
|
PROSTOR_MAX_PAGES=Restrict crawler to this number of pages
|
||||||
|
PROSTOR_MAX_RESULTS_PER_PAGE=At most this number of results from one page will be scraped and saved
|
||||||
|
PROSTOR_CRAWLER_AD_TYPE=enum name of what type of ads should be crawled, check common/enums.js file for valid values
|
||||||
|
PROSTOR_CRAWLER_AD_CATEGORIES=comma separated list of enum names of categories to be included, check common/enums.js file for valid values
|
||||||
|
PROSTOR_IGNORED_USERNAMES=!!! This is not used for prostor crawler !!!
|
||||||
|
PROSTOR_DELAY_BETWEEN_PAGES=time in milliseconds to wait before indexing next page
|
||||||
|
|||||||
Reference in New Issue
Block a user