Use a generator function to index pages; crawl in parallel
This commit is contained in:
@@ -2,29 +2,29 @@
|
||||
require("dotenv").config({ path: "../../.env" });
|
||||
const { CRAWLER_AD_TYPE, AD_CATEGORY } = require("../common/enums");
|
||||
|
||||
// Resolves the crawler ad type from the OLX_CRAWLER_AD_TYPE environment
// variable by using its value as a key into CRAWLER_AD_TYPE; evaluates to
// null when the variable is not set.
// NOTE(review): this is a diff view — the two declaration lines below are
// presumably the removed (crawlerAdType) and added (olxCrawlerAdType)
// versions of the same statement.
const crawlerAdType =
|
||||
const olxCrawlerAdType =
|
||||
process.env.OLX_CRAWLER_AD_TYPE !== undefined
|
||||
// The raw env string is the lookup key; presumably a name that is not a key
// of CRAWLER_AD_TYPE yields undefined — verify against ../common/enums.
? CRAWLER_AD_TYPE[process.env.OLX_CRAWLER_AD_TYPE]
|
||||
: null;
|
||||
|
||||
// Parses the OLX_CRAWLER_AD_CATEGORIES environment variable as a
// comma-separated list of category names, trimming whitespace around each
// entry. When the variable is not set, the list defaults to
// "CATEGORY_FLAT" and "CATEGORY_HOUSE".
// NOTE(review): this is a diff view — the two declaration lines below are
// presumably the removed and added versions of the same statement.
const parsedCrawlerAdCategories =
|
||||
const olxParsedCrawlerAdCategories =
|
||||
// Only `undefined` selects the default; an empty string is still split and
// produces a single empty name.
process.env.OLX_CRAWLER_AD_CATEGORIES !== undefined
|
||||
? process.env.OLX_CRAWLER_AD_CATEGORIES.split(",").map(category =>
|
||||
category.trim()
|
||||
)
|
||||
: ["CATEGORY_FLAT", "CATEGORY_HOUSE"];
|
||||
|
||||
// Translates each parsed category name into the corresponding AD_CATEGORY
// entry and discards every lookup result that is falsy.
// NOTE(review): this is a diff view — the two declaration lines below are
// presumably the removed and added versions of the same statement; they
// differ only in the name of the source list.
const transformedCrawlerAdCategories = parsedCrawlerAdCategories
|
||||
const transformedCrawlerAdCategories = olxParsedCrawlerAdCategories
|
||||
.map(categoryName => AD_CATEGORY[categoryName])
|
||||
// `!!category` drops any falsy lookup result.
// NOTE(review): presumably this targets names missing from AD_CATEGORY, but
// a legitimately falsy enum value (e.g. 0) would be dropped too — confirm
// the enum values.
.filter(category => !!category);
|
||||
|
||||
// Configuration object for the OLX crawler, assembled from environment
// variables. Each numeric setting is read with parseInt and falls back to
// the literal after `||` whenever the parsed value is falsy — that is, NaN
// (variable unset or not numeric) or 0.
// NOTE(review): parseInt is called without a radix argument throughout.
// NOTE(review): this is a diff view — the OLX_CRAWLER_AD_TYPE and
// OLX_CRAWLER_AD_CATEGORIES entries appear twice, presumably as the removed
// and added versions of the same lines.
const OLX_CONFIG = {
|
||||
OLX_START_PAGE: parseInt(process.env.OLX_START_PAGE) || 1,
|
||||
OLX_END_PAGE: parseInt(process.env.OLX_END_PAGE) || 10,
|
||||
// NOTE(review): reads MAX_PAGES, whereas every other setting here reads an
// OLX_-prefixed variable — confirm this is intended.
OLX_MAX_PAGES: parseInt(process.env.MAX_PAGES) || 500,
|
||||
OLX_MAX_RESULTS_PER_PAGE:
|
||||
parseInt(process.env.OLX_MAX_RESULTS_PER_PAGE) || 50,
|
||||
// Falls back to CRAWLER_AD_TYPE.NONE when the resolved ad type is falsy.
OLX_CRAWLER_AD_TYPE: crawlerAdType || CRAWLER_AD_TYPE.NONE,
|
||||
OLX_CRAWLER_AD_CATEGORIES: transformedCrawlerAdCategories
|
||||
OLX_CRAWLER_AD_TYPE: olxCrawlerAdType || CRAWLER_AD_TYPE.NONE,
|
||||
OLX_CRAWLER_AD_CATEGORIES: transformedCrawlerAdCategories,
|
||||
// Defaults to 30; the unit is not visible here — TODO confirm.
OLX_MAX_AGE: parseInt(process.env.OLX_MAX_AGE) || 30
|
||||
};
|
||||
|
||||
module.exports = {
|
||||
|
||||
Reference in New Issue
Block a user