Use a generator function to index pages; crawl in parallel

This commit is contained in:
Bilal Catic
2019-09-23 10:46:31 +02:00
parent c4f6c6e1c3
commit 3140fdf0c0
6 changed files with 127 additions and 77 deletions

View File

@@ -2,29 +2,29 @@
// Configuration for the OLX crawler, assembled from environment variables.
// NOTE(review): several declarations below appear twice under an old and a new
// name (e.g. crawlerAdType / olxCrawlerAdType). This looks like a diff hunk
// rendered without +/- markers, i.e. removed and added lines interleaved —
// confirm which lines are current before treating this as runnable code.

// Read settings from the .env file at the given relative path.
require("dotenv").config({ path: "../../.env" });
const { CRAWLER_AD_TYPE, AD_CATEGORY } = require("../common/enums");
// Ad type: CRAWLER_AD_TYPE entry keyed by OLX_CRAWLER_AD_TYPE, or null when the
// variable is not set. An unrecognised key yields undefined from the lookup.
const crawlerAdType =
const olxCrawlerAdType =
process.env.OLX_CRAWLER_AD_TYPE !== undefined
? CRAWLER_AD_TYPE[process.env.OLX_CRAWLER_AD_TYPE]
: null;
// Category names: OLX_CRAWLER_AD_CATEGORIES split on "," with each entry
// trimmed; defaults to the flat and house categories when the variable is unset.
const parsedCrawlerAdCategories =
const olxParsedCrawlerAdCategories =
process.env.OLX_CRAWLER_AD_CATEGORIES !== undefined
? process.env.OLX_CRAWLER_AD_CATEGORIES.split(",").map(category =>
category.trim()
)
: ["CATEGORY_FLAT", "CATEGORY_HOUSE"];
// Translate names to AD_CATEGORY values; names with no (truthy) entry in
// AD_CATEGORY are dropped silently by the filter.
const transformedCrawlerAdCategories = parsedCrawlerAdCategories
const transformedCrawlerAdCategories = olxParsedCrawlerAdCategories
.map(categoryName => AD_CATEGORY[categoryName])
.filter(category => !!category);
// Numeric settings use `parseInt(...) || fallback`, so the fallback applies when
// the variable is unset, not parseable as an integer (NaN), or parses to 0.
const OLX_CONFIG = {
OLX_START_PAGE: parseInt(process.env.OLX_START_PAGE) || 1,
OLX_END_PAGE: parseInt(process.env.OLX_END_PAGE) || 10,
// NOTE(review): reads MAX_PAGES, not OLX_MAX_PAGES, unlike the other
// OLX_-prefixed variables — confirm this is intentional.
OLX_MAX_PAGES: parseInt(process.env.MAX_PAGES) || 500,
OLX_MAX_RESULTS_PER_PAGE:
parseInt(process.env.OLX_MAX_RESULTS_PER_PAGE) || 50,
// Falls back to CRAWLER_AD_TYPE.NONE when the ad type above is null/undefined
// (or otherwise falsy).
OLX_CRAWLER_AD_TYPE: crawlerAdType || CRAWLER_AD_TYPE.NONE,
OLX_CRAWLER_AD_CATEGORIES: transformedCrawlerAdCategories
OLX_CRAWLER_AD_TYPE: olxCrawlerAdType || CRAWLER_AD_TYPE.NONE,
OLX_CRAWLER_AD_CATEGORIES: transformedCrawlerAdCategories,
// NOTE(review): unit of OLX_MAX_AGE is not visible here (presumably days) —
// verify against the code that consumes it.
OLX_MAX_AGE: parseInt(process.env.OLX_MAX_AGE) || 30
};
module.exports = {