refactor crawler - adapt to use new ENUM objects

This commit is contained in:
Bilal Catic
2019-09-30 10:27:12 +02:00
parent e1dfd6a4eb
commit 9c0104a57c
2 changed files with 13 additions and 18 deletions

View File

@@ -5,8 +5,6 @@
All environment-specific configuration is read here and
passed to the crawlers and savers.
*/
require("dotenv").config();
const OlxCrawler = require("./specific/olx");
const { OLX_CONFIG } = require("./crawlerConfig");
const PostgresSaver = require("./savers/postgres");

View File

@@ -22,12 +22,12 @@ const OLX_ENUMS = {
[CRAWLER_AD_TYPE.ONLY_RENT]: "&vrsta=samoizdavanje"
},
OLX_AD_CATEGORY: {
[AD_CATEGORY.CATEGORY_FLAT]: "&kategorija=23",
[AD_CATEGORY.CATEGORY_HOUSE]: "&kategorija=24",
[AD_CATEGORY.CATEGORY_LAND]: "&kategorija=29",
[AD_CATEGORY.CATEGORY_OFFICE]: "&kategorija=25",
[AD_CATEGORY.CATEGORY_APARTMENT]: "&kategorija=27",
[AD_CATEGORY.CATEGORY_GARAGE]: "&kategorija=30"
[AD_CATEGORY.FLAT]: "&kategorija=23",
[AD_CATEGORY.HOUSE]: "&kategorija=24",
//[AD_CATEGORY.LAND]: "&kategorija=29",
//[AD_CATEGORY.OFFICE]: "&kategorija=25",
[AD_CATEGORY.APARTMENT]: "&kategorija=27"
//[AD_CATEGORY.CATEGORY_GARAGE]: "&kategorija=30"
},
MAX_DETAIL_FIELDS: 30,
OLX_PUBLISHED_DATE_FORMAT: "DD.MM.YYYY. u HH:mm",
@@ -38,10 +38,7 @@ class OlxCrawler {
constructor(
savers = [],
crawlerAdTypes = CRAWLER_AD_TYPE.ALL,
crawlerAdCategories = [
AD_CATEGORY.CATEGORY_FLAT,
AD_CATEGORY.CATEGORY_HOUSE
],
crawlerAdCategories = [AD_CATEGORY.FLAT, AD_CATEGORY.HOUSE],
maxPages = 1000,
maxResultsPerPage = 100,
ignoredUsernames = [],
@@ -407,7 +404,7 @@ class OlxCrawler {
url,
agencyObjectId: olxId,
originAgencyName: AD_AGENCY.OLX,
realEstateType: this.getAdCategoryId(category),
realEstateType: parsedCategory,
adType: parsedAdType,
title,
price: parsedPrice,
@@ -448,15 +445,15 @@ class OlxCrawler {
getAdCategoryId(categoryText) {
switch (categoryText) {
case "Stanovi":
return AD_CATEGORY.CATEGORY_FLAT;
return AD_CATEGORY.FLAT.id;
case "Zemljišta":
return AD_CATEGORY.CATEGORY_LAND;
return undefined; //AD_CATEGORY.LAND;
case "Kuće":
return AD_CATEGORY.CATEGORY_HOUSE;
return AD_CATEGORY.HOUSE.id;
case "Poslovni prostori":
return AD_CATEGORY.CATEGORY_OFFICE;
return undefined; //AD_CATEGORY.OFFICE;
case "Apartmani":
return AD_CATEGORY.CATEGORY_APARTMENT;
return AD_CATEGORY.APARTMENT.id;
default:
return undefined;
}