Implement RENT and REQUEST options for OLX; implement force crawl option

This commit is contained in:
Bilal Catic
2019-10-30 15:03:59 +01:00
parent 97d93a3f37
commit 3abbed183e

View File

@@ -22,7 +22,8 @@ const OLX_ENUMS = {
OLX_AD_TYPE: {
[CRAWLER_AD_TYPE.ALL]: "",
[CRAWLER_AD_TYPE.ONLY_SELL]: "&vrsta=samoprodaja",
[CRAWLER_AD_TYPE.ONLY_RENT]: "&vrsta=samoizdavanje"
[CRAWLER_AD_TYPE.ONLY_RENT]: "&vrsta=samoizdavanje",
[CRAWLER_AD_TYPE.ONLY_REQUEST]: "&vrsta=samopotraznja"
},
OLX_AD_CATEGORY: {
[AD_CATEGORY.FLAT.id]: "&kategorija=23",
@@ -38,6 +39,8 @@ const OLX_ENUMS = {
OLX_RENEWED_DATE_FORMAT: "DD.MM.YYYY. u HH:mm"
};
const { OLX_FORCE_CRAWL } = require("../specificConfigs/olx");
class OlxCrawler {
constructor(
savers = [],
@@ -99,7 +102,7 @@ class OlxCrawler {
"minute"
);
if (stopCrawlingThisCategory) {
if (stopCrawlingThisCategory && !OLX_FORCE_CRAWL) {
generatorsToRemove[index] = true;
// console.log("\tGenerator ", index + 1, "has no more new ads");
break;
@@ -134,7 +137,7 @@ class OlxCrawler {
const urlAdTypePart = OLX_ENUMS.OLX_AD_TYPE[this.crawlerAdTypes];
const urlCategoryPart = OLX_ENUMS.OLX_AD_CATEGORY[adCategory];
if (urlAdTypePart && urlCategoryPart) {
if (urlAdTypePart !== undefined && urlCategoryPart !== undefined) {
while (true) {
const urlPageToCrawl = `${this.baseUrl}${urlAdTypePart}${urlCategoryPart}&stranica=${pageToIndex}`;
const singlePageResults = await this.indexSinglePage(
@@ -212,7 +215,7 @@ class OlxCrawler {
title: "#naslovartikla",
descriptions: ".artikal_detaljniopis_tekst",
category:
"#artikal_glavni_div > div.artikal_lijevo > div:nth-child(3) > div > span:nth-child(3) > a > span"
"#artikal_glavni_div > div.artikal_lijevo > div.artikal_kat > div > span:nth-child(3) > a > span"
};
const username = $(propertySelectors.username)
@@ -384,7 +387,7 @@ class OlxCrawler {
//=========================================
const parsedCategory = this.getAdCategoryId(category);
if (!parsedCategory) {
throw { message: "Unknown ad category" };
throw { message: `Unknown ad category [${category}]` };
}
const parsedAdType = this.getAdTypeId(adType);
@@ -475,6 +478,8 @@ class OlxCrawler {
return AD_TYPE.AD_TYPE_SALE;
case "Izdavanje":
return AD_TYPE.AD_TYPE_RENT;
case "Potražnja":
return AD_TYPE.AD_TYPE_RENT;
default:
return undefined;
}