Implement RENT and REQUEST options for OLX; implement force-crawl option
This commit is contained in:
@@ -22,7 +22,8 @@ const OLX_ENUMS = {
|
|||||||
OLX_AD_TYPE: {
|
OLX_AD_TYPE: {
|
||||||
[CRAWLER_AD_TYPE.ALL]: "",
|
[CRAWLER_AD_TYPE.ALL]: "",
|
||||||
[CRAWLER_AD_TYPE.ONLY_SELL]: "&vrsta=samoprodaja",
|
[CRAWLER_AD_TYPE.ONLY_SELL]: "&vrsta=samoprodaja",
|
||||||
[CRAWLER_AD_TYPE.ONLY_RENT]: "&vrsta=samoizdavanje"
|
[CRAWLER_AD_TYPE.ONLY_RENT]: "&vrsta=samoizdavanje",
|
||||||
|
[CRAWLER_AD_TYPE.ONLY_REQUEST]: "&vrsta=samopotraznja"
|
||||||
},
|
},
|
||||||
OLX_AD_CATEGORY: {
|
OLX_AD_CATEGORY: {
|
||||||
[AD_CATEGORY.FLAT.id]: "&kategorija=23",
|
[AD_CATEGORY.FLAT.id]: "&kategorija=23",
|
||||||
@@ -38,6 +39,8 @@ const OLX_ENUMS = {
|
|||||||
OLX_RENEWED_DATE_FORMAT: "DD.MM.YYYY. u HH:mm"
|
OLX_RENEWED_DATE_FORMAT: "DD.MM.YYYY. u HH:mm"
|
||||||
};
|
};
|
||||||
|
|
||||||
|
const { OLX_FORCE_CRAWL } = require("../specificConfigs/olx");
|
||||||
|
|
||||||
class OlxCrawler {
|
class OlxCrawler {
|
||||||
constructor(
|
constructor(
|
||||||
savers = [],
|
savers = [],
|
||||||
@@ -99,7 +102,7 @@ class OlxCrawler {
|
|||||||
"minute"
|
"minute"
|
||||||
);
|
);
|
||||||
|
|
||||||
if (stopCrawlingThisCategory) {
|
if (stopCrawlingThisCategory && !OLX_FORCE_CRAWL) {
|
||||||
generatorsToRemove[index] = true;
|
generatorsToRemove[index] = true;
|
||||||
// console.log("\tGenerator ", index + 1, "has no more new ads");
|
// console.log("\tGenerator ", index + 1, "has no more new ads");
|
||||||
break;
|
break;
|
||||||
@@ -134,7 +137,7 @@ class OlxCrawler {
|
|||||||
|
|
||||||
const urlAdTypePart = OLX_ENUMS.OLX_AD_TYPE[this.crawlerAdTypes];
|
const urlAdTypePart = OLX_ENUMS.OLX_AD_TYPE[this.crawlerAdTypes];
|
||||||
const urlCategoryPart = OLX_ENUMS.OLX_AD_CATEGORY[adCategory];
|
const urlCategoryPart = OLX_ENUMS.OLX_AD_CATEGORY[adCategory];
|
||||||
if (urlAdTypePart && urlCategoryPart) {
|
if (urlAdTypePart !== undefined && urlCategoryPart !== undefined) {
|
||||||
while (true) {
|
while (true) {
|
||||||
const urlPageToCrawl = `${this.baseUrl}${urlAdTypePart}${urlCategoryPart}&stranica=${pageToIndex}`;
|
const urlPageToCrawl = `${this.baseUrl}${urlAdTypePart}${urlCategoryPart}&stranica=${pageToIndex}`;
|
||||||
const singlePageResults = await this.indexSinglePage(
|
const singlePageResults = await this.indexSinglePage(
|
||||||
@@ -212,7 +215,7 @@ class OlxCrawler {
|
|||||||
title: "#naslovartikla",
|
title: "#naslovartikla",
|
||||||
descriptions: ".artikal_detaljniopis_tekst",
|
descriptions: ".artikal_detaljniopis_tekst",
|
||||||
category:
|
category:
|
||||||
"#artikal_glavni_div > div.artikal_lijevo > div:nth-child(3) > div > span:nth-child(3) > a > span"
|
"#artikal_glavni_div > div.artikal_lijevo > div.artikal_kat > div > span:nth-child(3) > a > span"
|
||||||
};
|
};
|
||||||
|
|
||||||
const username = $(propertySelectors.username)
|
const username = $(propertySelectors.username)
|
||||||
@@ -384,7 +387,7 @@ class OlxCrawler {
|
|||||||
//=========================================
|
//=========================================
|
||||||
const parsedCategory = this.getAdCategoryId(category);
|
const parsedCategory = this.getAdCategoryId(category);
|
||||||
if (!parsedCategory) {
|
if (!parsedCategory) {
|
||||||
throw { message: "Unknown ad category" };
|
throw { message: `Unknown ad category [${category}]` };
|
||||||
}
|
}
|
||||||
|
|
||||||
const parsedAdType = this.getAdTypeId(adType);
|
const parsedAdType = this.getAdTypeId(adType);
|
||||||
@@ -475,6 +478,8 @@ class OlxCrawler {
|
|||||||
return AD_TYPE.AD_TYPE_SALE;
|
return AD_TYPE.AD_TYPE_SALE;
|
||||||
case "Izdavanje":
|
case "Izdavanje":
|
||||||
return AD_TYPE.AD_TYPE_RENT;
|
return AD_TYPE.AD_TYPE_RENT;
|
||||||
|
case "Potražnja":
|
||||||
|
return AD_TYPE.AD_TYPE_RENT;
|
||||||
default:
|
default:
|
||||||
return undefined;
|
return undefined;
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user