Implement renting option - crawler part #64
@@ -60,7 +60,8 @@ const GARAGE_PRICE_SLIDER_OPTIONS = {
|
||||
|
||||
/**
 * Ad type identifiers.
 *
 * Each value is the string constant used for that type:
 *  - AD_TYPE_SALE:    "SALE"
 *  - AD_TYPE_RENT:    "RENT"
 *  - AD_TYPE_REQUEST: "REQUEST"
 *
 * Every key appears exactly once; a duplicated, comma-less `AD_TYPE_RENT`
 * entry would make this object literal a syntax error.
 */
const AD_TYPE = {
  AD_TYPE_SALE: "SALE",
  AD_TYPE_RENT: "RENT",
  AD_TYPE_REQUEST: "REQUEST"
};
|
||||
|
||||
const AD_CATEGORY = {
|
||||
@@ -140,7 +141,8 @@ const CRAWLER_AD_TYPE = {
|
||||
NONE: 0,
|
||||
ALL: 1,
|
||||
ONLY_SELL: 2,
|
||||
ONLY_RENT: 3
|
||||
ONLY_RENT: 3,
|
||||
ONLY_REQUEST: 4
|
||||
};
|
||||
|
||||
module.exports = {
|
||||
|
||||
@@ -29,5 +29,6 @@ module.exports = {
|
||||
AKTIDO_CRAWLER_AD_CATEGORIES: transformedAktidoCrawlerAdCategories,
|
||||
AKTIDO_IGNORED_USERNAMES: aktidoIgnoredUsernames || [],
|
||||
AKTIDO_DELAY_BETWEEN_PAGES:
|
||||
parseInt(process.env.AKTIDO_DELAY_BETWEEN_PAGES) || 1000
|
||||
parseInt(process.env.AKTIDO_DELAY_BETWEEN_PAGES) || 1000,
|
||||
AKTIDO_FORCE_CRAWL: !!parseInt(process.env.AKTIDO_FORCE_CRAWL)
|
||||
};
|
||||
|
||||
@@ -33,5 +33,7 @@ module.exports = {
|
||||
OLX_CRAWLER_AD_TYPE: olxCrawlerAdType || CRAWLER_AD_TYPE.NONE,
|
||||
OLX_CRAWLER_AD_CATEGORIES: transformedOlxCrawlerAdCategories,
|
||||
OLX_IGNORED_USERNAMES: olxIgnoredUsernames || [],
|
||||
OLX_DELAY_BETWEEN_PAGES: parseInt(process.env.OLX_DELAY_BETWEEN_PAGES) || 1000
|
||||
OLX_DELAY_BETWEEN_PAGES:
|
||||
parseInt(process.env.OLX_DELAY_BETWEEN_PAGES) || 1000,
|
||||
OLX_FORCE_CRAWL: !!parseInt(process.env.OLX_FORCE_CRAWL)
|
||||
};
|
||||
|
||||
@@ -29,5 +29,6 @@ module.exports = {
|
||||
RENTAL_CRAWLER_AD_CATEGORIES: transformedRentalCrawlerAdCategories,
|
||||
RENTAL_IGNORED_USERNAMES: rentalIgnoredUsernames || [],
|
||||
RENTAL_DELAY_BETWEEN_PAGES:
|
||||
parseInt(process.env.RENTAL_DELAY_BETWEEN_PAGES) || 1000
|
||||
parseInt(process.env.RENTAL_DELAY_BETWEEN_PAGES) || 1000,
|
||||
RENTAL_FORCE_CRAWL: !!parseInt(process.env.RENTAL_FORCE_CRAWL)
|
||||
};
|
||||
|
||||
@@ -39,6 +39,8 @@ const AKTIDO_ENUMS = {
|
||||
AKTIDO_RENEWED_DATE_FORMAT: "YYYY-MM-DD u HH:mm:ss"
|
||||
};
|
||||
|
||||
const { AKTIDO_FORCE_CRAWL } = require("../specificConfigs/aktido");
|
||||
|
||||
class AktidoCrawler {
|
||||
constructor(
|
||||
savers = [],
|
||||
@@ -88,27 +90,13 @@ class AktidoCrawler {
|
||||
|
||||
newRealEstates.push(...newRecords);
|
||||
|
||||
if (Array.isArray(newRecords) && newRecords.length === 0) {
|
||||
if (
|
||||
Array.isArray(newRecords) &&
|
||||
newRecords.length === 0 &&
|
||||
!AKTIDO_FORCE_CRAWL
|
||||
) {
|
||||
generatorsToRemove[index] = true;
|
||||
}
|
||||
|
||||
// for (const existingRecord of existingRecords) {
|
||||
// const { publishedDate, renewedDate } = existingRecord;
|
||||
//
|
||||
// const publishedDateMoment = moment.utc(publishedDate);
|
||||
// const renewedDateMoment = moment.utc(renewedDate);
|
||||
//
|
||||
// const stopCrawlingThisCategory = publishedDateMoment.isSame(
|
||||
// renewedDateMoment,
|
||||
// "minute"
|
||||
// );
|
||||
//
|
||||
// if (stopCrawlingThisCategory) {
|
||||
// generatorsToRemove[index] = true;
|
||||
// // console.log("\tGenerator ", index + 1, "has no more new ads");
|
||||
// break;
|
||||
// }
|
||||
// }
|
||||
} else {
|
||||
//Generator returned undefined, remove this generator from array
|
||||
generatorsToRemove[index] = true;
|
||||
|
||||
@@ -22,7 +22,8 @@ const OLX_ENUMS = {
|
||||
OLX_AD_TYPE: {
|
||||
[CRAWLER_AD_TYPE.ALL]: "",
|
||||
[CRAWLER_AD_TYPE.ONLY_SELL]: "&vrsta=samoprodaja",
|
||||
[CRAWLER_AD_TYPE.ONLY_RENT]: "&vrsta=samoizdavanje"
|
||||
[CRAWLER_AD_TYPE.ONLY_RENT]: "&vrsta=samoizdavanje",
|
||||
[CRAWLER_AD_TYPE.ONLY_REQUEST]: "&vrsta=samopotraznja"
|
||||
},
|
||||
OLX_AD_CATEGORY: {
|
||||
[AD_CATEGORY.FLAT.id]: "&kategorija=23",
|
||||
@@ -38,6 +39,8 @@ const OLX_ENUMS = {
|
||||
OLX_RENEWED_DATE_FORMAT: "DD.MM.YYYY. u HH:mm"
|
||||
};
|
||||
|
||||
const { OLX_FORCE_CRAWL } = require("../specificConfigs/olx");
|
||||
|
||||
class OlxCrawler {
|
||||
constructor(
|
||||
savers = [],
|
||||
@@ -99,7 +102,7 @@ class OlxCrawler {
|
||||
"minute"
|
||||
);
|
||||
|
||||
if (stopCrawlingThisCategory) {
|
||||
if (stopCrawlingThisCategory && !OLX_FORCE_CRAWL) {
|
||||
generatorsToRemove[index] = true;
|
||||
// console.log("\tGenerator ", index + 1, "has no more new ads");
|
||||
break;
|
||||
@@ -134,7 +137,7 @@ class OlxCrawler {
|
||||
|
||||
const urlAdTypePart = OLX_ENUMS.OLX_AD_TYPE[this.crawlerAdTypes];
|
||||
const urlCategoryPart = OLX_ENUMS.OLX_AD_CATEGORY[adCategory];
|
||||
if (urlAdTypePart && urlCategoryPart) {
|
||||
if (urlAdTypePart !== undefined && urlCategoryPart !== undefined) {
|
||||
while (true) {
|
||||
const urlPageToCrawl = `${this.baseUrl}${urlAdTypePart}${urlCategoryPart}&stranica=${pageToIndex}`;
|
||||
const singlePageResults = await this.indexSinglePage(
|
||||
@@ -212,7 +215,7 @@ class OlxCrawler {
|
||||
title: "#naslovartikla",
|
||||
descriptions: ".artikal_detaljniopis_tekst",
|
||||
category:
|
||||
"#artikal_glavni_div > div.artikal_lijevo > div:nth-child(3) > div > span:nth-child(3) > a > span"
|
||||
"#artikal_glavni_div > div.artikal_lijevo > div.artikal_kat > div > span:nth-child(3) > a > span"
|
||||
};
|
||||
|
||||
const username = $(propertySelectors.username)
|
||||
@@ -384,7 +387,7 @@ class OlxCrawler {
|
||||
//=========================================
|
||||
const parsedCategory = this.getAdCategoryId(category);
|
||||
if (!parsedCategory) {
|
||||
throw { message: "Unknown ad category" };
|
||||
throw { message: `Unknown ad category [${category}]` };
|
||||
}
|
||||
|
||||
const parsedAdType = this.getAdTypeId(adType);
|
||||
@@ -475,6 +478,8 @@ class OlxCrawler {
|
||||
return AD_TYPE.AD_TYPE_SALE;
|
||||
case "Izdavanje":
|
||||
return AD_TYPE.AD_TYPE_RENT;
|
||||
case "Potražnja":
|
||||
return AD_TYPE.AD_TYPE_RENT;
|
||||
default:
|
||||
return undefined;
|
||||
}
|
||||
|
||||
@@ -39,6 +39,8 @@ const RENTAL_ENUMS = {
|
||||
RENTAL_RENEWED_DATE_FORMAT: "YYYY-MM-DD u HH:mm:ss"
|
||||
};
|
||||
|
||||
const { RENTAL_FORCE_CRAWL } = require("../specificConfigs/rental");
|
||||
|
||||
class RentalCrawler {
|
||||
constructor(
|
||||
savers = [],
|
||||
@@ -88,27 +90,13 @@ class RentalCrawler {
|
||||
|
||||
newRealEstates.push(...newRecords);
|
||||
|
||||
if (Array.isArray(newRecords) && newRecords.length === 0) {
|
||||
if (
|
||||
Array.isArray(newRecords) &&
|
||||
newRecords.length === 0 &&
|
||||
!RENTAL_FORCE_CRAWL
|
||||
) {
|
||||
generatorsToRemove[index] = true;
|
||||
}
|
||||
|
||||
// for (const existingRecord of existingRecords) {
|
||||
// const { publishedDate, renewedDate } = existingRecord;
|
||||
//
|
||||
// const publishedDateMoment = moment.utc(publishedDate);
|
||||
// const renewedDateMoment = moment.utc(renewedDate);
|
||||
//
|
||||
// const stopCrawlingThisCategory = publishedDateMoment.isSame(
|
||||
// renewedDateMoment,
|
||||
// "minute"
|
||||
// );
|
||||
//
|
||||
// if (stopCrawlingThisCategory) {
|
||||
// generatorsToRemove[index] = true;
|
||||
// // console.log("\tGenerator ", index + 1, "has no more new ads");
|
||||
// break;
|
||||
// }
|
||||
// }
|
||||
} else {
|
||||
//Generator returned undefined, remove this generator from array
|
||||
generatorsToRemove[index] = true;
|
||||
|
||||
Reference in New Issue
Block a user