Implement renting option - crawler part #64

Merged
bilal.catic merged 5 commits from implement-renting-option into master 2019-10-31 19:05:55 +01:00
7 changed files with 35 additions and 48 deletions

View File

@@ -60,7 +60,8 @@ const GARAGE_PRICE_SLIDER_OPTIONS = {
const AD_TYPE = {
AD_TYPE_SALE: "SALE",
AD_TYPE_RENT: "RENT"
AD_TYPE_RENT: "RENT",
AD_TYPE_REQUEST: "REQUEST"
};
const AD_CATEGORY = {
@@ -140,7 +141,8 @@ const CRAWLER_AD_TYPE = {
NONE: 0,
ALL: 1,
ONLY_SELL: 2,
ONLY_RENT: 3
ONLY_RENT: 3,
ONLY_REQUEST: 4
};
module.exports = {

View File

@@ -29,5 +29,6 @@ module.exports = {
AKTIDO_CRAWLER_AD_CATEGORIES: transformedAktidoCrawlerAdCategories,
AKTIDO_IGNORED_USERNAMES: aktidoIgnoredUsernames || [],
AKTIDO_DELAY_BETWEEN_PAGES:
parseInt(process.env.AKTIDO_DELAY_BETWEEN_PAGES) || 1000
parseInt(process.env.AKTIDO_DELAY_BETWEEN_PAGES) || 1000,
AKTIDO_FORCE_CRAWL: !!parseInt(process.env.AKTIDO_FORCE_CRAWL)
};

View File

@@ -33,5 +33,7 @@ module.exports = {
OLX_CRAWLER_AD_TYPE: olxCrawlerAdType || CRAWLER_AD_TYPE.NONE,
OLX_CRAWLER_AD_CATEGORIES: transformedOlxCrawlerAdCategories,
OLX_IGNORED_USERNAMES: olxIgnoredUsernames || [],
OLX_DELAY_BETWEEN_PAGES: parseInt(process.env.OLX_DELAY_BETWEEN_PAGES) || 1000
OLX_DELAY_BETWEEN_PAGES:
parseInt(process.env.OLX_DELAY_BETWEEN_PAGES) || 1000,
OLX_FORCE_CRAWL: !!parseInt(process.env.OLX_FORCE_CRAWL)
};

View File

@@ -29,5 +29,6 @@ module.exports = {
RENTAL_CRAWLER_AD_CATEGORIES: transformedRentalCrawlerAdCategories,
RENTAL_IGNORED_USERNAMES: rentalIgnoredUsernames || [],
RENTAL_DELAY_BETWEEN_PAGES:
parseInt(process.env.RENTAL_DELAY_BETWEEN_PAGES) || 1000
parseInt(process.env.RENTAL_DELAY_BETWEEN_PAGES) || 1000,
RENTAL_FORCE_CRAWL: !!parseInt(process.env.RENTAL_FORCE_CRAWL)
};

View File

@@ -39,6 +39,8 @@ const AKTIDO_ENUMS = {
AKTIDO_RENEWED_DATE_FORMAT: "YYYY-MM-DD u HH:mm:ss"
};
const { AKTIDO_FORCE_CRAWL } = require("../specificConfigs/aktido");
class AktidoCrawler {
constructor(
savers = [],
@@ -88,27 +90,13 @@ class AktidoCrawler {
newRealEstates.push(...newRecords);
if (Array.isArray(newRecords) && newRecords.length === 0) {
if (
Array.isArray(newRecords) &&
newRecords.length === 0 &&
!AKTIDO_FORCE_CRAWL
) {
generatorsToRemove[index] = true;
}
// for (const existingRecord of existingRecords) {
// const { publishedDate, renewedDate } = existingRecord;
//
// const publishedDateMoment = moment.utc(publishedDate);
// const renewedDateMoment = moment.utc(renewedDate);
//
// const stopCrawlingThisCategory = publishedDateMoment.isSame(
// renewedDateMoment,
// "minute"
// );
//
// if (stopCrawlingThisCategory) {
// generatorsToRemove[index] = true;
// // console.log("\tGenerator ", index + 1, "has no more new ads");
// break;
// }
// }
} else {
//Generator returned undefined, remove this generator from array
generatorsToRemove[index] = true;

View File

@@ -22,7 +22,8 @@ const OLX_ENUMS = {
OLX_AD_TYPE: {
[CRAWLER_AD_TYPE.ALL]: "",
[CRAWLER_AD_TYPE.ONLY_SELL]: "&vrsta=samoprodaja",
[CRAWLER_AD_TYPE.ONLY_RENT]: "&vrsta=samoizdavanje"
[CRAWLER_AD_TYPE.ONLY_RENT]: "&vrsta=samoizdavanje",
[CRAWLER_AD_TYPE.ONLY_REQUEST]: "&vrsta=samopotraznja"
},
OLX_AD_CATEGORY: {
[AD_CATEGORY.FLAT.id]: "&kategorija=23",
@@ -38,6 +39,8 @@ const OLX_ENUMS = {
OLX_RENEWED_DATE_FORMAT: "DD.MM.YYYY. u HH:mm"
};
const { OLX_FORCE_CRAWL } = require("../specificConfigs/olx");
class OlxCrawler {
constructor(
savers = [],
@@ -99,7 +102,7 @@ class OlxCrawler {
"minute"
);
if (stopCrawlingThisCategory) {
if (stopCrawlingThisCategory && !OLX_FORCE_CRAWL) {
generatorsToRemove[index] = true;
// console.log("\tGenerator ", index + 1, "has no more new ads");
break;
@@ -134,7 +137,7 @@ class OlxCrawler {
const urlAdTypePart = OLX_ENUMS.OLX_AD_TYPE[this.crawlerAdTypes];
const urlCategoryPart = OLX_ENUMS.OLX_AD_CATEGORY[adCategory];
if (urlAdTypePart && urlCategoryPart) {
if (urlAdTypePart !== undefined && urlCategoryPart !== undefined) {
while (true) {
const urlPageToCrawl = `${this.baseUrl}${urlAdTypePart}${urlCategoryPart}&stranica=${pageToIndex}`;
const singlePageResults = await this.indexSinglePage(
@@ -212,7 +215,7 @@ class OlxCrawler {
title: "#naslovartikla",
descriptions: ".artikal_detaljniopis_tekst",
category:
"#artikal_glavni_div > div.artikal_lijevo > div:nth-child(3) > div > span:nth-child(3) > a > span"
"#artikal_glavni_div > div.artikal_lijevo > div.artikal_kat > div > span:nth-child(3) > a > span"
};
const username = $(propertySelectors.username)
@@ -384,7 +387,7 @@ class OlxCrawler {
//=========================================
const parsedCategory = this.getAdCategoryId(category);
if (!parsedCategory) {
throw { message: "Unknown ad category" };
throw { message: `Unknown ad category [${category}]` };
}
const parsedAdType = this.getAdTypeId(adType);
@@ -475,6 +478,8 @@ class OlxCrawler {
return AD_TYPE.AD_TYPE_SALE;
case "Izdavanje":
return AD_TYPE.AD_TYPE_RENT;
case "Potražnja":
return AD_TYPE.AD_TYPE_RENT;
default:
return undefined;
}

View File

@@ -39,6 +39,8 @@ const RENTAL_ENUMS = {
RENTAL_RENEWED_DATE_FORMAT: "YYYY-MM-DD u HH:mm:ss"
};
const { RENTAL_FORCE_CRAWL } = require("../specificConfigs/rental");
class RentalCrawler {
constructor(
savers = [],
@@ -88,27 +90,13 @@ class RentalCrawler {
newRealEstates.push(...newRecords);
if (Array.isArray(newRecords) && newRecords.length === 0) {
if (
Array.isArray(newRecords) &&
newRecords.length === 0 &&
!RENTAL_FORCE_CRAWL
) {
generatorsToRemove[index] = true;
}
// for (const existingRecord of existingRecords) {
// const { publishedDate, renewedDate } = existingRecord;
//
// const publishedDateMoment = moment.utc(publishedDate);
// const renewedDateMoment = moment.utc(renewedDate);
//
// const stopCrawlingThisCategory = publishedDateMoment.isSame(
// renewedDateMoment,
// "minute"
// );
//
// if (stopCrawlingThisCategory) {
// generatorsToRemove[index] = true;
// // console.log("\tGenerator ", index + 1, "has no more new ads");
// break;
// }
// }
} else {
//Generator returned undefined, remove this generator from array
generatorsToRemove[index] = true;