diff --git a/app/common/enums.js b/app/common/enums.js index 9742f21..aa61b84 100644 --- a/app/common/enums.js +++ b/app/common/enums.js @@ -60,7 +60,8 @@ const GARAGE_PRICE_SLIDER_OPTIONS = { const AD_TYPE = { AD_TYPE_SALE: "SALE", - AD_TYPE_RENT: "RENT" + AD_TYPE_RENT: "RENT", + AD_TYPE_REQUEST: "REQUEST" }; const AD_CATEGORY = { @@ -140,7 +141,8 @@ const CRAWLER_AD_TYPE = { NONE: 0, ALL: 1, ONLY_SELL: 2, - ONLY_RENT: 3 + ONLY_RENT: 3, + ONLY_REQUEST: 4 }; module.exports = { diff --git a/app/crawler/specificConfigs/aktido.js b/app/crawler/specificConfigs/aktido.js index bd06645..092f09d 100644 --- a/app/crawler/specificConfigs/aktido.js +++ b/app/crawler/specificConfigs/aktido.js @@ -29,5 +29,6 @@ module.exports = { AKTIDO_CRAWLER_AD_CATEGORIES: transformedAktidoCrawlerAdCategories, AKTIDO_IGNORED_USERNAMES: aktidoIgnoredUsernames || [], AKTIDO_DELAY_BETWEEN_PAGES: - parseInt(process.env.AKTIDO_DELAY_BETWEEN_PAGES) || 1000 + parseInt(process.env.AKTIDO_DELAY_BETWEEN_PAGES) || 1000, + AKTIDO_FORCE_CRAWL: !!parseInt(process.env.AKTIDO_FORCE_CRAWL) }; diff --git a/app/crawler/specificConfigs/olx.js b/app/crawler/specificConfigs/olx.js index 53ca727..150ec16 100644 --- a/app/crawler/specificConfigs/olx.js +++ b/app/crawler/specificConfigs/olx.js @@ -33,5 +33,7 @@ module.exports = { OLX_CRAWLER_AD_TYPE: olxCrawlerAdType || CRAWLER_AD_TYPE.NONE, OLX_CRAWLER_AD_CATEGORIES: transformedOlxCrawlerAdCategories, OLX_IGNORED_USERNAMES: olxIgnoredUsernames || [], - OLX_DELAY_BETWEEN_PAGES: parseInt(process.env.OLX_DELAY_BETWEEN_PAGES) || 1000 + OLX_DELAY_BETWEEN_PAGES: + parseInt(process.env.OLX_DELAY_BETWEEN_PAGES) || 1000, + OLX_FORCE_CRAWL: !!parseInt(process.env.OLX_FORCE_CRAWL) }; diff --git a/app/crawler/specificConfigs/rental.js b/app/crawler/specificConfigs/rental.js index 8930d64..103723e 100644 --- a/app/crawler/specificConfigs/rental.js +++ b/app/crawler/specificConfigs/rental.js @@ -29,5 +29,6 @@ module.exports = { RENTAL_CRAWLER_AD_CATEGORIES: 
transformedRentalCrawlerAdCategories, RENTAL_IGNORED_USERNAMES: rentalIgnoredUsernames || [], RENTAL_DELAY_BETWEEN_PAGES: - parseInt(process.env.RENTAL_DELAY_BETWEEN_PAGES) || 1000 + parseInt(process.env.RENTAL_DELAY_BETWEEN_PAGES) || 1000, + RENTAL_FORCE_CRAWL: !!parseInt(process.env.RENTAL_FORCE_CRAWL) }; diff --git a/app/crawler/specificCrawlers/aktido.js b/app/crawler/specificCrawlers/aktido.js index a2ea43d..6512f15 100644 --- a/app/crawler/specificCrawlers/aktido.js +++ b/app/crawler/specificCrawlers/aktido.js @@ -39,6 +39,8 @@ const AKTIDO_ENUMS = { AKTIDO_RENEWED_DATE_FORMAT: "YYYY-MM-DD u HH:mm:ss" }; +const { AKTIDO_FORCE_CRAWL } = require("../specificConfigs/aktido"); + class AktidoCrawler { constructor( savers = [], @@ -88,27 +90,13 @@ class AktidoCrawler { newRealEstates.push(...newRecords); - if (Array.isArray(newRecords) && newRecords.length === 0) { + if ( + Array.isArray(newRecords) && + newRecords.length === 0 && + !AKTIDO_FORCE_CRAWL + ) { generatorsToRemove[index] = true; } - - // for (const existingRecord of existingRecords) { - // const { publishedDate, renewedDate } = existingRecord; - // - // const publishedDateMoment = moment.utc(publishedDate); - // const renewedDateMoment = moment.utc(renewedDate); - // - // const stopCrawlingThisCategory = publishedDateMoment.isSame( - // renewedDateMoment, - // "minute" - // ); - // - // if (stopCrawlingThisCategory) { - // generatorsToRemove[index] = true; - // // console.log("\tGenerator ", index + 1, "has no more new ads"); - // break; - // } - // } } else { //Generator returned undefined, remove this generator from array generatorsToRemove[index] = true; diff --git a/app/crawler/specificCrawlers/olx.js b/app/crawler/specificCrawlers/olx.js index d7176d1..bb08b95 100644 --- a/app/crawler/specificCrawlers/olx.js +++ b/app/crawler/specificCrawlers/olx.js @@ -22,7 +22,8 @@ const OLX_ENUMS = { OLX_AD_TYPE: { [CRAWLER_AD_TYPE.ALL]: "", [CRAWLER_AD_TYPE.ONLY_SELL]: "&vrsta=samoprodaja", - 
[CRAWLER_AD_TYPE.ONLY_RENT]: "&vrsta=samoizdavanje" + [CRAWLER_AD_TYPE.ONLY_RENT]: "&vrsta=samoizdavanje", + [CRAWLER_AD_TYPE.ONLY_REQUEST]: "&vrsta=samopotraznja" }, OLX_AD_CATEGORY: { [AD_CATEGORY.FLAT.id]: "&kategorija=23", @@ -38,6 +39,8 @@ const OLX_ENUMS = { OLX_RENEWED_DATE_FORMAT: "DD.MM.YYYY. u HH:mm" }; +const { OLX_FORCE_CRAWL } = require("../specificConfigs/olx"); + class OlxCrawler { constructor( savers = [], @@ -99,7 +102,7 @@ class OlxCrawler { "minute" ); - if (stopCrawlingThisCategory) { + if (stopCrawlingThisCategory && !OLX_FORCE_CRAWL) { generatorsToRemove[index] = true; // console.log("\tGenerator ", index + 1, "has no more new ads"); break; @@ -134,7 +137,7 @@ class OlxCrawler { const urlAdTypePart = OLX_ENUMS.OLX_AD_TYPE[this.crawlerAdTypes]; const urlCategoryPart = OLX_ENUMS.OLX_AD_CATEGORY[adCategory]; - if (urlAdTypePart && urlCategoryPart) { + if (urlAdTypePart !== undefined && urlCategoryPart !== undefined) { while (true) { const urlPageToCrawl = `${this.baseUrl}${urlAdTypePart}${urlCategoryPart}&stranica=${pageToIndex}`; const singlePageResults = await this.indexSinglePage( @@ -212,7 +215,7 @@ class OlxCrawler { title: "#naslovartikla", descriptions: ".artikal_detaljniopis_tekst", category: - "#artikal_glavni_div > div.artikal_lijevo > div:nth-child(3) > div > span:nth-child(3) > a > span" + "#artikal_glavni_div > div.artikal_lijevo > div.artikal_kat > div > span:nth-child(3) > a > span" }; const username = $(propertySelectors.username) @@ -384,7 +387,7 @@ class OlxCrawler { //========================================= const parsedCategory = this.getAdCategoryId(category); if (!parsedCategory) { - throw { message: "Unknown ad category" }; + throw { message: `Unknown ad category [${category}]` }; } const parsedAdType = this.getAdTypeId(adType); @@ -475,6 +478,8 @@ class OlxCrawler { return AD_TYPE.AD_TYPE_SALE; case "Izdavanje": return AD_TYPE.AD_TYPE_RENT; + case "Potražnja": + return AD_TYPE.AD_TYPE_REQUEST; default: return undefined; } 
diff --git a/app/crawler/specificCrawlers/rental.js b/app/crawler/specificCrawlers/rental.js index b73278e..6293d1f 100644 --- a/app/crawler/specificCrawlers/rental.js +++ b/app/crawler/specificCrawlers/rental.js @@ -39,6 +39,8 @@ const RENTAL_ENUMS = { RENTAL_RENEWED_DATE_FORMAT: "YYYY-MM-DD u HH:mm:ss" }; +const { RENTAL_FORCE_CRAWL } = require("../specificConfigs/rental"); + class RentalCrawler { constructor( savers = [], @@ -88,27 +90,13 @@ class RentalCrawler { newRealEstates.push(...newRecords); - if (Array.isArray(newRecords) && newRecords.length === 0) { + if ( + Array.isArray(newRecords) && + newRecords.length === 0 && + !RENTAL_FORCE_CRAWL + ) { generatorsToRemove[index] = true; } - - // for (const existingRecord of existingRecords) { - // const { publishedDate, renewedDate } = existingRecord; - // - // const publishedDateMoment = moment.utc(publishedDate); - // const renewedDateMoment = moment.utc(renewedDate); - // - // const stopCrawlingThisCategory = publishedDateMoment.isSame( - // renewedDateMoment, - // "minute" - // ); - // - // if (stopCrawlingThisCategory) { - // generatorsToRemove[index] = true; - // // console.log("\tGenerator ", index + 1, "has no more new ads"); - // break; - // } - // } } else { //Generator returned undefined, remove this generator from array generatorsToRemove[index] = true;