From 1229b3fa6c84a63fbad534505b34f16e774811a6 Mon Sep 17 00:00:00 2001 From: Bilal Date: Tue, 16 Jun 2020 19:51:13 +0200 Subject: [PATCH] Add more debug logs --- app/crawler/specificCrawlers/olx.js | 33 +++++++++++++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/app/crawler/specificCrawlers/olx.js b/app/crawler/specificCrawlers/olx.js index bd8afd6..da8afef 100644 --- a/app/crawler/specificCrawlers/olx.js +++ b/app/crawler/specificCrawlers/olx.js @@ -88,14 +88,31 @@ class OlxCrawler { const entries = singlePageResults.entries(); for (const [index, { value: singlePageResult }] of entries) { + if (PRINT_CRAWLER_DEBUG) { + console.log("================================"); + console.log("Category Indexer index : ", index); + console.log("\tTotal entries : ", singlePageResult.length) + } if (singlePageResult) { const saveResults = await this.saveCrawledResults(singlePageResult); const { newRecords, existingRecords } = saveResults; + if (PRINT_CRAWLER_DEBUG) { + console.log("--------------------------"); + console.log("\tNew record URLs [", newRecords.length, "] :"); + + for(const newRecord of newRecords) { + console.log("\t\t",newRecord.url); + } + + console.log("\t-------------------------"); + console.log("\tExisting record URLs [", existingRecords.length, "] :"); + } + newRealEstates.push(...newRecords); for (const existingRecord of existingRecords) { - const { publishedDate, renewedDate } = existingRecord; + const { publishedDate, renewedDate, url } = existingRecord; const publishedDateMoment = moment.utc(publishedDate); const renewedDateMoment = moment.utc(renewedDate); @@ -105,13 +122,25 @@ class OlxCrawler { "minute" ); + if (PRINT_CRAWLER_DEBUG) { + console.log("\t\t", url); + console.log("\t\t\tPublished date : ", publishedDate); + console.log("\t\t\tRenewed date : ", renewedDate); + console.log("\t\t\tIs same (up to minute) : ", stopCrawlingThisCategory); + } + if (stopCrawlingThisCategory && !OLX_FORCE_CRAWL) { generatorsToRemove[index] = true; - // console.log("\tGenerator ", index + 1, "has no more new ads"); + if (PRINT_CRAWLER_DEBUG) { + console.log("\t\t\tStopping this category indexer"); + } break; } } } else { + if (PRINT_CRAWLER_DEBUG) { + console.log("\tNo more entries in this category, stopping!"); + } //Generator returned undefined, remove this generator from array generatorsToRemove[index] = true; // console.log("Generator ", index + 1, "has no more pages");