Add more debug logs
This commit is contained in:
@@ -88,14 +88,31 @@ class OlxCrawler {
|
|||||||
const entries = singlePageResults.entries();
|
const entries = singlePageResults.entries();
|
||||||
|
|
||||||
for (const [index, { value: singlePageResult }] of entries) {
|
for (const [index, { value: singlePageResult }] of entries) {
|
||||||
|
if (PRINT_CRAWLER_DEBUG) {
|
||||||
|
console.log("================================");
|
||||||
|
console.log("Category Indexer index : ", index);
|
||||||
|
console.log("\tTotal entries : ", singlePageResult.length)
|
||||||
|
}
|
||||||
if (singlePageResult) {
|
if (singlePageResult) {
|
||||||
const saveResults = await this.saveCrawledResults(singlePageResult);
|
const saveResults = await this.saveCrawledResults(singlePageResult);
|
||||||
const { newRecords, existingRecords } = saveResults;
|
const { newRecords, existingRecords } = saveResults;
|
||||||
|
|
||||||
|
if (PRINT_CRAWLER_DEBUG) {
|
||||||
|
console.log("--------------------------");
|
||||||
|
console.log("\tNew record URLs [", newRecords.length, "] :");
|
||||||
|
|
||||||
|
for(const newRecord of newRecords) {
|
||||||
|
console.log("\t\t",newRecord.url);
|
||||||
|
}
|
||||||
|
|
||||||
|
console.log("\t-------------------------");
|
||||||
|
console.log("\tExisting record URLs [", existingRecords.length, "] :");
|
||||||
|
}
|
||||||
|
|
||||||
newRealEstates.push(...newRecords);
|
newRealEstates.push(...newRecords);
|
||||||
|
|
||||||
for (const existingRecord of existingRecords) {
|
for (const existingRecord of existingRecords) {
|
||||||
const { publishedDate, renewedDate } = existingRecord;
|
const { publishedDate, renewedDate, url } = existingRecord;
|
||||||
|
|
||||||
const publishedDateMoment = moment.utc(publishedDate);
|
const publishedDateMoment = moment.utc(publishedDate);
|
||||||
const renewedDateMoment = moment.utc(renewedDate);
|
const renewedDateMoment = moment.utc(renewedDate);
|
||||||
@@ -105,13 +122,25 @@ class OlxCrawler {
|
|||||||
"minute"
|
"minute"
|
||||||
);
|
);
|
||||||
|
|
||||||
|
if (PRINT_CRAWLER_DEBUG) {
|
||||||
|
console.log("\t\t", url);
|
||||||
|
console.log("\t\t\tPublished date : ", publishedDate);
|
||||||
|
console.log("\t\t\tRenewed date : ", renewedDate);
|
||||||
|
console.log("\t\t\tIs same (up to minute) : ", stopCrawlingThisCategory);
|
||||||
|
}
|
||||||
|
|
||||||
if (stopCrawlingThisCategory && !OLX_FORCE_CRAWL) {
|
if (stopCrawlingThisCategory && !OLX_FORCE_CRAWL) {
|
||||||
generatorsToRemove[index] = true;
|
generatorsToRemove[index] = true;
|
||||||
// console.log("\tGenerator ", index + 1, "has no more new ads");
|
if (PRINT_CRAWLER_DEBUG) {
|
||||||
|
console.log("\t\t\tStopping this category indexer");
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
if (PRINT_CRAWLER_DEBUG) {
|
||||||
|
console.log("\tNo more entries in this category, stopping!");
|
||||||
|
}
|
||||||
//Generator returned undefined, remove this generator from array
|
//Generator returned undefined, remove this generator from array
|
||||||
generatorsToRemove[index] = true;
|
generatorsToRemove[index] = true;
|
||||||
// console.log("Generator ", index + 1, "has no more pages");
|
// console.log("Generator ", index + 1, "has no more pages");
|
||||||
|
|||||||
Reference in New Issue
Block a user