Stop the "rental" crawler if there are no new real estate listings on the page
This commit is contained in:
@@ -80,27 +80,31 @@ class RentalCrawler {
|
||||
for (const [index, { value: singlePageResult }] of entries) {
|
||||
if (singlePageResult) {
|
||||
const saveResults = await this.saveCrawledResults(singlePageResult);
|
||||
const { newRecords, existingRecords } = saveResults;
|
||||
const { newRecords } = saveResults;
|
||||
|
||||
newRealEstates.push(...newRecords);
|
||||
|
||||
for (const existingRecord of existingRecords) {
|
||||
const { publishedDate, renewedDate } = existingRecord;
|
||||
|
||||
const publishedDateMoment = moment.utc(publishedDate);
|
||||
const renewedDateMoment = moment.utc(renewedDate);
|
||||
|
||||
const stopCrawlingThisCategory = publishedDateMoment.isSame(
|
||||
renewedDateMoment,
|
||||
"minute"
|
||||
);
|
||||
|
||||
if (stopCrawlingThisCategory) {
|
||||
generatorsToRemove[index] = true;
|
||||
// console.log("\tGenerator ", index + 1, "has no more new ads");
|
||||
break;
|
||||
}
|
||||
if (Array.isArray(newRecords) && newRecords.length === 0) {
|
||||
generatorsToRemove[index] = true;
|
||||
}
|
||||
|
||||
// for (const existingRecord of existingRecords) {
|
||||
// const { publishedDate, renewedDate } = existingRecord;
|
||||
//
|
||||
// const publishedDateMoment = moment.utc(publishedDate);
|
||||
// const renewedDateMoment = moment.utc(renewedDate);
|
||||
//
|
||||
// const stopCrawlingThisCategory = publishedDateMoment.isSame(
|
||||
// renewedDateMoment,
|
||||
// "minute"
|
||||
// );
|
||||
//
|
||||
// if (stopCrawlingThisCategory) {
|
||||
// generatorsToRemove[index] = true;
|
||||
// // console.log("\tGenerator ", index + 1, "has no more new ads");
|
||||
// break;
|
||||
// }
|
||||
// }
|
||||
} else {
|
||||
//Generator returned undefined, remove this generator from array
|
||||
generatorsToRemove[index] = true;
|
||||
@@ -240,7 +244,10 @@ class RentalCrawler {
|
||||
}
|
||||
|
||||
const title = extractedData["re_realEstates_portalName"];
|
||||
const price = parseFloat(extractedData["re_realEstates_price"]);
|
||||
const extractedPrice = parseFloat(
|
||||
extractedData["re_realEstates_price"]
|
||||
);
|
||||
const price = extractedPrice ? extractedPrice : null;
|
||||
const area = parseFloat(extractedData["re_realEstates_area"]);
|
||||
const gardenSize = parseFloat(
|
||||
extractedData["re_realEstates_fieldArea"]
|
||||
|
||||
Reference in New Issue
Block a user