Stop the "rental" crawler if there are no new real estate listings on the page
This commit is contained in:
@@ -80,27 +80,31 @@ class RentalCrawler {
|
|||||||
for (const [index, { value: singlePageResult }] of entries) {
|
for (const [index, { value: singlePageResult }] of entries) {
|
||||||
if (singlePageResult) {
|
if (singlePageResult) {
|
||||||
const saveResults = await this.saveCrawledResults(singlePageResult);
|
const saveResults = await this.saveCrawledResults(singlePageResult);
|
||||||
const { newRecords, existingRecords } = saveResults;
|
const { newRecords } = saveResults;
|
||||||
|
|
||||||
newRealEstates.push(...newRecords);
|
newRealEstates.push(...newRecords);
|
||||||
|
|
||||||
for (const existingRecord of existingRecords) {
|
if (Array.isArray(newRecords) && newRecords.length === 0) {
|
||||||
const { publishedDate, renewedDate } = existingRecord;
|
generatorsToRemove[index] = true;
|
||||||
|
|
||||||
const publishedDateMoment = moment.utc(publishedDate);
|
|
||||||
const renewedDateMoment = moment.utc(renewedDate);
|
|
||||||
|
|
||||||
const stopCrawlingThisCategory = publishedDateMoment.isSame(
|
|
||||||
renewedDateMoment,
|
|
||||||
"minute"
|
|
||||||
);
|
|
||||||
|
|
||||||
if (stopCrawlingThisCategory) {
|
|
||||||
generatorsToRemove[index] = true;
|
|
||||||
// console.log("\tGenerator ", index + 1, "has no more new ads");
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// for (const existingRecord of existingRecords) {
|
||||||
|
// const { publishedDate, renewedDate } = existingRecord;
|
||||||
|
//
|
||||||
|
// const publishedDateMoment = moment.utc(publishedDate);
|
||||||
|
// const renewedDateMoment = moment.utc(renewedDate);
|
||||||
|
//
|
||||||
|
// const stopCrawlingThisCategory = publishedDateMoment.isSame(
|
||||||
|
// renewedDateMoment,
|
||||||
|
// "minute"
|
||||||
|
// );
|
||||||
|
//
|
||||||
|
// if (stopCrawlingThisCategory) {
|
||||||
|
// generatorsToRemove[index] = true;
|
||||||
|
// // console.log("\tGenerator ", index + 1, "has no more new ads");
|
||||||
|
// break;
|
||||||
|
// }
|
||||||
|
// }
|
||||||
} else {
|
} else {
|
||||||
//Generator returned undefined, remove this generator from array
|
//Generator returned undefined, remove this generator from array
|
||||||
generatorsToRemove[index] = true;
|
generatorsToRemove[index] = true;
|
||||||
@@ -240,7 +244,10 @@ class RentalCrawler {
|
|||||||
}
|
}
|
||||||
|
|
||||||
const title = extractedData["re_realEstates_portalName"];
|
const title = extractedData["re_realEstates_portalName"];
|
||||||
const price = parseFloat(extractedData["re_realEstates_price"]);
|
const extractedPrice = parseFloat(
|
||||||
|
extractedData["re_realEstates_price"]
|
||||||
|
);
|
||||||
|
const price = extractedPrice ? extractedPrice : null;
|
||||||
const area = parseFloat(extractedData["re_realEstates_area"]);
|
const area = parseFloat(extractedData["re_realEstates_area"]);
|
||||||
const gardenSize = parseFloat(
|
const gardenSize = parseFloat(
|
||||||
extractedData["re_realEstates_fieldArea"]
|
extractedData["re_realEstates_fieldArea"]
|
||||||
|
|||||||
Reference in New Issue
Block a user