Changed delay between ads.
This commit is contained in:
@@ -217,6 +217,8 @@ class OlxCrawler {
|
||||
//let numberOfParseErrors = 0;
|
||||
// do {
|
||||
try {
|
||||
await this.sleep(this.delayBetweenAds);
|
||||
|
||||
const adPageSource = await fetch(url);
|
||||
const body = await adPageSource.text();
|
||||
const $ = cheerio.load(body);
|
||||
@@ -694,6 +696,8 @@ class OlxCrawler {
|
||||
console.error("Exception caught: " + e.message, "\r\nURL:", url);
|
||||
}
|
||||
// } while (hasParseErrors && numberOfParseErrors <= 1);
|
||||
await this.sleep(this.delayBetweenAds);
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
|
||||
@@ -84,6 +84,7 @@ class SaljicCrawler {
|
||||
for (const [index, { value: singlePageResult }] of entries) {
|
||||
if (singlePageResult) {
|
||||
const saveResults = await this.saveCrawledResults(singlePageResult);
|
||||
|
||||
const { newRecords } = saveResults;
|
||||
|
||||
newRealEstates.push(...newRecords);
|
||||
@@ -217,8 +218,8 @@ class SaljicCrawler {
|
||||
}
|
||||
}
|
||||
|
||||
async scrapeAd(url, adType) {
|
||||
console.log("[SALJIC] Scraping : ", url);
|
||||
async scrapeAd(url, adTypeAttribute) {
|
||||
//console.log("[SALJIC] Scraping : ", url);
|
||||
try {
|
||||
const adPageSource = await fetch(url);
|
||||
const body = await adPageSource.text();
|
||||
@@ -234,7 +235,7 @@ class SaljicCrawler {
|
||||
//Extracting main properties
|
||||
const propertySelectors = {
|
||||
title:
|
||||
"div.content-wrap > div.container > div.col-md-8.nobottommargin > div.single-post > div.entry > div.entry-title > h2",
|
||||
"div.content-wrap > div.container.clearfix.wpc > div.col-md-8.nobottommargin > div.single-post.nobottommargin > div.entry.clearfix > div.entry-title > h2",
|
||||
price:
|
||||
"div.content-wrap > div.container > div.col-md-8.nobottommargin > div.single-post > div.entry > div.topmargin-sm.single-product > div.product > div.product-price > ins",
|
||||
streetName:
|
||||
@@ -245,6 +246,7 @@ class SaljicCrawler {
|
||||
latAndLong:
|
||||
"div.content-wrap > div.container > div.col-md-8.nobottommargin > div.single-post > div.entry > div.entry-content.topmargin > div.gmap.bottommargin > iframe"
|
||||
};
|
||||
|
||||
const title = $(propertySelectors.title)
|
||||
.text()
|
||||
.replace(/(\r\n|\n|\r)/gm, "")
|
||||
@@ -277,7 +279,8 @@ class SaljicCrawler {
|
||||
let tmpLatLong;
|
||||
let latText;
|
||||
let longText;
|
||||
if (latAndLongSrc) {
|
||||
|
||||
if (latAndLongSrc && latAndLongSrc.indexOf("openstreetmap") !== -1) {
|
||||
tmpLatLong = latAndLongSrc.split("marker=")[1];
|
||||
latText = tmpLatLong.split("%2C")[0];
|
||||
longText = tmpLatLong.split("%2C")[1];
|
||||
@@ -330,6 +333,7 @@ class SaljicCrawler {
|
||||
let numberOfViewsKivi = null;
|
||||
let streetNumber = 0;
|
||||
let adStatus = status;
|
||||
let adType = adTypeAttribute;
|
||||
let shortDescription = descriptions
|
||||
? descriptions.substring(0, descriptions.indexOf("."))
|
||||
: "";
|
||||
@@ -413,6 +417,7 @@ class SaljicCrawler {
|
||||
additionalField.length
|
||||
)
|
||||
.trim();
|
||||
|
||||
realEstateType = this.getAdCategoryId(categoryTmp);
|
||||
} else {
|
||||
switch (additionalField) {
|
||||
@@ -568,10 +573,14 @@ class SaljicCrawler {
|
||||
numberOfViewsKivi
|
||||
};
|
||||
|
||||
await this.sleep(1000);
|
||||
|
||||
return data;
|
||||
} catch (e) {
|
||||
console.error("Exception caught: " + e.message, "\r\nURL:", url);
|
||||
}
|
||||
await this.sleep(1000);
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
|
||||
@@ -17,6 +17,7 @@
|
||||
"checkup-notify": "cd app/npmScripts && node npmCheckUpNotify.js",
|
||||
"test-search": "cd test && node searchTest.js",
|
||||
"test-olx-scraper": "cd test && node olxScrapeTest.js",
|
||||
"test-saljic-scraper": "cd test && node saljicScrapeTest.js",
|
||||
"test-rental-scraper": "cd test && node rentalScrapeTest.js"
|
||||
},
|
||||
"repository": {
|
||||
|
||||
17
test/saljicScrapeTest.js
Normal file
17
test/saljicScrapeTest.js
Normal file
@@ -0,0 +1,17 @@
|
"use strict";

/**
 * Manual smoke test for the Saljic crawler: scrapes a single ad page and
 * prints whatever the crawler extracted.
 *
 * Usage:
 *   npm run test-saljic-scraper -- URL_TO_SCRAPE
 *
 * The URL is read from the first CLI argument. When it is missing, a short
 * usage hint is printed and nothing is scraped.
 */
const saljicCrawler = require("../app/crawler/specificCrawlers/saljic");

// An absent or empty argument is treated as "no URL given" by the check below.
const urlToScrape = process.argv[2];

if (urlToScrape) {
  const crawler = new saljicCrawler();

  // NOTE(review): scrapeAd appears to accept a second (ad type) argument that
  // is not supplied here — confirm whether this test should pass one.
  (async () => {
    const data = await crawler.scrapeAd(urlToScrape);
    console.log("Scraped data:", data);
  })().catch((err) => {
    // Never leave the promise floating: report anything that escapes the
    // crawler and signal failure through the exit code instead of relying on
    // Node's unhandled-rejection behaviour.
    console.error("Scrape failed:", err);
    process.exitCode = 1;
  });
} else {
  console.log("No URL to scrape. Use like this : ");
  console.log("npm run test-saljic-scraper -- URL_TO_SCRAPE");
}
|
||||
Reference in New Issue
Block a user