Merge branch 'after-scraper-fix' into 'master'
After scraper fix. See merge request saburly/marketalarm/web!104
This commit was merged in pull request #104.
This commit is contained in:
@@ -205,7 +205,7 @@ class OlxCrawler {
|
|||||||
}
|
}
|
||||||
|
|
||||||
async scrapeAd(url) {
|
async scrapeAd(url) {
|
||||||
//console.log("Scraping : ", url);
|
console.log("Scraping : ", url);
|
||||||
try {
|
try {
|
||||||
const adPageSource = await fetch(url);
|
const adPageSource = await fetch(url);
|
||||||
const body = await adPageSource.text();
|
const body = await adPageSource.text();
|
||||||
@@ -240,13 +240,21 @@ class OlxCrawler {
|
|||||||
let price = null;
|
let price = null;
|
||||||
let normalPrice = null;
|
let normalPrice = null;
|
||||||
let urgentPrice = null;
|
let urgentPrice = null;
|
||||||
const normalPriceValue = $("#pc > p:nth-child(2)").text();
|
const normalPriceValue = $("#pc > p:nth-child(2)")
|
||||||
|
.text()
|
||||||
|
.trim();
|
||||||
const urgentPriceValue = $(
|
const urgentPriceValue = $(
|
||||||
"#artikal_glavni_div > div.artikal_lijevo > div:nth-child(5) > p"
|
"#artikal_glavni_div > div.artikal_lijevo > div:nth-child(5) > p"
|
||||||
)
|
)
|
||||||
.text()
|
.text()
|
||||||
.trim();
|
.trim();
|
||||||
|
|
||||||
|
//Debug
|
||||||
|
//console.log("Title:", title);
|
||||||
|
//console.log("Url scraped:", url);
|
||||||
|
// console.log("Normal price value:", normalPriceValue);
|
||||||
|
// console.log("Urgent price value:", urgentPriceValue);
|
||||||
|
//
|
||||||
if (normalPriceValue && normalPriceValue.length > 0) {
|
if (normalPriceValue && normalPriceValue.length > 0) {
|
||||||
normalPrice = normalPriceValue
|
normalPrice = normalPriceValue
|
||||||
.replace(/\r\n|\n|\r/gm, "")
|
.replace(/\r\n|\n|\r/gm, "")
|
||||||
@@ -288,7 +296,7 @@ class OlxCrawler {
|
|||||||
|
|
||||||
let otherInformationDivId;
|
let otherInformationDivId;
|
||||||
//We need to locate DIV ID where other information are stored
|
//We need to locate DIV ID where other information are stored
|
||||||
for (let possibleId = 10; possibleId <= 20; possibleId++) {
|
for (let possibleId = 1; possibleId <= 30; possibleId++) {
|
||||||
const adTypeFieldTitle = $(
|
const adTypeFieldTitle = $(
|
||||||
`#artikal_glavni_div > div.artikal_lijevo > div:nth-child(${possibleId}) > div:nth-child(2) > div.df1`
|
`#artikal_glavni_div > div.artikal_lijevo > div:nth-child(${possibleId}) > div:nth-child(2) > div.df1`
|
||||||
)
|
)
|
||||||
@@ -660,6 +668,8 @@ class OlxCrawler {
|
|||||||
distanceToRiver,
|
distanceToRiver,
|
||||||
numberOfViewsAgency
|
numberOfViewsAgency
|
||||||
};
|
};
|
||||||
|
//
|
||||||
|
//console.log("Scraped data:", data);
|
||||||
|
|
||||||
return data;
|
return data;
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
|
|||||||
@@ -274,10 +274,14 @@ class SaljicCrawler {
|
|||||||
.trim();
|
.trim();
|
||||||
|
|
||||||
const latAndLongSrc = $(propertySelectors.latAndLong).attr("src");
|
const latAndLongSrc = $(propertySelectors.latAndLong).attr("src");
|
||||||
const tmpLatLong = latAndLongSrc.split("marker=")[1];
|
let tmpLatLong;
|
||||||
const latText = tmpLatLong.split("%2C")[0];
|
let latText;
|
||||||
const longText = tmpLatLong.split("%2C")[1];
|
let longText;
|
||||||
|
if (latAndLongSrc) {
|
||||||
|
tmpLatLong = latAndLongSrc.split("marker=")[1];
|
||||||
|
latText = tmpLatLong.split("%2C")[0];
|
||||||
|
longText = tmpLatLong.split("%2C")[1];
|
||||||
|
}
|
||||||
const locationLat = parseFloat(latText) || null;
|
const locationLat = parseFloat(latText) || null;
|
||||||
const locationLong = parseFloat(longText) || null;
|
const locationLong = parseFloat(longText) || null;
|
||||||
|
|
||||||
|
|||||||
@@ -9,7 +9,7 @@ if (urlToScrape) {
|
|||||||
|
|
||||||
(async () => {
|
(async () => {
|
||||||
const data = await crawler.scrapeAd(urlToScrape);
|
const data = await crawler.scrapeAd(urlToScrape);
|
||||||
console.log(data);
|
console.log("Scraped data:", data);
|
||||||
})();
|
})();
|
||||||
} else {
|
} else {
|
||||||
console.log("No URL to scrape. Use like this : ");
|
console.log("No URL to scrape. Use like this : ");
|
||||||
|
|||||||
Reference in New Issue
Block a user