Compare commits

..

1 Commit

Author SHA1 Message Date
Naida Vatric
5bdc8e149a Prostor VIP ads fixed. 2020-02-14 22:41:51 +01:00
2 changed files with 19 additions and 26 deletions

View File

@@ -191,13 +191,7 @@ class ProstorCrawler {
const { lat, lng, property_name, price, size, link, status } = realEstate; const { lat, lng, property_name, price, size, link, status } = realEstate;
//Status information is given already in realestate list //Status information is given already in realestate list
//For VIP Ads status ='' cannot be used, but no VIP ads are crawled const adStatus = ProstorCrawler.getStatusId(status);
//We will make "fake" vip ad for RE that have size=55
//It is weird because yesterday it said 'VIP ponuda' ???
const adStatus =
size === "55"
? ProstorCrawler.getStatusId("VIP ponuda")
: ProstorCrawler.getStatusId(status);
const url = `https://prostor.ba${link}`; const url = `https://prostor.ba${link}`;

View File

@@ -218,7 +218,7 @@ class SaljicCrawler {
} }
async scrapeAd(url, adType) { async scrapeAd(url, adType) {
console.log("[SALJIC] Scraping : ", url); // console.log("[SALJIC] Scraping : ", url);
try { try {
const adPageSource = await fetch(url); const adPageSource = await fetch(url);
const body = await adPageSource.text(); const body = await adPageSource.text();
@@ -227,9 +227,7 @@ class SaljicCrawler {
// No information for status ex. PRODAN // No information for status ex. PRODAN
const status = AD_STATUS.STATUS_NORMAL; const status = AD_STATUS.STATUS_NORMAL;
//Extracting agency ID from url //Extracting agency ID from url
const agencyObjectId = url const agencyObjectId = parseInt(url.substring(46, url.length));
? parseInt(url.substring(46, url.length))
: null;
//Extracting main properties //Extracting main properties
const propertySelectors = { const propertySelectors = {
@@ -274,10 +272,14 @@ class SaljicCrawler {
.trim(); .trim();
const latAndLongSrc = $(propertySelectors.latAndLong).attr("src"); const latAndLongSrc = $(propertySelectors.latAndLong).attr("src");
const tmpLatLong = latAndLongSrc.split("marker=")[1]; const latText = latAndLongSrc.substring(
const latText = tmpLatLong.split("%2C")[0]; latAndLongSrc.indexOf("marker=") + 7,
const longText = tmpLatLong.split("%2C")[1]; latAndLongSrc.indexOf("%2C", latAndLongSrc.indexOf("marker="))
);
const longText = latAndLongSrc.substring(
latAndLongSrc.indexOf("%2C", latAndLongSrc.indexOf("marker=")) + 3,
latAndLongSrc.length
);
const locationLat = parseFloat(latText) || null; const locationLat = parseFloat(latText) || null;
const locationLong = parseFloat(longText) || null; const locationLong = parseFloat(longText) || null;
@@ -326,10 +328,11 @@ class SaljicCrawler {
let numberOfViewsKivi = null; let numberOfViewsKivi = null;
let streetNumber = 0; let streetNumber = 0;
let adStatus = status; let adStatus = status;
let shortDescription = descriptions let shortDescription = descriptions.substring(
? descriptions.substring(0, descriptions.indexOf(".")) 0,
: ""; descriptions.indexOf(".")
let longDescription = descriptions || ""; );
let longDescription = descriptions;
//Extracting data - Glavne karakteristike //Extracting data - Glavne karakteristike
let mainFieldIndex = 1; let mainFieldIndex = 1;
do { do {
@@ -340,14 +343,10 @@ class SaljicCrawler {
.replace(/[\n\r\t]/gm, "") .replace(/[\n\r\t]/gm, "")
.trim(); .trim();
const mainFieldTitle = mainField const mainFieldTitle = mainField.substring(0, mainField.indexOf(" "));
? mainField.substring(0, mainField.indexOf(" "))
: "";
const mainFieldValue = mainField const mainFieldValue = mainField
? mainField .substring(mainField.indexOf(" "), mainField.length)
.substring(mainField.indexOf(" "), mainField.length) .trim();
.trim()
: "";
switch (mainFieldTitle) { switch (mainFieldTitle) {
case "Površina": case "Površina":