Compare commits

..

2 Commits

Author SHA1 Message Date
Naida Vatric
f56cd5b549 More elegant scrape of lat and long. 2020-02-17 21:55:24 +01:00
Naida Vatric
addd8c1344 Saljic crawler changed substring call. 2020-02-14 23:42:19 +01:00
4 changed files with 31 additions and 54 deletions

View File

@@ -218,7 +218,7 @@ class SaljicCrawler {
} }
async scrapeAd(url, adType) { async scrapeAd(url, adType) {
// console.log("[SALJIC] Scraping : ", url); console.log("[SALJIC] Scraping : ", url);
try { try {
const adPageSource = await fetch(url); const adPageSource = await fetch(url);
const body = await adPageSource.text(); const body = await adPageSource.text();
@@ -227,7 +227,9 @@ class SaljicCrawler {
// No information for status ex. PRODAN // No information for status ex. PRODAN
const status = AD_STATUS.STATUS_NORMAL; const status = AD_STATUS.STATUS_NORMAL;
//Extracting agency ID from url //Extracting agency ID from url
const agencyObjectId = parseInt(url.substring(46, url.length)); const agencyObjectId = url
? parseInt(url.substring(46, url.length))
: null;
//Extracting main properties //Extracting main properties
const propertySelectors = { const propertySelectors = {
@@ -272,14 +274,10 @@ class SaljicCrawler {
.trim(); .trim();
const latAndLongSrc = $(propertySelectors.latAndLong).attr("src"); const latAndLongSrc = $(propertySelectors.latAndLong).attr("src");
const latText = latAndLongSrc.substring( const tmpLatLong = latAndLongSrc.split("marker=")[1];
latAndLongSrc.indexOf("marker=") + 7, const latText = tmpLatLong.split("%2C")[0];
latAndLongSrc.indexOf("%2C", latAndLongSrc.indexOf("marker=")) const longText = tmpLatLong.split("%2C")[1];
);
const longText = latAndLongSrc.substring(
latAndLongSrc.indexOf("%2C", latAndLongSrc.indexOf("marker=")) + 3,
latAndLongSrc.length
);
const locationLat = parseFloat(latText) || null; const locationLat = parseFloat(latText) || null;
const locationLong = parseFloat(longText) || null; const locationLong = parseFloat(longText) || null;
@@ -328,11 +326,10 @@ class SaljicCrawler {
let numberOfViewsKivi = null; let numberOfViewsKivi = null;
let streetNumber = 0; let streetNumber = 0;
let adStatus = status; let adStatus = status;
let shortDescription = descriptions.substring( let shortDescription = descriptions
0, ? descriptions.substring(0, descriptions.indexOf("."))
descriptions.indexOf(".") : "";
); let longDescription = descriptions || "";
let longDescription = descriptions;
//Extracting data - Glavne karakteristike //Extracting data - Glavne karakteristike
let mainFieldIndex = 1; let mainFieldIndex = 1;
do { do {
@@ -343,10 +340,14 @@ class SaljicCrawler {
.replace(/[\n\r\t]/gm, "") .replace(/[\n\r\t]/gm, "")
.trim(); .trim();
const mainFieldTitle = mainField.substring(0, mainField.indexOf(" ")); const mainFieldTitle = mainField
? mainField.substring(0, mainField.indexOf(" "))
: "";
const mainFieldValue = mainField const mainFieldValue = mainField
.substring(mainField.indexOf(" "), mainField.length) ? mainField
.trim(); .substring(mainField.indexOf(" "), mainField.length)
.trim()
: "";
switch (mainFieldTitle) { switch (mainFieldTitle) {
case "Površina": case "Površina":

View File

@@ -332,14 +332,10 @@ const findRealEstatesForSearchRequest = async (searchRequest, maxResults) => {
}; };
} }
//When includeIncompleteAds are not defined - null it will consider it true
const order = [["updatedAt", "desc"]]; const order = [["updatedAt", "desc"]];
return db.RealEstate.findAll({ return db.RealEstate.findAll({
where: where: includeIncompleteAds ? queryIncludeIncomplete : query,
includeIncompleteAds || includeIncompleteAds == null
? queryIncludeIncomplete
: query,
limit: maxResults, limit: maxResults,
order order
}); });

View File

@@ -157,7 +157,7 @@ const findSearchRequestsForRealEstate = async realEstate => {
} else { } else {
// If real estate dont have defined number of rooms ex. null // If real estate dont have defined number of rooms ex. null
//It returns requests that didn't choose number of rooms - also null //It returns requests that didn't choose number of rooms - also null
//Or ones that picked some values but also picked to includeIncomplete ads (or default) //Or ones that picked some values but also picked to includeIncomplete ads
numberOfRoomsQuery = { numberOfRoomsQuery = {
[Op.or]: [ [Op.or]: [
{ {
@@ -176,10 +176,7 @@ const findSearchRequestsForRealEstate = async realEstate => {
}, },
{ {
includeIncompleteAds: { includeIncompleteAds: {
[Op.or]: { [Op.eq]: true
[Op.eq]: true,
[Op.is]: null
}
} }
} }
] ]
@@ -229,10 +226,7 @@ const findSearchRequestsForRealEstate = async realEstate => {
}, },
{ {
includeIncompleteAds: { includeIncompleteAds: {
[Op.or]: { [Op.eq]: true
[Op.eq]: true,
[Op.is]: null
}
} }
} }
] ]
@@ -281,10 +275,7 @@ const findSearchRequestsForRealEstate = async realEstate => {
}, },
{ {
includeIncompleteAds: { includeIncompleteAds: {
[Op.or]: { [Op.eq]: true
[Op.eq]: true,
[Op.is]: null
}
} }
} }
] ]
@@ -322,10 +313,7 @@ const findSearchRequestsForRealEstate = async realEstate => {
}, },
{ {
includeIncompleteAds: { includeIncompleteAds: {
[Op.or]: { [Op.eq]: true
[Op.eq]: true,
[Op.is]: null
}
} }
} }
] ]
@@ -359,10 +347,7 @@ const findSearchRequestsForRealEstate = async realEstate => {
}, },
{ {
includeIncompleteAds: { includeIncompleteAds: {
[Op.or]: { [Op.eq]: true
[Op.eq]: true,
[Op.is]: null
}
} }
} }
] ]
@@ -396,10 +381,7 @@ const findSearchRequestsForRealEstate = async realEstate => {
}, },
{ {
includeIncompleteAds: { includeIncompleteAds: {
[Op.or]: { [Op.eq]: true
[Op.eq]: true,
[Op.is]: null
}
} }
} }
] ]
@@ -441,13 +423,10 @@ const findSearchRequestsForRealEstate = async realEstate => {
[Op.eq]: "ANY" [Op.eq]: "ANY"
}; };
} }
//Tag to check if incomplete ads are accepted in query which is default //Tag to check if incomplete ads are accepted in query
if (checkForIncompleteWanted) { if (checkForIncompleteWanted) {
query.includeIncompleteAds = { query.includeIncompleteAds = {
[Op.or]: { [Op.eq]: true
[Op.eq]: true,
[Op.is]: null
}
}; };
} }

View File

@@ -61,8 +61,9 @@
<p class="distinguished"> <p class="distinguished">
<label class="checkbox-label"> <label class="checkbox-label">
<input type="checkbox" class="filled-in" name="includeIncompleteAds" <input type="checkbox" class="filled-in" name="includeIncompleteAds"
<% if (includeIncompleteAds) { %>
checked checked
> <% } %>>
<span>Uključi i oglase bez potpunih informacija</span> <span>Uključi i oglase bez potpunih informacija</span>
</label> </label>
</p> </p>