Compare commits

...

11 Commits

Author SHA1 Message Date
Naida Vatric
fc33c1210a Add more detail to the email 2020-01-13 14:58:09 +01:00
Naida Vatric
511b290096 Login to prostor.ba befoure crawl. 2020-01-13 12:05:33 +01:00
Naida Vatric
ba43fa0713 WIP Changed cookies. 2020-01-13 11:02:26 +01:00
Naida Vatric
e70901d369 WIP Changed login to crawler. 2020-01-13 09:12:03 +01:00
Naida Vatric
8505282670 WiP Login of crawler prostor. 2020-01-12 01:22:50 +01:00
Naida Vatric
64e4835899 Changed redirecting for VIP ads. 2020-01-10 22:52:50 +01:00
Naida Vatric
1658325c4b WIP Fake vip ads. 2020-01-10 19:20:26 +01:00
Naida Vatric
49161c1b60 WIP Changed redirecting for VIP ads. 2020-01-09 12:19:19 +01:00
Naida Vatric
d23ddf849f Results title text made into link. 2020-01-07 01:06:22 +01:00
Naida Vatric
38bd0343f5 Merge branch 'results-link' of gitlab.com:saburly/marketalarm/web into no-all-results-email 2020-01-07 01:01:57 +01:00
Naida Vatric
fa4e0d64de Changed email content to show number of all matching real estates. 2020-01-06 23:59:56 +01:00
13 changed files with 249 additions and 59 deletions

View File

@@ -216,7 +216,8 @@ const AD_STATUS = {
STATUS_DELETED: 4,
STATUS_URGENT: 5,
STATUS_DISCOUNTED: 6,
STATUS_RENTED: 7
STATUS_RENTED: 7,
STATUS_VIP: 8
};
const AD_AGENCY = {

View File

@@ -32,6 +32,11 @@ const PRINT_CRAWLER_DEBUG = process.env.PRINT_CRAWLER_DEBUG_INFO || 0;
const API_MAP_KEY = process.env.API_MAP_KEY || "";
// Credentials of the Prostor.ba account used by the crawler, read from the
// environment. Either field is undefined when its variable is not set.
const { PROSTOR_LOGIN_EMAIL, PROSTOR_LOGIN_PASS } = process.env;
const PROSTOR_LOGIN = {
  EMAIL: PROSTOR_LOGIN_EMAIL,
  PASSWORD: PROSTOR_LOGIN_PASS
};
module.exports = {
APP_PORT,
APP_URL,
@@ -42,5 +47,6 @@ module.exports = {
MAX_REAL_ESTATES_IN_EMAIL,
MAX_REAL_ESTATES_IN_FIRST_EMAIL,
PRINT_CRAWLER_DEBUG,
API_MAP_KEY
API_MAP_KEY,
PROSTOR_LOGIN
};

View File

@@ -2,13 +2,14 @@
const {
findRealEstatesForSearchRequest
} = require("../helpers/db/searchRequestMatch");
const { AD_STATUS } = require("../common/enums");
const getRealEstates = async (req, res) => {
const searchRequestId = req.params["searchRequestId"] || "";
const realEstates = await findRealEstatesForSearchRequest(searchRequestId);
const title = "Nekretnine koje odgovaraju Vašim uslovima pretrage";
res.render("realEstates", { realEstates, title });
res.render("realEstates", { realEstates, title, AD_STATUS });
};
module.exports = {

View File

@@ -1,9 +1,11 @@
const { getRealEstateById } = require("../helpers/db/realEstate");
const { AD_STATUS } = require("../common/enums");
const getRedirect = async (req, res) => {
const id = req.params.id || null;
let error = false;
let redirectUrl = undefined;
let vipAd = undefined;
if (!id) {
error = true;
} else {
@@ -13,6 +15,7 @@ const getRedirect = async (req, res) => {
error = true;
} else {
redirectUrl = realEstate.url;
vipAd = realEstate.adStatus === AD_STATUS.STATUS_VIP;
}
} catch (e) {
error = true;
@@ -24,7 +27,7 @@ const getRedirect = async (req, res) => {
res.render("notFound", { title });
} else {
const title = "Preusmjeravanje";
res.render("redirect", { title, redirectUrl });
res.render("redirect", { title, redirectUrl, vipAd });
}
};

View File

@@ -3,6 +3,7 @@
const fetch = require("node-fetch");
const cheerio = require("cheerio");
const moment = require("moment-timezone");
const FormData = require("form-data");
const {
AD_TYPE,
@@ -16,7 +17,8 @@ const {
const {
PRINT_CRAWLER_DEBUG,
DEFAULT_TIMEZONE
DEFAULT_TIMEZONE,
PROSTOR_LOGIN
} = require("../../config/appConfig");
const { PROSTOR_FORCE_CRAWL } = require("../specificConfigs/prostor");
@@ -60,13 +62,16 @@ class ProstorCrawler {
async crawl() {
const crawlAdCategories = this.crawlerAdCategories;
//We need session cookie to use login privileges
const prostorCookie = await this.getCookies();
//Flag that tells us whether the crawler logged in successfully
const login = await this.loginForScraping(PROSTOR_LOGIN, prostorCookie);
const newRealEstates = [];
if (crawlAdCategories) {
//Crawl only if login was successful
if (crawlAdCategories && login) {
const indexGenerators = [];
for (const adCategory of crawlAdCategories) {
indexGenerators.push(this.categoryIndexer(adCategory));
indexGenerators.push(this.categoryIndexer(adCategory, prostorCookie));
}
let done = false;
@@ -119,13 +124,14 @@ class ProstorCrawler {
return newRealEstates;
}
async *categoryIndexer(adCategory) {
async *categoryIndexer(adCategory, prostorCookie) {
const urlAdTypePart = PROSTOR_ENUMS.PROSTOR_AD_TYPE[this.crawlerAdTypes];
const urlCategoryPart = PROSTOR_ENUMS.PROSTOR_AD_CATEGORY[adCategory];
if (urlAdTypePart !== undefined && urlCategoryPart !== undefined) {
const urlPageToCrawl = `${this.baseUrl}?remove_sold=0${urlAdTypePart}${urlCategoryPart}`;
const listOfAllRealEstates = await this.extractRealEstates(
urlPageToCrawl
urlPageToCrawl,
prostorCookie
);
let elementToStartIndexFrom = 0;
@@ -139,7 +145,8 @@ class ProstorCrawler {
elementToStartIndexFrom += realEstatesForSinglePage.length;
const singlePageResults = await this.indexSinglePage(
realEstatesForSinglePage
realEstatesForSinglePage,
prostorCookie
);
const filteredSinglePageResults = singlePageResults.filter(
@@ -163,10 +170,10 @@ class ProstorCrawler {
}
}
async indexSinglePage(realEstatesList) {
async indexSinglePage(realEstatesList, prostorCookie) {
const asyncActions = [];
for (const realEstate of realEstatesList) {
asyncActions.push(this.scrapeAd(realEstate));
asyncActions.push(this.scrapeAd(realEstate, prostorCookie));
}
try {
@@ -180,12 +187,25 @@ class ProstorCrawler {
}
}
async scrapeAd(realEstate) {
async scrapeAd(realEstate, prostorCookie) {
const { lat, lng, property_name, price, size, link, status } = realEstate;
//Status information is already provided in the real estate list.
//For VIP ads the status is '' and cannot be used, but no VIP ads are crawled.
//We will make a "fake" VIP ad for real estates that have size=55.
//It is weird, because yesterday the status said 'VIP ponuda' ???
const adStatus =
size === "55"
? ProstorCrawler.getStatusId("VIP ponuda")
: ProstorCrawler.getStatusId(status);
const url = `https://prostor.ba${link}`;
// console.log("[PROSTOR] Scraping : ", url);
try {
const adPageSource = await fetch(url);
const adPageSource = await fetch(url, {
headers: { Cookie: prostorCookie }
});
const body = await adPageSource.text();
const $ = cheerio.load(body);
@@ -330,7 +350,6 @@ class ProstorCrawler {
furnishingType = FURNISHING_TYPE.NOT_FURNISHED.id;
}
const adStatus = ProstorCrawler.getStatusId(status);
const title = property_name;
const parsedPrice = parseFloat(price.replace(/\./g, "")) || null;
const parsedArea = parseFloat(size);
@@ -408,13 +427,15 @@ class ProstorCrawler {
}
}
async extractRealEstates(url) {
async extractRealEstates(url, prostorCookie) {
if (PRINT_CRAWLER_DEBUG) {
console.log("[PROSTOR] Index page : ", url);
}
try {
const res = await fetch(url);
const res = await fetch(url, {
headers: { Cookie: prostorCookie }
});
const body = await res.text();
const $ = cheerio.load(body);
@@ -548,6 +569,8 @@ class ProstorCrawler {
return AD_STATUS.STATUS_SOLD;
case "Iznajmljeno":
return AD_STATUS.STATUS_RENTED;
case "VIP ponuda":
return AD_STATUS.STATUS_VIP;
default:
console.log("[PROSTOR] Unknown AD_STATUS : [", statusText, "]");
return AD_STATUS.STATUS_NORMAL;
@@ -569,6 +592,51 @@ class ProstorCrawler {
return savers[0].save(results);
//so that we can use some sequelize options and information when data is inserted
}
async loginForScraping(PROSTOR_LOGIN, prostorCookie) {
let formData = new FormData();
formData.append("email", PROSTOR_LOGIN.EMAIL);
formData.append("password", PROSTOR_LOGIN.PASSWORD);
return fetch("https://prostor.ba/moj-prostor/prijava", {
method: "POST",
body: formData,
headers: { Cookie: prostorCookie }
})
.then(page => {
return page.text();
})
.then(resp => {
const $ = cheerio.load(resp);
if (
$("h1")
.text()
.indexOf("Dobrodošli") !== -1
) {
console.log("[PROSTOR]: Crawler loged in!");
return true;
} else {
console.log("[PROSTOR]: Crawler login failed - wrong credentials!");
return false;
}
})
.catch(err => {
console.log("[PROSTOR]: Crawler login error ", err);
});
}
async getCookies() {
const getResponse = await fetch("https://prostor.ba/moj-prostor/prijava", {
headers: { Cookie: "" }
});
const raw = getResponse.headers.raw()["set-cookie"];
const cookie = raw
.map(datastring => {
const data = datastring.split(";");
const cookieData = data[0];
return cookieData;
})
.join(";");
return cookie;
}
}
module.exports = ProstorCrawler;

View File

@@ -1,10 +1,11 @@
"use strict";
const { MAX_REAL_ESTATES_IN_EMAIL, APP_URL } = require("../config/appConfig");
const { AD_CATEGORY } = require("../common/enums");
const { AD_CATEGORY, AD_TYPE, EMAIL_FREQUENCY } = require("../common/enums");
const generateEmailFooter = searchRequestId => {
return `<div>Ako želite prestati dobijati obavještenja za ovu pretragu, <a href="${APP_URL}/odjava/${searchRequestId}">odjavite ovdje</a></div>
const generateEmailFooter = (searchRequestId, emailFrequencyTitle) => {
return ` <div>Trenutno ste prijavljeni da obavještenja o novim nekretninama primate <strong>${emailFrequencyTitle.toLowerCase()} </strong>.</div>
<div>Ako želite prestati dobijati obavještenja za ovu pretragu, <a href="${APP_URL}/odjava/${searchRequestId}">odjavite ovdje</a></div>
<div>Ako želite pogledati ili promijeniti uslove za ovu pretragu, <a href="${APP_URL}/pregled/${searchRequestId}">pogledajte ovdje</a></div>
<br/>
<strong>Vaš,<br/>Kivi tim</strong>`;
@@ -23,17 +24,24 @@ const generateRealEstateLinks = realEstates => {
const generateNotificationEmail = (
realEstates,
searchRequestId,
noAllRealEstates,
dailyNotification = false
) => {
const truncateList = realEstates.length > MAX_REAL_ESTATES_IN_EMAIL;
const realEstatesToShow = truncateList
? realEstates.slice(0, MAX_REAL_ESTATES_IN_EMAIL)
: realEstates;
const allRealEstatesLink = `${APP_URL}/nekretnine/${searchRequestId}`;
const emailFrequencyTitle = dailyNotification
? EMAIL_FREQUENCY.DAILY.title
: EMAIL_FREQUENCY.ASAP.title;
const realEstateLinks = generateRealEstateLinks(realEstatesToShow);
const moreRealEstates = `<div>Kompletan spisak nekretnina možete pogledati na <a href="${allRealEstatesLink}">listi nekretnina</a><div>`;
const emailFooter = generateEmailFooter(searchRequestId);
const moreRealEstates = `<div>Kompletan spisak nekretnina (${noAllRealEstates}) možete pogledati na <a href="${allRealEstatesLink}">listi nekretnina</a><div>`;
const emailFooter = generateEmailFooter(searchRequestId, emailFrequencyTitle);
const asapMessageBody =
realEstates.length > 1
? "Pronašli smo nekretnine koje odgovaraju Vašoj pretrazi"
@@ -59,6 +67,28 @@ const generateNotificationEmail = (
const generateNewSearchRequestEmail = (searchRequest, matchingRealEstates) => {
const realEstateType = AD_CATEGORY[searchRequest.realEstateType];
let adTypeTitle = "";
switch (searchRequest.adType) {
case AD_TYPE.AD_TYPE_SALE.stringId:
adTypeTitle = AD_TYPE.AD_TYPE_SALE.title;
break;
case AD_TYPE.AD_TYPE_RENT.stringId:
adTypeTitle = AD_TYPE.AD_TYPE_RENT.title;
break;
default:
adTypeTitle = "-";
break;
}
let emailFrequencyTitle;
switch (searchRequest.emailFrequency) {
case EMAIL_FREQUENCY.ASAP.stringId:
emailFrequencyTitle = EMAIL_FREQUENCY.ASAP.title;
break;
case EMAIL_FREQUENCY.DAILY.stringId:
emailFrequencyTitle = EMAIL_FREQUENCY.DAILY.title;
break;
}
const {
id,
gardenSizeMin,
@@ -70,6 +100,7 @@ const generateNewSearchRequestEmail = (searchRequest, matchingRealEstates) => {
} = searchRequest;
const realEstateLinks = generateRealEstateLinks(matchingRealEstates);
const instantRealEstatesText = `<br/>
<div>
U međuvremenu pogledajte neke od nedavno objavljenih nekretnina koje odgovaraju Vašim uslovima pretrage :<br/>
@@ -80,13 +111,14 @@ const generateNewSearchRequestEmail = (searchRequest, matchingRealEstates) => {
? `<div><strong>Kvadratura okućnice: Od ${gardenSizeMin} do ${gardenSizeMax} m2</strong></div>`
: ``;
const emailFooter = generateEmailFooter(id);
const emailFooter = generateEmailFooter(id, emailFrequencyTitle);
return `<h3>Zdravo</h3>
<div>Naručili ste da Vam javimo ako se nekretnina sa navedenim uslovima pojavi u oglasima:</div>
<br/>
<div>
<div><strong>Tip nekretnine: </strong>${realEstateType.title}</div>
<div><strong>Vrsta oglasa: </strong>${adTypeTitle}</div>
<div><strong>Kvadratura nekretnine:</strong> Od ${sizeMin} do ${sizeMax} m2</div>
${gardenSize}
<div><strong>Cijena:</strong> ${priceMin} do ${priceMax} KM</div>

View File

@@ -154,3 +154,7 @@ h3 {
margin-top: 2rem;
margin-bottom: 1rem;
}
.estates-link {
color: rgba(0, 0, 0, 0.87);
}

View File

@@ -8,7 +8,10 @@ const {
generateNewSearchRequestEmail,
generateEmailSubject
} = require("../helpers/emailContentGenerator");
const { findNotNotifiedMatches } = require("../helpers/db/searchRequestMatch");
const {
findNotNotifiedMatches,
findRealEstatesForSearchRequest
} = require("../helpers/db/searchRequestMatch");
const { sendEmail } = require("../services/emailService");
const notifyForNewRealEstates = async newRealEstates => {
@@ -39,10 +42,18 @@ const notifyMatches = async (matches, dailyNotification = false) => {
const { email, subscribed } = searchRequest;
if (notifyNow && subscribed) {
const allMatchingRealEstates = matches[id].realEstates || [];
//allMatchingRealEstates holds only the real estates that are "new" on the market,
//i.e. the ones we are notifying the user about right now, not everything already in the db.
//allRealEstates holds all real estates in the db that match this search request.
const allRealEstates = await findRealEstatesForSearchRequest(id);
const noAllRealEstates = allRealEstates.length;
if (allMatchingRealEstates.length > 0) {
const emailContent = generateNotificationEmail(
allMatchingRealEstates,
id,
noAllRealEstates,
dailyNotification
);
const emailSubject = generateEmailSubject(

View File

@@ -1,13 +1,29 @@
<div class="row center-align">
<ul class="collection with-header">
<% for(const realEstate of realEstates) { %>
<li class="collection-item">
<div><%= realEstate.title %>
<a href="<%= realEstate.url %>" class="kivi-color secondary-content">
<ul class="collection with-header">
<% for(const realEstate of realEstates) { %>
<li class="collection-item">
<% if(realEstate.adStatus === AD_STATUS.STATUS_VIP) {%>
<div>
<% //This needs to do redirecting instead of direct link to realestate
%>
<a href="/redirect/<%= realEstate.id %>" class="estates-link">
<%= realEstate.title %>
<div class="kivi-color secondary-content">
<i class="material-icons">send</i>
</a>
</div>
</li>
<% } %>
</ul>
</div>
</div>
</a>
</div>
<%} else { %>
<div>
<a href="<%= realEstate.url %>" class="estates-link">
<%= realEstate.title %>
<div class="kivi-color secondary-content">
<i class="material-icons">send</i>
</div>
</a>
</div>
<% }%>
</li>
<% } %>
</ul>
</div>

View File

@@ -1,26 +1,49 @@
<br><br>
<br /><br />
<div class="center">
<div class="preloader-wrapper big active center">
<div class="kivi-spinner-color spinner-layer spinner-green-only">
<div class="circle-clipper left">
<div class="circle"></div>
</div><div class="gap-patch">
<div class="circle"></div>
</div><div class="circle-clipper right">
<div class="circle"></div>
</div>
</div>
<div class="preloader-wrapper big active center">
<div class="kivi-spinner-color spinner-layer spinner-green-only">
<div class="circle-clipper left">
<div class="circle"></div>
</div>
<div class="gap-patch">
<div class="circle"></div>
</div>
<div class="circle-clipper right">
<div class="circle"></div>
</div>
</div>
</div>
</div>
<br>
<br />
<% if(vipAd) { %>
<div class="center">
<h6>
<a href="<%= redirectUrl %>" rel="noreferrer" id="realEstateUrl">Kliknite ovdje ako Vas web preglednik ne preusmjeri automatski</a>
</h6>
<h6>
Ovaj oglas zahtijeva da budete član
<a href="https://prostor.ba/" rel="noreferrer">Prostor.ba</a>.
<br />
<br />
<a href="https://prostor.ba/moj-prostor/prijava" rel="noreferrer"
>Ulogujte se</a
>
ili napravite
<a href="https://prostor.ba/moj-prostor/registracija" rel="noreferrer"
>novi račun</a
>, a potom otvorite <a href="<%= redirectUrl %>" rel="noreferrer">oglas</a>.
</h6>
</div>
<% } else { %>
<div class="center">
<h6>
<a href="<%= redirectUrl %>" rel="noreferrer" id="realEstateUrl"
>Kliknite ovdje ako Vas web preglednik ne preusmjeri automatski</a
>
</h6>
</div>
<% }%>
<script>
window.onload = function() {
document.getElementById('realEstateUrl').click();
}
window.onload = function() {
document.getElementById("realEstateUrl").click();
};
</script>

View File

@@ -51,6 +51,8 @@ PROSTOR_CRAWLER_AD_CATEGORIES=comma separated list of enum names of categories t
PROSTOR_IGNORED_USERNAMES=!!! This is not used for prostor crawler !!!
PROSTOR_DELAY_BETWEEN_PAGES=!!! This is not used for prostor crawler !!!
PROSTOR_FORCE_CRAWL=Non-zero value will force crawler to crawl all pages without stopping when known real estate is found
PROSTOR_LOGIN_EMAIL=Email of valid Prostor.ba account for crawling purposes
PROSTOR_LOGIN_PASS=Password of valid Prostor.ba account for crawling purposes
#==AKTIDO==
AKTIDO_MAX_PAGES=Restrict crawler to this number of pages
AKTIDO_MAX_RESULTS_PER_PAGE=Only this number or less results from one page will be scraped and saved

30
package-lock.json generated
View File

@@ -1346,13 +1346,23 @@
"integrity": "sha1-+8cfDEGt6zf5bFd60e1C2P2sypE="
},
"form-data": {
"version": "2.3.3",
"resolved": "https://registry.npmjs.org/form-data/-/form-data-2.3.3.tgz",
"integrity": "sha512-1lLKB2Mu3aGP1Q/2eCOx0fNbRMe7XdwktwOruhfqqd0rIJWwN4Dh+E3hrPSlDCXnSR7UtZ1N38rVXm+6+MEhJQ==",
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/form-data/-/form-data-3.0.0.tgz",
"integrity": "sha512-CKMFDglpbMi6PyN+brwB9Q/GOw0eAnsrEZDgcsH5Krhz5Od/haKHAX0NmQfha2zPPz0JpWzA7GJHGSnvCRLWsg==",
"requires": {
"asynckit": "^0.4.0",
"combined-stream": "^1.0.6",
"combined-stream": "^1.0.8",
"mime-types": "^2.1.12"
},
"dependencies": {
"combined-stream": {
"version": "1.0.8",
"resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz",
"integrity": "sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==",
"requires": {
"delayed-stream": "~1.0.0"
}
}
}
},
"forwarded": {
@@ -3430,6 +3440,18 @@
"tough-cookie": "~2.4.3",
"tunnel-agent": "^0.6.0",
"uuid": "^3.3.2"
},
"dependencies": {
"form-data": {
"version": "2.3.3",
"resolved": "https://registry.npmjs.org/form-data/-/form-data-2.3.3.tgz",
"integrity": "sha512-1lLKB2Mu3aGP1Q/2eCOx0fNbRMe7XdwktwOruhfqqd0rIJWwN4Dh+E3hrPSlDCXnSR7UtZ1N38rVXm+6+MEhJQ==",
"requires": {
"asynckit": "^0.4.0",
"combined-stream": "^1.0.6",
"mime-types": "^2.1.12"
}
}
}
},
"require-directory": {

View File

@@ -39,6 +39,7 @@
"express": "^4.16.4",
"express-ejs-layouts": "^2.5.0",
"express-layout": "^0.1.0",
"form-data": "^3.0.0",
"html-to-text": "^5.1.1",
"moment": "^2.24.0",
"moment-timezone": "^0.5.26",