From 49161c1b605fb0fd4c10186b77d2de80e2023fd9 Mon Sep 17 00:00:00 2001 From: Naida Vatric Date: Thu, 9 Jan 2020 12:19:19 +0100 Subject: [PATCH 1/7] WIP Changed redirecting for VIP ads. --- app/common/enums.js | 3 +- app/controllers/redirect.js | 5 ++- app/crawler/specificCrawlers/prostor.js | 6 ++-- app/views/redirect.ejs | 47 +++++++++++++++---------- 4 files changed, 39 insertions(+), 22 deletions(-) diff --git a/app/common/enums.js b/app/common/enums.js index 33cb41e..85ed553 100644 --- a/app/common/enums.js +++ b/app/common/enums.js @@ -216,7 +216,8 @@ const AD_STATUS = { STATUS_DELETED: 4, STATUS_URGENT: 5, STATUS_DISCOUNTED: 6, - STATUS_RENTED: 7 + STATUS_RENTED: 7, + STATUS_VIP: 8 }; const AD_AGENCY = { diff --git a/app/controllers/redirect.js b/app/controllers/redirect.js index 9975ab2..eb4e505 100644 --- a/app/controllers/redirect.js +++ b/app/controllers/redirect.js @@ -1,9 +1,11 @@ const { getRealEstateById } = require("../helpers/db/realEstate"); +const { AD_STATUS } = require("../common/enums"); const getRedirect = async (req, res) => { const id = req.params.id || null; let error = false; let redirectUrl = undefined; + let vipAd = undefined; if (!id) { error = true; } else { @@ -13,6 +15,7 @@ const getRedirect = async (req, res) => { error = true; } else { redirectUrl = realEstate.url; + vipAd = realEstate.adStatus === AD_STATUS.STATUS_VIP; } } catch (e) { error = true; @@ -24,7 +27,7 @@ const getRedirect = async (req, res) => { res.render("notFound", { title }); } else { const title = "Preusmjeravanje"; - res.render("redirect", { title, redirectUrl }); + res.render("redirect", { title, redirectUrl, vipAd }); } }; diff --git a/app/crawler/specificCrawlers/prostor.js b/app/crawler/specificCrawlers/prostor.js index cb1d3f9..fb7a52f 100644 --- a/app/crawler/specificCrawlers/prostor.js +++ b/app/crawler/specificCrawlers/prostor.js @@ -182,8 +182,8 @@ class ProstorCrawler { async scrapeAd(realEstate) { const { lat, lng, property_name, price, size, link, 
status } = realEstate; - const url = `https://prostor.ba${link}`; - // console.log("[PROSTOR] Scraping : ", url); + + //console.log("[PROSTOR] Scraping : ", url); try { const adPageSource = await fetch(url); const body = await adPageSource.text(); @@ -548,6 +548,8 @@ class ProstorCrawler { return AD_STATUS.STATUS_SOLD; case "Iznajmljeno": return AD_STATUS.STATUS_RENTED; + case "VIP ponuda": + return AD_STATUS.STATUS_VIP; default: console.log("[PROSTOR] Unknown AD_STATUS : [", statusText, "]"); return AD_STATUS.STATUS_NORMAL; diff --git a/app/views/redirect.ejs b/app/views/redirect.ejs index 52233cb..3346bcb 100644 --- a/app/views/redirect.ejs +++ b/app/views/redirect.ejs @@ -1,26 +1,37 @@ -

+

-
-
-
-
-
-
-
-
-
-
+
+
+
+
+
+
+
+
+
+
+
+
-
+
+<% if(vipAd) { %> +<% } else { %> + +<% }%> + From 1658325c4b45d95a7a0efc26c6d56e8ba755ad18 Mon Sep 17 00:00:00 2001 From: Naida Vatric Date: Fri, 10 Jan 2020 19:20:26 +0100 Subject: [PATCH 2/7] WIP Fake vip ads. --- app/crawler/specificCrawlers/prostor.js | 17 +++++++++++++++-- app/views/redirect.ejs | 2 +- 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/app/crawler/specificCrawlers/prostor.js b/app/crawler/specificCrawlers/prostor.js index fb7a52f..3d180df 100644 --- a/app/crawler/specificCrawlers/prostor.js +++ b/app/crawler/specificCrawlers/prostor.js @@ -183,7 +183,19 @@ class ProstorCrawler { async scrapeAd(realEstate) { const { lat, lng, property_name, price, size, link, status } = realEstate; - //console.log("[PROSTOR] Scraping : ", url); + //Status information is given already in realestate list + //For VIP Ads status ='' canot be used, but also area='0' we will use that temporary + //It is weird because yesterday it said 'VIP ponuda' ??? + const adStatus = + size === "0" + ? 
ProstorCrawler.getStatusId("VIP ponuda") + : ProstorCrawler.getStatusId(status); + // + console.log("adStatus", adStatus); + + const url = `https://prostor.ba${link}`; + + // console.log("[PROSTOR] Scraping : ", url); try { const adPageSource = await fetch(url); const body = await adPageSource.text(); @@ -330,7 +342,6 @@ class ProstorCrawler { furnishingType = FURNISHING_TYPE.NOT_FURNISHED.id; } - const adStatus = ProstorCrawler.getStatusId(status); const title = property_name; const parsedPrice = parseFloat(price.replace(/\./g, "")) || null; const parsedArea = parseFloat(size); @@ -539,6 +550,8 @@ class ProstorCrawler { } static getStatusId(statusText) { + // + console.log("statusText u funkciji", statusText); switch (statusText) { case "": return AD_STATUS.STATUS_NORMAL; diff --git a/app/views/redirect.ejs b/app/views/redirect.ejs index 3346bcb..5b111cc 100644 --- a/app/views/redirect.ejs +++ b/app/views/redirect.ejs @@ -32,6 +32,6 @@ From 64e483589915fbbdf3d65db0452c2583090d9c74 Mon Sep 17 00:00:00 2001 From: Naida Vatric Date: Fri, 10 Jan 2020 22:52:50 +0100 Subject: [PATCH 3/7] Changed redirecting for VIP ads. 
--- app/controllers/realEstates.js | 3 ++- app/crawler/specificCrawlers/prostor.js | 7 +++---- app/views/realEstates.ejs | 13 +++++++++++++ app/views/redirect.ejs | 16 ++++++++++++++-- 4 files changed, 32 insertions(+), 7 deletions(-) diff --git a/app/controllers/realEstates.js b/app/controllers/realEstates.js index ce82765..48c1aff 100644 --- a/app/controllers/realEstates.js +++ b/app/controllers/realEstates.js @@ -2,13 +2,14 @@ const { findRealEstatesForSearchRequest } = require("../helpers/db/searchRequestMatch"); +const { AD_STATUS } = require("../common/enums"); const getRealEstates = async (req, res) => { const searchRequestId = req.params["searchRequestId"] || ""; const realEstates = await findRealEstatesForSearchRequest(searchRequestId); const title = "Nekretnine koje odgovaraju Vašim uslovima pretrage"; - res.render("realEstates", { realEstates, title }); + res.render("realEstates", { realEstates, title, AD_STATUS }); }; module.exports = { diff --git a/app/crawler/specificCrawlers/prostor.js b/app/crawler/specificCrawlers/prostor.js index 3d180df..ca4271c 100644 --- a/app/crawler/specificCrawlers/prostor.js +++ b/app/crawler/specificCrawlers/prostor.js @@ -184,14 +184,13 @@ class ProstorCrawler { const { lat, lng, property_name, price, size, link, status } = realEstate; //Status information is given already in realestate list - //For VIP Ads status ='' canot be used, but also area='0' we will use that temporary + //For VIP Ads status ='' canot be used, but no VIP ads are crawled + //We will make "fake" vip ad for RE that have size=55 //It is weird because yesterday it said 'VIP ponuda' ??? const adStatus = - size === "0" + size === "55" ? 
ProstorCrawler.getStatusId("VIP ponuda") : ProstorCrawler.getStatusId(status); - // - console.log("adStatus", adStatus); const url = `https://prostor.ba${link}`; diff --git a/app/views/realEstates.ejs b/app/views/realEstates.ejs index 3e94a1f..b47b744 100644 --- a/app/views/realEstates.ejs +++ b/app/views/realEstates.ejs @@ -2,6 +2,18 @@
+ <% }%> <% } %> diff --git a/app/views/redirect.ejs b/app/views/redirect.ejs index 5b111cc..e36e081 100644 --- a/app/views/redirect.ejs +++ b/app/views/redirect.ejs @@ -18,7 +18,19 @@
<% if(vipAd) { %>
-
Work in progress....
+
+ Ovaj oglas zahtijeva da budete član + Prostor.ba. +
+
+ Ulogujte se + ili napravite + novi račun, a potom otvorite oglas. +
<% } else { %>
@@ -32,6 +44,6 @@ From 850528267011f08261f1486483f4de9374df8d09 Mon Sep 17 00:00:00 2001 From: Naida Vatric Date: Sun, 12 Jan 2020 01:22:50 +0100 Subject: [PATCH 4/7] WiP Login of crawler prostor. --- app/config/appConfig.js | 8 +++- app/crawler/specificCrawlers/prostor.js | 54 ++++++++++++++++++++++--- development.env | 2 + 3 files changed, 57 insertions(+), 7 deletions(-) diff --git a/app/config/appConfig.js b/app/config/appConfig.js index 7a7887b..4403fbd 100644 --- a/app/config/appConfig.js +++ b/app/config/appConfig.js @@ -32,6 +32,11 @@ const PRINT_CRAWLER_DEBUG = process.env.PRINT_CRAWLER_DEBUG_INFO || 0; const API_MAP_KEY = process.env.API_MAP_KEY || ""; +const PROSTOR_LOGIN = { + EMAIL: process.env.PROSTOR_LOGIN_EMAIL, + PASSWORD: process.env.PROSTOR_LOGIN_PASS +}; + module.exports = { APP_PORT, APP_URL, @@ -42,5 +47,6 @@ module.exports = { MAX_REAL_ESTATES_IN_EMAIL, MAX_REAL_ESTATES_IN_FIRST_EMAIL, PRINT_CRAWLER_DEBUG, - API_MAP_KEY + API_MAP_KEY, + PROSTOR_LOGIN }; diff --git a/app/crawler/specificCrawlers/prostor.js b/app/crawler/specificCrawlers/prostor.js index ca4271c..96aab61 100644 --- a/app/crawler/specificCrawlers/prostor.js +++ b/app/crawler/specificCrawlers/prostor.js @@ -16,7 +16,8 @@ const { const { PRINT_CRAWLER_DEBUG, - DEFAULT_TIMEZONE + DEFAULT_TIMEZONE, + PROSTOR_LOGIN } = require("../../config/appConfig"); const { PROSTOR_FORCE_CRAWL } = require("../specificConfigs/prostor"); @@ -60,10 +61,12 @@ class ProstorCrawler { async crawl() { const crawlAdCategories = this.crawlerAdCategories; - + //New tag to check if crawler loged in + const login = await this.loginForScraping(PROSTOR_LOGIN); const newRealEstates = []; - - if (crawlAdCategories) { + // + console.log("login before crawl:", login); + if (crawlAdCategories && login) { const indexGenerators = []; for (const adCategory of crawlAdCategories) { indexGenerators.push(this.categoryIndexer(adCategory)); @@ -549,8 +552,6 @@ class ProstorCrawler { } static getStatusId(statusText) { - // - 
console.log("statusText u funkciji", statusText); switch (statusText) { case "": return AD_STATUS.STATUS_NORMAL; @@ -583,6 +584,47 @@ class ProstorCrawler { return savers[0].save(results); //so that we can use some sequelize options and information when data is inserted } + async loginForScraping(PROSTOR_LOGIN) { + console.log("PROSTOR_LOGIN", PROSTOR_LOGIN); + let logedin = false; + fetch("https://prostor.ba/moj-prostor/prijava", { + method: "POST", + body: JSON.stringify({ + email: PROSTOR_LOGIN.EMAIL, + password: PROSTOR_LOGIN.PASSWORD + }) + }) + .then(page => { + /* console.log("page", page.text()); + + const $ = cheerio.load(page); + console.log("$ ", $); + if ( + $(".icons .d-none.d-xl-inline-block.mr-2") + .text() + .indexOf("Dobrodošli") != -1 + ) { + console.log("[PROSTOR]: Crawler loged in!"); + logedin = true; + } else { + console.log("[PROSTOR]: Crawler login failed - wrong credentials!"); + } */ + + return page.text(); + }) + .then(resp => { + // console.log(resp); + const $ = cheerio.load(resp); + console.log("$ ", $("h1").text()); + }) + + .catch(err => { + console.log("[PROSTOR]: Crawler login error ", err); + }); + // + console.log("login in function:", logedin); + return logedin; + } } module.exports = ProstorCrawler; diff --git a/development.env b/development.env index 89f0a1e..150f8be 100644 --- a/development.env +++ b/development.env @@ -51,6 +51,8 @@ PROSTOR_CRAWLER_AD_CATEGORIES=comma separated list of enum names of categories t PROSTOR_IGNORED_USERNAMES=!!! This is not used for prostor crawler !!! PROSTOR_DELAY_BETWEEN_PAGES=!!! This is not used for prostor crawler !!! 
PROSTOR_FORCE_CRAWL=Non-zero value will force crawler to crawl all pages without stopping when known real estate is found +PROSTOR_LOGIN_EMAIL=Email of valid Prostor.ba account for crawling purposes +PROSTOR_LOGIN_PASS=Password of valid Prostor.ba account for crawling purposes #==AKTIDO== AKTIDO_MAX_PAGES=Restrict crawler to this number of pages AKTIDO_MAX_RESULTS_PER_PAGE=Only this number or less results from one page will be scraped and saved From e70901d3692cc6f1a9c503a59d947f2cd6a8ee58 Mon Sep 17 00:00:00 2001 From: Naida Vatric Date: Mon, 13 Jan 2020 09:12:03 +0100 Subject: [PATCH 5/7] WIP Changed login to crawler. --- app/crawler/specificCrawlers/prostor.js | 46 +++++++++++-------------- package-lock.json | 30 +++++++++++++--- package.json | 1 + 3 files changed, 47 insertions(+), 30 deletions(-) diff --git a/app/crawler/specificCrawlers/prostor.js b/app/crawler/specificCrawlers/prostor.js index 96aab61..6e32af8 100644 --- a/app/crawler/specificCrawlers/prostor.js +++ b/app/crawler/specificCrawlers/prostor.js @@ -3,6 +3,7 @@ const fetch = require("node-fetch"); const cheerio = require("cheerio"); const moment = require("moment-timezone"); +const FormData = require("form-data"); const { AD_TYPE, @@ -586,44 +587,37 @@ class ProstorCrawler { } async loginForScraping(PROSTOR_LOGIN) { console.log("PROSTOR_LOGIN", PROSTOR_LOGIN); - let logedin = false; - fetch("https://prostor.ba/moj-prostor/prijava", { + var formData = new FormData(); + formData.append("email", PROSTOR_LOGIN.EMAIL); + formData.append("password", PROSTOR_LOGIN.PASSWORD); + //When once loged in it stays loged in with same credentials. + //Do we need to log out ?? 
+ return fetch("https://prostor.ba/moj-prostor/prijava", { method: "POST", - body: JSON.stringify({ - email: PROSTOR_LOGIN.EMAIL, - password: PROSTOR_LOGIN.PASSWORD - }) + body: formData, + headers: { Cookie: "ci_session=3a47b6e18b3b9bc146bcde1f95126cbad0f58bf7" } }) .then(page => { - /* console.log("page", page.text()); - - const $ = cheerio.load(page); - console.log("$ ", $); - if ( - $(".icons .d-none.d-xl-inline-block.mr-2") - .text() - .indexOf("Dobrodošli") != -1 - ) { - console.log("[PROSTOR]: Crawler loged in!"); - logedin = true; - } else { - console.log("[PROSTOR]: Crawler login failed - wrong credentials!"); - } */ - return page.text(); }) .then(resp => { - // console.log(resp); const $ = cheerio.load(resp); console.log("$ ", $("h1").text()); + if ( + $("h1") + .text() + .indexOf("Dobrodošli") !== -1 + ) { + console.log("[PROSTOR]: Crawler loged in!"); + return true; + } else { + console.log("[PROSTOR]: Crawler login failed - wrong credentials!"); + return false; + } }) - .catch(err => { console.log("[PROSTOR]: Crawler login error ", err); }); - // - console.log("login in function:", logedin); - return logedin; } } diff --git a/package-lock.json b/package-lock.json index 9661459..4626180 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1346,13 +1346,23 @@ "integrity": "sha1-+8cfDEGt6zf5bFd60e1C2P2sypE=" }, "form-data": { - "version": "2.3.3", - "resolved": "https://registry.npmjs.org/form-data/-/form-data-2.3.3.tgz", - "integrity": "sha512-1lLKB2Mu3aGP1Q/2eCOx0fNbRMe7XdwktwOruhfqqd0rIJWwN4Dh+E3hrPSlDCXnSR7UtZ1N38rVXm+6+MEhJQ==", + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/form-data/-/form-data-3.0.0.tgz", + "integrity": "sha512-CKMFDglpbMi6PyN+brwB9Q/GOw0eAnsrEZDgcsH5Krhz5Od/haKHAX0NmQfha2zPPz0JpWzA7GJHGSnvCRLWsg==", "requires": { "asynckit": "^0.4.0", - "combined-stream": "^1.0.6", + "combined-stream": "^1.0.8", "mime-types": "^2.1.12" + }, + "dependencies": { + "combined-stream": { + "version": "1.0.8", + "resolved": 
"https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz", + "integrity": "sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==", + "requires": { + "delayed-stream": "~1.0.0" + } + } } }, "forwarded": { @@ -3430,6 +3440,18 @@ "tough-cookie": "~2.4.3", "tunnel-agent": "^0.6.0", "uuid": "^3.3.2" + }, + "dependencies": { + "form-data": { + "version": "2.3.3", + "resolved": "https://registry.npmjs.org/form-data/-/form-data-2.3.3.tgz", + "integrity": "sha512-1lLKB2Mu3aGP1Q/2eCOx0fNbRMe7XdwktwOruhfqqd0rIJWwN4Dh+E3hrPSlDCXnSR7UtZ1N38rVXm+6+MEhJQ==", + "requires": { + "asynckit": "^0.4.0", + "combined-stream": "^1.0.6", + "mime-types": "^2.1.12" + } + } } }, "require-directory": { diff --git a/package.json b/package.json index 75a7cc4..511f772 100644 --- a/package.json +++ b/package.json @@ -39,6 +39,7 @@ "express": "^4.16.4", "express-ejs-layouts": "^2.5.0", "express-layout": "^0.1.0", + "form-data": "^3.0.0", "html-to-text": "^5.1.1", "moment": "^2.24.0", "moment-timezone": "^0.5.26", From ba43fa0713c0a841479699de2ba038ca93c5e4fa Mon Sep 17 00:00:00 2001 From: Naida Vatric Date: Mon, 13 Jan 2020 11:02:26 +0100 Subject: [PATCH 6/7] WIP Changed cookies. 
--- app/crawler/specificCrawlers/prostor.js | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/app/crawler/specificCrawlers/prostor.js b/app/crawler/specificCrawlers/prostor.js index 6e32af8..01e2402 100644 --- a/app/crawler/specificCrawlers/prostor.js +++ b/app/crawler/specificCrawlers/prostor.js @@ -587,17 +587,21 @@ class ProstorCrawler { } async loginForScraping(PROSTOR_LOGIN) { console.log("PROSTOR_LOGIN", PROSTOR_LOGIN); - var formData = new FormData(); + const prostorCookie = await this.getCookies(); + console.log("prostor cookie", prostorCookie); + let formData = new FormData(); formData.append("email", PROSTOR_LOGIN.EMAIL); formData.append("password", PROSTOR_LOGIN.PASSWORD); - //When once loged in it stays loged in with same credentials. + //When once loged in it stays loged in with same credentials. //Do we need to log out ?? return fetch("https://prostor.ba/moj-prostor/prijava", { method: "POST", body: formData, - headers: { Cookie: "ci_session=3a47b6e18b3b9bc146bcde1f95126cbad0f58bf7" } + headers: { Cookie: prostorCookie } }) .then(page => { + // + console.log("headers: ", page.headers.raw()["set-cookie"]); return page.text(); }) .then(resp => { @@ -619,6 +623,20 @@ class ProstorCrawler { console.log("[PROSTOR]: Crawler login error ", err); }); } + async getCookies() { + const getResponse = await fetch("https://prostor.ba/moj-prostor/prijava", { + headers: { Cookie: "" } + }); + const raw = getResponse.headers.raw()["set-cookie"]; + const cookie = raw + .map(datastring => { + const data = datastring.split(";"); + const cookieData = data[0]; + return cookieData; + }) + .join(";"); + return cookie; + } } module.exports = ProstorCrawler; From 511b2900961a470c80ba22d64bbab3ec57193ee9 Mon Sep 17 00:00:00 2001 From: Naida Vatric Date: Mon, 13 Jan 2020 12:05:33 +0100 Subject: [PATCH 7/7] Login to prostor.ba before crawl. 
--- app/crawler/specificCrawlers/prostor.js | 44 ++++++++++++------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/app/crawler/specificCrawlers/prostor.js b/app/crawler/specificCrawlers/prostor.js index 01e2402..04be5f3 100644 --- a/app/crawler/specificCrawlers/prostor.js +++ b/app/crawler/specificCrawlers/prostor.js @@ -62,15 +62,16 @@ class ProstorCrawler { async crawl() { const crawlAdCategories = this.crawlerAdCategories; + //We need session cookie to use login privileges + const prostorCookie = await this.getCookies(); //New tag to check if crawler loged in - const login = await this.loginForScraping(PROSTOR_LOGIN); + const login = await this.loginForScraping(PROSTOR_LOGIN, prostorCookie); const newRealEstates = []; - // - console.log("login before crawl:", login); + //Crawl only if login was successful if (crawlAdCategories && login) { const indexGenerators = []; for (const adCategory of crawlAdCategories) { - indexGenerators.push(this.categoryIndexer(adCategory)); + indexGenerators.push(this.categoryIndexer(adCategory, prostorCookie)); } let done = false; @@ -123,13 +124,14 @@ class ProstorCrawler { return newRealEstates; } - async *categoryIndexer(adCategory) { + async *categoryIndexer(adCategory, prostorCookie) { const urlAdTypePart = PROSTOR_ENUMS.PROSTOR_AD_TYPE[this.crawlerAdTypes]; const urlCategoryPart = PROSTOR_ENUMS.PROSTOR_AD_CATEGORY[adCategory]; if (urlAdTypePart !== undefined && urlCategoryPart !== undefined) { const urlPageToCrawl = `${this.baseUrl}?remove_sold=0${urlAdTypePart}${urlCategoryPart}`; const listOfAllRealEstates = await this.extractRealEstates( - urlPageToCrawl + urlPageToCrawl, + prostorCookie ); let elementToStartIndexFrom = 0; @@ -143,7 +145,8 @@ class ProstorCrawler { elementToStartIndexFrom += realEstatesForSinglePage.length; const singlePageResults = await this.indexSinglePage( - realEstatesForSinglePage + realEstatesForSinglePage, + prostorCookie ); const filteredSinglePageResults = 
singlePageResults.filter( @@ -167,10 +170,10 @@ class ProstorCrawler { } } - async indexSinglePage(realEstatesList) { + async indexSinglePage(realEstatesList, prostorCookie) { const asyncActions = []; for (const realEstate of realEstatesList) { - asyncActions.push(this.scrapeAd(realEstate)); + asyncActions.push(this.scrapeAd(realEstate, prostorCookie)); } try { @@ -184,7 +187,7 @@ class ProstorCrawler { } } - async scrapeAd(realEstate) { + async scrapeAd(realEstate, prostorCookie) { const { lat, lng, property_name, price, size, link, status } = realEstate; //Status information is given already in realestate list @@ -200,7 +203,9 @@ class ProstorCrawler { // console.log("[PROSTOR] Scraping : ", url); try { - const adPageSource = await fetch(url); + const adPageSource = await fetch(url, { + headers: { Cookie: prostorCookie } + }); const body = await adPageSource.text(); const $ = cheerio.load(body); @@ -422,13 +427,15 @@ class ProstorCrawler { } } - async extractRealEstates(url) { + async extractRealEstates(url, prostorCookie) { if (PRINT_CRAWLER_DEBUG) { console.log("[PROSTOR] Index page : ", url); } try { - const res = await fetch(url); + const res = await fetch(url, { + headers: { Cookie: prostorCookie } + }); const body = await res.text(); const $ = cheerio.load(body); @@ -585,28 +592,21 @@ class ProstorCrawler { return savers[0].save(results); //so that we can use some sequelize options and information when data is inserted } - async loginForScraping(PROSTOR_LOGIN) { - console.log("PROSTOR_LOGIN", PROSTOR_LOGIN); - const prostorCookie = await this.getCookies(); - console.log("prostor cookie", prostorCookie); + async loginForScraping(PROSTOR_LOGIN, prostorCookie) { let formData = new FormData(); formData.append("email", PROSTOR_LOGIN.EMAIL); formData.append("password", PROSTOR_LOGIN.PASSWORD); - //When once loged in it stays loged in with same credentials. - //Do we need to log out ?? 
+ return fetch("https://prostor.ba/moj-prostor/prijava", { method: "POST", body: formData, headers: { Cookie: prostorCookie } }) .then(page => { - // - console.log("headers: ", page.headers.raw()["set-cookie"]); return page.text(); }) .then(resp => { const $ = cheerio.load(resp); - console.log("$ ", $("h1").text()); if ( $("h1") .text()