WiP Login of crawler prostor.
This commit is contained in:
@@ -32,6 +32,11 @@ const PRINT_CRAWLER_DEBUG = process.env.PRINT_CRAWLER_DEBUG_INFO || 0;
|
|||||||
|
|
||||||
const API_MAP_KEY = process.env.API_MAP_KEY || "";
|
const API_MAP_KEY = process.env.API_MAP_KEY || "";
|
||||||
|
|
||||||
|
// Credentials of the Prostor.ba account used by the crawler, read from the
// PROSTOR_LOGIN_EMAIL and PROSTOR_LOGIN_PASS environment variables.
// Both fields are undefined when the corresponding variable is not set.
const PROSTOR_LOGIN = {
|
||||||
|
EMAIL: process.env.PROSTOR_LOGIN_EMAIL,
|
||||||
|
PASSWORD: process.env.PROSTOR_LOGIN_PASS
|
||||||
|
};
|
||||||
|
|
||||||
module.exports = {
|
module.exports = {
|
||||||
APP_PORT,
|
APP_PORT,
|
||||||
APP_URL,
|
APP_URL,
|
||||||
@@ -42,5 +47,6 @@ module.exports = {
|
|||||||
MAX_REAL_ESTATES_IN_EMAIL,
|
MAX_REAL_ESTATES_IN_EMAIL,
|
||||||
MAX_REAL_ESTATES_IN_FIRST_EMAIL,
|
MAX_REAL_ESTATES_IN_FIRST_EMAIL,
|
||||||
PRINT_CRAWLER_DEBUG,
|
PRINT_CRAWLER_DEBUG,
|
||||||
API_MAP_KEY
|
API_MAP_KEY,
|
||||||
|
PROSTOR_LOGIN
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -16,7 +16,8 @@ const {
|
|||||||
|
|
||||||
const {
|
const {
|
||||||
PRINT_CRAWLER_DEBUG,
|
PRINT_CRAWLER_DEBUG,
|
||||||
DEFAULT_TIMEZONE
|
DEFAULT_TIMEZONE,
|
||||||
|
PROSTOR_LOGIN
|
||||||
} = require("../../config/appConfig");
|
} = require("../../config/appConfig");
|
||||||
const { PROSTOR_FORCE_CRAWL } = require("../specificConfigs/prostor");
|
const { PROSTOR_FORCE_CRAWL } = require("../specificConfigs/prostor");
|
||||||
|
|
||||||
@@ -60,10 +61,12 @@ class ProstorCrawler {
|
|||||||
|
|
||||||
async crawl() {
|
async crawl() {
|
||||||
const crawlAdCategories = this.crawlerAdCategories;
|
const crawlAdCategories = this.crawlerAdCategories;
|
||||||
|
//New tag to check if crawler loged in
|
||||||
|
const login = await this.loginForScraping(PROSTOR_LOGIN);
|
||||||
const newRealEstates = [];
|
const newRealEstates = [];
|
||||||
|
//
|
||||||
if (crawlAdCategories) {
|
console.log("login before crawl:", login);
|
||||||
|
if (crawlAdCategories && login) {
|
||||||
const indexGenerators = [];
|
const indexGenerators = [];
|
||||||
for (const adCategory of crawlAdCategories) {
|
for (const adCategory of crawlAdCategories) {
|
||||||
indexGenerators.push(this.categoryIndexer(adCategory));
|
indexGenerators.push(this.categoryIndexer(adCategory));
|
||||||
@@ -549,8 +552,6 @@ class ProstorCrawler {
|
|||||||
}
|
}
|
||||||
|
|
||||||
static getStatusId(statusText) {
|
static getStatusId(statusText) {
|
||||||
//
|
|
||||||
console.log("statusText u funkciji", statusText);
|
|
||||||
switch (statusText) {
|
switch (statusText) {
|
||||||
case "":
|
case "":
|
||||||
return AD_STATUS.STATUS_NORMAL;
|
return AD_STATUS.STATUS_NORMAL;
|
||||||
@@ -583,6 +584,47 @@ class ProstorCrawler {
|
|||||||
return savers[0].save(results);
|
return savers[0].save(results);
|
||||||
//so that we can use some sequelize options and information when data is inserted
|
//so that we can use some sequelize options and information when data is inserted
|
||||||
}
|
}
|
||||||
|
async loginForScraping(PROSTOR_LOGIN) {
|
||||||
|
console.log("PROSTOR_LOGIN", PROSTOR_LOGIN);
|
||||||
|
let logedin = false;
|
||||||
|
fetch("https://prostor.ba/moj-prostor/prijava", {
|
||||||
|
method: "POST",
|
||||||
|
body: JSON.stringify({
|
||||||
|
email: PROSTOR_LOGIN.EMAIL,
|
||||||
|
password: PROSTOR_LOGIN.PASSWORD
|
||||||
|
})
|
||||||
|
})
|
||||||
|
.then(page => {
|
||||||
|
/* console.log("page", page.text());
|
||||||
|
|
||||||
|
const $ = cheerio.load(page);
|
||||||
|
console.log("$ ", $);
|
||||||
|
if (
|
||||||
|
$(".icons .d-none.d-xl-inline-block.mr-2")
|
||||||
|
.text()
|
||||||
|
.indexOf("Dobrodošli") != -1
|
||||||
|
) {
|
||||||
|
console.log("[PROSTOR]: Crawler loged in!");
|
||||||
|
logedin = true;
|
||||||
|
} else {
|
||||||
|
console.log("[PROSTOR]: Crawler login failed - wrong credentials!");
|
||||||
|
} */
|
||||||
|
|
||||||
|
return page.text();
|
||||||
|
})
|
||||||
|
.then(resp => {
|
||||||
|
// console.log(resp);
|
||||||
|
const $ = cheerio.load(resp);
|
||||||
|
console.log("$ ", $("h1").text());
|
||||||
|
})
|
||||||
|
|
||||||
|
.catch(err => {
|
||||||
|
console.log("[PROSTOR]: Crawler login error ", err);
|
||||||
|
});
|
||||||
|
//
|
||||||
|
console.log("login in function:", logedin);
|
||||||
|
return logedin;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
module.exports = ProstorCrawler;
|
module.exports = ProstorCrawler;
|
||||||
|
|||||||
@@ -51,6 +51,8 @@ PROSTOR_CRAWLER_AD_CATEGORIES=comma separated list of enum names of categories t
|
|||||||
PROSTOR_IGNORED_USERNAMES=!!! This is not used for prostor crawler !!!
|
PROSTOR_IGNORED_USERNAMES=!!! This is not used for prostor crawler !!!
|
||||||
PROSTOR_DELAY_BETWEEN_PAGES=!!! This is not used for prostor crawler !!!
|
PROSTOR_DELAY_BETWEEN_PAGES=!!! This is not used for prostor crawler !!!
|
||||||
PROSTOR_FORCE_CRAWL=Non-zero value will force crawler to crawl all pages without stopping when known real estate is found
|
PROSTOR_FORCE_CRAWL=Non-zero value will force crawler to crawl all pages without stopping when known real estate is found
|
||||||
|
PROSTOR_LOGIN_EMAIL=Email of valid Prostor.ba account for crawling purposes
|
||||||
|
PROSTOR_LOGIN_PASS=Password of valid Prostor.ba account for crawling purposes
|
||||||
#==AKTIDO==
|
#==AKTIDO==
|
||||||
AKTIDO_MAX_PAGES=Restrict crawler to this number of pages
|
AKTIDO_MAX_PAGES=Restrict crawler to this number of pages
|
||||||
AKTIDO_MAX_RESULTS_PER_PAGE=Only this number or less results from one page will be scraped and saved
|
AKTIDO_MAX_RESULTS_PER_PAGE=Only this number or less results from one page will be scraped and saved
|
||||||
|
|||||||
Reference in New Issue
Block a user