WIP: Login for the Prostor crawler.
This commit is contained in:
@@ -32,6 +32,11 @@ const PRINT_CRAWLER_DEBUG = process.env.PRINT_CRAWLER_DEBUG_INFO || 0;
|
||||
|
||||
const API_MAP_KEY = process.env.API_MAP_KEY || "";
|
||||
|
||||
// Credentials handed to the Prostor crawler's login step. Both values are read
// straight from the environment and are undefined when the variables are unset.
const PROSTOR_LOGIN = {
|
||||
EMAIL: process.env.PROSTOR_LOGIN_EMAIL,
|
||||
PASSWORD: process.env.PROSTOR_LOGIN_PASS
|
||||
};
|
||||
|
||||
module.exports = {
|
||||
APP_PORT,
|
||||
APP_URL,
|
||||
@@ -42,5 +47,6 @@ module.exports = {
|
||||
MAX_REAL_ESTATES_IN_EMAIL,
|
||||
MAX_REAL_ESTATES_IN_FIRST_EMAIL,
|
||||
PRINT_CRAWLER_DEBUG,
|
||||
API_MAP_KEY
|
||||
API_MAP_KEY,
|
||||
PROSTOR_LOGIN
|
||||
};
|
||||
|
||||
@@ -16,7 +16,8 @@ const {
|
||||
|
||||
const {
|
||||
PRINT_CRAWLER_DEBUG,
|
||||
DEFAULT_TIMEZONE
|
||||
DEFAULT_TIMEZONE,
|
||||
PROSTOR_LOGIN
|
||||
} = require("../../config/appConfig");
|
||||
const { PROSTOR_FORCE_CRAWL } = require("../specificConfigs/prostor");
|
||||
|
||||
@@ -60,10 +61,12 @@ class ProstorCrawler {
|
||||
|
||||
async crawl() {
|
||||
const crawlAdCategories = this.crawlerAdCategories;
|
||||
|
||||
//New tag to check if crawler loged in
|
||||
const login = await this.loginForScraping(PROSTOR_LOGIN);
|
||||
const newRealEstates = [];
|
||||
|
||||
if (crawlAdCategories) {
|
||||
//
|
||||
console.log("login before crawl:", login);
|
||||
if (crawlAdCategories && login) {
|
||||
const indexGenerators = [];
|
||||
for (const adCategory of crawlAdCategories) {
|
||||
indexGenerators.push(this.categoryIndexer(adCategory));
|
||||
@@ -549,8 +552,6 @@ class ProstorCrawler {
|
||||
}
|
||||
|
||||
static getStatusId(statusText) {
|
||||
//
|
||||
console.log("statusText u funkciji", statusText);
|
||||
switch (statusText) {
|
||||
case "":
|
||||
return AD_STATUS.STATUS_NORMAL;
|
||||
@@ -583,6 +584,47 @@ class ProstorCrawler {
|
||||
return savers[0].save(results);
|
||||
//so that we can use some sequelize options and information when data is inserted
|
||||
}
|
||||
async loginForScraping(PROSTOR_LOGIN) {
|
||||
console.log("PROSTOR_LOGIN", PROSTOR_LOGIN);
|
||||
let logedin = false;
|
||||
fetch("https://prostor.ba/moj-prostor/prijava", {
|
||||
method: "POST",
|
||||
body: JSON.stringify({
|
||||
email: PROSTOR_LOGIN.EMAIL,
|
||||
password: PROSTOR_LOGIN.PASSWORD
|
||||
})
|
||||
})
|
||||
.then(page => {
|
||||
/* console.log("page", page.text());
|
||||
|
||||
const $ = cheerio.load(page);
|
||||
console.log("$ ", $);
|
||||
if (
|
||||
$(".icons .d-none.d-xl-inline-block.mr-2")
|
||||
.text()
|
||||
.indexOf("Dobrodošli") != -1
|
||||
) {
|
||||
console.log("[PROSTOR]: Crawler loged in!");
|
||||
logedin = true;
|
||||
} else {
|
||||
console.log("[PROSTOR]: Crawler login failed - wrong credentials!");
|
||||
} */
|
||||
|
||||
return page.text();
|
||||
})
|
||||
.then(resp => {
|
||||
// console.log(resp);
|
||||
const $ = cheerio.load(resp);
|
||||
console.log("$ ", $("h1").text());
|
||||
})
|
||||
|
||||
.catch(err => {
|
||||
console.log("[PROSTOR]: Crawler login error ", err);
|
||||
});
|
||||
//
|
||||
console.log("login in function:", logedin);
|
||||
return logedin;
|
||||
}
|
||||
}
|
||||
|
||||
module.exports = ProstorCrawler;
|
||||
|
||||
@@ -51,6 +51,8 @@ PROSTOR_CRAWLER_AD_CATEGORIES=comma separated list of enum names of categories t
|
||||
PROSTOR_IGNORED_USERNAMES=!!! This is not used for prostor crawler !!!
|
||||
PROSTOR_DELAY_BETWEEN_PAGES=!!! This is not used for prostor crawler !!!
|
||||
PROSTOR_FORCE_CRAWL=Non-zero value will force crawler to crawl all pages without stopping when known real estate is found
|
||||
PROSTOR_LOGIN_EMAIL=Email of valid Prostor.ba account for crawling purposes
|
||||
PROSTOR_LOGIN_PASS=Password of valid Prostor.ba account for crawling purposes
|
||||
#==AKTIDO==
|
||||
AKTIDO_MAX_PAGES=Restrict crawler to this number of pages
|
||||
AKTIDO_MAX_RESULTS_PER_PAGE=Only this number or less results from one page will be scraped and saved
|
||||
|
||||
Reference in New Issue
Block a user