WIP: Login for the Prostor crawler.

This commit is contained in:
Naida Vatric
2020-01-12 01:22:50 +01:00
parent 64e4835899
commit 8505282670
3 changed files with 57 additions and 7 deletions

View File

@@ -32,6 +32,11 @@ const PRINT_CRAWLER_DEBUG = process.env.PRINT_CRAWLER_DEBUG_INFO || 0;
const API_MAP_KEY = process.env.API_MAP_KEY || "";
// Credentials of the Prostor.ba account used for crawling. Both values are read from the
// environment and are undefined when the corresponding variables are not set.
const PROSTOR_LOGIN = {
EMAIL: process.env.PROSTOR_LOGIN_EMAIL,
PASSWORD: process.env.PROSTOR_LOGIN_PASS
};
module.exports = {
APP_PORT,
APP_URL,
@@ -42,5 +47,6 @@ module.exports = {
MAX_REAL_ESTATES_IN_EMAIL,
MAX_REAL_ESTATES_IN_FIRST_EMAIL,
PRINT_CRAWLER_DEBUG,
API_MAP_KEY
API_MAP_KEY,
PROSTOR_LOGIN
};

View File

@@ -16,7 +16,8 @@ const {
const {
PRINT_CRAWLER_DEBUG,
DEFAULT_TIMEZONE
DEFAULT_TIMEZONE,
PROSTOR_LOGIN
} = require("../../config/appConfig");
const { PROSTOR_FORCE_CRAWL } = require("../specificConfigs/prostor");
@@ -60,10 +61,12 @@ class ProstorCrawler {
async crawl() {
const crawlAdCategories = this.crawlerAdCategories;
// Flag that tells whether the crawler managed to log in
const login = await this.loginForScraping(PROSTOR_LOGIN);
const newRealEstates = [];
if (crawlAdCategories) {
//
console.log("login before crawl:", login);
if (crawlAdCategories && login) {
const indexGenerators = [];
for (const adCategory of crawlAdCategories) {
indexGenerators.push(this.categoryIndexer(adCategory));
@@ -549,8 +552,6 @@ class ProstorCrawler {
}
static getStatusId(statusText) {
//
console.log("statusText u funkciji", statusText);
switch (statusText) {
case "":
return AD_STATUS.STATUS_NORMAL;
@@ -583,6 +584,47 @@ class ProstorCrawler {
return savers[0].save(results);
//so that we can use some sequelize options and information when data is inserted
}
async loginForScraping(PROSTOR_LOGIN) {
console.log("PROSTOR_LOGIN", PROSTOR_LOGIN);
let logedin = false;
fetch("https://prostor.ba/moj-prostor/prijava", {
method: "POST",
body: JSON.stringify({
email: PROSTOR_LOGIN.EMAIL,
password: PROSTOR_LOGIN.PASSWORD
})
})
.then(page => {
/* console.log("page", page.text());
const $ = cheerio.load(page);
console.log("$ ", $);
if (
$(".icons .d-none.d-xl-inline-block.mr-2")
.text()
.indexOf("Dobrodošli") != -1
) {
console.log("[PROSTOR]: Crawler loged in!");
logedin = true;
} else {
console.log("[PROSTOR]: Crawler login failed - wrong credentials!");
} */
return page.text();
})
.then(resp => {
// console.log(resp);
const $ = cheerio.load(resp);
console.log("$ ", $("h1").text());
})
.catch(err => {
console.log("[PROSTOR]: Crawler login error ", err);
});
//
console.log("login in function:", logedin);
return logedin;
}
}
module.exports = ProstorCrawler;

View File

@@ -51,6 +51,8 @@ PROSTOR_CRAWLER_AD_CATEGORIES=comma separated list of enum names of categories t
PROSTOR_IGNORED_USERNAMES=!!! This is not used for prostor crawler !!!
PROSTOR_DELAY_BETWEEN_PAGES=!!! This is not used for prostor crawler !!!
PROSTOR_FORCE_CRAWL=Non-zero value will force crawler to crawl all pages without stopping when known real estate is found
PROSTOR_LOGIN_EMAIL=Email of valid Prostor.ba account for crawling purposes
PROSTOR_LOGIN_PASS=Password of valid Prostor.ba account for crawling purposes
#==AKTIDO==
AKTIDO_MAX_PAGES=Restrict crawler to this number of pages
AKTIDO_MAX_RESULTS_PER_PAGE=Only this number or less results from one page will be scraped and saved