Fix Prostor crawler - use new JSON location in page body

This commit is contained in:
Bilal
2020-05-14 15:38:15 +02:00
parent f1d45fed26
commit ba60f8749d

View File

@@ -63,13 +63,19 @@ class ProstorCrawler {
async crawl() {
const crawlAdCategories = this.crawlerAdCategories;
const crawlAdTypes = this.crawlerAdTypes;
if (!crawlAdCategories || !crawlAdTypes) {
return []
}
const newRealEstates = [];
//We need a session cookie to use login privileges
const prostorCookie = await this.getCookies();
//New tag to check if crawler loged in
//New tag to check if crawler logged in
const login = await this.loginForScraping(PROSTOR_LOGIN, prostorCookie);
const newRealEstates = [];
//Crawl only if login was successful
if (crawlAdCategories && login) {
if (login) {
const indexGenerators = [];
for (const adCategory of crawlAdCategories) {
indexGenerators.push(this.categoryIndexer(adCategory, prostorCookie));
@@ -135,6 +141,11 @@ class ProstorCrawler {
prostorCookie
);
if (!Array.isArray(listOfAllRealEstates)){
console.log('[PROSTOR] Could not find real estate JSON data, check selector !');
return undefined;
}
let elementToStartIndexFrom = 0;
while (true) {
const realEstatesForSinglePage = listOfAllRealEstates.slice(
@@ -435,7 +446,7 @@ class ProstorCrawler {
const $ = cheerio.load(body);
const scriptElement = $(
"body > div > div.container-fluid > script:nth-child(7)"
"body > div.content > div.container-fluid > script:nth-child(6)"
);
if (