From ba60f8749dcabc45a0a5dc8782fd11063de8205c Mon Sep 17 00:00:00 2001 From: Bilal Date: Thu, 14 May 2020 15:38:15 +0200 Subject: [PATCH] Fix Prostor crawler - use new JSON location in page body --- app/crawler/specificCrawlers/prostor.js | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/app/crawler/specificCrawlers/prostor.js b/app/crawler/specificCrawlers/prostor.js index bf2c30a..251c514 100644 --- a/app/crawler/specificCrawlers/prostor.js +++ b/app/crawler/specificCrawlers/prostor.js @@ -63,13 +63,19 @@ class ProstorCrawler { async crawl() { const crawlAdCategories = this.crawlerAdCategories; + const crawlAdTypes = this.crawlerAdTypes; + if (!crawlAdCategories || !crawlAdTypes) { + return [] + } + + const newRealEstates = []; //We need session cookie to use login privileges const prostorCookie = await this.getCookies(); - //New tag to check if crawler loged in + //New tag to check if crawler logged in const login = await this.loginForScraping(PROSTOR_LOGIN, prostorCookie); - const newRealEstates = []; + //Crawl only if login was successful - if (crawlAdCategories && login) { + if (login) { const indexGenerators = []; for (const adCategory of crawlAdCategories) { indexGenerators.push(this.categoryIndexer(adCategory, prostorCookie)); @@ -135,6 +141,11 @@ class ProstorCrawler { prostorCookie ); + if (!Array.isArray(listOfAllRealEstates)){ + console.log('[PROSTOR] Could not find real estate JSON data, check selector !'); + return undefined; + } + let elementToStartIndexFrom = 0; while (true) { const realEstatesForSinglePage = listOfAllRealEstates.slice( @@ -435,7 +446,7 @@ class ProstorCrawler { const $ = cheerio.load(body); const scriptElement = $( - "body > div > div.container-fluid > script:nth-child(7)" + "body > div.content > div.container-fluid > script:nth-child(6)" ); if (