Fix Prostor crawler - use new JSON location in page body
This commit is contained in:
@@ -63,13 +63,19 @@ class ProstorCrawler {
|
|||||||
|
|
||||||
async crawl() {
|
async crawl() {
|
||||||
const crawlAdCategories = this.crawlerAdCategories;
|
const crawlAdCategories = this.crawlerAdCategories;
|
||||||
|
const crawlAdTypes = this.crawlerAdTypes;
|
||||||
|
if (!crawlAdCategories || !crawlAdTypes) {
|
||||||
|
return []
|
||||||
|
}
|
||||||
|
|
||||||
|
const newRealEstates = [];
|
||||||
//We need session cookie to use login privileges
|
//We need session cookie to use login privileges
|
||||||
const prostorCookie = await this.getCookies();
|
const prostorCookie = await this.getCookies();
|
||||||
//New tag to check if crawler loged in
|
//New tag to check if crawler logged in
|
||||||
const login = await this.loginForScraping(PROSTOR_LOGIN, prostorCookie);
|
const login = await this.loginForScraping(PROSTOR_LOGIN, prostorCookie);
|
||||||
const newRealEstates = [];
|
|
||||||
//Crawl only if login was successful
|
//Crawl only if login was successful
|
||||||
if (crawlAdCategories && login) {
|
if (login) {
|
||||||
const indexGenerators = [];
|
const indexGenerators = [];
|
||||||
for (const adCategory of crawlAdCategories) {
|
for (const adCategory of crawlAdCategories) {
|
||||||
indexGenerators.push(this.categoryIndexer(adCategory, prostorCookie));
|
indexGenerators.push(this.categoryIndexer(adCategory, prostorCookie));
|
||||||
@@ -135,6 +141,11 @@ class ProstorCrawler {
|
|||||||
prostorCookie
|
prostorCookie
|
||||||
);
|
);
|
||||||
|
|
||||||
|
if (!Array.isArray(listOfAllRealEstates)){
|
||||||
|
console.log('[PROSTOR] Could not find real estate JSON data, check selector !');
|
||||||
|
return undefined;
|
||||||
|
}
|
||||||
|
|
||||||
let elementToStartIndexFrom = 0;
|
let elementToStartIndexFrom = 0;
|
||||||
while (true) {
|
while (true) {
|
||||||
const realEstatesForSinglePage = listOfAllRealEstates.slice(
|
const realEstatesForSinglePage = listOfAllRealEstates.slice(
|
||||||
@@ -435,7 +446,7 @@ class ProstorCrawler {
|
|||||||
const $ = cheerio.load(body);
|
const $ = cheerio.load(body);
|
||||||
|
|
||||||
const scriptElement = $(
|
const scriptElement = $(
|
||||||
"body > div > div.container-fluid > script:nth-child(7)"
|
"body > div.content > div.container-fluid > script:nth-child(6)"
|
||||||
);
|
);
|
||||||
|
|
||||||
if (
|
if (
|
||||||
|
|||||||
Reference in New Issue
Block a user