Compare commits
2 Commits
welcome-po
...
crawler-lo
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
7567b27bb8 | ||
|
|
755973e42d |
@@ -39,11 +39,11 @@ async function crawlAll() {
|
||||
await saver.connect();
|
||||
await saver.save(results);
|
||||
} catch (e) {
|
||||
console.log("Error saving. Trying next saver! ", e);
|
||||
console.log("[E] Error saving. Trying next saver! ", e);
|
||||
}
|
||||
}
|
||||
} catch (e) {
|
||||
console.log("Error crawling. Trying next crawler! ", e);
|
||||
console.log("[E] Error crawling. Trying next crawler! ", e);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -12,10 +12,10 @@ export default class MongoSaver {
|
||||
if (!saver.ready) {
|
||||
MongoClient.connect(saver.url, (err, db) => {
|
||||
if (err) {
|
||||
console.log('Unable to connect to the mongoDB server. Error:', err);
|
||||
console.log('[E] Unable to connect to the mongoDB server. Error:', err);
|
||||
reject(err);
|
||||
} else {
|
||||
console.log('Connection established to', this.url);
|
||||
//console.log('[I] Connection established to', this.url);
|
||||
saver.db = db;
|
||||
saver.collection = db.collection('listings');
|
||||
saver.ready = true;
|
||||
@@ -43,7 +43,7 @@ export default class MongoSaver {
|
||||
//Close connection
|
||||
await this.disconnect();
|
||||
} catch (e) {
|
||||
console.log("error closing", e);
|
||||
console.log("[E] Error closing connection", e);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -113,7 +113,7 @@ export default class OlxCrawler {
|
||||
|
||||
return data;
|
||||
} catch (e) {
|
||||
console.error('Exception caught: ' + e.message);
|
||||
console.error('[E] Exception caught: ' + e.message);
|
||||
}
|
||||
|
||||
return null;
|
||||
@@ -121,7 +121,7 @@ export default class OlxCrawler {
|
||||
|
||||
async indexPage(pageNr, maxResults = 1000) {
|
||||
try {
|
||||
console.log('Starting to index page: ' + pageNr);
|
||||
//console.log('Starting to index page: ' + pageNr);
|
||||
const url = `http://www.olx.ba/pretraga?vrsta=samoprodaja&sort_order=desc&kategorija=23&sort_po=datum&kanton=9&stranica=${pageNr}`;
|
||||
|
||||
const res = await fetch(url);
|
||||
@@ -138,7 +138,7 @@ export default class OlxCrawler {
|
||||
let actualNoOfResults = (hrefs.length <= maxResults) ? hrefs.length : maxResults;
|
||||
|
||||
for (let i = 0; i < hrefs.length; i++) {
|
||||
console.log(`indexing: ${hrefs[i]}`);
|
||||
//console.log(`indexing: ${hrefs[i]}`);
|
||||
|
||||
const singleData = await this.indexSingle(hrefs[i]);
|
||||
|
||||
@@ -150,7 +150,7 @@ export default class OlxCrawler {
|
||||
|
||||
return results;
|
||||
} catch (e) {
|
||||
console.error('Exception caught:' + e);
|
||||
console.error('[E] Exception caught:' + e);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -166,11 +166,11 @@ export default class ProstorCrawler {
|
||||
//images: cloudinaryImages
|
||||
images,
|
||||
};
|
||||
console.log (data);
|
||||
//console.log (data);
|
||||
|
||||
return data;
|
||||
} catch (e) {
|
||||
console.error ('Exception caught: ' + e.message);
|
||||
console.error ('[E] Exception caught: ' + e.message);
|
||||
}
|
||||
|
||||
return null;
|
||||
@@ -178,7 +178,7 @@ export default class ProstorCrawler {
|
||||
|
||||
async indexPage (pageNr, maxResults = 1000) {
|
||||
try {
|
||||
console.log ('Starting to index page: ' + pageNr);
|
||||
//console.log ('Starting to index page: ' + pageNr);
|
||||
const url = `http://prostor.ba/index.php`;
|
||||
|
||||
const data = new FormData ();
|
||||
@@ -208,7 +208,7 @@ export default class ProstorCrawler {
|
||||
|
||||
const results = {};
|
||||
for (const href of hrefs) {
|
||||
console.log (`indexing: ${href}`);
|
||||
//console.log (`indexing: ${href}`);
|
||||
|
||||
const singleData = await this.indexSingle (href);
|
||||
|
||||
@@ -221,7 +221,7 @@ export default class ProstorCrawler {
|
||||
|
||||
return results;
|
||||
} catch (e) {
|
||||
console.error ('Exception caught:' + e);
|
||||
console.error ('[E] Exception caught:' + e);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -22,7 +22,6 @@ import {
|
||||
|
||||
export default class RentalCrawler {
|
||||
constructor (fromPage = 0, toPage = 10, maxResults = 1000) {
|
||||
console.log ('Rental Crawler');
|
||||
|
||||
this.fromPage = fromPage;
|
||||
this.toPage = toPage;
|
||||
@@ -70,7 +69,7 @@ export default class RentalCrawler {
|
||||
dataJson = JSON.parse (dataJsonString);
|
||||
break;
|
||||
} catch (e) {
|
||||
console.log ('No JSON string');
|
||||
//console.log ('No JSON string');
|
||||
if (i === lastN) throw e;
|
||||
}
|
||||
}
|
||||
@@ -95,7 +94,6 @@ export default class RentalCrawler {
|
||||
lng = dataJson['re_realEstates_longitude'];
|
||||
hasMap = true;
|
||||
} catch (e) {
|
||||
console.log ('error : ' + e);
|
||||
//This ad has no JSON string, informations should be retrieved using HTML selectors
|
||||
time = undefined;
|
||||
lat = 0;
|
||||
@@ -183,7 +181,7 @@ export default class RentalCrawler {
|
||||
|
||||
return data;
|
||||
} catch (e) {
|
||||
console.error ('Exception caught: ' + e.message);
|
||||
console.error ('[E] Exception caught: ' + e.message);
|
||||
}
|
||||
|
||||
return null;
|
||||
@@ -191,7 +189,7 @@ export default class RentalCrawler {
|
||||
|
||||
async indexPage (pageNr, maxResults = 1000) {
|
||||
try {
|
||||
console.log ('Starting to index page: ' + pageNr);
|
||||
//console.log ('Starting to index page: ' + pageNr);
|
||||
|
||||
const url = 'http://www.rental.ba/pretraga/prodaja-1/stranica-' + pageNr;
|
||||
|
||||
@@ -227,7 +225,7 @@ export default class RentalCrawler {
|
||||
|
||||
const results = {};
|
||||
for (const href of hrefs) {
|
||||
console.log (`indexing: ${href}`);
|
||||
//console.log (`indexing: ${href}`);
|
||||
|
||||
const singleData = await this.indexSingle (href);
|
||||
|
||||
@@ -240,7 +238,7 @@ export default class RentalCrawler {
|
||||
|
||||
return results;
|
||||
} catch (e) {
|
||||
console.error ('Exception caught:' + e);
|
||||
console.error ('[E] Exception caught:' + e);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user