Compare commits
2 Commits
welcome-po
...
crawler-lo
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
7567b27bb8 | ||
|
|
755973e42d |
@@ -39,11 +39,11 @@ async function crawlAll() {
|
|||||||
await saver.connect();
|
await saver.connect();
|
||||||
await saver.save(results);
|
await saver.save(results);
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
console.log("Error saving. Trying next saver! ", e);
|
console.log("[E] Error saving. Trying next saver! ", e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
console.log("Error crawling. Trying next crawler! ", e);
|
console.log("[E] Error crawling. Trying next crawler! ", e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -12,10 +12,10 @@ export default class MongoSaver {
|
|||||||
if (!saver.ready) {
|
if (!saver.ready) {
|
||||||
MongoClient.connect(saver.url, (err, db) => {
|
MongoClient.connect(saver.url, (err, db) => {
|
||||||
if (err) {
|
if (err) {
|
||||||
console.log('Unable to connect to the mongoDB server. Error:', err);
|
console.log('[E] Unable to connect to the mongoDB server. Error:', err);
|
||||||
reject(err);
|
reject(err);
|
||||||
} else {
|
} else {
|
||||||
console.log('Connection established to', this.url);
|
//console.log('[I] Connection established to', this.url);
|
||||||
saver.db = db;
|
saver.db = db;
|
||||||
saver.collection = db.collection('listings');
|
saver.collection = db.collection('listings');
|
||||||
saver.ready = true;
|
saver.ready = true;
|
||||||
@@ -43,7 +43,7 @@ export default class MongoSaver {
|
|||||||
//Close connection
|
//Close connection
|
||||||
await this.disconnect();
|
await this.disconnect();
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
console.log("error closing", e);
|
console.log("[E] Error closing connection", e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -113,7 +113,7 @@ export default class OlxCrawler {
|
|||||||
|
|
||||||
return data;
|
return data;
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
console.error('Exception caught: ' + e.message);
|
console.error('[E] Exception caught: ' + e.message);
|
||||||
}
|
}
|
||||||
|
|
||||||
return null;
|
return null;
|
||||||
@@ -121,7 +121,7 @@ export default class OlxCrawler {
|
|||||||
|
|
||||||
async indexPage(pageNr, maxResults = 1000) {
|
async indexPage(pageNr, maxResults = 1000) {
|
||||||
try {
|
try {
|
||||||
console.log('Starting to index page: ' + pageNr);
|
//console.log('Starting to index page: ' + pageNr);
|
||||||
const url = `http://www.olx.ba/pretraga?vrsta=samoprodaja&sort_order=desc&kategorija=23&sort_po=datum&kanton=9&stranica=${pageNr}`;
|
const url = `http://www.olx.ba/pretraga?vrsta=samoprodaja&sort_order=desc&kategorija=23&sort_po=datum&kanton=9&stranica=${pageNr}`;
|
||||||
|
|
||||||
const res = await fetch(url);
|
const res = await fetch(url);
|
||||||
@@ -138,7 +138,7 @@ export default class OlxCrawler {
|
|||||||
let actualNoOfResults = (hrefs.length <= maxResults) ? hrefs.length : maxResults;
|
let actualNoOfResults = (hrefs.length <= maxResults) ? hrefs.length : maxResults;
|
||||||
|
|
||||||
for (let i = 0; i < hrefs.length; i++) {
|
for (let i = 0; i < hrefs.length; i++) {
|
||||||
console.log(`indexing: ${hrefs[i]}`);
|
//console.log(`indexing: ${hrefs[i]}`);
|
||||||
|
|
||||||
const singleData = await this.indexSingle(hrefs[i]);
|
const singleData = await this.indexSingle(hrefs[i]);
|
||||||
|
|
||||||
@@ -150,7 +150,7 @@ export default class OlxCrawler {
|
|||||||
|
|
||||||
return results;
|
return results;
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
console.error('Exception caught:' + e);
|
console.error('[E] Exception caught:' + e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -166,11 +166,11 @@ export default class ProstorCrawler {
|
|||||||
//images: cloudinaryImages
|
//images: cloudinaryImages
|
||||||
images,
|
images,
|
||||||
};
|
};
|
||||||
console.log (data);
|
//console.log (data);
|
||||||
|
|
||||||
return data;
|
return data;
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
console.error ('Exception caught: ' + e.message);
|
console.error ('[E] Exception caught: ' + e.message);
|
||||||
}
|
}
|
||||||
|
|
||||||
return null;
|
return null;
|
||||||
@@ -178,7 +178,7 @@ export default class ProstorCrawler {
|
|||||||
|
|
||||||
async indexPage (pageNr, maxResults = 1000) {
|
async indexPage (pageNr, maxResults = 1000) {
|
||||||
try {
|
try {
|
||||||
console.log ('Starting to index page: ' + pageNr);
|
//console.log ('Starting to index page: ' + pageNr);
|
||||||
const url = `http://prostor.ba/index.php`;
|
const url = `http://prostor.ba/index.php`;
|
||||||
|
|
||||||
const data = new FormData ();
|
const data = new FormData ();
|
||||||
@@ -208,7 +208,7 @@ export default class ProstorCrawler {
|
|||||||
|
|
||||||
const results = {};
|
const results = {};
|
||||||
for (const href of hrefs) {
|
for (const href of hrefs) {
|
||||||
console.log (`indexing: ${href}`);
|
//console.log (`indexing: ${href}`);
|
||||||
|
|
||||||
const singleData = await this.indexSingle (href);
|
const singleData = await this.indexSingle (href);
|
||||||
|
|
||||||
@@ -221,7 +221,7 @@ export default class ProstorCrawler {
|
|||||||
|
|
||||||
return results;
|
return results;
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
console.error ('Exception caught:' + e);
|
console.error ('[E] Exception caught:' + e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -22,7 +22,6 @@ import {
|
|||||||
|
|
||||||
export default class RentalCrawler {
|
export default class RentalCrawler {
|
||||||
constructor (fromPage = 0, toPage = 10, maxResults = 1000) {
|
constructor (fromPage = 0, toPage = 10, maxResults = 1000) {
|
||||||
console.log ('Rental Crawler');
|
|
||||||
|
|
||||||
this.fromPage = fromPage;
|
this.fromPage = fromPage;
|
||||||
this.toPage = toPage;
|
this.toPage = toPage;
|
||||||
@@ -70,7 +69,7 @@ export default class RentalCrawler {
|
|||||||
dataJson = JSON.parse (dataJsonString);
|
dataJson = JSON.parse (dataJsonString);
|
||||||
break;
|
break;
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
console.log ('No JSON string');
|
//console.log ('No JSON string');
|
||||||
if (i === lastN) throw e;
|
if (i === lastN) throw e;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -95,7 +94,6 @@ export default class RentalCrawler {
|
|||||||
lng = dataJson['re_realEstates_longitude'];
|
lng = dataJson['re_realEstates_longitude'];
|
||||||
hasMap = true;
|
hasMap = true;
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
console.log ('error : ' + e);
|
|
||||||
//This ad has no JSON string, informations should be retrieved using HTML selectors
|
//This ad has no JSON string, informations should be retrieved using HTML selectors
|
||||||
time = undefined;
|
time = undefined;
|
||||||
lat = 0;
|
lat = 0;
|
||||||
@@ -183,7 +181,7 @@ export default class RentalCrawler {
|
|||||||
|
|
||||||
return data;
|
return data;
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
console.error ('Exception caught: ' + e.message);
|
console.error ('[E] Exception caught: ' + e.message);
|
||||||
}
|
}
|
||||||
|
|
||||||
return null;
|
return null;
|
||||||
@@ -191,7 +189,7 @@ export default class RentalCrawler {
|
|||||||
|
|
||||||
async indexPage (pageNr, maxResults = 1000) {
|
async indexPage (pageNr, maxResults = 1000) {
|
||||||
try {
|
try {
|
||||||
console.log ('Starting to index page: ' + pageNr);
|
//console.log ('Starting to index page: ' + pageNr);
|
||||||
|
|
||||||
const url = 'http://www.rental.ba/pretraga/prodaja-1/stranica-' + pageNr;
|
const url = 'http://www.rental.ba/pretraga/prodaja-1/stranica-' + pageNr;
|
||||||
|
|
||||||
@@ -227,7 +225,7 @@ export default class RentalCrawler {
|
|||||||
|
|
||||||
const results = {};
|
const results = {};
|
||||||
for (const href of hrefs) {
|
for (const href of hrefs) {
|
||||||
console.log (`indexing: ${href}`);
|
//console.log (`indexing: ${href}`);
|
||||||
|
|
||||||
const singleData = await this.indexSingle (href);
|
const singleData = await this.indexSingle (href);
|
||||||
|
|
||||||
@@ -240,7 +238,7 @@ export default class RentalCrawler {
|
|||||||
|
|
||||||
return results;
|
return results;
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
console.error ('Exception caught:' + e);
|
console.error ('[E] Exception caught:' + e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user