crawling and saving to mongo
This commit is contained in:
@@ -1,4 +1,4 @@
|
|||||||
MONGO_URL='mongodb://localhost:27017/kivi'
|
MONGO_URL='mongodb://localhost:27017/kivi'
|
||||||
OLX_FROM_PAGE=8
|
OLX_FROM_PAGE=8
|
||||||
OLX_TO_PAGE=8
|
OLX_TO_PAGE=8
|
||||||
OLX_MAX_RESULTS=1000
|
OLX_MAX_RESULTS=3
|
||||||
|
|||||||
@@ -24,25 +24,29 @@ let savers = [
|
|||||||
new MongoSaver(process.env.MONGO_URL)
|
new MongoSaver(process.env.MONGO_URL)
|
||||||
];
|
];
|
||||||
|
|
||||||
let done = 0;
|
async function crawlAll() {
|
||||||
for (let crawler of crawlers) {
|
let crawlersAndSaversDone = 0;
|
||||||
try {
|
|
||||||
crawler.crawl().then((results) => {
|
for (let crawler of crawlers) {
|
||||||
|
try {
|
||||||
|
let results = await crawler.crawl()
|
||||||
for (let saver of savers) {
|
for (let saver of savers) {
|
||||||
try {
|
try {
|
||||||
saver.open();
|
await saver.connect();
|
||||||
saver.save(results);
|
await saver.save(results);
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
console.log("Error saving. Trying next saver! ", e);
|
console.log("Error saving. Trying next saver! ", e);
|
||||||
|
crawlersAndSaversDone += 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
});
|
} catch (e) {
|
||||||
} catch (e) {
|
console.log("Error crawling. Trying next crawler! ", e);
|
||||||
console.log("Error crawling. Trying next crawler! ", e);
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (let saver of savers) {
|
||||||
|
saver.close();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for (let saver of savers) {
|
crawlAll();
|
||||||
saver.close();
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -15,7 +15,7 @@ export default class MongoSaver {
|
|||||||
console.log('Unable to connect to the mongoDB server. Error:', err);
|
console.log('Unable to connect to the mongoDB server. Error:', err);
|
||||||
reject(err);
|
reject(err);
|
||||||
} else {
|
} else {
|
||||||
console.log('Connection established to', url);
|
console.log('Connection established to', this.url);
|
||||||
saver.db = db;
|
saver.db = db;
|
||||||
saver.collection = db.collection('results');
|
saver.collection = db.collection('results');
|
||||||
saver.ready = true;
|
saver.ready = true;
|
||||||
|
|||||||
@@ -122,7 +122,7 @@ export default class OlxCrawler {
|
|||||||
async indexPages(start, end, maxResults = 1000) {
|
async indexPages(start, end, maxResults = 1000) {
|
||||||
let results = {};
|
let results = {};
|
||||||
for (let i = start; i <= end; i++) {
|
for (let i = start; i <= end; i++) {
|
||||||
let result = await this.indexPage(i, numberOfResults);
|
let result = await this.indexPage(i, maxResults);
|
||||||
Object.assign(results, result)
|
Object.assign(results, result)
|
||||||
await this.sleep(5000);
|
await this.sleep(5000);
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user