working on crawler
This commit is contained in:
@@ -0,0 +1,30 @@
|
||||
const fetch = require('node-fetch');
|
||||
const cheerio = require('cheerio');
|
||||
|
||||
/**
 * Crawls an inclusive range of pages and merges the per-page results into a
 * single object.
 *
 * NOTE(review): `indexPage(page, maxResults)` is called by `indexPages` but is
 * not defined in the visible part of this class. It must exist (here or in a
 * subclass) before `crawl()` can succeed. TODO confirm where it lives.
 */
export default class OlxCrawler {
  /**
   * @param {number|string} [fromPage=0] - First page to crawl (inclusive).
   * @param {number|string} [toPage=10] - Last page to crawl (inclusive).
   * @param {number|string} [maxResults=1000] - Limit forwarded to `indexPage`.
   * @param {number|string} [delayMs=5000] - Pause between two consecutive pages, in milliseconds.
   */
  constructor(fromPage = 0, toPage = 10, maxResults = 1000, delayMs = 5000) {
    // Arguments may arrive as strings (e.g. read from environment variables).
    // Coerce them so that later comparisons are numeric, not lexicographic.
    this.fromPage = Number(fromPage);
    this.toPage = Number(toPage);
    this.maxResults = Number(maxResults);
    this.delayMs = Number(delayMs);
  }

  /**
   * Resolves after roughly `ms` milliseconds.
   *
   * @param {number} ms - Delay in milliseconds.
   * @returns {Promise<void>}
   */
  sleep(ms) {
    return new Promise((resolve) => setTimeout(resolve, ms));
  }

  /**
   * Indexes every page from `start` to `end` (both inclusive), one at a time,
   * pausing between pages.
   *
   * Per-page results are merged with `Object.assign`, so when two pages return
   * the same key the later page wins.
   *
   * @param {number|string} start - First page (inclusive).
   * @param {number|string} end - Last page (inclusive).
   * @param {number} [maxResults=1000] - Limit forwarded to `indexPage`.
   * @returns {Promise<Object>} Merged results of all indexed pages.
   */
  async indexPages(start, end, maxResults = 1000) {
    // Two string bounds would be compared lexicographically ("2" <= "10" is
    // false), silently skipping the whole range; force numeric comparison.
    const firstPage = Number(start);
    const lastPage = Number(end);
    const results = {};

    for (let page = firstPage; page <= lastPage; page++) {
      const pageResults = await this.indexPage(page, maxResults);
      Object.assign(results, pageResults);

      // Only wait when another page follows; a pause after the final page
      // would just delay returning the results.
      if (page < lastPage) {
        await this.sleep(this.delayMs);
      }
    }

    return results;
  }

  /**
   * Crawls the page range configured in the constructor.
   *
   * @returns {Promise<Object>} Merged results of all indexed pages.
   */
  async crawl() {
    return this.indexPages(this.fromPage, this.toPage, this.maxResults);
  }
}
|
||||
@@ -0,0 +1,30 @@
|
||||
|
||||
|
||||
// Crawlers that crawlAll() runs, in order. The list is currently empty; the
// OLX crawler entry below is disabled:
//   new OlxCrawler(process.env.OLX_FROM_PAGE, process.env.OLX_TO_PAGE, process.env.OLX_MAX_RESULTS),
const crawlers = [];
|
||||
|
||||
/**
 * Runs every crawler in `crawlers` and passes each crawler's results to every
 * saver in `savers`, then closes all savers.
 *
 * A failing crawler or saver is logged and skipped so the remaining ones still
 * run. Each saver is connected before every save.
 *
 * NOTE(review): `savers` is not defined in the visible part of this file —
 * presumably declared elsewhere at module level. TODO confirm.
 *
 * @returns {Promise<void>}
 */
async function crawlAll() {
  for (const crawler of crawlers) {
    try {
      const results = await crawler.crawl();

      for (const saver of savers) {
        try {
          await saver.connect();
          await saver.save(results);
        } catch (e) {
          console.log("Error saving. Trying next saver! ", e);
        }
      }
    } catch (e) {
      console.log("Error crawling. Trying next crawler! ", e);
    }
  }

  // Close each saver independently: awaiting surfaces asynchronous close
  // failures here, and the per-saver guard keeps one failure from leaving the
  // remaining savers open.
  for (const saver of savers) {
    try {
      await saver.close();
    } catch (e) {
      console.log("Error closing saver. Trying next saver! ", e);
    }
  }
}
|
||||
|
||||
// Script entry point. The returned promise is handled explicitly so that an
// unexpected failure is logged and reflected in the exit code instead of
// surfacing as an unhandled rejection.
crawlAll().catch((e) => {
  console.log("Unexpected error while crawling! ", e);
  process.exitCode = 1;
});
|
||||
Reference in New Issue
Block a user