Construct the crawling URL before indexing a single page
This commit is contained in:
@@ -78,9 +78,9 @@ class OlxCrawler {
|
||||
const maxResultsPerPage = this.maxResults;
|
||||
|
||||
for (let pageNumber = startPage; pageNumber <= endPage; pageNumber++) {
|
||||
const pageUrl = `${url}&stranica=${pageNumber}`;
|
||||
const singlePageResults = await this.indexSinglePage(
|
||||
url,
|
||||
pageNumber,
|
||||
pageUrl,
|
||||
maxResultsPerPage
|
||||
);
|
||||
await this.saveCrawledResults(singlePageResults);
|
||||
@@ -88,10 +88,8 @@ class OlxCrawler {
|
||||
}
|
||||
}
|
||||
|
||||
async indexSinglePage(urlWithoutPageNumber, pageNumber, maxResultsPerPage) {
|
||||
async indexSinglePage(url, maxResultsPerPage) {
|
||||
try {
|
||||
const url = `${urlWithoutPageNumber}&stranica=${pageNumber}`;
|
||||
|
||||
const res = await fetch(url);
|
||||
const body = await res.text();
|
||||
const $ = cheerio.load(body);
|
||||
@@ -114,8 +112,6 @@ class OlxCrawler {
|
||||
hrefs.length <= maxResultsPerPage ? hrefs.length : maxResultsPerPage;
|
||||
|
||||
for (let i = 0; i < actualNoOfResults; i++) {
|
||||
console.log(`Scraping : ${hrefs[i]}`);
|
||||
|
||||
const adData = await this.scrapeAd(hrefs[i]);
|
||||
|
||||
if (adData) {
|
||||
|
||||
Reference in New Issue
Block a user