Construct the crawling URL before indexing a single page

This commit is contained in:
Bilal Catic
2019-09-21 15:45:48 +02:00
parent 5f1697f6ae
commit c4f6c6e1c3

View File

@@ -78,9 +78,9 @@ class OlxCrawler {
const maxResultsPerPage = this.maxResults;
for (let pageNumber = startPage; pageNumber <= endPage; pageNumber++) {
const pageUrl = `${url}&stranica=${pageNumber}`;
const singlePageResults = await this.indexSinglePage(
url,
pageNumber,
pageUrl,
maxResultsPerPage
);
await this.saveCrawledResults(singlePageResults);
@@ -88,10 +88,8 @@ class OlxCrawler {
}
}
async indexSinglePage(urlWithoutPageNumber, pageNumber, maxResultsPerPage) {
async indexSinglePage(url, maxResultsPerPage) {
try {
const url = `${urlWithoutPageNumber}&stranica=${pageNumber}`;
const res = await fetch(url);
const body = await res.text();
const $ = cheerio.load(body);
@@ -114,8 +112,6 @@ class OlxCrawler {
hrefs.length <= maxResultsPerPage ? hrefs.length : maxResultsPerPage;
for (let i = 0; i < actualNoOfResults; i++) {
console.log(`Scraping : ${hrefs[i]}`);
const adData = await this.scrapeAd(hrefs[i]);
if (adData) {