construct crawling url before indexing single page
This commit is contained in:
@@ -78,9 +78,9 @@ class OlxCrawler {
|
|||||||
const maxResultsPerPage = this.maxResults;
|
const maxResultsPerPage = this.maxResults;
|
||||||
|
|
||||||
for (let pageNumber = startPage; pageNumber <= endPage; pageNumber++) {
|
for (let pageNumber = startPage; pageNumber <= endPage; pageNumber++) {
|
||||||
|
const pageUrl = `${url}&stranica=${pageNumber}`;
|
||||||
const singlePageResults = await this.indexSinglePage(
|
const singlePageResults = await this.indexSinglePage(
|
||||||
url,
|
pageUrl,
|
||||||
pageNumber,
|
|
||||||
maxResultsPerPage
|
maxResultsPerPage
|
||||||
);
|
);
|
||||||
await this.saveCrawledResults(singlePageResults);
|
await this.saveCrawledResults(singlePageResults);
|
||||||
@@ -88,10 +88,8 @@ class OlxCrawler {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async indexSinglePage(urlWithoutPageNumber, pageNumber, maxResultsPerPage) {
|
async indexSinglePage(url, maxResultsPerPage) {
|
||||||
try {
|
try {
|
||||||
const url = `${urlWithoutPageNumber}&stranica=${pageNumber}`;
|
|
||||||
|
|
||||||
const res = await fetch(url);
|
const res = await fetch(url);
|
||||||
const body = await res.text();
|
const body = await res.text();
|
||||||
const $ = cheerio.load(body);
|
const $ = cheerio.load(body);
|
||||||
@@ -114,8 +112,6 @@ class OlxCrawler {
|
|||||||
hrefs.length <= maxResultsPerPage ? hrefs.length : maxResultsPerPage;
|
hrefs.length <= maxResultsPerPage ? hrefs.length : maxResultsPerPage;
|
||||||
|
|
||||||
for (let i = 0; i < actualNoOfResults; i++) {
|
for (let i = 0; i < actualNoOfResults; i++) {
|
||||||
console.log(`Scraping : ${hrefs[i]}`);
|
|
||||||
|
|
||||||
const adData = await this.scrapeAd(hrefs[i]);
|
const adData = await this.scrapeAd(hrefs[i]);
|
||||||
|
|
||||||
if (adData) {
|
if (adData) {
|
||||||
|
|||||||
Reference in New Issue
Block a user