OLX: added preflight check of available concurrent requests.
This commit is contained in:
@@ -19,7 +19,8 @@ const {
|
||||
const {
|
||||
DEFAULT_TIMEZONE,
|
||||
PRINT_CRAWLER_DEBUG,
|
||||
NUMBER_OF_CONCURRENT_REQ_SCRAPER_API
|
||||
NUMBER_OF_CONCURRENT_REQ_SCRAPER_API,
|
||||
SCRAPER_API_KEY
|
||||
} = require("../../config/appConfig");
|
||||
|
||||
const OLX_ENUMS = {
|
||||
@@ -45,6 +46,8 @@ const OLX_ENUMS = {
|
||||
|
||||
const { OLX_FORCE_CRAWL } = require("../specificConfigs/olx");
|
||||
|
||||
const scraperapiClient = require("scraperapi-sdk")(SCRAPER_API_KEY);
|
||||
|
||||
class OlxCrawler {
|
||||
constructor(
|
||||
savers = [],
|
||||
@@ -201,6 +204,14 @@ class OlxCrawler {
|
||||
i,
|
||||
i + NUMBER_OF_CONCURRENT_REQ_SCRAPER_API
|
||||
);
|
||||
//Before sending n requests to ScraperAPI, send a preflight request to check whether enough concurrent request slots are available
|
||||
//It does not send "real" requests until they are approved internally
|
||||
let availableConcurrentReqSlots = false;
|
||||
do {
|
||||
availableConcurrentReqSlots = await this.checkAvailableConcurrentReqSlots(
|
||||
concurrentUrlsToScrape.length
|
||||
);
|
||||
} while (availableConcurrentReqSlots !== true);
|
||||
//
|
||||
console.log(
|
||||
`OLX - Sending requests from ${i} to ${i +
|
||||
@@ -920,28 +931,25 @@ class OlxCrawler {
|
||||
console.log("sprat = NEPOZNATO [", floorText, "]");
|
||||
return null;
|
||||
}
|
||||
/*
|
||||
async consecutiveRequestSending(requestsToScraperApi) {
|
||||
let dataFromAllRequests = [];
|
||||
|
||||
for (
|
||||
const i = 0;
|
||||
i <= requestsToScraperApi.length;
|
||||
i + NUMBER_OF_CONCURRENT_REQ_SCRAPER_API
|
||||
) {
|
||||
const concurrentRequestsToScraperApi = requestsToScraperApi.slice(
|
||||
i,
|
||||
NUMBER_OF_CONCURRENT_REQ_SCRAPER_API
|
||||
);
|
||||
const dataFromConcurrentRequest = await Promise.all(
|
||||
concurrentRequestsToScraperApi
|
||||
);
|
||||
dataFromAllRequests.push(dataFromConcurrentRequest);
|
||||
this.sleep(DELAY_BETWEEN_REQ_SCRAPER_API);
|
||||
async checkAvailableConcurrentReqSlots(numberOfNeededConcurrentReq) {
|
||||
try {
|
||||
const scraperApiAccountInfo = await scraperapiClient.account();
|
||||
const numberOfUsedConcurrentReq =
|
||||
scraperApiAccountInfo.concurrentRequests;
|
||||
const limitOfConcurrentReq = scraperApiAccountInfo.concurrencyLimit;
|
||||
//Buffer of requests to prevent errors with preflight requests
|
||||
const bufferNumberOfReq = 3;
|
||||
const numberOfAvailableConcurrentReq =
|
||||
limitOfConcurrentReq - bufferNumberOfReq - numberOfUsedConcurrentReq;
|
||||
if (numberOfNeededConcurrentReq <= numberOfAvailableConcurrentReq) {
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
} catch (err) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return dataFromAllRequests;
|
||||
}*/
|
||||
}
|
||||
|
||||
async sleep(ms) {
|
||||
// console.log("Sleep for:", ms);
|
||||
|
||||
Reference in New Issue
Block a user