Olx added preflight check of available concurrent req.

This commit is contained in:
Naida Vatric
2020-03-06 14:25:02 +01:00
parent 824414adad
commit 131536d9fb
4 changed files with 239 additions and 23 deletions

View File

@@ -19,7 +19,8 @@ const {
const {
DEFAULT_TIMEZONE,
PRINT_CRAWLER_DEBUG,
NUMBER_OF_CONCURRENT_REQ_SCRAPER_API
NUMBER_OF_CONCURRENT_REQ_SCRAPER_API,
SCRAPER_API_KEY
} = require("../../config/appConfig");
const OLX_ENUMS = {
@@ -45,6 +46,8 @@ const OLX_ENUMS = {
const { OLX_FORCE_CRAWL } = require("../specificConfigs/olx");
const scraperapiClient = require("scraperapi-sdk")(SCRAPER_API_KEY);
class OlxCrawler {
constructor(
savers = [],
@@ -201,6 +204,14 @@ class OlxCrawler {
i,
i + NUMBER_OF_CONCURRENT_REQ_SCRAPER_API
);
//Before sending n requests to ScraperAPI, send a preflight request to check that enough concurrent request slots are available.
//The "real" requests are not sent until this preflight check approves them.
let availableConcurrentReqSlots = false;
do {
availableConcurrentReqSlots = await this.checkAvailableConcurrentReqSlots(
concurrentUrlsToScrape.length
);
} while (availableConcurrentReqSlots !== true);
//
console.log(
`OLX - Sending requests from ${i} to ${i +
@@ -920,28 +931,25 @@ class OlxCrawler {
console.log("sprat = NEPOZNATO [", floorText, "]");
return null;
}
/*
async consecutiveRequestSending(requestsToScraperApi) {
let dataFromAllRequests = [];
for (
const i = 0;
i <= requestsToScraperApi.length;
i + NUMBER_OF_CONCURRENT_REQ_SCRAPER_API
) {
const concurrentRequestsToScraperApi = requestsToScraperApi.slice(
i,
NUMBER_OF_CONCURRENT_REQ_SCRAPER_API
);
const dataFromConcurrentRequest = await Promise.all(
concurrentRequestsToScraperApi
);
dataFromAllRequests.push(dataFromConcurrentRequest);
this.sleep(DELAY_BETWEEN_REQ_SCRAPER_API);
// Preflight check: fetches the account info via scraperapiClient.account() and
// reports whether the requested number of concurrent requests currently fits.
//
// @param {number} numberOfNeededConcurrentReq - how many concurrent requests the caller wants to send
// @returns {Promise<boolean>} true when
//   numberOfNeededConcurrentReq <= concurrencyLimit - 3 - concurrentRequests,
//   false otherwise. Also resolves to false when anything inside the try block
//   throws; the error is swallowed here, so the caller cannot tell
//   "no free slots" apart from "account lookup failed".
async checkAvailableConcurrentReqSlots(numberOfNeededConcurrentReq) {
try {
const scraperApiAccountInfo = await scraperapiClient.account();
// Requests reported as in flight by the account info response.
const numberOfUsedConcurrentReq =
scraperApiAccountInfo.concurrentRequests;
// Concurrency ceiling reported by the account info response.
const limitOfConcurrentReq = scraperApiAccountInfo.concurrencyLimit;
//Buffer of requests to prevent errors with preflight requests
const bufferNumberOfReq = 3;
// Slots left after subtracting the buffer and the requests already in use.
// NOTE(review): if either field is missing from the response this becomes NaN
// and the comparison below is false — confirm the response shape.
const numberOfAvailableConcurrentReq =
limitOfConcurrentReq - bufferNumberOfReq - numberOfUsedConcurrentReq;
if (numberOfNeededConcurrentReq <= numberOfAvailableConcurrentReq) {
return true;
} else {
return false;
}
} catch (err) {
// Any failure is reported as "no slots available"; err is not logged or rethrown.
return false;
}
// NOTE(review): the two lines below look like leftovers of the removed
// consecutiveRequestSending helper shown earlier in this diff — confirm.
return dataFromAllRequests;
}*/
}
async sleep(ms) {
// console.log("Sleep for:", ms);