Cleanup after debugging
This commit is contained in:
@@ -48,6 +48,7 @@ const USER_AGENT =
|
|||||||
const USE_SCRAPER_API = process.env.USE_SCRAPER_API === undefined ? 1 : parseInt(process.env.USE_SCRAPER_API);
|
const USE_SCRAPER_API = process.env.USE_SCRAPER_API === undefined ? 1 : parseInt(process.env.USE_SCRAPER_API);
|
||||||
const SCRAPER_API_KEY = process.env.SCRAPER_API_KEY || "";
|
const SCRAPER_API_KEY = process.env.SCRAPER_API_KEY || "";
|
||||||
const SCRAPER_API_BASE_URL = process.env.SCRAPER_API_BASE_URL || "";
|
const SCRAPER_API_BASE_URL = process.env.SCRAPER_API_BASE_URL || "";
|
||||||
|
const NODE_FETCH_TIMEOUT_MS = process.env.NODE_FETCH_TIMEOUT_MS || 60000
|
||||||
|
|
||||||
module.exports = {
|
module.exports = {
|
||||||
APP_PORT,
|
APP_PORT,
|
||||||
@@ -66,5 +67,6 @@ module.exports = {
|
|||||||
USER_AGENT,
|
USER_AGENT,
|
||||||
USE_SCRAPER_API,
|
USE_SCRAPER_API,
|
||||||
SCRAPER_API_KEY,
|
SCRAPER_API_KEY,
|
||||||
SCRAPER_API_BASE_URL
|
SCRAPER_API_BASE_URL,
|
||||||
|
NODE_FETCH_TIMEOUT_MS
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -33,44 +33,43 @@ async function crawlAll() {
|
|||||||
OLX_CONFIG.OLX_MAX_RESULTS_PER_PAGE,
|
OLX_CONFIG.OLX_MAX_RESULTS_PER_PAGE,
|
||||||
OLX_CONFIG.OLX_IGNORED_USERNAMES,
|
OLX_CONFIG.OLX_IGNORED_USERNAMES,
|
||||||
OLX_CONFIG.OLX_DELAY_BETWEEN_PAGES
|
OLX_CONFIG.OLX_DELAY_BETWEEN_PAGES
|
||||||
|
),
|
||||||
|
new RentalCrawler(
|
||||||
|
[postgresSaver],
|
||||||
|
RENTAL_CONFIG.RENTAL_CRAWLER_AD_TYPE,
|
||||||
|
RENTAL_CONFIG.RENTAL_CRAWLER_AD_CATEGORIES,
|
||||||
|
RENTAL_CONFIG.RENTAL_MAX_PAGES,
|
||||||
|
RENTAL_CONFIG.RENTAL_MAX_RESULTS_PER_PAGE,
|
||||||
|
RENTAL_CONFIG.RENTAL_IGNORED_USERNAMES,
|
||||||
|
RENTAL_CONFIG.RENTAL_DELAY_BETWEEN_PAGES
|
||||||
|
),
|
||||||
|
new ProstorCrawler(
|
||||||
|
[postgresSaver],
|
||||||
|
PROSTOR_CONFIG.PROSTOR_CRAWLER_AD_TYPE,
|
||||||
|
PROSTOR_CONFIG.PROSTOR_CRAWLER_AD_CATEGORIES,
|
||||||
|
PROSTOR_CONFIG.PROSTOR_MAX_PAGES,
|
||||||
|
PROSTOR_CONFIG.PROSTOR_MAX_RESULTS_PER_PAGE,
|
||||||
|
PROSTOR_CONFIG.PROSTOR_IGNORED_USERNAMES,
|
||||||
|
PROSTOR_CONFIG.PROSTOR_DELAY_BETWEEN_PAGES
|
||||||
|
),
|
||||||
|
new AktidoCrawler(
|
||||||
|
[postgresSaver],
|
||||||
|
AKTIDO_CONFIG.AKTIDO_CRAWLER_AD_TYPE,
|
||||||
|
AKTIDO_CONFIG.AKTIDO_CRAWLER_AD_CATEGORIES,
|
||||||
|
AKTIDO_CONFIG.AKTIDO_MAX_PAGES,
|
||||||
|
AKTIDO_CONFIG.AKTIDO_MAX_RESULTS_PER_PAGE,
|
||||||
|
AKTIDO_CONFIG.AKTIDO_IGNORED_USERNAMES,
|
||||||
|
AKTIDO_CONFIG.AKTIDO_DELAY_BETWEEN_PAGES
|
||||||
|
),
|
||||||
|
new SaljicCrawler(
|
||||||
|
[postgresSaver],
|
||||||
|
SALJIC_CONFIG.SALJIC_CRAWLER_AD_TYPE,
|
||||||
|
SALJIC_CONFIG.SALJIC_CRAWLER_AD_CATEGORIES,
|
||||||
|
SALJIC_CONFIG.SALJIC_MAX_PAGES,
|
||||||
|
SALJIC_CONFIG.SALJIC_MAX_RESULTS_PER_PAGE,
|
||||||
|
SALJIC_CONFIG.SALJIC_IGNORED_USERNAMES,
|
||||||
|
SALJIC_CONFIG.SALJIC_DELAY_BETWEEN_PAGES
|
||||||
)
|
)
|
||||||
//new RentalCrawler(
|
|
||||||
//[postgresSaver],
|
|
||||||
//RENTAL_CONFIG.RENTAL_CRAWLER_AD_TYPE,
|
|
||||||
//RENTAL_CONFIG.RENTAL_CRAWLER_AD_CATEGORIES,
|
|
||||||
//RENTAL_CONFIG.RENTAL_MAX_PAGES,
|
|
||||||
//RENTAL_CONFIG.RENTAL_MAX_RESULTS_PER_PAGE,
|
|
||||||
//RENTAL_CONFIG.RENTAL_IGNORED_USERNAMES,
|
|
||||||
//RENTAL_CONFIG.RENTAL_DELAY_BETWEEN_PAGES
|
|
||||||
//),
|
|
||||||
//new ProstorCrawler(
|
|
||||||
//[postgresSaver],
|
|
||||||
//PROSTOR_CONFIG.PROSTOR_CRAWLER_AD_TYPE,
|
|
||||||
//PROSTOR_CONFIG.PROSTOR_CRAWLER_AD_CATEGORIES,
|
|
||||||
//PROSTOR_CONFIG.PROSTOR_MAX_PAGES,
|
|
||||||
//PROSTOR_CONFIG.PROSTOR_MAX_RESULTS_PER_PAGE,
|
|
||||||
//PROSTOR_CONFIG.PROSTOR_IGNORED_USERNAMES,
|
|
||||||
//PROSTOR_CONFIG.PROSTOR_DELAY_BETWEEN_PAGES
|
|
||||||
//),
|
|
||||||
//new AktidoCrawler(
|
|
||||||
//[postgresSaver],
|
|
||||||
//AKTIDO_CONFIG.AKTIDO_CRAWLER_AD_TYPE,
|
|
||||||
//AKTIDO_CONFIG.AKTIDO_CRAWLER_AD_CATEGORIES,
|
|
||||||
//AKTIDO_CONFIG.AKTIDO_MAX_PAGES,
|
|
||||||
//AKTIDO_CONFIG.AKTIDO_MAX_RESULTS_PER_PAGE,
|
|
||||||
//AKTIDO_CONFIG.AKTIDO_IGNORED_USERNAMES,
|
|
||||||
//AKTIDO_CONFIG.AKTIDO_DELAY_BETWEEN_PAGES
|
|
||||||
//)
|
|
||||||
//,
|
|
||||||
//new SaljicCrawler(
|
|
||||||
//[postgresSaver],
|
|
||||||
//SALJIC_CONFIG.SALJIC_CRAWLER_AD_TYPE,
|
|
||||||
//SALJIC_CONFIG.SALJIC_CRAWLER_AD_CATEGORIES,
|
|
||||||
//SALJIC_CONFIG.SALJIC_MAX_PAGES,
|
|
||||||
//SALJIC_CONFIG.SALJIC_MAX_RESULTS_PER_PAGE,
|
|
||||||
//SALJIC_CONFIG.SALJIC_IGNORED_USERNAMES,
|
|
||||||
//SALJIC_CONFIG.SALJIC_DELAY_BETWEEN_PAGES
|
|
||||||
//)
|
|
||||||
];
|
];
|
||||||
|
|
||||||
const newRealEstates = [];
|
const newRealEstates = [];
|
||||||
|
|||||||
@@ -4,7 +4,8 @@ const {
|
|||||||
USER_AGENT,
|
USER_AGENT,
|
||||||
USE_SCRAPER_API,
|
USE_SCRAPER_API,
|
||||||
SCRAPER_API_KEY,
|
SCRAPER_API_KEY,
|
||||||
SCRAPER_API_BASE_URL
|
SCRAPER_API_BASE_URL,
|
||||||
|
NODE_FETCH_TIMEOUT_MS
|
||||||
} = require("../config/appConfig");
|
} = require("../config/appConfig");
|
||||||
|
|
||||||
const timeout = (ms) => {
|
const timeout = (ms) => {
|
||||||
@@ -32,7 +33,7 @@ const fetch = async (url, options = {}) => {
|
|||||||
? `${SCRAPER_API_BASE_URL}?api_key=${SCRAPER_API_KEY}&url=${urlToFetchThroughAPI}`
|
? `${SCRAPER_API_BASE_URL}?api_key=${SCRAPER_API_KEY}&url=${urlToFetchThroughAPI}`
|
||||||
: url;
|
: url;
|
||||||
const result = nodeFetch(urlAdaptedForScraping, newOptions);
|
const result = nodeFetch(urlAdaptedForScraping, newOptions);
|
||||||
const timeoutId = setTimeout(() => controller.abort(), 20000);
|
const timeoutId = setTimeout(() => controller.abort(), NODE_FETCH_TIMEOUT_MS);
|
||||||
return result;
|
return result;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user