Cleanup after debugging

This commit is contained in:
=
2020-09-16 06:16:49 -07:00
parent bf8d131025
commit 2b1cbcaa47
4 changed files with 43 additions and 41 deletions

View File

@@ -1 +1 @@
web: node --inspect ./index.js
web: node ./index.js

View File

@@ -48,6 +48,7 @@ const USER_AGENT =
/**
 * Parses an environment-variable value as a strictly positive base-10 integer.
 *
 * @param {string | undefined} rawValue - Raw value read from `process.env`.
 * @param {number} fallback - Value returned when `rawValue` is missing,
 *   non-numeric, zero or negative.
 * @returns {number} The parsed integer, or `fallback`.
 */
function parsePositiveIntEnv(rawValue, fallback) {
  const parsed = Number.parseInt(rawValue, 10);
  // NaN fails the comparison as well, so junk input falls back too.
  return parsed > 0 ? parsed : fallback;
}

// Defaults to 1 when the variable is unset; otherwise the value is parsed as a
// base-10 integer (explicit radix so inputs such as "0x10" are not read as hex).
const USE_SCRAPER_API =
  process.env.USE_SCRAPER_API === undefined ? 1 : Number.parseInt(process.env.USE_SCRAPER_API, 10);
const SCRAPER_API_KEY = process.env.SCRAPER_API_KEY || "";
const SCRAPER_API_BASE_URL = process.env.SCRAPER_API_BASE_URL || "";
// Environment values are always strings, so convert to a number here instead of
// relying on setTimeout's implicit coercion; invalid or non-positive values use
// the 60000 ms default.
const NODE_FETCH_TIMEOUT_MS = parsePositiveIntEnv(process.env.NODE_FETCH_TIMEOUT_MS, 60000);
module.exports = {
APP_PORT,
@@ -66,5 +67,6 @@ module.exports = {
USER_AGENT,
USE_SCRAPER_API,
SCRAPER_API_KEY,
SCRAPER_API_BASE_URL
SCRAPER_API_BASE_URL,
NODE_FETCH_TIMEOUT_MS
};

View File

@@ -33,44 +33,43 @@ async function crawlAll() {
OLX_CONFIG.OLX_MAX_RESULTS_PER_PAGE,
OLX_CONFIG.OLX_IGNORED_USERNAMES,
OLX_CONFIG.OLX_DELAY_BETWEEN_PAGES
),
new RentalCrawler(
[postgresSaver],
RENTAL_CONFIG.RENTAL_CRAWLER_AD_TYPE,
RENTAL_CONFIG.RENTAL_CRAWLER_AD_CATEGORIES,
RENTAL_CONFIG.RENTAL_MAX_PAGES,
RENTAL_CONFIG.RENTAL_MAX_RESULTS_PER_PAGE,
RENTAL_CONFIG.RENTAL_IGNORED_USERNAMES,
RENTAL_CONFIG.RENTAL_DELAY_BETWEEN_PAGES
),
new ProstorCrawler(
[postgresSaver],
PROSTOR_CONFIG.PROSTOR_CRAWLER_AD_TYPE,
PROSTOR_CONFIG.PROSTOR_CRAWLER_AD_CATEGORIES,
PROSTOR_CONFIG.PROSTOR_MAX_PAGES,
PROSTOR_CONFIG.PROSTOR_MAX_RESULTS_PER_PAGE,
PROSTOR_CONFIG.PROSTOR_IGNORED_USERNAMES,
PROSTOR_CONFIG.PROSTOR_DELAY_BETWEEN_PAGES
),
new AktidoCrawler(
[postgresSaver],
AKTIDO_CONFIG.AKTIDO_CRAWLER_AD_TYPE,
AKTIDO_CONFIG.AKTIDO_CRAWLER_AD_CATEGORIES,
AKTIDO_CONFIG.AKTIDO_MAX_PAGES,
AKTIDO_CONFIG.AKTIDO_MAX_RESULTS_PER_PAGE,
AKTIDO_CONFIG.AKTIDO_IGNORED_USERNAMES,
AKTIDO_CONFIG.AKTIDO_DELAY_BETWEEN_PAGES
),
new SaljicCrawler(
[postgresSaver],
SALJIC_CONFIG.SALJIC_CRAWLER_AD_TYPE,
SALJIC_CONFIG.SALJIC_CRAWLER_AD_CATEGORIES,
SALJIC_CONFIG.SALJIC_MAX_PAGES,
SALJIC_CONFIG.SALJIC_MAX_RESULTS_PER_PAGE,
SALJIC_CONFIG.SALJIC_IGNORED_USERNAMES,
SALJIC_CONFIG.SALJIC_DELAY_BETWEEN_PAGES
)
//new RentalCrawler(
//[postgresSaver],
//RENTAL_CONFIG.RENTAL_CRAWLER_AD_TYPE,
//RENTAL_CONFIG.RENTAL_CRAWLER_AD_CATEGORIES,
//RENTAL_CONFIG.RENTAL_MAX_PAGES,
//RENTAL_CONFIG.RENTAL_MAX_RESULTS_PER_PAGE,
//RENTAL_CONFIG.RENTAL_IGNORED_USERNAMES,
//RENTAL_CONFIG.RENTAL_DELAY_BETWEEN_PAGES
//),
//new ProstorCrawler(
//[postgresSaver],
//PROSTOR_CONFIG.PROSTOR_CRAWLER_AD_TYPE,
//PROSTOR_CONFIG.PROSTOR_CRAWLER_AD_CATEGORIES,
//PROSTOR_CONFIG.PROSTOR_MAX_PAGES,
//PROSTOR_CONFIG.PROSTOR_MAX_RESULTS_PER_PAGE,
//PROSTOR_CONFIG.PROSTOR_IGNORED_USERNAMES,
//PROSTOR_CONFIG.PROSTOR_DELAY_BETWEEN_PAGES
//),
//new AktidoCrawler(
//[postgresSaver],
//AKTIDO_CONFIG.AKTIDO_CRAWLER_AD_TYPE,
//AKTIDO_CONFIG.AKTIDO_CRAWLER_AD_CATEGORIES,
//AKTIDO_CONFIG.AKTIDO_MAX_PAGES,
//AKTIDO_CONFIG.AKTIDO_MAX_RESULTS_PER_PAGE,
//AKTIDO_CONFIG.AKTIDO_IGNORED_USERNAMES,
//AKTIDO_CONFIG.AKTIDO_DELAY_BETWEEN_PAGES
//)
//,
//new SaljicCrawler(
//[postgresSaver],
//SALJIC_CONFIG.SALJIC_CRAWLER_AD_TYPE,
//SALJIC_CONFIG.SALJIC_CRAWLER_AD_CATEGORIES,
//SALJIC_CONFIG.SALJIC_MAX_PAGES,
//SALJIC_CONFIG.SALJIC_MAX_RESULTS_PER_PAGE,
//SALJIC_CONFIG.SALJIC_IGNORED_USERNAMES,
//SALJIC_CONFIG.SALJIC_DELAY_BETWEEN_PAGES
//)
];
const newRealEstates = [];

View File

@@ -4,7 +4,8 @@ const {
USER_AGENT,
USE_SCRAPER_API,
SCRAPER_API_KEY,
SCRAPER_API_BASE_URL
SCRAPER_API_BASE_URL,
NODE_FETCH_TIMEOUT_MS
} = require("../config/appConfig");
const timeout = (ms) => {
@@ -32,7 +33,7 @@ const fetch = async (url, options = {}) => {
? `${SCRAPER_API_BASE_URL}?api_key=${SCRAPER_API_KEY}&url=${urlToFetchThroughAPI}`
: url;
const result = nodeFetch(urlAdaptedForScraping, newOptions);
const timeoutId = setTimeout(() => controller.abort(), 20000);
const timeoutId = setTimeout(() => controller.abort(), NODE_FETCH_TIMEOUT_MS);
return result;
};