add debugging log switch for crawler process
This commit is contained in:
@@ -28,6 +28,8 @@ const MAX_REAL_ESTATES_IN_EMAIL =
|
||||
const MAX_REAL_ESTATES_IN_FIRST_EMAIL =
|
||||
parseInt(process.env.MAX_REAL_ESTATES_IN_FIRST_EMAIL) || 5;
|
||||
|
||||
// Switch for crawler debug logging, read from PRINT_CRAWLER_DEBUG_INFO.
// Environment values are always strings, so the raw value "0" would be truthy
// and enable logging. Parse it as an integer so that only a non-zero number
// turns the output on; an unset or non-numeric value falls back to 0 (off).
const PRINT_CRAWLER_DEBUG =
  Number.parseInt(process.env.PRINT_CRAWLER_DEBUG_INFO, 10) || 0;
|
||||
|
||||
module.exports = {
|
||||
APP_PORT,
|
||||
APP_URL,
|
||||
@@ -36,5 +38,6 @@ module.exports = {
|
||||
STOP_CRAWLER,
|
||||
AWS_EMAIL_CONFIG,
|
||||
MAX_REAL_ESTATES_IN_EMAIL,
|
||||
MAX_REAL_ESTATES_IN_FIRST_EMAIL
|
||||
MAX_REAL_ESTATES_IN_FIRST_EMAIL,
|
||||
PRINT_CRAWLER_DEBUG
|
||||
};
|
||||
|
||||
@@ -13,7 +13,10 @@ const {
|
||||
CRAWLER_AD_TYPE
|
||||
} = require("../../common/enums");
|
||||
|
||||
const { DEFAULT_TIMEZONE } = require("../../config/appConfig");
|
||||
const {
|
||||
DEFAULT_TIMEZONE,
|
||||
PRINT_CRAWLER_DEBUG
|
||||
} = require("../../config/appConfig");
|
||||
|
||||
const OLX_ENUMS = {
|
||||
OLX_AD_TYPE: {
|
||||
@@ -156,6 +159,10 @@ class OlxCrawler {
|
||||
}
|
||||
|
||||
async indexSinglePage(url, maxResultsPerPage) {
|
||||
if (PRINT_CRAWLER_DEBUG) {
|
||||
console.log("[OLX] Index page : ", url);
|
||||
}
|
||||
|
||||
try {
|
||||
const res = await fetch(url);
|
||||
const body = await res.text();
|
||||
|
||||
@@ -11,6 +11,8 @@ const {
|
||||
CRAWLER_AD_TYPE
|
||||
} = require("../../common/enums");
|
||||
|
||||
const { PRINT_CRAWLER_DEBUG } = require("../../config/appConfig");
|
||||
|
||||
const PROSTOR_ENUMS = {
|
||||
PROSTOR_AD_TYPE: {
|
||||
[CRAWLER_AD_TYPE.ALL]: "&action=0",
|
||||
@@ -78,7 +80,9 @@ class ProstorCrawler {
|
||||
}
|
||||
|
||||
async extractRealEstates(url) {
|
||||
console.log("[PROSTOR] Index page : ", url);
|
||||
if (PRINT_CRAWLER_DEBUG) {
|
||||
console.log("[PROSTOR] Index page : ", url);
|
||||
}
|
||||
|
||||
try {
|
||||
const res = await fetch(url);
|
||||
|
||||
@@ -14,7 +14,10 @@ const {
|
||||
CRAWLER_AD_TYPE
|
||||
} = require("../../common/enums");
|
||||
|
||||
const { DEFAULT_TIMEZONE } = require("../../config/appConfig");
|
||||
const {
|
||||
DEFAULT_TIMEZONE,
|
||||
PRINT_CRAWLER_DEBUG
|
||||
} = require("../../config/appConfig");
|
||||
|
||||
const RENTAL_ENUMS = {
|
||||
RENTAL_AD_TYPE: {
|
||||
@@ -159,7 +162,9 @@ class RentalCrawler {
|
||||
}
|
||||
|
||||
async indexSinglePage(url, maxResultsPerPage) {
|
||||
// console.log("[RENTAL] Index page : ", url);
|
||||
if (PRINT_CRAWLER_DEBUG) {
|
||||
console.log("[RENTAL] Index page : ", url);
|
||||
}
|
||||
|
||||
try {
|
||||
const res = await fetch(url);
|
||||
|
||||
@@ -23,6 +23,7 @@ SOURCE_EMAIL=info@saburly.com
|
||||
#=============== CRAWLER SETTINGS===============#
|
||||
CRAWLER_INTERVAL=Interval to run crawler(s), in seconds
|
||||
STOP_CRAWLER=Non-zero value will skip crawler execution
|
||||
PRINT_CRAWLER_DEBUG_INFO=Non-zero value will print crawler debugging info to the server console
|
||||
#==OLX==
|
||||
OLX_MAX_PAGES=Restrict crawler to this number of pages
|
||||
OLX_MAX_RESULTS_PER_PAGE=Only this number or less results from one page will be scraped and saved
|
||||
|
||||
Reference in New Issue
Block a user