add debugging log switch for crawler process

This commit is contained in:
Bilal Catic
2019-10-25 11:08:52 +02:00
parent 7e3b0bfcd5
commit 747ebb88e5
5 changed files with 25 additions and 5 deletions

View File

@@ -28,6 +28,8 @@ const MAX_REAL_ESTATES_IN_EMAIL =
const MAX_REAL_ESTATES_IN_FIRST_EMAIL =
parseInt(process.env.MAX_REAL_ESTATES_IN_FIRST_EMAIL) || 5;
// Debug-log switch for the crawlers: a non-zero integer enables logging.
// Environment values are always strings, so the raw value "0" would be truthy;
// parse it as an integer so that "0", an unset variable, or a non-numeric value
// all resolve to 0 (logging disabled).
const PRINT_CRAWLER_DEBUG =
  parseInt(process.env.PRINT_CRAWLER_DEBUG_INFO, 10) || 0;
module.exports = {
APP_PORT,
APP_URL,
@@ -36,5 +38,6 @@ module.exports = {
STOP_CRAWLER,
AWS_EMAIL_CONFIG,
MAX_REAL_ESTATES_IN_EMAIL,
MAX_REAL_ESTATES_IN_FIRST_EMAIL
MAX_REAL_ESTATES_IN_FIRST_EMAIL,
PRINT_CRAWLER_DEBUG
};

View File

@@ -13,7 +13,10 @@ const {
CRAWLER_AD_TYPE
} = require("../../common/enums");
const { DEFAULT_TIMEZONE } = require("../../config/appConfig");
const {
DEFAULT_TIMEZONE,
PRINT_CRAWLER_DEBUG
} = require("../../config/appConfig");
const OLX_ENUMS = {
OLX_AD_TYPE: {
@@ -156,6 +159,10 @@ class OlxCrawler {
}
async indexSinglePage(url, maxResultsPerPage) {
if (PRINT_CRAWLER_DEBUG) {
console.log("[OLX] Index page : ", url);
}
try {
const res = await fetch(url);
const body = await res.text();

View File

@@ -11,6 +11,8 @@ const {
CRAWLER_AD_TYPE
} = require("../../common/enums");
const { PRINT_CRAWLER_DEBUG } = require("../../config/appConfig");
const PROSTOR_ENUMS = {
PROSTOR_AD_TYPE: {
[CRAWLER_AD_TYPE.ALL]: "&action=0",
@@ -78,7 +80,9 @@ class ProstorCrawler {
}
async extractRealEstates(url) {
console.log("[PROSTOR] Index page : ", url);
if (PRINT_CRAWLER_DEBUG) {
console.log("[PROSTOR] Index page : ", url);
}
try {
const res = await fetch(url);

View File

@@ -14,7 +14,10 @@ const {
CRAWLER_AD_TYPE
} = require("../../common/enums");
const { DEFAULT_TIMEZONE } = require("../../config/appConfig");
const {
DEFAULT_TIMEZONE,
PRINT_CRAWLER_DEBUG
} = require("../../config/appConfig");
const RENTAL_ENUMS = {
RENTAL_AD_TYPE: {
@@ -159,7 +162,9 @@ class RentalCrawler {
}
async indexSinglePage(url, maxResultsPerPage) {
// console.log("[RENTAL] Index page : ", url);
if (PRINT_CRAWLER_DEBUG) {
console.log("[RENTAL] Index page : ", url);
}
try {
const res = await fetch(url);

View File

@@ -23,6 +23,7 @@ SOURCE_EMAIL=info@saburly.com
#=============== CRAWLER SETTINGS===============#
CRAWLER_INTERVAL=Interval to run crawler(s), in seconds
STOP_CRAWLER=Non-zero value will skip crawler execution
PRINT_CRAWLER_DEBUG_INFO=Non-zero value will print crawler debugging info to the server console
#==OLX==
OLX_MAX_PAGES=Restrict crawler to this number of pages
OLX_MAX_RESULTS_PER_PAGE=Only this number or less results from one page will be scraped and saved