add debugging log switch for crawler process
This commit is contained in:
@@ -28,6 +28,8 @@ const MAX_REAL_ESTATES_IN_EMAIL =
|
|||||||
const MAX_REAL_ESTATES_IN_FIRST_EMAIL =
|
const MAX_REAL_ESTATES_IN_FIRST_EMAIL =
|
||||||
parseInt(process.env.MAX_REAL_ESTATES_IN_FIRST_EMAIL) || 5;
|
parseInt(process.env.MAX_REAL_ESTATES_IN_FIRST_EMAIL) || 5;
|
||||||
|
|
||||||
|
// Debug-logging switch for the crawler process, read from the
// PRINT_CRAWLER_DEBUG_INFO environment variable.
// Environment variables are always strings, so the raw value must be parsed:
// with a plain `process.env.X || 0` the string "0" is truthy and would turn
// logging ON. After parsing, an unset, non-numeric or "0" value yields 0
// (logging off); any other integer value enables logging.
const PRINT_CRAWLER_DEBUG =
  parseInt(process.env.PRINT_CRAWLER_DEBUG_INFO, 10) || 0;
|
||||||
|
|
||||||
module.exports = {
|
module.exports = {
|
||||||
APP_PORT,
|
APP_PORT,
|
||||||
APP_URL,
|
APP_URL,
|
||||||
@@ -36,5 +38,6 @@ module.exports = {
|
|||||||
STOP_CRAWLER,
|
STOP_CRAWLER,
|
||||||
AWS_EMAIL_CONFIG,
|
AWS_EMAIL_CONFIG,
|
||||||
MAX_REAL_ESTATES_IN_EMAIL,
|
MAX_REAL_ESTATES_IN_EMAIL,
|
||||||
MAX_REAL_ESTATES_IN_FIRST_EMAIL
|
MAX_REAL_ESTATES_IN_FIRST_EMAIL,
|
||||||
|
PRINT_CRAWLER_DEBUG
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -13,7 +13,10 @@ const {
|
|||||||
CRAWLER_AD_TYPE
|
CRAWLER_AD_TYPE
|
||||||
} = require("../../common/enums");
|
} = require("../../common/enums");
|
||||||
|
|
||||||
const { DEFAULT_TIMEZONE } = require("../../config/appConfig");
|
const {
|
||||||
|
DEFAULT_TIMEZONE,
|
||||||
|
PRINT_CRAWLER_DEBUG
|
||||||
|
} = require("../../config/appConfig");
|
||||||
|
|
||||||
const OLX_ENUMS = {
|
const OLX_ENUMS = {
|
||||||
OLX_AD_TYPE: {
|
OLX_AD_TYPE: {
|
||||||
@@ -156,6 +159,10 @@ class OlxCrawler {
|
|||||||
}
|
}
|
||||||
|
|
||||||
async indexSinglePage(url, maxResultsPerPage) {
|
async indexSinglePage(url, maxResultsPerPage) {
|
||||||
|
if (PRINT_CRAWLER_DEBUG) {
|
||||||
|
console.log("[OLX] Index page : ", url);
|
||||||
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const res = await fetch(url);
|
const res = await fetch(url);
|
||||||
const body = await res.text();
|
const body = await res.text();
|
||||||
|
|||||||
@@ -11,6 +11,8 @@ const {
|
|||||||
CRAWLER_AD_TYPE
|
CRAWLER_AD_TYPE
|
||||||
} = require("../../common/enums");
|
} = require("../../common/enums");
|
||||||
|
|
||||||
|
const { PRINT_CRAWLER_DEBUG } = require("../../config/appConfig");
|
||||||
|
|
||||||
const PROSTOR_ENUMS = {
|
const PROSTOR_ENUMS = {
|
||||||
PROSTOR_AD_TYPE: {
|
PROSTOR_AD_TYPE: {
|
||||||
[CRAWLER_AD_TYPE.ALL]: "&action=0",
|
[CRAWLER_AD_TYPE.ALL]: "&action=0",
|
||||||
@@ -78,7 +80,9 @@ class ProstorCrawler {
|
|||||||
}
|
}
|
||||||
|
|
||||||
async extractRealEstates(url) {
|
async extractRealEstates(url) {
|
||||||
console.log("[PROSTOR] Index page : ", url);
|
if (PRINT_CRAWLER_DEBUG) {
|
||||||
|
console.log("[PROSTOR] Index page : ", url);
|
||||||
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const res = await fetch(url);
|
const res = await fetch(url);
|
||||||
|
|||||||
@@ -14,7 +14,10 @@ const {
|
|||||||
CRAWLER_AD_TYPE
|
CRAWLER_AD_TYPE
|
||||||
} = require("../../common/enums");
|
} = require("../../common/enums");
|
||||||
|
|
||||||
const { DEFAULT_TIMEZONE } = require("../../config/appConfig");
|
const {
|
||||||
|
DEFAULT_TIMEZONE,
|
||||||
|
PRINT_CRAWLER_DEBUG
|
||||||
|
} = require("../../config/appConfig");
|
||||||
|
|
||||||
const RENTAL_ENUMS = {
|
const RENTAL_ENUMS = {
|
||||||
RENTAL_AD_TYPE: {
|
RENTAL_AD_TYPE: {
|
||||||
@@ -159,7 +162,9 @@ class RentalCrawler {
|
|||||||
}
|
}
|
||||||
|
|
||||||
async indexSinglePage(url, maxResultsPerPage) {
|
async indexSinglePage(url, maxResultsPerPage) {
|
||||||
// console.log("[RENTAL] Index page : ", url);
|
if (PRINT_CRAWLER_DEBUG) {
|
||||||
|
console.log("[RENTAL] Index page : ", url);
|
||||||
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
const res = await fetch(url);
|
const res = await fetch(url);
|
||||||
|
|||||||
@@ -23,6 +23,7 @@ SOURCE_EMAIL=info@saburly.com
|
|||||||
#=============== CRAWLER SETTINGS===============#
|
#=============== CRAWLER SETTINGS===============#
|
||||||
CRAWLER_INTERVAL=Interval to run crawler(s), in seconds
|
CRAWLER_INTERVAL=Interval to run crawler(s), in seconds
|
||||||
STOP_CRAWLER=Non-zero value will skip crawler execution
|
STOP_CRAWLER=Non-zero value will skip crawler execution
|
||||||
|
PRINT_CRAWLER_DEBUG_INFO=Non-zero value will print crawler debugging info to the server console
|
||||||
#==OLX==
|
#==OLX==
|
||||||
OLX_MAX_PAGES=Restrict crawler to this number of pages
|
OLX_MAX_PAGES=Restrict crawler to this number of pages
|
||||||
OLX_MAX_RESULTS_PER_PAGE=Only this number or less results from one page will be scraped and saved
|
OLX_MAX_RESULTS_PER_PAGE=Only this number or less results from one page will be scraped and saved
|
||||||
|
|||||||
Reference in New Issue
Block a user