Compare commits

..

50 Commits

Author SHA1 Message Date
Senad Uka
70bd952ee1 Add ssl to sequelize 2021-03-18 11:38:42 +01:00
Senad Uka
14039975c2 Modify time to live to 6 hours 2020-10-21 10:49:22 +02:00
Senad Uka
92e4f4ed5a Add caching to fetch wrapper 2020-10-21 06:16:35 +02:00
=
88f9d10586 parseInt NODE_FETCH_TIMEOUT_MS 2020-09-16 06:21:03 -07:00
=
2b1cbcaa47 Cleanup after debugging 2020-09-16 06:16:49 -07:00
=
bf8d131025 Increase timeout to 20 secs 2020-09-15 01:34:07 -07:00
=
698acb010a Add timeout to fetch wrapper 2020-09-15 01:27:20 -07:00
=
ade3eb307d Add try/catch to generator 2020-09-14 14:54:39 -07:00
=
8031f6f8a3 Moar attempts 2020-09-14 14:46:21 -07:00
=
d7a680a3ac Modify procfile instead of package.json for --inspect 2020-09-14 07:17:37 -07:00
=
8018caab47 Enable inspector and debugger 2020-09-14 06:53:35 -07:00
Edin Dazdarevic
d871d9ad1f More debugging, disable all crawlers 2020-09-13 23:45:08 +02:00
=
dfbefc20cd Remove PromisePool from olx 2020-09-13 04:57:01 -07:00
=
a481ecfe37 Debug 2020-09-13 04:48:11 -07:00
Senad Uka
8df94da48c Saljic fix fix 2020-09-11 05:03:37 +02:00
Senad Uka
d4fcd1950d Fix saljic 2020-09-11 04:51:54 +02:00
=
e8115a9215 moar fixes 2020-09-10 14:26:48 -07:00
=
160efdf6ab More fixes 2020-09-10 14:06:09 -07:00
=
c9b8c2e1a5 Fiks 2020-09-10 13:31:57 -07:00
Senad Uka
855b93ca41 Scraped data 2020-09-10 19:46:25 +02:00
Senad Uka
70779b24c0 Scraped data 2020-09-10 19:42:50 +02:00
Senad Uka
ba873f9f4e package json 2020-09-10 19:40:51 +02:00
Senad Uka
e4775158fc Promise pool 2020-09-10 19:39:13 +02:00
Senad Uka
26377c485c Reverting 2020-09-10 18:30:14 +02:00
Senad Uka
b30b0f45a6 trying different approach 2020-09-10 18:23:01 +02:00
Senad Uka
9c1a029ff1 Debugging ba 2020-09-10 18:17:16 +02:00
Senad Uka
9b49759485 Pause is not enough 2020-09-10 18:13:47 +02:00
Senad Uka
0c2f8d11ee Add random timeout up to 500ms 2020-09-10 18:10:42 +02:00
Senad Uka
b27d9d3499 Add debugging info 2020-09-10 18:00:13 +02:00
Senad Uka
dd3f30ef0e Comment agent header 2020-09-10 17:57:21 +02:00
Senad Uka
78c6056db4 Change for scraperapi 2020-09-10 17:37:19 +02:00
Bilal Catic
ecf27f2ba1 Merge branch 'improve-olx-scrapper' into 'master'
Improve olx scrapper

See merge request saburly/marketalarm/web!113
2020-06-16 20:37:37 +00:00
Bilal
1229b3fa6c Add more debug logs 2020-06-16 19:51:13 +02:00
Edin
542ff56123 Merge branch 'improve-wom' into 'master'
Improve styling of WOM message

See merge request saburly/marketalarm/web!112
2020-06-05 16:24:31 +00:00
=
0aa851015b Improve styling of WOM message 2020-06-05 09:07:54 -07:00
Senad Uka
c033b2e47c Merge branch 'word-of-mouth-email' into 'master'
Add word of mouth requests

See merge request saburly/marketalarm/web!111
2020-06-05 14:55:44 +00:00
Senad Uka
0895654db2 Update emailContentGenerator.js 2020-06-05 14:55:00 +00:00
Senad Uka
8925eb9f4e Update emailContentGenerator.js 2020-06-05 14:54:22 +00:00
=
52201af3ba Add word of mouth requests 2020-06-05 07:52:52 -07:00
Senad Uka
1505c07363 Merge branch 'improve-crawler' into 'master'
Improve crawler

See merge request saburly/marketalarm/web!110
2020-05-18 09:30:24 +00:00
Bilal
159fedbc2d handle failed page fetch 2020-05-18 03:53:08 +02:00
Bilal
65068932ad handle failed page fetch; detect discounted price 2020-05-18 03:43:49 +02:00
Senad Uka
820227827e Merge branch 'fix-crawler-errors' into 'master'
Fix crawler errors

See merge request saburly/marketalarm/web!109
2020-05-14 21:34:17 +00:00
Bilal
d35a113baa Fix saljic crawler 2020-05-14 19:01:19 +02:00
Bilal
ba60f8749d Fix Prostor crawler - use new JSON location in page body 2020-05-14 15:38:15 +02:00
Senad Uka
f1d45fed26 Merge branch 'move-scraper-url-to-the-env' into 'master'
Move scraper api url to the ENV

See merge request saburly/marketalarm/web!108
2020-05-12 11:55:12 +00:00
Bilal
ff923605ad Move scraper api base url to the ENV; send URL as base64 string 2020-05-12 13:44:09 +02:00
Naida Vatric
692577fb8c Merge branch 'after-scraper-fix' into 'master'
After scraper fix

See merge request saburly/marketalarm/web!104
2020-02-28 16:14:43 +00:00
Naida Vatric
2a13ab55ed Merge branch 'after-scraper-fix' into 'master'
Olx price parsing changed.

See merge request saburly/marketalarm/web!103
2020-02-27 22:38:00 +00:00
Naida Vatric
39f9383ae2 Merge branch 'after-scraper-fix' into 'master'
Commented checkup out and Prostor fetch changed.

See merge request saburly/marketalarm/web!102
2020-02-27 20:31:14 +00:00
17 changed files with 365 additions and 430 deletions

View File

@@ -45,10 +45,10 @@ const USER_AGENT =
process.env.USER_AGENT || process.env.USER_AGENT ||
"Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36"; "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36";
const USE_SCRAPER_API = process.env.USE_SCRAPER_API || 1; //Default to use const USE_SCRAPER_API = process.env.USE_SCRAPER_API === undefined ? 1 : parseInt(process.env.USE_SCRAPER_API);
const SCRAPER_API_KEY = process.env.SCRAPER_API_KEY || ""; const SCRAPER_API_KEY = process.env.SCRAPER_API_KEY || "";
const NUMBER_OF_CONCURRENT_REQ_SCRAPER_API = const SCRAPER_API_BASE_URL = process.env.SCRAPER_API_BASE_URL || "";
parseInt(process.env.NUMBER_OF_CONCURRENT_REQ_SCRAPER_API) || 10; const NODE_FETCH_TIMEOUT_MS = parseInt(process.env.NODE_FETCH_TIMEOUT_MS) || 60000
module.exports = { module.exports = {
APP_PORT, APP_PORT,
@@ -67,5 +67,6 @@ module.exports = {
USER_AGENT, USER_AGENT,
USE_SCRAPER_API, USE_SCRAPER_API,
SCRAPER_API_KEY, SCRAPER_API_KEY,
NUMBER_OF_CONCURRENT_REQ_SCRAPER_API SCRAPER_API_BASE_URL,
NODE_FETCH_TIMEOUT_MS
}; };

View File

@@ -10,6 +10,7 @@ const RentalCrawler = require("./specificCrawlers/rental");
const ProstorCrawler = require("./specificCrawlers/prostor"); const ProstorCrawler = require("./specificCrawlers/prostor");
const AktidoCrawler = require("./specificCrawlers/aktido"); const AktidoCrawler = require("./specificCrawlers/aktido");
const SaljicCrawler = require("./specificCrawlers/saljic"); const SaljicCrawler = require("./specificCrawlers/saljic");
const { logDebug } = require("../helpers/log");
const { const {
OLX_CONFIG, OLX_CONFIG,
@@ -75,7 +76,9 @@ async function crawlAll() {
for (const crawler of crawlers) { for (const crawler of crawlers) {
try { try {
logDebug('Starting crawler: ', crawler);
const newRealEstatesFromSingleCrawler = await crawler.crawl(); const newRealEstatesFromSingleCrawler = await crawler.crawl();
logDebug('Crawler done: ', crawler);
if (Array.isArray(newRealEstatesFromSingleCrawler)) { if (Array.isArray(newRealEstatesFromSingleCrawler)) {
newRealEstates.push(...newRealEstatesFromSingleCrawler); newRealEstates.push(...newRealEstatesFromSingleCrawler);
} }

View File

@@ -159,7 +159,7 @@ class AktidoCrawler {
} }
try { try {
const res = await fetch(url); const res = await fetch(url, {}, false);
const body = await res.text(); const body = await res.text();
const $ = cheerio.load(body); const $ = cheerio.load(body);
let hrefs = []; let hrefs = [];
@@ -202,6 +202,10 @@ class AktidoCrawler {
const body = await adPageSource.text(); const body = await adPageSource.text();
const $ = cheerio.load(body); const $ = cheerio.load(body);
if (body.indexOf('<html') === -1) {
throw { message: 'Failed to fetch page !' }
}
const mapElementParent = $(".box-map").parent(); const mapElementParent = $(".box-map").parent();
const scriptElement = $("script", mapElementParent); const scriptElement = $("script", mapElementParent);
if ( if (

View File

@@ -1,6 +1,7 @@
"use strict"; "use strict";
const fetch = require("../../helpers/fetchWrapper"); const fetch = require("../../helpers/fetchWrapper");
const { logDebug } = require("../../helpers/log");
const cheerio = require("cheerio"); const cheerio = require("cheerio");
const Promise = require("bluebird"); const Promise = require("bluebird");
const moment = require("moment-timezone"); const moment = require("moment-timezone");
@@ -18,9 +19,7 @@ const {
const { const {
DEFAULT_TIMEZONE, DEFAULT_TIMEZONE,
PRINT_CRAWLER_DEBUG, PRINT_CRAWLER_DEBUG
NUMBER_OF_CONCURRENT_REQ_SCRAPER_API,
SCRAPER_API_KEY
} = require("../../config/appConfig"); } = require("../../config/appConfig");
const OLX_ENUMS = { const OLX_ENUMS = {
@@ -46,7 +45,15 @@ const OLX_ENUMS = {
const { OLX_FORCE_CRAWL } = require("../specificConfigs/olx"); const { OLX_FORCE_CRAWL } = require("../specificConfigs/olx");
const scraperapiClient = require("scraperapi-sdk")(SCRAPER_API_KEY); const chunk = (array, size = 10) => {
let i, j ,temparray;
const result = []
for (i=0,j=array.length; i<j; i+=size) {
temparray = array.slice(i,i+size);
result.push(temparray);
}
return result;
}
class OlxCrawler { class OlxCrawler {
constructor( constructor(
@@ -56,7 +63,7 @@ class OlxCrawler {
maxPages = 1000, maxPages = 1000,
maxResultsPerPage = 100, maxResultsPerPage = 100,
ignoredUsernames = [], ignoredUsernames = [],
delayBetweenPages = 1000 delayBetweenPages = 500
) { ) {
this.savers = savers; this.savers = savers;
this.baseUrl = "https://www.olx.ba/pretraga?sort_order=desc&sort_po=datum"; this.baseUrl = "https://www.olx.ba/pretraga?sort_order=desc&sort_po=datum";
@@ -69,6 +76,7 @@ class OlxCrawler {
} }
async crawl() { async crawl() {
logDebug("Starting OLX crawl");
const crawlAdCategories = this.crawlerAdCategories; const crawlAdCategories = this.crawlerAdCategories;
const newRealEstates = []; const newRealEstates = [];
@@ -92,14 +100,32 @@ class OlxCrawler {
const entries = singlePageResults.entries(); const entries = singlePageResults.entries();
for (const [index, { value: singlePageResult }] of entries) { for (const [index, { value: singlePageResult }] of entries) {
if (PRINT_CRAWLER_DEBUG) {
console.log("================================");
console.log("Category Indexer index : ", index);
}
if (singlePageResult) { if (singlePageResult) {
console.log("\tTotal entries : ", singlePageResult.length)
const saveResults = await this.saveCrawledResults(singlePageResult); const saveResults = await this.saveCrawledResults(singlePageResult);
const { newRecords, existingRecords } = saveResults; const { newRecords, existingRecords } = saveResults;
if (PRINT_CRAWLER_DEBUG) {
console.log("--------------------------");
console.log("\tNew record URLs [", newRecords.length, "] :");
for(const newRecord of newRecords) {
console.log("\t\t",newRecord.url);
}
console.log("\t-------------------------");
console.log("\tExisting record URLs [", existingRecords.length, "] :");
}
newRealEstates.push(...newRecords); newRealEstates.push(...newRecords);
for (const existingRecord of existingRecords) { for (const existingRecord of existingRecords) {
const { publishedDate, renewedDate } = existingRecord; const { publishedDate, renewedDate, url } = existingRecord;
const publishedDateMoment = moment.utc(publishedDate); const publishedDateMoment = moment.utc(publishedDate);
const renewedDateMoment = moment.utc(renewedDate); const renewedDateMoment = moment.utc(renewedDate);
@@ -109,13 +135,25 @@ class OlxCrawler {
"minute" "minute"
); );
if (PRINT_CRAWLER_DEBUG) {
console.log("\t\t", url);
console.log("\t\t\tPublished date : ", publishedDate);
console.log("\t\t\tRenewed date : ", renewedDate);
console.log("\t\t\tIs same (up to minute) : ", stopCrawlingThisCategory);
}
if (stopCrawlingThisCategory && !OLX_FORCE_CRAWL) { if (stopCrawlingThisCategory && !OLX_FORCE_CRAWL) {
generatorsToRemove[index] = true; generatorsToRemove[index] = true;
// console.log("\tGenerator ", index + 1, "has no more new ads"); if (PRINT_CRAWLER_DEBUG) {
console.log("\t\t\tStopping this category indexer");
}
break; break;
} }
} }
} else { } else {
if (PRINT_CRAWLER_DEBUG) {
console.log("\tNo more entries in this category, stopping!");
}
//Generator returned undefined, remove this generator from array //Generator returned undefined, remove this generator from array
generatorsToRemove[index] = true; generatorsToRemove[index] = true;
// console.log("Generator ", index + 1, "has no more pages"); // console.log("Generator ", index + 1, "has no more pages");
@@ -140,31 +178,36 @@ class OlxCrawler {
} }
async *categoryIndexer(adCategory) { async *categoryIndexer(adCategory) {
let pageToIndex = 1; try {
let pageToIndex = 1;
const urlAdTypePart = OLX_ENUMS.OLX_AD_TYPE[this.crawlerAdTypes]; const urlAdTypePart = OLX_ENUMS.OLX_AD_TYPE[this.crawlerAdTypes];
const urlCategoryPart = OLX_ENUMS.OLX_AD_CATEGORY[adCategory]; const urlCategoryPart = OLX_ENUMS.OLX_AD_CATEGORY[adCategory];
if (urlAdTypePart !== undefined && urlCategoryPart !== undefined) { if (urlAdTypePart !== undefined && urlCategoryPart !== undefined) {
while (true) { while (true) {
const urlPageToCrawl = `${this.baseUrl}${urlAdTypePart}${urlCategoryPart}&stranica=${pageToIndex}`; const urlPageToCrawl = `${this.baseUrl}${urlAdTypePart}${urlCategoryPart}&stranica=${pageToIndex}`;
const singlePageResults = await this.indexSinglePage( const singlePageResults = await this.indexSinglePage(
urlPageToCrawl, urlPageToCrawl,
this.maxResultsPerPage this.maxResultsPerPage
); );
if (Array.isArray(singlePageResults) && singlePageResults.length > 0) { await this.sleep(this.delayBetweenPages);
yield singlePageResults; if (Array.isArray(singlePageResults) && singlePageResults.length > 0) {
} else { yield singlePageResults;
return undefined; } else {
} return undefined;
}
++pageToIndex;
if (pageToIndex === this.maxPages) { ++pageToIndex;
return undefined; if (pageToIndex === this.maxPages) {
return undefined;
}
} }
} else {
return undefined;
} }
} else { } catch (e) {
return undefined; console.log('Error inside generator: ', e);
} }
} }
@@ -174,8 +217,10 @@ class OlxCrawler {
} }
try { try {
const res = await fetch(url); const res = await fetch(url, {}, false);
logDebug("Got category results for: ", url);
const body = await res.text(); const body = await res.text();
logDebug("Got category results text for: ", url);
const $ = cheerio.load(body); const $ = cheerio.load(body);
let hrefs = []; let hrefs = [];
@@ -194,57 +239,46 @@ class OlxCrawler {
let actualNoOfResults = let actualNoOfResults =
hrefs.length <= maxResultsPerPage ? hrefs.length : maxResultsPerPage; hrefs.length <= maxResultsPerPage ? hrefs.length : maxResultsPerPage;
const scrapedData = []; const asyncScraping = [];
for ( for (let i = 0; i < actualNoOfResults; i++) {
let i = 0; asyncScraping.push(hrefs[i]);
i <= actualNoOfResults;
i = i + NUMBER_OF_CONCURRENT_REQ_SCRAPER_API
) {
const concurrentUrlsToScrape = hrefs.slice(
i,
i + NUMBER_OF_CONCURRENT_REQ_SCRAPER_API
);
//Before it send n req to scraperAPI it send preflight request to check if we have enough concurrent req availabe
//It does not send "real" req until approven internaly
let availableConcurrentReqSlots = false;
do {
availableConcurrentReqSlots = await this.checkAvailableConcurrentReqSlots(
concurrentUrlsToScrape.length
);
} while (availableConcurrentReqSlots !== true);
//
console.log(
`OLX - Sending requests from ${i} to ${i +
NUMBER_OF_CONCURRENT_REQ_SCRAPER_API}.`
);
console.log(`OLX - Urls sent to scrape: `, concurrentUrlsToScrape);
//
const concurrentReqScraperApi = concurrentUrlsToScrape.map(url =>
this.scrapeAd(url)
);
const concurrentReqData = await Promise.all(concurrentReqScraperApi);
concurrentReqData.forEach(reqData => scrapedData.push(reqData));
} }
const filteredScrapedData = scrapedData.filter(adData => !!adData); const allChunks = chunk(asyncScraping, 2);
const dataResults = []
for (let i = 0; i < allChunks.length; i++) {
const singleChunk = allChunks[i];
const promises = singleChunk.map(c => this.scrapeAd(c))
const chunkResults = await Promise.all(promises);
await this.sleep(this.delayBetweenPages);
dataResults.push(...chunkResults);
logDebug("Chunk results len:", chunkResults.length);
}
const filteredScrapedData = dataResults.filter(adData => !!adData);
logDebug("Filtered scraped data length: ", filteredScrapedData.length);
return filteredScrapedData; return filteredScrapedData;
} catch (e) { } catch (e) {
console.error("Exception caught:" + e); console.error("Exception caught, index single page: " + e);
return []; return [];
} }
} }
async scrapeAd(url) { async scrapeAd(url) {
// console.log("Scraping : ", url); logDebug("Scraping : ", url);
try { try {
const adPageSource = await fetch(url); const adPageSource = await fetch(url);
const body = await adPageSource.text(); const body = await adPageSource.text();
const $ = cheerio.load(body); const $ = cheerio.load(body);
let status = AD_STATUS.STATUS_NORMAL; let status = AD_STATUS.STATUS_NORMAL;
if (body.indexOf('<html') === -1) {
console.error("This is the body: ", body);
throw { message: 'Failed to fetch page !' }
}
const propertySelectors = { const propertySelectors = {
username: username:
"#lg > div.desno2.profil > div:nth-child(2) > div.vrsta1.vrsta_desno > a > div.username > span", "#lg > div.desno2.profil > div:nth-child(2) > div.vrsta1.vrsta_desno > a > div.username > span",
@@ -271,65 +305,26 @@ class OlxCrawler {
//====== PRICE DETECTION AND EXTRACTION ===== //====== PRICE DETECTION AND EXTRACTION =====
let price = null; let price = null;
let normalPrice = null;
let urgentPrice = null;
const normalPriceValue = $("#pc > p:nth-child(2)")
.text()
.trim();
const urgentPriceValue = $(
"#artikal_glavni_div > div.artikal_lijevo > div:nth-child(5) > p"
)
.text()
.trim();
//For cases where price is given in discount manner - different from default parsing
const discountPriceValue = $(
"#artikal_glavni_div > div.artikal_lijevo > div.op.pop > p"
)
.text()
.trim();
if (normalPriceValue && normalPriceValue.length > 0) { const priceHeader = $("#pc > p.n").text().trim();
normalPrice = normalPriceValue const priceValue = $("#pc > p:nth-child(2)").text().trim();
.replace(/\r\n|\n|\r/gm, "") price = priceValue;
.replace("KM", "")
.trim(); if (priceHeader.indexOf('Hitn') !== -1) {
if ( // Urgent price
$("#pc > p.n")
.text()
.indexOf("Hitna") !== -1
) {
status = AD_STATUS.STATUS_URGENT;
} else {
status = AD_STATUS.STATUS_NORMAL;
}
} else if (discountPriceValue && discountPriceValue.length > 0) {
status = AD_STATUS.STATUS_URGENT; status = AD_STATUS.STATUS_URGENT;
const priceValues = discountPriceValue.split("KM");
normalPrice = priceValues[0].trim();
} else {
console.log("Body:", body);
throw { message: "Can't find normal price" };
}
if (urgentPriceValue && urgentPriceValue.length > 0) {
const priceValues = urgentPriceValue.replace("Cijena", "").split("KM");
//priceValues will contain values like ["100000", "90000", ...], second element is urgent price
if (priceValues.length > 0) {
if (priceValues[0].trim().indexOf("Hitno") != -1) {
urgentPrice = priceValues[0].replace("Hitno", "").trim();
status = AD_STATUS.STATUS_URGENT;
} else {
urgentPrice = priceValues[0].trim();
}
} else if (discountPriceValue && discountPriceValue.length > 0) {
status = AD_STATUS.STATUS_URGENT;
const priceValues = discountPriceValue.split("KM");
urgentPrice = priceValues[1].trim();
} else {
throw { message: "Can't find urgent price" };
}
} }
price = status === AD_STATUS.STATUS_URGENT ? urgentPrice : normalPrice; const discountPriceTag = $("#artikal_glavni_div > div.artikal_lijevo > p:nth-child(4)").text().trim();
if (discountPriceTag.indexOf('Akcij') !== -1) {
status = AD_STATUS.STATUS_DISCOUNTED;
const discountPriceValues = $("#artikal_glavni_div > div.artikal_lijevo > div:nth-child(5) > p").text().trim();
// discountPriceValues contain string like "10.000 KM 7.500 KM"
// First price is regular, second is currently active (discounted) price
const bothPrices = discountPriceValues.split('KM');
// Now, currently active price is second element of bothPrices array
price = bothPrices[1] ? bothPrices[1].trim() : null;
}
//====== OTHER AD INFORMATION =============== //====== OTHER AD INFORMATION ===============
let adType = null; let adType = null;
@@ -710,12 +705,13 @@ class OlxCrawler {
distanceToRiver, distanceToRiver,
numberOfViewsAgency numberOfViewsAgency
}; };
//
//console.log("Scraped data:", data);
return data; return data;
} catch (e) { } catch (e) {
console.error("Exception caught: " + e.message, "\r\nURL:", url); console.error("Exception caught scrapeAd : " + e.message, "\r\nURL:", url);
} }
return null; return null;
} }
@@ -931,28 +927,8 @@ class OlxCrawler {
console.log("sprat = NEPOZNATO [", floorText, "]"); console.log("sprat = NEPOZNATO [", floorText, "]");
return null; return null;
} }
async checkAvailableConcurrentReqSlots(numberOfNeededConcurrentReq) {
try {
const scraperApiAccountInfo = await scraperapiClient.account();
const numberOfUsedConcurrentReq =
scraperApiAccountInfo.concurrentRequests;
const limitOfConcurrentReq = scraperApiAccountInfo.concurrencyLimit;
//Buffer of requests to prevent errors with prefligh requests
const bufferNumberOfReq = 3;
const numberOfAvailableConcurrentReq =
limitOfConcurrentReq - bufferNumberOfReq - numberOfUsedConcurrentReq;
if (numberOfNeededConcurrentReq <= numberOfAvailableConcurrentReq) {
return true;
} else {
return false;
}
} catch (err) {
return false;
}
}
async sleep(ms) { async sleep(ms) {
// console.log("Sleep for:", ms);
return new Promise(resolve => setTimeout(resolve, ms)); return new Promise(resolve => setTimeout(resolve, ms));
} }

View File

@@ -63,13 +63,19 @@ class ProstorCrawler {
async crawl() { async crawl() {
const crawlAdCategories = this.crawlerAdCategories; const crawlAdCategories = this.crawlerAdCategories;
const crawlAdTypes = this.crawlerAdTypes;
if (!crawlAdCategories || !crawlAdTypes) {
return []
}
const newRealEstates = [];
//We need session cookie to use login privileges //We need session cookie to use login privileges
const prostorCookie = await this.getCookies(); const prostorCookie = await this.getCookies();
//New tag to check if crawler loged in //New tag to check if crawler logged in
const login = await this.loginForScraping(PROSTOR_LOGIN, prostorCookie); const login = await this.loginForScraping(PROSTOR_LOGIN, prostorCookie);
const newRealEstates = [];
//Crawl only if login was successful //Crawl only if login was successful
if (crawlAdCategories && login) { if (login) {
const indexGenerators = []; const indexGenerators = [];
for (const adCategory of crawlAdCategories) { for (const adCategory of crawlAdCategories) {
indexGenerators.push(this.categoryIndexer(adCategory, prostorCookie)); indexGenerators.push(this.categoryIndexer(adCategory, prostorCookie));
@@ -135,6 +141,11 @@ class ProstorCrawler {
prostorCookie prostorCookie
); );
if (!Array.isArray(listOfAllRealEstates)){
console.log('[PROSTOR] Could not find real estate JSON data, check selector !');
return undefined;
}
let elementToStartIndexFrom = 0; let elementToStartIndexFrom = 0;
while (true) { while (true) {
const realEstatesForSinglePage = listOfAllRealEstates.slice( const realEstatesForSinglePage = listOfAllRealEstates.slice(
@@ -204,6 +215,10 @@ class ProstorCrawler {
const body = await adPageSource.text(); const body = await adPageSource.text();
const $ = cheerio.load(body); const $ = cheerio.load(body);
if (body.indexOf('<html') === -1) {
throw { message: 'Failed to fetch page !' }
}
// link contains part of the URL in the format of : /prodaja/stan/stup/9556 // link contains part of the URL in the format of : /prodaja/stan/stup/9556
// general form is : /actionType/realEstateType/location/realEstateID // general form is : /actionType/realEstateType/location/realEstateID
// linkParts contains : ['', 'actionType', 'realEstateType', 'location', 'realEstateID'] // linkParts contains : ['', 'actionType', 'realEstateType', 'location', 'realEstateID']
@@ -435,7 +450,7 @@ class ProstorCrawler {
const $ = cheerio.load(body); const $ = cheerio.load(body);
const scriptElement = $( const scriptElement = $(
"body > div > div.container-fluid > script:nth-child(7)" "body > div.content > div.container-fluid > script:nth-child(6)"
); );
if ( if (

View File

@@ -159,7 +159,7 @@ class RentalCrawler {
} }
try { try {
const res = await fetch(url); const res = await fetch(url, {} , false);
const body = await res.text(); const body = await res.text();
const $ = cheerio.load(body); const $ = cheerio.load(body);
let hrefs = []; let hrefs = [];
@@ -202,6 +202,10 @@ class RentalCrawler {
const body = await adPageSource.text(); const body = await adPageSource.text();
const $ = cheerio.load(body); const $ = cheerio.load(body);
if (body.indexOf('<html') === -1) {
throw { message: 'Failed to fetch page !' }
}
const mapElementParent = $(".box-map").parent(); const mapElementParent = $(".box-map").parent();
const scriptElement = $("script", mapElementParent); const scriptElement = $("script", mapElementParent);
if ( if (

View File

@@ -1,8 +1,10 @@
"use strict"; "use strict";
const fetch = require("../../helpers/fetchWrapper"); const fetch = require("../../helpers/fetchWrapper");
const { getUrlParams } = require("../../helpers/url");
const cheerio = require("cheerio"); const cheerio = require("cheerio");
const moment = require("moment-timezone"); const moment = require("moment-timezone");
const PromisePool = require('@supercharge/promise-pool');
const { const {
AD_TYPE, AD_TYPE,
@@ -16,8 +18,7 @@ const {
const { const {
PRINT_CRAWLER_DEBUG, PRINT_CRAWLER_DEBUG,
DEFAULT_TIMEZONE, DEFAULT_TIMEZONE
NUMBER_OF_CONCURRENT_REQ_SCRAPER_API
} = require("../../config/appConfig"); } = require("../../config/appConfig");
const { SALJIC_FORCE_CRAWL } = require("../specificConfigs/saljic"); const { SALJIC_FORCE_CRAWL } = require("../specificConfigs/saljic");
@@ -47,12 +48,13 @@ class SaljicCrawler {
maxPages = 5000, maxPages = 5000,
maxResultsPerPage = 5000, maxResultsPerPage = 5000,
ignoredUsernames = [], ignoredUsernames = [],
delayBetweenPages = 1000 delayBetweenPages = 500
) { ) {
this.savers = savers; this.savers = savers;
this.baseUrl = "https://www.saljicnekretnine.ba/v2/nekretnine_search"; this.baseUrl = "https://www.saljicnekretnine.ba/v2/nekretnine_search";
this.crawlerAdTypes = crawlerAdTypes; this.crawlerAdTypes = crawlerAdTypes;
this.crawlerAdCategories = crawlerAdCategories; this.crawlerAdCategories = crawlerAdCategories;
this.maxPages = maxPages
this.maxResultsPerPage = maxResultsPerPage; this.maxResultsPerPage = maxResultsPerPage;
this.delayBetweenPages = delayBetweenPages; this.delayBetweenPages = delayBetweenPages;
} }
@@ -85,7 +87,6 @@ class SaljicCrawler {
for (const [index, { value: singlePageResult }] of entries) { for (const [index, { value: singlePageResult }] of entries) {
if (singlePageResult) { if (singlePageResult) {
const saveResults = await this.saveCrawledResults(singlePageResult); const saveResults = await this.saveCrawledResults(singlePageResult);
const { newRecords } = saveResults; const { newRecords } = saveResults;
newRealEstates.push(...newRecords); newRealEstates.push(...newRecords);
@@ -159,7 +160,7 @@ class SaljicCrawler {
} }
try { try {
const res = await fetch(url); const res = await fetch(url, {}, false);
const body = await res.text(); const body = await res.text();
const $ = cheerio.load(body); const $ = cheerio.load(body);
let hrefs = []; let hrefs = [];
@@ -205,32 +206,25 @@ class SaljicCrawler {
? hrefsAbs.length ? hrefsAbs.length
: maxResultsPerPage; : maxResultsPerPage;
const scrapedData = []; const asyncScraping = [];
for ( for (let i = 0; i < actualNoOfResults; i++) {
let i = 0; asyncScraping.push([hrefsAbs[i], adTypes[i]]);
i <= actualNoOfResults;
i = i + NUMBER_OF_CONCURRENT_REQ_SCRAPER_API
) {
const concurrentUrlsToScrape = hrefsAbs.slice(
i,
i + NUMBER_OF_CONCURRENT_REQ_SCRAPER_API
);
const concurrentAdTypesOfReq = adTypes.slice(
i,
i + NUMBER_OF_CONCURRENT_REQ_SCRAPER_API
);
const concurrentReqScraperApi = concurrentUrlsToScrape.map(
(url, index) => this.scrapeAd(url, concurrentAdTypesOfReq[index])
);
const concurrentReqData = await Promise.all(concurrentReqScraperApi);
concurrentReqData.forEach(reqData => scrapedData.push(reqData));
} }
const filteredScrapedData = scrapedData.filter(adData => !!adData);
const dataResults = []
const { scrapedData, errors } = await PromisePool
.withConcurrency(2)
.for(asyncScraping)
.process(async data => {
const result = await this.scrapeAd(...data)
await this.sleep(this.delayBetweenPages);
dataResults.push(result)
return result; //TODO: this does not work, scrapedData is null, dataResults works
})
const filteredScrapedData = dataResults.filter(adData => !!adData);
return filteredScrapedData; return filteredScrapedData;
} catch (e) { } catch (e) {
console.error("[SALJIC] Exception caught:" + e); console.error("[SALJIC] Exception caught:" + e);
@@ -238,17 +232,17 @@ class SaljicCrawler {
} }
} }
async scrapeAd(url, adTypeAttribute) { async scrapeAd(url, adType) {
//console.log("[SALJIC] Scraping : ", url); // console.log("[SALJIC] Scraping : ", url);
try { try {
const adPageSource = await fetch(url); const adPageSource = await fetch(url);
const body = await adPageSource.text(); const body = await adPageSource.text();
const $ = cheerio.load(body); const $ = cheerio.load(body);
//Throws error if req to Scraper API proxy wasn't succesful and responds with error if (body.indexOf('<html') === -1) {
if (body.indexOf("<html>") === -1) { throw { message: 'Failed to fetch page !' }
throw { message: "Scraper API server error." };
} }
// No information for status ex. PRODAN // No information for status ex. PRODAN
const status = AD_STATUS.STATUS_NORMAL; const status = AD_STATUS.STATUS_NORMAL;
//Extracting agency ID from url //Extracting agency ID from url
@@ -256,21 +250,23 @@ class SaljicCrawler {
? parseInt(url.substring(46, url.length)) ? parseInt(url.substring(46, url.length))
: null; : null;
if (!agencyObjectId) {
throw { message : 'No agency object ID - URL changed?'}
}
//Extracting main properties //Extracting main properties
const propertySelectors = { const propertySelectors = {
title: title:
"div.content-wrap > div.container.clearfix.wpc > div.col-md-8.nobottommargin > div.single-post.nobottommargin > div.entry.clearfix > div.entry-title > h2", "div.content-wrap > div.container > div.col-md-8.nobottommargin > div.single-post > div.entry > div.entry-title > h2",
price: price:
"div.content-wrap > div.container > div.col-md-8.nobottommargin > div.single-post > div.entry > div.topmargin-sm.single-product > div.product > div.product-price > ins", "div.content-wrap > div.container > div.col-md-8.nobottommargin > div.single-post > div.entry > div.topmargin-sm.single-product > div.product > div.product-price > ins",
streetName: streetName:
"div.content-wrap > div.container > div.col-md-8.nobottommargin > div.single-post > div.entry > div.entry-content.topmargin > p", "div.content-wrap > div.container > div.col-md-8.nobottommargin > div.single-post > div.entry > div.entry-content.topmargin > p",
descriptions: descriptions:
"div.content-wrap > div.container > div.col-md-8.nobottommargin > div.single-post > div.entry > div.entry-content.topmargin > div.toggle.toggle-bg > div.togglec >p:nth-child(1)", "div.content-wrap > div.container > div.col-md-8.nobottommargin > div.single-post > div.entry > div.entry-content.topmargin > div.toggle.toggle-bg > div.togglec >p:nth-child(1)",
latAndLong: latAndLong:
"div.content-wrap > div.container > div.col-md-8.nobottommargin > div.single-post > div.entry > div.entry-content.topmargin > div.gmap.bottommargin > iframe" "iframe"
}; };
const title = $(propertySelectors.title) const title = $(propertySelectors.title)
.text() .text()
.replace(/(\r\n|\n|\r)/gm, "") .replace(/(\r\n|\n|\r)/gm, "")
@@ -300,15 +296,26 @@ class SaljicCrawler {
.trim(); .trim();
const latAndLongSrc = $(propertySelectors.latAndLong).attr("src"); const latAndLongSrc = $(propertySelectors.latAndLong).attr("src");
let tmpLatLong;
let latText; let latText;
let longText; let longText;
if (latAndLongSrc){
if (latAndLongSrc && latAndLongSrc.indexOf("openstreetmap") !== -1) { const mapParams = getUrlParams(latAndLongSrc);
tmpLatLong = latAndLongSrc.split("marker=")[1]; if (mapParams) {
latText = tmpLatLong.split("%2C")[0]; if (mapParams['marker']){
longText = tmpLatLong.split("%2C")[1]; const marker = mapParams['marker'].split(',');
latText = marker[0] ? marker[0] : undefined;
longText = marker[1] ? marker[1] : undefined;
}else{
if (mapParams['mlat']) {
latText = mapParams['mlat'];
}
if (mapParams['mlon']) {
longText = mapParams['mlon'];
}
}
}
} }
const locationLat = parseFloat(latText) || null; const locationLat = parseFloat(latText) || null;
const locationLong = parseFloat(longText) || null; const locationLong = parseFloat(longText) || null;
@@ -357,7 +364,6 @@ class SaljicCrawler {
let numberOfViewsKivi = null; let numberOfViewsKivi = null;
let streetNumber = 0; let streetNumber = 0;
let adStatus = status; let adStatus = status;
let adType = adTypeAttribute;
let shortDescription = descriptions let shortDescription = descriptions
? descriptions.substring(0, descriptions.indexOf(".")) ? descriptions.substring(0, descriptions.indexOf("."))
: ""; : "";
@@ -396,7 +402,7 @@ class SaljicCrawler {
numberOfRooms = parseInt(mainFieldValue); numberOfRooms = parseInt(mainFieldValue);
break; break;
case "Broj spratova": case "Broj spratova":
numberOfFloors = parseInt(mainFieldValue); numberOfFloors = this.parseNumberOfFloors(mainFieldValue);
break; break;
case "Sprat": case "Sprat":
floor = parseInt(mainFieldValue); floor = parseInt(mainFieldValue);
@@ -441,8 +447,10 @@ class SaljicCrawler {
additionalField.length additionalField.length
) )
.trim(); .trim();
realEstateType = this.getAdCategoryId(categoryTmp); realEstateType = this.getAdCategoryId(categoryTmp);
if (!realEstateType) {
throw { message: 'No real estate type - page body not loaded correctly or page changed?' }
}
} else { } else {
switch (additionalField) { switch (additionalField) {
case "Internet": case "Internet":
@@ -532,11 +540,6 @@ class SaljicCrawler {
const region = ""; const region = "";
const entity = ""; const entity = "";
const country = ""; const country = "";
//Throws error if realEstateType is null - not read. Still dont know why?
if (realEstateType === null) {
console.log("Body:", body);
throw { message: "Couldn't read real estate type." };
}
const data = { const data = {
url, url,
@@ -604,9 +607,8 @@ class SaljicCrawler {
return data; return data;
} catch (e) { } catch (e) {
console.error("Exception caught: " + e.message, "\r\nURL:", url); console.error("[SALJIC] Exception caught: " + e.message, "\r\nURL:", url);
} }
return null; return null;
} }
@@ -650,6 +652,21 @@ class SaljicCrawler {
} }
} }
parseNumberOfFloors(numberOfFloorsText) {
const tryNumericalValue = parseInt(numberOfFloorsText);
if (!isNaN(tryNumericalValue)){
return tryNumericalValue;
}
// Guess number of floors based on number of + sign concatenations
// e.g. P+S+Pt -> 3 floors
if (typeof numberOfFloorsText === 'string' && numberOfFloorsText.indexOf('+') > 0) {
return numberOfFloorsText.split('+').length + 1
}
return null
}
async sleep(ms) { async sleep(ms) {
return new Promise(resolve => setTimeout(resolve, ms)); return new Promise(resolve => setTimeout(resolve, ms));
} }

View File

@@ -9,6 +9,7 @@ const { AD_CATEGORY, AD_TYPE, EMAIL_FREQUENCY } = require("../common/enums");
//Tag to recognize staging from development //Tag to recognize staging from development
const stagingTag = STAGING ? "[STAGING] " : ""; const stagingTag = STAGING ? "[STAGING] " : "";
const wordOfMouthRequest = `Molimo vas <strong>recite svojim prijateljima</strong> za Kivi - što više korisnika budemo imali, moći ćemo više agencija uključiti i više nekretnina imati u bazi. Hvala!`
const generateEmailFooter = (searchRequestId, emailFrequencyTitle) => { const generateEmailFooter = (searchRequestId, emailFrequencyTitle) => {
return ` <div>Trenutno ste prijavljeni da obavještenja o novim nekretninama primate <strong>${emailFrequencyTitle.toLowerCase()} </strong>.</div> return ` <div>Trenutno ste prijavljeni da obavještenja o novim nekretninama primate <strong>${emailFrequencyTitle.toLowerCase()} </strong>.</div>
@@ -69,6 +70,9 @@ const generateNotificationEmail = (
${moreRealEstates} ${moreRealEstates}
</div> </div>
<br/> <br/>
${wordOfMouthRequest}
<br/>
<br/>
${emailFooter}`; ${emailFooter}`;
}; };
@@ -132,6 +136,10 @@ const generateNewSearchRequestEmail = (searchRequest, matchingRealEstates) => {
</div> </div>
${matchingRealEstates.length > 0 ? instantRealEstatesText : ""} ${matchingRealEstates.length > 0 ? instantRealEstatesText : ""}
<br/> <br/>
<br/>
${wordOfMouthRequest}
<br/>
<br/>
${emailFooter}`; ${emailFooter}`;
}; };

View File

@@ -1,24 +1,58 @@
const nodeFetch = require("node-fetch"); const nodeFetch = require("node-fetch");
const AbortController = require('abort-controller');
const FetchCache = require('@sozialhelden/fetch-cache').default;
console.log("Fc ", FetchCache)
const { const {
USER_AGENT, USER_AGENT,
USE_SCRAPER_API, USE_SCRAPER_API,
SCRAPER_API_KEY SCRAPER_API_KEY,
SCRAPER_API_BASE_URL,
NODE_FETCH_TIMEOUT_MS
} = require("../config/appConfig"); } = require("../config/appConfig");
const fetch = async (url, options = {}) => { const timeout = (ms) => {
return new Promise(resolve => setTimeout(resolve, ms));
}
const fetchCache = new FetchCache({
fetch: nodeFetch,
cacheOptions: {
// Don't save more than 10000 responses in the cache. Allows infinite responses by default
maximalItemCount: 10000,
// When should the cache evict responses when its full?
evictExceedingItemsBy: 'age', // Valid values: 'lru' or 'age'
defaultTTL: 6 * 60 * 60 * 1000 // 6 hours
// ...see https://github.com/sozialhelden/hamster-cache for all possible options
},
});
const fetch = async (url, options = {}, useCache = true) => {
const controller = new AbortController();
const newOptions = Object.assign({}, options); const newOptions = Object.assign({}, options);
if (!newOptions["headers"]) { if (!newOptions["headers"]) {
newOptions["headers"] = {}; newOptions["headers"] = {};
} }
newOptions["headers"]["User-Agent"] = USER_AGENT;
newOptions.signal = controller.signal;
// newOptions["headers"]["User-Agent"] = USER_AGENT;
let urlToFetchThroughAPI = Buffer.from(url).toString('base64');
if (SCRAPER_API_BASE_URL.includes('scraperapi')) {
urlToFetchThroughAPI = url;
}
const urlAdaptedForScraping = USE_SCRAPER_API const urlAdaptedForScraping = USE_SCRAPER_API
? `http://api.scraperapi.com/?api_key=${SCRAPER_API_KEY}&url=${url}` ? `${SCRAPER_API_BASE_URL}?api_key=${SCRAPER_API_KEY}&url=${urlToFetchThroughAPI}`
: url; : url;
const result = useCache ? fetchCache.fetch(urlAdaptedForScraping, newOptions) : nodeFetch(urlAdaptedForScraping, newOptions);
// const timeoutId = setTimeout(() => controller.abort(), NODE_FETCH_TIMEOUT_MS);
// console.log("Url for scraping:", urlAdaptedForScraping); return result;
return nodeFetch(urlAdaptedForScraping, newOptions);
}; };
module.exports = fetch; module.exports = fetch;

13
app/helpers/log.js Normal file
View File

@@ -0,0 +1,13 @@
const {
PRINT_CRAWLER_DEBUG
} = require("../config/appConfig");
/**
 * Forwards all arguments to console.log, but only while the
 * PRINT_CRAWLER_DEBUG config value is truthy; otherwise it is a no-op.
 *
 * @param {...*} args - Values to print, passed through unchanged.
 */
const logDebug = (...args) => {
  if (!PRINT_CRAWLER_DEBUG) {
    return;
  }
  console.log(...args);
};
module.exports = {
logDebug
};

View File

@@ -7,6 +7,26 @@ const currentSearchRequest = async req => {
return await getSearchRequest(searchRequestId); return await getSearchRequest(searchRequestId);
}; };
module.exports = {
currentSearchRequest const getUrlParams = function (url) {
if (typeof url === 'string' && url.length > 0){
const params = {};
const questionMarkIndex = url.indexOf('?');
if (questionMarkIndex === -1) {
return undefined;
}
const query = url.substring(questionMarkIndex+1);
const vars = query.split('&');
for (let i = 0; i < vars.length; i++) {
const pair = vars[i].split('=');
params[pair[0]] = decodeURIComponent(pair[1]);
}
return params;
}
return undefined;
};
module.exports = {
currentSearchRequest,
getUrlParams
}; };

View File

@@ -16,7 +16,7 @@ config.logging = parseInt(process.env.SEQUELIZE_LOGGING) ? console.log : false;
let sequelize; let sequelize;
if (config.use_env_variable) { if (config.use_env_variable) {
sequelize = new Sequelize(process.env[config.use_env_variable], config); sequelize = new Sequelize(process.env[config.use_env_variable] + "?ssl=true", config);
} else { } else {
sequelize = new Sequelize( sequelize = new Sequelize(
config.database, config.database,

View File

@@ -24,8 +24,8 @@ API_MAP_KEY=(your-key-here)
#=============== SCRAPER API SUPPORT =============# #=============== SCRAPER API SUPPORT =============#
USE_SCRAPER_API= To turn it on (1) or off (0) USE_SCRAPER_API= To turn it on (1) or off (0)
SCRAPER_API_KEY= Key for Scraper api SCRAPER_API_KEY= Key for Scraper api
NUMBER_OF_CONCURRENT_REQ_SCRAPER_API= Number of requests to send concurrently to Scraper API proxy SCRAPER_API_BASE_URL= Base url without question mark (example: http://sabur.kivi.ba:1337)
#=============== AWS SDK EMAIL SETTINGS =======# #=============== AWS SDK EMAIL SETTINGS =======#
AWS_KEY_ID=(your-key-here) AWS_KEY_ID=(your-key-here)
@@ -37,7 +37,6 @@ SOURCE_EMAIL=info@saburly.com
CRAWLER_INTERVAL=Interval to run crawler(s), in seconds CRAWLER_INTERVAL=Interval to run crawler(s), in seconds
STOP_CRAWLER=Non-zero value will skip crawler execution STOP_CRAWLER=Non-zero value will skip crawler execution
PRINT_CRAWLER_DEBUG_INFO=Non-zero value will print crawler debugging info to the server console PRINT_CRAWLER_DEBUG_INFO=Non-zero value will print crawler debugging info to the server console
#==OLX== #==OLX==
OLX_MAX_PAGES=Restrict crawler to this number of pages OLX_MAX_PAGES=Restrict crawler to this number of pages
OLX_MAX_RESULTS_PER_PAGE=Only this number or less results from one page will be scraped and saved OLX_MAX_RESULTS_PER_PAGE=Only this number or less results from one page will be scraped and saved
@@ -46,7 +45,6 @@ OLX_CRAWLER_AD_CATEGORIES=comma separated list of enum names of categories to be
OLX_IGNORED_USERNAMES=comma separated list of usernames to ignore OLX_IGNORED_USERNAMES=comma separated list of usernames to ignore
OLX_DELAY_BETWEEN_PAGES=time in milliseconds to wait before indexing next page OLX_DELAY_BETWEEN_PAGES=time in milliseconds to wait before indexing next page
OLX_FORCE_CRAWL=Non-zero value will force crawler to crawl all pages without stopping when known real estate is found OLX_FORCE_CRAWL=Non-zero value will force crawler to crawl all pages without stopping when known real estate is found
#==RENTAL== #==RENTAL==
RENTAL_MAX_PAGES=Restrict crawler to this number of pages RENTAL_MAX_PAGES=Restrict crawler to this number of pages
RENTAL_MAX_RESULTS_PER_PAGE=Only this number or less results from one page will be scraped and saved RENTAL_MAX_RESULTS_PER_PAGE=Only this number or less results from one page will be scraped and saved
@@ -74,6 +72,7 @@ AKTIDO_IGNORED_USERNAMES=!!! This is not used for aktido crawler !!!
AKTIDO_DELAY_BETWEEN_PAGES=time in milliseconds to wait before indexing next page AKTIDO_DELAY_BETWEEN_PAGES=time in milliseconds to wait before indexing next page
AKTIDO_FORCE_CRAWL=Non-zero value will force crawler to crawl all pages without stopping when known real estate is found AKTIDO_FORCE_CRAWL=Non-zero value will force crawler to crawl all pages without stopping when known real estate is found
#==SALJIC NEKRETNINE== #==SALJIC NEKRETNINE==
SALJIC_MAX_PAGES=Restrict crawler to this number of pages
SALJIC_MAX_RESULTS_PER_PAGE=For Saljic crawler, this represents how many ads are crawled at once SALJIC_MAX_RESULTS_PER_PAGE=For Saljic crawler, this represents how many ads are crawled at once
SALJIC_CRAWLER_AD_TYPE=enum name of what type of ads should be crawled, check common/enums.js file for valid values SALJIC_CRAWLER_AD_TYPE=enum name of what type of ads should be crawled, check common/enums.js file for valid values
SALJIC_CRAWLER_AD_CATEGORIES=comma separated list of enum names of categories to be included, check common/enums.js file for valid values SALJIC_CRAWLER_AD_CATEGORIES=comma separated list of enum names of categories to be included, check common/enums.js file for valid values

View File

@@ -4,6 +4,7 @@ const bodyParser = require("body-parser");
const layout = require("express-layout"); const layout = require("express-layout");
const compression = require("compression"); const compression = require("compression");
const forceSSL = require("./app/helpers/forceSSL"); const forceSSL = require("./app/helpers/forceSSL");
const { logDebug } = require("./app/helpers/log");
const { const {
APP_PORT, APP_PORT,
@@ -38,11 +39,17 @@ app.listen(APP_PORT, () =>
let crawlerRunning = STOP_CRAWLER; let crawlerRunning = STOP_CRAWLER;
const crawl = () => { const crawl = () => {
logDebug("Crawl start. crawlerRunning: ", crawlerRunning);
if (!crawlerRunning) { if (!crawlerRunning) {
crawlerRunning = true; crawlerRunning = true;
crawlAll().then(newRealEstates => { crawlAll().then(newRealEstates => {
crawlerRunning = false; logDebug("crawlAll done, new real estate len: ", newRealEstates.length);
notifyForNewRealEstates(newRealEstates); notifyForNewRealEstates(newRealEstates);
}).catch(e => {
console.error('Error happened: ', e);
}).finally(()=> {
crawlerRunning = false;
logDebug('Finally done!');
}); });
} }
}; };

226
package-lock.json generated
View File

@@ -40,6 +40,32 @@
"@sendgrid/helpers": "^6.3.0" "@sendgrid/helpers": "^6.3.0"
} }
}, },
"@sozialhelden/fetch-cache": {
"version": "2.0.1",
"resolved": "https://registry.npmjs.org/@sozialhelden/fetch-cache/-/fetch-cache-2.0.1.tgz",
"integrity": "sha512-vMlsdT5JQCGjx1fcFxmMNh7ZKppjjsfUAeZEhhNwhEL7GaqbZXsD1OXEyx2IcRa25ZuZtvJSV6Q3rE77VRdLvg==",
"requires": {
"@sozialhelden/hamster-cache": "^1.0.0"
}
},
"@sozialhelden/hamster-cache": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/@sozialhelden/hamster-cache/-/hamster-cache-1.0.0.tgz",
"integrity": "sha512-/TEGA8mdMawZp4Yq/GrkL+72YL5EGuSeVXC3pKW12YY1t3C+zCN/HZ0HRp4zWF/e67svXcxuz/B0AEQxEdvi7A=="
},
"@supercharge/goodies": {
"version": "1.4.0",
"resolved": "https://registry.npmjs.org/@supercharge/goodies/-/goodies-1.4.0.tgz",
"integrity": "sha512-Np6u2qjRwiA3wTgzz4n2yduydIjSXqtJWP5cOnNqjdlCR/EUAK86LAOhEcU+YW211D1ksugns3GqpARJDoXQ7g=="
},
"@supercharge/promise-pool": {
"version": "1.3.0",
"resolved": "https://registry.npmjs.org/@supercharge/promise-pool/-/promise-pool-1.3.0.tgz",
"integrity": "sha512-9/EVrJevSPEqI4i/gRH8Dt7C+FQT65wRRYuu0MDaGmSLZ2aTel0jOGu8Ae84fPiQ+Ah0B80RPFUxk+K+Cz48DA==",
"requires": {
"@supercharge/goodies": "~1.4.0"
}
},
"@types/caseless": { "@types/caseless": {
"version": "0.12.2", "version": "0.12.2",
"resolved": "https://registry.npmjs.org/@types/caseless/-/caseless-0.12.2.tgz", "resolved": "https://registry.npmjs.org/@types/caseless/-/caseless-0.12.2.tgz",
@@ -79,6 +105,14 @@
"resolved": "https://registry.npmjs.org/abbrev/-/abbrev-1.1.1.tgz", "resolved": "https://registry.npmjs.org/abbrev/-/abbrev-1.1.1.tgz",
"integrity": "sha512-nne9/IiQ/hzIhY6pdDnbBtz7DjPTKrY00P/zvPSm5pOFkl6xuGrGnXn/VtTNNfNtAfZ9/1RtehkszU9qcTii0Q==" "integrity": "sha512-nne9/IiQ/hzIhY6pdDnbBtz7DjPTKrY00P/zvPSm5pOFkl6xuGrGnXn/VtTNNfNtAfZ9/1RtehkszU9qcTii0Q=="
}, },
"abort-controller": {
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/abort-controller/-/abort-controller-3.0.0.tgz",
"integrity": "sha512-h8lQ8tacZYnR3vNQTgibj+tODHI5/+l06Au2Pcriv/Gmet0eaj4TwWH41sO9wnHDiQsEj19q0drzdWdeAHtweg==",
"requires": {
"event-target-shim": "^5.0.0"
}
},
"accepts": { "accepts": {
"version": "1.3.5", "version": "1.3.5",
"resolved": "https://registry.npmjs.org/accepts/-/accepts-1.3.5.tgz", "resolved": "https://registry.npmjs.org/accepts/-/accepts-1.3.5.tgz",
@@ -147,14 +181,6 @@
} }
} }
}, },
"argparse": {
"version": "1.0.10",
"resolved": "https://registry.npmjs.org/argparse/-/argparse-1.0.10.tgz",
"integrity": "sha512-o5Roy6tNG4SL/FOkCAN6RzjiakZS25RLYFrcMttJqbdd8BWrnA+fGz57iN5Pb06pvBGvl5gQ0B48dJlslXvoTg==",
"requires": {
"sprintf-js": "~1.0.2"
}
},
"arr-diff": { "arr-diff": {
"version": "4.0.0", "version": "4.0.0",
"resolved": "https://registry.npmjs.org/arr-diff/-/arr-diff-4.0.0.tgz", "resolved": "https://registry.npmjs.org/arr-diff/-/arr-diff-4.0.0.tgz",
@@ -203,21 +229,6 @@
"integrity": "sha1-WWZ/QfrdTyDMvCu5a41Pf3jsA2c=", "integrity": "sha1-WWZ/QfrdTyDMvCu5a41Pf3jsA2c=",
"dev": true "dev": true
}, },
"async": {
"version": "2.6.3",
"resolved": "https://registry.npmjs.org/async/-/async-2.6.3.tgz",
"integrity": "sha512-zflvls11DCy+dQWzTW2dzuilv8Z5X/pjfmZOWba6TNIVDm+2UDaJmXSOXlasHKfNBs8oo3M0aT50fDEWfKZjXg==",
"requires": {
"lodash": "^4.17.14"
},
"dependencies": {
"lodash": {
"version": "4.17.15",
"resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.15.tgz",
"integrity": "sha512-8xOcRHvCjnocdS5cpwXQXVzmmh5e5+saE2QGoeQmbKmRS6J3VQppPOIt0MnmE+4xlZoumy0GPG0D0MVIQbNA1A=="
}
}
},
"async-each": { "async-each": {
"version": "1.0.3", "version": "1.0.3",
"resolved": "https://registry.npmjs.org/async-each/-/async-each-1.0.3.tgz", "resolved": "https://registry.npmjs.org/async-each/-/async-each-1.0.3.tgz",
@@ -648,11 +659,6 @@
"resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.3.tgz", "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.3.tgz",
"integrity": "sha1-p9BVi9icQveV3UIyj3QIMcpTvCU=" "integrity": "sha1-p9BVi9icQveV3UIyj3QIMcpTvCU="
}, },
"colors": {
"version": "1.0.3",
"resolved": "https://registry.npmjs.org/colors/-/colors-1.0.3.tgz",
"integrity": "sha1-BDP0TYCWgP3rYO0mDxsMJi6CpAs="
},
"combined-stream": { "combined-stream": {
"version": "1.0.7", "version": "1.0.7",
"resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.7.tgz", "resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.7.tgz",
@@ -758,25 +764,6 @@
"resolved": "https://registry.npmjs.org/core-util-is/-/core-util-is-1.0.2.tgz", "resolved": "https://registry.npmjs.org/core-util-is/-/core-util-is-1.0.2.tgz",
"integrity": "sha1-tf1UIgqivFq1eqtxQMlAdUUDwac=" "integrity": "sha1-tf1UIgqivFq1eqtxQMlAdUUDwac="
}, },
"coveralls": {
"version": "3.0.9",
"resolved": "https://registry.npmjs.org/coveralls/-/coveralls-3.0.9.tgz",
"integrity": "sha512-nNBg3B1+4iDox5A5zqHKzUTiwl2ey4k2o0NEcVZYvl+GOSJdKBj4AJGKLv6h3SvWch7tABHePAQOSZWM9E2hMg==",
"requires": {
"js-yaml": "^3.13.1",
"lcov-parse": "^1.0.0",
"log-driver": "^1.2.7",
"minimist": "^1.2.0",
"request": "^2.88.0"
},
"dependencies": {
"minimist": {
"version": "1.2.0",
"resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.0.tgz",
"integrity": "sha1-o1AIsg9BOD7sH7kU9M1d95omQoQ="
}
}
},
"create-error-class": { "create-error-class": {
"version": "3.0.2", "version": "3.0.2",
"resolved": "https://registry.npmjs.org/create-error-class/-/create-error-class-3.0.2.tgz", "resolved": "https://registry.npmjs.org/create-error-class/-/create-error-class-3.0.2.tgz",
@@ -829,11 +816,6 @@
"resolved": "https://registry.npmjs.org/css-what/-/css-what-2.1.3.tgz", "resolved": "https://registry.npmjs.org/css-what/-/css-what-2.1.3.tgz",
"integrity": "sha512-a+EPoD+uZiNfh+5fxw2nO9QwFa6nJe2Or35fGY6Ipw1R3R4AGz1d1TEZrCegvw2YTmZ0jXirGYlzxxpYSHwpEg==" "integrity": "sha512-a+EPoD+uZiNfh+5fxw2nO9QwFa6nJe2Or35fGY6Ipw1R3R4AGz1d1TEZrCegvw2YTmZ0jXirGYlzxxpYSHwpEg=="
}, },
"cycle": {
"version": "1.0.3",
"resolved": "https://registry.npmjs.org/cycle/-/cycle-1.0.3.tgz",
"integrity": "sha1-IegLK+hYD5i0aPN5QwZisEbDStI="
},
"d": { "d": {
"version": "1.0.1", "version": "1.0.1",
"resolved": "https://registry.npmjs.org/d/-/d-1.0.1.tgz", "resolved": "https://registry.npmjs.org/d/-/d-1.0.1.tgz",
@@ -1112,11 +1094,6 @@
"prettier-linter-helpers": "^1.0.0" "prettier-linter-helpers": "^1.0.0"
} }
}, },
"esprima": {
"version": "4.0.1",
"resolved": "https://registry.npmjs.org/esprima/-/esprima-4.0.1.tgz",
"integrity": "sha512-eGuFFw7Upda+g4p+QHvnW0RyTX/SVeJBDM/gCtMARO0cLuT2HcEKnTPvhjV6aGeqrCB/sbNop0Kszm0jsaWU4A=="
},
"etag": { "etag": {
"version": "1.8.1", "version": "1.8.1",
"resolved": "https://registry.npmjs.org/etag/-/etag-1.8.1.tgz", "resolved": "https://registry.npmjs.org/etag/-/etag-1.8.1.tgz",
@@ -1131,6 +1108,11 @@
"es5-ext": "~0.10.14" "es5-ext": "~0.10.14"
} }
}, },
"event-target-shim": {
"version": "5.0.1",
"resolved": "https://registry.npmjs.org/event-target-shim/-/event-target-shim-5.0.1.tgz",
"integrity": "sha512-i/2XbnSz/uxRCU6+NdVJgKWDTM427+MqYbkQzD321DuCQJUqOuJKIA0IM2+W2xtYHdKOmZ4dR6fExsd4SXL+WQ=="
},
"events": { "events": {
"version": "1.1.1", "version": "1.1.1",
"resolved": "https://registry.npmjs.org/events/-/events-1.1.1.tgz", "resolved": "https://registry.npmjs.org/events/-/events-1.1.1.tgz",
@@ -1331,11 +1313,6 @@
"resolved": "https://registry.npmjs.org/extsprintf/-/extsprintf-1.3.0.tgz", "resolved": "https://registry.npmjs.org/extsprintf/-/extsprintf-1.3.0.tgz",
"integrity": "sha1-lpGEQOMEGnpBT4xS48V06zw+HgU=" "integrity": "sha1-lpGEQOMEGnpBT4xS48V06zw+HgU="
}, },
"eyes": {
"version": "0.1.8",
"resolved": "https://registry.npmjs.org/eyes/-/eyes-0.1.8.tgz",
"integrity": "sha1-Ys8SAjTGg3hdkCNIqADvPgzCC8A="
},
"fast-deep-equal": { "fast-deep-equal": {
"version": "2.0.1", "version": "2.0.1",
"resolved": "https://registry.npmjs.org/fast-deep-equal/-/fast-deep-equal-2.0.1.tgz", "resolved": "https://registry.npmjs.org/fast-deep-equal/-/fast-deep-equal-2.0.1.tgz",
@@ -2537,15 +2514,6 @@
"nopt": "~4.0.1" "nopt": "~4.0.1"
} }
}, },
"js-yaml": {
"version": "3.13.1",
"resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-3.13.1.tgz",
"integrity": "sha512-YfbcO7jXDdyj0DGxYVSlSeQNHbD7XPWvrVWeVUujrQEoZzWJIRrCPoyk6kL6IAjAG2IolMK4T0hNUe0HOUs5Jw==",
"requires": {
"argparse": "^1.0.7",
"esprima": "^4.0.0"
}
},
"jsbn": { "jsbn": {
"version": "0.1.1", "version": "0.1.1",
"resolved": "https://registry.npmjs.org/jsbn/-/jsbn-0.1.1.tgz", "resolved": "https://registry.npmjs.org/jsbn/-/jsbn-0.1.1.tgz",
@@ -2608,11 +2576,6 @@
"invert-kv": "^2.0.0" "invert-kv": "^2.0.0"
} }
}, },
"lcov-parse": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/lcov-parse/-/lcov-parse-1.0.0.tgz",
"integrity": "sha1-6w1GtUER68VhrLTECO+TY73I9+A="
},
"locate-path": { "locate-path": {
"version": "3.0.0", "version": "3.0.0",
"resolved": "https://registry.npmjs.org/locate-path/-/locate-path-3.0.0.tgz", "resolved": "https://registry.npmjs.org/locate-path/-/locate-path-3.0.0.tgz",
@@ -2627,11 +2590,6 @@
"resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.11.tgz", "resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.11.tgz",
"integrity": "sha512-cQKh8igo5QUhZ7lg38DYWAxMvjSAKG0A8wGSVimP07SIUEK2UO+arSRKbRZWtelMtN5V0Hkwh5ryOto/SshYIg==" "integrity": "sha512-cQKh8igo5QUhZ7lg38DYWAxMvjSAKG0A8wGSVimP07SIUEK2UO+arSRKbRZWtelMtN5V0Hkwh5ryOto/SshYIg=="
}, },
"log-driver": {
"version": "1.2.7",
"resolved": "https://registry.npmjs.org/log-driver/-/log-driver-1.2.7.tgz",
"integrity": "sha512-U7KCmLdqsGHBLeWqYlFA0V0Sl6P08EE1ZrmA9cxjUE0WVqT9qnyVDPz1kzpFEP0jdJuFnasWIfSd7fsaNXkpbg=="
},
"long-timeout": { "long-timeout": {
"version": "0.1.1", "version": "0.1.1",
"resolved": "https://registry.npmjs.org/long-timeout/-/long-timeout-0.1.1.tgz", "resolved": "https://registry.npmjs.org/long-timeout/-/long-timeout-0.1.1.tgz",
@@ -3302,20 +3260,6 @@
"integrity": "sha512-MtEC1TqN0EU5nephaJ4rAtThHtC86dNN9qCuEhtshvpVBkAW5ZO7BASN9REnF9eoXGcRub+pFuKEpOHE+HbEMw==", "integrity": "sha512-MtEC1TqN0EU5nephaJ4rAtThHtC86dNN9qCuEhtshvpVBkAW5ZO7BASN9REnF9eoXGcRub+pFuKEpOHE+HbEMw==",
"dev": true "dev": true
}, },
"promise-request-retry": {
"version": "1.0.2",
"resolved": "https://registry.npmjs.org/promise-request-retry/-/promise-request-retry-1.0.2.tgz",
"integrity": "sha512-zZmu19chRtC6TYeAZaELF8s+Zotl48M6bRnIVjcUrObEjpI4wk+2VpGVRaRgCG6isOqsK4c5IMY7t59Ff2ia0A==",
"requires": {
"async": "^2.6.0",
"bluebird": "^3.5.1",
"coveralls": "^3.0.0",
"req-cwd": "^2.0.0",
"request": "^2.85.0",
"request-promise": "^4.2.2",
"winston": "^2.4.0"
}
},
"proto-list": { "proto-list": {
"version": "1.2.4", "version": "1.2.4",
"resolved": "https://registry.npmjs.org/proto-list/-/proto-list-1.2.4.tgz", "resolved": "https://registry.npmjs.org/proto-list/-/proto-list-1.2.4.tgz",
@@ -3510,22 +3454,6 @@
"integrity": "sha1-jcrkcOHIirwtYA//Sndihtp15jc=", "integrity": "sha1-jcrkcOHIirwtYA//Sndihtp15jc=",
"dev": true "dev": true
}, },
"req-cwd": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/req-cwd/-/req-cwd-2.0.0.tgz",
"integrity": "sha1-1AgrTURZgDZkD7c93qAe1T20nrw=",
"requires": {
"req-from": "^2.0.0"
}
},
"req-from": {
"version": "2.0.0",
"resolved": "https://registry.npmjs.org/req-from/-/req-from-2.0.0.tgz",
"integrity": "sha1-10GI5H+TeW9Kpx327jWuaJ8+DnA=",
"requires": {
"resolve-from": "^3.0.0"
}
},
"request": { "request": {
"version": "2.88.0", "version": "2.88.0",
"resolved": "https://registry.npmjs.org/request/-/request-2.88.0.tgz", "resolved": "https://registry.npmjs.org/request/-/request-2.88.0.tgz",
@@ -3565,32 +3493,6 @@
} }
} }
}, },
"request-promise": {
"version": "4.2.5",
"resolved": "https://registry.npmjs.org/request-promise/-/request-promise-4.2.5.tgz",
"integrity": "sha512-ZgnepCykFdmpq86fKGwqntyTiUrHycALuGggpyCZwMvGaZWgxW6yagT0FHkgo5LzYvOaCNvxYwWYIjevSH1EDg==",
"requires": {
"bluebird": "^3.5.0",
"request-promise-core": "1.1.3",
"stealthy-require": "^1.1.1",
"tough-cookie": "^2.3.3"
}
},
"request-promise-core": {
"version": "1.1.3",
"resolved": "https://registry.npmjs.org/request-promise-core/-/request-promise-core-1.1.3.tgz",
"integrity": "sha512-QIs2+ArIGQVp5ZYbWD5ZLCY29D5CfWizP8eWnm8FoGD1TX61veauETVQbrV60662V0oFBkrDOuaBI8XgtuyYAQ==",
"requires": {
"lodash": "^4.17.15"
},
"dependencies": {
"lodash": {
"version": "4.17.15",
"resolved": "https://registry.npmjs.org/lodash/-/lodash-4.17.15.tgz",
"integrity": "sha512-8xOcRHvCjnocdS5cpwXQXVzmmh5e5+saE2QGoeQmbKmRS6J3VQppPOIt0MnmE+4xlZoumy0GPG0D0MVIQbNA1A=="
}
}
},
"require-directory": { "require-directory": {
"version": "2.1.1", "version": "2.1.1",
"resolved": "https://registry.npmjs.org/require-directory/-/require-directory-2.1.1.tgz", "resolved": "https://registry.npmjs.org/require-directory/-/require-directory-2.1.1.tgz",
@@ -3609,11 +3511,6 @@
"path-parse": "^1.0.6" "path-parse": "^1.0.6"
} }
}, },
"resolve-from": {
"version": "3.0.0",
"resolved": "https://registry.npmjs.org/resolve-from/-/resolve-from-3.0.0.tgz",
"integrity": "sha1-six699nWiBvItuZTM17rywoYh0g="
},
"resolve-url": { "resolve-url": {
"version": "0.2.1", "version": "0.2.1",
"resolved": "https://registry.npmjs.org/resolve-url/-/resolve-url-0.2.1.tgz", "resolved": "https://registry.npmjs.org/resolve-url/-/resolve-url-0.2.1.tgz",
@@ -3658,16 +3555,6 @@
"resolved": "https://registry.npmjs.org/sax/-/sax-1.2.1.tgz", "resolved": "https://registry.npmjs.org/sax/-/sax-1.2.1.tgz",
"integrity": "sha1-e45lYZCyKOgaZq6nSEgNgozS03o=" "integrity": "sha1-e45lYZCyKOgaZq6nSEgNgozS03o="
}, },
"scraperapi-sdk": {
"version": "1.0.3",
"resolved": "https://registry.npmjs.org/scraperapi-sdk/-/scraperapi-sdk-1.0.3.tgz",
"integrity": "sha512-wFzdVptJHAA13HWMxR6DxsesA95cx0eBvylh2CHH9UmzBYor7N54jxgL473IW1VZEferSCNpwlW2R/B3zTPDsQ==",
"requires": {
"promise-request-retry": "^1.0.2",
"request": "^2.88.0",
"request-promise": "^4.2.5"
}
},
"semver": { "semver": {
"version": "5.6.0", "version": "5.6.0",
"resolved": "https://registry.npmjs.org/semver/-/semver-5.6.0.tgz", "resolved": "https://registry.npmjs.org/semver/-/semver-5.6.0.tgz",
@@ -3990,11 +3877,6 @@
"extend-shallow": "^3.0.0" "extend-shallow": "^3.0.0"
} }
}, },
"sprintf-js": {
"version": "1.0.3",
"resolved": "https://registry.npmjs.org/sprintf-js/-/sprintf-js-1.0.3.tgz",
"integrity": "sha1-BOaSb2YolTVPPdAVIDYzuFcpfiw="
},
"sshpk": { "sshpk": {
"version": "1.16.1", "version": "1.16.1",
"resolved": "https://registry.npmjs.org/sshpk/-/sshpk-1.16.1.tgz", "resolved": "https://registry.npmjs.org/sshpk/-/sshpk-1.16.1.tgz",
@@ -4011,11 +3893,6 @@
"tweetnacl": "~0.14.0" "tweetnacl": "~0.14.0"
} }
}, },
"stack-trace": {
"version": "0.0.10",
"resolved": "https://registry.npmjs.org/stack-trace/-/stack-trace-0.0.10.tgz",
"integrity": "sha1-VHxws0fo0ytOEI6hoqFZ5f3eGcA="
},
"static-extend": { "static-extend": {
"version": "0.1.2", "version": "0.1.2",
"resolved": "https://registry.npmjs.org/static-extend/-/static-extend-0.1.2.tgz", "resolved": "https://registry.npmjs.org/static-extend/-/static-extend-0.1.2.tgz",
@@ -4042,11 +3919,6 @@
"resolved": "https://registry.npmjs.org/statuses/-/statuses-1.4.0.tgz", "resolved": "https://registry.npmjs.org/statuses/-/statuses-1.4.0.tgz",
"integrity": "sha512-zhSCtt8v2NDrRlPQpCNtw/heZLtfUDqxBM1udqikb/Hbk52LK4nQSwr10u77iopCW5LsyHpuXS0GnEc48mLeew==" "integrity": "sha512-zhSCtt8v2NDrRlPQpCNtw/heZLtfUDqxBM1udqikb/Hbk52LK4nQSwr10u77iopCW5LsyHpuXS0GnEc48mLeew=="
}, },
"stealthy-require": {
"version": "1.1.1",
"resolved": "https://registry.npmjs.org/stealthy-require/-/stealthy-require-1.1.1.tgz",
"integrity": "sha1-NbCYdbT/SfJqd35QmzCQoyJr8ks="
},
"string-width": { "string-width": {
"version": "2.1.1", "version": "2.1.1",
"resolved": "https://registry.npmjs.org/string-width/-/string-width-2.1.1.tgz", "resolved": "https://registry.npmjs.org/string-width/-/string-width-2.1.1.tgz",
@@ -4518,26 +4390,6 @@
"string-width": "^2.1.1" "string-width": "^2.1.1"
} }
}, },
"winston": {
"version": "2.4.4",
"resolved": "https://registry.npmjs.org/winston/-/winston-2.4.4.tgz",
"integrity": "sha512-NBo2Pepn4hK4V01UfcWcDlmiVTs7VTB1h7bgnB0rgP146bYhMxX0ypCz3lBOfNxCO4Zuek7yeT+y/zM1OfMw4Q==",
"requires": {
"async": "~1.0.0",
"colors": "1.0.x",
"cycle": "1.0.x",
"eyes": "0.1.x",
"isstream": "0.1.x",
"stack-trace": "0.0.x"
},
"dependencies": {
"async": {
"version": "1.0.0",
"resolved": "https://registry.npmjs.org/async/-/async-1.0.0.tgz",
"integrity": "sha1-+PwEyjoTeErenhZBr5hXjPvWR6k="
}
}
},
"wkx": { "wkx": {
"version": "0.4.8", "version": "0.4.8",
"resolved": "https://registry.npmjs.org/wkx/-/wkx-0.4.8.tgz", "resolved": "https://registry.npmjs.org/wkx/-/wkx-0.4.8.tgz",

View File

@@ -17,9 +17,8 @@
"checkup-notify": "cd app/npmScripts && node npmCheckUpNotify.js", "checkup-notify": "cd app/npmScripts && node npmCheckUpNotify.js",
"test-search": "cd test && node searchTest.js", "test-search": "cd test && node searchTest.js",
"test-olx-scraper": "cd test && node olxScrapeTest.js", "test-olx-scraper": "cd test && node olxScrapeTest.js",
"test-saljic-scraper": "cd test && node saljicScrapeTest.js",
"test-rental-scraper": "cd test && node rentalScrapeTest.js", "test-rental-scraper": "cd test && node rentalScrapeTest.js",
"test-scraper-api": "cd test && node scraperAPITest.js" "test-saljic-scraper": "cd test && node saljicScrapeTest.js"
}, },
"repository": { "repository": {
"type": "git", "type": "git",
@@ -33,6 +32,9 @@
"dependencies": { "dependencies": {
"2checkout-node": "0.0.1", "2checkout-node": "0.0.1",
"@sendgrid/mail": "^6.3.1", "@sendgrid/mail": "^6.3.1",
"@sozialhelden/fetch-cache": "^2.0.1",
"@supercharge/promise-pool": "^1.3.0",
"abort-controller": "^3.0.0",
"aws-sdk": "^2.422.0", "aws-sdk": "^2.422.0",
"bluebird": "^3.5.5", "bluebird": "^3.5.5",
"cheerio": "^1.0.0-rc.2", "cheerio": "^1.0.0-rc.2",
@@ -52,7 +54,6 @@
"pg": "^7.10.0", "pg": "^7.10.0",
"prettier": "^1.19.1", "prettier": "^1.19.1",
"react-step-wizard": "^5.1.0", "react-step-wizard": "^5.1.0",
"scraperapi-sdk": "^1.0.3",
"sequelize": "^5.18.4", "sequelize": "^5.18.4",
"sequelize-cli": "^5.5.0" "sequelize-cli": "^5.5.0"
}, },

View File

@@ -1,19 +0,0 @@
const { SCRAPER_API_KEY } = require("../app/config/appConfig");
const scraperapiClient = require("scraperapi-sdk")(SCRAPER_API_KEY);
/**
 * Requests the account details from the scraper API client and prints the
 * current time together with the reported `concurrentRequests` value.
 * Any error raised along the way is caught and only its message is logged.
 */
async function logUsedConcurrentReq() {
  try {
    const accountInfo = await scraperapiClient.account();
    const timestamp = new Date().toLocaleString();
    console.log(
      timestamp,
      " Number of concurrent requests: ",
      accountInfo.concurrentRequests
    );
  } catch (error) {
    console.log(error.message);
  }
}
setInterval(logUsedConcurrentReq, 1000);