From 2e92f961ff75473cf3fc1dc897d1bb2896d0c54f Mon Sep 17 00:00:00 2001 From: Bilal Catic Date: Thu, 26 Sep 2019 17:30:06 +0200 Subject: [PATCH] start crawler loop when server is started --- app/config/appConfig.js | 5 ++++- app/crawler/crawl.js | 11 +++++------ app/crawler/npmCrawl.js | 5 +++++ development.env | 2 ++ index.js | 14 +++++++++++++- package.json | 2 +- 6 files changed, 30 insertions(+), 9 deletions(-) create mode 100644 app/crawler/npmCrawl.js diff --git a/app/config/appConfig.js b/app/config/appConfig.js index 5b06652..37879bd 100644 --- a/app/config/appConfig.js +++ b/app/config/appConfig.js @@ -8,8 +8,11 @@ const APP_URL = const DEFAULT_TIMEZONE = "Europe/Sarajevo"; +const CRAWLER_INTERVAL = parseInt(process.env.CRAWLER_INTERVAL) || 60; + module.exports = { APP_PORT, APP_URL, - DEFAULT_TIMEZONE + DEFAULT_TIMEZONE, + CRAWLER_INTERVAL }; diff --git a/app/crawler/crawl.js b/app/crawler/crawl.js index 77d4fc9..e77053c 100644 --- a/app/crawler/crawl.js +++ b/app/crawler/crawl.js @@ -26,15 +26,14 @@ const crawlers = [ async function crawlAll() { for (let crawler of crawlers) { try { - const newRealEstates = await crawler.crawl(); - - console.log("Number of new real estates : ", newRealEstates.length); + return await crawler.crawl(); } catch (e) { console.log("Error crawling. Trying next crawler! 
", e); + return []; } } } -(async () => { - await crawlAll(); -})(); +module.exports = { + crawlAll +}; diff --git a/app/crawler/npmCrawl.js b/app/crawler/npmCrawl.js new file mode 100644 index 0000000..fa26ebc --- /dev/null +++ b/app/crawler/npmCrawl.js @@ -0,0 +1,5 @@ +const { crawlAll } = require("./crawl"); + +(async () => { + await crawlAll(); +})(); diff --git a/development.env b/development.env index 2f2fdc0..ff0fdf3 100644 --- a/development.env +++ b/development.env @@ -14,6 +14,8 @@ AMAZON_REGION=eu-west-1 APP_URL=http://localhost:3001 SOURCE_EMAIL=info@saburly.com +CRAWLER_INTERVAL=Interval to run crawler(s), in seconds + #=============== CRAWLER SETTINGS===============# #==OLX== OLX_MAX_PAGES=Restrict crawler to this number of pages diff --git a/index.js b/index.js index c1a7b7d..262136a 100644 --- a/index.js +++ b/index.js @@ -6,8 +6,9 @@ const bodyParser = require("body-parser"); const layout = require("express-layout"); const compression = require("compression"); -const { APP_PORT } = require("./app/config/appConfig"); +const { APP_PORT, CRAWLER_INTERVAL } = require("./app/config/appConfig"); const routes = require("./app/routes"); +const { crawlAll } = require("./app/crawler/crawl"); const app = express(); @@ -26,3 +27,14 @@ app.use("/assets", express.static("./app/public")); app.listen(APP_PORT, () => console.log(`Example app listening on port ${APP_PORT}!`) ); + +let crawlerRunning = false; +const crawl = () => { + if (!crawlerRunning) { + crawlerRunning = true; + crawlAll().then(newRealEstates => { + crawlerRunning = false; + }); + } +}; +setInterval(crawl, CRAWLER_INTERVAL * 1000); diff --git a/package.json b/package.json index f55a068..f915d34 100644 --- a/package.json +++ b/package.json @@ -11,7 +11,7 @@ "setup": "docker build -t marketalerts . 
&& docker run -e POSTGRES_USER=docker -e POSTGRES_PASSWORD=docker -e POSTGRES_DB=marketalerts --name pg_marketalerts -d -p 5432:5432 marketalerts && sleep 4 && npm run migrate", "docker-start": "docker start pg_marketalerts", "docker-stop": "docker stop pg_marketalerts", - "crawl": "cd app/crawler && node crawl.js" + "crawl": "cd app/crawler && node npmCrawl.js" }, "repository": { "type": "git",