Merge pull request #17 from GotPPay/summarizer-feature

4b. Summarizer feature
This commit is contained in:
Senad Uka
2018-04-16 12:12:50 +02:00
committed by GitHub
5 changed files with 246 additions and 1178 deletions

View File

@@ -1,9 +1,9 @@
const constants = {}; const constants = {};
constants.skillStage = { constants.skillStage = {
IN_DEVELOPMENT : 'development', IN_DEVELOPMENT: 'development',
LIVE : 'live' LIVE: 'live',
} };
constants.amazonResultCodes = { constants.amazonResultCodes = {
OK: 200, OK: 200,
@@ -36,12 +36,17 @@ constants.voiceResponseStrings = {
QUESTION_NOT_FOUND: 'Sorry, I didnt understand', QUESTION_NOT_FOUND: 'Sorry, I didnt understand',
GENERIC_CONTINUE: 'Say something to continue', GENERIC_CONTINUE: 'Say something to continue',
DIDNT_ASK_ANYTHING: 'There was no question to answer to', DIDNT_ASK_ANYTHING: 'There was no question to answer to',
ERROR_SUMMARIZING_CONTENT: 'Sorry, there was problem with summarizing news',
ERROR_FETCHING_CONTENT: 'Failed to get content',
}; };
//Timing is given in [ms] //Timing is given in [ms]
constants.voiceResponseTimings = { constants.voiceResponseTimings = {
PAUSE_BETWEEN_QUESTIONS: 650, PAUSE_BETWEEN_QUESTIONS: 650,
PAUSE_AFTER_WELCOME_MESSAGE: 650, PAUSE_AFTER_WELCOME_MESSAGE: 650,
PAUSE_BETWEEN_TITLES: 500,
PAUSE_BETWEEN_TITLE_AND_CONTENT: 500,
PAUSE_BETWEEN_NEWS: 800,
}; };
constants.stringConstraints = { constants.stringConstraints = {
@@ -65,8 +70,15 @@ constants.stringConstraints = {
constants.answerType = { constants.answerType = {
PREDEFINED: 0, PREDEFINED: 0,
EXTERNAL_SOURCE_WP_TITLES : 1, EXTERNAL_SOURCE_WP_TITLES: 1,
EXTERNAL_SOURCE_WP_NEWS : 2 EXTERNAL_SOURCE_WP_NEWS: 2,
} };
constants.contentType = {
TITLES: 0,
NEWS: 1,
};
constants.FIXED_SUMMARY_LENGTH = 3;
module.exports = constants; module.exports = constants;

View File

@@ -1,5 +1,8 @@
let request = require ('request'); let request = require ('request');
let Parser = require ('rss-parser'); let Parser = require ('rss-parser');
let summarizer = require ('nodejs-text-summarizer');
var htmlToText = require ('html-to-text');
const constants = require ('../config/constants');
let parser = new Parser (); let parser = new Parser ();
@@ -8,37 +11,102 @@ getDataFromWPJSON = function (sourceUrl, page = 1, maxPosts = 10) {
var options = { var options = {
method: 'GET', method: 'GET',
url: `${sourceUrl}/wp-json/wp/v2/posts`, url: `${sourceUrl}/wp-json/wp/v2/posts`,
qs:{ qs: {
page:page, page: page,
per_page:maxPosts per_page: maxPosts,
} },
}; };
request (options, (error, response, body)=> { request (options, (error, response, body) => {
if (error) { if (error) {
reject (error); reject (error);
} else { } else {
resolve(JSON.parse (body)); resolve (JSON.parse (body));
} }
}); });
}); });
} };
module.exports = { summarizeText = function (text, length, clearText = true) {
getAnswerFromWP : function (sourceUrl){ let preparedText = text;
//This function will extract needed data from JSON, which we got from getDataFromWPJSON if (clearText) {
//At the moment, it's taking titles and creates answer preparedText = htmlToText.fromString (text, {
return new Promise((resolve,reject)=>{ wordwrap: false,
getDataFromWPJSON(sourceUrl).then(rawData=>{ ignoreHref: true,
let result=''; ignoreImage: true,
rawData.forEach(post=>{
result += post.title.rendered + '<break time="300ms"/> '
});
resolve(result);
}).catch(err=>{
reject('Failed to get answer');
});
}); });
} }
}
return summarizer (preparedText, {n: length});
};
/**
 * Builds one string containing the title of every post returned by
 * getDataFromWPJSON for the given site, each title followed by a
 * `<break>` pause tag (presumably SSML for the voice response).
 *
 * NOTE(review): the function is assigned without `const`/`let`/`var`, which
 * creates an implicit global in sloppy mode; kept as-is so existing
 * references keep working.
 *
 * @param {string} sourceUrl - Base URL passed straight to getDataFromWPJSON.
 * @returns {Promise<string>} Resolves with the concatenated titles; rejects
 *   with constants.voiceResponseStrings.ERROR_FETCHING_CONTENT when the fetch
 *   or the title extraction fails.
 */
getTitlesFromWP = function (sourceUrl) {
  return new Promise ((resolve, reject) => {
    const onPosts = posts => {
      // Pause duration is configured in milliseconds.
      const pauseTag = `<break time="${constants.voiceResponseTimings.PAUSE_BETWEEN_TITLES}ms"/> `;
      const spokenTitles = posts.reduce (
        (spoken, post) => spoken + post.title.rendered + pauseTag,
        ''
      );
      resolve (spokenTitles);
    };

    // Any failure (fetch or malformed payload) is reported with the same message.
    const onFailure = () => {
      reject (constants.voiceResponseStrings.ERROR_FETCHING_CONTENT);
    };

    getDataFromWPJSON (sourceUrl).then (onPosts).catch (onFailure);
  });
};
/**
 * Builds one string with a short summary of each of the latest posts of a
 * site, optionally preceded by the post title, with `<break>` pause tags
 * between the parts (presumably SSML for the voice response).
 *
 * NOTE(review): the function is assigned without `const`/`let`/`var`, which
 * creates an implicit global in sloppy mode; kept as-is so existing
 * references keep working.
 *
 * @param {string} sourceUrl - Base URL passed to getDataFromWPJSON.
 * @param {number} [postCount=10] - Number of posts requested (first page only).
 * @param {boolean} [includeTitle=false] - When true, each summary is preceded
 *   by the post title and a title/content pause.
 * @returns {Promise<string>} Resolves with the concatenated summaries.
 *   Rejects with constants.voiceResponseStrings.ERROR_FETCHING_CONTENT when
 *   the fetch fails or the payload is not a list of posts, and with
 *   ERROR_SUMMARIZING_CONTENT when building a summary throws.
 */
getLatestNewsFromWP = function (
  sourceUrl,
  postCount = 10,
  includeTitle = false
) {
  return new Promise ((resolve, reject) => {
    getDataFromWPJSON (sourceUrl, 1, postCount)
      .then (rawData => {
        // A successfully parsed body can still be an error object rather than
        // a list of posts; that is a fetching problem, not a summarizing one.
        if (!Array.isArray (rawData)) {
          reject (constants.voiceResponseStrings.ERROR_FETCHING_CONTENT);
          return;
        }

        // Pause durations are configured in milliseconds.
        const titlePause = `<break time="${constants.voiceResponseTimings.PAUSE_BETWEEN_TITLE_AND_CONTENT}ms"/>`;
        const newsPause = `<break time="${constants.voiceResponseTimings.PAUSE_BETWEEN_NEWS}ms"/>`;

        try {
          const result = rawData
            .map (post => {
              const heading = includeTitle
                ? post.title.rendered + titlePause
                : '';
              const summary = summarizeText (
                post.content.rendered,
                constants.FIXED_SUMMARY_LENGTH
              );
              return heading + summary + newsPause;
            })
            .join ('');
          resolve (result);
        } catch (err) {
          // Malformed posts and summarizer failures are reported the same way.
          reject (constants.voiceResponseStrings.ERROR_SUMMARIZING_CONTENT);
        }
      })
      .catch (err => {
        reject (constants.voiceResponseStrings.ERROR_FETCHING_CONTENT);
      });
  });
};
module.exports = {
getAnswerFromWP: function (sourceUrl, contentType) {
//This function will extract needed data from JSON, which we got from getDataFromWPJSON
switch (contentType) {
case constants.contentType.TITLES:
return getTitlesFromWP (sourceUrl);
break;
case constants.contentType.NEWS:
return getLatestNewsFromWP (sourceUrl);
break;
}
},
};

View File

@@ -76,30 +76,45 @@ module.exports = {
let answerPromiseProps = { let answerPromiseProps = {
resolve: null, resolve: null,
reject: null reject: null,
} };
let answerPromise = new Promise ((resolve, reject) => { let answerPromise = new Promise ((resolve, reject) => {
answerPromiseProps = { answerPromiseProps = {
resolve:resolve, resolve: resolve,
reject:reject reject: reject,
} };
}); });
switch (intent.answerType){ switch (intent.answerType) {
case constants.answerType.PREDEFINED: case constants.answerType.PREDEFINED:
answerPromiseProps.resolve(intent.answer); answerPromiseProps.resolve (intent.answer);
break; break;
case constants.answerType.EXTERNAL_SOURCE_WP_TITLES: case constants.answerType.EXTERNAL_SOURCE_WP_TITLES:
predefinedSourceHelper.getAnswerFromWP(intent.externalAnswerSource).then(answer=>{ predefinedSourceHelper
answerPromiseProps.resolve(answer); .getAnswerFromWP (
}).catch(error=>{ intent.externalAnswerSource,
answerPromiseProps.reject(error); constants.contentType.TITLES
}); )
.then (answer => {
answerPromiseProps.resolve (answer);
})
.catch (error => {
answerPromiseProps.reject (error);
});
break; break;
case constants.answerType.EXTERNAL_SOURCE_WP_NEWS: case constants.answerType.EXTERNAL_SOURCE_WP_NEWS:
answer = 'Not implemented yet' predefinedSourceHelper
answerPromiseProps.resolve(answer); .getAnswerFromWP (
intent.externalAnswerSource,
constants.contentType.NEWS
)
.then (answer => {
answerPromiseProps.resolve (answer);
})
.catch (error => {
answerPromiseProps.reject (error);
});
break; break;
} }

1240
backend/package-lock.json generated

File diff suppressed because it is too large Load Diff

View File

@@ -8,9 +8,10 @@
"body-parser": "^1.13.1", "body-parser": "^1.13.1",
"ejs": "^2.5.7", "ejs": "^2.5.7",
"express": "^4.13.0", "express": "^4.13.0",
"html-to-text": "^4.0.0",
"isomorphic-fetch": "^2.2.1", "isomorphic-fetch": "^2.2.1",
"mongodb": "^2.2.33", "mongodb": "^2.2.33",
"nodejs-text-summarizer": "^2.0.3", "nodejs-text-summarizer": "GotPPay/nodejs-text-summarizer",
"nodemailer": "^4.4.1", "nodemailer": "^4.4.1",
"request": "^2.83.0", "request": "^2.83.0",
"rss-parser": "^3.1.1" "rss-parser": "^3.1.1"