Merge pull request #17 from GotPPay/summarizer-feature

4b. Summarizer feature
This commit is contained in:
Senad Uka
2018-04-16 12:12:50 +02:00
committed by GitHub
5 changed files with 246 additions and 1178 deletions

View File

@@ -1,9 +1,9 @@
const constants = {};
constants.skillStage = {
IN_DEVELOPMENT : 'development',
LIVE : 'live'
}
IN_DEVELOPMENT: 'development',
LIVE: 'live',
};
constants.amazonResultCodes = {
OK: 200,
@@ -36,12 +36,17 @@ constants.voiceResponseStrings = {
QUESTION_NOT_FOUND: 'Sorry, I didnt understand',
GENERIC_CONTINUE: 'Say something to continue',
DIDNT_ASK_ANYTHING: 'There was no question to answer to',
ERROR_SUMMARIZING_CONTENT: 'Sorry, there was problem with summarizing news',
ERROR_FETCHING_CONTENT: 'Failed to get content',
};
// All timings are given in milliseconds [ms]
constants.voiceResponseTimings = {
PAUSE_BETWEEN_QUESTIONS: 650,
PAUSE_AFTER_WELCOME_MESSAGE: 650,
PAUSE_BETWEEN_TITLES: 500,
PAUSE_BETWEEN_TITLE_AND_CONTENT: 500,
PAUSE_BETWEEN_NEWS: 800,
};
constants.stringConstraints = {
@@ -65,8 +70,15 @@ constants.stringConstraints = {
constants.answerType = {
PREDEFINED: 0,
EXTERNAL_SOURCE_WP_TITLES : 1,
EXTERNAL_SOURCE_WP_NEWS : 2
}
EXTERNAL_SOURCE_WP_TITLES: 1,
EXTERNAL_SOURCE_WP_NEWS: 2,
};
constants.contentType = {
TITLES: 0,
NEWS: 1,
};
constants.FIXED_SUMMARY_LENGTH = 3;
module.exports = constants;

View File

@@ -1,5 +1,8 @@
let request = require ('request');
let Parser = require ('rss-parser');
let summarizer = require ('nodejs-text-summarizer');
var htmlToText = require ('html-to-text');
const constants = require ('../config/constants');
let parser = new Parser ();
@@ -8,37 +11,102 @@ getDataFromWPJSON = function (sourceUrl, page = 1, maxPosts = 10) {
var options = {
method: 'GET',
url: `${sourceUrl}/wp-json/wp/v2/posts`,
qs:{
page:page,
per_page:maxPosts
}
qs: {
page: page,
per_page: maxPosts,
},
};
request (options, (error, response, body)=> {
request (options, (error, response, body) => {
if (error) {
reject (error);
} else {
resolve(JSON.parse (body));
resolve (JSON.parse (body));
}
});
});
}
};
module.exports = {
getAnswerFromWP : function (sourceUrl){
//This function will extract needed data from JSON, which we got from getDataFromWPJSON
//At the moment, it's taking titles and creates answer
return new Promise((resolve,reject)=>{
getDataFromWPJSON(sourceUrl).then(rawData=>{
let result='';
rawData.forEach(post=>{
result += post.title.rendered + '<break time="300ms"/> '
});
resolve(result);
}).catch(err=>{
reject('Failed to get answer');
});
/**
 * Summarizes a piece of text, optionally converting it from HTML to plain
 * text first.
 *
 * @param {string} text - Text (or HTML markup) to summarize.
 * @param {number} length - Forwarded to the summarizer as its `n` option;
 *   presumably the number of sentences to keep - confirm against the
 *   summarizer package.
 * @param {boolean} [clearText=true] - When true, `text` is first passed
 *   through `htmlToText.fromString` with line wrapping disabled and links and
 *   images ignored; when false, `text` is summarized as-is.
 * @returns {*} Whatever `summarizer` returns for the prepared text.
 */
const summarizeText = function (text, length, clearText = true) {
  const preparedText = clearText
    ? htmlToText.fromString (text, {
        wordwrap: false,
        ignoreHref: true,
        ignoreImage: true,
      })
    : text;
  return summarizer (preparedText, {n: length});
};
/**
 * Builds one speech string from the titles of the posts returned by
 * getDataFromWPJSON.
 *
 * @param {string} sourceUrl - Passed through to getDataFromWPJSON.
 * @returns {Promise<string>} Resolves with every `post.title.rendered`, each
 *   followed by a `<break time="…ms"/> ` tag whose duration is
 *   `constants.voiceResponseTimings.PAUSE_BETWEEN_TITLES`. Rejects with
 *   `constants.voiceResponseStrings.ERROR_FETCHING_CONTENT` if the fetch
 *   rejects or the titles cannot be read from the response.
 */
const getTitlesFromWP = function (sourceUrl) {
  return getDataFromWPJSON (sourceUrl)
    .then (rawData => {
      const pause = `<break time="${constants.voiceResponseTimings.PAUSE_BETWEEN_TITLES}ms"/> `;
      return rawData.map (post => post.title.rendered + pause).join ('');
    })
    .catch (() =>
      // The underlying error is deliberately replaced: callers receive a
      // fixed message string instead of the raw failure.
      Promise.reject (constants.voiceResponseStrings.ERROR_FETCHING_CONTENT)
    );
};
/**
 * Builds one speech string from summaries of the latest posts returned by
 * getDataFromWPJSON.
 *
 * For every post the output contains, in order: the title followed by a
 * PAUSE_BETWEEN_TITLE_AND_CONTENT break (only when `includeTitle` is true),
 * the result of summarizeText for `post.content.rendered` with
 * `constants.FIXED_SUMMARY_LENGTH`, and a PAUSE_BETWEEN_NEWS break.
 *
 * @param {string} sourceUrl - Passed through to getDataFromWPJSON.
 * @param {number} [postCount=10] - Passed to getDataFromWPJSON as the
 *   maximum number of posts; page 1 is always requested.
 * @param {boolean} [includeTitle=false] - Prefix each summary with its title.
 * @returns {Promise<string>} Resolves with the concatenated string. Rejects
 *   with `constants.voiceResponseStrings.ERROR_FETCHING_CONTENT` when the
 *   fetch rejects, or with `ERROR_SUMMARIZING_CONTENT` when building the
 *   summaries throws.
 */
const getLatestNewsFromWP = function (
  sourceUrl,
  postCount = 10,
  includeTitle = false
) {
  // Two-argument then(): the rejection handler only sees fetch failures, so a
  // summarizing failure keeps its own message instead of being re-mapped.
  return getDataFromWPJSON (sourceUrl, 1, postCount).then (
    rawData => {
      try {
        const {
          PAUSE_BETWEEN_TITLE_AND_CONTENT,
          PAUSE_BETWEEN_NEWS,
        } = constants.voiceResponseTimings;
        return rawData
          .map (post => {
            const heading = includeTitle
              ? `${post.title.rendered}<break time="${PAUSE_BETWEEN_TITLE_AND_CONTENT}ms"/>`
              : '';
            const summary = summarizeText (
              post.content.rendered,
              constants.FIXED_SUMMARY_LENGTH
            );
            return `${heading}${summary}<break time="${PAUSE_BETWEEN_NEWS}ms"/>`;
          })
          .join ('');
      } catch (err) {
        return Promise.reject (
          constants.voiceResponseStrings.ERROR_SUMMARIZING_CONTENT
        );
      }
    },
    () => Promise.reject (constants.voiceResponseStrings.ERROR_FETCHING_CONTENT)
  );
};
module.exports = {
getAnswerFromWP: function (sourceUrl, contentType) {
//This function will extract needed data from JSON, which we got from getDataFromWPJSON
switch (contentType) {
case constants.contentType.TITLES:
return getTitlesFromWP (sourceUrl);
break;
case constants.contentType.NEWS:
return getLatestNewsFromWP (sourceUrl);
break;
}
},
};

View File

@@ -76,30 +76,45 @@ module.exports = {
let answerPromiseProps = {
resolve: null,
reject: null
}
reject: null,
};
let answerPromise = new Promise ((resolve, reject) => {
answerPromiseProps = {
resolve:resolve,
reject:reject
}
resolve: resolve,
reject: reject,
};
});
switch (intent.answerType){
switch (intent.answerType) {
case constants.answerType.PREDEFINED:
answerPromiseProps.resolve(intent.answer);
answerPromiseProps.resolve (intent.answer);
break;
case constants.answerType.EXTERNAL_SOURCE_WP_TITLES:
predefinedSourceHelper.getAnswerFromWP(intent.externalAnswerSource).then(answer=>{
answerPromiseProps.resolve(answer);
}).catch(error=>{
answerPromiseProps.reject(error);
});
predefinedSourceHelper
.getAnswerFromWP (
intent.externalAnswerSource,
constants.contentType.TITLES
)
.then (answer => {
answerPromiseProps.resolve (answer);
})
.catch (error => {
answerPromiseProps.reject (error);
});
break;
case constants.answerType.EXTERNAL_SOURCE_WP_NEWS:
answer = 'Not implemented yet'
answerPromiseProps.resolve(answer);
predefinedSourceHelper
.getAnswerFromWP (
intent.externalAnswerSource,
constants.contentType.NEWS
)
.then (answer => {
answerPromiseProps.resolve (answer);
})
.catch (error => {
answerPromiseProps.reject (error);
});
break;
}

1240
backend/package-lock.json generated

File diff suppressed because it is too large Load Diff

View File

@@ -8,9 +8,10 @@
"body-parser": "^1.13.1",
"ejs": "^2.5.7",
"express": "^4.13.0",
"html-to-text": "^4.0.0",
"isomorphic-fetch": "^2.2.1",
"mongodb": "^2.2.33",
"nodejs-text-summarizer": "^2.0.3",
"nodejs-text-summarizer": "GotPPay/nodejs-text-summarizer",
"nodemailer": "^4.4.1",
"request": "^2.83.0",
"rss-parser": "^3.1.1"