clean text before summarization
This commit is contained in:
@@ -65,23 +65,24 @@ module.exports = {
|
|||||||
getDataFromWPJSON (sourceUrl, 1, postCount)
|
getDataFromWPJSON (sourceUrl, 1, postCount)
|
||||||
.then (rawData => {
|
.then (rawData => {
|
||||||
let result = '';
|
let result = '';
|
||||||
|
let htmlToTextOptions = {
|
||||||
|
wordwrap: false,
|
||||||
|
ignoreHref: true,
|
||||||
|
ignoreImage: true,
|
||||||
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
rawData.forEach (post => {
|
rawData.forEach (post => {
|
||||||
|
let textData = htmlToText.fromString(post.content.rendered,htmlToTextOptions);
|
||||||
result += includeTitle ? post.title.rendered : '';
|
result += includeTitle ? post.title.rendered : '';
|
||||||
result += includeTitle
|
result += includeTitle
|
||||||
? `<break time="${constants.voiceResponseTimings.PAUSE_BETWEEN_TITLE_AND_CONTENT}ms"/>`
|
? `<break time="${constants.voiceResponseTimings.PAUSE_BETWEEN_TITLE_AND_CONTENT}ms"/>`
|
||||||
: '';
|
: '';
|
||||||
result += summarizer (post.content.rendered);
|
result += summarizer (textData);
|
||||||
result += `<break time="${constants.voiceResponseTimings.PAUSE_BETWEEN_NEWS}ms"/>`;
|
result += `<break time="${constants.voiceResponseTimings.PAUSE_BETWEEN_NEWS}ms"/>`;
|
||||||
});
|
});
|
||||||
var textAnswer = htmlToText.fromString (result, {
|
|
||||||
wordwrap: false,
|
|
||||||
ignoreHref: true,
|
|
||||||
ignoreImage: true,
|
|
||||||
});
|
|
||||||
|
|
||||||
resolve (textAnswer);
|
resolve (result);
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
reject (constants.voiceResponseString.ERROR_SUMMARIZING_CONTENT);
|
reject (constants.voiceResponseString.ERROR_SUMMARIZING_CONTENT);
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user