Initial commit
This commit is contained in:
66
venv/lib/python3.7/site-packages/nltk/draw/dispersion.py
Normal file
66
venv/lib/python3.7/site-packages/nltk/draw/dispersion.py
Normal file
@@ -0,0 +1,66 @@
|
||||
# Natural Language Toolkit: Dispersion Plots
|
||||
#
|
||||
# Copyright (C) 2001-2019 NLTK Project
|
||||
# Author: Steven Bird <stevenbird1@gmail.com>
|
||||
# URL: <http://nltk.org/>
|
||||
# For license information, see LICENSE.TXT
|
||||
|
||||
"""
|
||||
A utility for displaying lexical dispersion.
|
||||
"""
|
||||
|
||||
|
||||
def dispersion_plot(text, words, ignore_case=False, title="Lexical Dispersion Plot"):
    """
    Generate a lexical dispersion plot.

    Each target word gets its own horizontal row; a tick is drawn at every
    offset in ``text`` where that word occurs.  The first entry of ``words``
    is assigned the highest row index and the last entry row 0.

    :param text: The source text
    :type text: list(str) or enum(str)
    :param words: The target words; this sequence is not modified
    :type words: list of str
    :param ignore_case: flag to set if case should be ignored when searching text
    :type ignore_case: bool
    :param title: the title passed to the plot
    :type title: str
    :raise ValueError: if matplotlib cannot be imported
    """
    try:
        from matplotlib import pylab
    except ImportError as err:
        # ValueError is kept (rather than ImportError) so that existing
        # callers catching it keep working; the original cause is chained.
        raise ValueError(
            'The plot function requires matplotlib to be installed. '
            'See http://matplotlib.org/'
        ) from err

    text = list(text)
    # Work on a reversed *copy*: reversing in place would flip the caller's
    # list on every call and would fail for tuples.
    words = list(reversed(words))

    if ignore_case:
        words_to_comp = list(map(str.lower, words))
        text_to_comp = list(map(str.lower, text))
    else:
        words_to_comp = words
        text_to_comp = text

    # One (offset, row) pair for every position where a token equals a
    # target word.
    points = [
        (x, y)
        for x, token in enumerate(text_to_comp)
        for y, word in enumerate(words_to_comp)
        if token == word
    ]
    if points:
        x, y = list(zip(*points))
    else:
        # No matches: still draw the (empty) axes with the word labels.
        x = y = ()

    pylab.plot(x, y, "b|", scalex=0.1)
    # Tick labels use the words as given (original casing), in row order.
    pylab.yticks(list(range(len(words))), words, color="b")
    pylab.ylim(-1, len(words))
    pylab.title(title)
    pylab.xlabel("Word Offset")
    pylab.show()
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Demo: plot where four character names occur in one Gutenberg text.
    import nltk.compat
    from nltk.corpus import gutenberg

    words = ['Elinor', 'Marianne', 'Edward', 'Willoughby']
    corpus_tokens = gutenberg.words('austen-sense.txt')
    dispersion_plot(corpus_tokens, words)
|
||||
Reference in New Issue
Block a user