Initial commit

2019-10-20 13:16:49 +02:00
commit 233066caf4
2099 changed files with 360824 additions and 0 deletions
--- a/venv/lib/python3.7/site-packages/nltk/draw/dispersion.py
+++ b/venv/lib/python3.7/site-packages/nltk/draw/dispersion.py
@@ -0,0 +1,66 @@
+# Natural Language Toolkit: Dispersion Plots
+#
+# Copyright (C) 2001-2019 NLTK Project
+# Author: Steven Bird <stevenbird1@gmail.com>
+# URL: <http://nltk.org/>
+# For license information, see LICENSE.TXT
+
+"""
+A utility for displaying lexical dispersion.
+"""
+
+
+def dispersion_plot(text, words, ignore_case=False, title="Lexical Dispersion Plot"):
+    """
+    Generate a lexical dispersion plot.
+
+    Each target word gets its own horizontal row, and a tick mark is drawn
+    at every offset of ``text`` whose token equals that word.  The first
+    entry of ``words`` is labelled on the top row.  The plot is displayed
+    with ``pylab.show()``; nothing is returned.
+
+    :param text: The source text
+    :type text: list(str) or enum(str)
+    :param words: The target words (left unmodified by this function)
+    :type words: list of str
+    :param ignore_case: flag to set if case should be ignored when searching text
+    :type ignore_case: bool
+    :param title: The title drawn above the plot
+    :type title: str
+    :raise ValueError: if matplotlib cannot be imported
+    """
+
+    try:
+        from matplotlib import pylab
+    except ImportError:
+        raise ValueError(
+            'The plot function requires matplotlib to be installed. '
+            'See http://matplotlib.org/'
+        )
+
+    # Reverse a *copy* so that the first word ends up on the highest row
+    # without reordering the caller's sequence as a side effect.
+    labels = list(words)[::-1]
+
+    # Map each (optionally lower-cased) target word to the row(s) it labels.
+    # A list of rows keeps duplicate target words plotted on every one of
+    # their rows.
+    rows_by_word = {}
+    for row, word in enumerate(labels):
+        key = str.lower(word) if ignore_case else word
+        rows_by_word.setdefault(key, []).append(row)
+
+    # One pass over the text with a dict lookup per token, instead of
+    # comparing every token against every target word.
+    x, y = [], []
+    for offset, token in enumerate(text):
+        key = str.lower(token) if ignore_case else token
+        for row in rows_by_word.get(key, ()):
+            x.append(offset)
+            y.append(row)
+
+    # "b|" draws blue vertical tick markers with no connecting line.
+    pylab.plot(x, y, "b|")
+    pylab.yticks(list(range(len(labels))), labels, color="b")
+    # Leave one empty row of padding below and above the plotted words.
+    pylab.ylim(-1, len(labels))
+    pylab.title(title)
+    pylab.xlabel("Word Offset")
+    pylab.show()
+
+
+if __name__ == '__main__':
+    # Demo: plot where four names occur across the tokens of
+    # 'austen-sense.txt' from the Gutenberg corpus reader.
+    import nltk.compat
+    from nltk.corpus import gutenberg
+
+    target_names = ['Elinor', 'Marianne', 'Edward', 'Willoughby']
+    austen_tokens = gutenberg.words('austen-sense.txt')
+    dispersion_plot(austen_tokens, target_names)