Initial commit

2019-10-20 13:16:49 +02:00
commit 233066caf4
2099 changed files with 360824 additions and 0 deletions
--- a/venv/lib/python3.7/site-packages/nltk/test/unit/init.py
+++ b/venv/lib/python3.7/site-packages/nltk/test/unit/init.py
--- a/venv/lib/python3.7/site-packages/nltk/test/unit/pycache/init.cpython-37.pyc
+++ b/venv/lib/python3.7/site-packages/nltk/test/unit/pycache/init.cpython-37.pyc
--- a/venv/lib/python3.7/site-packages/nltk/test/unit/pycache/test_2x_compat.cpython-37.pyc
+++ b/venv/lib/python3.7/site-packages/nltk/test/unit/pycache/test_2x_compat.cpython-37.pyc
--- a/venv/lib/python3.7/site-packages/nltk/test/unit/pycache/test_aline.cpython-37.pyc
+++ b/venv/lib/python3.7/site-packages/nltk/test/unit/pycache/test_aline.cpython-37.pyc
--- a/venv/lib/python3.7/site-packages/nltk/test/unit/pycache/test_brill.cpython-37.pyc
+++ b/venv/lib/python3.7/site-packages/nltk/test/unit/pycache/test_brill.cpython-37.pyc
--- a/venv/lib/python3.7/site-packages/nltk/test/unit/pycache/test_cfd_mutation.cpython-37.pyc
+++ b/venv/lib/python3.7/site-packages/nltk/test/unit/pycache/test_cfd_mutation.cpython-37.pyc
--- a/venv/lib/python3.7/site-packages/nltk/test/unit/pycache/test_cfg2chomsky.cpython-37.pyc
+++ b/venv/lib/python3.7/site-packages/nltk/test/unit/pycache/test_cfg2chomsky.cpython-37.pyc
--- a/venv/lib/python3.7/site-packages/nltk/test/unit/pycache/test_chunk.cpython-37.pyc
+++ b/venv/lib/python3.7/site-packages/nltk/test/unit/pycache/test_chunk.cpython-37.pyc
--- a/venv/lib/python3.7/site-packages/nltk/test/unit/pycache/test_classify.cpython-37.pyc
+++ b/venv/lib/python3.7/site-packages/nltk/test/unit/pycache/test_classify.cpython-37.pyc
--- a/venv/lib/python3.7/site-packages/nltk/test/unit/pycache/test_collocations.cpython-37.pyc
+++ b/venv/lib/python3.7/site-packages/nltk/test/unit/pycache/test_collocations.cpython-37.pyc
--- a/venv/lib/python3.7/site-packages/nltk/test/unit/pycache/test_concordance.cpython-37.pyc
+++ b/venv/lib/python3.7/site-packages/nltk/test/unit/pycache/test_concordance.cpython-37.pyc
--- a/venv/lib/python3.7/site-packages/nltk/test/unit/pycache/test_corenlp.cpython-37.pyc
+++ b/venv/lib/python3.7/site-packages/nltk/test/unit/pycache/test_corenlp.cpython-37.pyc
--- a/venv/lib/python3.7/site-packages/nltk/test/unit/pycache/test_corpora.cpython-37.pyc
+++ b/venv/lib/python3.7/site-packages/nltk/test/unit/pycache/test_corpora.cpython-37.pyc
--- a/venv/lib/python3.7/site-packages/nltk/test/unit/pycache/test_corpus_views.cpython-37.pyc
+++ b/venv/lib/python3.7/site-packages/nltk/test/unit/pycache/test_corpus_views.cpython-37.pyc
--- a/venv/lib/python3.7/site-packages/nltk/test/unit/pycache/test_data.cpython-37.pyc
+++ b/venv/lib/python3.7/site-packages/nltk/test/unit/pycache/test_data.cpython-37.pyc
--- a/venv/lib/python3.7/site-packages/nltk/test/unit/pycache/test_disagreement.cpython-37.pyc
+++ b/venv/lib/python3.7/site-packages/nltk/test/unit/pycache/test_disagreement.cpython-37.pyc
--- a/venv/lib/python3.7/site-packages/nltk/test/unit/pycache/test_hmm.cpython-37.pyc
+++ b/venv/lib/python3.7/site-packages/nltk/test/unit/pycache/test_hmm.cpython-37.pyc
--- a/venv/lib/python3.7/site-packages/nltk/test/unit/pycache/test_json2csv_corpus.cpython-37.pyc
+++ b/venv/lib/python3.7/site-packages/nltk/test/unit/pycache/test_json2csv_corpus.cpython-37.pyc
--- a/venv/lib/python3.7/site-packages/nltk/test/unit/pycache/test_naivebayes.cpython-37.pyc
+++ b/venv/lib/python3.7/site-packages/nltk/test/unit/pycache/test_naivebayes.cpython-37.pyc
--- a/venv/lib/python3.7/site-packages/nltk/test/unit/pycache/test_nombank.cpython-37.pyc
+++ b/venv/lib/python3.7/site-packages/nltk/test/unit/pycache/test_nombank.cpython-37.pyc
--- a/venv/lib/python3.7/site-packages/nltk/test/unit/pycache/test_pos_tag.cpython-37.pyc
+++ b/venv/lib/python3.7/site-packages/nltk/test/unit/pycache/test_pos_tag.cpython-37.pyc
--- a/venv/lib/python3.7/site-packages/nltk/test/unit/pycache/test_rte_classify.cpython-37.pyc
+++ b/venv/lib/python3.7/site-packages/nltk/test/unit/pycache/test_rte_classify.cpython-37.pyc
--- a/venv/lib/python3.7/site-packages/nltk/test/unit/pycache/test_seekable_unicode_stream_reader.cpython-37.pyc
+++ b/venv/lib/python3.7/site-packages/nltk/test/unit/pycache/test_seekable_unicode_stream_reader.cpython-37.pyc
--- a/venv/lib/python3.7/site-packages/nltk/test/unit/pycache/test_senna.cpython-37.pyc
+++ b/venv/lib/python3.7/site-packages/nltk/test/unit/pycache/test_senna.cpython-37.pyc
--- a/venv/lib/python3.7/site-packages/nltk/test/unit/pycache/test_stem.cpython-37.pyc
+++ b/venv/lib/python3.7/site-packages/nltk/test/unit/pycache/test_stem.cpython-37.pyc
--- a/venv/lib/python3.7/site-packages/nltk/test/unit/pycache/test_tag.cpython-37.pyc
+++ b/venv/lib/python3.7/site-packages/nltk/test/unit/pycache/test_tag.cpython-37.pyc
--- a/venv/lib/python3.7/site-packages/nltk/test/unit/pycache/test_tgrep.cpython-37.pyc
+++ b/venv/lib/python3.7/site-packages/nltk/test/unit/pycache/test_tgrep.cpython-37.pyc
--- a/venv/lib/python3.7/site-packages/nltk/test/unit/pycache/test_tokenize.cpython-37.pyc
+++ b/venv/lib/python3.7/site-packages/nltk/test/unit/pycache/test_tokenize.cpython-37.pyc
--- a/venv/lib/python3.7/site-packages/nltk/test/unit/pycache/test_twitter_auth.cpython-37.pyc
+++ b/venv/lib/python3.7/site-packages/nltk/test/unit/pycache/test_twitter_auth.cpython-37.pyc
--- a/venv/lib/python3.7/site-packages/nltk/test/unit/pycache/test_wordnet.cpython-37.pyc
+++ b/venv/lib/python3.7/site-packages/nltk/test/unit/pycache/test_wordnet.cpython-37.pyc
--- a/venv/lib/python3.7/site-packages/nltk/test/unit/pycache/utils.cpython-37.pyc
+++ b/venv/lib/python3.7/site-packages/nltk/test/unit/pycache/utils.cpython-37.pyc
--- a/venv/lib/python3.7/site-packages/nltk/test/unit/lm/init.py
+++ b/venv/lib/python3.7/site-packages/nltk/test/unit/lm/init.py
--- a/venv/lib/python3.7/site-packages/nltk/test/unit/lm/pycache/init.cpython-37.pyc
+++ b/venv/lib/python3.7/site-packages/nltk/test/unit/lm/pycache/init.cpython-37.pyc
--- a/venv/lib/python3.7/site-packages/nltk/test/unit/lm/pycache/test_counter.cpython-37.pyc
+++ b/venv/lib/python3.7/site-packages/nltk/test/unit/lm/pycache/test_counter.cpython-37.pyc
--- a/venv/lib/python3.7/site-packages/nltk/test/unit/lm/pycache/test_models.cpython-37.pyc
+++ b/venv/lib/python3.7/site-packages/nltk/test/unit/lm/pycache/test_models.cpython-37.pyc
--- a/venv/lib/python3.7/site-packages/nltk/test/unit/lm/pycache/test_preprocessing.cpython-37.pyc
+++ b/venv/lib/python3.7/site-packages/nltk/test/unit/lm/pycache/test_preprocessing.cpython-37.pyc
--- a/venv/lib/python3.7/site-packages/nltk/test/unit/lm/pycache/test_vocabulary.cpython-37.pyc
+++ b/venv/lib/python3.7/site-packages/nltk/test/unit/lm/pycache/test_vocabulary.cpython-37.pyc
--- a/venv/lib/python3.7/site-packages/nltk/test/unit/lm/test_counter.py
+++ b/venv/lib/python3.7/site-packages/nltk/test/unit/lm/test_counter.py
@@ -0,0 +1,135 @@
+# -*- coding: utf-8 -*-
+# Natural Language Toolkit: Language Model Unit Tests
+#
+# Copyright (C) 2001-2019 NLTK Project
+# Author: Ilia Kurenkov <ilia.kurenkov@gmail.com>
+# URL: <http://nltk.org/>
+# For license information, see LICENSE.TXT
+
+import unittest
+
+import six
+
+from nltk import FreqDist
+from nltk.lm import NgramCounter
+from nltk.util import everygrams
+
+
+class NgramCounterTests(unittest.TestCase):
+    """Tests for NgramCounter that only involve lookup, no modification."""
+
+    @classmethod
+    def setUpClass(cls):
+
+        text = [list("abcd"), list("egdbe")]
+        cls.trigram_counter = NgramCounter(
+            (everygrams(sent, max_len=3) for sent in text)
+        )
+        cls.bigram_counter = NgramCounter(
+            (everygrams(sent, max_len=2) for sent in text)
+        )
+
+    def test_N(self):
+        self.assertEqual(self.bigram_counter.N(), 16)
+        self.assertEqual(self.trigram_counter.N(), 21)
+
+    def test_counter_len_changes_with_lookup(self):
+        self.assertEqual(len(self.bigram_counter), 2)
+        _ = self.bigram_counter[50]
+        self.assertEqual(len(self.bigram_counter), 3)
+
+    def test_ngram_order_access_unigrams(self):
+        self.assertEqual(self.bigram_counter[1], self.bigram_counter.unigrams)
+
+    def test_ngram_conditional_freqdist(self):
+        expected_trigram_contexts = [
+            ("a", "b"),
+            ("b", "c"),
+            ("e", "g"),
+            ("g", "d"),
+            ("d", "b"),
+        ]
+        expected_bigram_contexts = [("a",), ("b",), ("d",), ("e",), ("c",), ("g",)]
+
+        bigrams = self.trigram_counter[2]
+        trigrams = self.trigram_counter[3]
+
+        six.assertCountEqual(self, expected_bigram_contexts, bigrams.conditions())
+        six.assertCountEqual(self, expected_trigram_contexts, trigrams.conditions())
+
+    def test_bigram_counts_seen_ngrams(self):
+        b_given_a_count = 1
+        unk_given_b_count = 1
+
+        self.assertEqual(b_given_a_count, self.bigram_counter[["a"]]["b"])
+        self.assertEqual(unk_given_b_count, self.bigram_counter[["b"]]["c"])
+
+    def test_bigram_counts_unseen_ngrams(self):
+        z_given_b_count = 0
+
+        self.assertEqual(z_given_b_count, self.bigram_counter[["b"]]["z"])
+
+    def test_unigram_counts_seen_words(self):
+        expected_count_b = 2
+
+        self.assertEqual(expected_count_b, self.bigram_counter["b"])
+
+    def test_unigram_counts_completely_unseen_words(self):
+        unseen_count = 0
+
+        self.assertEqual(unseen_count, self.bigram_counter["z"])
+
+
+class NgramCounterTrainingTests(unittest.TestCase):
+    def setUp(self):
+        self.counter = NgramCounter()
+
+    def test_empty_string(self):
+        test = NgramCounter("")
+        self.assertNotIn(2, test)
+        self.assertEqual(test[1], FreqDist())
+
+    def test_empty_list(self):
+        test = NgramCounter([])
+        self.assertNotIn(2, test)
+        self.assertEqual(test[1], FreqDist())
+
+    def test_None(self):
+        test = NgramCounter(None)
+        self.assertNotIn(2, test)
+        self.assertEqual(test[1], FreqDist())
+
+    def test_train_on_unigrams(self):
+        words = list("abcd")
+        counter = NgramCounter([[(w,) for w in words]])
+
+        self.assertFalse(bool(counter[3]))
+        self.assertFalse(bool(counter[2]))
+        six.assertCountEqual(self, words, counter[1].keys())
+
+    def test_train_on_illegal_sentences(self):
+        str_sent = ["Check", "this", "out", "!"]
+        list_sent = [["Check", "this"], ["this", "out"], ["out", "!"]]
+
+        with self.assertRaises(TypeError):
+            NgramCounter([str_sent])
+
+        with self.assertRaises(TypeError):
+            NgramCounter([list_sent])
+
+    def test_train_on_bigrams(self):
+        bigram_sent = [("a", "b"), ("c", "d")]
+        counter = NgramCounter([bigram_sent])
+
+        self.assertFalse(bool(counter[3]))
+
+    def test_train_on_mix(self):
+        mixed_sent = [("a", "b"), ("c", "d"), ("e", "f", "g"), ("h",)]
+        counter = NgramCounter([mixed_sent])
+        unigrams = ["h"]
+        bigram_contexts = [("a",), ("c",)]
+        trigram_contexts = [("e", "f")]
+
+        six.assertCountEqual(self, unigrams, counter[1].keys())
+        six.assertCountEqual(self, bigram_contexts, counter[2].keys())
+        six.assertCountEqual(self, trigram_contexts, counter[3].keys())
--- a/venv/lib/python3.7/site-packages/nltk/test/unit/lm/test_models.py
+++ b/venv/lib/python3.7/site-packages/nltk/test/unit/lm/test_models.py
@@ -0,0 +1,446 @@
+# -*- coding: utf-8 -*-
+# Natural Language Toolkit: Language Model Unit Tests
+#
+# Copyright (C) 2001-2019 NLTK Project
+# Author: Ilia Kurenkov <ilia.kurenkov@gmail.com>
+# URL: <http://nltk.org/>
+# For license information, see LICENSE.TXT
+
+from __future__ import division
+
+import math
+import sys
+import unittest
+
+from six import add_metaclass
+
+from nltk.lm import (
+    Vocabulary,
+    MLE,
+    Lidstone,
+    Laplace,
+    WittenBellInterpolated,
+    KneserNeyInterpolated,
+)
+from nltk.lm.preprocessing import padded_everygrams
+
+
+def _prepare_test_data(ngram_order):
+    return (
+        Vocabulary(["a", "b", "c", "d", "z", "<s>", "</s>"], unk_cutoff=1),
+        [
+            list(padded_everygrams(ngram_order, sent))
+            for sent in (list("abcd"), list("egadbe"))
+        ],
+    )
+
+
+class ParametrizeTestsMeta(type):
+    """Metaclass for generating parametrized tests."""
+
+    # Generated test functions are placed into the class namespace (``dct``)
+    # before the class object itself is created.
+    def __new__(cls, name, bases, dct):
+        # Contexts for which the scores over the model's vocabulary are
+        # checked to sum to 1.
+        contexts = (
+            ("a",),
+            ("c",),
+            (u"<s>",),
+            ("b",),
+            (u"<UNK>",),
+            ("d",),
+            ("e",),
+            ("r",),
+            ("w",),
+        )
+        # One "sum to 1" test per context, named after its position above.
+        for i, c in enumerate(contexts):
+            dct["test_sumto1_{0}".format(i)] = cls.add_sum_to_1_test(c)
+        # ``score_tests`` is an optional class attribute holding
+        # (word, context, expected_score) triples; one test is generated per
+        # triple.
+        scores = dct.get("score_tests", [])
+        for i, (word, context, expected_score) in enumerate(scores):
+            dct["test_score_{0}".format(i)] = cls.add_score_test(
+                word, context, expected_score
+            )
+        return super(ParametrizeTestsMeta, cls).__new__(cls, name, bases, dct)
+
+    # Returns a test function asserting that ``self.model.score(word, context)``
+    # equals ``expected_score`` to 4 decimal places.
+    @classmethod
+    def add_score_test(cls, word, context, expected_score):
+        if sys.version_info > (3, 5):
+            message = "word='{word}', context={context}"
+        else:
+            # Python 2 doesn't report the mismatched values if we pass a custom
+            # message, so we have to report them manually.
+            message = (
+                "{score} != {expected_score} within 4 places, "
+                "word='{word}', context={context}"
+            )
+
+        def test_method(self):
+            score = self.model.score(word, context)
+            # NOTE: the placeholders are filled from locals(), so the names
+            # ``score``, ``word``, ``context`` and ``expected_score`` must not
+            # be renamed without also updating the message templates above.
+            self.assertAlmostEqual(
+                score, expected_score, msg=message.format(**locals()), places=4
+            )
+
+        return test_method
+
+    # Returns a test function asserting that the scores of every word in
+    # ``self.model.vocab``, given ``context``, add up to 1.
+    @classmethod
+    def add_sum_to_1_test(cls, context):
+        def test(self):
+            s = sum(self.model.score(w, context) for w in self.model.vocab)
+            self.assertAlmostEqual(s, 1.0, msg="The context is {}".format(context))
+
+        return test
+
+
+@add_metaclass(ParametrizeTestsMeta)
+class MleBigramTests(unittest.TestCase):
+    """unit tests for MLENgramModel class"""
+
+    score_tests = [
+        ("d", ["c"], 1),
+        # Unseen ngrams should yield 0
+        ("d", ["e"], 0),
+        # Unigrams should also be 0
+        ("z", None, 0),
+        # N unigrams = 14
+        # count('a') = 2
+        ("a", None, 2.0 / 14),
+        # count('y') = 3
+        ("y", None, 3.0 / 14),
+    ]
+
+    def setUp(self):
+        vocab, training_text = _prepare_test_data(2)
+        self.model = MLE(2, vocabulary=vocab)
+        self.model.fit(training_text)
+
+    def test_logscore_zero_score(self):
+        # logscore of unseen ngrams should be -inf
+        logscore = self.model.logscore("d", ["e"])
+
+        self.assertTrue(math.isinf(logscore))
+
+    def test_entropy_perplexity_seen(self):
+        # ngrams seen during training
+        trained = [
+            ("<s>", "a"),
+            ("a", "b"),
+            ("b", "<UNK>"),
+            ("<UNK>", "a"),
+            ("a", "d"),
+            ("d", "</s>"),
+        ]
+        # Ngram = Log score
+        # <s>, a    = -1
+        # a, b      = -1
+        # b, UNK    = -1
+        # UNK, a    = -1.585
+        # a, d      = -1
+        # d, </s>   = -1
+        # TOTAL logscores   = -6.585
+        # - AVG logscores   = 1.0975
+        H = 1.0975
+        perplexity = 2.1398
+
+        self.assertAlmostEqual(H, self.model.entropy(trained), places=4)
+        self.assertAlmostEqual(perplexity, self.model.perplexity(trained), places=4)
+
+    def test_entropy_perplexity_unseen(self):
+        # In MLE, even one unseen ngram should make entropy and perplexity infinite
+        untrained = [("<s>", "a"), ("a", "c"), ("c", "d"), ("d", "</s>")]
+
+        self.assertTrue(math.isinf(self.model.entropy(untrained)))
+        self.assertTrue(math.isinf(self.model.perplexity(untrained)))
+
+    def test_entropy_perplexity_unigrams(self):
+        # word = score, log score
+        # <s>   = 0.1429, -2.8074
+        # a     = 0.1429, -2.8074
+        # c     = 0.0714, -3.8073
+        # UNK   = 0.2143, -2.2224
+        # d     = 0.1429, -2.8074
+        # c     = 0.0714, -3.8073
+        # </s>  = 0.1429, -2.8074
+        # TOTAL logscores = -21.6243
+        # - AVG logscores = 3.0095
+        H = 3.0095
+        perplexity = 8.0529
+
+        text = [("<s>",), ("a",), ("c",), ("-",), ("d",), ("c",), ("</s>",)]
+
+        self.assertAlmostEqual(H, self.model.entropy(text), places=4)
+        self.assertAlmostEqual(perplexity, self.model.perplexity(text), places=4)
+
+
+@add_metaclass(ParametrizeTestsMeta)
+class MleTrigramTests(unittest.TestCase):
+    """MLE trigram model tests"""
+
+    score_tests = [
+        # count(d | b, c) = 1
+        # count(b, c) = 1
+        ("d", ("b", "c"), 1),
+        # count(d | c) = 1
+        # count(c) = 1
+        ("d", ["c"], 1),
+        # total number of tokens is 18, of which "a" occured 2 times
+        ("a", None, 2.0 / 18),
+        # in vocabulary but unseen
+        ("z", None, 0),
+        # out of vocabulary should use "UNK" score
+        ("y", None, 3.0 / 18),
+    ]
+
+    def setUp(self):
+        vocab, training_text = _prepare_test_data(3)
+        self.model = MLE(3, vocabulary=vocab)
+        self.model.fit(training_text)
+
+
+@add_metaclass(ParametrizeTestsMeta)
+class LidstoneBigramTests(unittest.TestCase):
+    """unit tests for Lidstone class"""
+
+    score_tests = [
+        # count(d | c) = 1
+        # *count(d | c) = 1.1
+        # Count(w | c for w in vocab) = 1
+        # *Count(w | c for w in vocab) = 1.8
+        ("d", ["c"], 1.1 / 1.8),
+        # Total unigrams: 14
+        # Vocab size: 8
+        # Denominator: 14 + 0.8 = 14.8
+        # count("a") = 2
+        # *count("a") = 2.1
+        ("a", None, 2.1 / 14.8),
+        # in vocabulary but unseen
+        # count("z") = 0
+        # *count("z") = 0.1
+        ("z", None, 0.1 / 14.8),
+        # out of vocabulary should use "UNK" score
+        # count("<UNK>") = 3
+        # *count("<UNK>") = 3.1
+        ("y", None, 3.1 / 14.8),
+    ]
+
+    def setUp(self):
+        vocab, training_text = _prepare_test_data(2)
+        self.model = Lidstone(0.1, 2, vocabulary=vocab)
+        self.model.fit(training_text)
+
+    def test_gamma(self):
+        self.assertEqual(0.1, self.model.gamma)
+
+    def test_entropy_perplexity(self):
+        text = [
+            ("<s>", "a"),
+            ("a", "c"),
+            ("c", "<UNK>"),
+            ("<UNK>", "d"),
+            ("d", "c"),
+            ("c", "</s>"),
+        ]
+        # Unlike MLE this should be able to handle completely novel ngrams
+        # Ngram = score, log score
+        # <s>, a    = 0.3929, -1.3479
+        # a, c      = 0.0357, -4.8074
+        # c, UNK    = 0.0(5), -4.1699
+        # UNK, d    = 0.0263,  -5.2479
+        # d, c      = 0.0357, -4.8074
+        # c, </s>   = 0.0(5), -4.1699
+        # TOTAL logscore: −24.5504
+        # - AVG logscore: 4.0917
+        H = 4.0917
+        perplexity = 17.0504
+        self.assertAlmostEqual(H, self.model.entropy(text), places=4)
+        self.assertAlmostEqual(perplexity, self.model.perplexity(text), places=4)
+
+
+@add_metaclass(ParametrizeTestsMeta)
+class LidstoneTrigramTests(unittest.TestCase):
+    score_tests = [
+        # Logic behind this is the same as for bigram model
+        ("d", ["c"], 1.1 / 1.8),
+        # if we choose a word that hasn't appeared after (b, c)
+        ("e", ["c"], 0.1 / 1.8),
+        # Trigram score now
+        ("d", ["b", "c"], 1.1 / 1.8),
+        ("e", ["b", "c"], 0.1 / 1.8),
+    ]
+
+    def setUp(self):
+        vocab, training_text = _prepare_test_data(3)
+        self.model = Lidstone(0.1, 3, vocabulary=vocab)
+        self.model.fit(training_text)
+
+
+@add_metaclass(ParametrizeTestsMeta)
+class LaplaceBigramTests(unittest.TestCase):
+    """unit tests for Laplace class"""
+
+    score_tests = [
+        # basic sanity-check:
+        # count(d | c) = 1
+        # *count(d | c) = 2
+        # Count(w | c for w in vocab) = 1
+        # *Count(w | c for w in vocab) = 9
+        ("d", ["c"], 2.0 / 9),
+        # Total unigrams: 14
+        # Vocab size: 8
+        # Denominator: 14 + 8 = 22
+        # count("a") = 2
+        # *count("a") = 3
+        ("a", None, 3.0 / 22),
+        # in vocabulary but unseen
+        # count("z") = 0
+        # *count("z") = 1
+        ("z", None, 1.0 / 22),
+        # out of vocabulary should use "UNK" score
+        # count("<UNK>") = 3
+        # *count("<UNK>") = 4
+        ("y", None, 4.0 / 22),
+    ]
+
+    def setUp(self):
+        vocab, training_text = _prepare_test_data(2)
+        self.model = Laplace(2, vocabulary=vocab)
+        self.model.fit(training_text)
+
+    def test_gamma(self):
+        # Make sure the gamma is set to 1
+        self.assertEqual(1, self.model.gamma)
+
+    def test_entropy_perplexity(self):
+        text = [
+            ("<s>", "a"),
+            ("a", "c"),
+            ("c", "<UNK>"),
+            ("<UNK>", "d"),
+            ("d", "c"),
+            ("c", "</s>"),
+        ]
+        # Unlike MLE this should be able to handle completely novel ngrams
+        # Ngram = score, log score
+        # <s>, a    = 0.2, -2.3219
+        # a, c      = 0.1, -3.3219
+        # c, UNK    = 0.(1), -3.1699
+        # UNK, d    = 0.(09), 3.4594
+        # d, c      = 0.1 -3.3219
+        # c, </s>   = 0.(1), -3.1699
+        # Total logscores: −18.7651
+        # - AVG logscores: 3.1275
+        H = 3.1275
+        perplexity = 8.7393
+        self.assertAlmostEqual(H, self.model.entropy(text), places=4)
+        self.assertAlmostEqual(perplexity, self.model.perplexity(text), places=4)
+
+
+@add_metaclass(ParametrizeTestsMeta)
+class WittenBellInterpolatedTrigramTests(unittest.TestCase):
+    def setUp(self):
+        vocab, training_text = _prepare_test_data(3)
+        self.model = WittenBellInterpolated(3, vocabulary=vocab)
+        self.model.fit(training_text)
+
+    score_tests = [
+        # For unigram scores by default revert to MLE
+        # Total unigrams: 18
+        # count('c'): 1
+        ("c", None, 1.0 / 18),
+        # in vocabulary but unseen
+        # count("z") = 0
+        ("z", None, 0.0 / 18),
+        # out of vocabulary should use "UNK" score
+        # count("<UNK>") = 3
+        ("y", None, 3.0 / 18),
+        # gamma(['b']) = 0.1111
+        # mle.score('c', ['b']) = 0.5
+        # (1 - gamma) * mle + gamma * mle('c') ~= 0.45 + .3 / 18
+        ("c", ["b"], (1 - 0.1111) * 0.5 + 0.1111 * 1 / 18),
+        # building on that, let's try 'a b c' as the trigram
+        # gamma(['a', 'b']) = 0.0667
+        # mle("c", ["a", "b"]) = 1
+        ("c", ["a", "b"], (1 - 0.0667) + 0.0667 * ((1 - 0.1111) * 0.5 + 0.1111 / 18)),
+    ]
+
+
+@add_metaclass(ParametrizeTestsMeta)
+class KneserNeyInterpolatedTrigramTests(unittest.TestCase):
+    def setUp(self):
+        vocab, training_text = _prepare_test_data(3)
+        self.model = KneserNeyInterpolated(3, vocabulary=vocab)
+        self.model.fit(training_text)
+
+    score_tests = [
+        # For unigram scores revert to uniform
+        # Vocab size: 8
+        # count('c'): 1
+        ("c", None, 1.0 / 8),
+        # in vocabulary but unseen, still uses uniform
+        ("z", None, 1 / 8),
+        # out of vocabulary should use "UNK" score, i.e. again uniform
+        ("y", None, 1.0 / 8),
+        # alpha = count('bc') - discount = 1 - 0.1 = 0.9
+        # gamma(['b']) = discount * number of unique words that follow ['b'] = 0.1 * 2
+        # normalizer = total number of bigrams with this context = 2
+        # the final should be: (alpha + gamma * unigram_score("c"))
+        ("c", ["b"], (0.9 + 0.2 * (1 / 8)) / 2),
+        # building on that, let's try 'a b c' as the trigram
+        # alpha = count('abc') - discount = 1 - 0.1 = 0.9
+        # gamma(['a', 'b']) = 0.1 * 1
+        # normalizer = total number of trigrams with prefix "ab" = 1 => we can ignore it!
+        ("c", ["a", "b"], 0.9 + 0.1 * ((0.9 + 0.2 * (1 / 8)) / 2)),
+    ]
+
+
+class NgramModelTextGenerationTests(unittest.TestCase):
+    """Using MLE estimator, generate some text."""
+
+    def setUp(self):
+        vocab, training_text = _prepare_test_data(3)
+        self.model = MLE(3, vocabulary=vocab)
+        self.model.fit(training_text)
+
+    def test_generate_one_no_context(self):
+        self.assertEqual(self.model.generate(random_seed=3), "<UNK>")
+
+    def test_generate_one_limiting_context(self):
+        # We don't need random_seed for contexts with only one continuation
+        self.assertEqual(self.model.generate(text_seed=["c"]), "d")
+        self.assertEqual(self.model.generate(text_seed=["b", "c"]), "d")
+        self.assertEqual(self.model.generate(text_seed=["a", "c"]), "d")
+
+    def test_generate_one_varied_context(self):
+        # When context doesn't limit our options enough, seed the random choice
+        self.assertEqual(
+            self.model.generate(text_seed=("a", "<s>"), random_seed=2), "a"
+        )
+
+    def test_generate_cycle(self):
+        # Add a cycle to the model: bd -> b, db -> d
+        more_training_text = [list(padded_everygrams(self.model.order, list("bdbdbd")))]
+        self.model.fit(more_training_text)
+        # Test that we can escape the cycle
+        self.assertEqual(
+            self.model.generate(7, text_seed=("b", "d"), random_seed=5),
+            ["b", "d", "b", "d", "b", "d", "</s>"],
+        )
+
+    def test_generate_with_text_seed(self):
+        self.assertEqual(
+            self.model.generate(5, text_seed=("<s>", "e"), random_seed=3),
+            ["<UNK>", "a", "d", "b", "<UNK>"],
+        )
+
+    def test_generate_oov_text_seed(self):
+        self.assertEqual(
+            self.model.generate(text_seed=("aliens",), random_seed=3),
+            self.model.generate(text_seed=("<UNK>",), random_seed=3),
+        )
+
+    def test_generate_None_text_seed(self):
+        # should crash with type error when we try to look it up in vocabulary
+        with self.assertRaises(TypeError):
+            self.model.generate(text_seed=(None,))
+
+        # This will work
+        self.assertEqual(
+            self.model.generate(text_seed=None, random_seed=3),
+            self.model.generate(random_seed=3),
+        )
--- a/venv/lib/python3.7/site-packages/nltk/test/unit/lm/test_preprocessing.py
+++ b/venv/lib/python3.7/site-packages/nltk/test/unit/lm/test_preprocessing.py
@@ -0,0 +1,31 @@
+# -*- coding: utf-8 -*-
+# Natural Language Toolkit: Language Model Unit Tests
+#
+# Copyright (C) 2001-2019 NLTK Project
+# Author: Ilia Kurenkov <ilia.kurenkov@gmail.com>
+# URL: <http://nltk.org/>
+# For license information, see LICENSE.TXT
+import unittest
+
+from nltk.lm.preprocessing import padded_everygram_pipeline
+
+
+class TestPreprocessing(unittest.TestCase):
+    def test_padded_everygram_pipeline(self):
+        expected_train = [
+            [
+                ("<s>",),
+                ("a",),
+                ("b",),
+                ("c",),
+                ("</s>",),
+                ("<s>", "a"),
+                ("a", "b"),
+                ("b", "c"),
+                ("c", "</s>"),
+            ]
+        ]
+        expected_vocab = ["<s>", "a", "b", "c", "</s>"]
+        train_data, vocab_data = padded_everygram_pipeline(2, [["a", "b", "c"]])
+        self.assertEqual([list(sent) for sent in train_data], expected_train)
+        self.assertEqual(list(vocab_data), expected_vocab)
--- a/venv/lib/python3.7/site-packages/nltk/test/unit/lm/test_vocabulary.py
+++ b/venv/lib/python3.7/site-packages/nltk/test/unit/lm/test_vocabulary.py
@@ -0,0 +1,141 @@
+# -*- coding: utf-8 -*-
+# Natural Language Toolkit: Language Model Unit Tests
+#
+# Copyright (C) 2001-2019 NLTK Project
+# Author: Ilia Kurenkov <ilia.kurenkov@gmail.com>
+# URL: <http://nltk.org/>
+# For license information, see LICENSE.TXT
+
+import unittest
+from collections import Counter
+
+import six
+from nltk.lm import Vocabulary
+
+
+class NgramModelVocabularyTests(unittest.TestCase):
+    """tests Vocabulary Class"""
+
+    @classmethod
+    def setUpClass(cls):
+        cls.vocab = Vocabulary(
+            ["z", "a", "b", "c", "f", "d", "e", "g", "a", "d", "b", "e", "w"],
+            unk_cutoff=2,
+        )
+
+    def test_truthiness(self):
+        self.assertTrue(self.vocab)
+
+    def test_cutoff_value_set_correctly(self):
+        self.assertEqual(self.vocab.cutoff, 2)
+
+    def test_unable_to_change_cutoff(self):
+        with self.assertRaises(AttributeError):
+            self.vocab.cutoff = 3
+
+    def test_cutoff_setter_checks_value(self):
+        with self.assertRaises(ValueError) as exc_info:
+            Vocabulary("abc", unk_cutoff=0)
+        expected_error_msg = "Cutoff value cannot be less than 1. Got: 0"
+        self.assertEqual(expected_error_msg, str(exc_info.exception))
+
+    def test_counts_set_correctly(self):
+        self.assertEqual(self.vocab.counts["a"], 2)
+        self.assertEqual(self.vocab.counts["b"], 2)
+        self.assertEqual(self.vocab.counts["c"], 1)
+
+    def test_membership_check_respects_cutoff(self):
+        # a was seen 2 times, so it should be considered part of the vocabulary
+        self.assertTrue("a" in self.vocab)
+        # "c" was seen once, it shouldn't be considered part of the vocab
+        self.assertFalse("c" in self.vocab)
+        # "z" was never seen at all, also shouldn't be considered in the vocab
+        self.assertFalse("z" in self.vocab)
+
+    def test_vocab_len_respects_cutoff(self):
+        # Vocab size is the number of unique tokens that occur at least as often
+        # as the cutoff value, plus 1 to account for unknown words.
+        self.assertEqual(5, len(self.vocab))
+
+    def test_vocab_iter_respects_cutoff(self):
+        vocab_counts = ["a", "b", "c", "d", "e", "f", "g", "w", "z"]
+        vocab_items = ["a", "b", "d", "e", "<UNK>"]
+
+        six.assertCountEqual(self, vocab_counts, list(self.vocab.counts.keys()))
+        six.assertCountEqual(self, vocab_items, list(self.vocab))
+
+    def test_update_empty_vocab(self):
+        empty = Vocabulary(unk_cutoff=2)
+        self.assertEqual(len(empty), 0)
+        self.assertFalse(empty)
+        self.assertIn(empty.unk_label, empty)
+
+        empty.update(list("abcde"))
+        self.assertIn(empty.unk_label, empty)
+
+    def test_lookup(self):
+        self.assertEqual(self.vocab.lookup("a"), "a")
+        self.assertEqual(self.vocab.lookup("c"), "<UNK>")
+
+    def test_lookup_iterables(self):
+        self.assertEqual(self.vocab.lookup(["a", "b"]), ("a", "b"))
+        self.assertEqual(self.vocab.lookup(("a", "b")), ("a", "b"))
+        self.assertEqual(self.vocab.lookup(("a", "c")), ("a", "<UNK>"))
+        self.assertEqual(
+            self.vocab.lookup(map(str, range(3))), ("<UNK>", "<UNK>", "<UNK>")
+        )
+
+    def test_lookup_empty_iterables(self):
+        self.assertEqual(self.vocab.lookup(()), ())
+        self.assertEqual(self.vocab.lookup([]), ())
+        self.assertEqual(self.vocab.lookup(iter([])), ())
+        self.assertEqual(self.vocab.lookup(n for n in range(0, 0)), ())
+
+    def test_lookup_recursive(self):
+        self.assertEqual(
+            self.vocab.lookup([["a", "b"], ["a", "c"]]), (("a", "b"), ("a", "<UNK>"))
+        )
+        self.assertEqual(self.vocab.lookup([["a", "b"], "c"]), (("a", "b"), "<UNK>"))
+        self.assertEqual(self.vocab.lookup([[[[["a", "b"]]]]]), ((((("a", "b"),),),),))
+
+    def test_lookup_None(self):
+        with self.assertRaises(TypeError):
+            self.vocab.lookup(None)
+        with self.assertRaises(TypeError):
+            list(self.vocab.lookup([None, None]))
+
+    def test_lookup_int(self):
+        with self.assertRaises(TypeError):
+            self.vocab.lookup(1)
+        with self.assertRaises(TypeError):
+            list(self.vocab.lookup([1, 2]))
+
+    def test_lookup_empty_str(self):
+        self.assertEqual(self.vocab.lookup(""), "<UNK>")
+
+    def test_eqality(self):
+        v1 = Vocabulary(["a", "b", "c"], unk_cutoff=1)
+        v2 = Vocabulary(["a", "b", "c"], unk_cutoff=1)
+        v3 = Vocabulary(["a", "b", "c"], unk_cutoff=1, unk_label="blah")
+        v4 = Vocabulary(["a", "b"], unk_cutoff=1)
+
+        self.assertEqual(v1, v2)
+        self.assertNotEqual(v1, v3)
+        self.assertNotEqual(v1, v4)
+
+    def test_str(self):
+        self.assertEqual(
+            str(self.vocab),
+            ("<Vocabulary with cutoff=2 " "unk_label='<UNK>' and 5 items>"),
+        )
+
+    def test_creation_with_counter(self):
+        self.assertEqual(
+            self.vocab,
+            Vocabulary(
+                Counter(
+                    ["z", "a", "b", "c", "f", "d", "e", "g", "a", "d", "b", "e", "w"]
+                ),
+                unk_cutoff=2,
+            ),
+        )
--- a/venv/lib/python3.7/site-packages/nltk/test/unit/test_2x_compat.py
+++ b/venv/lib/python3.7/site-packages/nltk/test/unit/test_2x_compat.py
@@ -0,0 +1,65 @@
+# -*- coding: utf-8 -*-
+"""
+Unit tests for nltk.compat.
+See also nltk/test/compat.doctest.
+"""
+from __future__ import absolute_import, unicode_literals
+import unittest
+
+from nltk.text import Text
+from nltk.compat import PY3, python_2_unicode_compatible
+
+
+def setup_module(module):
+    from nose import SkipTest
+
+    if PY3:
+        raise SkipTest("test_2x_compat is for testing nltk.compat under Python 2.x")
+
+
+class TestTextTransliteration(unittest.TestCase):
+    txt = Text(["São", "Tomé", "and", "Príncipe"])
+
+    def test_repr(self):
+        self.assertEqual(repr(self.txt), br"<Text: S\xe3o Tom\xe9 and Pr\xedncipe...>")
+
+    def test_str(self):
+        self.assertEqual(str(self.txt), b"<Text: Sao Tome and Principe...>")
+
+
+class TestFraction(unittest.TestCase):
+    def test_unnoramlize_fraction(self):
+        from fractions import Fraction as NativePythonFraction
+        from nltk.compat import Fraction as NLTKFraction
+
+        # The native fraction should throw a TypeError in Python < 3.5
+        with self.assertRaises(TypeError):
+            NativePythonFraction(0, 1000, _normalize=False)
+
+        # Using nltk.compat.Fraction in Python < 3.5
+        compat_frac = NLTKFraction(0, 1000, _normalize=False)
+        # The numerator and denominator does not change.
+        assert compat_frac.numerator == 0
+        assert compat_frac.denominator == 1000
+        # The floating point value remains normalized.
+        assert float(compat_frac) == 0.0
+
+        # Checks that the division is not divided by
+        # # by greatest common divisor (gcd).
+        six_twelve = NLTKFraction(6, 12, _normalize=False)
+        assert six_twelve.numerator == 6
+        assert six_twelve.denominator == 12
+
+        one_two = NLTKFraction(1, 2, _normalize=False)
+        assert one_two.numerator == 1
+        assert one_two.denominator == 2
+
+        # Checks against the native fraction.
+        six_twelve_original = NativePythonFraction(6, 12)
+        # Checks that rational values of one_two and six_twelve is the same.
+        assert float(one_two) == float(six_twelve) == float(six_twelve_original)
+
+        # Checks that the fraction does get normalized, even when
+        # _normalize == False when numerator is using native
+        # fractions.Fraction.from_float
+        assert NLTKFraction(3.142, _normalize=False) == NativePythonFraction(3.142)
--- a/venv/lib/python3.7/site-packages/nltk/test/unit/test_aline.py
+++ b/venv/lib/python3.7/site-packages/nltk/test/unit/test_aline.py
@@ -0,0 +1,78 @@
+# -*- coding: utf-8 -*-
+"""
+Unit tests for nltk.metrics.aline
+"""
+
+from __future__ import unicode_literals
+
+import unittest
+
+from nltk.metrics import aline
+
+
+class TestAline(unittest.TestCase):
+    """
+    Test Aline algorithm for aligning phonetic sequences
+    """
+
+    def test_aline(self):
+        result = aline.align('θin', 'tenwis')
+        expected = [
+            [('θ', 't'), ('i', 'e'), ('n', 'n'), ('-', 'w'), ('-', 'i'), ('-', 's')]
+        ]
+
+        self.assertEqual(result, expected)
+
+        result = aline.align('jo', 'ʒə')
+        expected = [[('j', 'ʒ'), ('o', 'ə')]]
+
+        self.assertEqual(result, expected)
+
+        result = aline.align('pematesiweni', 'pematesewen')
+        expected = [
+            [
+                ('p', 'p'),
+                ('e', 'e'),
+                ('m', 'm'),
+                ('a', 'a'),
+                ('t', 't'),
+                ('e', 'e'),
+                ('s', 's'),
+                ('i', 'e'),
+                ('w', 'w'),
+                ('e', 'e'),
+                ('n', 'n'),
+                ('i', '-'),
+            ]
+        ]
+
+        self.assertEqual(result, expected)
+
+        result = aline.align('tuwθ', 'dentis')
+        expected = [
+            [
+                ('t', 'd'),
+                ('u', 'e'),
+                ('w', '-'),
+                ('-', 'n'),
+                ('-', 't'),
+                ('-', 'i'),
+                ('θ', 's'),
+            ]
+        ]
+
+        self.assertEqual(result, expected)
+
+    def test_aline_delta(self):
+        """
+        Test aline for computing the difference between two segments
+        """
+        result = aline.delta('p', 'q')
+        expected = 20.0
+
+        self.assertEqual(result, expected)
+
+        result = aline.delta('a', 'A')
+        expected = 0.0
+
+        self.assertEqual(result, expected)
--- a/venv/lib/python3.7/site-packages/nltk/test/unit/test_brill.py
+++ b/venv/lib/python3.7/site-packages/nltk/test/unit/test_brill.py
@@ -0,0 +1,37 @@
+# -*- coding: utf-8 -*-
+"""
+Tests for Brill tagger.
+"""
+
+import unittest
+
+from nltk.tag import UnigramTagger, brill, brill_trainer
+from nltk.tbl import Template
+from nltk.corpus import treebank
+
+from nltk.tbl import demo
+
+
+class TestBrill(unittest.TestCase):
+    def test_pos_template(self):
+        train_sents = treebank.tagged_sents()[:1000]
+        tagger = UnigramTagger(train_sents)
+        trainer = brill_trainer.BrillTaggerTrainer(
+            tagger, [brill.Template(brill.Pos([-1]))]
+        )
+        brill_tagger = trainer.train(train_sents)
+        # Example from https://github.com/nltk/nltk/issues/769
+        result = brill_tagger.tag('This is a foo bar sentence'.split())
+        expected = [
+            ('This', 'DT'),
+            ('is', 'VBZ'),
+            ('a', 'DT'),
+            ('foo', None),
+            ('bar', 'NN'),
+            ('sentence', None),
+        ]
+        self.assertEqual(result, expected)
+
+    @unittest.skip("Should be tested in __main__ of nltk.tbl.demo")
+    def test_brill_demo(self):
+        demo()
--- a/venv/lib/python3.7/site-packages/nltk/test/unit/test_cfd_mutation.py
+++ b/venv/lib/python3.7/site-packages/nltk/test/unit/test_cfd_mutation.py
@@ -0,0 +1,39 @@
+import unittest
+from nltk import ConditionalFreqDist, tokenize
+
+class TestEmptyCondFreq(unittest.TestCase):
+    def test_tabulate(self):
+        empty = ConditionalFreqDist()
+        self.assertEqual(empty.conditions(),[])
+        try:
+            empty.tabulate(conditions="BUG") # nonexistent keys shouldn't be added
+        except:
+            pass
+        self.assertEqual(empty.conditions(), [])
+
+
+    def test_plot(self):
+        empty = ConditionalFreqDist()
+        self.assertEqual(empty.conditions(),[])
+        try:
+            empty.plot(conditions=["BUG"]) # nonexistent keys shouldn't be added
+        except:
+            pass
+        self.assertEqual(empty.conditions(),[])
+
+    def test_increment(self):
+        # make sure that we can still mutate cfd normally
+        text = "cow cat mouse cat tiger"
+        cfd = ConditionalFreqDist()
+
+        # create cfd with word length as condition 
+        for word in tokenize.word_tokenize(text):
+            condition = len(word)
+            cfd[condition][word] += 1
+
+        self.assertEqual(cfd.conditions(), [3,5])
+
+        # incrementing previously unseen key is still possible
+        cfd[2]['hi'] += 1
+        self.assertEqual(set(cfd.conditions()),set([3,5,2])) # new condition added
+        self.assertEqual(cfd[2]['hi'], 1) # key's frequency incremented from 0 (unseen) to 1
--- a/venv/lib/python3.7/site-packages/nltk/test/unit/test_cfg2chomsky.py
+++ b/venv/lib/python3.7/site-packages/nltk/test/unit/test_cfg2chomsky.py
@@ -0,0 +1,49 @@
+# -*- coding: utf-8 -*-
+import unittest
+import nltk
+from nltk.grammar import CFG
+
+
+class ChomskyNormalFormForCFGTest(unittest.TestCase):
+    def test_simple(self):
+        grammar = CFG.fromstring(
+            """
+          S -> NP VP
+          PP -> P NP
+          NP -> Det N | NP PP P
+          VP -> V NP | VP PP
+          VP -> Det
+          Det -> 'a' | 'the'
+          N -> 'dog' | 'cat'
+          V -> 'chased' | 'sat'
+          P -> 'on' | 'in'
+        """
+        )
+        self.assertFalse(grammar.is_flexible_chomsky_normal_form())
+        self.assertFalse(grammar.is_chomsky_normal_form())
+        grammar = grammar.chomsky_normal_form(flexible=True)
+        self.assertTrue(grammar.is_flexible_chomsky_normal_form())
+        self.assertFalse(grammar.is_chomsky_normal_form())
+
+        grammar2 = CFG.fromstring(
+            """
+          S -> NP VP
+          NP -> VP N P
+          VP -> P
+          N -> 'dog' | 'cat'
+          P -> 'on' | 'in'
+        """
+        )
+        self.assertFalse(grammar2.is_flexible_chomsky_normal_form())
+        self.assertFalse(grammar2.is_chomsky_normal_form())
+        grammar2 = grammar2.chomsky_normal_form()
+        self.assertTrue(grammar2.is_flexible_chomsky_normal_form())
+        self.assertTrue(grammar2.is_chomsky_normal_form())
+
+    def test_complex(self):
+        grammar = nltk.data.load('grammars/large_grammars/atis.cfg')
+        self.assertFalse(grammar.is_flexible_chomsky_normal_form())
+        self.assertFalse(grammar.is_chomsky_normal_form())
+        grammar = grammar.chomsky_normal_form(flexible=True)
+        self.assertTrue(grammar.is_flexible_chomsky_normal_form())
+        self.assertFalse(grammar.is_chomsky_normal_form())
--- a/venv/lib/python3.7/site-packages/nltk/test/unit/test_chunk.py
+++ b/venv/lib/python3.7/site-packages/nltk/test/unit/test_chunk.py
@@ -0,0 +1,87 @@
+# -*- coding: utf-8 -*-
+from __future__ import absolute_import, unicode_literals
+import unittest
+
+from nltk import RegexpParser
+
+
+class TestChunkRule(unittest.TestCase):
+    def test_tag_pattern2re_pattern_quantifier(self):
+        """Test for bug https://github.com/nltk/nltk/issues/1597
+
+        Ensures that curly bracket quantifiers can be used inside a chunk rule.
+        This type of quantifier has been used for the supplementary example
+        in http://www.nltk.org/book/ch07.html#exploring-text-corpora.
+        """
+        sent = [
+            ('The', 'AT'),
+            ('September-October', 'NP'),
+            ('term', 'NN'),
+            ('jury', 'NN'),
+            ('had', 'HVD'),
+            ('been', 'BEN'),
+            ('charged', 'VBN'),
+            ('by', 'IN'),
+            ('Fulton', 'NP-TL'),
+            ('Superior', 'JJ-TL'),
+            ('Court', 'NN-TL'),
+            ('Judge', 'NN-TL'),
+            ('Durwood', 'NP'),
+            ('Pye', 'NP'),
+            ('to', 'TO'),
+            ('investigate', 'VB'),
+            ('reports', 'NNS'),
+            ('of', 'IN'),
+            ('possible', 'JJ'),
+            ('``', '``'),
+            ('irregularities', 'NNS'),
+            ("''", "''"),
+            ('in', 'IN'),
+            ('the', 'AT'),
+            ('hard-fought', 'JJ'),
+            ('primary', 'NN'),
+            ('which', 'WDT'),
+            ('was', 'BEDZ'),
+            ('won', 'VBN'),
+            ('by', 'IN'),
+            ('Mayor-nominate', 'NN-TL'),
+            ('Ivan', 'NP'),
+            ('Allen', 'NP'),
+            ('Jr.', 'NP'),
+            ('.', '.'),
+        ]  # source: brown corpus
+        cp = RegexpParser('CHUNK: {<N.*>{4,}}')
+        tree = cp.parse(sent)
+        assert (
+            tree.pformat()
+            == """(S
+  The/AT
+  September-October/NP
+  term/NN
+  jury/NN
+  had/HVD
+  been/BEN
+  charged/VBN
+  by/IN
+  Fulton/NP-TL
+  Superior/JJ-TL
+  (CHUNK Court/NN-TL Judge/NN-TL Durwood/NP Pye/NP)
+  to/TO
+  investigate/VB
+  reports/NNS
+  of/IN
+  possible/JJ
+  ``/``
+  irregularities/NNS
+  ''/''
+  in/IN
+  the/AT
+  hard-fought/JJ
+  primary/NN
+  which/WDT
+  was/BEDZ
+  won/VBN
+  by/IN
+  (CHUNK Mayor-nominate/NN-TL Ivan/NP Allen/NP Jr./NP)
+  ./.)"""
+        )
--- a/venv/lib/python3.7/site-packages/nltk/test/unit/test_classify.py
+++ b/venv/lib/python3.7/site-packages/nltk/test/unit/test_classify.py
@@ -0,0 +1,50 @@
+# -*- coding: utf-8 -*-
+"""
+Unit tests for nltk.classify. See also: nltk/test/classify.doctest
+"""
+from __future__ import absolute_import
+from nose import SkipTest
+from nltk import classify
+
+# Labelled training data: (featureset, label) pairs over three binary
+# features a, b, c and the two labels 'x' and 'y'.
+TRAIN = [
+    (dict(a=1, b=1, c=1), 'y'),
+    (dict(a=1, b=1, c=1), 'x'),
+    (dict(a=1, b=1, c=0), 'y'),
+    (dict(a=0, b=1, c=1), 'x'),
+    (dict(a=0, b=1, c=1), 'y'),
+    (dict(a=0, b=0, c=1), 'y'),
+    (dict(a=0, b=1, c=0), 'x'),
+    (dict(a=0, b=0, c=0), 'x'),
+    (dict(a=0, b=1, c=1), 'y'),
+]
+
+# Unlabelled featuresets to classify. The parentheses around each dict are
+# plain grouping, not tuples.
+TEST = [
+    (dict(a=1, b=0, c=1)),  # unseen
+    (dict(a=1, b=0, c=0)),  # unseen
+    (dict(a=0, b=1, c=1)),  # seen 3 times, labels=y,y,x
+    (dict(a=0, b=1, c=0)),  # seen 1 time, label=x
+]
+
+# Expected (P('x'), P('y')) for each TEST entry, in the same order; compared
+# with an absolute tolerance of 1e-2 in assert_classifier_correct.
+RESULTS = [(0.16, 0.84), (0.46, 0.54), (0.41, 0.59), (0.76, 0.24)]
+
+
+def assert_classifier_correct(algorithm):
+    try:
+        classifier = classify.MaxentClassifier.train(
+            TRAIN, algorithm, trace=0, max_iter=1000
+        )
+    except (LookupError, AttributeError) as e:
+        raise SkipTest(str(e))
+
+    for (px, py), featureset in zip(RESULTS, TEST):
+        pdist = classifier.prob_classify(featureset)
+        assert abs(pdist.prob('x') - px) < 1e-2, (pdist.prob('x'), px)
+        assert abs(pdist.prob('y') - py) < 1e-2, (pdist.prob('y'), py)
+
+
+def test_megam():
+    # Run the shared classifier check with the 'MEGAM' algorithm name; the
+    # helper raises SkipTest if training fails with LookupError/AttributeError.
+    assert_classifier_correct('MEGAM')
+
+
+def test_tadm():
+    # Run the shared classifier check with the 'TADM' algorithm name; the
+    # helper raises SkipTest if training fails with LookupError/AttributeError.
+    assert_classifier_correct('TADM')
--- a/venv/lib/python3.7/site-packages/nltk/test/unit/test_collocations.py
+++ b/venv/lib/python3.7/site-packages/nltk/test/unit/test_collocations.py
@@ -0,0 +1,159 @@
+# -*- coding: utf-8 -*-
+from __future__ import absolute_import, unicode_literals
+import unittest
+
+from nltk.collocations import BigramCollocationFinder
+from nltk.metrics import BigramAssocMeasures
+
+## Test bigram counters with discontinuous bigrams and repeated words
+
+_EPSILON = 1e-8
+
+
+def close_enough(x, y):
+    """Verify that two sequences of n-gram association values are within
+       _EPSILON of each other.
+    """
+
+    for (x1, y1) in zip(x, y):
+        if x1[0] != y1[0] or abs(x1[1] - y1[1]) > _EPSILON:
+            return False
+    return True
+
+
+class TestBigram(unittest.TestCase):
+    def test_bigram2(self):
+        sent = 'this this is is a a test test'.split()
+
+        b = BigramCollocationFinder.from_words(sent)
+
+        # python 2.6 does not have assertItemsEqual or assertListEqual
+        self.assertEqual(
+            sorted(b.ngram_fd.items()),
+            sorted(
+                [
+                    (('a', 'a'), 1),
+                    (('a', 'test'), 1),
+                    (('is', 'a'), 1),
+                    (('is', 'is'), 1),
+                    (('test', 'test'), 1),
+                    (('this', 'is'), 1),
+                    (('this', 'this'), 1),
+                ]
+            ),
+        )
+        self.assertEqual(
+            sorted(b.word_fd.items()),
+            sorted([('a', 2), ('is', 2), ('test', 2), ('this', 2)]),
+        )
+        self.assertTrue(
+            len(sent) == sum(b.word_fd.values()) == sum(b.ngram_fd.values()) + 1
+        )
+        self.assertTrue(
+            close_enough(
+                sorted(b.score_ngrams(BigramAssocMeasures.pmi)),
+                sorted(
+                    [
+                        (('a', 'a'), 1.0),
+                        (('a', 'test'), 1.0),
+                        (('is', 'a'), 1.0),
+                        (('is', 'is'), 1.0),
+                        (('test', 'test'), 1.0),
+                        (('this', 'is'), 1.0),
+                        (('this', 'this'), 1.0),
+                    ]
+                ),
+            )
+        )
+
+    def test_bigram3(self):
+        sent = 'this this is is a a test test'.split()
+
+        b = BigramCollocationFinder.from_words(sent, window_size=3)
+        self.assertEqual(
+            sorted(b.ngram_fd.items()),
+            sorted(
+                [
+                    (('a', 'test'), 3),
+                    (('is', 'a'), 3),
+                    (('this', 'is'), 3),
+                    (('a', 'a'), 1),
+                    (('is', 'is'), 1),
+                    (('test', 'test'), 1),
+                    (('this', 'this'), 1),
+                ]
+            ),
+        )
+        self.assertEqual(
+            sorted(b.word_fd.items()),
+            sorted([('a', 2), ('is', 2), ('test', 2), ('this', 2)]),
+        )
+        self.assertTrue(
+            len(sent)
+            == sum(b.word_fd.values())
+            == (sum(b.ngram_fd.values()) + 2 + 1) / 2.0
+        )
+        self.assertTrue(
+            close_enough(
+                sorted(b.score_ngrams(BigramAssocMeasures.pmi)),
+                sorted(
+                    [
+                        (('a', 'test'), 1.584962500721156),
+                        (('is', 'a'), 1.584962500721156),
+                        (('this', 'is'), 1.584962500721156),
+                        (('a', 'a'), 0.0),
+                        (('is', 'is'), 0.0),
+                        (('test', 'test'), 0.0),
+                        (('this', 'this'), 0.0),
+                    ]
+                ),
+            )
+        )
+
+    def test_bigram5(self):
+        sent = 'this this is is a a test test'.split()
+
+        b = BigramCollocationFinder.from_words(sent, window_size=5)
+        self.assertEqual(
+            sorted(b.ngram_fd.items()),
+            sorted(
+                [
+                    (('a', 'test'), 4),
+                    (('is', 'a'), 4),
+                    (('this', 'is'), 4),
+                    (('is', 'test'), 3),
+                    (('this', 'a'), 3),
+                    (('a', 'a'), 1),
+                    (('is', 'is'), 1),
+                    (('test', 'test'), 1),
+                    (('this', 'this'), 1),
+                ]
+            ),
+        )
+        self.assertEqual(
+            sorted(b.word_fd.items()),
+            sorted([('a', 2), ('is', 2), ('test', 2), ('this', 2)]),
+        )
+        self.assertTrue(
+            len(sent)
+            == sum(b.word_fd.values())
+            == (sum(b.ngram_fd.values()) + 4 + 3 + 2 + 1) / 4.0
+        )
+        self.assertTrue(
+            close_enough(
+                sorted(b.score_ngrams(BigramAssocMeasures.pmi)),
+                sorted(
+                    [
+                        (('a', 'test'), 1.0),
+                        (('is', 'a'), 1.0),
+                        (('this', 'is'), 1.0),
+                        (('is', 'test'), 0.5849625007211562),
+                        (('this', 'a'), 0.5849625007211562),
+                        (('a', 'a'), -1.0),
+                        (('is', 'is'), -1.0),
+                        (('test', 'test'), -1.0),
+                        (('this', 'this'), -1.0),
+                    ]
+                ),
+            )
+        )
--- a/venv/lib/python3.7/site-packages/nltk/test/unit/test_concordance.py
+++ b/venv/lib/python3.7/site-packages/nltk/test/unit/test_concordance.py
@@ -0,0 +1,107 @@
+# -*- coding: utf-8 -*-
+from __future__ import absolute_import, unicode_literals
+
+import unittest
+import contextlib
+import sys
+
+from nose import with_setup
+
+from nltk.corpus import gutenberg
+from nltk.text import Text
+
+try:
+    from StringIO import StringIO
+except ImportError as e:
+    from io import StringIO
+
+
+@contextlib.contextmanager
+def stdout_redirect(where):
+    sys.stdout = where
+    try:
+        yield where
+    finally:
+        sys.stdout = sys.__stdout__
+
+
+class TestConcordance(unittest.TestCase):
+    """Text constructed using: http://www.nltk.org/book/ch01.html"""
+
+    @classmethod
+    def setup_class(cls):
+        # Load the Moby Dick word list once for the whole class.
+        # NOTE(review): setup_class/teardown_class is presumably the nose-style
+        # spelling (this module imports nose); plain unittest looks for
+        # setUpClass instead - confirm which runner is intended.
+        cls.corpus = gutenberg.words('melville-moby_dick.txt')
+
+    @classmethod
+    def teardown_class(cls):
+        pass
+
+    def setUp(self):
+        # Fresh Text per test, plus the query and its expected concordance lines.
+        self.text = Text(TestConcordance.corpus)
+        self.query = "monstrous"
+        # Do not truncate unittest's diff output when a comparison fails.
+        self.maxDiff = None
+        self.list_out = [
+            'ong the former , one was of a most monstrous size . ... This came towards us , ',
+            'ON OF THE PSALMS . " Touching that monstrous bulk of the whale or ork we have r',
+            'll over with a heathenish array of monstrous clubs and spears . Some were thick',
+            'd as you gazed , and wondered what monstrous cannibal and savage could ever hav',
+            'that has survived the flood ; most monstrous and most mountainous ! That Himmal',
+            'they might scout at Moby Dick as a monstrous fable , or still worse and more de',
+            'th of Radney .\'" CHAPTER 55 Of the Monstrous Pictures of Whales . I shall ere l',
+            'ing Scenes . In connexion with the monstrous pictures of whales , I am strongly',
+            'ere to enter upon those still more monstrous stories of them which are to be fo',
+            'ght have been rummaged out of this monstrous cabinet there is no telling . But ',
+            'of Whale - Bones ; for Whales of a monstrous size are oftentimes cast up dead u',
+        ]
+
+    def tearDown(self):
+        pass
+
+    def test_concordance_list(self):
+        # With default arguments, the .line of each hit must match list_out.
+        concordance_out = self.text.concordance_list(self.query)
+        self.assertEqual(self.list_out, [c.line for c in concordance_out])
+
+    def test_concordance_width(self):
+        # With width=0 only the matched token (.query) of each hit is compared;
+        # note the capitalised seventh match.
+        list_out = [
+            "monstrous",
+            "monstrous",
+            "monstrous",
+            "monstrous",
+            "monstrous",
+            "monstrous",
+            "Monstrous",
+            "monstrous",
+            "monstrous",
+            "monstrous",
+            "monstrous",
+        ]
+
+        concordance_out = self.text.concordance_list(self.query, width=0)
+        self.assertEqual(list_out, [c.query for c in concordance_out])
+
+    def test_concordance_lines(self):
+        # lines=3 must return exactly the first three expected lines.
+        concordance_out = self.text.concordance_list(self.query, lines=3)
+        self.assertEqual(self.list_out[:3], [c.line for c in concordance_out])
+
+    def test_concordance_print(self):
+        # Expected printed output. The indentation inside this literal is
+        # harmless because every space is removed before comparing (see
+        # strip_space below); newlines are still significant.
+        print_out = """Displaying 11 of 11 matches:
+        ong the former , one was of a most monstrous size . ... This came towards us ,
+        ON OF THE PSALMS . " Touching that monstrous bulk of the whale or ork we have r
+        ll over with a heathenish array of monstrous clubs and spears . Some were thick
+        d as you gazed , and wondered what monstrous cannibal and savage could ever hav
+        that has survived the flood ; most monstrous and most mountainous ! That Himmal
+        they might scout at Moby Dick as a monstrous fable , or still worse and more de
+        th of Radney .'" CHAPTER 55 Of the Monstrous Pictures of Whales . I shall ere l
+        ing Scenes . In connexion with the monstrous pictures of whales , I am strongly
+        ere to enter upon those still more monstrous stories of them which are to be fo
+        ght have been rummaged out of this monstrous cabinet there is no telling . But
+        of Whale - Bones ; for Whales of a monstrous size are oftentimes cast up dead u
+        """
+
+        # Capture what concordance() prints.
+        with stdout_redirect(StringIO()) as stdout:
+            self.text.concordance(self.query)
+
+        def strip_space(raw_str):
+            # Remove space characters only; other whitespace is kept.
+            return raw_str.replace(" ", "")
+
+        self.assertEqual(strip_space(print_out), strip_space(stdout.getvalue()))
--- a/venv/lib/python3.7/site-packages/nltk/test/unit/test_corenlp.py
+++ b/venv/lib/python3.7/site-packages/nltk/test/unit/test_corenlp.py
--- a/venv/lib/python3.7/site-packages/nltk/test/unit/test_corpora.py
+++ b/venv/lib/python3.7/site-packages/nltk/test/unit/test_corpora.py
@@ -0,0 +1,272 @@
+# -*- coding: utf-8 -*-
+from __future__ import absolute_import, unicode_literals
+import unittest
+
+from nltk.corpus import (
+    sinica_treebank,
+    conll2007,
+    indian,
+    cess_cat,
+    cess_esp,
+    floresta,
+    ptb,
+    udhr,
+)  # mwa_ppdb
+
+from nltk.compat import python_2_unicode_compatible
+from nltk.tree import Tree
+from nltk.test.unit.utils import skipIf
+
+
+class TestUdhr(unittest.TestCase):
+    def test_words(self):
+        for name in udhr.fileids():
+            try:
+                words = list(udhr.words(name))
+            except AssertionError:
+                print(name)
+                raise
+            self.assertTrue(words)
+
+    def test_raw_unicode(self):
+        for name in udhr.fileids():
+            txt = udhr.raw(name)
+            assert not isinstance(txt, bytes), name
+
+
+class TestIndian(unittest.TestCase):
+    def test_words(self):
+        words = indian.words()[:3]
+        self.assertEqual(words, ['মহিষের', 'সন্তান', ':'])
+
+    def test_tagged_words(self):
+        tagged_words = indian.tagged_words()[:3]
+        self.assertEqual(
+            tagged_words, [('মহিষের', 'NN'), ('সন্তান', 'NN'), (':', 'SYM')]
+        )
+
+
+class TestCess(unittest.TestCase):
+    def test_catalan(self):
+        words = cess_cat.words()[:15]
+        txt = "El Tribunal_Suprem -Fpa- TS -Fpt- ha confirmat la condemna a quatre anys d' inhabilitació especial"
+        self.assertEqual(words, txt.split())
+        self.assertEqual(cess_cat.tagged_sents()[0][34][0], "càrrecs")
+
+    def test_esp(self):
+        words = cess_esp.words()[:15]
+        txt = "El grupo estatal Electricité_de_France -Fpa- EDF -Fpt- anunció hoy , jueves , la compra del"
+        self.assertEqual(words, txt.split())
+        self.assertEqual(cess_esp.words()[115], "años")
+
+
+class TestFloresta(unittest.TestCase):
+    def test_words(self):
+        words = floresta.words()[:10]
+        txt = "Um revivalismo refrescante O 7_e_Meio é um ex-libris de a"
+        self.assertEqual(words, txt.split())
+
+
+class TestSinicaTreebank(unittest.TestCase):
+    def test_sents(self):
+        first_3_sents = sinica_treebank.sents()[:3]
+        self.assertEqual(
+            first_3_sents, [['一'], ['友情'], ['嘉珍', '和', '我', '住在', '同一條', '巷子']]
+        )
+
+    def test_parsed_sents(self):
+        parsed_sents = sinica_treebank.parsed_sents()[25]
+        self.assertEqual(
+            parsed_sents,
+            Tree(
+                'S',
+                [
+                    Tree('NP', [Tree('Nba', ['嘉珍'])]),
+                    Tree('V‧地', [Tree('VA11', ['不停']), Tree('DE', ['的'])]),
+                    Tree('VA4', ['哭泣']),
+                ],
+            ),
+        )
+
+
+class TestCoNLL2007(unittest.TestCase):
+    # Reading the CoNLL 2007 Dependency Treebanks
+    # Both tests use the first sentence of the 'esp.train' file.
+
+    def test_sents(self):
+        # Only the first six tokens of the sentence are compared.
+        sents = conll2007.sents('esp.train')[0]
+        self.assertEqual(
+            sents[:6], ['El', 'aumento', 'del', 'índice', 'de', 'desempleo']
+        )
+
+    def test_parsed_sents(self):
+
+        parsed_sents = conll2007.parsed_sents('esp.train')[0]
+
+        # The parsed sentence is converted with .tree() and compared against
+        # the full expected Tree, rooted at 'fortaleció'.
+        self.assertEqual(
+            parsed_sents.tree(),
+            Tree(
+                'fortaleció',
+                [
+                    Tree(
+                        'aumento',
+                        [
+                            'El',
+                            Tree(
+                                'del',
+                                [
+                                    Tree(
+                                        'índice',
+                                        [
+                                            Tree(
+                                                'de',
+                                                [Tree('desempleo', ['estadounidense'])],
+                                            )
+                                        ],
+                                    )
+                                ],
+                            ),
+                        ],
+                    ),
+                    'hoy',
+                    'considerablemente',
+                    Tree(
+                        'al',
+                        [
+                            Tree(
+                                'euro',
+                                [
+                                    Tree(
+                                        'cotizaba',
+                                        [
+                                            ',',
+                                            'que',
+                                            Tree('a', [Tree('15.35', ['las', 'GMT'])]),
+                                            'se',
+                                            Tree(
+                                                'en',
+                                                [
+                                                    Tree(
+                                                        'mercado',
+                                                        [
+                                                            'el',
+                                                            Tree('de', ['divisas']),
+                                                            Tree('de', ['Fráncfort']),
+                                                        ],
+                                                    )
+                                                ],
+                                            ),
+                                            Tree('a', ['0,9452_dólares']),
+                                            Tree(
+                                                'frente_a',
+                                                [
+                                                    ',',
+                                                    Tree(
+                                                        '0,9349_dólares',
+                                                        [
+                                                            'los',
+                                                            Tree(
+                                                                'de',
+                                                                [
+                                                                    Tree(
+                                                                        'mañana',
+                                                                        ['esta'],
+                                                                    )
+                                                                ],
+                                                            ),
+                                                        ],
+                                                    ),
+                                                ],
+                                            ),
+                                        ],
+                                    )
+                                ],
+                            )
+                        ],
+                    ),
+                    '.',
+                ],
+            ),
+        )
+
+
+@skipIf(not ptb.fileids(), "A full installation of the Penn Treebank is not available")
+class TestPTB(unittest.TestCase):
+    def test_fileids(self):
+        self.assertEqual(
+            ptb.fileids()[:4],
+            [
+                'BROWN/CF/CF01.MRG',
+                'BROWN/CF/CF02.MRG',
+                'BROWN/CF/CF03.MRG',
+                'BROWN/CF/CF04.MRG',
+            ],
+        )
+
+    def test_words(self):
+        self.assertEqual(
+            ptb.words('WSJ/00/WSJ_0003.MRG')[:7],
+            ['A', 'form', 'of', 'asbestos', 'once', 'used', '*'],
+        )
+
+    def test_tagged_words(self):
+        self.assertEqual(
+            ptb.tagged_words('WSJ/00/WSJ_0003.MRG')[:3],
+            [('A', 'DT'), ('form', 'NN'), ('of', 'IN')],
+        )
+
+    def test_categories(self):
+        self.assertEqual(
+            ptb.categories(),
+            [
+                'adventure',
+                'belles_lettres',
+                'fiction',
+                'humor',
+                'lore',
+                'mystery',
+                'news',
+                'romance',
+                'science_fiction',
+            ],
+        )
+
+    def test_news_fileids(self):
+        self.assertEqual(
+            ptb.fileids('news')[:3],
+            ['WSJ/00/WSJ_0001.MRG', 'WSJ/00/WSJ_0002.MRG', 'WSJ/00/WSJ_0003.MRG'],
+        )
+
+    def test_category_words(self):
+        self.assertEqual(
+            ptb.words(categories=['humor', 'fiction'])[:6],
+            ['Thirty-three', 'Scotty', 'did', 'not', 'go', 'back'],
+        )
+
+
+@unittest.skip("Skipping test for mwa_ppdb.")
+class TestMWAPPDB(unittest.TestCase):
+    def test_fileids(self):
+        self.assertEqual(
+            mwa_ppdb.fileids(), ['ppdb-1.0-xxxl-lexical.extended.synonyms.uniquepairs']
+        )
+
+    def test_entries(self):
+        self.assertEqual(
+            mwa_ppdb.entries()[:10],
+            [
+                ('10/17/01', '17/10/2001'),
+                ('102,70', '102.70'),
+                ('13,53', '13.53'),
+                ('3.2.5.3.2.1', '3.2.5.3.2.1.'),
+                ('53,76', '53.76'),
+                ('6.9.5', '6.9.5.'),
+                ('7.7.6.3', '7.7.6.3.'),
+                ('76,20', '76.20'),
+                ('79,85', '79.85'),
+                ('93,65', '93.65'),
+            ],
+        )
+
+
+# unload corpora
+from nltk.corpus import teardown_module
--- a/venv/lib/python3.7/site-packages/nltk/test/unit/test_corpus_views.py
+++ b/venv/lib/python3.7/site-packages/nltk/test/unit/test_corpus_views.py
@@ -0,0 +1,49 @@
+# -*- coding: utf-8 -*-
+"""
+Corpus View Regression Tests
+"""
+from __future__ import absolute_import, unicode_literals
+import unittest
+import nltk.data
+from nltk.corpus.reader.util import (
+    StreamBackedCorpusView,
+    read_whitespace_block,
+    read_line_block,
+)
+
+
+class TestCorpusViews(unittest.TestCase):
+    """
+    Compare StreamBackedCorpusView results with tokenizing the decoded
+    text of the same file directly.
+    """
+
+    linetok = nltk.LineTokenizer(blanklines='keep')
+    names = [
+        'corpora/inaugural/README',  # A very short file (160 chars)
+        'corpora/inaugural/1793-Washington.txt',  # A relatively short file (791 chars)
+        'corpora/inaugural/1909-Taft.txt',  # A longer file (32k chars)
+    ]
+
+    def data(self):
+        # Yield (resource, text) for every entry of ``names``; the text is
+        # the file content decoded as UTF-8.
+        for resource_name in self.names:
+            resource = nltk.data.find(resource_name)
+            with resource.open() as stream:
+                raw = stream.read()
+            yield resource, raw.decode('utf8')
+
+    def test_correct_values(self):
+        # Each view must produce exactly the tokens found in the decoded text.
+
+        for resource, text in self.data():
+            word_view = StreamBackedCorpusView(resource, read_whitespace_block)
+            self.assertEqual(list(word_view), text.split())
+
+            line_view = StreamBackedCorpusView(resource, read_line_block)
+            self.assertEqual(list(line_view), self.linetok.tokenize(text))
+
+    def test_correct_length(self):
+        # len() of each view must equal the number of tokens in the decoded text.
+
+        for resource, text in self.data():
+            word_view = StreamBackedCorpusView(resource, read_whitespace_block)
+            self.assertEqual(len(word_view), len(text.split()))
+
+            line_view = StreamBackedCorpusView(resource, read_line_block)
+            self.assertEqual(len(line_view), len(self.linetok.tokenize(text)))
--- a/venv/lib/python3.7/site-packages/nltk/test/unit/test_data.py
+++ b/venv/lib/python3.7/site-packages/nltk/test/unit/test_data.py
@@ -0,0 +1,22 @@
+import unittest
+import nltk.data
+from nose.tools import assert_raises
+
+
+class TestData(unittest.TestCase):
+    """Checks that nltk.data.find() raises LookupError for missing resources."""
+
+    def test_find_raises_exception(self):
+        # unittest's own assertRaises is used, so no nose helper is needed.
+        with self.assertRaises(LookupError) as context:
+            nltk.data.find('no_such_resource/foo')
+
+        # Require exactly LookupError, not a subclass.
+        self.assertIs(
+            type(context.exception), LookupError, 'Unexpected exception raised'
+        )
+
+    def test_find_raises_exception_with_full_resource_name(self):
+        no_such_thing = 'no_such_thing/bar'
+
+        with self.assertRaises(LookupError) as context:
+            nltk.data.find(no_such_thing)
+
+        # assertIn (unlike a bare assert) still runs under ``python -O``.
+        self.assertIn(
+            no_such_thing,
+            str(context.exception),
+            'Exception message does not include full resource name',
+        )
--- a/venv/lib/python3.7/site-packages/nltk/test/unit/test_disagreement.py
+++ b/venv/lib/python3.7/site-packages/nltk/test/unit/test_disagreement.py
@@ -0,0 +1,142 @@
+# -*- coding: utf-8 -*-
+from __future__ import absolute_import, unicode_literals
+import unittest
+
+from nltk.metrics.agreement import AnnotationTask
+
+class TestDisagreement(unittest.TestCase):
+
+    '''
+    Unit tests for Krippendorff's alpha as computed by
+    nltk.metrics.agreement.AnnotationTask.alpha().
+    '''
+
+    # (coder, item, label) triples shared by the two "advanced" tests.
+    # Item '12' is the only item that has a single rating (by coder 'C').
+    ADVANCED_RATINGS = [('A', '1', '1'),
+                        ('B', '1', '1'),
+                        ('D', '1', '1'),
+                        ('A', '2', '2'),
+                        ('B', '2', '2'),
+                        ('C', '2', '3'),
+                        ('D', '2', '2'),
+                        ('A', '3', '3'),
+                        ('B', '3', '3'),
+                        ('C', '3', '3'),
+                        ('D', '3', '3'),
+                        ('A', '4', '3'),
+                        ('B', '4', '3'),
+                        ('C', '4', '3'),
+                        ('D', '4', '3'),
+                        ('A', '5', '2'),
+                        ('B', '5', '2'),
+                        ('C', '5', '2'),
+                        ('D', '5', '2'),
+                        ('A', '6', '1'),
+                        ('B', '6', '2'),
+                        ('C', '6', '3'),
+                        ('D', '6', '4'),
+                        ('A', '7', '4'),
+                        ('B', '7', '4'),
+                        ('C', '7', '4'),
+                        ('D', '7', '4'),
+                        ('A', '8', '1'),
+                        ('B', '8', '1'),
+                        ('C', '8', '2'),
+                        ('D', '8', '1'),
+                        ('A', '9', '2'),
+                        ('B', '9', '2'),
+                        ('C', '9', '2'),
+                        ('D', '9', '2'),
+                        ('B', '10', '5'),
+                        ('C', '10', '5'),
+                        ('D', '10', '5'),
+                        ('C', '11', '1'),
+                        ('D', '11', '1'),
+                        ('C', '12', '3'),
+                        ]
+
+    def test_easy(self):
+        '''
+        Simple test, based on
+        https://github.com/foolswood/krippendorffs_alpha/raw/master/krippendorff.pdf.
+        '''
+        data = [('coder1', 'dress1', 'YES'),
+                ('coder2', 'dress1', 'NO'),
+                ('coder3', 'dress1', 'NO'),
+                ('coder1', 'dress2', 'YES'),
+                ('coder2', 'dress2', 'NO'),
+                ('coder3', 'dress3', 'NO'),
+                ]
+        annotation_task = AnnotationTask(data)
+        self.assertAlmostEqual(annotation_task.alpha(), -0.3333333)
+
+    def test_easy2(self):
+        '''
+        Same simple test with 1 rating removed.
+        Removal of that rating should not matter: K-Alpha ignores items with
+        only 1 rating.
+        '''
+        data = [('coder1', 'dress1', 'YES'),
+                ('coder2', 'dress1', 'NO'),
+                ('coder3', 'dress1', 'NO'),
+                ('coder1', 'dress2', 'YES'),
+                ('coder2', 'dress2', 'NO'),
+                ]
+        annotation_task = AnnotationTask(data)
+        self.assertAlmostEqual(annotation_task.alpha(), -0.3333333)
+
+    def test_advanced(self):
+        '''
+        More advanced test, based on
+        http://www.agreestat.com/research_papers/onkrippendorffalpha.pdf
+        '''
+        # Pass a copy so the shared class-level list is never handed out.
+        data = list(self.ADVANCED_RATINGS)
+        annotation_task = AnnotationTask(data)
+        self.assertAlmostEqual(annotation_task.alpha(), 0.743421052632)
+
+    def test_advanced2(self):
+        '''
+        Same more advanced example, but with 1 rating removed.
+        Again, removal of that 1 rating should not matter.
+        '''
+        # Drop the lone rating of item '12'; every other item keeps all
+        # of its ratings, so the expected alpha is unchanged.
+        data = [rating for rating in self.ADVANCED_RATINGS if rating[1] != '12']
+        annotation_task = AnnotationTask(data)
+        self.assertAlmostEqual(annotation_task.alpha(), 0.743421052632)
+
--- a/venv/lib/python3.7/site-packages/nltk/test/unit/test_hmm.py
+++ b/venv/lib/python3.7/site-packages/nltk/test/unit/test_hmm.py
@@ -0,0 +1,87 @@
+# -*- coding: utf-8 -*-
+from __future__ import absolute_import, unicode_literals
+from nltk.tag import hmm
+
+
+def _wikipedia_example_hmm():
+    """
+    Build the two-state umbrella model from the Wikipedia article on the
+    forward-backward algorithm
+    (http://en.wikipedia.org/wiki/Forward%E2%80%93backward_algorithm).
+
+    Returns ``(model, states, symbols, seq)`` where ``seq`` pairs every
+    observation with ``None``.
+    """
+    states = ['rain', 'no rain']
+    symbols = ['umbrella', 'no umbrella']
+
+    transitions = [[0.7, 0.3], [0.3, 0.7]]
+    emissions = [[0.9, 0.1], [0.2, 0.8]]
+    initial = [0.5, 0.5]
+
+    observations = ['umbrella', 'umbrella', 'no umbrella', 'umbrella', 'umbrella']
+    seq = [(observation, None) for observation in observations]
+
+    model = hmm._create_hmm_tagger(states, symbols, transitions, emissions, initial)
+    return model, states, symbols, seq
+
+
+def test_forward_probability():
+    # Worked example from p. 385, Huang et al.
+    from numpy.testing import assert_array_almost_equal
+
+    model, _states, _symbols = hmm._market_hmm_example()
+    observed = [('up', None), ('up', None)]
+
+    # The model's result is used as a base-2 exponent before comparing.
+    actual = 2 ** model._forward_probability(observed)
+
+    assert_array_almost_equal(
+        actual, [[0.35, 0.02, 0.09], [0.1792, 0.0085, 0.0357]]
+    )
+
+
+def test_forward_probability2():
+    # Forward probabilities for the Wikipedia umbrella example.
+    from numpy.testing import assert_array_almost_equal
+
+    wikipedia_results = [
+        [0.8182, 0.1818],
+        [0.8834, 0.1166],
+        [0.1907, 0.8093],
+        [0.7308, 0.2692],
+        [0.8673, 0.1327],
+    ]
+
+    model, _states, _symbols, seq = _wikipedia_example_hmm()
+    raw = 2 ** model._forward_probability(seq)
+
+    # The Wikipedia figures are normalized, so divide each row by its sum.
+    row_sums = raw.sum(axis=1)
+    normalized = (raw.T / row_sums).T
+
+    assert_array_almost_equal(wikipedia_results, normalized, 4)
+
+
+def test_backward_probability():
+    # Backward probabilities for the Wikipedia umbrella example.
+    from numpy.testing import assert_array_almost_equal
+
+    wikipedia_results = [
+        # Forward-backward algorithm doesn't need b0_5,
+        # so .backward_probability doesn't compute it.
+        # [0.6469, 0.3531],
+        [0.5923, 0.4077],
+        [0.3763, 0.6237],
+        [0.6533, 0.3467],
+        [0.6273, 0.3727],
+        [0.5, 0.5],
+    ]
+
+    model, _states, _symbols, seq = _wikipedia_example_hmm()
+    raw = 2 ** model._backward_probability(seq)
+
+    # The Wikipedia figures are normalized, so divide each row by its sum.
+    row_sums = raw.sum(axis=1)
+    normalized = (raw.T / row_sums).T
+
+    assert_array_almost_equal(wikipedia_results, normalized, 4)
+
+
+def setup_module(module):
+    """
+    Raise nose's SkipTest when numpy cannot be imported.
+
+    ``module`` is accepted but not used.
+    """
+    from nose import SkipTest
+
+    try:
+        # Imported only to probe availability; the name itself is unused here.
+        import numpy
+    except ImportError:
+        raise SkipTest("numpy is required for nltk.test.test_hmm")
--- a/venv/lib/python3.7/site-packages/nltk/test/unit/test_json2csv_corpus.py
+++ b/venv/lib/python3.7/site-packages/nltk/test/unit/test_json2csv_corpus.py
@@ -0,0 +1,237 @@
+# -*- coding: utf-8 -*-
+# Natural Language Toolkit: Twitter client
+#
+# Copyright (C) 2001-2019 NLTK Project
+# Author: Lorenzo Rubio <lrnzcig@gmail.com>
+# URL: <http://nltk.org/>
+# For license information, see LICENSE.TXT
+
+"""
+Regression tests for `json2csv()` and `json2csv_entities()` in Twitter
+package.
+
+"""
+
+import os
+import unittest
+
+from six.moves import zip
+
+from nltk.compat import TemporaryDirectory
+from nltk.corpus import twitter_samples
+from nltk.twitter.common import json2csv, json2csv_entities
+
+
+def are_files_identical(filename1, filename2, debug=False):
+    """
+    Compare two files, ignoring line order and surrounding whitespace
+    (which covers carriage returns).
+
+    Both files are read in binary mode; every line is stripped and the
+    stripped lines are sorted before being compared pairwise.  Files with
+    a different number of lines are never considered identical.
+
+    :param filename1: path of the output file under test
+    :param filename2: path of the reference file
+    :param debug: if true, print the first difference that was found
+    :return: ``True`` if the files match, ``False`` otherwise
+    """
+    # Strip before sorting so that the ordering cannot depend on the
+    # line-ending style of either file.
+    with open(filename1, "rb") as fileA:
+        linesA = sorted(line.strip() for line in fileA.readlines())
+    with open(filename2, "rb") as fileB:
+        linesB = sorted(line.strip() for line in fileB.readlines())
+
+    # zip() stops at the shorter input, so without this check an empty or
+    # truncated file would compare equal to a longer one.
+    if len(linesA) != len(linesB):
+        if debug:
+            print(
+                "Error while comparing files. "
+                + "Line counts differ: {0} vs {1}.".format(len(linesA), len(linesB))
+            )
+        return False
+
+    for lineA, lineB in zip(linesA, linesB):
+        if lineA != lineB:
+            if debug:
+                print(
+                    "Error while comparing files. "
+                    + "First difference at line below."
+                )
+                print("=> Output file line: {0}".format(lineA))
+                print("=> Refer. file line: {0}".format(lineB))
+            return False
+    return True
+
+
+class TestJSON2CSV(unittest.TestCase):
+    """
+    Regression tests that convert the first 100 sample tweets with
+    `json2csv()` / `json2csv_entities()` and compare each result with the
+    matching reference file in the ``files`` subdirectory.
+    """
+
+    def setUp(self):
+        # The ``with`` block closes the file; no explicit close() is needed.
+        with open(twitter_samples.abspath("tweets.20150430-223406.json")) as infile:
+            self.infile = [next(infile) for x in range(100)]
+        self.msg = "Test and reference files are not the same"
+        self.subdir = os.path.join(os.path.dirname(__file__), 'files')
+
+    def _ref_path(self, kind):
+        # Path of the stored reference CSV for the given output kind.
+        return os.path.join(self.subdir, 'tweets.20150430-223406.%s.csv.ref' % kind)
+
+    def _check_json2csv(self, kind, fields):
+        # Convert with json2csv() into a temporary directory and require the
+        # output to match the reference file of the same kind.
+        with TemporaryDirectory() as tempdir:
+            outfn = os.path.join(tempdir, 'tweets.20150430-223406.%s.csv' % kind)
+            json2csv(self.infile, outfn, fields, gzip_compress=False)
+            self.assertTrue(
+                are_files_identical(outfn, self._ref_path(kind)), msg=self.msg
+            )
+
+    def _check_json2csv_entities(self, kind, main_fields, entity_type, entity_fields):
+        # Same as _check_json2csv(), but for json2csv_entities().
+        with TemporaryDirectory() as tempdir:
+            outfn = os.path.join(tempdir, 'tweets.20150430-223406.%s.csv' % kind)
+            json2csv_entities(
+                self.infile,
+                outfn,
+                main_fields,
+                entity_type,
+                entity_fields,
+                gzip_compress=False,
+            )
+            self.assertTrue(
+                are_files_identical(outfn, self._ref_path(kind)), msg=self.msg
+            )
+
+    def test_textoutput(self):
+        self._check_json2csv('text', ['text'])
+
+    def test_tweet_metadata(self):
+        fields = [
+            'created_at',
+            'favorite_count',
+            'id',
+            'in_reply_to_status_id',
+            'in_reply_to_user_id',
+            'retweet_count',
+            'retweeted',
+            'text',
+            'truncated',
+            'user.id',
+        ]
+        self._check_json2csv('tweet', fields)
+
+    def test_user_metadata(self):
+        fields = ['id', 'text', 'user.id', 'user.followers_count', 'user.friends_count']
+        self._check_json2csv('user', fields)
+
+    def test_tweet_hashtag(self):
+        self._check_json2csv_entities('hashtag', ['id', 'text'], 'hashtags', ['text'])
+
+    def test_tweet_usermention(self):
+        self._check_json2csv_entities(
+            'usermention', ['id', 'text'], 'user_mentions', ['id', 'screen_name']
+        )
+
+    def test_tweet_media(self):
+        self._check_json2csv_entities('media', ['id'], 'media', ['media_url', 'url'])
+
+    def test_tweet_url(self):
+        self._check_json2csv_entities('url', ['id'], 'urls', ['url', 'expanded_url'])
+
+    def test_userurl(self):
+        self._check_json2csv_entities(
+            'userurl', ['id', 'screen_name'], 'user.urls', ['url', 'expanded_url']
+        )
+
+    def test_tweet_place(self):
+        self._check_json2csv_entities(
+            'place', ['id', 'text'], 'place', ['name', 'country']
+        )
+
+    def test_tweet_place_boundingbox(self):
+        self._check_json2csv_entities(
+            'placeboundingbox', ['id', 'name'], 'place.bounding_box', ['coordinates']
+        )
+
+    def test_retweet_original_tweet(self):
+        entity_fields = [
+            'created_at',
+            'favorite_count',
+            'id',
+            'in_reply_to_status_id',
+            'in_reply_to_user_id',
+            'retweet_count',
+            'text',
+            'truncated',
+            'user.id',
+        ]
+        self._check_json2csv_entities(
+            'retweet', ['id'], 'retweeted_status', entity_fields
+        )
+
+    def test_file_is_wrong(self):
+        """
+        Sanity check that file comparison is not giving false positives.
+        """
+        # Deliberately compare the 'text' output with the 'retweet' reference.
+        ref_fn = self._ref_path('retweet')
+        with TemporaryDirectory() as tempdir:
+            outfn = os.path.join(tempdir, 'tweets.20150430-223406.text.csv')
+            json2csv(self.infile, outfn, ['text'], gzip_compress=False)
+            self.assertFalse(are_files_identical(outfn, ref_fn), msg=self.msg)
+
+
+# Allow running this test module directly as a script.
+if __name__ == "__main__":
+    unittest.main()
--- a/venv/lib/python3.7/site-packages/nltk/test/unit/test_naivebayes.py
+++ b/venv/lib/python3.7/site-packages/nltk/test/unit/test_naivebayes.py
@@ -0,0 +1,24 @@
+# -*- coding: utf-8 -*-
+from __future__ import print_function, unicode_literals
+
+
+import unittest
+from nltk.classify.naivebayes import NaiveBayesClassifier
+
+
+class NaiveBayesClassifierTest(unittest.TestCase):
+    """Smoke test for NaiveBayesClassifier on a two-example training set."""
+
+    def test_simple(self):
+        training_features = [
+            ({'nice': True, 'good': True}, 'positive'),
+            ({'bad': True, 'mean': True}, 'negative'),
+        ]
+
+        classifier = NaiveBayesClassifier.train(training_features)
+
+        # assertGreater/assertLess report both probabilities on failure,
+        # which assertTrue(a > b) does not.
+        result = classifier.prob_classify({'nice': True})
+        self.assertGreater(result.prob('positive'), result.prob('negative'))
+        self.assertEqual(result.max(), 'positive')
+
+        result = classifier.prob_classify({'bad': True})
+        self.assertLess(result.prob('positive'), result.prob('negative'))
+        self.assertEqual(result.max(), 'negative')
--- a/venv/lib/python3.7/site-packages/nltk/test/unit/test_nombank.py
+++ b/venv/lib/python3.7/site-packages/nltk/test/unit/test_nombank.py
@@ -0,0 +1,28 @@
+# -*- coding: utf-8 -*-
+"""
+Unit tests for nltk.corpus.nombank
+"""
+
+from __future__ import unicode_literals
+import unittest
+
+from nltk.corpus import nombank
+# Load the nombank once.
+nombank.nouns()
+
+class NombankDemo(unittest.TestCase):
+    """Size and content checks against the nombank corpus reader."""
+
+    def test_numbers(self):
+        # Each accessor is checked in turn: instances, rolesets, nouns.
+        expected_sizes = (
+            (nombank.instances, 114574),
+            (nombank.rolesets, 5577),
+            (nombank.nouns, 4704),
+        )
+        for accessor, size in expected_sizes:
+            self.assertEqual(len(accessor()), size)
+
+    def test_instance(self):
+        first_instance = nombank.instances()[0]
+        self.assertEqual(first_instance.roleset, 'perc-sign.01')
+
+    def test_framefiles_fileids(self):
+        self.assertEqual(len(nombank.fileids()), 4705)
+        only_xml = all(fileid.endswith('.xml') for fileid in nombank.fileids())
+        self.assertTrue(only_xml)
--- a/venv/lib/python3.7/site-packages/nltk/test/unit/test_pos_tag.py
+++ b/venv/lib/python3.7/site-packages/nltk/test/unit/test_pos_tag.py
@@ -0,0 +1,85 @@
+# -*- coding: utf-8 -*-
+"""
+Tests for nltk.pos_tag
+"""
+
+from __future__ import unicode_literals
+
+import unittest
+
+from nltk import word_tokenize, pos_tag
+
+
+class TestPosTag(unittest.TestCase):
+    """
+    Tests for pos_tag() on tokenized English, Russian and Korean text.
+
+    Results are checked with assertEqual rather than bare ``assert`` so the
+    checks still run under ``python -O`` and failures show a diff.
+    """
+
+    def test_pos_tag_eng(self):
+        text = "John's big idea isn't all that bad."
+        expected_tagged = [
+            ('John', 'NNP'),
+            ("'s", 'POS'),
+            ('big', 'JJ'),
+            ('idea', 'NN'),
+            ('is', 'VBZ'),
+            ("n't", 'RB'),
+            ('all', 'PDT'),
+            ('that', 'DT'),
+            ('bad', 'JJ'),
+            ('.', '.'),
+        ]
+        self.assertEqual(pos_tag(word_tokenize(text)), expected_tagged)
+
+    def test_pos_tag_eng_universal(self):
+        text = "John's big idea isn't all that bad."
+        expected_tagged = [
+            ('John', 'NOUN'),
+            ("'s", 'PRT'),
+            ('big', 'ADJ'),
+            ('idea', 'NOUN'),
+            ('is', 'VERB'),
+            ("n't", 'ADV'),
+            ('all', 'DET'),
+            ('that', 'DET'),
+            ('bad', 'ADJ'),
+            ('.', '.'),
+        ]
+        self.assertEqual(
+            pos_tag(word_tokenize(text), tagset='universal'), expected_tagged
+        )
+
+    def test_pos_tag_rus(self):
+        text = u"Илья оторопел и дважды перечитал бумажку."
+        expected_tagged = [
+            ('Илья', 'S'),
+            ('оторопел', 'V'),
+            ('и', 'CONJ'),
+            ('дважды', 'ADV'),
+            ('перечитал', 'V'),
+            ('бумажку', 'S'),
+            ('.', 'NONLEX'),
+        ]
+        self.assertEqual(pos_tag(word_tokenize(text), lang='rus'), expected_tagged)
+
+    def test_pos_tag_rus_universal(self):
+        text = u"Илья оторопел и дважды перечитал бумажку."
+        expected_tagged = [
+            ('Илья', 'NOUN'),
+            ('оторопел', 'VERB'),
+            ('и', 'CONJ'),
+            ('дважды', 'ADV'),
+            ('перечитал', 'VERB'),
+            ('бумажку', 'NOUN'),
+            ('.', '.'),
+        ]
+        self.assertEqual(
+            pos_tag(word_tokenize(text), tagset='universal', lang='rus'),
+            expected_tagged,
+        )
+
+    def test_pos_tag_unknown_lang(self):
+        text = u"모르겠 습니 다"
+        self.assertRaises(NotImplementedError, pos_tag, word_tokenize(text), lang='kor')
+        # Test for default kwarg, `lang=None`
+        self.assertRaises(NotImplementedError, pos_tag, word_tokenize(text), lang=None)
+
+    def test_unspecified_lang(self):
+        # Tries to force the lang='eng' option.
+        text = u"모르겠 습니 다"
+        expected_but_wrong = [('모르겠', 'JJ'), ('습니', 'NNP'), ('다', 'NN')]
+        self.assertEqual(pos_tag(word_tokenize(text)), expected_but_wrong)
--- a/venv/lib/python3.7/site-packages/nltk/test/unit/test_rte_classify.py
+++ b/venv/lib/python3.7/site-packages/nltk/test/unit/test_rte_classify.py
@@ -0,0 +1,92 @@
+# -*- coding: utf-8 -*-
+from __future__ import print_function, unicode_literals
+
+import unittest
+
+from nltk.corpus import rte as rte_corpus
+from nltk.classify.rte_classify import RTEFeatureExtractor, rte_features, rte_classifier
+
+expected_from_rte_feature_extration = """
+alwayson        => True
+ne_hyp_extra    => 0
+ne_overlap      => 1
+neg_hyp         => 0
+neg_txt         => 0
+word_hyp_extra  => 3
+word_overlap    => 3
+
+alwayson        => True
+ne_hyp_extra    => 0
+ne_overlap      => 1
+neg_hyp         => 0
+neg_txt         => 0
+word_hyp_extra  => 2
+word_overlap    => 1
+
+alwayson        => True
+ne_hyp_extra    => 1
+ne_overlap      => 1
+neg_hyp         => 0
+neg_txt         => 0
+word_hyp_extra  => 1
+word_overlap    => 2
+
+alwayson        => True
+ne_hyp_extra    => 1
+ne_overlap      => 0
+neg_hyp         => 0
+neg_txt         => 0
+word_hyp_extra  => 6
+word_overlap    => 2
+
+alwayson        => True
+ne_hyp_extra    => 1
+ne_overlap      => 0
+neg_hyp         => 0
+neg_txt         => 0
+word_hyp_extra  => 4
+word_overlap    => 0
+
+alwayson        => True
+ne_hyp_extra    => 1
+ne_overlap      => 0
+neg_hyp         => 0
+neg_txt         => 0
+word_hyp_extra  => 3
+word_overlap    => 1
+"""
+
+
+class RTEClassifierTest(unittest.TestCase):
+    """Tests for RTE feature extraction and RTE classifier training."""
+
+    # Test the feature extraction method.
+    def test_rte_feature_extraction(self):
+        pairs = rte_corpus.pairs(['rte1_dev.xml'])[:6]
+        test_output = []
+        for pair in pairs:
+            # Extract the features once per pair instead of once per key.
+            features = rte_features(pair)
+            test_output.extend(
+                "%-15s => %s" % (key, features[key]) for key in sorted(features)
+            )
+        expected_output = expected_from_rte_feature_extration.strip().split('\n')
+        # Remove null strings.
+        expected_output = list(filter(None, expected_output))
+        self.assertEqual(test_output, expected_output)
+
+    # Test the RTEFeatureExtractor object.
+    def test_feature_extractor_object(self):
+        rtepair = rte_corpus.pairs(['rte3_dev.xml'])[33]
+        extractor = RTEFeatureExtractor(rtepair)
+        self.assertEqual(extractor.hyp_words, {'member', 'China', 'SCO.'})
+        self.assertEqual(extractor.overlap('word'), set())
+        self.assertEqual(extractor.overlap('ne'), {'China'})
+        self.assertEqual(extractor.hyp_extra('word'), {'member'})
+
+    # Test the RTE classifier training.
+    def test_rte_classification_without_megam(self):
+        # Training only has to complete; the classifiers are not inspected.
+        rte_classifier('IIS')
+        rte_classifier('GIS')
+
+    @unittest.skip("Skipping tests with dependencies on MEGAM")
+    def test_rte_classification_with_megam(self):
+        # Imported locally: this module has no top-level ``import nltk``, so
+        # the bare name would raise NameError once the skip is lifted.
+        import nltk
+
+        nltk.config_megam('/usr/local/bin/megam')
+        rte_classifier('megam')
+        rte_classifier('BFGS')
--- a/venv/lib/python3.7/site-packages/nltk/test/unit/test_seekable_unicode_stream_reader.py
+++ b/venv/lib/python3.7/site-packages/nltk/test/unit/test_seekable_unicode_stream_reader.py
@@ -0,0 +1,140 @@
+# -*- coding: utf-8 -*-
+"""
+The following test performs a random series of reads, seeks, and
+tells, and checks that the results are consistent.
+"""
+from __future__ import absolute_import, unicode_literals
+import random
+import functools
+from io import BytesIO
+from nltk.corpus.reader import SeekableUnicodeStreamReader
+
+
+def check_reader(unicode_string, encoding, n=1000):
+    """
+    Run random tell/seek/read operations on a SeekableUnicodeStreamReader
+    and check that what is read is consistent with the recorded positions.
+
+    :param unicode_string: text that is encoded and wrapped in the reader
+    :param encoding: encoding used both to encode the text and by the reader
+    :param n: number of checked reads to perform before returning
+    :return: the string ``'passed'`` once ``n`` checked reads have succeeded
+
+    NOTE(review): ``n`` is only compared with ``== 0`` after a decrement,
+    so a call with ``n <= 0`` never returns.
+    """
+    bytestr = unicode_string.encode(encoding)
+    strlen = len(unicode_string)
+    stream = BytesIO(bytestr)
+    reader = SeekableUnicodeStreamReader(stream, encoding)
+    # Find all character positions: read one character at a time and record
+    # the tell() value seen just before each read.  The loop stops after the
+    # first empty read, which is recorded as well.
+    chars = []
+    while True:
+        pos = reader.tell()
+        chars.append((pos, reader.read(1)))
+        if chars[-1][1] == '':
+            break
+    # Find all strings: for every recorded position, concatenate (in read
+    # order) all characters recorded at that position or a later one.
+    strings = dict((pos, '') for (pos, c) in chars)
+    for pos1, char in chars:
+        for pos2, _ in chars:
+            if pos2 <= pos1:
+                strings[pos2] += char
+    while True:
+        # 'r' appears twice, so a read is chosen twice as often as the others.
+        op = random.choice('tsrr')
+        # Check our position?
+        if op == 't':  # tell
+            reader.tell()
+        # Perform a seek?
+        if op == 's':  # seek
+            new_pos = random.choice([p for (p, c) in chars])
+            reader.seek(new_pos)
+        # Perform a read?
+        if op == 'r':  # read
+            # Only some reads record their starting position; the others
+            # are performed but not checked.
+            if random.random() < 0.3:
+                pos = reader.tell()
+            else:
+                pos = None
+            # Pick a size: None, a small size, or one that may exceed the
+            # length of the text.
+            if random.random() < 0.2:
+                size = None
+            elif random.random() < 0.8:
+                size = random.randint(0, int(strlen / 6))
+            else:
+                size = random.randint(0, strlen + 20)
+            # Mostly read(), occasionally readline().
+            if random.random() < 0.8:
+                s = reader.read(size)
+            else:
+                s = reader.readline(size)
+            # check that everything's consistent
+            if pos is not None:
+                assert pos in strings
+                assert strings[pos].startswith(s)
+                # Only checked reads count towards n.
+                n -= 1
+                if n == 0:
+                    return 'passed'
+
+
+# Call the randomized test function `check_reader` with a variety of
+# input strings and encodings.
+
+ENCODINGS = ['ascii', 'latin1', 'greek', 'hebrew', 'utf-16', 'utf-8']
+
+STRINGS = [
+    """
+    This is a test file.
+    It is fairly short.
+    """,
+    "This file can be encoded with latin1. \x83",
+    """\
+    This is a test file.
+    Here's a blank line:
+
+    And here's some unicode: \xee \u0123 \uffe3
+    """,
+    """\
+    This is a test file.
+    Unicode characters: \xf3 \u2222 \u3333\u4444 \u5555
+    """,
+]
+
+
+def test_reader():
+    """
+    Yield one ``(check_reader, string, encoding)`` test per combination of
+    STRINGS and ENCODINGS in which the string can be encoded.
+    """
+    for string in STRINGS:
+        for encoding in ENCODINGS:
+            # Keep the try body to the probe itself so that only an
+            # un-encodable string is skipped, never an error around the yield.
+            try:
+                string.encode(encoding)
+            except UnicodeEncodeError:
+                # skip strings that can't be encoded with the current encoding
+                continue
+            yield check_reader, string, encoding
+
+
+# nose prints the full string arguments in verbose mode, which is noisy,
+# so the large-string test is kept separate.
+
+LARGE_STRING = (
+    """\
+This is a larger file.  It has some lines that are longer \
+than 72 characters.  It's got lots of repetition.  Here's \
+some unicode chars: \xee \u0123 \uffe3 \ueeee \u2345
+
+How fun!  Let's repeat it twenty times.
+"""
+    * 10
+)
+
+
+def test_reader_on_large_string():
+    """
+    Yield one ``(_check, encoding)`` test per entry of ENCODINGS in which
+    LARGE_STRING can be encoded.
+    """
+
+    # Wrapper that keeps LARGE_STRING out of the yielded test arguments.
+    def _check(encoding, n=1000):
+        check_reader(LARGE_STRING, encoding, n)
+
+    for encoding in ENCODINGS:
+        # Keep the try body to the probe itself so that only an un-encodable
+        # string is skipped, never an error around the yield.
+        try:
+            LARGE_STRING.encode(encoding)
+        except UnicodeEncodeError:
+            # skip strings that can't be encoded with the current encoding
+            continue
+        yield _check, encoding
+
+
+def test_reader_stream_is_closed():
+    # Calling __del__() on the reader must close its underlying stream.
+    empty_stream = BytesIO(b'')
+    reader = SeekableUnicodeStreamReader(empty_stream, 'ascii')
+    assert reader.stream.closed is False
+
+    reader.__del__()
+    assert reader.stream.closed is True
+
+
+def teardown_module(module=None):
+    """Run a garbage collection; ``module`` is accepted but not used."""
+    from gc import collect
+
+    collect()
--- a/venv/lib/python3.7/site-packages/nltk/test/unit/test_senna.py
+++ b/venv/lib/python3.7/site-packages/nltk/test/unit/test_senna.py
@@ -0,0 +1,116 @@
+# -*- coding: utf-8 -*-
+"""
+Unit tests for Senna
+"""
+
+from __future__ import unicode_literals
+from os import environ, path, sep
+
+import logging
+import unittest
+
+from nltk.classify import Senna
+from nltk.tag import SennaTagger, SennaChunkTagger, SennaNERTagger
+
+# Take the Senna location from the SENNA environment variable when it is set;
+# otherwise fall back to the hard-coded default path.
+if 'SENNA' not in environ:
+    SENNA_EXECUTABLE_PATH = '/usr/share/senna-v3.0'
+else:
+    SENNA_EXECUTABLE_PATH = path.normpath(environ['SENNA']) + sep
+
+# The Senna test classes below are skipped unless this path exists.
+senna_is_installed = path.exists(SENNA_EXECUTABLE_PATH)
+
+
+@unittest.skipUnless(senna_is_installed, "Requires Senna executable")
+class TestSennaPipeline(unittest.TestCase):
+    """Tests for the Senna pipeline wrapper in nltk.classify.senna."""
+
+    def test_senna_pipeline(self):
+        """Run POS, chunk and NER annotation through a single pipeline."""
+        expected = [
+            ('Dusseldorf', 'B-NP', 'B-LOC', 'NNP'),
+            ('is', 'B-VP', 'O', 'VBZ'),
+            ('an', 'B-NP', 'O', 'DT'),
+            ('international', 'I-NP', 'O', 'JJ'),
+            ('business', 'I-NP', 'O', 'NN'),
+            ('center', 'I-NP', 'O', 'NN'),
+        ]
+
+        senna = Senna(SENNA_EXECUTABLE_PATH, ['pos', 'chk', 'ner'])
+        words = 'Dusseldorf is an international business center'.split()
+
+        # Flatten each annotated token to (word, chunk, entity, POS).
+        result = []
+        for annotated in senna.tag(words):
+            result.append(
+                (
+                    annotated['word'],
+                    annotated['chk'],
+                    annotated['ner'],
+                    annotated['pos'],
+                )
+            )
+
+        self.assertEqual(result, expected)
+
+
+@unittest.skipUnless(senna_is_installed, "Requires Senna executable")
+class TestSennaTagger(unittest.TestCase):
+    """Tests for the Senna-backed taggers in nltk.tag.senna."""
+
+    def test_senna_tagger(self):
+        """POS-tag one tokenized question."""
+        expected = [
+            ('What', 'WP'),
+            ('is', 'VBZ'),
+            ('the', 'DT'),
+            ('airspeed', 'NN'),
+            ('of', 'IN'),
+            ('an', 'DT'),
+            ('unladen', 'NN'),
+            ('swallow', 'NN'),
+            ('?', '.'),
+        ]
+        pos_tagger = SennaTagger(SENNA_EXECUTABLE_PATH)
+        words = 'What is the airspeed of an unladen swallow ?'.split()
+        self.assertEqual(pos_tagger.tag(words), expected)
+
+    def test_senna_chunk_tagger(self):
+        """Chunk-tag one question, then extract its NP chunks from the BIO tags."""
+        expected_bio = [
+            ('What', 'B-NP'),
+            ('is', 'B-VP'),
+            ('the', 'B-NP'),
+            ('airspeed', 'I-NP'),
+            ('of', 'B-PP'),
+            ('an', 'B-NP'),
+            ('unladen', 'I-NP'),
+            ('swallow', 'I-NP'),
+            ('?', 'O'),
+        ]
+        expected_chunks = [
+            ('What', '0'),
+            ('the airspeed', '2-3'),
+            ('an unladen swallow', '5-6-7'),
+        ]
+
+        chunker = SennaChunkTagger(SENNA_EXECUTABLE_PATH)
+        words = 'What is the airspeed of an unladen swallow ?'.split()
+        bio_tags = chunker.tag(words)
+        np_chunks = list(chunker.bio_to_chunks(bio_tags, chunk_type='NP'))
+
+        # Both results are computed before either one is checked.
+        self.assertEqual(bio_tags, expected_bio)
+        self.assertEqual(np_chunks, expected_chunks)
+
+    def test_senna_ner_tagger(self):
+        """NER-tag two tokenized sentences."""
+        expected_first = [
+            ('Shakespeare', 'B-PER'),
+            ('theatre', 'O'),
+            ('was', 'O'),
+            ('in', 'O'),
+            ('London', 'B-LOC'),
+            ('.', 'O'),
+        ]
+        expected_second = [
+            ('UN', 'B-ORG'),
+            ('headquarters', 'O'),
+            ('are', 'O'),
+            ('in', 'O'),
+            ('NY', 'B-LOC'),
+            (',', 'O'),
+            ('USA', 'B-LOC'),
+            ('.', 'O'),
+        ]
+
+        ner = SennaNERTagger(SENNA_EXECUTABLE_PATH)
+        first = ner.tag('Shakespeare theatre was in London .'.split())
+        second = ner.tag('UN headquarters are in NY , USA .'.split())
+
+        # Both sentences are tagged before either result is checked.
+        self.assertEqual(first, expected_first)
+        self.assertEqual(second, expected_second)
--- a/venv/lib/python3.7/site-packages/nltk/test/unit/test_stem.py
+++ b/venv/lib/python3.7/site-packages/nltk/test/unit/test_stem.py
@@ -0,0 +1,146 @@
+# -*- coding: utf-8 -*-
+from __future__ import print_function, unicode_literals
+
+import os
+import unittest
+from contextlib import closing
+
+from nltk import data
+from nltk.stem.snowball import SnowballStemmer
+from nltk.stem.porter import PorterStemmer
+
+
+class SnowballTest(unittest.TestCase):
+    """Spot checks of SnowballStemmer output for several languages."""
+
+    def test_arabic(self):
+        """
+        Unit test for the Snowball Arabic light stemmer, which deals with
+        prefixes and suffixes.
+        """
+        # Case 1: ignore_stopwords=True.
+        stemmer = SnowballStemmer("arabic", True)
+        for word, stem in [
+            ('الْعَرَبِــــــيَّة', "عرب"),
+            ("العربية", "عرب"),
+            ("فقالوا", "قال"),
+            ("الطالبات", "طالب"),
+            ("فالطالبات", "طالب"),
+            ("والطالبات", "طالب"),
+            ("الطالبون", "طالب"),
+            ("اللذان", "اللذان"),
+            ("من", "من"),
+        ]:
+            assert stemmer.stem(word) == stem
+
+        # Case 2: ignore_stopwords=False.
+        stemmer = SnowballStemmer("arabic", False)
+        for word, stem in [
+            ("اللذان", "اللذ"),  # this is a stop word
+            ("الطالبات", "طالب"),
+            ("الكلمات", "كلم"),
+        ]:
+            assert stemmer.stem(word) == stem
+
+        # Case 3: stemmer created without an explicit ignore_stopwords value.
+        stemmer = SnowballStemmer("arabic")
+        for word, stem in [
+            ('الْعَرَبِــــــيَّة', "عرب"),
+            ("العربية", "عرب"),
+            ("فقالوا", "قال"),
+            ("الطالبات", "طالب"),
+            ("الكلمات", "كلم"),
+        ]:
+            assert stemmer.stem(word) == stem
+
+    def test_russian(self):
+        """Stem a single Russian word."""
+        russian = SnowballStemmer("russian")
+        stem = russian.stem("авантненькая")
+        assert stem == "авантненьк"
+
+    def test_german(self):
+        """Stem German words with and without stop-word protection."""
+        plain = SnowballStemmer("german")
+        keeps_stopwords = SnowballStemmer("german", ignore_stopwords=True)
+
+        # For each word: expected stem from the plain stemmer, then from the
+        # one created with ignore_stopwords=True.
+        for word, plain_stem, protected_stem in [
+            ("Schr\xe4nke", 'schrank', 'schrank'),
+            ("keinen", 'kein', 'keinen'),
+        ]:
+            assert plain.stem(word) == plain_stem
+            assert keeps_stopwords.stem(word) == protected_stem
+
+    def test_spanish(self):
+        """Stem two Spanish words."""
+        spanish = SnowballStemmer('spanish')
+        for word, stem in [
+            ("Visionado", 'vision'),
+            # The word 'algue' was raising an IndexError
+            ("algue", 'algu'),
+        ]:
+            assert spanish.stem(word) == stem
+
+    def test_short_strings_bug(self):
+        """Stem a very short English token."""
+        english = SnowballStemmer('english')
+        stem = english.stem("y's")
+        assert stem == 'y'
+
+
+class PorterTest(unittest.TestCase):
+    """Compare PorterStemmer output with reference stem lists, one per mode."""
+
+    def _read_lines(self, resource):
+        """Return the lines of a UTF-8 text resource located via ``data.find``.
+
+        The file handle is closed before the lines are returned.
+        """
+        with closing(data.find(resource).open(encoding='utf-8')) as fp:
+            return fp.read().splitlines()
+
+    def _vocabulary(self):
+        """Return the words of the Porter test vocabulary, one per line."""
+        return self._read_lines('stemmers/porter_test/porter_vocabulary.txt')
+
+    def _test_against_expected_output(self, stemmer_mode, expected_stems):
+        """Stem every vocabulary word and compare it with its expected stem.
+
+        :param stemmer_mode: value passed as ``mode`` to ``PorterStemmer``.
+        :param expected_stems: expected stems, in vocabulary order.
+        :raises AssertionError: on the first word whose stem differs.
+        """
+        stemmer = PorterStemmer(mode=stemmer_mode)
+        # Words and stems are paired positionally.
+        for word, true_stem in zip(self._vocabulary(), expected_stems):
+            our_stem = stemmer.stem(word)
+            assert our_stem == true_stem, (
+                "%s should stem to %s in %s mode but got %s"
+                % (word, true_stem, stemmer_mode, our_stem)
+            )
+
+    def test_vocabulary_martin_mode(self):
+        """Tests all words from the test vocabulary provided by M Porter
+
+        The sample vocabulary and output were sourced from:
+            http://tartarus.org/martin/PorterStemmer/voc.txt
+            http://tartarus.org/martin/PorterStemmer/output.txt
+        and are linked to from the Porter Stemmer algorithm's homepage
+        at
+            http://tartarus.org/martin/PorterStemmer/
+        """
+        self._test_against_expected_output(
+            PorterStemmer.MARTIN_EXTENSIONS,
+            self._read_lines('stemmers/porter_test/porter_martin_output.txt'),
+        )
+
+    def test_vocabulary_nltk_mode(self):
+        """Check the vocabulary against the reference output for NLTK mode."""
+        self._test_against_expected_output(
+            PorterStemmer.NLTK_EXTENSIONS,
+            self._read_lines('stemmers/porter_test/porter_nltk_output.txt'),
+        )
+
+    def test_vocabulary_original_mode(self):
+        """Check the vocabulary against the reference output for original mode."""
+        # The list of stems for this test was generated by taking the
+        # Martin-blessed stemmer from
+        # http://tartarus.org/martin/PorterStemmer/c.txt
+        # and removing all the --DEPARTURE-- sections from it and
+        # running it against Martin's test vocabulary.
+
+        # The comparison used to run a second time through a file handle
+        # that was never closed; it now runs once, through the shared
+        # helper that closes the file.
+        self._test_against_expected_output(
+            PorterStemmer.ORIGINAL_ALGORITHM,
+            self._read_lines('stemmers/porter_test/porter_original_output.txt'),
+        )
+
+    def test_oed_bug(self):
+        """Test for bug https://github.com/nltk/nltk/issues/1581
+
+        Ensures that 'oed' can be stemmed without throwing an error.
+        """
+        assert PorterStemmer().stem('oed') == 'o'
--- a/venv/lib/python3.7/site-packages/nltk/test/unit/test_tag.py
+++ b/venv/lib/python3.7/site-packages/nltk/test/unit/test_tag.py
@@ -0,0 +1,30 @@
+# -*- coding: utf-8 -*-
+from __future__ import absolute_import, unicode_literals
+
+
+def test_basic():
+    """Tokenize and POS-tag one sentence, then compare with the expected tags."""
+    from nltk.tag import pos_tag
+    from nltk.tokenize import word_tokenize
+
+    expected = [
+        ('John', 'NNP'),
+        ("'s", 'POS'),
+        ('big', 'JJ'),
+        ('idea', 'NN'),
+        ('is', 'VBZ'),
+        ("n't", 'RB'),
+        ('all', 'PDT'),
+        ('that', 'DT'),
+        ('bad', 'JJ'),
+        ('.', '.'),
+    ]
+
+    tokens = word_tokenize("John's big idea isn't all that bad.")
+    tagged = pos_tag(tokens)
+    assert tagged == expected
+
+
+def setup_module(module):
+    """Skip this module's tests when numpy cannot be imported.
+
+    :param module: unused by this function.
+    :raises unittest.SkipTest: if importing numpy fails.
+    """
+    # SkipTest comes from unittest so that the numpy check itself does not
+    # fail with ImportError when nose is not installed.
+    from unittest import SkipTest
+
+    try:
+        import numpy  # noqa: F401 -- imported only to probe availability
+    except ImportError:
+        raise SkipTest("numpy is required for nltk.test.test_tag")
--- a/venv/lib/python3.7/site-packages/nltk/test/unit/test_tgrep.py
+++ b/venv/lib/python3.7/site-packages/nltk/test/unit/test_tgrep.py
@@ -0,0 +1,790 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Natural Language Toolkit: TGrep search
+#
+# Copyright (C) 2001-2019 NLTK Project
+# Author: Will Roberts <wildwilhelm@gmail.com>
+# URL: <http://nltk.org/>
+# For license information, see LICENSE.TXT
+
+'''
+Unit tests for nltk.tgrep.
+'''
+
+from __future__ import absolute_import, print_function, unicode_literals
+
+import unittest
+
+from six import b
+
+from nltk.tree import ParentedTree
+from nltk import tgrep
+
+
+class TestSequenceFunctions(unittest.TestCase):
+
+    '''
+    Class containing unit tests for nltk.tgrep.
+    '''
+
+    def test_tokenize_simple(self):
+        '''
+        Tokenize one pattern that mixes relations, grouping, negation
+        and alternation.
+        '''
+        # One expected token per whitespace-separated item.
+        expected = 'A .. ( B ! < C . D ) | ! [ << ( E , F ) $ G ]'.split()
+        actual = tgrep.tgrep_tokenize('A .. (B !< C . D) | ![<< (E , F) $ G]')
+        self.assertEqual(actual, expected)
+
+    def test_tokenize_encoding(self):
+        '''
+        Tokenizing a bytes pattern and the equal str pattern must give
+        the same result.
+        '''
+        from_bytes = tgrep.tgrep_tokenize(b('A .. (B !< C . D) | ![<< (E , F) $ G]'))
+        from_text = tgrep.tgrep_tokenize('A .. (B !< C . D) | ![<< (E , F) $ G]')
+        self.assertEqual(from_bytes, from_text)
+
+    def test_tokenize_link_types(self):
+        '''
+        Test tokenization of every basic link type, plain and negated.
+        '''
+        operators = [
+            '<',
+            '>',
+            '<3',
+            '>3',
+            '<,',
+            '>,',
+            '<-3',
+            '>-3',
+            '<-',
+            '>-',
+            "<'",
+            ">'",
+            '<:',
+            '>:',
+            '<<',
+            '>>',
+            '<<,',
+            '>>,',
+            "<<'",
+            ">>'",
+            '<<:',
+            '>>:',
+            '.',
+            ',',
+            '..',
+            ',,',
+            '$',
+            '$.',
+            '$,',
+            '$..',
+            '$,,',
+        ]
+        # Plain form: the operator is one token between the two node names.
+        for op in operators:
+            self.assertEqual(tgrep.tgrep_tokenize('A' + op + 'B'), ['A', op, 'B'])
+        # Negated form: '!' is a separate token before the operator.
+        for op in operators:
+            self.assertEqual(
+                tgrep.tgrep_tokenize('A!' + op + 'B'), ['A', '!', op, 'B']
+            )
+
+    def test_tokenize_examples(self):
+        '''
+        Test tokenization of the TGrep2 manual example patterns.
+        '''
+        # (pattern, expected tokens), checked in this order
+        examples = [
+            ('NP < PP', ['NP', '<', 'PP']),
+            ('/^NP/', ['/^NP/']),
+            ('NP << PP . VP', ['NP', '<<', 'PP', '.', 'VP']),
+            ('NP << PP | . VP', ['NP', '<<', 'PP', '|', '.', 'VP']),
+            (
+                'NP !<< PP [> NP | >> VP]',
+                ['NP', '!', '<<', 'PP', '[', '>', 'NP', '|', '>>', 'VP', ']'],
+            ),
+            ('NP << (PP . VP)', ['NP', '<<', '(', 'PP', '.', 'VP', ')']),
+            (
+                "NP <' (PP <, (IN < on))",
+                ['NP', "<'", '(', 'PP', '<,', '(', 'IN', '<', 'on', ')', ')'],
+            ),
+            ('S < (A < B) < C', ['S', '<', '(', 'A', '<', 'B', ')', '<', 'C']),
+            (
+                'S < ((A < B) < C)',
+                ['S', '<', '(', '(', 'A', '<', 'B', ')', '<', 'C', ')'],
+            ),
+            ('S < (A < B < C)', ['S', '<', '(', 'A', '<', 'B', '<', 'C', ')']),
+            ('A<B&.C', ['A', '<', 'B', '&', '.', 'C']),
+        ]
+        for pattern, expected in examples:
+            self.assertEqual(tgrep.tgrep_tokenize(pattern), expected)
+
+    def test_tokenize_quoting(self):
+        '''
+        Test that double-quoted node names are kept as single tokens.
+        '''
+        pattern = '"A<<:B"<<:"A $.. B"<"A>3B"<C'
+        expected = ['"A<<:B"', '<<:', '"A $.. B"', '<', '"A>3B"', '<', 'C']
+        self.assertEqual(tgrep.tgrep_tokenize(pattern), expected)
+
+    def test_tokenize_nodenames(self):
+        '''
+        Test tokenization of node names.
+        '''
+        # Each of these patterns is expected to come back as one token.
+        for name in ('Robert', '/^[Bb]ob/', '*', '__'):
+            self.assertEqual(tgrep.tgrep_tokenize(name), [name])
+        # test tokenization of NLTK tree position syntax
+        position_cases = [
+            ('N()', ['N(', ')']),
+            ('N(0,)', ['N(', '0', ',', ')']),
+            ('N(0,0)', ['N(', '0', ',', '0', ')']),
+            ('N(0,0,)', ['N(', '0', ',', '0', ',', ')']),
+        ]
+        for pattern, expected in position_cases:
+            self.assertEqual(tgrep.tgrep_tokenize(pattern), expected)
+
+    def test_tokenize_macros(self):
+        '''
+        Test tokenization of macro definitions followed by a pattern
+        that uses them.
+        '''
+        pattern = '\n'.join(
+            ['@ NP /^NP/;', '@ NN /^NN/;', '@NP [!< NP | < @NN] !$.. @NN']
+        )
+        expected = ['@', 'NP', '/^NP/', ';']
+        expected += ['@', 'NN', '/^NN/', ';']
+        expected += ['@NP', '[', '!', '<', 'NP', '|', '<', '@NN', ']']
+        expected += ['!', '$..', '@NN']
+        self.assertEqual(tgrep.tgrep_tokenize(pattern), expected)
+
+    def test_node_simple(self):
+        '''
+        Test a simple use of tgrep for finding the nodes that match a
+        given pattern.
+        '''
+        sentence = '(S (NP (DT the) (JJ big) (NN dog)) (VP bit) (NP (DT a) (NN cat)))'
+        tree = ParentedTree.fromstring(sentence)
+
+        nn_positions = list(tgrep.tgrep_positions('NN', [tree]))
+        self.assertEqual(nn_positions, [[(0, 2), (2, 1)]])
+
+        nn_nodes = list(tgrep.tgrep_nodes('NN', [tree]))
+        self.assertEqual(nn_nodes, [[tree[0, 2], tree[2, 1]]])
+
+        nn_or_jj = list(tgrep.tgrep_positions('NN|JJ', [tree]))
+        self.assertEqual(nn_or_jj, [[(0, 1), (0, 2), (2, 1)]])
+
+    def test_node_printing(self):
+        '''Test that the tgrep print operator ' is properly ignored.'''
+        tree = ParentedTree.fromstring('(S (n x) (N x))')
+        # A pattern must match the same positions with and without the
+        # leading print operator.
+        for pattern in ('N', '/[Nn]/'):
+            plain = list(tgrep.tgrep_positions(pattern, [tree]))
+            printed = list(tgrep.tgrep_positions("'" + pattern, [tree]))
+            self.assertEqual(plain, printed)
+
+    def test_node_encoding(self):
+        '''
+        Test that tgrep searches give the same result for a bytes
+        search string as for the equal str search string.
+        '''
+        sentence = '(S (NP (DT the) (JJ big) (NN dog)) (VP bit) (NP (DT a) (NN cat)))'
+        tree = ParentedTree.fromstring(sentence)
+        # (search function, pattern), checked in this order
+        checks = [
+            (tgrep.tgrep_positions, 'NN'),
+            (tgrep.tgrep_nodes, 'NN'),
+            (tgrep.tgrep_positions, 'NN|JJ'),
+        ]
+        for search, pattern in checks:
+            from_bytes = list(search(b(pattern), [tree]))
+            from_text = list(search(pattern, [tree]))
+            self.assertEqual(from_bytes, from_text)
+
+    def test_node_nocase(self):
+        '''
+        Test selecting nodes using case insensitive node names.
+        '''
+        tree = ParentedTree.fromstring('(S (n x) (N x))')
+        # The quoted name alone matches only the upper-case node.
+        case_sensitive = list(tgrep.tgrep_positions('"N"', [tree]))
+        self.assertEqual(case_sensitive, [[(1,)]])
+        # With the i@ prefix both nodes match.
+        case_insensitive = list(tgrep.tgrep_positions('i@"N"', [tree]))
+        self.assertEqual(case_insensitive, [[(0,), (1,)]])
+
+    def test_node_quoted(self):
+        '''
+        Test selecting nodes using quoted node names, including names
+        that contain quote and backslash characters.
+        '''
+        tree = ParentedTree.fromstring('(N ("N" x) (N" x) ("\\" x))')
+        # (pattern, expected positions), checked in this order
+        cases = [
+            ('"N"', [[()]]),
+            ('"\\"N\\""', [[(0,)]]),
+            ('"N\\""', [[(1,)]]),
+            ('"\\"\\\\\\""', [[(2,)]]),
+        ]
+        for pattern, expected in cases:
+            self.assertEqual(list(tgrep.tgrep_positions(pattern, [tree])), expected)
+
+    def test_node_regex(self):
+        '''
+        Test regex matching on node names anchored at the start.
+        '''
+        tree = ParentedTree.fromstring('(S (NP-SBJ x) (NP x) (NNP x) (VP x))')
+        # This is a regular expression that matches any node whose
+        # name starts with NP, including NP-SBJ:
+        found = list(tgrep.tgrep_positions('/^NP/', [tree]))
+        self.assertEqual(found, [[(0,), (1,)]])
+
+    def test_node_regex_2(self):
+        '''
+        Test regex matching on node names, anchored and unanchored.
+        '''
+        tree = ParentedTree.fromstring('(S (SBJ x) (SBJ1 x) (NP-SBJ x))')
+        anchored = list(tgrep.tgrep_positions('/^SBJ/', [tree]))
+        self.assertEqual(anchored, [[(0,), (1,)]])
+        # This is a regular expression that matches any node whose
+        # name includes SBJ, including NP-SBJ:
+        unanchored = list(tgrep.tgrep_positions('/SBJ/', [tree]))
+        self.assertEqual(unanchored, [[(0,), (1,), (2,)]])
+
+    def test_node_tree_position(self):
+        '''
+        Test matching on nodes based on NLTK tree position.
+        '''
+        tree = ParentedTree.fromstring('(S (NP-SBJ x) (NP x) (NNP x) (VP x))')
+        leaf_count = len(tree.leaves())
+        leaf_positions = {tree.leaf_treeposition(i) for i in range(leaf_count)}
+        # test all tree positions that are not leaves
+        non_leaf_positions = [
+            pos for pos in tree.treepositions() if pos not in leaf_positions
+        ]
+        for position in non_leaf_positions:
+            # The pattern is 'N' followed by the position tuple's text form.
+            node_id = 'N{0}'.format(position)
+            matches = list(tgrep.tgrep_positions(node_id, [tree]))
+            first_tree_matches = matches[0]
+            self.assertEqual(len(first_tree_matches), 1)
+            self.assertEqual(matches[0][0], position)
+
+    def test_node_noleaves(self):
+        '''
+        Test node name matching with the search_leaves flag set to False.
+        '''
+        tree = ParentedTree.fromstring('(S (A (T x)) (B (N x)))')
+        # Called without the extra argument, the leaves named 'x' are found.
+        default_search = list(tgrep.tgrep_positions('x', [tree]))
+        self.assertEqual(default_search, [[(0, 0, 0), (1, 0, 0)]])
+        # Passing False as the third argument yields no matches.
+        without_leaves = list(tgrep.tgrep_positions('x', [tree], False))
+        self.assertEqual(without_leaves, [[]])
+
+    def tests_rel_dominance(self):
+        '''
+        Test matching nodes based on dominance relations.
+        '''
+        # Each scenario is (bracketed tree, [(pattern, expected positions)]).
+        # Scenarios and their cases are checked in the listed order.
+        scenarios = [
+            (
+                '(S (A (T x)) (B (N x)))',
+                [
+                    ('* < T', [[(0,)]]),
+                    ('* < T > S', [[(0,)]]),
+                    ('* !< T', [[(), (0, 0), (0, 0, 0), (1,), (1, 0), (1, 0, 0)]]),
+                    ('* !< T > S', [[(1,)]]),
+                    ('* > A', [[(0, 0)]]),
+                    ('* > B', [[(1, 0)]]),
+                    ('* !> B', [[(), (0,), (0, 0), (0, 0, 0), (1,), (1, 0, 0)]]),
+                    ('* !> B >> S', [[(0,), (0, 0), (1,)]]),
+                    ('* >> S', [[(0,), (0, 0), (1,), (1, 0)]]),
+                    ('* >>, S', [[(0,), (0, 0)]]),
+                    ("* >>' S", [[(1,), (1, 0)]]),
+                    # Known issue:
+                    # ('* !>> S', [[()]]),
+                    ('* << T', [[(), (0,)]]),
+                    ("* <<' T", [[(0,)]]),
+                    ('* <<1 N', [[(1,)]]),
+                    ('* !<< T', [[(0, 0), (0, 0, 0), (1,), (1, 0), (1, 0, 0)]]),
+                ],
+            ),
+            (
+                '(S (A (T x)) (B (T x) (N x )))',
+                [
+                    ('* <: T', [[(0,)]]),
+                    ('* < T', [[(0,), (1,)]]),
+                    (
+                        '* !<: T',
+                        [
+                            [
+                                (),
+                                (0, 0),
+                                (0, 0, 0),
+                                (1,),
+                                (1, 0),
+                                (1, 0, 0),
+                                (1, 1),
+                                (1, 1, 0),
+                            ]
+                        ],
+                    ),
+                    ('* !<: T > S', [[(1,)]]),
+                ],
+            ),
+            (
+                '(S (T (A x) (B x)) (T (C x)))',
+                [
+                    ('* >: T', [[(1, 0)]]),
+                    (
+                        '* !>: T',
+                        [
+                            [
+                                (),
+                                (0,),
+                                (0, 0),
+                                (0, 0, 0),
+                                (0, 1),
+                                (0, 1, 0),
+                                (1,),
+                                (1, 0, 0),
+                            ]
+                        ],
+                    ),
+                ],
+            ),
+            (
+                '(S (A (B (C (D (E (T x)))))) (A (B (C (D (E (T x))) (N x)))))',
+                [
+                    (
+                        '* <<: T',
+                        [
+                            [
+                                (0,),
+                                (0, 0),
+                                (0, 0, 0),
+                                (0, 0, 0, 0),
+                                (0, 0, 0, 0, 0),
+                                (1, 0, 0, 0),
+                                (1, 0, 0, 0, 0),
+                            ]
+                        ],
+                    ),
+                    (
+                        '* >>: A',
+                        [
+                            [
+                                (0, 0),
+                                (0, 0, 0),
+                                (0, 0, 0, 0),
+                                (0, 0, 0, 0, 0),
+                                (0, 0, 0, 0, 0, 0),
+                                (1, 0),
+                                (1, 0, 0),
+                            ]
+                        ],
+                    ),
+                ],
+            ),
+        ]
+        for bracketed, cases in scenarios:
+            tree = ParentedTree.fromstring(bracketed)
+            for pattern, expected in cases:
+                self.assertEqual(
+                    list(tgrep.tgrep_positions(pattern, [tree])), expected
+                )
+
+    def test_bad_operator(self):
+        '''
+        Test that consuming a search with an undefined tgrep operator
+        raises TgrepException.
+        '''
+        tree = ParentedTree.fromstring('(S (A (T x)) (B (N x)))')
+        # The search is created outside the assertion; only consuming it
+        # with list() is expected to raise.
+        search = tgrep.tgrep_positions('* >>> S', [tree])
+        with self.assertRaises(tgrep.TgrepException):
+            list(search)
+
+    def test_comments(self):
+        '''
+        Test that a search string containing # comment lines matches the
+        same positions as the equivalent one without comments.
+        '''
+        tree = ParentedTree.fromstring('(S (NN x) (NP x) (NN x))')
+        expected = [[(0,), (2,)]]
+        without_comments = '''
+        @ NP /^NP/;
+        @ NN /^NN/;
+        @NN
+        '''
+        with_comments = '''
+        # macros
+        @ NP /^NP/;
+        @ NN /^NN/;
+
+        # search string
+        @NN
+        '''
+        for search in (without_comments, with_comments):
+            self.assertEqual(list(tgrep.tgrep_positions(search, [tree])), expected)
+
+    def test_rel_sister_nodes(self):
+        '''
+        Test matching sister nodes in a tree.
+        '''
+        tree = ParentedTree.fromstring('(S (A x) (B x) (C x))')
+        # (pattern, expected positions), checked in this order
+        cases = [
+            ('* $. B', [[(0,)]]),
+            ('* $.. B', [[(0,)]]),
+            ('* $, B', [[(2,)]]),
+            ('* $,, B', [[(2,)]]),
+            ('* $ B', [[(0,), (2,)]]),
+        ]
+        for pattern, expected in cases:
+            self.assertEqual(list(tgrep.tgrep_positions(pattern, [tree])), expected)
+
+    def tests_rel_indexed_children(self):
+        '''
+        Test matching nodes based on their index in their parent node.
+        '''
+        # Each scenario is (bracketed tree, [(pattern, index of the single
+        # expected top-level match)]), checked in the listed order.
+        scenarios = [
+            (
+                '(S (A x) (B x) (C x))',
+                [
+                    ('* >, S', 0),
+                    ('* >1 S', 0),
+                    ('* >2 S', 1),
+                    ('* >3 S', 2),
+                    ("* >' S", 2),
+                    ('* >-1 S', 2),
+                    ('* >-2 S', 1),
+                    ('* >-3 S', 0),
+                ],
+            ),
+            (
+                '(S (D (A x) (B x) (C x)) (E (B x) (C x) (A x)) (F (C x) (A x) (B x)))',
+                [
+                    ('* <, A', 0),
+                    ('* <1 A', 0),
+                    ('* <2 A', 2),
+                    ('* <3 A', 1),
+                    ("* <' A", 1),
+                    ('* <-1 A', 1),
+                    ('* <-2 A', 2),
+                    ('* <-3 A', 0),
+                ],
+            ),
+        ]
+        for bracketed, cases in scenarios:
+            tree = ParentedTree.fromstring(bracketed)
+            for pattern, child_index in cases:
+                self.assertEqual(
+                    list(tgrep.tgrep_positions(pattern, [tree])), [[(child_index,)]]
+                )
+
+    def test_rel_precedence(self):
+        '''
+        Test matching nodes based on precedence relations.
+        '''
+        tree = ParentedTree.fromstring(
+            '(S (NP (NP (PP x)) (NP (AP x)))'
+            ' (VP (AP (X (PP x)) (Y (AP x))))'
+            ' (NP (RC (NP (AP x)))))'
+        )
+        self.assertEqual(
+            list(tgrep.tgrep_positions('* . X', [tree])), [[(0,), (0, 1), (0, 1, 0)]]
+        )
+        self.assertEqual(
+            list(tgrep.tgrep_positions('* . Y', [tree])), [[(1, 0, 0), (1, 0, 0, 0)]]
+        )
+        self.assertEqual(
+            list(tgrep.tgrep_positions('* .. X', [tree])),
+            [[(0,), (0, 0), (0, 0, 0), (0, 1), (0, 1, 0)]],
+        )
+        self.assertEqual(
+            list(tgrep.tgrep_positions('* .. Y', [tree])),
+            [[(0,), (0, 0), (0, 0, 0), (0, 1), (0, 1, 0), (1, 0, 0), (1, 0, 0, 0)]],
+        )
+        self.assertEqual(
+            list(tgrep.tgrep_positions('* , X', [tree])), [[(1, 0, 1), (1, 0, 1, 0)]]
+        )
+        self.assertEqual(
+            list(tgrep.tgrep_positions('* , Y', [tree])),
+            [[(2,), (2, 0), (2, 0, 0), (2, 0, 0, 0)]],
+        )
+        self.assertEqual(
+            list(tgrep.tgrep_positions('* ,, X', [tree])),
+            [[(1, 0, 1), (1, 0, 1, 0), (2,), (2, 0), (2, 0, 0), (2, 0, 0, 0)]],
+        )
+        self.assertEqual(
+            list(tgrep.tgrep_positions('* ,, Y', [tree])),
+            [[(2,), (2, 0), (2, 0, 0), (2, 0, 0, 0)]],
+        )
+
+    def test_examples(self):
+        '''
+        Test the Basic Examples from the TGrep2 manual.
+        '''
+        tree = ParentedTree.fromstring('(S (NP (AP x)) (NP (PP x)))')
+        # This matches any NP node that immediately dominates a PP:
+        self.assertEqual(list(tgrep.tgrep_positions('NP < PP', [tree])), [[(1,)]])
+
+        tree = ParentedTree.fromstring('(S (NP x) (VP x) (NP (PP x)) (VP x))')
+        # This matches an NP that dominates a PP and is immediately
+        # followed by a VP:
+        self.assertEqual(list(tgrep.tgrep_positions('NP << PP . VP', [tree])), [[(2,)]])
+
+        tree = ParentedTree.fromstring(
+            '(S (NP (AP x)) (NP (PP x)) ' '(NP (DET x) (NN x)) (VP x))'
+        )
+        # This matches an NP that dominates a PP or is immediately
+        # followed by a VP:
+        self.assertEqual(
+            list(tgrep.tgrep_positions('NP << PP | . VP', [tree])), [[(1,), (2,)]]
+        )
+
+        tree = ParentedTree.fromstring(
+            '(S (NP (NP (PP x)) (NP (AP x)))'
+            ' (VP (AP (NP (PP x)) (NP (AP x))))'
+            ' (NP (RC (NP (AP x)))))'
+        )
+        # This matches an NP that does not dominate a PP. Also, the NP
+        # must either have a parent that is an NP or be dominated by a
+        # VP:
+        self.assertEqual(
+            list(tgrep.tgrep_positions('NP !<< PP [> NP | >> VP]', [tree])),
+            [[(0, 1), (1, 0, 1)]],
+        )
+
+        tree = ParentedTree.fromstring(
+            '(S (NP (AP (PP x) (VP x))) ' '(NP (AP (PP x) (NP x))) (NP x))'
+        )
+        # This matches an NP that dominates a PP which itself is
+        # immediately followed by a VP. Note the use of parentheses to
+        # group ". VP" with the PP rather than with the NP:
+        self.assertEqual(
+            list(tgrep.tgrep_positions('NP << (PP . VP)', [tree])), [[(0,)]]
+        )
+
+        tree = ParentedTree.fromstring(
+            '(S (NP (DET a) (NN cat) (PP (IN on) (NP x)))'
+            ' (NP (DET a) (NN cat) (PP (IN on) (NP x)) (PP x))'
+            ' (NP x))'
+        )
+        # This matches an NP whose last child is a PP that begins with
+        # the preposition "on":
+        self.assertEqual(
+            list(tgrep.tgrep_positions('NP <\' (PP <, (IN < on))', [tree])), [[(0,)]]
+        )
+
+        tree = ParentedTree.fromstring(
+            '(S (S (C x) (A (B x))) (S (C x) (A x)) ' '(S (D x) (A (B x))))'
+        )
+        # The following pattern matches an S which has a child A and
+        # another child that is a C and that the A has a child B:
+        self.assertEqual(
+            list(tgrep.tgrep_positions('S < (A < B) < C', [tree])), [[(0,)]]
+        )
+
+        tree = ParentedTree.fromstring(
+            '(S (S (A (B x) (C x))) (S (S (C x) (A (B x)))))'
+        )
+        # However, this pattern means that S has child A and that A
+        # has children B and C:
+        self.assertEqual(
+            list(tgrep.tgrep_positions('S < ((A < B) < C)', [tree])), [[(0,)]]
+        )
+
+        # It is equivalent to this:
+        self.assertEqual(
+            list(tgrep.tgrep_positions('S < (A < B < C)', [tree])), [[(0,)]]
+        )
+
+    def test_use_macros(self):
+        '''
+        Test defining and using tgrep2 macros.
+        '''
+        tree = ParentedTree.fromstring(
+            '(VP (VB sold) (NP (DET the) '
+            '(NN heiress)) (NP (NN deed) (PREP to) '
+            '(NP (DET the) (NN school) (NN house))))'
+        )
+        self.assertEqual(
+            list(
+                tgrep.tgrep_positions(
+                    '@ NP /^NP/;\n@ NN /^NN/;\n@NP !< @NP !$.. @NN', [tree]
+                )
+            ),
+            [[(1,), (2, 2)]],
+        )
+        # use undefined macro @CNP
+        self.assertRaises(
+            tgrep.TgrepException,
+            list,
+            tgrep.tgrep_positions(
+                '@ NP /^NP/;\n@ NN /^NN/;\n@CNP !< @NP !$.. @NN', [tree]
+            ),
+        )
+
+    def test_tokenize_node_labels(self):
+        '''Test tokenization of labeled nodes.'''
+        self.assertEqual(
+            tgrep.tgrep_tokenize('S < @SBJ < (@VP < (@VB $.. @OBJ))'),
+            [
+                'S',
+                '<',
+                '@SBJ',
+                '<',
+                '(',
+                '@VP',
+                '<',
+                '(',
+                '@VB',
+                '$..',
+                '@OBJ',
+                ')',
+                ')',
+            ],
+        )
+        self.assertEqual(
+            tgrep.tgrep_tokenize('S < @SBJ=s < (@VP=v < (@VB $.. @OBJ))'),
+            [
+                'S',
+                '<',
+                '@SBJ',
+                '=',
+                's',
+                '<',
+                '(',
+                '@VP',
+                '=',
+                'v',
+                '<',
+                '(',
+                '@VB',
+                '$..',
+                '@OBJ',
+                ')',
+                ')',
+            ],
+        )
+
+    def test_tokenize_segmented_patterns(self):
+        '''Test tokenization of segmented patterns.'''
+        self.assertEqual(
+            tgrep.tgrep_tokenize('S < @SBJ=s < (@VP=v < (@VB $.. @OBJ)) : =s .. =v'),
+            [
+                'S',
+                '<',
+                '@SBJ',
+                '=',
+                's',
+                '<',
+                '(',
+                '@VP',
+                '=',
+                'v',
+                '<',
+                '(',
+                '@VB',
+                '$..',
+                '@OBJ',
+                ')',
+                ')',
+                ':',
+                '=s',
+                '..',
+                '=v',
+            ],
+        )
+
+    def test_labeled_nodes(self):
+        '''
+        Test labeled nodes.
+
+        Test case from Emily M. Bender.
+        '''
+        search = '''
+            # macros
+            @ SBJ /SBJ/;
+            @ VP /VP/;
+            @ VB /VB/;
+            @ VPoB /V[PB]/;
+            @ OBJ /OBJ/;
+
+            # 1 svo
+            S < @SBJ=s < (@VP=v < (@VB $.. @OBJ)) : =s .. =v'''
+        sent1 = ParentedTree.fromstring(
+            '(S (NP-SBJ I) (VP (VB eat) (NP-OBJ (NNS apples))))'
+        )
+        sent2 = ParentedTree.fromstring(
+            '(S (VP (VB eat) (NP-OBJ (NNS apples))) (NP-SBJ I))'
+        )
+        search_firsthalf = search.split('\n\n')[0] + 'S < @SBJ < (@VP < (@VB $.. @OBJ))'
+        search_rewrite = 'S < (/.*SBJ/ $.. (/VP/ < (/VB/ $.. /.*OBJ/)))'
+
+        self.assertTrue(list(tgrep.tgrep_positions(search_firsthalf, [sent1]))[0])
+        self.assertTrue(list(tgrep.tgrep_positions(search, [sent1]))[0])
+        self.assertTrue(list(tgrep.tgrep_positions(search_rewrite, [sent1]))[0])
+        self.assertEqual(
+            list(tgrep.tgrep_positions(search, [sent1])),
+            list(tgrep.tgrep_positions(search_rewrite, [sent1])),
+        )
+        self.assertTrue(list(tgrep.tgrep_positions(search_firsthalf, [sent2]))[0])
+        self.assertFalse(list(tgrep.tgrep_positions(search, [sent2]))[0])
+        self.assertFalse(list(tgrep.tgrep_positions(search_rewrite, [sent2]))[0])
+        self.assertEqual(
+            list(tgrep.tgrep_positions(search, [sent2])),
+            list(tgrep.tgrep_positions(search_rewrite, [sent2])),
+        )
+
+    def test_multiple_conjs(self):
+        '''
+        Check that chains of three or more conjoined node relations are
+        matched correctly.
+        '''
+        sent = ParentedTree.fromstring('((A (B b) (C c)) (A (B b) (C c) (D d)))')
+
+        # Three relations: only the second A subtree also has a D child.
+        three_rels = list(tgrep.tgrep_positions('(A < B < C < D)', [sent]))
+        self.assertEqual(three_rels, [[(1,)]])
+
+        # Two relations: both A subtrees have B and C children.
+        two_rels = list(tgrep.tgrep_positions('(A < B < C)', [sent]))
+        self.assertEqual(two_rels, [[(0,), (1,)]])
+
+    def test_trailing_semicolon(self):
+        '''
+        Check that trailing semicolons on a tgrep2 search string do not
+        break parsing or change the matches.
+        '''
+        tree = ParentedTree.fromstring(
+            '(S (NP (DT the) (JJ big) (NN dog)) ' '(VP bit) (NP (DT a) (NN cat)))'
+        )
+        nn_positions = [[(0, 2), (2, 1)]]
+
+        # Zero, one and two trailing semicolons must all give the same result.
+        for pattern in ('NN', 'NN;', 'NN;;'):
+            self.assertEqual(list(tgrep.tgrep_positions(pattern, [tree])), nn_positions)
+
+
+if __name__ == '__main__':
+    unittest.main()
--- a/venv/lib/python3.7/site-packages/nltk/test/unit/test_tokenize.py
+++ b/venv/lib/python3.7/site-packages/nltk/test/unit/test_tokenize.py
@@ -0,0 +1,407 @@
+# -*- coding: utf-8 -*-
+"""
+Unit tests for nltk.tokenize.
+See also nltk/test/tokenize.doctest
+"""
+
+from __future__ import unicode_literals
+
+import unittest
+
+from nose import SkipTest
+from nose.tools import assert_equal
+
+from nltk.tokenize import (
+    punkt,
+    word_tokenize,
+    TweetTokenizer,
+    StanfordSegmenter,
+    TreebankWordTokenizer,
+    SyllableTokenizer,
+)
+
+
+class TestTokenize(unittest.TestCase):
+    def test_tweet_tokenizer(self):
+        """
+        Test TweetTokenizer using words with special and accented characters.
+        """
+
+        tokenizer = TweetTokenizer(strip_handles=True, reduce_len=True)
+        s9 = "@myke: Let's test these words: resumé España München français"
+        tokens = tokenizer.tokenize(s9)
+        expected = [
+            ':',
+            "Let's",
+            'test',
+            'these',
+            'words',
+            ':',
+            'resumé',
+            'España',
+            'München',
+            'français',
+        ]
+        self.assertEqual(tokens, expected)
+        
+    def test_sonority_sequencing_syllable_tokenizer(self):
+        """
+        Check that SyllableTokenizer splits a word into its syllables.
+        """
+        expected = ['jus', 'ti', 'fi', 'ca', 'tion']
+        syllables = SyllableTokenizer().tokenize('justification')
+        self.assertEqual(syllables, expected)
+
+    def test_stanford_segmenter_arabic(self):
+        """
+        Test the Stanford Word Segmenter for Arabic (default config); skip on LookupError.
+        """
+        try:
+            seg = StanfordSegmenter()
+            seg.default_config('ar')
+            sent = u'يبحث علم الحاسوب استخدام الحوسبة بجميع اشكالها لحل المشكلات'
+            segmented_sent = seg.segment(sent.split())
+            self.assertEqual(segmented_sent.split(), [
+                'يبحث',
+                'علم',
+                'الحاسوب',
+                'استخدام',
+                'الحوسبة',
+                'ب',
+                'جميع',
+                'اشكال',
+                'ها',
+                'ل',
+                'حل',
+                'المشكلات',
+            ])
+        except LookupError as e:
+            raise SkipTest(str(e))
+
+    def test_stanford_segmenter_chinese(self):
+        """
+        Test the Stanford Word Segmenter for Chinese (default config); skip on LookupError.
+        """
+        try:
+            seg = StanfordSegmenter()
+            seg.default_config('zh')
+            sent = u"这是斯坦福中文分词器测试"
+            segmented_sent = seg.segment(sent.split())
+            self.assertEqual(segmented_sent.split(), ['这', '是', '斯坦福', '中文', '分词器', '测试'])
+        except LookupError as e:
+            raise SkipTest(str(e))
+
+    def test_phone_tokenizer(self):
+        """
+        Test a string that resembles a phone number but contains a newline
+        """
+
+        # Should be recognized as a phone number, albeit one with multiple spaces
+        tokenizer = TweetTokenizer()
+        test1 = "(393)  928 -3010"
+        expected = ['(393)  928 -3010']
+        result = tokenizer.tokenize(test1)
+        self.assertEqual(result, expected)
+
+        # Due to newline, first three elements aren't part of a phone number;
+        # fourth is
+        test2 = "(393)\n928 -3010"
+        expected = ['(', '393', ')', "928 -3010"]
+        result = tokenizer.tokenize(test2)
+        self.assertEqual(result, expected)
+
+    def test_remove_handle(self):
+        """
+        Test remove_handle() from casual.py with specially crafted edge cases
+        """
+
+        tokenizer = TweetTokenizer(strip_handles=True)
+
+        # Simple example. Handles with just numbers should be allowed
+        test1 = "@twitter hello @twi_tter_. hi @12345 @123news"
+        expected = ['hello', '.', 'hi']
+        result = tokenizer.tokenize(test1)
+        self.assertEqual(result, expected)
+
+        # Handles are allowed to follow any of the following characters
+        test2 = "@n`@n~@n(@n)@n-@n=@n+@n\\@n|@n[@n]@n{@n}@n;@n:@n'@n\"@n/@n?@n.@n,@n<@n>@n @n\n@n ñ@n.ü@n.ç@n."
+        expected = [
+            '`',
+            '~',
+            '(',
+            ')',
+            '-',
+            '=',
+            '+',
+            '\\',
+            '|',
+            '[',
+            ']',
+            '{',
+            '}',
+            ';',
+            ':',
+            "'",
+            '"',
+            '/',
+            '?',
+            '.',
+            ',',
+            '<',
+            '>',
+            'ñ',
+            '.',
+            'ü',
+            '.',
+            'ç',
+            '.',
+        ]
+        result = tokenizer.tokenize(test2)
+        self.assertEqual(result, expected)
+
+        # Handles are NOT allowed to follow any of the following characters
+        test3 = "a@n j@n z@n A@n L@n Z@n 1@n 4@n 7@n 9@n 0@n _@n !@n @@n #@n $@n %@n &@n *@n"
+        expected = [
+            'a',
+            '@n',
+            'j',
+            '@n',
+            'z',
+            '@n',
+            'A',
+            '@n',
+            'L',
+            '@n',
+            'Z',
+            '@n',
+            '1',
+            '@n',
+            '4',
+            '@n',
+            '7',
+            '@n',
+            '9',
+            '@n',
+            '0',
+            '@n',
+            '_',
+            '@n',
+            '!',
+            '@n',
+            '@',
+            '@n',
+            '#',
+            '@n',
+            '$',
+            '@n',
+            '%',
+            '@n',
+            '&',
+            '@n',
+            '*',
+            '@n',
+        ]
+        result = tokenizer.tokenize(test3)
+        self.assertEqual(result, expected)
+
+        # Handles are allowed to precede the following characters
+        test4 = "@n!a @n#a @n$a @n%a @n&a @n*a"
+        expected = ['!', 'a', '#', 'a', '$', 'a', '%', 'a', '&', 'a', '*', 'a']
+        result = tokenizer.tokenize(test4)
+        self.assertEqual(result, expected)
+
+        # Tests interactions with special symbols and multiple @
+        test5 = "@n!@n @n#@n @n$@n @n%@n @n&@n @n*@n @n@n @@n @n@@n @n_@n @n7@n @nj@n"
+        expected = [
+            '!',
+            '@n',
+            '#',
+            '@n',
+            '$',
+            '@n',
+            '%',
+            '@n',
+            '&',
+            '@n',
+            '*',
+            '@n',
+            '@n',
+            '@n',
+            '@',
+            '@n',
+            '@n',
+            '@',
+            '@n',
+            '@n_',
+            '@n',
+            '@n7',
+            '@n',
+            '@nj',
+            '@n',
+        ]
+        result = tokenizer.tokenize(test5)
+        self.assertEqual(result, expected)
+
+        # Tests that handles can have a max length of 20
+        test6 = "@abcdefghijklmnopqrstuvwxyz @abcdefghijklmnopqrst1234 @abcdefghijklmnopqrst_ @abcdefghijklmnopqrstendofhandle"
+        expected = ['uvwxyz', '1234', '_', 'endofhandle']
+        result = tokenizer.tokenize(test6)
+        self.assertEqual(result, expected)
+
+        # Edge case where an @ comes directly after a long handle
+        test7 = "@abcdefghijklmnopqrstu@abcde @abcdefghijklmnopqrst@abcde @abcdefghijklmnopqrst_@abcde @abcdefghijklmnopqrst5@abcde"
+        expected = [
+            'u',
+            '@abcde',
+            '@abcdefghijklmnopqrst',
+            '@abcde',
+            '_',
+            '@abcde',
+            '5',
+            '@abcde',
+        ]
+        result = tokenizer.tokenize(test7)
+        self.assertEqual(result, expected)
+
+    def test_treebank_span_tokenizer(self):
+        """
+        Test TreebankWordTokenizer.span_tokenize function
+        """
+
+        tokenizer = TreebankWordTokenizer()
+
+        # Test case in the docstring
+        test1 = "Good muffins cost $3.88\nin New (York).  Please (buy) me\ntwo of them.\n(Thanks)."
+        expected = [
+            (0, 4),
+            (5, 12),
+            (13, 17),
+            (18, 19),
+            (19, 23),
+            (24, 26),
+            (27, 30),
+            (31, 32),
+            (32, 36),
+            (36, 37),
+            (37, 38),
+            (40, 46),
+            (47, 48),
+            (48, 51),
+            (51, 52),
+            (53, 55),
+            (56, 59),
+            (60, 62),
+            (63, 68),
+            (69, 70),
+            (70, 76),
+            (76, 77),
+            (77, 78),
+        ]
+        result = list(tokenizer.span_tokenize(test1))
+        self.assertEqual(result, expected)
+
+        # Test case with double quotation
+        test2 = "The DUP is similar to the \"religious right\" in the United States and takes a hardline stance on social issues"
+        expected = [
+            (0, 3),
+            (4, 7),
+            (8, 10),
+            (11, 18),
+            (19, 21),
+            (22, 25),
+            (26, 27),
+            (27, 36),
+            (37, 42),
+            (42, 43),
+            (44, 46),
+            (47, 50),
+            (51, 57),
+            (58, 64),
+            (65, 68),
+            (69, 74),
+            (75, 76),
+            (77, 85),
+            (86, 92),
+            (93, 95),
+            (96, 102),
+            (103, 109),
+        ]
+        result = list(tokenizer.span_tokenize(test2))
+        self.assertEqual(result, expected)
+
+        # Test case with double quotation as well as converted quotations
+        test3 = "The DUP is similar to the \"religious right\" in the United States and takes a ``hardline'' stance on social issues"
+        expected = [
+            (0, 3),
+            (4, 7),
+            (8, 10),
+            (11, 18),
+            (19, 21),
+            (22, 25),
+            (26, 27),
+            (27, 36),
+            (37, 42),
+            (42, 43),
+            (44, 46),
+            (47, 50),
+            (51, 57),
+            (58, 64),
+            (65, 68),
+            (69, 74),
+            (75, 76),
+            (77, 79),
+            (79, 87),
+            (87, 89),
+            (90, 96),
+            (97, 99),
+            (100, 106),
+            (107, 113),
+        ]
+        result = list(tokenizer.span_tokenize(test3))
+        self.assertEqual(result, expected)
+
+    def test_word_tokenize(self):
+        """Check word_tokenize on apostrophes used as quotes and in contractions."""
+        cases = [
+            (
+                "The 'v', I've been fooled but I'll seek revenge.",
+                ['The', "'", 'v', "'", ',', 'I', "'ve", 'been', 'fooled',
+                 'but', 'I', "'ll", 'seek', 'revenge', '.'],
+            ),
+            ("'v' 're'", ["'", 'v', "'", "'re", "'"]),
+        ]
+
+        for sentence, expected in cases:
+            self.assertEqual(word_tokenize(sentence), expected)
+
+    def test_punkt_pair_iter(self):
+        """Test that punkt._pair_iter pairs each item with its successor (None for the last)."""
+        test_cases = [
+            ('12', [('1', '2'), ('2', None)]),
+            ('123', [('1', '2'), ('2', '3'), ('3', None)]),
+            ('1234', [('1', '2'), ('2', '3'), ('3', '4'), ('4', None)]),
+        ]
+
+        for test_input, expected_output in test_cases:
+            actual_output = list(punkt._pair_iter(test_input))
+            # Use the TestCase assertion, like the other tests in this class.
+            self.assertEqual(actual_output, expected_output)
+
+    def test_punkt_pair_iter_handles_stop_iteration_exception(self):
+        # test input to trigger StopIteration from next()
+        it = iter([])
+        # call method under test and produce a generator
+        gen = punkt._pair_iter(it)
+        # unpack generator, ensure that no error is raised
+        list(gen)
+
+    def test_punkt_tokenize_words_handles_stop_iteration_exception(self):
+        obj = punkt.PunktBaseClass()
+
+        class TestPunktTokenizeWordsMock:
+            def word_tokenize(self, s):
+                return iter([])
+
+        obj._lang_vars = TestPunktTokenizeWordsMock()
+        # unpack generator, ensure that no error is raised
+        list(obj._tokenize_words('test'))
--- a/venv/lib/python3.7/site-packages/nltk/test/unit/test_twitter_auth.py
+++ b/venv/lib/python3.7/site-packages/nltk/test/unit/test_twitter_auth.py
@@ -0,0 +1,181 @@
+# -*- coding: utf-8 -*-
+"""
+Tests for static parts of Twitter package
+"""
+
+import os
+import unittest
+from nose import SkipTest
+
+try:
+    import twython
+except ImportError as e:
+    raise SkipTest("The twython library has not been installed.")
+
+from nltk.twitter import Authenticate
+
+
+class TestCredentials(unittest.TestCase):
+    """
+    Tests that Twitter credentials information from file is handled correctly.
+    """
+
+    def setUp(self):
+        self.subdir = os.path.join(os.path.dirname(__file__), 'files')
+        os.environ['TWITTER'] = 'twitter-files'  # set first: presumably read by Authenticate()
+        self.auth = Authenticate()
+
+    def test_environment(self):
+        """
+        Check that the credentials subdir matches the TWITTER environment variable.
+        """
+        expected = os.environ['TWITTER']
+        self.assertEqual(os.path.basename(self.auth.creds_subdir), expected)
+
+    def test_empty_subdir1(self):
+        """
+        Setting subdir to empty path should raise an error.
+        """
+        try:
+            self.auth.load_creds(subdir='')
+        # raises ValueError (zero length field name in format) for python 2.6
+        # OSError for the rest
+        except OSError:
+            pass
+        except ValueError:
+            pass
+        except Exception as e:
+            self.fail('Unexpected exception thrown: %s' % e)
+        else:
+            self.fail('OSError exception not thrown.')
+
+    def test_empty_subdir2(self):
+        """
+        Setting subdir to `None` should raise an error.
+        """
+        self.auth.creds_subdir = None
+        try:
+            self.auth.load_creds()
+        except ValueError:
+            pass
+        except Exception as e:
+            self.fail('Unexpected exception thrown: %s' % e)
+        else:
+            self.fail('ValueError exception not thrown.')
+
+    def test_missingdir(self):
+        """
+        Setting subdir to nonexistent directory should raise an error.
+        """
+        try:
+            self.auth.load_creds(subdir='/nosuchdir')
+        # raises ValueError (zero length field name in format) for python 2.6
+        # OSError for the rest
+        except OSError:
+            pass
+        except ValueError:
+            pass
+        except Exception as e:
+            self.fail('Unexpected exception thrown: %s' % e)
+        else:
+            self.fail('OSError exception not thrown.')
+
+    def test_missingfile1(self):
+        """
+        Defaults for authentication will fail since 'credentials.txt' not
+        present in default subdir, as read from `os.environ['TWITTER']`.
+        """
+        try:
+            self.auth.load_creds()
+        # raises ValueError (zero length field name in format) for python 2.6
+        # OSError for the rest
+        except OSError:
+            pass
+        except ValueError:
+            pass
+        except Exception as e:
+            self.fail('Unexpected exception thrown: %s' % e)
+        else:
+            self.fail('OSError exception not thrown.')
+
+    def test_missingfile2(self):
+        """
+        Credentials file 'foobar' cannot be found in default subdir.
+        """
+        try:
+            self.auth.load_creds(creds_file='foobar')
+        # raises ValueError (zero length field name in format) for python 2.6
+        # OSError for the rest
+        except OSError:
+            pass
+        except ValueError:
+            pass
+        except Exception as e:
+            self.fail('Unexpected exception thrown: %s' % e)
+        else:
+            self.fail('OSError exception not thrown.')
+
+    def test_incomplete_file(self):
+        """
+        Credentials file 'bad_oauth1-1.txt' is incomplete
+        """
+        try:
+            self.auth.load_creds(creds_file='bad_oauth1-1.txt', subdir=self.subdir)
+        except ValueError:
+            pass
+        except Exception as e:
+            self.fail('Unexpected exception thrown: %s' % e)
+        else:
+            self.fail('ValueError exception not thrown.')
+
+    def test_malformed_file1(self):
+        """
+        First key in credentials file 'bad_oauth1-2.txt' is ill-formed
+        """
+        try:
+            self.auth.load_creds(creds_file='bad_oauth1-2.txt', subdir=self.subdir)
+        except ValueError:
+            pass
+        except Exception as e:
+            self.fail('Unexpected exception thrown: %s' % e)
+        else:
+            self.fail('ValueError exception not thrown.')
+
+    def test_malformed_file2(self):
+        """
+        Credentials file 'bad_oauth1-3.txt' is ill-formed
+        """
+        try:
+            self.auth.load_creds(creds_file='bad_oauth1-3.txt', subdir=self.subdir)
+        except ValueError:
+            pass
+        except Exception as e:
+            self.fail('Unexpected exception thrown: %s' % e)
+        else:
+            self.fail('ValueError exception not thrown.')
+
+    def test_correct_path(self):
+        """
+        Path to default credentials file is well-formed, given specified subdir.
+        """
+        self.auth.load_creds(subdir=self.subdir)
+        expected = os.path.normpath(os.path.join(self.subdir, self.auth.creds_file))
+        self.assertEqual(os.path.normpath(self.auth.creds_fullpath), expected)
+
+    def test_correct_file1(self):
+        """
+        Default credentials file is identified
+        """
+        self.auth.load_creds(subdir=self.subdir)
+        self.assertEqual(self.auth.creds_file, 'credentials.txt')
+
+    def test_correct_file2(self):
+        """
+        Default credentials file has been read correctly
+        """
+        oauth = self.auth.load_creds(subdir=self.subdir)
+        self.assertEqual(oauth['app_key'], 'a')
+
+
+if __name__ == '__main__':
+    unittest.main()
--- a/venv/lib/python3.7/site-packages/nltk/test/unit/test_wordnet.py
+++ b/venv/lib/python3.7/site-packages/nltk/test/unit/test_wordnet.py
@@ -0,0 +1,221 @@
+# -*- coding: utf-8 -*-
+"""
+Unit tests for nltk.corpus.wordnet
+See also nltk/test/wordnet.doctest
+"""
+
+from __future__ import unicode_literals
+
+import collections
+import os
+import unittest
+
+from nose import SkipTest
+
+from nltk.corpus.reader.wordnet import WordNetCorpusReader
+from nltk.corpus import wordnet as wn
+from nltk.corpus import wordnet_ic as wnic
+from nltk.data import find as find_data
+
+
+# Make sure the WordNet corpus is loaded before the aliases are bound, then
+# keep short names for the synset/lemma lookups used by the tests below.
+wn.ensure_loaded()
+S = wn.synset
+L = wn.lemma
+
+
+class WordnNetDemo(unittest.TestCase):
+    """
+    Checks lookups, lexical relations and similarity scores obtained from
+    nltk.corpus.wordnet against hard-coded expected values.
+
+    ``S`` and ``L`` are the module-level aliases for ``wn.synset`` and
+    ``wn.lemma``.
+    """
+
+    def test_retrieve_synset(self):
+        # A synset can be looked up through any of its lemma/sense names;
+        # name() reports the name the synset itself carries.
+        move_synset = S('go.v.21')
+        self.assertEqual(move_synset.name(), "move.v.15")
+        self.assertEqual(move_synset.lemma_names(), ['move', 'go'])
+        self.assertEqual(
+            move_synset.definition(), "have a turn; make one's move in a game"
+        )
+        self.assertEqual(move_synset.examples(), ['Can I go now?'])
+
+    def test_retrieve_synsets(self):
+        # synsets() filtered by part of speech; sorted for a stable order.
+        self.assertEqual(sorted(wn.synsets('zap', pos='n')), [S('zap.n.01')])
+        self.assertEqual(
+            sorted(wn.synsets('zap', pos='v')),
+            [S('microwave.v.01'), S('nuke.v.01'), S('zap.v.01'), S('zap.v.02')],
+        )
+
+    def test_hyperhyponyms(self):
+        # Not every synset has hypernyms()
+        self.assertEqual(S('travel.v.01').hypernyms(), [])
+        self.assertEqual(S('travel.v.02').hypernyms(), [S('travel.v.03')])
+        self.assertEqual(S('travel.v.03').hypernyms(), [])
+
+        # Test hyper-/hyponyms.
+        self.assertEqual(S('breakfast.n.1').hypernyms(), [S('meal.n.01')])
+        first_five_meal_hypo = [
+            S('banquet.n.02'),
+            S('bite.n.04'),
+            S('breakfast.n.01'),
+            S('brunch.n.01'),
+            S('buffet.n.02'),
+        ]
+        self.assertEqual(sorted(S('meal.n.1').hyponyms()[:5]), first_five_meal_hypo)
+        self.assertEqual(S('Austen.n.1').instance_hypernyms(), [S('writer.n.01')])
+        first_five_composer_hypo = [
+            S('ambrose.n.01'),
+            S('bach.n.01'),
+            S('barber.n.01'),
+            S('bartok.n.01'),
+            S('beethoven.n.01'),
+        ]
+        self.assertEqual(
+            S('composer.n.1').instance_hyponyms()[:5], first_five_composer_hypo
+        )
+
+        # Test root hyper-/hyponyms
+        self.assertEqual(S('person.n.01').root_hypernyms(), [S('entity.n.01')])
+        self.assertEqual(S('sail.v.01').root_hypernyms(), [S('travel.v.01')])
+        self.assertEqual(
+            S('fall.v.12').root_hypernyms(), [S('act.v.01'), S('fall.v.17')]
+        )
+
+    def test_derivationally_related_forms(self):
+        # Test `derivationally_related_forms()`
+        self.assertEqual(
+            L('zap.v.03.nuke').derivationally_related_forms(),
+            [L('atomic_warhead.n.01.nuke')],
+        )
+        self.assertEqual(
+            L('zap.v.03.atomize').derivationally_related_forms(),
+            [L('atomization.n.02.atomization')],
+        )
+        self.assertEqual(
+            L('zap.v.03.atomise').derivationally_related_forms(),
+            [L('atomization.n.02.atomisation')],
+        )
+        self.assertEqual(L('zap.v.03.zap').derivationally_related_forms(), [])
+
+    def test_meronyms_holonyms(self):
+        # Test meronyms, holonyms.
+        self.assertEqual(
+            S('dog.n.01').member_holonyms(), [S('canis.n.01'), S('pack.n.06')]
+        )
+        self.assertEqual(S('dog.n.01').part_meronyms(), [S('flag.n.07')])
+
+        self.assertEqual(S('faculty.n.2').member_meronyms(), [S('professor.n.01')])
+        self.assertEqual(S('copilot.n.1').member_holonyms(), [S('crew.n.01')])
+
+        self.assertEqual(
+            S('table.n.2').part_meronyms(),
+            [S('leg.n.03'), S('tabletop.n.01'), S('tableware.n.01')],
+        )
+        self.assertEqual(S('course.n.7').part_holonyms(), [S('meal.n.01')])
+
+        self.assertEqual(
+            S('water.n.1').substance_meronyms(), [S('hydrogen.n.01'), S('oxygen.n.01')]
+        )
+        self.assertEqual(
+            S('gin.n.1').substance_holonyms(),
+            [
+                S('gin_and_it.n.01'),
+                S('gin_and_tonic.n.01'),
+                S('martini.n.01'),
+                S('pink_lady.n.01'),
+            ],
+        )
+
+    def test_antonyms(self):
+        # Test antonyms.
+        self.assertEqual(
+            L('leader.n.1.leader').antonyms(), [L('follower.n.01.follower')]
+        )
+        self.assertEqual(
+            L('increase.v.1.increase').antonyms(), [L('decrease.v.01.decrease')]
+        )
+
+    def test_misc_relations(self):
+        # Test misc relations.
+        self.assertEqual(S('snore.v.1').entailments(), [S('sleep.v.01')])
+        self.assertEqual(
+            S('heavy.a.1').similar_tos(),
+            [
+                S('dense.s.03'),
+                S('doughy.s.01'),
+                S('heavier-than-air.s.01'),
+                S('hefty.s.02'),
+                S('massive.s.04'),
+                S('non-buoyant.s.01'),
+                S('ponderous.s.02'),
+            ],
+        )
+        self.assertEqual(S('light.a.1').attributes(), [S('weight.n.01')])
+        self.assertEqual(S('heavy.a.1').attributes(), [S('weight.n.01')])
+
+        # Test pertainyms.
+        self.assertEqual(
+            L('English.a.1.English').pertainyms(), [L('england.n.01.England')]
+        )
+
+    def test_lch(self):
+        # Test LCH (lowest common hypernyms).
+        self.assertEqual(
+            S('person.n.01').lowest_common_hypernyms(S('dog.n.01')),
+            [S('organism.n.01')],
+        )
+        self.assertEqual(
+            S('woman.n.01').lowest_common_hypernyms(S('girlfriend.n.02')),
+            [S('woman.n.01')],
+        )
+
+    def test_domains(self):
+        # Test domains.
+        self.assertEqual(S('code.n.03').topic_domains(), [S('computer_science.n.01')])
+        self.assertEqual(S('pukka.a.01').region_domains(), [S('india.n.01')])
+        self.assertEqual(S('freaky.a.01').usage_domains(), [S('slang.n.02')])
+
+    def test_in_topic_domains(self):
+        # Test in domains.
+        self.assertEqual(
+            S('computer_science.n.01').in_topic_domains()[0], S('access.n.05')
+        )
+        self.assertEqual(S('germany.n.01').in_region_domains()[23], S('trillion.n.02'))
+        self.assertEqual(S('slang.n.02').in_usage_domains()[1], S('airhead.n.01'))
+
+    def test_wordnet_similarities(self):
+        # Path based similarities.
+        self.assertAlmostEqual(S('cat.n.01').path_similarity(S('cat.n.01')), 1.0)
+        self.assertAlmostEqual(S('dog.n.01').path_similarity(S('cat.n.01')), 0.2)
+        self.assertAlmostEqual(
+            S('dog.n.01').lch_similarity(S('cat.n.01')), 2.028, places=3
+        )
+        self.assertAlmostEqual(
+            S('dog.n.01').wup_similarity(S('cat.n.01')), 0.8571, places=3
+        )
+        # Information Content similarities.
+        brown_ic = wnic.ic('ic-brown.dat')
+        self.assertAlmostEqual(
+            S('dog.n.01').jcn_similarity(S('cat.n.01'), brown_ic), 0.4497, places=3
+        )
+        semcor_ic = wnic.ic('ic-semcor.dat')
+        self.assertAlmostEqual(
+            S('dog.n.01').lin_similarity(S('cat.n.01'), semcor_ic), 0.8863, places=3
+        )
+
+    def test_omw_lemma_no_trailing_underscore(self):
+        # Lemma names for a non-English language ('slv') must not end in '_'.
+        expected = [
+            u'popolna_sprememba_v_mišljenju',
+            u'popoln_obrat',
+            u'preobrat',
+            u'preobrat_v_mišljenju'
+            ]
+        self.assertEqual(S('about-face.n.02').lemma_names(lang='slv'), expected)
+
+    def test_iterable_type_for_all_lemma_names(self):
+        # Duck-test for iterables.
+        # See https://stackoverflow.com/a/36230057/610569
+        cat_lemmas = wn.all_lemma_names(lang='cat')
+        eng_lemmas = wn.all_lemma_names(lang='eng')
+
+        # Run the same three checks on each result.  Looping keeps every
+        # check bound to the object under test, so the Python 2 ``next``
+        # fallback is looked up on the right iterator for both languages.
+        for lemmas in (eng_lemmas, cat_lemmas):
+            self.assertTrue(hasattr(lemmas, '__iter__'))
+            self.assertTrue(hasattr(lemmas, '__next__') or hasattr(lemmas, 'next'))
+            # An iterator returns itself from __iter__().
+            self.assertTrue(lemmas.__iter__() is lemmas)
--- a/venv/lib/python3.7/site-packages/nltk/test/unit/translate/init.py
+++ b/venv/lib/python3.7/site-packages/nltk/test/unit/translate/init.py
--- a/venv/lib/python3.7/site-packages/nltk/test/unit/translate/pycache/init.cpython-37.pyc
+++ b/venv/lib/python3.7/site-packages/nltk/test/unit/translate/pycache/init.cpython-37.pyc
--- a/venv/lib/python3.7/site-packages/nltk/test/unit/translate/pycache/test_bleu.cpython-37.pyc
+++ b/venv/lib/python3.7/site-packages/nltk/test/unit/translate/pycache/test_bleu.cpython-37.pyc
--- a/venv/lib/python3.7/site-packages/nltk/test/unit/translate/pycache/test_gdfa.cpython-37.pyc
+++ b/venv/lib/python3.7/site-packages/nltk/test/unit/translate/pycache/test_gdfa.cpython-37.pyc
--- a/venv/lib/python3.7/site-packages/nltk/test/unit/translate/pycache/test_ibm1.cpython-37.pyc
+++ b/venv/lib/python3.7/site-packages/nltk/test/unit/translate/pycache/test_ibm1.cpython-37.pyc
--- a/venv/lib/python3.7/site-packages/nltk/test/unit/translate/pycache/test_ibm2.cpython-37.pyc
+++ b/venv/lib/python3.7/site-packages/nltk/test/unit/translate/pycache/test_ibm2.cpython-37.pyc
--- a/venv/lib/python3.7/site-packages/nltk/test/unit/translate/pycache/test_ibm3.cpython-37.pyc
+++ b/venv/lib/python3.7/site-packages/nltk/test/unit/translate/pycache/test_ibm3.cpython-37.pyc
--- a/venv/lib/python3.7/site-packages/nltk/test/unit/translate/pycache/test_ibm4.cpython-37.pyc
+++ b/venv/lib/python3.7/site-packages/nltk/test/unit/translate/pycache/test_ibm4.cpython-37.pyc
--- a/venv/lib/python3.7/site-packages/nltk/test/unit/translate/pycache/test_ibm5.cpython-37.pyc
+++ b/venv/lib/python3.7/site-packages/nltk/test/unit/translate/pycache/test_ibm5.cpython-37.pyc
--- a/venv/lib/python3.7/site-packages/nltk/test/unit/translate/pycache/test_ibm_model.cpython-37.pyc
+++ b/venv/lib/python3.7/site-packages/nltk/test/unit/translate/pycache/test_ibm_model.cpython-37.pyc
--- a/venv/lib/python3.7/site-packages/nltk/test/unit/translate/pycache/test_nist.cpython-37.pyc
+++ b/venv/lib/python3.7/site-packages/nltk/test/unit/translate/pycache/test_nist.cpython-37.pyc
--- a/venv/lib/python3.7/site-packages/nltk/test/unit/translate/pycache/test_stack_decoder.cpython-37.pyc
+++ b/venv/lib/python3.7/site-packages/nltk/test/unit/translate/pycache/test_stack_decoder.cpython-37.pyc
--- a/venv/lib/python3.7/site-packages/nltk/test/unit/translate/test_bleu.py
+++ b/venv/lib/python3.7/site-packages/nltk/test/unit/translate/test_bleu.py
@@ -0,0 +1,271 @@
+# -*- coding: utf-8 -*-
+"""
+Tests for BLEU translation evaluation metric
+"""
+
+import functools
+import io
+import unittest
+
+from nltk.data import find
+from nltk.translate.bleu_score import (
+    modified_precision,
+    brevity_penalty,
+    closest_ref_length,
+)
+from nltk.translate.bleu_score import sentence_bleu, corpus_bleu, SmoothingFunction
+
+
+class TestBLEU(unittest.TestCase):
+    """
+    Checks modified n-gram precision, brevity penalty and sentence-level
+    BLEU against hand-computed values.
+
+    All checks use unittest assertion methods instead of bare ``assert``
+    statements, which Python removes when it is started with ``-O``.
+    """
+
+    def test_modified_precision(self):
+        """
+        Examples from the original BLEU paper
+        http://www.aclweb.org/anthology/P02-1040.pdf
+        """
+        # Example 1: the "the*" example.
+        # Reference sentences.
+        ref1 = 'the cat is on the mat'.split()
+        ref2 = 'there is a cat on the mat'.split()
+        # Hypothesis sentence(s).
+        hyp1 = 'the the the the the the the'.split()
+
+        references = [ref1, ref2]
+
+        # Testing modified unigram precision.
+        hyp1_unigram_precision = float(modified_precision(references, hyp1, n=1))
+        self.assertEqual(round(hyp1_unigram_precision, 4), 0.2857)
+        # With assertAlmostEqual at 4 place precision.
+        self.assertAlmostEqual(hyp1_unigram_precision, 0.28571428, places=4)
+
+        # Testing modified bigram precision.
+        self.assertEqual(float(modified_precision(references, hyp1, n=2)), 0.0)
+
+        # Example 2: the "of the" example.
+        # Reference sentences
+        ref1 = str(
+            'It is a guide to action that ensures that the military '
+            'will forever heed Party commands'
+        ).split()
+        ref2 = str(
+            'It is the guiding principle which guarantees the military '
+            'forces always being under the command of the Party'
+        ).split()
+        ref3 = str(
+            'It is the practical guide for the army always to heed '
+            'the directions of the party'
+        ).split()
+        # Hypothesis sentence(s).
+        hyp1 = 'of the'.split()
+
+        references = [ref1, ref2, ref3]
+        # Testing modified unigram precision.
+        self.assertEqual(float(modified_precision(references, hyp1, n=1)), 1.0)
+
+        # Testing modified bigram precision.
+        self.assertEqual(float(modified_precision(references, hyp1, n=2)), 1.0)
+
+        # Example 3: Proper MT outputs.
+        hyp1 = str(
+            'It is a guide to action which ensures that the military '
+            'always obeys the commands of the party'
+        ).split()
+        hyp2 = str(
+            'It is to insure the troops forever hearing the activity '
+            'guidebook that party direct'
+        ).split()
+
+        references = [ref1, ref2, ref3]
+
+        # Unigram precision.
+        hyp1_unigram_precision = float(modified_precision(references, hyp1, n=1))
+        hyp2_unigram_precision = float(modified_precision(references, hyp2, n=1))
+        # Test unigram precision with assertAlmostEqual at 4 place precision.
+        self.assertAlmostEqual(hyp1_unigram_precision, 0.94444444, places=4)
+        self.assertAlmostEqual(hyp2_unigram_precision, 0.57142857, places=4)
+        # Test unigram precision with rounding.
+        self.assertEqual(round(hyp1_unigram_precision, 4), 0.9444)
+        self.assertEqual(round(hyp2_unigram_precision, 4), 0.5714)
+
+        # Bigram precision
+        hyp1_bigram_precision = float(modified_precision(references, hyp1, n=2))
+        hyp2_bigram_precision = float(modified_precision(references, hyp2, n=2))
+        # Test bigram precision with assertAlmostEqual at 4 place precision.
+        self.assertAlmostEqual(hyp1_bigram_precision, 0.58823529, places=4)
+        self.assertAlmostEqual(hyp2_bigram_precision, 0.07692307, places=4)
+        # Test bigram precision with rounding.
+        self.assertEqual(round(hyp1_bigram_precision, 4), 0.5882)
+        self.assertEqual(round(hyp2_bigram_precision, 4), 0.0769)
+
+    def test_brevity_penalty(self):
+        # Test case from brevity_penalty_closest function in mteval-v13a.pl.
+        # Same test cases as in the doctest in nltk.translate.bleu_score.py
+        references = [['a'] * 11, ['a'] * 8]
+        hypothesis = ['a'] * 7
+        hyp_len = len(hypothesis)
+        closest_ref_len = closest_ref_length(references, hyp_len)
+        self.assertAlmostEqual(
+            brevity_penalty(closest_ref_len, hyp_len), 0.8669, places=4
+        )
+
+        # One reference now has exactly the hypothesis length, for which a
+        # penalty of 1.0 is expected.
+        references = [['a'] * 11, ['a'] * 8, ['a'] * 6, ['a'] * 7]
+        hypothesis = ['a'] * 7
+        hyp_len = len(hypothesis)
+        closest_ref_len = closest_ref_length(references, hyp_len)
+        self.assertEqual(brevity_penalty(closest_ref_len, hyp_len), 1.0)
+
+    def test_zero_matches(self):
+        # Test case where there's 0 matches
+        references = ['The candidate has no alignment to any of the references'.split()]
+        hypothesis = 'John loves Mary'.split()
+
+        # Test BLEU for every n-gram order n from 1 up to, but not
+        # including, len(hypothesis).
+        for n in range(1, len(hypothesis)):
+            weights = [1.0 / n] * n  # Uniform weights.
+            self.assertEqual(sentence_bleu(references, hypothesis, weights), 0)
+
+    def test_full_matches(self):
+        # Test case where there's 100% matches
+        references = ['John loves Mary'.split()]
+        hypothesis = 'John loves Mary'.split()
+
+        # Test BLEU for every n-gram order n from 1 up to, but not
+        # including, len(hypothesis).
+        for n in range(1, len(hypothesis)):
+            weights = [1.0 / n] * n  # Uniform weights.
+            self.assertEqual(sentence_bleu(references, hypothesis, weights), 1.0)
+
+    def test_partial_matches_hypothesis_longer_than_reference(self):
+        references = ['John loves Mary'.split()]
+        hypothesis = 'John loves Mary who loves Mike'.split()
+        # Since no 4-grams matches were found the result should be zero
+        # exp(w_1 * 1 * w_2 * 1 * w_3 * 1 * w_4 * -inf) = 0
+        self.assertAlmostEqual(sentence_bleu(references, hypothesis), 0.0, places=4)
+        # Checks that the warning has been raised because len(reference) < 4.
+        try:
+            self.assertWarns(UserWarning, sentence_bleu, references, hypothesis)
+        except AttributeError:
+            pass  # unittest.TestCase.assertWarns is only supported in Python >= 3.2.
+
+
+# @unittest.skip("Skipping fringe cases for BLEU.")
+class TestBLEUFringeCases(unittest.TestCase):
+    """
+    Sentence-level BLEU on degenerate inputs: n-gram order larger than the
+    sentence, empty hypothesis and/or reference, and very short sentences.
+
+    Checks use unittest assertion methods so that they are not removed when
+    Python runs with ``-O``.
+    """
+
+    def test_case_where_n_is_bigger_than_hypothesis_length(self):
+        # Test BLEU to nth order of n-grams, where n > len(hypothesis).
+        references = ['John loves Mary ?'.split()]
+        hypothesis = 'John loves Mary'.split()
+        n = len(hypothesis) + 1  # One order beyond the hypothesis length.
+        weights = [1.0 / n] * n  # Uniform weights.
+        # Since no n-grams matches were found the result should be zero
+        # exp(w_1 * 1 * w_2 * 1 * w_3 * 1 * w_4 * -inf) = 0
+        self.assertAlmostEqual(
+            sentence_bleu(references, hypothesis, weights), 0.0, places=4
+        )
+        # Checks that the warning has been raised because len(hypothesis) < 4.
+        try:
+            self.assertWarns(UserWarning, sentence_bleu, references, hypothesis)
+        except AttributeError:
+            pass  # unittest.TestCase.assertWarns is only supported in Python >= 3.2.
+
+        # Test case where n > len(hypothesis) but so is n > len(reference), and
+        # it's a special case where reference == hypothesis.
+        references = ['John loves Mary'.split()]
+        hypothesis = 'John loves Mary'.split()
+        # Since no 4-grams matches were found the result should be zero
+        # exp(w_1 * 1 * w_2 * 1 * w_3 * 1 * w_4 * -inf) = 0
+        self.assertAlmostEqual(
+            sentence_bleu(references, hypothesis, weights), 0.0, places=4
+        )
+
+    def test_empty_hypothesis(self):
+        # Test case where the hypothesis is empty.
+        references = ['The candidate has no alignment to any of the references'.split()]
+        hypothesis = []
+        self.assertEqual(sentence_bleu(references, hypothesis), 0)
+
+    def test_empty_references(self):
+        # Test case where the reference is empty.
+        references = [[]]
+        hypothesis = 'John loves Mary'.split()
+        self.assertEqual(sentence_bleu(references, hypothesis), 0)
+
+    def test_empty_references_and_hypothesis(self):
+        # Test case where both the reference and the hypothesis are empty.
+        references = [[]]
+        hypothesis = []
+        self.assertEqual(sentence_bleu(references, hypothesis), 0)
+
+    def test_reference_or_hypothesis_shorter_than_fourgrams(self):
+        # Test case where the length of reference or hypothesis
+        # is shorter than 4.
+        references = ['let it go'.split()]
+        hypothesis = 'let go it'.split()
+        # Checks that the value the hypothesis and reference returns is 0.0
+        # exp(w_1 * 1 * w_2 * 1 * w_3 * 1 * w_4 * -inf) = 0
+        self.assertAlmostEqual(sentence_bleu(references, hypothesis), 0.0, places=4)
+        # Checks that the warning has been raised.
+        try:
+            self.assertWarns(UserWarning, sentence_bleu, references, hypothesis)
+        except AttributeError:
+            pass  # unittest.TestCase.assertWarns is only supported in Python >= 3.2.
+
+
+class TestBLEUvsMteval13a(unittest.TestCase):
+    """
+    Compares corpus-level BLEU computed by NLTK with the scores recorded in
+    an mteval-13a output file for the same hypothesis/reference corpus.
+    """
+
+    def test_corpus_bleu(self):
+        ref_file = find('models/wmt15_eval/ref.ru')
+        hyp_file = find('models/wmt15_eval/google.ru')
+        mteval_output_file = find('models/wmt15_eval/mteval-13a.output')
+
+        # Reads the BLEU scores from the `mteval-13a.output` file.
+        # The order of the list corresponds to the order of the ngrams.
+        with open(mteval_output_file, 'r') as mteval_fin:
+            # The numbers are located in the second-to-last line of the file.
+            # The slice drops the first and the last field of that line.
+            # NOTE(review): presumably those are a label and the system
+            # name -- confirm against the mteval output format.
+            score_fields = mteval_fin.readlines()[-2].split()[1:-1]
+            # Materialise the scores as a list: they are iterated twice
+            # below, and a lazy map() object would already be exhausted
+            # when the second (smoothed) loop starts on Python 3.
+            mteval_bleu_scores = [float(field) for field in score_fields]
+
+        with io.open(ref_file, 'r', encoding='utf8') as ref_fin:
+            with io.open(hyp_file, 'r', encoding='utf8') as hyp_fin:
+                # Whitespace tokenize the file.
+                # Note: split() automatically strip().
+                hypothesis = [line.split() for line in hyp_fin]
+                # Note that the corpus_bleu input is list of list of references.
+                references = [[line.split()] for line in ref_fin]
+                # Without smoothing.
+                for i, mteval_bleu in zip(range(1, 10), mteval_bleu_scores):
+                    nltk_bleu = corpus_bleu(
+                        references, hypothesis, weights=(1.0 / i,) * i
+                    )
+                    # Check that the BLEU scores difference is less than 0.005 .
+                    # Note: This is an approximate comparison; as much as
+                    #       +/- 0.01 BLEU might be "statistically significant",
+                    #       the actual translation quality might not be.
+                    self.assertLess(abs(mteval_bleu - nltk_bleu), 0.005)
+
+                # With the same smoothing method used in mteval-v13a.pl
+                chencherry = SmoothingFunction()
+                for i, mteval_bleu in zip(range(1, 10), mteval_bleu_scores):
+                    nltk_bleu = corpus_bleu(
+                        references,
+                        hypothesis,
+                        weights=(1.0 / i,) * i,
+                        smoothing_function=chencherry.method3,
+                    )
+                    self.assertLess(abs(mteval_bleu - nltk_bleu), 0.005)
+
+
+class TestBLEUWithBadSentence(unittest.TestCase):
+    def test_corpus_bleu_with_bad_sentence(self):
+        # A garbled hypothesis scored against an unrelated reference must
+        # yield a corpus BLEU of (almost) zero and, where the unittest API
+        # allows checking it, emit a UserWarning.
+        hyp = "Teo S yb , oe uNb , R , T t , , t Tue Ar saln S , , 5istsi l , 5oe R ulO sae oR R"
+        ref = (
+            "Their tasks include changing a pump on the faulty stokehold ."
+            "Likewise , two species that are very similar in morphology "
+            "were distinguished using genetics ."
+        )
+        references = [[ref.split()]]
+        hypotheses = [hyp.split()]
+
+        def check_score_is_zero():
+            # Verify that the BLEU output is the undesired, degenerate 0.0.
+            self.assertAlmostEqual(corpus_bleu(references, hypotheses), 0.0, places=4)
+
+        try:
+            # NOTE(review): the warning presumably concerns the lack of
+            # matching higher-order n-grams -- confirm in bleu_score.
+            with self.assertWarns(UserWarning):
+                check_score_is_zero()
+        except AttributeError:
+            # unittest.TestCase.assertWarns is only supported in Python >= 3.2.
+            check_score_is_zero()
--- a/venv/lib/python3.7/site-packages/nltk/test/unit/translate/test_gdfa.py
+++ b/venv/lib/python3.7/site-packages/nltk/test/unit/translate/test_gdfa.py
@@ -0,0 +1,157 @@
+# -*- coding: utf-8 -*-
+"""
+Tests GDFA alignments
+"""
+
+import functools
+import io
+import unittest
+
+from nltk.translate.gdfa import grow_diag_final_and
+
+
+class TestGDFA(unittest.TestCase):
+    # Regression test for grow_diag_final_and(): each example feeds a
+    # forward and a backward alignment string plus two sentence lengths and
+    # compares the returned list with a hard-coded expected alignment.
+    def test_from_eflomal_outputs(self):
+        """
+        Testing GDFA with first 10 eflomal outputs from issue #1829
+        https://github.com/nltk/nltk/issues/1829
+        """
+        # Input.
+        # Each string is a space-separated list of 'i-j' index pairs.
+        forwards = [
+            '0-0 1-2',
+            '0-0 1-1',
+            '0-0 2-1 3-2 4-3 5-4 6-5 7-6 8-7 7-8 9-9 10-10 9-11 11-12 12-13 13-14',
+            '0-0 1-1 1-2 2-3 3-4 4-5 4-6 5-7 6-8 8-9 9-10',
+            '0-0 14-1 15-2 16-3 20-5 21-6 22-7 5-8 6-9 7-10 8-11 9-12 10-13 11-14 12-15 13-16 14-17 17-18 18-19 19-20 20-21 23-22 24-23 25-24 26-25 27-27 28-28 29-29 30-30 31-31',
+            '0-0 1-1 0-2 2-3',
+            '0-0 2-2 4-4',
+            '0-0 1-1 2-3 3-4 5-5 7-6 8-7 9-8 10-9 11-10 12-11 13-12 14-13 15-14 16-16 17-17 18-18 19-19 20-20',
+            '3-0 4-1 6-2 5-3 6-4 7-5 8-6 9-7 10-8 11-9 16-10 9-12 10-13 12-14',
+            '1-0',
+        ]
+        backwards = [
+            '0-0 1-2',
+            '0-0 1-1',
+            '0-0 2-1 3-2 4-3 5-4 6-5 7-6 8-7 9-8 10-10 11-12 12-11 13-13',
+            '0-0 1-2 2-3 3-4 4-6 6-8 7-5 8-7 9-8',
+            '0-0 1-8 2-9 3-10 4-11 5-12 6-11 8-13 9-14 10-15 11-16 12-17 13-18 14-19 15-20 16-21 17-22 18-23 19-24 20-29 21-30 22-31 23-2 24-3 25-4 26-5 27-5 28-6 29-7 30-28 31-31',
+            '0-0 1-1 2-3',
+            '0-0 1-1 2-3 4-4',
+            '0-0 1-1 2-3 3-4 5-5 7-6 8-7 9-8 10-9 11-10 12-11 13-12 14-13 15-14 16-16 17-17 18-18 19-19 20-16 21-18',
+            '0-0 1-1 3-2 4-1 5-3 6-4 7-5 8-6 9-7 10-8 11-9 12-8 13-9 14-8 15-9 16-10',
+            '1-0',
+        ]
+        # Sentence lengths passed as the first two arguments of
+        # grow_diag_final_and(), one pair per example.
+        source_lens = [2, 3, 3, 15, 11, 33, 4, 6, 23, 18]
+        target_lens = [2, 4, 3, 16, 12, 33, 5, 6, 22, 16]
+        # Expected Output.
+        # NOTE(review): this list holds 11 entries while the four input
+        # lists above hold 10 each; zip() below stops at the shortest input,
+        # so the last entry is never compared -- confirm whether it is a
+        # leftover or whether an input example is missing.
+        expected = [
+            [(0, 0), (1, 2)],
+            [(0, 0), (1, 1)],
+            [
+                (0, 0),
+                (2, 1),
+                (3, 2),
+                (4, 3),
+                (5, 4),
+                (6, 5),
+                (7, 6),
+                (8, 7),
+                (10, 10),
+                (11, 12),
+            ],
+            [
+                (0, 0),
+                (1, 1),
+                (1, 2),
+                (2, 3),
+                (3, 4),
+                (4, 5),
+                (4, 6),
+                (5, 7),
+                (6, 8),
+                (7, 5),
+                (8, 7),
+                (8, 9),
+                (9, 8),
+                (9, 10),
+            ],
+            [
+                (0, 0),
+                (1, 8),
+                (2, 9),
+                (3, 10),
+                (4, 11),
+                (5, 8),
+                (6, 9),
+                (6, 11),
+                (7, 10),
+                (8, 11),
+                (31, 31),
+            ],
+            [(0, 0), (0, 2), (1, 1), (2, 3)],
+            [(0, 0), (1, 1), (2, 2), (2, 3), (4, 4)],
+            [
+                (0, 0),
+                (1, 1),
+                (2, 3),
+                (3, 4),
+                (5, 5),
+                (7, 6),
+                (8, 7),
+                (9, 8),
+                (10, 9),
+                (11, 10),
+                (12, 11),
+                (13, 12),
+                (14, 13),
+                (15, 14),
+                (16, 16),
+                (17, 17),
+                (18, 18),
+                (19, 19),
+            ],
+            [
+                (0, 0),
+                (1, 1),
+                (3, 0),
+                (3, 2),
+                (4, 1),
+                (5, 3),
+                (6, 2),
+                (6, 4),
+                (7, 5),
+                (8, 6),
+                (9, 7),
+                (9, 12),
+                (10, 8),
+                (10, 13),
+                (11, 9),
+                (12, 8),
+                (12, 14),
+                (13, 9),
+                (14, 8),
+                (15, 9),
+                (16, 10),
+            ],
+            [(1, 0)],
+            [
+                (0, 0),
+                (1, 1),
+                (3, 2),
+                (4, 3),
+                (5, 4),
+                (6, 5),
+                (7, 6),
+                (9, 10),
+                (10, 12),
+                (11, 13),
+                (12, 14),
+                (13, 15),
+            ],
+        ]
+
+        # Iterate through all 10 examples and check for expected outputs.
+        for fw, bw, src_len, trg_len, expect in zip(
+            forwards, backwards, source_lens, target_lens, expected
+        ):
+            self.assertListEqual(expect, grow_diag_final_and(src_len, trg_len, fw, bw))
--- a/venv/lib/python3.7/site-packages/nltk/test/unit/translate/test_ibm1.py
+++ b/venv/lib/python3.7/site-packages/nltk/test/unit/translate/test_ibm1.py
@@ -0,0 +1,76 @@
+# -*- coding: utf-8 -*-
+"""
+Tests for IBM Model 1 training methods
+"""
+
+import unittest
+
+from collections import defaultdict
+from nltk.translate import AlignedSent
+from nltk.translate import IBMModel
+from nltk.translate import IBMModel1
+from nltk.translate.ibm_model import AlignmentInfo
+
+
+class TestIBMModel1(unittest.TestCase):
+    def test_set_uniform_translation_probabilities(self):
+        # arrange
+        ham_eggs = AlignedSent(['ham', 'eggs'], ['schinken', 'schinken', 'eier'])
+        spam = AlignedSent(['spam', 'spam', 'spam', 'spam'], ['spam', 'spam'])
+        bitext = [ham_eggs, spam]
+        model = IBMModel1(bitext, 0)
+
+        # act
+        model.set_uniform_probabilities(bitext)
+
+        # assert
+        # expected_prob = 1.0 / (target vocab size + 1)
+        table = model.translation_table
+        self.assertEqual(table['ham']['eier'], 1.0 / 3)
+        self.assertEqual(table['eggs'][None], 1.0 / 3)
+
+    def test_set_uniform_translation_probabilities_of_non_domain_values(self):
+        # arrange
+        ham_eggs = AlignedSent(['ham', 'eggs'], ['schinken', 'schinken', 'eier'])
+        spam = AlignedSent(['spam', 'spam', 'spam', 'spam'], ['spam', 'spam'])
+        bitext = [ham_eggs, spam]
+        model = IBMModel1(bitext, 0)
+
+        # act
+        model.set_uniform_probabilities(bitext)
+
+        # assert
+        # a target word absent from the training data gets the minimum
+        # probability
+        unseen_prob = model.translation_table['parrot']['eier']
+        self.assertEqual(unseen_prob, IBMModel.MIN_PROB)
+
+    def test_prob_t_a_given_s(self):
+        # arrange
+        src_sentence = ["ich", 'esse', 'ja', 'gern', 'räucherschinken']
+        trg_sentence = ['i', 'love', 'to', 'eat', 'smoked', 'ham']
+        bitext = [AlignedSent(trg_sentence, src_sentence)]
+        alignment = (0, 1, 4, 0, 2, 5, 5)
+        alignment_info = AlignmentInfo(
+            alignment, [None] + src_sentence, ['UNUSED'] + trg_sentence, None
+        )
+
+        # Every aligned (target word, source word) pair gets the same
+        # lexical translation probability.
+        word_pairs = [
+            ('i', 'ich'),
+            ('love', 'gern'),
+            ('to', None),
+            ('eat', 'esse'),
+            ('smoked', 'räucherschinken'),
+            ('ham', 'räucherschinken'),
+        ]
+        table = defaultdict(lambda: defaultdict(float))
+        for trg_word, src_word in word_pairs:
+            table[trg_word][src_word] = 0.98
+
+        model = IBMModel1(bitext, 0)
+        model.translation_table = table
+
+        # act
+        actual = model.prob_t_a_given_s(alignment_info)
+
+        # assert
+        # one 0.98 factor per target word
+        expected = 0.98 * 0.98 * 0.98 * 0.98 * 0.98 * 0.98
+        self.assertEqual(round(actual, 4), round(expected, 4))
--- a/venv/lib/python3.7/site-packages/nltk/test/unit/translate/test_ibm2.py
+++ b/venv/lib/python3.7/site-packages/nltk/test/unit/translate/test_ibm2.py
@@ -0,0 +1,89 @@
+# -*- coding: utf-8 -*-
+"""
+Tests for IBM Model 2 training methods
+"""
+
+import unittest
+
+from collections import defaultdict
+from nltk.translate import AlignedSent
+from nltk.translate import IBMModel
+from nltk.translate import IBMModel2
+from nltk.translate.ibm_model import AlignmentInfo
+
+
+class TestIBMModel2(unittest.TestCase):
+    """Checks IBM Model 2 uniform initialisation and alignment scoring."""
+
+    @staticmethod
+    def _toy_corpus():
+        """Return the two sentence pairs used by the initialisation tests."""
+        return [
+            AlignedSent(['ham', 'eggs'], ['schinken', 'schinken', 'eier']),
+            AlignedSent(['spam', 'spam', 'spam', 'spam'], ['spam', 'spam']),
+        ]
+
+    def test_set_uniform_alignment_probabilities(self):
+        """In-domain entries must equal 1 / (source sentence length + 1)."""
+        # arrange
+        sentence_pairs = self._toy_corpus()
+        model = IBMModel2(sentence_pairs, 0)
+
+        # act
+        model.set_uniform_probabilities(sentence_pairs)
+
+        # assert
+        table = model.alignment_table
+        # the first pair has 3 source words, the second pair has 2
+        self.assertEqual(table[0][1][3][2], 1.0 / 4)
+        self.assertEqual(table[2][4][2][4], 1.0 / 3)
+
+    def test_set_uniform_alignment_probabilities_of_non_domain_values(self):
+        """Positions that never occur in the corpus must evaluate to ``MIN_PROB``."""
+        # arrange
+        sentence_pairs = self._toy_corpus()
+        model = IBMModel2(sentence_pairs, 0)
+
+        # act
+        model.set_uniform_probabilities(sentence_pairs)
+
+        # assert: 99 is outside the range of i and j seen in training
+        table = model.alignment_table
+        self.assertEqual(table[99][1][3][2], IBMModel.MIN_PROB)
+        self.assertEqual(table[2][99][2][4], IBMModel.MIN_PROB)
+
+    def test_prob_t_a_given_s(self):
+        """The score must equal lexical probabilities times alignment probabilities."""
+        # arrange
+        source_words = ["ich", 'esse', 'ja', 'gern', 'räucherschinken']
+        target_words = ['i', 'love', 'to', 'eat', 'smoked', 'ham']
+        a_info = AlignmentInfo(
+            (0, 1, 4, 0, 2, 5, 5),
+            [None] + source_words,
+            ['UNUSED'] + target_words,
+            None,
+        )
+
+        lexical_table = defaultdict(lambda: defaultdict(float))
+        aligned_pairs = [
+            ('i', 'ich'),
+            ('love', 'gern'),
+            ('to', None),
+            ('eat', 'esse'),
+            ('smoked', 'räucherschinken'),
+            ('ham', 'räucherschinken'),
+        ]
+        for target_word, source_word in aligned_pairs:
+            lexical_table[target_word][source_word] = 0.98
+
+        position_table = defaultdict(
+            lambda: defaultdict(lambda: defaultdict(lambda: defaultdict(float)))
+        )
+        position_entries = [
+            ((0, 3), 0.97),  # None -> to
+            ((1, 1), 0.97),  # ich -> i
+            ((2, 4), 0.97),  # esse -> eat
+            ((4, 2), 0.97),  # gern -> love
+            ((5, 5), 0.96),  # räucherschinken -> smoked
+            ((5, 6), 0.96),  # räucherschinken -> ham
+        ]
+        # the last two keys are the sentence lengths: 5 source, 6 target words
+        for (i, j), prob in position_entries:
+            position_table[i][j][5][6] = prob
+
+        model = IBMModel2([AlignedSent(target_words, source_words)], 0)
+        model.translation_table = lexical_table
+        model.alignment_table = position_table
+
+        # act
+        actual = model.prob_t_a_given_s(a_info)
+
+        # assert
+        lexical_part = 0.98 * 0.98 * 0.98 * 0.98 * 0.98 * 0.98
+        alignment_part = 0.97 * 0.97 * 0.97 * 0.97 * 0.96 * 0.96
+        expected = lexical_part * alignment_part
+        self.assertEqual(round(actual, 4), round(expected, 4))
--- a/venv/lib/python3.7/site-packages/nltk/test/unit/translate/test_ibm3.py
+++ b/venv/lib/python3.7/site-packages/nltk/test/unit/translate/test_ibm3.py
@@ -0,0 +1,108 @@
+# -*- coding: utf-8 -*-
+"""
+Tests for IBM Model 3 training methods
+"""
+
+import unittest
+
+from collections import defaultdict
+from nltk.translate import AlignedSent
+from nltk.translate import IBMModel
+from nltk.translate import IBMModel3
+from nltk.translate.ibm_model import AlignmentInfo
+
+
+class TestIBMModel3(unittest.TestCase):
+    """Checks IBM Model 3 uniform distortion initialisation and scoring."""
+
+    @staticmethod
+    def _toy_corpus():
+        """Return the two sentence pairs used by the initialisation tests."""
+        return [
+            AlignedSent(['ham', 'eggs'], ['schinken', 'schinken', 'eier']),
+            AlignedSent(['spam', 'spam', 'spam', 'spam'], ['spam', 'spam']),
+        ]
+
+    def test_set_uniform_distortion_probabilities(self):
+        """In-domain entries must equal 1 / (target sentence length)."""
+        # arrange
+        sentence_pairs = self._toy_corpus()
+        model = IBMModel3(sentence_pairs, 0)
+
+        # act
+        model.set_uniform_probabilities(sentence_pairs)
+
+        # assert
+        table = model.distortion_table
+        # the first pair has 2 target words, the second pair has 4
+        self.assertEqual(table[1][0][3][2], 1.0 / 2)
+        self.assertEqual(table[4][2][2][4], 1.0 / 4)
+
+    def test_set_uniform_distortion_probabilities_of_non_domain_values(self):
+        """Positions that never occur in the corpus must evaluate to ``MIN_PROB``."""
+        # arrange
+        sentence_pairs = self._toy_corpus()
+        model = IBMModel3(sentence_pairs, 0)
+
+        # act
+        model.set_uniform_probabilities(sentence_pairs)
+
+        # assert
+        # examine i and j values that are not in the training data domain
+        table = model.distortion_table
+        out_of_domain_keys = [(0, 0, 3, 2), (9, 2, 2, 4), (2, 9, 2, 4)]
+        for j, i, l, m in out_of_domain_keys:
+            self.assertEqual(table[j][i][l][m], IBMModel.MIN_PROB)
+
+    def test_prob_t_a_given_s(self):
+        """The score must combine NULL generation, fertility, lexical and distortion terms."""
+        # arrange
+        source_words = ["ich", 'esse', 'ja', 'gern', 'räucherschinken']
+        target_words = ['i', 'love', 'to', 'eat', 'smoked', 'ham']
+        a_info = AlignmentInfo(
+            (0, 1, 4, 0, 2, 5, 5),
+            [None] + source_words,
+            ['UNUSED'] + target_words,
+            [[3], [1], [4], [], [2], [5, 6]],
+        )
+
+        distortions = defaultdict(
+            lambda: defaultdict(lambda: defaultdict(lambda: defaultdict(float)))
+        )
+        position_pairs = [
+            (1, 1),  # i -> ich
+            (2, 4),  # love -> gern
+            (3, 0),  # to -> NULL
+            (4, 2),  # eat -> esse
+            (5, 5),  # smoked -> räucherschinken
+            (6, 5),  # ham -> räucherschinken
+        ]
+        for j, i in position_pairs:
+            distortions[j][i][5][6] = 0.97
+
+        lexical_table = defaultdict(lambda: defaultdict(float))
+        aligned_pairs = [
+            ('i', 'ich'),
+            ('love', 'gern'),
+            ('to', None),
+            ('eat', 'esse'),
+            ('smoked', 'räucherschinken'),
+            ('ham', 'räucherschinken'),
+        ]
+        for target_word, source_word in aligned_pairs:
+            lexical_table[target_word][source_word] = 0.98
+
+        fertilities = defaultdict(lambda: defaultdict(float))
+        fertility_entries = [
+            (1, 'ich', 0.99),
+            (1, 'esse', 0.99),
+            (0, 'ja', 0.99),
+            (1, 'gern', 0.99),
+            (2, 'räucherschinken', 0.999),
+            (1, None, 0.99),
+        ]
+        for fertility_count, source_word, prob in fertility_entries:
+            fertilities[fertility_count][source_word] = prob
+
+        preset_tables = {
+            'p1': 0.167,
+            'translation_table': lexical_table,
+            'distortion_table': distortions,
+            'fertility_table': fertilities,
+            'alignment_table': None,
+        }
+        sentence_pairs = [AlignedSent(target_words, source_words)]
+        model = IBMModel3(sentence_pairs, 0, preset_tables)
+
+        # act
+        actual = model.prob_t_a_given_s(a_info)
+
+        # assert
+        null_part = 5 * pow(0.167, 1) * pow(0.833, 4)
+        fertility_part = 1 * 0.99 * 1 * 0.99 * 1 * 0.99 * 1 * 0.99 * 2 * 0.999
+        lexical_part = 0.98 * 0.98 * 0.98 * 0.98 * 0.98 * 0.98
+        distortion_part = 0.97 * 0.97 * 0.97 * 0.97 * 0.97 * 0.97
+        expected = null_part * fertility_part * lexical_part * distortion_part
+        self.assertEqual(round(actual, 4), round(expected, 4))
--- a/venv/lib/python3.7/site-packages/nltk/test/unit/translate/test_ibm4.py
+++ b/venv/lib/python3.7/site-packages/nltk/test/unit/translate/test_ibm4.py
@@ -0,0 +1,123 @@
+# -*- coding: utf-8 -*-
+"""
+Tests for IBM Model 4 training methods
+"""
+
+import unittest
+
+from collections import defaultdict
+from nltk.translate import AlignedSent
+from nltk.translate import IBMModel
+from nltk.translate import IBMModel4
+from nltk.translate.ibm_model import AlignmentInfo
+
+
+class TestIBMModel4(unittest.TestCase):
+    """Checks IBM Model 4 uniform distortion initialisation and scoring."""
+
+    @staticmethod
+    def _uniformly_initialised_model():
+        """Build a Model 4 on the toy corpus and call ``set_uniform_probabilities``."""
+        source_classes = {'schinken': 0, 'eier': 0, 'spam': 1}
+        target_classes = {'ham': 0, 'eggs': 1, 'spam': 2}
+        sentence_pairs = [
+            AlignedSent(['ham', 'eggs'], ['schinken', 'schinken', 'eier']),
+            AlignedSent(['spam', 'spam', 'spam', 'spam'], ['spam', 'spam']),
+        ]
+        model = IBMModel4(sentence_pairs, 0, source_classes, target_classes)
+        model.set_uniform_probabilities(sentence_pairs)
+        return model
+
+    def test_set_uniform_distortion_probabilities_of_max_displacements(self):
+        """Boundary displacements must receive the uniform probability."""
+        # arrange + act
+        model = self._uniformly_initialised_model()
+
+        # assert
+        # number of displacement values =
+        #     2 *(number of words in longest target sentence - 1)
+        uniform_prob = 1.0 / (2 * (4 - 1))
+
+        # examine the boundary values for (displacement, src_class, trg_class)
+        head_table = model.head_distortion_table
+        non_head_table = model.non_head_distortion_table
+        self.assertEqual(head_table[3][0][0], uniform_prob)
+        self.assertEqual(head_table[-3][1][2], uniform_prob)
+        self.assertEqual(non_head_table[3][0], uniform_prob)
+        self.assertEqual(non_head_table[-3][2], uniform_prob)
+
+    def test_set_uniform_distortion_probabilities_of_non_domain_values(self):
+        """Displacements beyond the corpus' range must evaluate to ``MIN_PROB``."""
+        # arrange + act
+        model = self._uniformly_initialised_model()
+
+        # assert
+        # examine displacement values that are not in the training data domain
+        head_table = model.head_distortion_table
+        non_head_table = model.non_head_distortion_table
+        self.assertEqual(head_table[4][0][0], IBMModel.MIN_PROB)
+        self.assertEqual(head_table[100][1][2], IBMModel.MIN_PROB)
+        self.assertEqual(non_head_table[4][0], IBMModel.MIN_PROB)
+        self.assertEqual(non_head_table[100][2], IBMModel.MIN_PROB)
+
+    def test_prob_t_a_given_s(self):
+        """The score must combine NULL generation, fertility, lexical and distortion terms."""
+        # arrange
+        source_words = ["ich", 'esse', 'ja', 'gern', 'räucherschinken']
+        target_words = ['i', 'love', 'to', 'eat', 'smoked', 'ham']
+        source_classes = {'räucherschinken': 0, 'ja': 1, 'ich': 2, 'esse': 3, 'gern': 4}
+        target_classes = {'ham': 0, 'smoked': 1, 'i': 3, 'love': 4, 'to': 2, 'eat': 4}
+        a_info = AlignmentInfo(
+            (0, 1, 4, 0, 2, 5, 5),
+            [None] + source_words,
+            ['UNUSED'] + target_words,
+            [[3], [1], [4], [], [2], [5, 6]],
+        )
+
+        head_distortions = defaultdict(
+            lambda: defaultdict(lambda: defaultdict(float))
+        )
+        # keys are (displacement, src_class, trg_class)
+        head_entries = [
+            (1, None, 3),  # None, i
+            (3, 2, 4),  # ich, eat
+            (-2, 3, 4),  # esse, love
+            (3, 4, 1),  # gern, smoked
+        ]
+        for displacement, source_class, target_class in head_entries:
+            head_distortions[displacement][source_class][target_class] = 0.97
+
+        non_head_distortions = defaultdict(lambda: defaultdict(float))
+        non_head_distortions[1][0] = 0.96  # ham
+
+        lexical_table = defaultdict(lambda: defaultdict(float))
+        aligned_pairs = [
+            ('i', 'ich'),
+            ('love', 'gern'),
+            ('to', None),
+            ('eat', 'esse'),
+            ('smoked', 'räucherschinken'),
+            ('ham', 'räucherschinken'),
+        ]
+        for target_word, source_word in aligned_pairs:
+            lexical_table[target_word][source_word] = 0.98
+
+        fertilities = defaultdict(lambda: defaultdict(float))
+        fertility_entries = [
+            (1, 'ich', 0.99),
+            (1, 'esse', 0.99),
+            (0, 'ja', 0.99),
+            (1, 'gern', 0.99),
+            (2, 'räucherschinken', 0.999),
+            (1, None, 0.99),
+        ]
+        for fertility_count, source_word, prob in fertility_entries:
+            fertilities[fertility_count][source_word] = prob
+
+        preset_tables = {
+            'p1': 0.167,
+            'translation_table': lexical_table,
+            'head_distortion_table': head_distortions,
+            'non_head_distortion_table': non_head_distortions,
+            'fertility_table': fertilities,
+            'alignment_table': None,
+        }
+        sentence_pairs = [AlignedSent(target_words, source_words)]
+        model = IBMModel4(
+            sentence_pairs, 0, source_classes, target_classes, preset_tables
+        )
+
+        # act
+        actual = model.prob_t_a_given_s(a_info)
+
+        # assert
+        null_part = 5 * pow(0.167, 1) * pow(0.833, 4)
+        fertility_part = 1 * 0.99 * 1 * 0.99 * 1 * 0.99 * 1 * 0.99 * 2 * 0.999
+        lexical_part = 0.98 * 0.98 * 0.98 * 0.98 * 0.98 * 0.98
+        distortion_part = 0.97 * 0.97 * 1 * 0.97 * 0.97 * 0.96
+        expected = null_part * fertility_part * lexical_part * distortion_part
+        self.assertEqual(round(actual, 4), round(expected, 4))
--- a/venv/lib/python3.7/site-packages/nltk/test/unit/translate/test_ibm5.py
+++ b/venv/lib/python3.7/site-packages/nltk/test/unit/translate/test_ibm5.py
@@ -0,0 +1,164 @@
+# -*- coding: utf-8 -*-
+"""
+Tests for IBM Model 5 training methods
+"""
+
+import unittest
+
+from collections import defaultdict
+from nltk.translate import AlignedSent
+from nltk.translate import IBMModel
+from nltk.translate import IBMModel4
+from nltk.translate import IBMModel5
+from nltk.translate.ibm_model import AlignmentInfo
+
+
+class TestIBMModel5(unittest.TestCase):
+    """Checks IBM Model 5 uniform vacancy initialisation, scoring and pruning."""
+
+    def test_set_uniform_vacancy_probabilities_of_max_displacements(self):
+        """Boundary vacancy differences must receive the uniform probability."""
+        # arrange
+        src_classes = {'schinken': 0, 'eier': 0, 'spam': 1}
+        trg_classes = {'ham': 0, 'eggs': 1, 'spam': 2}
+        corpus = [
+            AlignedSent(['ham', 'eggs'], ['schinken', 'schinken', 'eier']),
+            AlignedSent(['spam', 'spam', 'spam', 'spam'], ['spam', 'spam']),
+        ]
+        model5 = IBMModel5(corpus, 0, src_classes, trg_classes)
+
+        # act
+        model5.set_uniform_probabilities(corpus)
+
+        # assert
+        # number of vacancy difference values =
+        #     2 * number of words in longest target sentence
+        expected_prob = 1.0 / (2 * 4)
+
+        # examine the boundary values for (dv, max_v, trg_class)
+        self.assertEqual(model5.head_vacancy_table[4][4][0], expected_prob)
+        self.assertEqual(model5.head_vacancy_table[-3][1][2], expected_prob)
+        self.assertEqual(model5.non_head_vacancy_table[4][4][0], expected_prob)
+        self.assertEqual(model5.non_head_vacancy_table[-3][1][2], expected_prob)
+
+    def test_set_uniform_vacancy_probabilities_of_non_domain_values(self):
+        """Vacancy keys outside the corpus' range must evaluate to ``MIN_PROB``."""
+        # arrange
+        src_classes = {'schinken': 0, 'eier': 0, 'spam': 1}
+        trg_classes = {'ham': 0, 'eggs': 1, 'spam': 2}
+        corpus = [
+            AlignedSent(['ham', 'eggs'], ['schinken', 'schinken', 'eier']),
+            AlignedSent(['spam', 'spam', 'spam', 'spam'], ['spam', 'spam']),
+        ]
+        model5 = IBMModel5(corpus, 0, src_classes, trg_classes)
+
+        # act
+        model5.set_uniform_probabilities(corpus)
+
+        # assert
+        # examine dv and max_v values that are not in the training data domain
+        self.assertEqual(model5.head_vacancy_table[5][4][0], IBMModel.MIN_PROB)
+        self.assertEqual(model5.head_vacancy_table[-4][1][2], IBMModel.MIN_PROB)
+        self.assertEqual(model5.head_vacancy_table[4][0][0], IBMModel.MIN_PROB)
+        self.assertEqual(model5.non_head_vacancy_table[5][4][0], IBMModel.MIN_PROB)
+        self.assertEqual(model5.non_head_vacancy_table[-4][1][2], IBMModel.MIN_PROB)
+
+    def test_prob_t_a_given_s(self):
+        """The score must combine NULL generation, fertility, lexical and vacancy terms."""
+        # arrange
+        src_sentence = ["ich", 'esse', 'ja', 'gern', 'räucherschinken']
+        trg_sentence = ['i', 'love', 'to', 'eat', 'smoked', 'ham']
+        src_classes = {'räucherschinken': 0, 'ja': 1, 'ich': 2, 'esse': 3, 'gern': 4}
+        trg_classes = {'ham': 0, 'smoked': 1, 'i': 3, 'love': 4, 'to': 2, 'eat': 4}
+        corpus = [AlignedSent(trg_sentence, src_sentence)]
+        alignment_info = AlignmentInfo(
+            (0, 1, 4, 0, 2, 5, 5),
+            [None] + src_sentence,
+            ['UNUSED'] + trg_sentence,
+            [[3], [1], [4], [], [2], [5, 6]],
+        )
+
+        # keys are (dv, max_v, trg_class); dv is spelled out as a difference
+        head_vacancy_table = defaultdict(
+            lambda: defaultdict(lambda: defaultdict(float))
+        )
+        head_vacancy_table[1 - 0][6][3] = 0.97  # ich -> i
+        head_vacancy_table[3 - 0][5][4] = 0.97  # esse -> eat
+        head_vacancy_table[1 - 2][4][4] = 0.97  # gern -> love
+        head_vacancy_table[2 - 0][2][1] = 0.97  # räucherschinken -> smoked
+
+        non_head_vacancy_table = defaultdict(
+            lambda: defaultdict(lambda: defaultdict(float))
+        )
+        non_head_vacancy_table[1 - 0][1][0] = 0.96  # räucherschinken -> ham
+
+        translation_table = defaultdict(lambda: defaultdict(float))
+        translation_table['i']['ich'] = 0.98
+        translation_table['love']['gern'] = 0.98
+        translation_table['to'][None] = 0.98
+        translation_table['eat']['esse'] = 0.98
+        translation_table['smoked']['räucherschinken'] = 0.98
+        translation_table['ham']['räucherschinken'] = 0.98
+
+        fertility_table = defaultdict(lambda: defaultdict(float))
+        fertility_table[1]['ich'] = 0.99
+        fertility_table[1]['esse'] = 0.99
+        fertility_table[0]['ja'] = 0.99
+        fertility_table[1]['gern'] = 0.99
+        fertility_table[2]['räucherschinken'] = 0.999
+        fertility_table[1][None] = 0.99
+
+        probabilities = {
+            'p1': 0.167,
+            'translation_table': translation_table,
+            'fertility_table': fertility_table,
+            'head_vacancy_table': head_vacancy_table,
+            'non_head_vacancy_table': non_head_vacancy_table,
+            'head_distortion_table': None,
+            'non_head_distortion_table': None,
+            'alignment_table': None,
+        }
+
+        model5 = IBMModel5(corpus, 0, src_classes, trg_classes, probabilities)
+
+        # act
+        probability = model5.prob_t_a_given_s(alignment_info)
+
+        # assert
+        null_generation = 5 * pow(0.167, 1) * pow(0.833, 4)
+        fertility = 1 * 0.99 * 1 * 0.99 * 1 * 0.99 * 1 * 0.99 * 2 * 0.999
+        lexical_translation = 0.98 * 0.98 * 0.98 * 0.98 * 0.98 * 0.98
+        vacancy = 0.97 * 0.97 * 1 * 0.97 * 0.97 * 0.96
+        expected_probability = (
+            null_generation * fertility * lexical_translation * vacancy
+        )
+        self.assertEqual(round(probability, 4), round(expected_probability, 4))
+
+    def test_prune(self):
+        """Three of the five scored alignments are expected to survive pruning.
+
+        The Model 4 scoring function is replaced by a lookup into ``scores``
+        for the duration of the test and is always put back afterwards.
+        """
+        # arrange
+        alignment_infos = [
+            AlignmentInfo((1, 1), None, None, None),
+            AlignmentInfo((1, 2), None, None, None),
+            AlignmentInfo((2, 1), None, None, None),
+            AlignmentInfo((2, 2), None, None, None),
+            AlignmentInfo((0, 0), None, None, None),
+        ]
+        min_factor = IBMModel5.MIN_SCORE_FACTOR
+        best_score = 0.9
+        scores = {
+            (1, 1): min(min_factor * 1.5, 1) * best_score,  # above threshold
+            (1, 2): best_score,
+            (2, 1): min_factor * best_score,  # at threshold
+            (2, 2): min_factor * best_score * 0.5,  # low score
+            (0, 0): min(min_factor * 1.1, 1) * 1.2,  # above threshold
+        }
+        corpus = [AlignedSent(['a'], ['b'])]
+        # Reading a static method through the class yields the plain function,
+        # so it has to be wrapped again when it is restored.
+        original_prob_function = IBMModel4.model4_prob_t_a_given_s
+        # mock static method
+        IBMModel4.model4_prob_t_a_given_s = staticmethod(
+            lambda a, model: scores[a.alignment]
+        )
+        try:
+            model5 = IBMModel5(corpus, 0, None, None)
+
+            # act
+            pruned_alignments = model5.prune(alignment_infos)
+
+            # assert
+            self.assertEqual(len(pruned_alignments), 3)
+        finally:
+            # restore static method, even if the test body raised, so the
+            # mock cannot leak into other tests
+            IBMModel4.model4_prob_t_a_given_s = staticmethod(original_prob_function)
--- a/venv/lib/python3.7/site-packages/nltk/test/unit/translate/test_ibm_model.py
+++ b/venv/lib/python3.7/site-packages/nltk/test/unit/translate/test_ibm_model.py
@@ -0,0 +1,279 @@
+# -*- coding: utf-8 -*-
+"""
+Tests for common methods of IBM translation models
+"""
+
+import unittest
+
+from collections import defaultdict
+from nltk.translate import AlignedSent
+from nltk.translate import IBMModel
+from nltk.translate.ibm_model import AlignmentInfo
+
+
+class TestIBMModel(unittest.TestCase):
+    """Covers the methods shared by the IBM models.
+
+    The tests exercise vocabulary set-up, the best Model 2 alignment,
+    neighbor generation, hill climbing and sampling.
+    """
+
+    __TEST_SRC_SENTENCE = ["j'", 'aime', 'bien', 'jambon']
+    __TEST_TRG_SENTENCE = ['i', 'love', 'ham']
+
+    @staticmethod
+    def _basic_translation_table():
+        """Return entries for 'i', 'love' and 'ham', keyed by target then source word."""
+        return {
+            'i': {"j'": 0.9, 'aime': 0.05, 'bien': 0.02, 'jambon': 0.03, None: 0},
+            'love': {"j'": 0.05, 'aime': 0.9, 'bien': 0.01, 'jambon': 0.01, None: 0.03},
+            'ham': {"j'": 0, 'aime': 0.01, 'bien': 0, 'jambon': 0.99, None: 0},
+        }
+
+    @staticmethod
+    def _model_with(translation_table):
+        """Build an empty-corpus model with the given table and a flat 0.2 alignment table."""
+        flat_alignment_table = defaultdict(
+            lambda: defaultdict(lambda: defaultdict(lambda: defaultdict(lambda: 0.2)))
+        )
+        model = IBMModel([])
+        model.translation_table = translation_table
+        model.alignment_table = flat_alignment_table
+        return model
+
+    @staticmethod
+    def _green_eggs_alignment():
+        """Return the alignment of 'green eggs' to 'des œufs verts' used as a start point."""
+        return AlignmentInfo(
+            (0, 3, 2),
+            (None, 'des', 'œufs', 'verts'),
+            ('UNUSED', 'green', 'eggs'),
+            [[], [], [2], [1]],
+        )
+
+    def test_vocabularies_are_initialized(self):
+        """Vocabulary sizes must match the expected counts for a small corpus."""
+        sentence_pairs = [
+            AlignedSent(['one', 'two', 'three', 'four'], ['un', 'deux', 'trois']),
+            AlignedSent(['five', 'one', 'six'], ['quatre', 'cinq', 'six']),
+            AlignedSent([], ['sept']),
+        ]
+
+        model = IBMModel(sentence_pairs)
+
+        self.assertEqual(len(model.src_vocab), 8)
+        self.assertEqual(len(model.trg_vocab), 6)
+
+    def test_vocabularies_are_initialized_even_with_empty_corpora(self):
+        """An empty corpus must still yield usable (near-empty) vocabularies."""
+        model = IBMModel([])
+
+        self.assertEqual(len(model.src_vocab), 1)  # addition of NULL token
+        self.assertEqual(len(model.trg_vocab), 0)
+
+    def test_best_model2_alignment(self):
+        """Each target word must align to its most probable source word."""
+        # arrange
+        pair = AlignedSent(self.__TEST_TRG_SENTENCE, self.__TEST_SRC_SENTENCE)
+        # None and 'bien' have zero fertility
+        model = self._model_with(self._basic_translation_table())
+
+        # act
+        best = model.best_model2_alignment(pair)
+
+        # assert
+        self.assertEqual(best.alignment[1:], (1, 2, 4))  # 0th element unused
+        self.assertEqual(best.cepts, [[], [1], [2], [], [3]])
+
+    def test_best_model2_alignment_does_not_change_pegged_alignment(self):
+        """A pegged alignment point must be kept as given."""
+        # arrange
+        pair = AlignedSent(self.__TEST_TRG_SENTENCE, self.__TEST_SRC_SENTENCE)
+        model = self._model_with(self._basic_translation_table())
+
+        # act: force 'love' to be pegged to 'jambon'
+        best = model.best_model2_alignment(pair, 2, 4)
+
+        # assert
+        self.assertEqual(best.alignment[1:], (1, 4, 4))
+        self.assertEqual(best.cepts, [[], [1], [], [], [2, 3]])
+
+    def test_best_model2_alignment_handles_fertile_words(self):
+        """One source word may be aligned to several target words."""
+        # arrange
+        pair = AlignedSent(
+            ['i', 'really', ',', 'really', 'love', 'ham'],
+            self.__TEST_SRC_SENTENCE,
+        )
+        # 'bien' produces 2 target words: 'really' and another 'really'
+        table = self._basic_translation_table()
+        table['really'] = {"j'": 0, 'aime': 0, 'bien': 0.9, 'jambon': 0.01, None: 0.09}
+        table[','] = {"j'": 0, 'aime': 0, 'bien': 0.3, 'jambon': 0, None: 0.7}
+        model = self._model_with(table)
+
+        # act
+        best = model.best_model2_alignment(pair)
+
+        # assert
+        self.assertEqual(best.alignment[1:], (1, 3, 0, 3, 2, 4))
+        self.assertEqual(best.cepts, [[3], [1], [5], [2, 4], [6]])
+
+    def test_best_model2_alignment_handles_empty_src_sentence(self):
+        """With no source words, every target word must align to position 0."""
+        # arrange
+        pair = AlignedSent(self.__TEST_TRG_SENTENCE, [])
+        model = IBMModel([])
+
+        # act
+        best = model.best_model2_alignment(pair)
+
+        # assert
+        self.assertEqual(best.alignment[1:], (0, 0, 0))
+        self.assertEqual(best.cepts, [[1, 2, 3]])
+
+    def test_best_model2_alignment_handles_empty_trg_sentence(self):
+        """With no target words, the alignment and all cepts must be empty."""
+        # arrange
+        pair = AlignedSent([], self.__TEST_SRC_SENTENCE)
+        model = IBMModel([])
+
+        # act
+        best = model.best_model2_alignment(pair)
+
+        # assert
+        self.assertEqual(best.alignment[1:], ())
+        self.assertEqual(best.cepts, [[], [], [], [], []])
+
+    def test_neighboring_finds_neighbor_alignments(self):
+        """The neighbors must be exactly the moves, the swap and the original."""
+        # arrange
+        start = self._green_eggs_alignment()
+        model = IBMModel([])
+
+        # act
+        neighbors = model.neighboring(start)
+
+        # assert
+        found = {neighbor.alignment for neighbor in neighbors}
+        expected = {
+            # moves
+            (0, 0, 2),
+            (0, 1, 2),
+            (0, 2, 2),
+            (0, 3, 0),
+            (0, 3, 1),
+            (0, 3, 3),
+            # swaps
+            (0, 2, 3),
+            # original alignment
+            (0, 3, 2),
+        }
+        self.assertEqual(found, expected)
+
+    def test_neighboring_sets_neighbor_alignment_info(self):
+        """Neighbors must carry cepts that match their alignment tuple."""
+        # arrange
+        start = self._green_eggs_alignment()
+        model = IBMModel([])
+
+        # act
+        neighbors = model.neighboring(start)
+
+        # assert: select a few particular alignments
+        by_alignment = {neighbor.alignment: neighbor for neighbor in neighbors}
+        moved_alignment = by_alignment[(0, 2, 2)]
+        # NOTE(review): (0, 3, 2) is identical to the input alignment; the
+        # swapped neighbor listed in the sibling test is (0, 2, 3). Confirm
+        # which of the two this check is meant to cover.
+        swapped_alignment = by_alignment[(0, 3, 2)]
+
+        self.assertEqual(moved_alignment.cepts, [[], [], [1, 2], []])
+        self.assertEqual(swapped_alignment.cepts, [[], [], [2], [1]])
+
+    def test_neighboring_returns_neighbors_with_pegged_alignment(self):
+        """A pegged target position must keep its alignment in every neighbor."""
+        # arrange
+        start = self._green_eggs_alignment()
+        model = IBMModel([])
+
+        # act: peg 'eggs' to align with 'œufs'
+        neighbors = model.neighboring(start, 2)
+
+        # assert
+        found = {neighbor.alignment for neighbor in neighbors}
+        expected = {
+            # moves
+            (0, 0, 2),
+            (0, 1, 2),
+            (0, 2, 2),
+            # no swaps
+            # original alignment
+            (0, 3, 2),
+        }
+        self.assertEqual(found, expected)
+
+    def test_hillclimb(self):
+        """Hill climbing must follow improving neighbors until none is better."""
+        # arrange
+        start = AlignmentInfo((0, 3, 2), None, None, None)
+
+        # alignment -> alignments reachable from it; anything else has none
+        successors = {
+            (0, 3, 2): [(0, 2, 2), (0, 1, 1)],
+            (0, 2, 2): [(0, 3, 3), (0, 4, 4)],
+        }
+        prob_values = {
+            (0, 3, 2): 0.5,
+            (0, 2, 2): 0.6,
+            (0, 1, 1): 0.4,
+            (0, 3, 3): 0.6,
+            (0, 4, 4): 0.7,
+        }
+
+        def neighboring_mock(a, j):
+            return {
+                AlignmentInfo(alignment, None, None, None)
+                for alignment in successors.get(a.alignment, [])
+            }
+
+        def prob_t_a_given_s_mock(a):
+            return prob_values.get(a.alignment, 0.01)
+
+        model = IBMModel([])
+        model.neighboring = neighboring_mock
+        model.prob_t_a_given_s = prob_t_a_given_s_mock
+
+        # act
+        best = model.hillclimb(start)
+
+        # assert: hill climbing goes from (0, 3, 2) -> (0, 2, 2) -> (0, 4, 4)
+        self.assertEqual(best.alignment, (0, 4, 4))
+
+    def test_sample(self):
+        """Sampling with a constant score must return 61 alignments."""
+        # arrange
+        pair = AlignedSent(self.__TEST_TRG_SENTENCE, self.__TEST_SRC_SENTENCE)
+        model = IBMModel([])
+        model.prob_t_a_given_s = lambda x: 0.001
+
+        # act
+        samples, _best = model.sample(pair)
+
+        # assert
+        self.assertEqual(len(samples), 61)
--- a/venv/lib/python3.7/site-packages/nltk/test/unit/translate/test_nist.py
+++ b/venv/lib/python3.7/site-packages/nltk/test/unit/translate/test_nist.py
@@ -0,0 +1,37 @@
+# -*- coding: utf-8 -*-
+"""
+Tests for NIST translation evaluation metric
+"""
+
+import io
+import unittest
+
+from nltk.data import find
+from nltk.translate.nist_score import sentence_nist, corpus_nist
+
+
+class TestNIST(unittest.TestCase):
+    """Compares ``corpus_nist`` with scores stored in an mteval-13a output file."""
+
+    def test_sentence_nist(self):
+        """``corpus_nist`` must stay within 0.05 of each stored reference score."""
+        ref_file = find('models/wmt15_eval/ref.ru')
+        hyp_file = find('models/wmt15_eval/google.ru')
+        mteval_output_file = find('models/wmt15_eval/mteval-13a.output')
+
+        # Reads the NIST scores from the `mteval-13a.output` file.
+        # The order of the list corresponds to the order of the ngrams.
+        with open(mteval_output_file, 'r') as mteval_fin:
+            # The numbers are on the 4th line from the end of the file.
+            score_line = mteval_fin.readlines()[-4]
+        # The first and the last field of that line are not scores.
+        mteval_nist_scores = [float(field) for field in score_line.split()[1:-1]]
+
+        with io.open(ref_file, 'r', encoding='utf8') as ref_fin:
+            with io.open(hyp_file, 'r', encoding='utf8') as hyp_fin:
+                # Whitespace tokenize the file.
+                # Note: split() also strips the line.
+                hypotheses = [line.split() for line in hyp_fin]
+                # Each hypothesis gets a one-element list of references.
+                references = [[line.split()] for line in ref_fin]
+
+        for n, mteval_nist in zip(range(1, 10), mteval_nist_scores):
+            nltk_nist = corpus_nist(references, hypotheses, n)
+            # Check that the NIST score difference is less than 0.05.
+            # assertLess is not stripped under -O and reports both values.
+            self.assertLess(abs(mteval_nist - nltk_nist), 0.05)
--- a/venv/lib/python3.7/site-packages/nltk/test/unit/translate/test_stack_decoder.py
+++ b/venv/lib/python3.7/site-packages/nltk/test/unit/translate/test_stack_decoder.py
@@ -0,0 +1,295 @@
+# -*- coding: utf-8 -*-
+# Natural Language Toolkit: Stack decoder
+#
+# Copyright (C) 2001-2019 NLTK Project
+# Author: Tah Wei Hoon <hoon.tw@gmail.com>
+# URL: <http://nltk.org/>
+# For license information, see LICENSE.TXT
+
+"""
+Tests for stack decoder
+"""
+
+import unittest
+from collections import defaultdict
+from math import log
+from nltk.translate import PhraseTable
+from nltk.translate import StackDecoder
+from nltk.translate.stack_decoder import _Hypothesis, _Stack
+
+
+class TestStackDecoder(unittest.TestCase):
+    def test_find_all_src_phrases(self):
+        # arrange
+        phrase_table = TestStackDecoder.create_fake_phrase_table()
+        stack_decoder = StackDecoder(phrase_table, None)
+        sentence = ('my', 'hovercraft', 'is', 'full', 'of', 'eels')
+
+        # act
+        src_phrase_spans = stack_decoder.find_all_src_phrases(sentence)
+
+        # assert
+        self.assertEqual(src_phrase_spans[0], [2])  # 'my hovercraft'
+        self.assertEqual(src_phrase_spans[1], [2])  # 'hovercraft'
+        self.assertEqual(src_phrase_spans[2], [3])  # 'is'
+        self.assertEqual(src_phrase_spans[3], [5, 6])  # 'full of', 'full of eels'
+        self.assertFalse(src_phrase_spans[4])  # no entry starting with 'of'
+        self.assertEqual(src_phrase_spans[5], [6])  # 'eels'
+
+    def test_distortion_score(self):
+        # arrange
+        stack_decoder = StackDecoder(None, None)
+        stack_decoder.distortion_factor = 0.5
+        hypothesis = _Hypothesis()
+        hypothesis.src_phrase_span = (3, 5)
+
+        # act
+        score = stack_decoder.distortion_score(hypothesis, (8, 10))
+
+        # assert
+        expected_score = log(stack_decoder.distortion_factor) * (8 - 5)
+        self.assertEqual(score, expected_score)
+
+    def test_distortion_score_of_first_expansion(self):
+        # arrange
+        stack_decoder = StackDecoder(None, None)
+        stack_decoder.distortion_factor = 0.5
+        hypothesis = _Hypothesis()
+
+        # act
+        score = stack_decoder.distortion_score(hypothesis, (8, 10))
+
+        # assert
+        # expansion from empty hypothesis always has zero distortion cost
+        self.assertEqual(score, 0.0)
+
+    def test_compute_future_costs(self):
+        # arrange
+        phrase_table = TestStackDecoder.create_fake_phrase_table()
+        language_model = TestStackDecoder.create_fake_language_model()
+        stack_decoder = StackDecoder(phrase_table, language_model)
+        sentence = ('my', 'hovercraft', 'is', 'full', 'of', 'eels')
+
+        # act
+        future_scores = stack_decoder.compute_future_scores(sentence)
+
+        # assert
+        self.assertEqual(
+            future_scores[1][2],
+            (
+                phrase_table.translations_for(('hovercraft',))[0].log_prob
+                + language_model.probability(('hovercraft',))
+            ),
+        )
+        self.assertEqual(
+            future_scores[0][2],
+            (
+                phrase_table.translations_for(('my', 'hovercraft'))[0].log_prob
+                + language_model.probability(('my', 'hovercraft'))
+            ),
+        )
+
+    def test_compute_future_costs_for_phrases_not_in_phrase_table(self):
+        # arrange
+        phrase_table = TestStackDecoder.create_fake_phrase_table()
+        language_model = TestStackDecoder.create_fake_language_model()
+        stack_decoder = StackDecoder(phrase_table, language_model)
+        sentence = ('my', 'hovercraft', 'is', 'full', 'of', 'eels')
+
+        # act
+        future_scores = stack_decoder.compute_future_scores(sentence)
+
+        # assert
+        self.assertEqual(
+            future_scores[1][3],  # 'hovercraft is' is not in phrase table
+            future_scores[1][2] + future_scores[2][3],
+        )  # backoff
+
+    def test_future_score(self):
+        # arrange: sentence with 8 words; words 2, 3, 4 already translated
+        hypothesis = _Hypothesis()
+        hypothesis.untranslated_spans = lambda _: [(0, 2), (5, 8)]  # mock
+        future_score_table = defaultdict(lambda: defaultdict(float))
+        future_score_table[0][2] = 0.4
+        future_score_table[5][8] = 0.5
+        stack_decoder = StackDecoder(None, None)
+
+        # act
+        future_score = stack_decoder.future_score(hypothesis, future_score_table, 8)
+
+        # assert
+        self.assertEqual(future_score, 0.4 + 0.5)
+
+    def test_valid_phrases(self):
+        # arrange
+        hypothesis = _Hypothesis()
+        # mock untranslated_spans method
+        hypothesis.untranslated_spans = lambda _: [(0, 2), (3, 6)]
+        all_phrases_from = [[1, 4], [2], [], [5], [5, 6, 7], [], [7]]
+
+        # act
+        phrase_spans = StackDecoder.valid_phrases(all_phrases_from, hypothesis)
+
+        # assert
+        self.assertEqual(phrase_spans, [(0, 1), (1, 2), (3, 5), (4, 5), (4, 6)])
+
+    @staticmethod
+    def create_fake_phrase_table():
+        phrase_table = PhraseTable()
+        phrase_table.add(('hovercraft',), ('',), 0.8)
+        phrase_table.add(('my', 'hovercraft'), ('', ''), 0.7)
+        phrase_table.add(('my', 'cheese'), ('', ''), 0.7)
+        phrase_table.add(('is',), ('',), 0.8)
+        phrase_table.add(('is',), ('',), 0.5)
+        phrase_table.add(('full', 'of'), ('', ''), 0.01)
+        phrase_table.add(('full', 'of', 'eels'), ('', '', ''), 0.5)
+        phrase_table.add(('full', 'of', 'spam'), ('', ''), 0.5)
+        phrase_table.add(('eels',), ('',), 0.5)
+        phrase_table.add(('spam',), ('',), 0.5)
+        return phrase_table
+
+    @staticmethod
+    def create_fake_language_model():
+        # nltk.model should be used here once it is implemented
+        language_prob = defaultdict(lambda: -999.0)
+        language_prob[('my',)] = log(0.1)
+        language_prob[('hovercraft',)] = log(0.1)
+        language_prob[('is',)] = log(0.1)
+        language_prob[('full',)] = log(0.1)
+        language_prob[('of',)] = log(0.1)
+        language_prob[('eels',)] = log(0.1)
+        language_prob[('my', 'hovercraft')] = log(0.3)
+        language_model = type(
+            '', (object,), {'probability': lambda _, phrase: language_prob[phrase]}
+        )()
+        return language_model
+
+
+class TestHypothesis(unittest.TestCase):
+    def setUp(self):
+        root = _Hypothesis()
+        child = _Hypothesis(
+            raw_score=0.5,
+            src_phrase_span=(3, 7),
+            trg_phrase=('hello', 'world'),
+            previous=root,
+        )
+        grandchild = _Hypothesis(
+            raw_score=0.4,
+            src_phrase_span=(1, 2),
+            trg_phrase=('and', 'goodbye'),
+            previous=child,
+        )
+        self.hypothesis_chain = grandchild
+
+    def test_translation_so_far(self):
+        # act
+        translation = self.hypothesis_chain.translation_so_far()
+
+        # assert
+        self.assertEqual(translation, ['hello', 'world', 'and', 'goodbye'])
+
+    def test_translation_so_far_for_empty_hypothesis(self):
+        # arrange
+        hypothesis = _Hypothesis()
+
+        # act
+        translation = hypothesis.translation_so_far()
+
+        # assert
+        self.assertEqual(translation, [])
+
+    def test_total_translated_words(self):
+        # act
+        total_translated_words = self.hypothesis_chain.total_translated_words()
+
+        # assert
+        self.assertEqual(total_translated_words, 5)
+
+    def test_translated_positions(self):
+        # act
+        translated_positions = self.hypothesis_chain.translated_positions()
+
+        # assert
+        translated_positions.sort()
+        self.assertEqual(translated_positions, [1, 3, 4, 5, 6])
+
+    def test_untranslated_spans(self):
+        # act
+        untranslated_spans = self.hypothesis_chain.untranslated_spans(10)
+
+        # assert
+        self.assertEqual(untranslated_spans, [(0, 1), (2, 3), (7, 10)])
+
+    def test_untranslated_spans_for_empty_hypothesis(self):
+        # arrange
+        hypothesis = _Hypothesis()
+
+        # act
+        untranslated_spans = hypothesis.untranslated_spans(10)
+
+        # assert
+        self.assertEqual(untranslated_spans, [(0, 10)])
+
+
+class TestStack(unittest.TestCase):
+    def test_push_bumps_off_worst_hypothesis_when_stack_is_full(self):
+        # arrange
+        stack = _Stack(3)
+        poor_hypothesis = _Hypothesis(0.01)
+
+        # act
+        stack.push(_Hypothesis(0.2))
+        stack.push(poor_hypothesis)
+        stack.push(_Hypothesis(0.1))
+        stack.push(_Hypothesis(0.3))
+
+        # assert
+        self.assertFalse(poor_hypothesis in stack)
+
+    def test_push_removes_hypotheses_that_fall_below_beam_threshold(self):
+        # arrange
+        stack = _Stack(3, 0.5)
+        poor_hypothesis = _Hypothesis(0.01)
+        worse_hypothesis = _Hypothesis(0.009)
+
+        # act
+        stack.push(poor_hypothesis)
+        stack.push(worse_hypothesis)
+        stack.push(_Hypothesis(0.9))  # greatly superior hypothesis
+
+        # assert
+        self.assertFalse(poor_hypothesis in stack)
+        self.assertFalse(worse_hypothesis in stack)
+
+    def test_push_does_not_add_hypothesis_that_falls_below_beam_threshold(self):
+        # arrange
+        stack = _Stack(3, 0.5)
+        poor_hypothesis = _Hypothesis(0.01)
+
+        # act
+        stack.push(_Hypothesis(0.9))  # greatly superior hypothesis
+        stack.push(poor_hypothesis)
+
+        # assert
+        self.assertFalse(poor_hypothesis in stack)
+
+    def test_best_returns_the_best_hypothesis(self):
+        # arrange
+        stack = _Stack(3)
+        best_hypothesis = _Hypothesis(0.99)
+
+        # act
+        stack.push(_Hypothesis(0.0))
+        stack.push(best_hypothesis)
+        stack.push(_Hypothesis(0.5))
+
+        # assert
+        self.assertEqual(stack.best(), best_hypothesis)
+
+    def test_best_returns_none_when_stack_is_empty(self):
+        # arrange
+        stack = _Stack(3)
+
+        # assert
+        self.assertEqual(stack.best(), None)
--- a/venv/lib/python3.7/site-packages/nltk/test/unit/utils.py
+++ b/venv/lib/python3.7/site-packages/nltk/test/unit/utils.py
@@ -0,0 +1,47 @@
+# -*- coding: utf-8 -*-
+from __future__ import absolute_import
+from unittest import TestCase
+from functools import wraps
+from nose.plugins.skip import SkipTest
+from nltk.util import py26
+
+
+def skip(reason):
+    """
+    Unconditionally skip a test.
+    """
+
+    def decorator(test_item):
+        is_test_class = isinstance(test_item, type) and issubclass(test_item, TestCase)
+
+        if is_test_class and py26():
+            # Patch all test_ methods to raise SkipText exception.
+            # This is necessary for Python 2.6 because its unittest
+            # doesn't understand __unittest_skip__.
+            for meth_name in (m for m in dir(test_item) if m.startswith('test_')):
+                patched_method = skip(reason)(getattr(test_item, meth_name))
+                setattr(test_item, meth_name, patched_method)
+
+        if not is_test_class:
+
+            @wraps(test_item)
+            def skip_wrapper(*args, **kwargs):
+                raise SkipTest(reason)
+
+            skip_wrapper.__name__ = test_item.__name__
+            test_item = skip_wrapper
+
+        test_item.__unittest_skip__ = True
+        test_item.__unittest_skip_why__ = reason
+        return test_item
+
+    return decorator
+
+
+def skipIf(condition, reason):
+    """
+    Skip a test if the condition is true.
+    """
+    if condition:
+        return skip(reason)
+    return lambda obj: obj