Initial commit

2019-10-20 13:16:49 +02:00
commit 233066caf4
2099 changed files with 360824 additions and 0 deletions
--- a/venv/lib/python3.7/site-packages/nltk/corpus/init.py
+++ b/venv/lib/python3.7/site-packages/nltk/corpus/init.py
@@ -0,0 +1,493 @@
+# Natural Language Toolkit: Corpus Readers
+#
+# Copyright (C) 2001-2019 NLTK Project
+# Author: Edward Loper <edloper@gmail.com>
+# URL: <http://nltk.org/>
+# For license information, see LICENSE.TXT
+
+# TODO this docstring isn't up-to-date!
+"""
+NLTK corpus readers.  The modules in this package provide functions
+that can be used to read corpus files in a variety of formats.  These
+functions can be used to read both the corpus files that are
+distributed in the NLTK corpus package, and corpus files that are part
+of external corpora.
+
+Available Corpora
+=================
+
+Please see http://www.nltk.org/nltk_data/ for a complete list.
+Install corpora using nltk.download().
+
+Corpus Reader Functions
+=======================
+Each corpus module defines one or more "corpus reader functions",
+which can be used to read documents from that corpus.  These functions
+take an argument, ``item``, which is used to indicate which document
+should be read from the corpus:
+
+- If ``item`` is one of the unique identifiers listed in the corpus
+  module's ``items`` variable, then the corresponding document will
+  be loaded from the NLTK corpus package.
+- If ``item`` is a filename, then that file will be read.
+
+Additionally, corpus reader functions can be given lists of item
+names; in which case, they will return a concatenation of the
+corresponding documents.
+
+Corpus reader functions are named based on the type of information
+they return.  Some common examples, and their return types, are:
+
+- words(): list of str
+- sents(): list of (list of str)
+- paras(): list of (list of (list of str))
+- tagged_words(): list of (str,str) tuple
+- tagged_sents(): list of (list of (str,str))
+- tagged_paras(): list of (list of (list of (str,str)))
+- chunked_sents(): list of (Tree w/ (str,str) leaves)
+- parsed_sents(): list of (Tree with str leaves)
+- parsed_paras(): list of (list of (Tree with str leaves))
+- xml(): A single xml ElementTree
+- raw(): unprocessed corpus contents
+
+For example, to read a list of the words in the Brown Corpus, use
+``nltk.corpus.brown.words()``:
+
+    >>> from nltk.corpus import brown
+    >>> print(", ".join(brown.words()))
+    The, Fulton, County, Grand, Jury, said, ...
+
+"""
+
+import re
+
+from nltk.tokenize import RegexpTokenizer
+from nltk.corpus.util import LazyCorpusLoader
+from nltk.corpus.reader import *
+
+abc = LazyCorpusLoader(
+    'abc',
+    PlaintextCorpusReader,
+    r'(?!\.).*\.txt',
+    encoding=[('science', 'latin_1'), ('rural', 'utf8')],
+)
+alpino = LazyCorpusLoader('alpino', AlpinoCorpusReader, tagset='alpino')
+brown = LazyCorpusLoader(
+    'brown',
+    CategorizedTaggedCorpusReader,
+    r'c[a-z]\d\d',
+    cat_file='cats.txt',
+    tagset='brown',
+    encoding="ascii",
+)
+cess_cat = LazyCorpusLoader(
+    'cess_cat',
+    BracketParseCorpusReader,
+    r'(?!\.).*\.tbf',
+    tagset='unknown',
+    encoding='ISO-8859-15',
+)
+cess_esp = LazyCorpusLoader(
+    'cess_esp',
+    BracketParseCorpusReader,
+    r'(?!\.).*\.tbf',
+    tagset='unknown',
+    encoding='ISO-8859-15',
+)
+cmudict = LazyCorpusLoader('cmudict', CMUDictCorpusReader, ['cmudict'])
+comtrans = LazyCorpusLoader('comtrans', AlignedCorpusReader, r'(?!\.).*\.txt')
+comparative_sentences = LazyCorpusLoader(
+    'comparative_sentences',
+    ComparativeSentencesCorpusReader,
+    r'labeledSentences\.txt',
+    encoding='latin-1',
+)
+conll2000 = LazyCorpusLoader(
+    'conll2000',
+    ConllChunkCorpusReader,
+    ['train.txt', 'test.txt'],
+    ('NP', 'VP', 'PP'),
+    tagset='wsj',
+    encoding='ascii',
+)
+# CoNLL-2002 loader.  The fileid pattern is written as a raw string because
+# '\.' is not a valid escape sequence in an ordinary string literal; the
+# pattern text itself is unchanged.
+conll2002 = LazyCorpusLoader(
+    'conll2002',
+    ConllChunkCorpusReader,
+    r'.*\.(test|train).*',
+    ('LOC', 'PER', 'ORG', 'MISC'),
+    encoding='utf-8',
+)
+# CoNLL-2007 dependency loader.  The fileid pattern is a raw string because
+# '\.' is not a valid escape sequence in an ordinary string literal.  The
+# encoding is given as (pattern, encoding) pairs, as written below.
+conll2007 = LazyCorpusLoader(
+    'conll2007',
+    DependencyCorpusReader,
+    r'.*\.(test|train).*',
+    encoding=[('eus', 'ISO-8859-2'), ('esp', 'utf8')],
+)
+# Raw string: '\.' is not a valid escape in an ordinary string literal.
+crubadan = LazyCorpusLoader('crubadan', CrubadanCorpusReader, r'.*\.txt')
+# Raw string: '\.' is not a valid escape in an ordinary string literal.
+dependency_treebank = LazyCorpusLoader(
+    'dependency_treebank', DependencyCorpusReader, r'.*\.dp', encoding='ascii'
+)
+floresta = LazyCorpusLoader(
+    'floresta',
+    BracketParseCorpusReader,
+    r'(?!\.).*\.ptb',
+    '#',
+    tagset='unknown',
+    encoding='ISO-8859-15',
+)
+framenet15 = LazyCorpusLoader(
+    'framenet_v15',
+    FramenetCorpusReader,
+    [
+        'frRelation.xml',
+        'frameIndex.xml',
+        'fulltextIndex.xml',
+        'luIndex.xml',
+        'semTypes.xml',
+    ],
+)
+framenet = LazyCorpusLoader(
+    'framenet_v17',
+    FramenetCorpusReader,
+    [
+        'frRelation.xml',
+        'frameIndex.xml',
+        'fulltextIndex.xml',
+        'luIndex.xml',
+        'semTypes.xml',
+    ],
+)
+gazetteers = LazyCorpusLoader(
+    'gazetteers', WordListCorpusReader, r'(?!LICENSE|\.).*\.txt', encoding='ISO-8859-2'
+)
+genesis = LazyCorpusLoader(
+    'genesis',
+    PlaintextCorpusReader,
+    r'(?!\.).*\.txt',
+    encoding=[
+        ('finnish|french|german', 'latin_1'),
+        ('swedish', 'cp865'),
+        ('.*', 'utf_8'),
+    ],
+)
+gutenberg = LazyCorpusLoader(
+    'gutenberg', PlaintextCorpusReader, r'(?!\.).*\.txt', encoding='latin1'
+)
+ieer = LazyCorpusLoader('ieer', IEERCorpusReader, r'(?!README|\.).*')
+inaugural = LazyCorpusLoader(
+    'inaugural', PlaintextCorpusReader, r'(?!\.).*\.txt', encoding='latin1'
+)
+# [XX] This should probably just use TaggedCorpusReader:
+indian = LazyCorpusLoader(
+    'indian', IndianCorpusReader, r'(?!\.).*\.pos', tagset='unknown', encoding='utf8'
+)
+
+jeita = LazyCorpusLoader('jeita', ChasenCorpusReader, r'.*\.chasen', encoding='utf-8')
+knbc = LazyCorpusLoader('knbc/corpus1', KNBCorpusReader, r'.*/KN.*', encoding='euc-jp')
+lin_thesaurus = LazyCorpusLoader('lin_thesaurus', LinThesaurusCorpusReader, r'.*\.lsp')
+mac_morpho = LazyCorpusLoader(
+    'mac_morpho',
+    MacMorphoCorpusReader,
+    r'(?!\.).*\.txt',
+    tagset='unknown',
+    encoding='latin-1',
+)
+machado = LazyCorpusLoader(
+    'machado',
+    PortugueseCategorizedPlaintextCorpusReader,
+    r'(?!\.).*\.txt',
+    cat_pattern=r'([a-z]*)/.*',
+    encoding='latin-1',
+)
+masc_tagged = LazyCorpusLoader(
+    'masc_tagged',
+    CategorizedTaggedCorpusReader,
+    r'(spoken|written)/.*\.txt',
+    cat_file='categories.txt',
+    tagset='wsj',
+    encoding="utf-8",
+    sep="_",
+)
+movie_reviews = LazyCorpusLoader(
+    'movie_reviews',
+    CategorizedPlaintextCorpusReader,
+    r'(?!\.).*\.txt',
+    cat_pattern=r'(neg|pos)/.*',
+    encoding='ascii',
+)
+multext_east = LazyCorpusLoader(
+    'mte_teip5', MTECorpusReader, r'(oana).*\.xml', encoding="utf-8"
+)
+names = LazyCorpusLoader(
+    'names', WordListCorpusReader, r'(?!\.).*\.txt', encoding='ascii'
+)
+nps_chat = LazyCorpusLoader(
+    'nps_chat', NPSChatCorpusReader, r'(?!README|\.).*\.xml', tagset='wsj'
+)
+opinion_lexicon = LazyCorpusLoader(
+    'opinion_lexicon',
+    OpinionLexiconCorpusReader,
+    r'(\w+)\-words\.txt',
+    encoding='ISO-8859-2',
+)
+ppattach = LazyCorpusLoader(
+    'ppattach', PPAttachmentCorpusReader, ['training', 'test', 'devset']
+)
+product_reviews_1 = LazyCorpusLoader(
+    'product_reviews_1', ReviewsCorpusReader, r'^(?!Readme).*\.txt', encoding='utf8'
+)
+product_reviews_2 = LazyCorpusLoader(
+    'product_reviews_2', ReviewsCorpusReader, r'^(?!Readme).*\.txt', encoding='utf8'
+)
+pros_cons = LazyCorpusLoader(
+    'pros_cons',
+    ProsConsCorpusReader,
+    r'Integrated(Cons|Pros)\.txt',
+    cat_pattern=r'Integrated(Cons|Pros)\.txt',
+    encoding='ISO-8859-2',
+)
+ptb = LazyCorpusLoader(  # Penn Treebank v3: WSJ and Brown portions
+    'ptb',
+    CategorizedBracketParseCorpusReader,
+    r'(WSJ/\d\d/WSJ_\d\d|BROWN/C[A-Z]/C[A-Z])\d\d.MRG',
+    cat_file='allcats.txt',
+    tagset='wsj',
+)
+qc = LazyCorpusLoader(
+    'qc', StringCategoryCorpusReader, ['train.txt', 'test.txt'], encoding='ISO-8859-2'
+)
+reuters = LazyCorpusLoader(
+    'reuters',
+    CategorizedPlaintextCorpusReader,
+    '(training|test).*',
+    cat_file='cats.txt',
+    encoding='ISO-8859-2',
+)
+rte = LazyCorpusLoader('rte', RTECorpusReader, r'(?!\.).*\.xml')
+senseval = LazyCorpusLoader('senseval', SensevalCorpusReader, r'(?!\.).*\.pos')
+sentence_polarity = LazyCorpusLoader(
+    'sentence_polarity',
+    CategorizedSentencesCorpusReader,
+    r'rt-polarity\.(neg|pos)',
+    cat_pattern=r'rt-polarity\.(neg|pos)',
+    encoding='utf-8',
+)
+sentiwordnet = LazyCorpusLoader(
+    'sentiwordnet', SentiWordNetCorpusReader, 'SentiWordNet_3.0.0.txt', encoding='utf-8'
+)
+shakespeare = LazyCorpusLoader('shakespeare', XMLCorpusReader, r'(?!\.).*\.xml')
+sinica_treebank = LazyCorpusLoader(
+    'sinica_treebank',
+    SinicaTreebankCorpusReader,
+    ['parsed'],
+    tagset='unknown',
+    encoding='utf-8',
+)
+state_union = LazyCorpusLoader(
+    'state_union', PlaintextCorpusReader, r'(?!\.).*\.txt', encoding='ISO-8859-2'
+)
+stopwords = LazyCorpusLoader(
+    'stopwords', WordListCorpusReader, r'(?!README|\.).*', encoding='utf8'
+)
+subjectivity = LazyCorpusLoader(
+    'subjectivity',
+    CategorizedSentencesCorpusReader,
+    r'(quote.tok.gt9|plot.tok.gt9)\.5000',
+    cat_map={'quote.tok.gt9.5000': ['subj'], 'plot.tok.gt9.5000': ['obj']},
+    encoding='latin-1',
+)
+swadesh = LazyCorpusLoader(
+    'swadesh', SwadeshCorpusReader, r'(?!README|\.).*', encoding='utf8'
+)
+swadesh110 = LazyCorpusLoader(
+    'panlex_swadesh', PanlexSwadeshCorpusReader, r'swadesh110/.*\.txt', encoding='utf8'
+)
+swadesh207 = LazyCorpusLoader(
+    'panlex_swadesh', PanlexSwadeshCorpusReader, r'swadesh207/.*\.txt', encoding='utf8'
+)
+switchboard = LazyCorpusLoader('switchboard', SwitchboardCorpusReader, tagset='wsj')
+timit = LazyCorpusLoader('timit', TimitCorpusReader)
+# Tagged view over the same 'timit' corpus directory.  The fileid pattern is
+# a raw string because '\.' is not a valid escape in an ordinary literal.
+timit_tagged = LazyCorpusLoader(
+    'timit', TimitTaggedCorpusReader, r'.+\.tags', tagset='wsj', encoding='ascii'
+)
+toolbox = LazyCorpusLoader(
+    'toolbox', ToolboxCorpusReader, r'(?!.*(README|\.)).*\.(dic|txt)'
+)
+treebank = LazyCorpusLoader(
+    'treebank/combined',
+    BracketParseCorpusReader,
+    r'wsj_.*\.mrg',
+    tagset='wsj',
+    encoding='ascii',
+)
+treebank_chunk = LazyCorpusLoader(
+    'treebank/tagged',
+    ChunkedCorpusReader,
+    r'wsj_.*\.pos',
+    sent_tokenizer=RegexpTokenizer(r'(?<=/\.)\s*(?![^\[]*\])', gaps=True),
+    para_block_reader=tagged_treebank_para_block_reader,
+    tagset='wsj',
+    encoding='ascii',
+)
+treebank_raw = LazyCorpusLoader(
+    'treebank/raw', PlaintextCorpusReader, r'wsj_.*', encoding='ISO-8859-2'
+)
+# Raw string: '\.' is not a valid escape in an ordinary string literal.
+twitter_samples = LazyCorpusLoader('twitter_samples', TwitterCorpusReader, r'.*\.json')
+udhr = LazyCorpusLoader('udhr', UdhrCorpusReader)
+udhr2 = LazyCorpusLoader('udhr2', PlaintextCorpusReader, r'.*\.txt', encoding='utf8')
+universal_treebanks = LazyCorpusLoader(
+    'universal_treebanks_v20',
+    ConllCorpusReader,
+    r'.*\.conll',
+    columntypes=(
+        'ignore',
+        'words',
+        'ignore',
+        'ignore',
+        'pos',
+        'ignore',
+        'ignore',
+        'ignore',
+        'ignore',
+        'ignore',
+    ),
+)
+verbnet = LazyCorpusLoader('verbnet', VerbnetCorpusReader, r'(?!\.).*\.xml')
+webtext = LazyCorpusLoader(
+    'webtext', PlaintextCorpusReader, r'(?!README|\.).*\.txt', encoding='ISO-8859-2'
+)
+wordnet = LazyCorpusLoader(
+    'wordnet',
+    WordNetCorpusReader,
+    LazyCorpusLoader('omw', CorpusReader, r'.*/wn-data-.*\.tab', encoding='utf8'),
+)
+# Raw string: '\.' is not a valid escape in an ordinary string literal.
+wordnet_ic = LazyCorpusLoader('wordnet_ic', WordNetICCorpusReader, r'.*\.dat')
+words = LazyCorpusLoader(
+    'words', WordListCorpusReader, r'(?!README|\.).*', encoding='ascii'
+)
+
+# defined after treebank
+# PropBank loader paired with the ``treebank`` loader defined above.
+# - The 'frames/...' pattern is a raw string because '\.' is not a valid
+#   escape in an ordinary string literal.
+# - The lambda strips a leading 'wsj/NN/' directory prefix from a filename.
+propbank = LazyCorpusLoader(
+    'propbank',
+    PropbankCorpusReader,
+    'prop.txt',
+    r'frames/.*\.xml',
+    'verbs.txt',
+    lambda filename: re.sub(r'^wsj/\d\d/', '', filename),
+    treebank,
+)  # Must be defined *after* treebank corpus.
+# NomBank loader paired with the ``treebank`` loader defined above.
+# - The 'frames/...' pattern is a raw string because '\.' is not a valid
+#   escape in an ordinary string literal.
+# - The lambda strips a leading 'wsj/NN/' directory prefix from a filename.
+nombank = LazyCorpusLoader(
+    'nombank.1.0',
+    NombankCorpusReader,
+    'nombank.1.0',
+    r'frames/.*\.xml',
+    'nombank.1.0.words',
+    lambda filename: re.sub(r'^wsj/\d\d/', '', filename),
+    treebank,
+)  # Must be defined *after* treebank corpus.
+# PropBank loader paired with the ``ptb`` loader defined above.
+# - The 'frames/...' pattern is a raw string because '\.' is not a valid
+#   escape in an ordinary string literal.
+# - The lambda upper-cases the filename it is given.
+propbank_ptb = LazyCorpusLoader(
+    'propbank',
+    PropbankCorpusReader,
+    'prop.txt',
+    r'frames/.*\.xml',
+    'verbs.txt',
+    lambda filename: filename.upper(),
+    ptb,
+)  # Must be defined *after* ptb corpus.
+# NomBank loader paired with the ``ptb`` loader defined above.
+# - The 'frames/...' pattern is a raw string because '\.' is not a valid
+#   escape in an ordinary string literal.
+# - The lambda upper-cases the filename it is given.
+nombank_ptb = LazyCorpusLoader(
+    'nombank.1.0',
+    NombankCorpusReader,
+    'nombank.1.0',
+    r'frames/.*\.xml',
+    'nombank.1.0.words',
+    lambda filename: filename.upper(),
+    ptb,
+)  # Must be defined *after* ptb corpus.
+semcor = LazyCorpusLoader(
+    'semcor', SemcorCorpusReader, r'brown./tagfiles/br-.*\.xml', wordnet
+)  # Must be defined *after* wordnet corpus.
+
+nonbreaking_prefixes = LazyCorpusLoader(
+    'nonbreaking_prefixes',
+    NonbreakingPrefixesCorpusReader,
+    r'(?!README|\.).*',
+    encoding='utf8',
+)
+perluniprops = LazyCorpusLoader(
+    'perluniprops',
+    UnicharsCorpusReader,
+    r'(?!README|\.).*',
+    nltk_data_subdir='misc',
+    encoding='utf8',
+)
+
+# mwa_ppdb = LazyCorpusLoader(
+#     'mwa_ppdb', MWAPPDBCorpusReader, r'(?!README|\.).*', nltk_data_subdir='misc', encoding='utf8')
+
+# See https://github.com/nltk/nltk/issues/1579
+# and https://github.com/nltk/nltk/issues/1716
+#
+# pl196x = LazyCorpusLoader(
+#     'pl196x', Pl196xCorpusReader, r'[a-z]-.*\.xml',
+#     cat_file='cats.txt', textid_file='textids.txt', encoding='utf8')
+#
+# ipipan = LazyCorpusLoader(
+#     'ipipan', IPIPANCorpusReader, r'(?!\.).*morph\.xml')
+#
+# nkjp = LazyCorpusLoader(
+#     'nkjp', NKJPCorpusReader, r'', encoding='utf8')
+#
+# panlex_lite = LazyCorpusLoader(
+#    'panlex_lite', PanLexLiteCorpusReader)
+#
+# ycoe = LazyCorpusLoader(
+#     'ycoe', YCOECorpusReader)
+#
+# corpus not available with NLTK; these lines caused help(nltk.corpus) to break
+# hebrew_treebank = LazyCorpusLoader(
+#    'hebrew_treebank', BracketParseCorpusReader, r'.*\.txt')
+
+# FIXME:  override any imported demo from various corpora, see https://github.com/nltk/nltk/issues/2116
+def demo():
+    """Call ``demo()`` on each corpus loader listed below, in order.
+
+    The original author marks this list as out-of-date.  An exception
+    raised by one ``demo()`` call prevents the later ones from running.
+    """
+    # Order is significant only for the stop-on-first-error behaviour.
+    corpora = (
+        abc,
+        brown,
+        # chat80 is intentionally left out (disabled in the original list).
+        cmudict,
+        conll2000,
+        conll2002,
+        genesis,
+        gutenberg,
+        ieer,
+        inaugural,
+        indian,
+        names,
+        ppattach,
+        senseval,
+        shakespeare,
+        sinica_treebank,
+        state_union,
+        stopwords,
+        timit,
+        toolbox,
+        treebank,
+        udhr,
+        webtext,
+        words,
+    )
+    for corpus in corpora:
+        corpus.demo()
+
+
+#    ycoe.demo()
+
+if __name__ == '__main__':
+    # demo()
+    pass
+
+# ** this is for nose **
+# unload all corpora after tests
+def teardown_module(module=None):
+    """Test-teardown hook: unload every corpus reader found in ``nltk.corpus``.
+
+    Walks the public and private attribute names of the ``nltk.corpus``
+    package and calls ``_unload()`` on each attribute that is a
+    ``CorpusReader`` instance exposing that method.
+
+    :param module: Not used by the body; defaults to ``None``.
+    """
+    import nltk.corpus as corpus_pkg
+
+    for attr_name in dir(corpus_pkg):
+        candidate = getattr(corpus_pkg, attr_name, None)
+        if not isinstance(candidate, CorpusReader):
+            continue
+        if not hasattr(candidate, '_unload'):
+            continue
+        candidate._unload()
--- a/venv/lib/python3.7/site-packages/nltk/corpus/pycache/init.cpython-37.pyc
+++ b/venv/lib/python3.7/site-packages/nltk/corpus/pycache/init.cpython-37.pyc
--- a/venv/lib/python3.7/site-packages/nltk/corpus/pycache/europarl_raw.cpython-37.pyc
+++ b/venv/lib/python3.7/site-packages/nltk/corpus/pycache/europarl_raw.cpython-37.pyc
--- a/venv/lib/python3.7/site-packages/nltk/corpus/pycache/util.cpython-37.pyc
+++ b/venv/lib/python3.7/site-packages/nltk/corpus/pycache/util.cpython-37.pyc
--- a/venv/lib/python3.7/site-packages/nltk/corpus/europarl_raw.py
+++ b/venv/lib/python3.7/site-packages/nltk/corpus/europarl_raw.py
@@ -0,0 +1,55 @@
+# Natural Language Toolkit: Europarl Corpus Readers
+#
+# Copyright (C) 2001-2019 NLTK Project
+# Author:  Nitin Madnani <nmadnani@umiacs.umd.edu>
+# URL: <http://nltk.org/>
+# For license information, see LICENSE.TXT
+
+import re
+from nltk.corpus.util import LazyCorpusLoader
+from nltk.corpus.reader import *
+
+# Create a new corpus reader instance for each European language
+def _europarl_loader(language, suffix):
+    """Return the lazy loader for one Europarl language.
+
+    :param language: Sub-directory name under ``europarl_raw``.
+    :param suffix: File suffix that follows the final dot of the
+        ``ep-...`` fileids for that language.
+    """
+    return LazyCorpusLoader(
+        'europarl_raw/' + language,
+        EuroparlCorpusReader,
+        r'ep-.*\.' + suffix,
+        encoding='utf-8',
+    )
+
+
+# One module-level loader per language; every loader uses UTF-8.
+danish = _europarl_loader('danish', 'da')
+dutch = _europarl_loader('dutch', 'nl')
+english = _europarl_loader('english', 'en')
+finnish = _europarl_loader('finnish', 'fi')
+french = _europarl_loader('french', 'fr')
+german = _europarl_loader('german', 'de')
+greek = _europarl_loader('greek', 'el')
+italian = _europarl_loader('italian', 'it')
+portuguese = _europarl_loader('portuguese', 'pt')
+spanish = _europarl_loader('spanish', 'es')
+swedish = _europarl_loader('swedish', 'sv')
--- a/venv/lib/python3.7/site-packages/nltk/corpus/reader/init.py
+++ b/venv/lib/python3.7/site-packages/nltk/corpus/reader/init.py
@@ -0,0 +1,183 @@
+# Natural Language Toolkit: Corpus Readers
+#
+# Copyright (C) 2001-2019 NLTK Project
+# Author: Steven Bird <stevenbird1@gmail.com>
+#         Edward Loper <edloper@gmail.com>
+# URL: <http://nltk.org/>
+# For license information, see LICENSE.TXT
+
+"""
+NLTK corpus readers.  The modules in this package provide functions
+that can be used to read corpus fileids in a variety of formats.  These
+functions can be used to read both the corpus fileids that are
+distributed in the NLTK corpus package, and corpus fileids that are part
+of external corpora.
+
+Corpus Reader Functions
+=======================
+Each corpus module defines one or more "corpus reader functions",
+which can be used to read documents from that corpus.  These functions
+take an argument, ``item``, which is used to indicate which document
+should be read from the corpus:
+
+- If ``item`` is one of the unique identifiers listed in the corpus
+  module's ``items`` variable, then the corresponding document will
+  be loaded from the NLTK corpus package.
+- If ``item`` is a fileid, then that file will be read.
+
+Additionally, corpus reader functions can be given lists of item
+names; in which case, they will return a concatenation of the
+corresponding documents.
+
+Corpus reader functions are named based on the type of information
+they return.  Some common examples, and their return types, are:
+
+- words(): list of str
+- sents(): list of (list of str)
+- paras(): list of (list of (list of str))
+- tagged_words(): list of (str,str) tuple
+- tagged_sents(): list of (list of (str,str))
+- tagged_paras(): list of (list of (list of (str,str)))
+- chunked_sents(): list of (Tree w/ (str,str) leaves)
+- parsed_sents(): list of (Tree with str leaves)
+- parsed_paras(): list of (list of (Tree with str leaves))
+- xml(): A single xml ElementTree
+- raw(): unprocessed corpus contents
+
+For example, to read a list of the words in the Brown Corpus, use
+``nltk.corpus.brown.words()``:
+
+    >>> from nltk.corpus import brown
+    >>> print(", ".join(brown.words()))
+    The, Fulton, County, Grand, Jury, said, ...
+
+"""
+
+from nltk.corpus.reader.plaintext import *
+from nltk.corpus.reader.util import *
+from nltk.corpus.reader.api import *
+from nltk.corpus.reader.tagged import *
+from nltk.corpus.reader.cmudict import *
+from nltk.corpus.reader.conll import *
+from nltk.corpus.reader.chunked import *
+from nltk.corpus.reader.wordlist import *
+from nltk.corpus.reader.xmldocs import *
+from nltk.corpus.reader.ppattach import *
+from nltk.corpus.reader.senseval import *
+from nltk.corpus.reader.ieer import *
+from nltk.corpus.reader.sinica_treebank import *
+from nltk.corpus.reader.bracket_parse import *
+from nltk.corpus.reader.indian import *
+from nltk.corpus.reader.toolbox import *
+from nltk.corpus.reader.timit import *
+from nltk.corpus.reader.ycoe import *
+from nltk.corpus.reader.rte import *
+from nltk.corpus.reader.string_category import *
+from nltk.corpus.reader.propbank import *
+from nltk.corpus.reader.verbnet import *
+from nltk.corpus.reader.bnc import *
+from nltk.corpus.reader.nps_chat import *
+from nltk.corpus.reader.wordnet import *
+from nltk.corpus.reader.switchboard import *
+from nltk.corpus.reader.dependency import *
+from nltk.corpus.reader.nombank import *
+from nltk.corpus.reader.ipipan import *
+from nltk.corpus.reader.pl196x import *
+from nltk.corpus.reader.knbc import *
+from nltk.corpus.reader.chasen import *
+from nltk.corpus.reader.childes import *
+from nltk.corpus.reader.aligned import *
+from nltk.corpus.reader.lin import *
+from nltk.corpus.reader.semcor import *
+from nltk.corpus.reader.framenet import *
+from nltk.corpus.reader.udhr import *
+from nltk.corpus.reader.bnc import *
+from nltk.corpus.reader.sentiwordnet import *
+from nltk.corpus.reader.twitter import *
+from nltk.corpus.reader.nkjp import *
+from nltk.corpus.reader.crubadan import *
+from nltk.corpus.reader.mte import *
+from nltk.corpus.reader.reviews import *
+from nltk.corpus.reader.opinion_lexicon import *
+from nltk.corpus.reader.pros_cons import *
+from nltk.corpus.reader.categorized_sents import *
+from nltk.corpus.reader.comparative_sents import *
+from nltk.corpus.reader.panlex_lite import *
+from nltk.corpus.reader.panlex_swadesh import *
+
+# Make sure that nltk.corpus.reader.bracket_parse gives the module, not
+# the function bracket_parse() defined in nltk.tree:
+from nltk.corpus.reader import bracket_parse
+
+# Public names exported by ``from nltk.corpus.reader import *``.
+# Each name appears exactly once.
+__all__ = [
+    'CorpusReader',
+    'CategorizedCorpusReader',
+    'PlaintextCorpusReader',
+    'find_corpus_fileids',
+    'TaggedCorpusReader',
+    'CMUDictCorpusReader',
+    'ConllChunkCorpusReader',
+    'WordListCorpusReader',
+    'PPAttachmentCorpusReader',
+    'SensevalCorpusReader',
+    'IEERCorpusReader',
+    'ChunkedCorpusReader',
+    'SinicaTreebankCorpusReader',
+    'BracketParseCorpusReader',
+    'IndianCorpusReader',
+    'ToolboxCorpusReader',
+    'TimitCorpusReader',
+    'YCOECorpusReader',
+    'MacMorphoCorpusReader',
+    'SyntaxCorpusReader',
+    'AlpinoCorpusReader',
+    'RTECorpusReader',
+    'StringCategoryCorpusReader',
+    'EuroparlCorpusReader',
+    'CategorizedBracketParseCorpusReader',
+    'CategorizedTaggedCorpusReader',
+    'CategorizedPlaintextCorpusReader',
+    'PortugueseCategorizedPlaintextCorpusReader',
+    'tagged_treebank_para_block_reader',
+    'PropbankCorpusReader',
+    'VerbnetCorpusReader',
+    'BNCCorpusReader',
+    'ConllCorpusReader',
+    'XMLCorpusReader',
+    'NPSChatCorpusReader',
+    'SwadeshCorpusReader',
+    'WordNetCorpusReader',
+    'WordNetICCorpusReader',
+    'SwitchboardCorpusReader',
+    'DependencyCorpusReader',
+    'NombankCorpusReader',
+    'IPIPANCorpusReader',
+    'Pl196xCorpusReader',
+    'TEICorpusView',
+    'KNBCorpusReader',
+    'ChasenCorpusReader',
+    'CHILDESCorpusReader',
+    'AlignedCorpusReader',
+    'TimitTaggedCorpusReader',
+    'LinThesaurusCorpusReader',
+    'SemcorCorpusReader',
+    'FramenetCorpusReader',
+    'UdhrCorpusReader',
+    'SentiWordNetCorpusReader',
+    'SentiSynset',
+    'TwitterCorpusReader',
+    'NKJPCorpusReader',
+    'CrubadanCorpusReader',
+    'MTECorpusReader',
+    'ReviewsCorpusReader',
+    'OpinionLexiconCorpusReader',
+    'ProsConsCorpusReader',
+    'CategorizedSentencesCorpusReader',
+    'ComparativeSentencesCorpusReader',
+    'PanLexLiteCorpusReader',
+    'NonbreakingPrefixesCorpusReader',
+    'UnicharsCorpusReader',
+    'MWAPPDBCorpusReader',
+    'PanlexSwadeshCorpusReader',
+]
--- a/venv/lib/python3.7/site-packages/nltk/corpus/reader/pycache/init.cpython-37.pyc
+++ b/venv/lib/python3.7/site-packages/nltk/corpus/reader/pycache/init.cpython-37.pyc
--- a/venv/lib/python3.7/site-packages/nltk/corpus/reader/pycache/aligned.cpython-37.pyc
+++ b/venv/lib/python3.7/site-packages/nltk/corpus/reader/pycache/aligned.cpython-37.pyc
--- a/venv/lib/python3.7/site-packages/nltk/corpus/reader/pycache/api.cpython-37.pyc
+++ b/venv/lib/python3.7/site-packages/nltk/corpus/reader/pycache/api.cpython-37.pyc
--- a/venv/lib/python3.7/site-packages/nltk/corpus/reader/pycache/bnc.cpython-37.pyc
+++ b/venv/lib/python3.7/site-packages/nltk/corpus/reader/pycache/bnc.cpython-37.pyc
--- a/venv/lib/python3.7/site-packages/nltk/corpus/reader/pycache/bracket_parse.cpython-37.pyc
+++ b/venv/lib/python3.7/site-packages/nltk/corpus/reader/pycache/bracket_parse.cpython-37.pyc
--- a/venv/lib/python3.7/site-packages/nltk/corpus/reader/pycache/categorized_sents.cpython-37.pyc
+++ b/venv/lib/python3.7/site-packages/nltk/corpus/reader/pycache/categorized_sents.cpython-37.pyc
--- a/venv/lib/python3.7/site-packages/nltk/corpus/reader/pycache/chasen.cpython-37.pyc
+++ b/venv/lib/python3.7/site-packages/nltk/corpus/reader/pycache/chasen.cpython-37.pyc
--- a/venv/lib/python3.7/site-packages/nltk/corpus/reader/pycache/childes.cpython-37.pyc
+++ b/venv/lib/python3.7/site-packages/nltk/corpus/reader/pycache/childes.cpython-37.pyc
--- a/venv/lib/python3.7/site-packages/nltk/corpus/reader/pycache/chunked.cpython-37.pyc
+++ b/venv/lib/python3.7/site-packages/nltk/corpus/reader/pycache/chunked.cpython-37.pyc
--- a/venv/lib/python3.7/site-packages/nltk/corpus/reader/pycache/cmudict.cpython-37.pyc
+++ b/venv/lib/python3.7/site-packages/nltk/corpus/reader/pycache/cmudict.cpython-37.pyc
--- a/venv/lib/python3.7/site-packages/nltk/corpus/reader/pycache/comparative_sents.cpython-37.pyc
+++ b/venv/lib/python3.7/site-packages/nltk/corpus/reader/pycache/comparative_sents.cpython-37.pyc
--- a/venv/lib/python3.7/site-packages/nltk/corpus/reader/pycache/conll.cpython-37.pyc
+++ b/venv/lib/python3.7/site-packages/nltk/corpus/reader/pycache/conll.cpython-37.pyc
--- a/venv/lib/python3.7/site-packages/nltk/corpus/reader/pycache/crubadan.cpython-37.pyc
+++ b/venv/lib/python3.7/site-packages/nltk/corpus/reader/pycache/crubadan.cpython-37.pyc
--- a/venv/lib/python3.7/site-packages/nltk/corpus/reader/pycache/dependency.cpython-37.pyc
+++ b/venv/lib/python3.7/site-packages/nltk/corpus/reader/pycache/dependency.cpython-37.pyc
--- a/venv/lib/python3.7/site-packages/nltk/corpus/reader/pycache/framenet.cpython-37.pyc
+++ b/venv/lib/python3.7/site-packages/nltk/corpus/reader/pycache/framenet.cpython-37.pyc
--- a/venv/lib/python3.7/site-packages/nltk/corpus/reader/pycache/ieer.cpython-37.pyc
+++ b/venv/lib/python3.7/site-packages/nltk/corpus/reader/pycache/ieer.cpython-37.pyc
--- a/venv/lib/python3.7/site-packages/nltk/corpus/reader/pycache/indian.cpython-37.pyc
+++ b/venv/lib/python3.7/site-packages/nltk/corpus/reader/pycache/indian.cpython-37.pyc
--- a/venv/lib/python3.7/site-packages/nltk/corpus/reader/pycache/ipipan.cpython-37.pyc
+++ b/venv/lib/python3.7/site-packages/nltk/corpus/reader/pycache/ipipan.cpython-37.pyc
--- a/venv/lib/python3.7/site-packages/nltk/corpus/reader/pycache/knbc.cpython-37.pyc
+++ b/venv/lib/python3.7/site-packages/nltk/corpus/reader/pycache/knbc.cpython-37.pyc
--- a/venv/lib/python3.7/site-packages/nltk/corpus/reader/pycache/lin.cpython-37.pyc
+++ b/venv/lib/python3.7/site-packages/nltk/corpus/reader/pycache/lin.cpython-37.pyc
--- a/venv/lib/python3.7/site-packages/nltk/corpus/reader/pycache/mte.cpython-37.pyc
+++ b/venv/lib/python3.7/site-packages/nltk/corpus/reader/pycache/mte.cpython-37.pyc
--- a/venv/lib/python3.7/site-packages/nltk/corpus/reader/pycache/nkjp.cpython-37.pyc
+++ b/venv/lib/python3.7/site-packages/nltk/corpus/reader/pycache/nkjp.cpython-37.pyc
--- a/venv/lib/python3.7/site-packages/nltk/corpus/reader/pycache/nombank.cpython-37.pyc
+++ b/venv/lib/python3.7/site-packages/nltk/corpus/reader/pycache/nombank.cpython-37.pyc
--- a/venv/lib/python3.7/site-packages/nltk/corpus/reader/pycache/nps_chat.cpython-37.pyc
+++ b/venv/lib/python3.7/site-packages/nltk/corpus/reader/pycache/nps_chat.cpython-37.pyc
--- a/venv/lib/python3.7/site-packages/nltk/corpus/reader/pycache/opinion_lexicon.cpython-37.pyc
+++ b/venv/lib/python3.7/site-packages/nltk/corpus/reader/pycache/opinion_lexicon.cpython-37.pyc
--- a/venv/lib/python3.7/site-packages/nltk/corpus/reader/pycache/panlex_lite.cpython-37.pyc
+++ b/venv/lib/python3.7/site-packages/nltk/corpus/reader/pycache/panlex_lite.cpython-37.pyc
--- a/venv/lib/python3.7/site-packages/nltk/corpus/reader/pycache/panlex_swadesh.cpython-37.pyc
+++ b/venv/lib/python3.7/site-packages/nltk/corpus/reader/pycache/panlex_swadesh.cpython-37.pyc
--- a/venv/lib/python3.7/site-packages/nltk/corpus/reader/pycache/pl196x.cpython-37.pyc
+++ b/venv/lib/python3.7/site-packages/nltk/corpus/reader/pycache/pl196x.cpython-37.pyc
--- a/venv/lib/python3.7/site-packages/nltk/corpus/reader/pycache/plaintext.cpython-37.pyc
+++ b/venv/lib/python3.7/site-packages/nltk/corpus/reader/pycache/plaintext.cpython-37.pyc
--- a/venv/lib/python3.7/site-packages/nltk/corpus/reader/pycache/ppattach.cpython-37.pyc
+++ b/venv/lib/python3.7/site-packages/nltk/corpus/reader/pycache/ppattach.cpython-37.pyc
--- a/venv/lib/python3.7/site-packages/nltk/corpus/reader/pycache/propbank.cpython-37.pyc
+++ b/venv/lib/python3.7/site-packages/nltk/corpus/reader/pycache/propbank.cpython-37.pyc
--- a/venv/lib/python3.7/site-packages/nltk/corpus/reader/pycache/pros_cons.cpython-37.pyc
+++ b/venv/lib/python3.7/site-packages/nltk/corpus/reader/pycache/pros_cons.cpython-37.pyc
--- a/venv/lib/python3.7/site-packages/nltk/corpus/reader/pycache/reviews.cpython-37.pyc
+++ b/venv/lib/python3.7/site-packages/nltk/corpus/reader/pycache/reviews.cpython-37.pyc
--- a/venv/lib/python3.7/site-packages/nltk/corpus/reader/pycache/rte.cpython-37.pyc
+++ b/venv/lib/python3.7/site-packages/nltk/corpus/reader/pycache/rte.cpython-37.pyc
--- a/venv/lib/python3.7/site-packages/nltk/corpus/reader/pycache/semcor.cpython-37.pyc
+++ b/venv/lib/python3.7/site-packages/nltk/corpus/reader/pycache/semcor.cpython-37.pyc
--- a/venv/lib/python3.7/site-packages/nltk/corpus/reader/pycache/senseval.cpython-37.pyc
+++ b/venv/lib/python3.7/site-packages/nltk/corpus/reader/pycache/senseval.cpython-37.pyc
--- a/venv/lib/python3.7/site-packages/nltk/corpus/reader/pycache/sentiwordnet.cpython-37.pyc
+++ b/venv/lib/python3.7/site-packages/nltk/corpus/reader/pycache/sentiwordnet.cpython-37.pyc
--- a/venv/lib/python3.7/site-packages/nltk/corpus/reader/pycache/sinica_treebank.cpython-37.pyc
+++ b/venv/lib/python3.7/site-packages/nltk/corpus/reader/pycache/sinica_treebank.cpython-37.pyc
--- a/venv/lib/python3.7/site-packages/nltk/corpus/reader/pycache/string_category.cpython-37.pyc
+++ b/venv/lib/python3.7/site-packages/nltk/corpus/reader/pycache/string_category.cpython-37.pyc
--- a/venv/lib/python3.7/site-packages/nltk/corpus/reader/pycache/switchboard.cpython-37.pyc
+++ b/venv/lib/python3.7/site-packages/nltk/corpus/reader/pycache/switchboard.cpython-37.pyc
--- a/venv/lib/python3.7/site-packages/nltk/corpus/reader/pycache/tagged.cpython-37.pyc
+++ b/venv/lib/python3.7/site-packages/nltk/corpus/reader/pycache/tagged.cpython-37.pyc
--- a/venv/lib/python3.7/site-packages/nltk/corpus/reader/pycache/timit.cpython-37.pyc
+++ b/venv/lib/python3.7/site-packages/nltk/corpus/reader/pycache/timit.cpython-37.pyc
--- a/venv/lib/python3.7/site-packages/nltk/corpus/reader/pycache/toolbox.cpython-37.pyc
+++ b/venv/lib/python3.7/site-packages/nltk/corpus/reader/pycache/toolbox.cpython-37.pyc
--- a/venv/lib/python3.7/site-packages/nltk/corpus/reader/pycache/twitter.cpython-37.pyc
+++ b/venv/lib/python3.7/site-packages/nltk/corpus/reader/pycache/twitter.cpython-37.pyc
--- a/venv/lib/python3.7/site-packages/nltk/corpus/reader/pycache/udhr.cpython-37.pyc
+++ b/venv/lib/python3.7/site-packages/nltk/corpus/reader/pycache/udhr.cpython-37.pyc
--- a/venv/lib/python3.7/site-packages/nltk/corpus/reader/pycache/util.cpython-37.pyc
+++ b/venv/lib/python3.7/site-packages/nltk/corpus/reader/pycache/util.cpython-37.pyc
--- a/venv/lib/python3.7/site-packages/nltk/corpus/reader/pycache/verbnet.cpython-37.pyc
+++ b/venv/lib/python3.7/site-packages/nltk/corpus/reader/pycache/verbnet.cpython-37.pyc
--- a/venv/lib/python3.7/site-packages/nltk/corpus/reader/pycache/wordlist.cpython-37.pyc
+++ b/venv/lib/python3.7/site-packages/nltk/corpus/reader/pycache/wordlist.cpython-37.pyc
--- a/venv/lib/python3.7/site-packages/nltk/corpus/reader/pycache/wordnet.cpython-37.pyc
+++ b/venv/lib/python3.7/site-packages/nltk/corpus/reader/pycache/wordnet.cpython-37.pyc
--- a/venv/lib/python3.7/site-packages/nltk/corpus/reader/pycache/xmldocs.cpython-37.pyc
+++ b/venv/lib/python3.7/site-packages/nltk/corpus/reader/pycache/xmldocs.cpython-37.pyc
--- a/venv/lib/python3.7/site-packages/nltk/corpus/reader/pycache/ycoe.cpython-37.pyc
+++ b/venv/lib/python3.7/site-packages/nltk/corpus/reader/pycache/ycoe.cpython-37.pyc
--- a/venv/lib/python3.7/site-packages/nltk/corpus/reader/aligned.py
+++ b/venv/lib/python3.7/site-packages/nltk/corpus/reader/aligned.py
@@ -0,0 +1,168 @@
+# Natural Language Toolkit: Aligned Corpus Reader
+#
+# Copyright (C) 2001-2019 NLTK Project
+# URL: <http://nltk.org/>
+# Author: Steven Bird <stevenbird1@gmail.com>
+# For license information, see LICENSE.TXT
+
+from six import string_types
+
+from nltk.tokenize import WhitespaceTokenizer, RegexpTokenizer
+from nltk.translate import AlignedSent, Alignment
+
+from nltk.corpus.reader.api import CorpusReader
+from nltk.corpus.reader.util import (
+    StreamBackedCorpusView,
+    concat,
+    read_alignedsent_block,
+)
+
+
+class AlignedCorpusReader(CorpusReader):
+    """
+    Reader for corpora of word-aligned sentences.  Tokens are assumed
+    to be separated by whitespace.  Sentences begin on separate lines.
+    """
+
+    def __init__(
+        self,
+        root,
+        fileids,
+        sep='/',
+        word_tokenizer=WhitespaceTokenizer(),
+        sent_tokenizer=RegexpTokenizer('\n', gaps=True),
+        alignedsent_block_reader=read_alignedsent_block,
+        encoding='latin1',
+    ):
+        """
+        Construct a new Aligned Corpus reader for a set of documents
+        located at the given root directory.  Example usage:
+
+            >>> root = '/...path to corpus.../'
+            >>> reader = AlignedCorpusReader(root, '.*', '.txt') # doctest: +SKIP
+
+        :param root: The root directory for this corpus.
+        :param fileids: A list or regexp specifying the fileids in this corpus.
+        """
+        CorpusReader.__init__(self, root, fileids, encoding)
+        self._sep = sep
+        self._word_tokenizer = word_tokenizer
+        self._sent_tokenizer = sent_tokenizer
+        self._alignedsent_block_reader = alignedsent_block_reader
+
+    def raw(self, fileids=None):
+        """
+        :return: the given file(s) as a single string.
+        :rtype: str
+        """
+        if fileids is None:
+            fileids = self._fileids
+        elif isinstance(fileids, string_types):
+            fileids = [fileids]
+        return concat([self.open(f).read() for f in fileids])
+
+    def words(self, fileids=None):
+        """
+        :return: the given file(s) as a list of words
+            and punctuation symbols.
+        :rtype: list(str)
+        """
+        return concat(
+            [
+                AlignedSentCorpusView(
+                    fileid,
+                    enc,
+                    False,
+                    False,
+                    self._word_tokenizer,
+                    self._sent_tokenizer,
+                    self._alignedsent_block_reader,
+                )
+                for (fileid, enc) in self.abspaths(fileids, True)
+            ]
+        )
+
+    def sents(self, fileids=None):
+        """
+        :return: the given file(s) as a list of
+            sentences or utterances, each encoded as a list of word
+            strings.
+        :rtype: list(list(str))
+        """
+        return concat(
+            [
+                AlignedSentCorpusView(
+                    fileid,
+                    enc,
+                    False,
+                    True,
+                    self._word_tokenizer,
+                    self._sent_tokenizer,
+                    self._alignedsent_block_reader,
+                )
+                for (fileid, enc) in self.abspaths(fileids, True)
+            ]
+        )
+
+    def aligned_sents(self, fileids=None):
+        """
+        :return: the given file(s) as a list of AlignedSent objects.
+        :rtype: list(AlignedSent)
+        """
+        return concat(
+            [
+                AlignedSentCorpusView(
+                    fileid,
+                    enc,
+                    True,
+                    True,
+                    self._word_tokenizer,
+                    self._sent_tokenizer,
+                    self._alignedsent_block_reader,
+                )
+                for (fileid, enc) in self.abspaths(fileids, True)
+            ]
+        )
+
+
+class AlignedSentCorpusView(StreamBackedCorpusView):
+    """
+    A specialized corpus view for aligned sentences.
+    ``AlignedSentCorpusView`` objects are typically created by
+    ``AlignedCorpusReader`` (not directly by nltk users).
+    """
+
+    def __init__(
+        self,
+        corpus_file,
+        encoding,
+        aligned,
+        group_by_sent,
+        word_tokenizer,
+        sent_tokenizer,
+        alignedsent_block_reader,
+    ):
+        self._aligned = aligned
+        self._group_by_sent = group_by_sent
+        self._word_tokenizer = word_tokenizer
+        self._sent_tokenizer = sent_tokenizer
+        self._alignedsent_block_reader = alignedsent_block_reader
+        StreamBackedCorpusView.__init__(self, corpus_file, encoding=encoding)
+
+    def read_block(self, stream):
+        block = [
+            self._word_tokenizer.tokenize(sent_str)
+            for alignedsent_str in self._alignedsent_block_reader(stream)
+            for sent_str in self._sent_tokenizer.tokenize(alignedsent_str)
+        ]
+        if self._aligned:
+            block[2] = Alignment.fromstring(
+                " ".join(block[2])
+            )  # kludge; we shouldn't have tokenized the alignment string
+            block = [AlignedSent(*block)]
+        elif self._group_by_sent:
+            block = [block[0]]
+        else:
+            block = block[0]
+
+        return block
--- a/venv/lib/python3.7/site-packages/nltk/corpus/reader/api.py
+++ b/venv/lib/python3.7/site-packages/nltk/corpus/reader/api.py
@@ -0,0 +1,484 @@
+# Natural Language Toolkit: API for Corpus Readers
+#
+# Copyright (C) 2001-2019 NLTK Project
+# Author: Steven Bird <stevenbird1@gmail.com>
+#         Edward Loper <edloper@gmail.com>
+# URL: <http://nltk.org/>
+# For license information, see LICENSE.TXT
+
+"""
+API for corpus readers.
+"""
+from __future__ import unicode_literals
+
+import os
+import re
+from collections import defaultdict
+from itertools import chain
+
+from six import string_types
+
+from nltk import compat
+from nltk.data import PathPointer, FileSystemPathPointer, ZipFilePathPointer
+
+from nltk.corpus.reader.util import *
+
+
+@compat.python_2_unicode_compatible
+class CorpusReader(object):
+    """
+    A base class for "corpus reader" classes, each of which can be
+    used to read a specific corpus format.  Each individual corpus
+    reader instance is used to read a specific corpus, consisting of
+    one or more files under a common root directory.  Each file is
+    identified by its ``file identifier``, which is the relative path
+    to the file from the root directory.
+
+    A separate subclass is defined for each corpus format.  These
+    subclasses define one or more methods that provide 'views' on the
+    corpus contents, such as ``words()`` (for a list of words) and
+    ``parsed_sents()`` (for a list of parsed sentences).  Called with
+    no arguments, these methods will return the contents of the entire
+    corpus.  For most corpora, these methods define one or more
+    selection arguments, such as ``fileids`` or ``categories``, which can
+    be used to select which portion of the corpus should be returned.
+    """
+
+    def __init__(self, root, fileids, encoding='utf8', tagset=None):
+        """
+        :type root: PathPointer or str
+        :param root: A path pointer identifying the root directory for
+            this corpus.  If a string is specified, then it will be
+            converted to a ``PathPointer`` automatically.
+        :param fileids: A list of the files that make up this corpus.
+            This list can either be specified explicitly, as a list of
+            strings; or implicitly, as a regular expression over file
+            paths.  The absolute path for each file will be constructed
+            by joining the reader's root to each file name.
+        :param encoding: The default unicode encoding for the files
+            that make up the corpus.  The value of ``encoding`` can be any
+            of the following:
+            - A string: ``encoding`` is the encoding name for all files.
+            - A dictionary: ``encoding[file_id]`` is the encoding
+              name for the file whose identifier is ``file_id``.  If
+              ``file_id`` is not in ``encoding``, then the file
+              contents will be processed using non-unicode byte strings.
+            - A list: ``encoding`` should be a list of ``(regexp, encoding)``
+              tuples.  The encoding for a file whose identifier is ``file_id``
+              will be the ``encoding`` value for the first tuple whose
+              ``regexp`` matches the ``file_id``.  If no tuple's ``regexp``
+              matches the ``file_id``, the file contents will be processed
+              using non-unicode byte strings.
+            - None: the file contents of all files will be
+              processed using non-unicode byte strings.
+        :param tagset: The name of the tagset used by this corpus, to be used
+              for normalizing or converting the POS tags returned by the
+              tagged_...() methods.
+        """
+        # Convert the root to a path pointer, if necessary.
+        if isinstance(root, string_types) and not isinstance(root, PathPointer):
+            m = re.match('(.*\.zip)/?(.*)$|', root)
+            zipfile, zipentry = m.groups()
+            if zipfile:
+                root = ZipFilePathPointer(zipfile, zipentry)
+            else:
+                root = FileSystemPathPointer(root)
+        elif not isinstance(root, PathPointer):
+            raise TypeError('CorpusReader: expected a string or a PathPointer')
+
+        # If `fileids` is a regexp, then expand it.
+        if isinstance(fileids, string_types):
+            fileids = find_corpus_fileids(root, fileids)
+
+        self._fileids = fileids
+        """A list of the relative paths for the fileids that make up
+        this corpus."""
+
+        self._root = root
+        """The root directory for this corpus."""
+
+        # If encoding was specified as a list of regexps, then convert
+        # it to a dictionary.
+        if isinstance(encoding, list):
+            encoding_dict = {}
+            for fileid in self._fileids:
+                for x in encoding:
+                    (regexp, enc) = x
+                    if re.match(regexp, fileid):
+                        encoding_dict[fileid] = enc
+                        break
+            encoding = encoding_dict
+
+        self._encoding = encoding
+        """The default unicode encoding for the fileids that make up
+           this corpus.  If ``encoding`` is None, then the file
+           contents are processed using byte strings."""
+        self._tagset = tagset
+
+    def __repr__(self):
+        if isinstance(self._root, ZipFilePathPointer):
+            path = '%s/%s' % (self._root.zipfile.filename, self._root.entry)
+        else:
+            path = '%s' % self._root.path
+        return '<%s in %r>' % (self.__class__.__name__, path)
+
+    def ensure_loaded(self):
+        """
+        Load this corpus (if it has not already been loaded).  This is
+        used by LazyCorpusLoader as a simple method that can be used to
+        make sure a corpus is loaded -- e.g., in case a user wants to
+        do help(some_corpus).
+        """
+        pass  # no need to actually do anything.
+
+    def readme(self):
+        """
+        Return the contents of the corpus README file, if it exists.
+        """
+        return self.open("README").read()
+
+    def license(self):
+        """
+        Return the contents of the corpus LICENSE file, if it exists.
+        """
+        return self.open("LICENSE").read()
+
+    def citation(self):
+        """
+        Return the contents of the corpus citation.bib file, if it exists.
+        """
+        return self.open("citation.bib").read()
+
+    def fileids(self):
+        """
+        Return a list of file identifiers for the fileids that make up
+        this corpus.
+        """
+        return self._fileids
+
+    def abspath(self, fileid):
+        """
+        Return the absolute path for the given file.
+
+        :type fileid: str
+        :param fileid: The file identifier for the file whose path
+            should be returned.
+        :rtype: PathPointer
+        """
+        return self._root.join(fileid)
+
+    def abspaths(self, fileids=None, include_encoding=False, include_fileid=False):
+        """
+        Return a list of the absolute paths for all fileids in this corpus;
+        or for the given list of fileids, if specified.
+
+        :type fileids: None or str or list
+        :param fileids: Specifies the set of fileids for which paths should
+            be returned.  Can be None, for all fileids; a list of
+            file identifiers, for a specified set of fileids; or a single
+            file identifier, for a single file.  Note that the return
+            value is always a list of paths, even if ``fileids`` is a
+            single file identifier.
+
+        :param include_encoding: If true, then return a list of
+            ``(path_pointer, encoding)`` tuples.
+
+        :rtype: list(PathPointer)
+        """
+        if fileids is None:
+            fileids = self._fileids
+        elif isinstance(fileids, string_types):
+            fileids = [fileids]
+
+        paths = [self._root.join(f) for f in fileids]
+
+        if include_encoding and include_fileid:
+            return list(zip(paths, [self.encoding(f) for f in fileids], fileids))
+        elif include_fileid:
+            return list(zip(paths, fileids))
+        elif include_encoding:
+            return list(zip(paths, [self.encoding(f) for f in fileids]))
+        else:
+            return paths
+
+    def open(self, file):
+        """
+        Return an open stream that can be used to read the given file.
+        If the file's encoding is not None, then the stream will
+        automatically decode the file's contents into unicode.
+
+        :param file: The file identifier of the file to read.
+        """
+        encoding = self.encoding(file)
+        stream = self._root.join(file).open(encoding)
+        return stream
+
+    def encoding(self, file):
+        """
+        Return the unicode encoding for the given corpus file, if known.
+        If the encoding is unknown, or if the given file should be
+        processed using byte strings (str), then return None.
+        """
+        if isinstance(self._encoding, dict):
+            return self._encoding.get(file)
+        else:
+            return self._encoding
+
+    def _get_root(self):
+        return self._root
+
+    root = property(
+        _get_root,
+        doc="""
+        The directory where this corpus is stored.
+
+        :type: PathPointer""",
+    )
+
+
+######################################################################
+# { Corpora containing categorized items
+######################################################################
+
+
+class CategorizedCorpusReader(object):
+    """
+    A mixin class used to aid in the implementation of corpus readers
+    for categorized corpora.  This class defines the method
+    ``categories()``, which returns a list of the categories for the
+    corpus or for a specified set of fileids; and overrides ``fileids()``
+    to take a ``categories`` argument, restricting the set of fileids to
+    be returned.
+
+    Subclasses are expected to:
+
+      - Call ``__init__()`` to set up the mapping.
+
+      - Override all view methods to accept a ``categories`` parameter,
+        which can be used *instead* of the ``fileids`` parameter, to
+        select which fileids should be included in the returned view.
+    """
+
+    def __init__(self, kwargs):
+        """
+        Initialize this mapping based on keyword arguments, as
+        follows:
+
+          - cat_pattern: A regular expression pattern used to find the
+            category for each file identifier.  The pattern will be
+            applied to each file identifier, and the first matching
+            group will be used as the category label for that file.
+
+          - cat_map: A dictionary, mapping from file identifiers to
+            category labels.
+
+          - cat_file: The name of a file that contains the mapping
+            from file identifiers to categories.  The argument
+            ``cat_delimiter`` can be used to specify a delimiter.
+
+        The corresponding argument will be deleted from ``kwargs``.  If
+        more than one argument is specified, an exception will be
+        raised.
+        """
+        self._f2c = None  #: file-to-category mapping
+        self._c2f = None  #: category-to-file mapping
+
+        self._pattern = None  #: regexp specifying the mapping
+        self._map = None  #: dict specifying the mapping
+        self._file = None  #: fileid of file containing the mapping
+        self._delimiter = None  #: delimiter for ``self._file``
+
+        if 'cat_pattern' in kwargs:
+            self._pattern = kwargs['cat_pattern']
+            del kwargs['cat_pattern']
+        elif 'cat_map' in kwargs:
+            self._map = kwargs['cat_map']
+            del kwargs['cat_map']
+        elif 'cat_file' in kwargs:
+            self._file = kwargs['cat_file']
+            del kwargs['cat_file']
+            if 'cat_delimiter' in kwargs:
+                self._delimiter = kwargs['cat_delimiter']
+                del kwargs['cat_delimiter']
+        else:
+            raise ValueError(
+                'Expected keyword argument cat_pattern or ' 'cat_map or cat_file.'
+            )
+
+        if 'cat_pattern' in kwargs or 'cat_map' in kwargs or 'cat_file' in kwargs:
+            raise ValueError(
+                'Specify exactly one of: cat_pattern, ' 'cat_map, cat_file.'
+            )
+
+    def _init(self):
+        self._f2c = defaultdict(set)
+        self._c2f = defaultdict(set)
+
+        if self._pattern is not None:
+            for file_id in self._fileids:
+                category = re.match(self._pattern, file_id).group(1)
+                self._add(file_id, category)
+
+        elif self._map is not None:
+            for (file_id, categories) in self._map.items():
+                for category in categories:
+                    self._add(file_id, category)
+
+        elif self._file is not None:
+            for line in self.open(self._file).readlines():
+                line = line.strip()
+                file_id, categories = line.split(self._delimiter, 1)
+                if file_id not in self.fileids():
+                    raise ValueError(
+                        'In category mapping file %s: %s '
+                        'not found' % (self._file, file_id)
+                    )
+                for category in categories.split(self._delimiter):
+                    self._add(file_id, category)
+
+    def _add(self, file_id, category):
+        self._f2c[file_id].add(category)
+        self._c2f[category].add(file_id)
+
+    def categories(self, fileids=None):
+        """
+        Return a list of the categories that are defined for this corpus,
+        or for the file(s) if it is given.
+        """
+        if self._f2c is None:
+            self._init()
+        if fileids is None:
+            return sorted(self._c2f)
+        if isinstance(fileids, string_types):
+            fileids = [fileids]
+        return sorted(set.union(*[self._f2c[d] for d in fileids]))
+
+    def fileids(self, categories=None):
+        """
+        Return a list of file identifiers for the files that make up
+        this corpus, or that make up the given category(s) if specified.
+        """
+        if categories is None:
+            return super(CategorizedCorpusReader, self).fileids()
+        elif isinstance(categories, string_types):
+            if self._f2c is None:
+                self._init()
+            if categories in self._c2f:
+                return sorted(self._c2f[categories])
+            else:
+                raise ValueError('Category %s not found' % categories)
+        else:
+            if self._f2c is None:
+                self._init()
+            return sorted(set.union(*[self._c2f[c] for c in categories]))
+
+
+######################################################################
+# { Treebank readers
+######################################################################
+
+# [xx] is it worth it to factor this out?
+class SyntaxCorpusReader(CorpusReader):
+    """
+    An abstract base class for reading corpora consisting of
+    syntactically parsed text.  Subclasses should define:
+
+      - ``__init__``, which specifies the location of the corpus
+        and a method for detecting the sentence blocks in corpus files.
+      - ``_read_block``, which reads a block from the input stream.
+      - ``_word``, which takes a block and returns a list of list of words.
+      - ``_tag``, which takes a block and returns a list of list of tagged
+        words.
+      - ``_parse``, which takes a block and returns a list of parsed
+        sentences.
+    """
+
+    def _parse(self, s):
+        raise NotImplementedError()
+
+    def _word(self, s):
+        raise NotImplementedError()
+
+    def _tag(self, s):
+        raise NotImplementedError()
+
+    def _read_block(self, stream):
+        raise NotImplementedError()
+
+    def raw(self, fileids=None):
+        if fileids is None:
+            fileids = self._fileids
+        elif isinstance(fileids, string_types):
+            fileids = [fileids]
+        return concat([self.open(f).read() for f in fileids])
+
+    def parsed_sents(self, fileids=None):
+        reader = self._read_parsed_sent_block
+        return concat(
+            [
+                StreamBackedCorpusView(fileid, reader, encoding=enc)
+                for fileid, enc in self.abspaths(fileids, True)
+            ]
+        )
+
+    def tagged_sents(self, fileids=None, tagset=None):
+        def reader(stream):
+            return self._read_tagged_sent_block(stream, tagset)
+
+        return concat(
+            [
+                StreamBackedCorpusView(fileid, reader, encoding=enc)
+                for fileid, enc in self.abspaths(fileids, True)
+            ]
+        )
+
+    def sents(self, fileids=None):
+        reader = self._read_sent_block
+        return concat(
+            [
+                StreamBackedCorpusView(fileid, reader, encoding=enc)
+                for fileid, enc in self.abspaths(fileids, True)
+            ]
+        )
+
+    def tagged_words(self, fileids=None, tagset=None):
+        def reader(stream):
+            return self._read_tagged_word_block(stream, tagset)
+
+        return concat(
+            [
+                StreamBackedCorpusView(fileid, reader, encoding=enc)
+                for fileid, enc in self.abspaths(fileids, True)
+            ]
+        )
+
+    def words(self, fileids=None):
+        return concat(
+            [
+                StreamBackedCorpusView(fileid, self._read_word_block, encoding=enc)
+                for fileid, enc in self.abspaths(fileids, True)
+            ]
+        )
+
+    # ------------------------------------------------------------
+    # { Block Readers
+
+    def _read_word_block(self, stream):
+        return list(chain(*self._read_sent_block(stream)))
+
+    def _read_tagged_word_block(self, stream, tagset=None):
+        return list(chain(*self._read_tagged_sent_block(stream, tagset)))
+
+    def _read_sent_block(self, stream):
+        return list(filter(None, [self._word(t) for t in self._read_block(stream)]))
+
+    def _read_tagged_sent_block(self, stream, tagset=None):
+        return list(
+            filter(None, [self._tag(t, tagset) for t in self._read_block(stream)])
+        )
+
+    def _read_parsed_sent_block(self, stream):
+        return list(filter(None, [self._parse(t) for t in self._read_block(stream)]))
+
+    # } End of Block Readers
+    # ------------------------------------------------------------
--- a/venv/lib/python3.7/site-packages/nltk/corpus/reader/bnc.py
+++ b/venv/lib/python3.7/site-packages/nltk/corpus/reader/bnc.py
@@ -0,0 +1,258 @@
+# Natural Language Toolkit: British National Corpus (BNC) Reader
+#
+# Copyright (C) 2001-2019 NLTK Project
+# Author: Edward Loper <edloper@gmail.com>
+# URL: <http://nltk.org/>
+# For license information, see LICENSE.TXT
+
+"""Corpus reader for the XML version of the British National Corpus."""
+
+from nltk.corpus.reader.util import concat
+from nltk.corpus.reader.xmldocs import XMLCorpusReader, XMLCorpusView, ElementTree
+
+
+class BNCCorpusReader(XMLCorpusReader):
+    """Corpus reader for the XML version of the British National Corpus.
+
+    For access to the complete XML data structure, use the ``xml()``
+    method.  For access to simple word lists and tagged word lists, use
+    ``words()``, ``sents()``, ``tagged_words()``, and ``tagged_sents()``.
+
+    You can obtain the full version of the BNC corpus at
+    http://www.ota.ox.ac.uk/desc/2554
+
+    If you extracted the archive to a directory called `BNC`, then you can
+    instantiate the reader as::
+
+        BNCCorpusReader(root='BNC/Texts/', fileids=r'[A-K]/\w*/\w*\.xml')
+
+    """
+
+    def __init__(self, root, fileids, lazy=True):
+        XMLCorpusReader.__init__(self, root, fileids)
+        self._lazy = lazy
+
+    def words(self, fileids=None, strip_space=True, stem=False):
+        """
+        :return: the given file(s) as a list of words
+            and punctuation symbols.
+        :rtype: list(str)
+
+        :param strip_space: If true, then strip trailing spaces from
+            word tokens.  Otherwise, leave the spaces on the tokens.
+        :param stem: If true, then use word stems instead of word strings.
+        """
+        return self._views(fileids, False, None, strip_space, stem)
+
+    def tagged_words(self, fileids=None, c5=False, strip_space=True, stem=False):
+        """
+        :return: the given file(s) as a list of tagged
+            words and punctuation symbols, encoded as tuples
+            ``(word,tag)``.
+        :rtype: list(tuple(str,str))
+
+        :param c5: If true, then the tags used will be the more detailed
+            c5 tags.  Otherwise, the simplified tags will be used.
+        :param strip_space: If true, then strip trailing spaces from
+            word tokens.  Otherwise, leave the spaces on the tokens.
+        :param stem: If true, then use word stems instead of word strings.
+        """
+        tag = 'c5' if c5 else 'pos'
+        return self._views(fileids, False, tag, strip_space, stem)
+
+    def sents(self, fileids=None, strip_space=True, stem=False):
+        """
+        :return: the given file(s) as a list of
+            sentences or utterances, each encoded as a list of word
+            strings.
+        :rtype: list(list(str))
+
+        :param strip_space: If true, then strip trailing spaces from
+            word tokens.  Otherwise, leave the spaces on the tokens.
+        :param stem: If true, then use word stems instead of word strings.
+        """
+        return self._views(fileids, True, None, strip_space, stem)
+
+    def tagged_sents(self, fileids=None, c5=False, strip_space=True, stem=False):
+        """
+        :return: the given file(s) as a list of
+            sentences, each encoded as a list of ``(word,tag)`` tuples.
+        :rtype: list(list(tuple(str,str)))
+
+        :param c5: If true, then the tags used will be the more detailed
+            c5 tags.  Otherwise, the simplified tags will be used.
+        :param strip_space: If true, then strip trailing spaces from
+            word tokens.  Otherwise, leave the spaces on the tokens.
+        :param stem: If true, then use word stems instead of word strings.
+        """
+        tag = 'c5' if c5 else 'pos'
+        return self._views(
+            fileids, sent=True, tag=tag, strip_space=strip_space, stem=stem
+        )
+
+    def _views(self, fileids=None, sent=False, tag=False, strip_space=True, stem=False):
+        """A helper function that instantiates BNCWordViews or the list of words/sentences."""
+        f = BNCWordView if self._lazy else self._words
+        return concat(
+            [
+                f(fileid, sent, tag, strip_space, stem)
+                for fileid in self.abspaths(fileids)
+            ]
+        )
+
+    def _words(self, fileid, bracket_sent, tag, strip_space, stem):
+        """
+        Helper used to implement the view methods -- returns a list of
+        words or a list of sentences, optionally tagged.
+
+        :param fileid: The name of the underlying file.
+        :param bracket_sent: If true, include sentence bracketing.
+        :param tag: The name of the tagset to use, or None for no tags.
+        :param strip_space: If true, strip spaces from word tokens.
+        :param stem: If true, then substitute stems for words.
+        """
+        result = []
+
+        xmldoc = ElementTree.parse(fileid).getroot()
+        for xmlsent in xmldoc.findall('.//s'):
+            sent = []
+            for xmlword in _all_xmlwords_in(xmlsent):
+                word = xmlword.text
+                if not word:
+                    word = ""  # fixes issue 337?
+                if strip_space or stem:
+                    word = word.strip()
+                if stem:
+                    word = xmlword.get('hw', word)
+                if tag == 'c5':
+                    word = (word, xmlword.get('c5'))
+                elif tag == 'pos':
+                    word = (word, xmlword.get('pos', xmlword.get('c5')))
+                sent.append(word)
+            if bracket_sent:
+                result.append(BNCSentence(xmlsent.attrib['n'], sent))
+            else:
+                result.extend(sent)
+
+        assert None not in result
+        return result
+
+
+def _all_xmlwords_in(elt, result=None):
+    if result is None:
+        result = []
+    for child in elt:
+        if child.tag in ('c', 'w'):
+            result.append(child)
+        else:
+            _all_xmlwords_in(child, result)
+    return result
+
+
+class BNCSentence(list):
+    """
+    A list of words, augmented by an attribute ``num`` used to record
+    the sentence identifier (the ``n`` attribute from the XML).
+    """
+
+    def __init__(self, num, items):
+        self.num = num
+        list.__init__(self, items)
+
+
+class BNCWordView(XMLCorpusView):
+    """
+    A stream backed corpus view specialized for use with the BNC corpus.
+    """
+
+    tags_to_ignore = set(
+        ['pb', 'gap', 'vocal', 'event', 'unclear', 'shift', 'pause', 'align']
+    )
+    """These tags are ignored. For their description refer to the
+    technical documentation, for example,
+    http://www.natcorp.ox.ac.uk/docs/URG/ref-vocal.html
+
+    """
+
+    def __init__(self, fileid, sent, tag, strip_space, stem):
+        """
+        :param fileid: The name of the underlying file.
+        :param sent: If true, include sentence bracketing.
+        :param tag: The name of the tagset to use, or None for no tags.
+        :param strip_space: If true, strip spaces from word tokens.
+        :param stem: If true, then substitute stems for words.
+        """
+        if sent:
+            tagspec = '.*/s'
+        else:
+            tagspec = '.*/s/(.*/)?(c|w)'
+        self._sent = sent
+        self._tag = tag
+        self._strip_space = strip_space
+        self._stem = stem
+
+        self.title = None  #: Title of the document.
+        self.author = None  #: Author of the document.
+        self.editor = None  #: Editor
+        self.resps = None  #: Statement of responsibility
+
+        XMLCorpusView.__init__(self, fileid, tagspec)
+
+        # Read in a tasty header.
+        self._open()
+        self.read_block(self._stream, '.*/teiHeader$', self.handle_header)
+        self.close()
+
+        # Reset tag context.
+        self._tag_context = {0: ()}
+
+    def handle_header(self, elt, context):
+        # Set up some metadata!
+        titles = elt.findall('titleStmt/title')
+        if titles:
+            self.title = '\n'.join(title.text.strip() for title in titles)
+
+        authors = elt.findall('titleStmt/author')
+        if authors:
+            self.author = '\n'.join(author.text.strip() for author in authors)
+
+        editors = elt.findall('titleStmt/editor')
+        if editors:
+            self.editor = '\n'.join(editor.text.strip() for editor in editors)
+
+        resps = elt.findall('titleStmt/respStmt')
+        if resps:
+            self.resps = '\n\n'.join(
+                '\n'.join(resp_elt.text.strip() for resp_elt in resp) for resp in resps
+            )
+
+    def handle_elt(self, elt, context):
+        if self._sent:
+            return self.handle_sent(elt)
+        else:
+            return self.handle_word(elt)
+
+    def handle_word(self, elt):
+        word = elt.text
+        if not word:
+            word = ""  # fixes issue 337?
+        if self._strip_space or self._stem:
+            word = word.strip()
+        if self._stem:
+            word = elt.get('hw', word)
+        if self._tag == 'c5':
+            word = (word, elt.get('c5'))
+        elif self._tag == 'pos':
+            word = (word, elt.get('pos', elt.get('c5')))
+        return word
+
+    def handle_sent(self, elt):
+        sent = []
+        for child in elt:
+            if child.tag in ('mw', 'hi', 'corr', 'trunc'):
+                sent += [self.handle_word(w) for w in child]
+            elif child.tag in ('w', 'c'):
+                sent.append(self.handle_word(child))
+            elif child.tag not in self.tags_to_ignore:
+                raise ValueError('Unexpected element %s' % child.tag)
+        return BNCSentence(elt.attrib['n'], sent)
--- a/venv/lib/python3.7/site-packages/nltk/corpus/reader/bracket_parse.py
+++ b/venv/lib/python3.7/site-packages/nltk/corpus/reader/bracket_parse.py
@@ -0,0 +1,271 @@
+# Natural Language Toolkit: Penn Treebank Reader
+#
+# Copyright (C) 2001-2019 NLTK Project
+# Author: Steven Bird <stevenbird1@gmail.com>
+#         Edward Loper <edloper@gmail.com>
+# URL: <http://nltk.org/>
+# For license information, see LICENSE.TXT
+"""
+Corpus reader for corpora that consist of parenthesis-delineated parse trees.
+"""
+
+import sys
+
+from nltk.tree import Tree
+from nltk.tag import map_tag
+
+from nltk.corpus.reader.util import *
+from nltk.corpus.reader.api import *
+
+# Patterns for picking leaves out of a bracketed parse string.
+# we use [^\s()]+ instead of \S+? to avoid matching ()
+# "(N tag word)": a leaf carrying a leading integer; groups are
+# (number, tag, word).
+SORTTAGWRD = re.compile(r'\((\d+) ([^\s()]+) ([^\s()]+)\)')
+# "(tag word)": groups are (tag, word).
+TAGWORD = re.compile(r'\(([^\s()]+) ([^\s()]+)\)')
+# "(tag word)": only the word is captured.
+WORD = re.compile(r'\([^\s()]+ ([^\s()]+)\)')
+# Two opening parentheses at the start of the string, separated only by
+# optional whitespace.
+EMPTY_BRACKETS = re.compile(r'\s*\(\s*\(')
+
+
+class BracketParseCorpusReader(SyntaxCorpusReader):
+    """
+    Reader for corpora that consist of parenthesis-delineated parse trees,
+    like those found in the "combined" section of the Penn Treebank,
+    e.g. "(S (NP (DT the) (JJ little) (NN dog)) (VP (VBD barked)))".
+
+    """
+
+    def __init__(
+        self,
+        root,
+        fileids,
+        comment_char=None,
+        detect_blocks='unindented_paren',
+        encoding='utf8',
+        tagset=None,
+    ):
+        """
+        :param root: The root directory for this corpus.
+        :param fileids: A list or regexp specifying the fileids in this corpus.
+        :param comment_char: The character which can appear at the start of
+            a line to indicate that the rest of the line is a comment.
+        :param detect_blocks: The method that is used to find blocks
+          in the corpus; can be 'unindented_paren' (every unindented
+          parenthesis starts a new parse), 'sexpr' (brackets are
+          matched) or 'blankline' (blocks are read with
+          ``read_blankline_block``).
+        :param encoding: The encoding handed to ``CorpusReader.__init__``.
+        :param tagset: The name of the tagset used by this corpus, to be used
+              for normalizing or converting the POS tags returned by the
+              tagged_...() methods.
+        """
+        # FIXME: Why is it inheriting from SyntaxCorpusReader but initializing
+        #       from CorpusReader?
+        CorpusReader.__init__(self, root, fileids, encoding)
+        self._comment_char = comment_char
+        self._detect_blocks = detect_blocks
+        self._tagset = tagset
+
+    def _read_block(self, stream):
+        """
+        Read the next block of parse strings from ``stream`` using the
+        strategy selected by ``detect_blocks``.
+
+        :return: A list of strings as produced by the block reader.
+        """
+        if self._detect_blocks == 'sexpr':
+            return read_sexpr_block(stream, comment_char=self._comment_char)
+        elif self._detect_blocks == 'blankline':
+            return read_blankline_block(stream)
+        elif self._detect_blocks == 'unindented_paren':
+            # Tokens start with unindented left parens.
+            toks = read_regexp_block(stream, start_re=r'^\(')
+            # Strip any comments out of the tokens.
+            if self._comment_char:
+                toks = [
+                    re.sub('(?m)^%s.*' % re.escape(self._comment_char), '', tok)
+                    for tok in toks
+                ]
+            return toks
+        else:
+            # Kept as an assertion so the exception type seen by callers
+            # does not change.
+            assert 0, 'bad block type'
+
+    def _normalize(self, t):
+        """
+        Rewrite the parse string ``t`` into the form expected by the tree
+        parser and by the leaf regexps, and return the new string.
+        """
+        # If there's an empty set of brackets surrounding the actual
+        # parse, then strip them off.
+        if EMPTY_BRACKETS.match(t):
+            t = t.strip()[1:-1]
+        # Replace leaves of the form (!), (,), with (! !), (, ,)
+        t = re.sub(r"\((.)\)", r"(\1 \1)", t)
+        # Replace leaves of the form (tag word root) with (tag word)
+        t = re.sub(r"\(([^\s()]+) ([^\s()]+) [^\s()]+\)", r"(\1 \2)", t)
+        return t
+
+    def _parse(self, t):
+        """
+        Parse the bracketed string ``t`` into a ``Tree``.
+
+        If the string cannot be parsed, a message is written to stderr.
+        When the error is ``'mismatched parens'`` the parse is retried with
+        one to four closing parentheses appended; if that does not help
+        (or for any other error) a flat ``S`` tree over the tagged words is
+        returned instead.
+        """
+        try:
+            return Tree.fromstring(self._normalize(t))
+
+        except ValueError as e:
+            sys.stderr.write("Bad tree detected; trying to recover...\n")
+            # Try to recover, if we can:
+            if e.args == ('mismatched parens',):
+                for n in range(1, 5):
+                    try:
+                        # The padded string must be *parsed*.  ``Tree(...)``
+                        # with a single string argument does not parse it
+                        # (it raises TypeError, which the handler below
+                        # would not catch), so go through fromstring().
+                        v = Tree.fromstring(self._normalize(t + ')' * n))
+                        sys.stderr.write(
+                            "  Recovered by adding %d close " "paren(s)\n" % n
+                        )
+                        return v
+                    except ValueError:
+                        pass
+            # Try something else:
+            sys.stderr.write("  Recovered by returning a flat parse.\n")
+            # sys.stderr.write(' '.join(t.split())+'\n')
+            return Tree('S', self._tag(t))
+
+    def _tag(self, t, tagset=None):
+        """
+        Return the ``(word, tag)`` pairs found in the parse string ``t``.
+
+        :param tagset: If given and different from the corpus tagset, every
+            tag is converted with ``map_tag``.
+        """
+        tagged_sent = [(w, p) for (p, w) in TAGWORD.findall(self._normalize(t))]
+        if tagset and tagset != self._tagset:
+            tagged_sent = [
+                (w, map_tag(self._tagset, tagset, p)) for (w, p) in tagged_sent
+            ]
+        return tagged_sent
+
+    def _word(self, t):
+        """Return the words found in the parse string ``t``."""
+        return WORD.findall(self._normalize(t))
+
+
+class CategorizedBracketParseCorpusReader(
+    CategorizedCorpusReader, BracketParseCorpusReader
+):
+    """
+    A bracket-parse corpus reader whose documents can additionally be
+    selected by category, where categories are derived from the file
+    identifiers.
+    @author: Nathan Schneider <nschneid@cs.cmu.edu>
+    """
+
+    def __init__(self, *args, **kwargs):
+        """
+        Initialize the corpus reader.  The keyword-argument dictionary is
+        first handed to the L{CategorizedCorpusReader constructor
+        <CategorizedCorpusReader.__init__>} (for C{cat_pattern},
+        C{cat_map} and C{cat_file}); the positional arguments and the
+        keyword arguments are then passed to the
+        L{BracketParseCorpusReader constructor
+        <BracketParseCorpusReader.__init__>}.
+        """
+        CategorizedCorpusReader.__init__(self, kwargs)
+        BracketParseCorpusReader.__init__(self, *args, **kwargs)
+
+    def _resolve(self, fileids, categories):
+        """
+        Turn a ``fileids``/``categories`` pair into a fileid selection.
+
+        :raise ValueError: If both arguments are given.
+        """
+        if categories is None:
+            return fileids
+        if fileids is not None:
+            raise ValueError('Specify fileids or categories, not both')
+        return self.fileids(categories)
+
+    # Each accessor below resolves the selection and then delegates to the
+    # BracketParseCorpusReader method of the same name.
+
+    def raw(self, fileids=None, categories=None):
+        selected = self._resolve(fileids, categories)
+        return BracketParseCorpusReader.raw(self, selected)
+
+    def words(self, fileids=None, categories=None):
+        selected = self._resolve(fileids, categories)
+        return BracketParseCorpusReader.words(self, selected)
+
+    def sents(self, fileids=None, categories=None):
+        selected = self._resolve(fileids, categories)
+        return BracketParseCorpusReader.sents(self, selected)
+
+    def paras(self, fileids=None, categories=None):
+        selected = self._resolve(fileids, categories)
+        return BracketParseCorpusReader.paras(self, selected)
+
+    def tagged_words(self, fileids=None, categories=None, tagset=None):
+        selected = self._resolve(fileids, categories)
+        return BracketParseCorpusReader.tagged_words(self, selected, tagset)
+
+    def tagged_sents(self, fileids=None, categories=None, tagset=None):
+        selected = self._resolve(fileids, categories)
+        return BracketParseCorpusReader.tagged_sents(self, selected, tagset)
+
+    def tagged_paras(self, fileids=None, categories=None, tagset=None):
+        selected = self._resolve(fileids, categories)
+        return BracketParseCorpusReader.tagged_paras(self, selected, tagset)
+
+    def parsed_words(self, fileids=None, categories=None):
+        selected = self._resolve(fileids, categories)
+        return BracketParseCorpusReader.parsed_words(self, selected)
+
+    def parsed_sents(self, fileids=None, categories=None):
+        selected = self._resolve(fileids, categories)
+        return BracketParseCorpusReader.parsed_sents(self, selected)
+
+    def parsed_paras(self, fileids=None, categories=None):
+        selected = self._resolve(fileids, categories)
+        return BracketParseCorpusReader.parsed_paras(self, selected)
+
+
+class AlpinoCorpusReader(BracketParseCorpusReader):
+    """
+    Reader for the Alpino Dutch Treebank.
+    This corpus has a lexical breakdown structure embedded, as read by _parse.
+    Unfortunately this puts punctuation and some other words out of the
+    sentence order in the xml element tree, which is no good for _tag and
+    _word.  Those two methods are therefore overridden to pass a non-default
+    parameter 'ordered' to the overridden _normalize function.  The _parse
+    function can then remain untouched.
+    """
+
+    def __init__(self, root, encoding='ISO-8859-1', tagset=None):
+        """
+        :param root: The root directory for this corpus.
+        :param encoding: The encoding used to read the corpus file.
+        :param tagset: The name of the tagset used by this corpus.
+        """
+        BracketParseCorpusReader.__init__(
+            self,
+            root,
+            # Raw string: '\.' is not a valid escape sequence in an ordinary
+            # string literal and makes newer Pythons emit a warning.
+            r'alpino\.xml',
+            detect_blocks='blankline',
+            encoding=encoding,
+            tagset=tagset,
+        )
+
+    def _normalize(self, t, ordered=False):
+        """Normalize the xml sentence element in t.
+        The sentence elements <alpino_ds>, although embedded in a few overall
+        xml elements, are separated by blank lines. That's how the reader can
+        deliver them one at a time.
+        Each sentence has a few category subnodes that are of no use to us.
+        The remaining word nodes may or may not appear in the proper order.
+        Each word node has attributes, among which:
+        - begin : the position of the word in the sentence
+        - pos   : Part of Speech: the Tag
+        - word  : the actual word
+        The return value is a string with all xml elements replaced by
+        clauses: either a cat clause with nested clauses, or a word clause.
+        The order of the bracket clauses closely follows the xml.
+        If ordered == True, the word clauses include an order sequence number.
+        If ordered == False, the word clauses only have pos and word parts.
+        An empty string is returned when t does not start with "<alpino_ds".
+        """
+        if t[:10] != "<alpino_ds":
+            return ""
+        # convert XML to sexpr notation
+        t = re.sub(r'  <node .*? cat="(\w+)".*>', r"(\1", t)
+        if ordered:
+            # ``<node .*?begin=`` mirrors the unordered pattern below.  The
+            # previous ``<node. *?begin=`` only matched when ``begin`` was
+            # the first attribute of the node.
+            t = re.sub(
+                r'  <node .*?begin="(\d+)".*? pos="(\w+)".*? word="([^"]+)".*?/>',
+                r"(\1 \2 \3)",
+                t,
+            )
+        else:
+            t = re.sub(r'  <node .*?pos="(\w+)".*? word="([^"]+)".*?/>', r"(\1 \2)", t)
+        t = re.sub(r"  </node>", r")", t)
+        t = re.sub(r"<sentence>.*</sentence>", r"", t)
+        t = re.sub(r"</?alpino_ds.*>", r"", t)
+        return t
+
+    def _tag(self, t, tagset=None):
+        """
+        Return the ``(word, tag)`` pairs of sentence ``t`` in sentence order.
+
+        The word clauses are collected together with their ``begin``
+        position and sorted on it before the position is dropped again.
+
+        :param tagset: If given and different from the corpus tagset, every
+            tag is converted with ``map_tag``.
+        """
+        tagged_sent = [
+            (int(o), w, p)
+            for (o, p, w) in SORTTAGWRD.findall(self._normalize(t, ordered=True))
+        ]
+        tagged_sent.sort()
+        if tagset and tagset != self._tagset:
+            tagged_sent = [
+                (w, map_tag(self._tagset, tagset, p)) for (o, w, p) in tagged_sent
+            ]
+        else:
+            tagged_sent = [(w, p) for (o, w, p) in tagged_sent]
+        return tagged_sent
+
+    def _word(self, t):
+        """Return a correctly ordered list of words"""
+        tagged_sent = self._tag(t)
+        return [w for (w, p) in tagged_sent]
--- a/venv/lib/python3.7/site-packages/nltk/corpus/reader/categorized_sents.py
+++ b/venv/lib/python3.7/site-packages/nltk/corpus/reader/categorized_sents.py
@@ -0,0 +1,199 @@
+# Natural Language Toolkit: Categorized Sentences Corpus Reader
+#
+# Copyright (C) 2001-2019 NLTK Project
+# Author: Pierpaolo Pantone <24alsecondo@gmail.com>
+# URL: <http://nltk.org/>
+# For license information, see LICENSE.TXT
+
+"""
+CorpusReader structured for corpora that contain one instance on each row.
+This CorpusReader is specifically used for the Subjectivity Dataset and the
+Sentence Polarity Dataset.
+
+- Subjectivity Dataset information -
+
+Authors: Bo Pang and Lillian Lee.
+Url: http://www.cs.cornell.edu/people/pabo/movie-review-data
+
+Distributed with permission.
+
+Related papers:
+
+- Bo Pang and Lillian Lee. "A Sentimental Education: Sentiment Analysis Using
+    Subjectivity Summarization Based on Minimum Cuts". Proceedings of the ACL,
+    2004.
+
+- Sentence Polarity Dataset information -
+
+Authors: Bo Pang and Lillian Lee.
+Url: http://www.cs.cornell.edu/people/pabo/movie-review-data
+
+Related papers:
+
+- Bo Pang and Lillian Lee. "Seeing stars: Exploiting class relationships for
+    sentiment categorization with respect to rating scales". Proceedings of the
+    ACL, 2005.
+"""
+from six import string_types
+
+from nltk.corpus.reader.api import *
+from nltk.tokenize import *
+
+
+class CategorizedSentencesCorpusReader(CategorizedCorpusReader, CorpusReader):
+    """
+    A reader for corpora in which each row represents a single instance, mainly
+    a sentence. Instances are divided into categories based on their file
+    identifiers (see CategorizedCorpusReader).
+    Since many corpora allow rows that contain more than one sentence, it is
+    possible to specify a sentence tokenizer to retrieve all sentences instead
+    of all rows.
+
+    Examples using the Subjectivity Dataset:
+
+    >>> from nltk.corpus import subjectivity
+    >>> subjectivity.sents()[23]
+    ['television', 'made', 'him', 'famous', ',', 'but', 'his', 'biggest', 'hits',
+    'happened', 'off', 'screen', '.']
+    >>> subjectivity.categories()
+    ['obj', 'subj']
+    >>> subjectivity.words(categories='subj')
+    ['smart', 'and', 'alert', ',', 'thirteen', ...]
+
+    Examples using the Sentence Polarity Dataset:
+
+    >>> from nltk.corpus import sentence_polarity
+    >>> sentence_polarity.sents()
+    [['simplistic', ',', 'silly', 'and', 'tedious', '.'], ["it's", 'so', 'laddish',
+    'and', 'juvenile', ',', 'only', 'teenage', 'boys', 'could', 'possibly', 'find',
+    'it', 'funny', '.'], ...]
+    >>> sentence_polarity.categories()
+    ['neg', 'pos']
+    """
+
+    CorpusView = StreamBackedCorpusView
+
+    # NOTE: the default ``word_tokenizer`` instance is created once, when the
+    # class body is executed, and is shared by every reader that does not
+    # pass its own tokenizer.
+    def __init__(
+        self,
+        root,
+        fileids,
+        word_tokenizer=WhitespaceTokenizer(),
+        sent_tokenizer=None,
+        encoding='utf8',
+        **kwargs
+    ):
+        """
+        :param root: The root directory for the corpus.
+        :param fileids: a list or regexp specifying the fileids in the corpus.
+        :param word_tokenizer: a tokenizer for breaking sentences or paragraphs
+            into words. Default: `WhitespaceTokenizer`
+        :param sent_tokenizer: a tokenizer for breaking paragraphs into sentences.
+        :param encoding: the encoding that should be used to read the corpus.
+        :param kwargs: additional parameters passed to CategorizedCorpusReader.
+        """
+
+        CorpusReader.__init__(self, root, fileids, encoding)
+        CategorizedCorpusReader.__init__(self, kwargs)
+        self._word_tokenizer = word_tokenizer
+        self._sent_tokenizer = sent_tokenizer
+
+    def _resolve(self, fileids, categories):
+        """
+        Turn a ``fileids``/``categories`` pair into a fileid selection.
+
+        :raise ValueError: if both arguments are given.
+        """
+        if fileids is not None and categories is not None:
+            raise ValueError('Specify fileids or categories, not both')
+        if categories is not None:
+            return self.fileids(categories)
+        else:
+            return fileids
+
+    def _select_fileids(self, fileids, categories):
+        """
+        Resolve the selection and normalize it: ``None`` selects every
+        file of the corpus and a single string becomes a one-element list.
+        """
+        fileids = self._resolve(fileids, categories)
+        if fileids is None:
+            fileids = self._fileids
+        elif isinstance(fileids, string_types):
+            fileids = [fileids]
+        return fileids
+
+    def raw(self, fileids=None, categories=None):
+        """
+        :param fileids: a list or regexp specifying the fileids that have to be
+            returned as a raw string.
+        :param categories: a list specifying the categories whose files have to
+            be returned as a raw string.
+        :return: the given file(s) as a single string.
+        :rtype: str
+        """
+        contents = []
+        for f in self._select_fileids(fileids, categories):
+            stream = self.open(f)
+            try:
+                contents.append(stream.read())
+            finally:
+                # Do not leave one open stream behind per file.
+                stream.close()
+        return concat(contents)
+
+    def readme(self):
+        """
+        Return the contents of the corpus README file.
+        """
+        stream = self.open("README")
+        try:
+            return stream.read()
+        finally:
+            stream.close()
+
+    def sents(self, fileids=None, categories=None):
+        """
+        Return all sentences in the corpus or in the specified file(s).
+
+        :param fileids: a list or regexp specifying the ids of the files whose
+            sentences have to be returned.
+        :param categories: a list specifying the categories whose sentences have
+            to be returned.
+        :return: the given file(s) as a list of sentences.
+            Each sentence is tokenized using the specified word_tokenizer.
+        :rtype: list(list(str))
+        """
+        fileids = self._select_fileids(fileids, categories)
+        return concat(
+            [
+                self.CorpusView(path, self._read_sent_block, encoding=enc)
+                for (path, enc, fileid) in self.abspaths(fileids, True, True)
+            ]
+        )
+
+    def words(self, fileids=None, categories=None):
+        """
+        Return all words and punctuation symbols in the corpus or in the specified
+        file(s).
+
+        :param fileids: a list or regexp specifying the ids of the files whose
+            words have to be returned.
+        :param categories: a list specifying the categories whose words have to
+            be returned.
+        :return: the given file(s) as a list of words and punctuation symbols.
+        :rtype: list(str)
+        """
+        fileids = self._select_fileids(fileids, categories)
+        return concat(
+            [
+                self.CorpusView(path, self._read_word_block, encoding=enc)
+                for (path, enc, fileid) in self.abspaths(fileids, True, True)
+            ]
+        )
+
+    def _read_sent_block(self, stream):
+        """
+        Read up to 20 lines from ``stream`` and return them as a list of
+        tokenized sentences.  With a sentence tokenizer every line may
+        yield several sentences; without one each line is one sentence.
+        """
+        sents = []
+        for i in range(20):  # Read 20 lines at a time.
+            line = stream.readline()
+            if not line:
+                # An empty string means end of file; further reads would
+                # return the same, so stop instead of spinning.
+                break
+            if self._sent_tokenizer:
+                sents.extend(
+                    [
+                        self._word_tokenizer.tokenize(sent)
+                        for sent in self._sent_tokenizer.tokenize(line)
+                    ]
+                )
+            else:
+                sents.append(self._word_tokenizer.tokenize(line))
+        return sents
+
+    def _read_word_block(self, stream):
+        """Read one sentence block and return its words as a flat list."""
+        words = []
+        for sent in self._read_sent_block(stream):
+            words.extend(sent)
+        return words
--- a/venv/lib/python3.7/site-packages/nltk/corpus/reader/chasen.py
+++ b/venv/lib/python3.7/site-packages/nltk/corpus/reader/chasen.py
@@ -0,0 +1,171 @@
+#
+# Copyright (C) 2001-2019 NLTK Project
+# Author: Masato Hagiwara <hagisan@gmail.com>
+# URL: <http://nltk.org/>
+# For license information, see LICENSE.TXT
+
+# For more information, see http://lilyx.net/pages/nltkjapanesecorpus.html
+from __future__ import print_function
+
+import sys
+
+from six import string_types
+
+from nltk.corpus.reader import util
+
+from nltk.corpus.reader.util import *
+from nltk.corpus.reader.api import *
+
+
+class ChasenCorpusReader(CorpusReader):
+    """
+    Corpus reader that exposes each corpus file through ``ChasenCorpusView``
+    objects, as words, sentences or paragraphs, with or without tags.
+    """
+
+    def __init__(self, root, fileids, encoding='utf8', sent_splitter=None):
+        """
+        :param root: The root directory for this corpus.
+        :param fileids: A list or regexp specifying the fileids in this corpus.
+        :param encoding: The encoding used to read the corpus files.
+        :param sent_splitter: Optional callable handed to every
+            ``ChasenCorpusView`` created by this reader.
+        """
+        self._sent_splitter = sent_splitter
+        CorpusReader.__init__(self, root, fileids, encoding)
+
+    def raw(self, fileids=None):
+        """
+        :return: the given file(s) as a single string; ``None`` selects
+            every file of the corpus.
+        """
+        if fileids is None:
+            fileids = self._fileids
+        elif isinstance(fileids, string_types):
+            fileids = [fileids]
+        contents = []
+        for f in fileids:
+            stream = self.open(f)
+            try:
+                contents.append(stream.read())
+            finally:
+                # Do not leave one open stream behind per file.
+                stream.close()
+        return concat(contents)
+
+    def _views(self, fileids, tagged, group_by_sent, group_by_para):
+        """Concatenate one ``ChasenCorpusView`` per selected file."""
+        return concat(
+            [
+                ChasenCorpusView(
+                    fileid,
+                    enc,
+                    tagged,
+                    group_by_sent,
+                    group_by_para,
+                    self._sent_splitter,
+                )
+                for (fileid, enc) in self.abspaths(fileids, True)
+            ]
+        )
+
+    def words(self, fileids=None):
+        """Return the corpus as a flat sequence of words."""
+        return self._views(fileids, False, False, False)
+
+    def tagged_words(self, fileids=None):
+        """Return the corpus as a flat sequence of (word, tags) tuples."""
+        return self._views(fileids, True, False, False)
+
+    def sents(self, fileids=None):
+        """Return the corpus as a sequence of sentences (lists of words)."""
+        return self._views(fileids, False, True, False)
+
+    def tagged_sents(self, fileids=None):
+        """Return the corpus as a sequence of sentences of (word, tags)."""
+        return self._views(fileids, True, True, False)
+
+    def paras(self, fileids=None):
+        """Return the corpus as a sequence of paragraphs of sentences."""
+        return self._views(fileids, False, True, True)
+
+    def tagged_paras(self, fileids=None):
+        """Return the corpus as paragraphs of sentences of (word, tags)."""
+        return self._views(fileids, True, True, True)
+
+
+class ChasenCorpusView(StreamBackedCorpusView):
+    """
+    A specialized corpus view for ChasenReader. Similar to ``TaggedCorpusView``,
+    but this'll use fixed sets of word and sentence tokenizer.
+    """
+
+    def __init__(
+        self,
+        corpus_file,
+        encoding,
+        tagged,
+        group_by_sent,
+        group_by_para,
+        sent_splitter=None,
+    ):
+        """
+        :param corpus_file: The file this view reads from.
+        :param encoding: The encoding used to read the file.
+        :param tagged: If true, tokens are ``(word, tags)`` tuples;
+            otherwise only the word is kept.
+        :param group_by_sent: If true, tokens are grouped into sentences.
+        :param group_by_para: If true, each paragraph becomes one item.
+        :param sent_splitter: Optional callable applied to every
+            ``(word, tags)`` tuple; a true result ends the sentence.
+        """
+        self._tagged = tagged
+        self._group_by_sent = group_by_sent
+        self._group_by_para = group_by_para
+        self._sent_splitter = sent_splitter
+        StreamBackedCorpusView.__init__(self, corpus_file, encoding=encoding)
+
+    def read_block(self, stream):
+        """Reads one paragraph at a time."""
+
+        def close_sentence(tokens, paragraph):
+            # Drop the tag column when untagged output was requested, then
+            # add the sentence to the paragraph, nested or flattened.
+            if not self._tagged:
+                tokens = [form for (form, tags) in tokens]
+            if self._group_by_sent:
+                paragraph.append(tokens)
+            else:
+                paragraph.extend(tokens)
+
+        result = []
+        for chunk in read_regexp_block(stream, r".", r"^EOS\n"):
+            paragraph = []
+            pending = []
+
+            for row in chunk.splitlines():
+                at_eos = row.strip() == 'EOS'
+                # First tab-separated cell is the word, the rest are tags.
+                fields = row.split('\t')
+                token = (fields[0], '\t'.join(fields[1:]))
+                if not at_eos:
+                    pending.append(token)
+
+                if at_eos or (self._sent_splitter and self._sent_splitter(token)):
+                    close_sentence(pending, paragraph)
+                    pending = []
+
+            # Tokens left over after the last line form a final sentence.
+            if pending:
+                close_sentence(pending, paragraph)
+
+            if self._group_by_para:
+                result.append(paragraph)
+            else:
+                result.extend(paragraph)
+
+        return result
+
+
+def demo():
+    """
+    Print a slice of words and three tagged sentences from the 'jeita'
+    corpus, loaded through ``ChasenCorpusReader``.
+    """
+
+    import nltk
+    from nltk.corpus.util import LazyCorpusLoader
+
+    jeita = LazyCorpusLoader('jeita', ChasenCorpusReader, r'.*chasen', encoding='utf-8')
+    sample_words = jeita.words()[22100:22140]
+    print('/'.join(sample_words))
+
+    # One "word/tag-field" line per token; sentences separated by EOS lines.
+    rendered = []
+    for sent in jeita.tagged_sents()[2170:2173]:
+        rendered.append(
+            '\n'.join("%s/%s" % (w[0], w[1].split('\t')[2]) for w in sent)
+        )
+    print('\nEOS\n'.join(rendered))
+
+
+def test():
+    """
+    Smoke test: the tag part of the first tagged word of the 'jeita'
+    corpus must be a string.
+    """
+
+    from nltk.corpus.util import LazyCorpusLoader
+
+    jeita = LazyCorpusLoader('jeita', ChasenCorpusReader, r'.*chasen', encoding='utf-8')
+
+    first_token = jeita.tagged_words()[0]
+    assert isinstance(first_token[1], string_types)
+
+
+# When run as a script: print the demo output, then run the smoke test.
+# Both load the 'jeita' corpus.
+if __name__ == '__main__':
+    demo()
+    test()
--- a/venv/lib/python3.7/site-packages/nltk/corpus/reader/childes.py
+++ b/venv/lib/python3.7/site-packages/nltk/corpus/reader/childes.py
@@ -0,0 +1,633 @@
+# CHILDES XML Corpus Reader
+
+# Copyright (C) 2001-2019 NLTK Project
+# Author: Tomonori Nagano <tnagano@gc.cuny.edu>
+#         Alexis Dimitriadis <A.Dimitriadis@uu.nl>
+# URL: <http://nltk.org/>
+# For license information, see LICENSE.TXT
+
+"""
+Corpus reader for the XML version of the CHILDES corpus.
+"""
+from __future__ import print_function, division
+
+__docformat__ = 'epytext en'
+
+import re
+from collections import defaultdict
+from six import string_types
+
+from nltk.util import flatten, LazyMap, LazyConcatenation
+
+from nltk.corpus.reader.util import concat
+from nltk.corpus.reader.xmldocs import XMLCorpusReader, ElementTree
+
+# to resolve the namespace issue
+NS = 'http://www.talkbank.org/ns/talkbank'
+
+
+class CHILDESCorpusReader(XMLCorpusReader):
+    """
+    Corpus reader for the XML version of the CHILDES corpus.
+    The CHILDES corpus is available at ``https://childes.talkbank.org/``. The XML
+    version of CHILDES is located at ``https://childes.talkbank.org/data-xml/``.
+    Copy the needed parts of the CHILDES XML corpus into the NLTK data directory
+    (``nltk_data/corpora/CHILDES/``).
+
+    For access to the file text use the usual nltk functions,
+    ``words()``, ``sents()``, ``tagged_words()`` and ``tagged_sents()``.
+    """
+
+    def __init__(self, root, fileids, lazy=True):
+        """
+        :param root: The root directory for this corpus.
+        :param fileids: The fileids of this corpus, as accepted by
+            ``XMLCorpusReader.__init__``.
+        :param lazy: If true (the default), the accessor methods return
+            lazy sequences (``LazyMap`` / ``LazyConcatenation``); otherwise
+            they return plain lists with one entry per file.
+        """
+        XMLCorpusReader.__init__(self, root, fileids)
+        self._lazy = lazy
+
+    def words(
+        self,
+        fileids=None,
+        speaker='ALL',
+        stem=False,
+        relation=False,
+        strip_space=True,
+        replace=False,
+    ):
+        """
+        :return: the given file(s) as a list of words.  In lazy mode the
+            per-file results are concatenated; otherwise a list with one
+            entry per file is returned.
+        :rtype: list(str)
+
+        :param speaker: If specified, select specific speaker(s) defined
+            in the corpus. Default is 'ALL' (all participants). Common choices
+            are 'CHI' (the child), 'MOT' (mother), ['CHI','MOT'] (exclude
+            researchers)
+        :param stem: If true, then use word stems instead of word strings.
+        :param relation: If true, then return tuples of (stem, index,
+            dependent_index)
+        :param strip_space: If true, then strip trailing spaces from word
+            tokens. Otherwise, leave the spaces on the tokens.
+        :param replace: If true, then use the replaced (intended) word instead
+            of the original word (e.g., 'wat' will be replaced with 'watch')
+        """
+
+        def read_file(path):
+            # sent=None and pos=False: flat, untagged words.
+            return self._get_words(
+                path, speaker, None, stem, relation, False, strip_space, replace
+            )
+
+        if self._lazy:
+            return LazyConcatenation(LazyMap(read_file, self.abspaths(fileids)))
+        return [read_file(path) for path in self.abspaths(fileids)]
+
+    def tagged_words(
+        self,
+        fileids=None,
+        speaker='ALL',
+        stem=False,
+        relation=False,
+        strip_space=True,
+        replace=False,
+    ):
+        """
+        :return: the given file(s) as a list of tagged
+            words and punctuation symbols, encoded as tuples
+            ``(word,tag)``.  In lazy mode the per-file results are
+            concatenated; otherwise a list with one entry per file is
+            returned.
+        :rtype: list(tuple(str,str))
+
+        :param speaker: If specified, select specific speaker(s) defined
+            in the corpus. Default is 'ALL' (all participants). Common choices
+            are 'CHI' (the child), 'MOT' (mother), ['CHI','MOT'] (exclude
+            researchers)
+        :param stem: If true, then use word stems instead of word strings.
+        :param relation: If true, then return tuples of (stem, index,
+            dependent_index)
+        :param strip_space: If true, then strip trailing spaces from word
+            tokens. Otherwise, leave the spaces on the tokens.
+        :param replace: If true, then use the replaced (intended) word instead
+            of the original word (e.g., 'wat' will be replaced with 'watch')
+        """
+
+        def read_file(path):
+            # sent=None and pos=True: flat, tagged words.
+            return self._get_words(
+                path, speaker, None, stem, relation, True, strip_space, replace
+            )
+
+        if self._lazy:
+            return LazyConcatenation(LazyMap(read_file, self.abspaths(fileids)))
+        return [read_file(path) for path in self.abspaths(fileids)]
+
+    def sents(
+        self,
+        fileids=None,
+        speaker='ALL',
+        stem=False,
+        relation=None,
+        strip_space=True,
+        replace=False,
+    ):
+        """
+        :return: the given file(s) as a list of sentences or utterances, each
+            encoded as a list of word strings.  In lazy mode the per-file
+            results are concatenated; otherwise a list with one entry per
+            file is returned.
+        :rtype: list(list(str))
+
+        :param speaker: If specified, select specific speaker(s) defined
+            in the corpus. Default is 'ALL' (all participants). Common choices
+            are 'CHI' (the child), 'MOT' (mother), ['CHI','MOT'] (exclude
+            researchers)
+        :param stem: If true, then use word stems instead of word strings.
+        :param relation: If true, then return tuples of ``(str,pos,relation_list)``.
+            If there is manually-annotated relation info, it will return
+            tuples of ``(str,pos,test_relation_list,str,pos,gold_relation_list)``
+        :param strip_space: If true, then strip trailing spaces from word
+            tokens. Otherwise, leave the spaces on the tokens.
+        :param replace: If true, then use the replaced (intended) word instead
+            of the original word (e.g., 'wat' will be replaced with 'watch')
+        """
+
+        def read_file(path):
+            # sent=True and pos=False: untagged words grouped by sentence.
+            return self._get_words(
+                path, speaker, True, stem, relation, False, strip_space, replace
+            )
+
+        if self._lazy:
+            return LazyConcatenation(LazyMap(read_file, self.abspaths(fileids)))
+        return [read_file(path) for path in self.abspaths(fileids)]
+
+    def tagged_sents(
+        self,
+        fileids=None,
+        speaker='ALL',
+        stem=False,
+        relation=None,
+        strip_space=True,
+        replace=False,
+    ):
+        """
+        :return: the given file(s) as a list of
+            sentences, each encoded as a list of ``(word,tag)`` tuples.
+            In lazy mode the per-file results are concatenated; otherwise
+            a list with one entry per file is returned.
+        :rtype: list(list(tuple(str,str)))
+
+        :param speaker: If specified, select specific speaker(s) defined
+            in the corpus. Default is 'ALL' (all participants). Common choices
+            are 'CHI' (the child), 'MOT' (mother), ['CHI','MOT'] (exclude
+            researchers)
+        :param stem: If true, then use word stems instead of word strings.
+        :param relation: If true, then return tuples of ``(str,pos,relation_list)``.
+            If there is manually-annotated relation info, it will return
+            tuples of ``(str,pos,test_relation_list,str,pos,gold_relation_list)``
+        :param strip_space: If true, then strip trailing spaces from word
+            tokens. Otherwise, leave the spaces on the tokens.
+        :param replace: If true, then use the replaced (intended) word instead
+            of the original word (e.g., 'wat' will be replaced with 'watch')
+        """
+
+        def read_file(path):
+            # sent=True and pos=True: tagged words grouped by sentence.
+            return self._get_words(
+                path, speaker, True, stem, relation, True, strip_space, replace
+            )
+
+        if self._lazy:
+            return LazyConcatenation(LazyMap(read_file, self.abspaths(fileids)))
+        return [read_file(path) for path in self.abspaths(fileids)]
+
+    def corpus(self, fileids=None):
+        """
+        :return: the given file(s) as a dict of ``(corpus_property_key, value)``
+        :rtype: list(dict)
+        """
+        if self._lazy:
+            return LazyMap(self._get_corpus, self.abspaths(fileids))
+        return [self._get_corpus(path) for path in self.abspaths(fileids)]
+
+    def _get_corpus(self, fileid):
+        """Return the attributes of the XML root element of ``fileid`` as a dict."""
+        root = ElementTree.parse(fileid).getroot()
+        return dict(root.items())
+
+    def participants(self, fileids=None):
+        """
+        Collect the participant records of the given file(s).
+
+        :return: for each file, a dict of
+            ``(participant_property_key, value)`` as produced by
+            ``_get_participants``
+        :rtype: list(dict)
+        """
+        if self._lazy:
+            return LazyMap(self._get_participants, self.abspaths(fileids))
+        return [self._get_participants(path) for path in self.abspaths(fileids)]
+
+    def _get_participants(self, fileid):
+        """
+        Map each participant id in ``fileid`` to a dict of that
+        participant element's XML attributes.
+        """
+
+        # Self-nesting defaultdict: missing keys create another level.
+        def dictOfDicts():
+            return defaultdict(dictOfDicts)
+
+        root = ElementTree.parse(fileid).getroot()
+        query = './/{%s}Participants/{%s}participant' % (NS, NS)
+        table = dictOfDicts()
+        for node in root.findall(query):
+            for attr_name, attr_value in node.items():
+                table[node.get('id')][attr_name] = attr_value
+        return table
+
+    def age(self, fileids=None, speaker='CHI', month=False):
+        """
+        Look up the recorded age of ``speaker`` in the given file(s).
+
+        :return: for each file, the age as a string, or as an int
+            when ``month`` is true
+        :rtype: list or int
+
+        :param speaker: id of the participant whose age is read.
+        :param month: If true, return months instead of year-month-date
+        """
+
+        def lookup(path):
+            return self._get_age(path, speaker, month)
+
+        if self._lazy:
+            return LazyMap(lookup, self.abspaths(fileids))
+        return [lookup(path) for path in self.abspaths(fileids)]
+
+    def _get_age(self, fileid, speaker, month):
+        """
+        Return the ``age`` attribute of the participant whose id is
+        ``speaker`` (converted to months when ``month`` is true), or
+        None when no such participant or no usable age is found.
+        """
+        root = ElementTree.parse(fileid).getroot()
+        query = './/{%s}Participants/{%s}participant' % (NS, NS)
+        for participant in root.findall(query):
+            try:
+                if participant.get('id') != speaker:
+                    continue
+                recorded = participant.get('age')
+                return self.convert_age(recorded) if month else recorded
+            # some files don't have age data
+            except (TypeError, AttributeError):
+                return None
+        return None
+
+    def convert_age(self, age_year):
+        """
+        Calculate age in months from a string in CHILDES format.
+
+        The string is matched against ``P<years>Y<months>M<days>D``,
+        where the ``M`` marker, the day digits and the ``D`` marker are
+        optional.  A day count above 15 rounds the result up by one month.
+
+        :param age_year: age string such as ``'P2Y6M14D'``.
+        :return: the age in whole months.
+        :rtype: int
+        :raises AttributeError: if ``age_year`` does not match the pattern.
+        :raises TypeError: if ``age_year`` is not a string (e.g. None).
+        """
+        # Raw string: '\d' in a plain literal is an invalid escape sequence.
+        m = re.match(r"P(\d+)Y(\d+)M?(\d?\d?)D?", age_year)
+        age_month = int(m.group(1)) * 12 + int(m.group(2))
+        # Group 3 is '' when the age carries no day component.
+        days = m.group(3)
+        if days and int(days) > 15:
+            age_month += 1
+        return age_month
+
+    def MLU(self, fileids=None, speaker='CHI'):
+        """
+        Compute the MLU figure of ``speaker`` for each given file.
+
+        :return: for each file, the value computed by ``_getMLU``
+        :rtype: list(float)
+        """
+
+        def measure(path):
+            return self._getMLU(path, speaker=speaker)
+
+        if self._lazy:
+            return LazyMap(measure, self.abspaths(fileids))
+        return [measure(path) for path in self.abspaths(fileids)]
+
+    def _getMLU(self, fileid, speaker):
+        """
+        Compute the MLU value of ``speaker`` in one file: the number of
+        morphemes (fillers excluded) divided by the number of counted
+        sentences.  Returns 0 when no sentence is counted.
+        """
+        tagged = self._get_words(
+            fileid,
+            speaker=speaker,
+            sent=True,
+            stem=True,
+            relation=False,
+            pos=True,
+            strip_space=True,
+            replace=True,
+        )
+        kept = []
+        previous = []
+        filler_count = 0
+        discounted = 0
+        for utterance in tagged:
+            tags = [tag for (_, tag) in utterance]
+            # skip sentences with any unintelligible ('unk') part
+            if 'unk' in tags:
+                continue
+            # skip empty sentences and immediate repetitions
+            if utterance == [] or utterance == previous:
+                continue
+            kept.append([token for (token, _) in utterance])
+            # fillers are tokens tagged 'co' or carrying no tag at all
+            co_hits = tags.count('co')
+            untagged_hits = tags.count(None)
+            if co_hits or untagged_hits:
+                filler_count += co_hits
+                filler_count += untagged_hits
+                discounted += 1
+            previous = utterance
+        try:
+            tokens = flatten(kept)
+            # a '-' separates morphemes
+            # (e.g., 'read' = 1 morpheme but 'read-PAST' is 2 morphemes)
+            morphemes = flatten([token.split('-') for token in tokens])
+            mlu = (len(morphemes) - filler_count) / (len(kept) - discounted)
+        except ZeroDivisionError:
+            mlu = 0
+        return mlu
+
+    def _get_words(
+        self, fileid, speaker, sent, stem, relation, pos, strip_space, replace
+    ):
+        """
+        Parse one CHILDES XML file and collect its word tokens.
+
+        Every ``u`` (utterance) element whose ``who`` attribute is selected
+        by ``speaker`` is visited, and one token is built per ``w`` element
+        found below it.
+
+        :param fileid: path (or file object) handed to ``ElementTree.parse``.
+        :param speaker: ``'ALL'``, a single speaker id, or a collection of ids.
+        :param sent: if true, group the tokens per utterance.
+        :param stem: if true, use the stem text, with ``-inflection`` and
+            ``~suffix`` appended when present.
+        :param relation: if true, stems and tags are also collected, and when
+            it is exactly ``True`` ``"index|head|relation"`` strings are added
+            to each token tuple; the result is then always grouped per
+            utterance.
+        :param pos: if true, each token becomes a ``(word, tag)`` tuple.
+        :param strip_space: if true, strip surrounding whitespace from the
+            word text.
+        :param replace: if true, look for a ``replacement`` or ``wk`` element
+            to use instead of the original word element.
+        :return: a ``LazyMap`` over the collected tokens (or token lists).
+        """
+        if (
+            isinstance(speaker, string_types) and speaker != 'ALL'
+        ):  # ensure we have a list of speakers
+            speaker = [speaker]
+        xmldoc = ElementTree.parse(fileid).getroot()
+        # processing each xml doc
+        results = []
+        for xmlsent in xmldoc.findall('.//{%s}u' % NS):
+            sents = []
+            # select speakers
+            if speaker == 'ALL' or xmlsent.get('who') in speaker:
+                for xmlword in xmlsent.findall('.//{%s}w' % NS):
+                    infl = None
+                    suffixStem = None
+                    suffixTag = None
+                    # getting replaced words
+                    # NOTE(review): both lookups below search the whole
+                    # utterance (xmlsent), not the current word, and rely on
+                    # the truth value of the Element returned by find() --
+                    # confirm this is the intended behaviour.
+                    if replace and xmlsent.find('.//{%s}w/{%s}replacement' % (NS, NS)):
+                        xmlword = xmlsent.find(
+                            './/{%s}w/{%s}replacement/{%s}w' % (NS, NS, NS)
+                        )
+                    elif replace and xmlsent.find('.//{%s}w/{%s}wk' % (NS, NS)):
+                        xmlword = xmlsent.find('.//{%s}w/{%s}wk' % (NS, NS))
+                    # get text
+                    if xmlword.text:
+                        word = xmlword.text
+                    else:
+                        word = ''
+                    # strip tailing space
+                    if strip_space:
+                        word = word.strip()
+                    # stem
+                    if relation or stem:
+                        # find() returns None when there is no <stem>; the
+                        # resulting AttributeError keeps the surface form.
+                        try:
+                            xmlstem = xmlword.find('.//{%s}stem' % NS)
+                            word = xmlstem.text
+                        except AttributeError as e:
+                            pass
+                        # if there is an inflection
+                        try:
+                            xmlinfl = xmlword.find(
+                                './/{%s}mor/{%s}mw/{%s}mk' % (NS, NS, NS)
+                            )
+                            word += '-' + xmlinfl.text
+                        except:
+                            pass
+                        # if there is a suffix
+                        try:
+                            xmlsuffix = xmlword.find(
+                                './/{%s}mor/{%s}mor-post/{%s}mw/{%s}stem'
+                                % (NS, NS, NS, NS)
+                            )
+                            suffixStem = xmlsuffix.text
+                        except AttributeError:
+                            suffixStem = ""
+                        if suffixStem:
+                            word += "~" + suffixStem
+                    # pos
+                    if relation or pos:
+                        # tag is "<c>" or "<c>:<s>"; empty string when the
+                        # word carries no <c> element.
+                        try:
+                            xmlpos = xmlword.findall(".//{%s}c" % NS)
+                            xmlpos2 = xmlword.findall(".//{%s}s" % NS)
+                            if xmlpos2 != []:
+                                tag = xmlpos[0].text + ":" + xmlpos2[0].text
+                            else:
+                                tag = xmlpos[0].text
+                        except (AttributeError, IndexError) as e:
+                            tag = ""
+                        # same lookup for the tag of a mor-post suffix
+                        try:
+                            xmlsuffixpos = xmlword.findall(
+                                './/{%s}mor/{%s}mor-post/{%s}mw/{%s}pos/{%s}c'
+                                % (NS, NS, NS, NS, NS)
+                            )
+                            xmlsuffixpos2 = xmlword.findall(
+                                './/{%s}mor/{%s}mor-post/{%s}mw/{%s}pos/{%s}s'
+                                % (NS, NS, NS, NS, NS)
+                            )
+                            if xmlsuffixpos2:
+                                suffixTag = (
+                                    xmlsuffixpos[0].text + ":" + xmlsuffixpos2[0].text
+                                )
+                            else:
+                                suffixTag = xmlsuffixpos[0].text
+                        except:
+                            pass
+                        if suffixTag:
+                            tag += "~" + suffixTag
+                        # from here on the token is a (word, tag) tuple
+                        word = (word, tag)
+                    # relational
+                    # the gold standard is stored in
+                    # <mor></mor><mor type="trn"><gra type="grt">
+                    if relation == True:
+                        for xmlstem_rel in xmlword.findall(
+                            './/{%s}mor/{%s}gra' % (NS, NS)
+                        ):
+                            # a non-'grt' <gra> yields a 3-tuple; a 'grt' one
+                            # extends the existing 3-tuple to a 6-tuple
+                            if not xmlstem_rel.get('type') == 'grt':
+                                word = (
+                                    word[0],
+                                    word[1],
+                                    xmlstem_rel.get('index')
+                                    + "|"
+                                    + xmlstem_rel.get('head')
+                                    + "|"
+                                    + xmlstem_rel.get('relation'),
+                                )
+                            else:
+                                word = (
+                                    word[0],
+                                    word[1],
+                                    word[2],
+                                    word[0],
+                                    word[1],
+                                    xmlstem_rel.get('index')
+                                    + "|"
+                                    + xmlstem_rel.get('head')
+                                    + "|"
+                                    + xmlstem_rel.get('relation'),
+                                )
+                        # NOTE(review): suffixStem is a string (or "") here,
+                        # so the indexing below picks characters or raises;
+                        # errors are swallowed and the rebuilt suffixStem is
+                        # never appended to the token -- confirm intent.
+                        try:
+                            for xmlpost_rel in xmlword.findall(
+                                './/{%s}mor/{%s}mor-post/{%s}gra' % (NS, NS, NS)
+                            ):
+                                if not xmlpost_rel.get('type') == 'grt':
+                                    suffixStem = (
+                                        suffixStem[0],
+                                        suffixStem[1],
+                                        xmlpost_rel.get('index')
+                                        + "|"
+                                        + xmlpost_rel.get('head')
+                                        + "|"
+                                        + xmlpost_rel.get('relation'),
+                                    )
+                                else:
+                                    suffixStem = (
+                                        suffixStem[0],
+                                        suffixStem[1],
+                                        suffixStem[2],
+                                        suffixStem[0],
+                                        suffixStem[1],
+                                        xmlpost_rel.get('index')
+                                        + "|"
+                                        + xmlpost_rel.get('head')
+                                        + "|"
+                                        + xmlpost_rel.get('relation'),
+                                    )
+                        except:
+                            pass
+                    sents.append(word)
+                # one list per utterance, or a single flat list of tokens
+                if sent or relation:
+                    results.append(sents)
+                else:
+                    results.extend(sents)
+        return LazyMap(lambda x: x, results)
+
+    # Ready-to-use browser opener
+
+    """
+    The base URL for viewing files on the childes website. This
+    shouldn't need to be changed, unless CHILDES changes the configuration
+    of their server or unless the user sets up their own corpus webserver.
+    """
+    childes_url_base = r'https://childes.talkbank.org/browser/index.php?url='
+
+    def webview_file(self, fileid, urlbase=None):
+        """Map a corpus file to its web version on the CHILDES website,
+        and open it in a web browser.
+
+        The complete URL to be used is:
+            childes.childes_url_base + urlbase + fileid.replace('.xml', '.cha')
+
+        If no urlbase is passed, we try to calculate it.  This
+        requires that the childes corpus was set up to mirror the
+        folder hierarchy under childes.psy.cmu.edu/data-xml/, e.g.:
+        nltk_data/corpora/childes/Eng-USA/Cornell/??? or
+        nltk_data/corpora/childes/Romance/Spanish/Aguirre/???
+
+        The function first looks for "/childes/", possibly followed by
+        "data-xml", on the path consisting of <corpus root>+fileid; then
+        (as a special case) for "Eng-USA". If neither one is found, or the
+        path does not have the expected ``.xml`` shape, we use the
+        unmodified fileid and hope for the best.
+        If this is not right, specify urlbase explicitly, e.g., if the
+        corpus root points to the Cornell folder, urlbase='Eng-USA/Cornell'.
+
+        :param fileid: corpus file id, relative to the corpus root.
+        :param urlbase: optional path prefix to use instead of guessing.
+        """
+
+        import webbrowser
+
+        if urlbase:
+            path = urlbase + "/" + fileid
+        else:
+            # Normalise Windows separators before pattern matching.
+            full = (self.root + "/" + fileid).replace('\\', '/')
+            lowered = full.lower()
+            # Fallback when nothing below matches.
+            path = fileid
+            # The flag is passed as an argument: an inline ``(?i)`` that is
+            # not at the very start of a pattern is rejected by newer
+            # Python versions.
+            if '/childes/' in lowered:
+                # Discard /data-xml/ if present
+                found = re.search(
+                    r'/childes(?:/data-xml)?/(.*)\.xml', full, re.IGNORECASE
+                )
+                if found:
+                    path = found.group(1)
+            elif 'eng-usa' in lowered:
+                found = re.search(r'/Eng-USA/(.*)\.xml', full, re.IGNORECASE)
+                if found:
+                    path = 'Eng-USA/' + found.group(1)
+
+        # Strip ".xml" and add ".cha", as necessary:
+        if path.endswith('.xml'):
+            path = path[:-4]
+
+        if not path.endswith('.cha'):
+            path = path + '.cha'
+
+        url = self.childes_url_base + path
+
+        webbrowser.open_new_tab(url)
+        print("Opening in browser:", url)
+        # Pausing is a good idea, but it's up to the user...
+        # raw_input("Hit Return to continue")
+
+
+def demo(corpus_root=None):
+    """
+    Print a short tour of the CHILDES reader over the first five files
+    found under ``corpus_root``.
+
+    The CHILDES corpus should be manually downloaded and saved
+    to ``[NLTK_Data_Dir]/corpora/childes/``
+
+    :param corpus_root: directory holding CHILDES XML files.  When not
+        given, ``corpora/childes/data-xml/Eng-USA/`` is looked up with
+        ``nltk.data.find``; if that lookup fails, download instructions
+        are printed instead.
+    """
+    try:
+        # The lookup lives inside the ``try`` so that a missing corpus
+        # reaches the help text below instead of escaping as a traceback.
+        if not corpus_root:
+            from nltk.data import find
+
+            corpus_root = find('corpora/childes/data-xml/Eng-USA/')
+
+        # Escaped dot: only names that really end in ".xml" are selected.
+        childes = CHILDESCorpusReader(corpus_root, r'.*\.xml')
+        # describe all corpus
+        for fileid in childes.fileids()[:5]:
+            properties = childes.corpus(fileid)[0]
+            corpus = properties.get("Corpus", '')
+            corpus_id = properties.get("Id", '')
+            print('Reading', corpus, corpus_id, ' .....')
+            print("words:", childes.words(fileid)[:7], "...")
+            print(
+                "words with replaced words:",
+                childes.words(fileid, replace=True)[:7],
+                " ...",
+            )
+            print("words with pos tags:", childes.tagged_words(fileid)[:7], " ...")
+            print("words (only MOT):", childes.words(fileid, speaker='MOT')[:7], "...")
+            print("words (only CHI):", childes.words(fileid, speaker='CHI')[:7], "...")
+            print("stemmed words:", childes.words(fileid, stem=True)[:7], " ...")
+            print(
+                "words with relations and pos-tag:",
+                childes.words(fileid, relation=True)[:5],
+                " ...",
+            )
+            print("sentence:", childes.sents(fileid)[:2], " ...")
+            for (participant, values) in childes.participants(fileid)[0].items():
+                for (key, value) in values.items():
+                    print("\tparticipant", participant, key, ":", value)
+            print("num of sent:", len(childes.sents(fileid)))
+            print("num of morphemes:", len(childes.words(fileid, stem=True)))
+            print("age:", childes.age(fileid))
+            print("age in month:", childes.age(fileid, month=True))
+            print("MLU:", childes.MLU(fileid))
+            print()
+
+    except LookupError:
+        print(
+            """The CHILDES corpus, or the parts you need, should be manually
+        downloaded from https://childes.talkbank.org/data-xml/ and saved at
+        [NLTK_Data_Dir]/corpora/childes/
+            Alternately, you can call the demo with the path to a portion of the CHILDES corpus, e.g.:
+        demo('/path/to/childes/data-xml/Eng-USA/')
+        """
+        )
+        # corpus_root_http = urllib2.urlopen('https://childes.talkbank.org/data-xml/Eng-USA/Bates.zip')
+        # corpus_root_http_bates = zipfile.ZipFile(cStringIO.StringIO(corpus_root_http.read()))
+        ##this fails
+        # childes = CHILDESCorpusReader(corpus_root_http_bates,corpus_root_http_bates.namelist())
+
+
+# Run the CHILDES demo when this module is executed as a script.
+if __name__ == "__main__":
+    demo()
--- a/venv/lib/python3.7/site-packages/nltk/corpus/reader/chunked.py
+++ b/venv/lib/python3.7/site-packages/nltk/corpus/reader/chunked.py
@@ -0,0 +1,285 @@
+# Natural Language Toolkit: Chunked Corpus Reader
+#
+# Copyright (C) 2001-2019 NLTK Project
+# Author: Steven Bird <stevenbird1@gmail.com>
+#         Edward Loper <edloper@gmail.com>
+# URL: <http://nltk.org/>
+# For license information, see LICENSE.TXT
+
+"""
+A reader for corpora that contain chunked (and optionally tagged)
+documents.
+"""
+
+import os.path, codecs
+
+from six import string_types
+
+import nltk
+from nltk.corpus.reader.bracket_parse import BracketParseCorpusReader
+from nltk.tree import Tree
+from nltk.tokenize import *
+from nltk.chunk import tagstr2tree
+from nltk.corpus.reader.util import *
+from nltk.corpus.reader.api import *
+
+
+class ChunkedCorpusReader(CorpusReader):
+    """
+    Reader for chunked (and optionally tagged) corpora.  Paragraphs
+    are split using a block reader.  They are then tokenized into
+    sentences using a sentence tokenizer.  Finally, these sentences
+    are parsed into chunk trees using a string-to-chunktree conversion
+    function.  Each of these steps can be performed using a default
+    function or a custom function.  By default, paragraphs are split
+    on blank lines; sentences are listed one per line; and sentences
+    are parsed into chunk trees using ``nltk.chunk.tagstr2tree``.
+    """
+
+    def __init__(
+        self,
+        root,
+        fileids,
+        extension='',
+        str2chunktree=tagstr2tree,
+        sent_tokenizer=RegexpTokenizer('\n', gaps=True),
+        para_block_reader=read_blankline_block,
+        encoding='utf8',
+        tagset=None,
+    ):
+        """
+        :param root: The root directory for this corpus.
+        :param fileids: A list or regexp specifying the fileids in this corpus.
+        :param extension: Accepted for compatibility; not used here.
+        :param str2chunktree: Function turning a sentence string into a
+            chunk tree.
+        :param sent_tokenizer: Tokenizer splitting a paragraph string
+            into sentence strings.
+        :param para_block_reader: Block reader yielding paragraph strings
+            from a stream.
+        :param encoding: The encoding used to read the corpus files.
+        :param tagset: The tagset of the corpus; handed to the corpus
+            views as their source tagset.
+        """
+        CorpusReader.__init__(self, root, fileids, encoding)
+        # Positional arguments appended to every corpus view built by this
+        # reader: (str2chunktree, sent_tokenizer, para_block_reader, tagset).
+        self._cv_args = (str2chunktree, sent_tokenizer, para_block_reader, tagset)
+
+    def raw(self, fileids=None):
+        """
+        :return: the given file(s) as a single string.
+        :rtype: str
+        """
+        if fileids is None:
+            fileids = self._fileids
+        elif isinstance(fileids, string_types):
+            fileids = [fileids]
+        contents = []
+        for f in fileids:
+            # Close each stream explicitly instead of leaking the handle.
+            stream = self.open(f)
+            try:
+                contents.append(stream.read())
+            finally:
+                stream.close()
+        return concat(contents)
+
+    def _views(
+        self, fileids, tagged, group_by_sent, group_by_para, chunked, tagset=None
+    ):
+        """
+        Build one ``ChunkedCorpusView`` per selected file with the given
+        view flags and concatenate them.  ``tagset`` is passed on as the
+        view's target tagset.
+        """
+        return concat(
+            [
+                ChunkedCorpusView(
+                    f,
+                    enc,
+                    tagged,
+                    group_by_sent,
+                    group_by_para,
+                    chunked,
+                    *self._cv_args,
+                    target_tagset=tagset
+                )
+                for (f, enc) in self.abspaths(fileids, True)
+            ]
+        )
+
+    def words(self, fileids=None):
+        """
+        :return: the given file(s) as a list of words
+            and punctuation symbols.
+        :rtype: list(str)
+        """
+        return self._views(fileids, 0, 0, 0, 0)
+
+    def sents(self, fileids=None):
+        """
+        :return: the given file(s) as a list of
+            sentences or utterances, each encoded as a list of word
+            strings.
+        :rtype: list(list(str))
+        """
+        return self._views(fileids, 0, 1, 0, 0)
+
+    def paras(self, fileids=None):
+        """
+        :return: the given file(s) as a list of
+            paragraphs, each encoded as a list of sentences, which are
+            in turn encoded as lists of word strings.
+        :rtype: list(list(list(str)))
+        """
+        return self._views(fileids, 0, 1, 1, 0)
+
+    def tagged_words(self, fileids=None, tagset=None):
+        """
+        :return: the given file(s) as a list of tagged
+            words and punctuation symbols, encoded as tuples
+            ``(word,tag)``.
+        :rtype: list(tuple(str,str))
+        """
+        return self._views(fileids, 1, 0, 0, 0, tagset)
+
+    def tagged_sents(self, fileids=None, tagset=None):
+        """
+        :return: the given file(s) as a list of
+            sentences, each encoded as a list of ``(word,tag)`` tuples.
+
+        :rtype: list(list(tuple(str,str)))
+        """
+        return self._views(fileids, 1, 1, 0, 0, tagset)
+
+    def tagged_paras(self, fileids=None, tagset=None):
+        """
+        :return: the given file(s) as a list of
+            paragraphs, each encoded as a list of sentences, which are
+            in turn encoded as lists of ``(word,tag)`` tuples.
+        :rtype: list(list(list(tuple(str,str))))
+        """
+        return self._views(fileids, 1, 1, 1, 0, tagset)
+
+    def chunked_words(self, fileids=None, tagset=None):
+        """
+        :return: the given file(s) as a list of tagged
+            words and chunks.  Words are encoded as ``(word, tag)``
+            tuples (if the corpus has tags) or word strings (if the
+            corpus has no tags).  Chunks are encoded as depth-one
+            trees over ``(word,tag)`` tuples or word strings.
+        :rtype: list(tuple(str,str) and Tree)
+        """
+        return self._views(fileids, 1, 0, 0, 1, tagset)
+
+    def chunked_sents(self, fileids=None, tagset=None):
+        """
+        :return: the given file(s) as a list of
+            sentences, each encoded as a shallow Tree.  The leaves
+            of these trees are encoded as ``(word, tag)`` tuples (if
+            the corpus has tags) or word strings (if the corpus has no
+            tags).
+        :rtype: list(Tree)
+        """
+        return self._views(fileids, 1, 1, 0, 1, tagset)
+
+    def chunked_paras(self, fileids=None, tagset=None):
+        """
+        :return: the given file(s) as a list of
+            paragraphs, each encoded as a list of sentences, which are
+            in turn encoded as a shallow Tree.  The leaves of these
+            trees are encoded as ``(word, tag)`` tuples (if the corpus
+            has tags) or word strings (if the corpus has no tags).
+        :rtype: list(list(Tree))
+        """
+        return self._views(fileids, 1, 1, 1, 1, tagset)
+
+    def _read_block(self, stream):
+        # Parse each blank-line-separated block with the default converter.
+        return [tagstr2tree(t) for t in read_blankline_block(stream)]
+
+
+class ChunkedCorpusView(StreamBackedCorpusView):
+    """
+    Stream-backed view over one chunked corpus file.  The four flags
+    given at construction decide whether tags and chunk structure are
+    kept and whether tokens are grouped by sentence and by paragraph.
+    """
+
+    def __init__(
+        self,
+        fileid,
+        encoding,
+        tagged,
+        group_by_sent,
+        group_by_para,
+        chunked,
+        str2chunktree,
+        sent_tokenizer,
+        para_block_reader,
+        source_tagset=None,
+        target_tagset=None,
+    ):
+        StreamBackedCorpusView.__init__(self, fileid, encoding=encoding)
+        # What to keep and how to group it.
+        self._tagged = tagged
+        self._group_by_sent = group_by_sent
+        self._group_by_para = group_by_para
+        self._chunked = chunked
+        # How to split and parse the text.
+        self._str2chunktree = str2chunktree
+        self._sent_tokenizer = sent_tokenizer
+        self._para_block_reader = para_block_reader
+        # Tagsets handed to the string-to-tree converter.
+        self._source_tagset = source_tagset
+        self._target_tagset = target_tagset
+
+    def read_block(self, stream):
+        """Read the next paragraphs from ``stream`` and return their
+        contents, shaped according to the view's flags."""
+        collected = []
+        for para_text in self._para_block_reader(stream):
+            paragraph = []
+            for sent_text in self._sent_tokenizer.tokenize(para_text):
+                parsed = self._str2chunktree(
+                    sent_text,
+                    source_tagset=self._source_tagset,
+                    target_tagset=self._target_tagset,
+                )
+
+                # Drop the tags unless they were asked for.
+                if not self._tagged:
+                    parsed = self._untag(parsed)
+
+                # Drop the chunk structure unless it was asked for.
+                if not self._chunked:
+                    parsed = parsed.leaves()
+
+                # Keep the sentence as a unit, or splice its items in.
+                if self._group_by_sent:
+                    paragraph.append(parsed)
+                else:
+                    paragraph.extend(parsed)
+
+            # Keep the paragraph as a unit, or splice its items in.
+            if self._group_by_para:
+                collected.append(paragraph)
+            else:
+                collected.extend(paragraph)
+
+        return collected
+
+    def _untag(self, tree):
+        """Replace every ``(word, tag)`` leaf of ``tree`` by its word,
+        in place, and return the tree."""
+        for position, node in enumerate(tree):
+            if isinstance(node, Tree):
+                self._untag(node)
+                continue
+            if not isinstance(node, tuple):
+                raise ValueError('expected child to be Tree or tuple')
+            tree[position] = node[0]
+        return tree
--- a/venv/lib/python3.7/site-packages/nltk/corpus/reader/cmudict.py
+++ b/venv/lib/python3.7/site-packages/nltk/corpus/reader/cmudict.py
@@ -0,0 +1,99 @@
+# Natural Language Toolkit: Carnegie Mellon Pronouncing Dictionary Corpus Reader
+#
+# Copyright (C) 2001-2019 NLTK Project
+# Author: Steven Bird <stevenbird1@gmail.com>
+# URL: <http://nltk.org/>
+# For license information, see LICENSE.TXT
+
+"""
+The Carnegie Mellon Pronouncing Dictionary [cmudict.0.6]
+ftp://ftp.cs.cmu.edu/project/speech/dict/
+Copyright 1998 Carnegie Mellon University
+
+File Format: Each line consists of an uppercased word, a counter
+(for alternative pronunciations), and a transcription.  Vowels are
+marked for stress (1=primary, 2=secondary, 0=no stress).  E.g.:
+NATURAL 1 N AE1 CH ER0 AH0 L
+
+The dictionary contains 127069 entries.  Of these, 119400 words are assigned
+a unique pronunciation, 6830 words have two pronunciations, and 839 words have
+three or more pronunciations.  Many of these are fast-speech variants.
+
+Phonemes: There are 39 phonemes, as shown below:
+
+Phoneme Example Translation    Phoneme Example Translation
+------- ------- -----------    ------- ------- -----------
+AA      odd     AA D           AE      at      AE T
+AH      hut     HH AH T        AO      ought   AO T
+AW      cow     K AW           AY      hide    HH AY D
+B       be      B IY           CH      cheese  CH IY Z
+D       dee     D IY           DH      thee    DH IY
+EH      Ed      EH D           ER      hurt    HH ER T
+EY      ate     EY T           F       fee     F IY
+G       green   G R IY N       HH      he      HH IY
+IH      it      IH T           IY      eat     IY T
+JH      gee     JH IY          K       key     K IY
+L       lee     L IY           M       me      M IY
+N       knee    N IY           NG      ping    P IH NG
+OW      oat     OW T           OY      toy     T OY
+P       pee     P IY           R       read    R IY D
+S       sea     S IY           SH      she     SH IY
+T       tea     T IY           TH      theta   TH EY T AH
+UH      hood    HH UH D        UW      two     T UW
+V       vee     V IY           W       we      W IY
+Y       yield   Y IY L D       Z       zee     Z IY
+ZH      seizure S IY ZH ER
+"""
+
+from nltk import compat
+from nltk.util import Index
+
+from nltk.corpus.reader.util import *
+from nltk.corpus.reader.api import *
+
+
+class CMUDictCorpusReader(CorpusReader):
+    """Corpus reader for the CMU Pronouncing Dictionary files."""
+
+    def entries(self):
+        """
+        :return: the cmudict lexicon as a list of entries
+        containing (word, transcriptions) tuples.
+        """
+        return concat(
+            [
+                StreamBackedCorpusView(fileid, read_cmudict_block, encoding=enc)
+                for fileid, enc in self.abspaths(None, True)
+            ]
+        )
+
+    def raw(self):
+        """
+        :return: the cmudict lexicon as a raw string.
+        """
+        fileids = self._fileids
+        if isinstance(fileids, string_types):
+            fileids = [fileids]
+        contents = []
+        for f in fileids:
+            # Close each stream explicitly instead of leaking the handle.
+            stream = self.open(f)
+            try:
+                contents.append(stream.read())
+            finally:
+                stream.close()
+        return concat(contents)
+
+    def words(self):
+        """
+        :return: a list of all words defined in the cmudict lexicon.
+        """
+        return [word.lower() for (word, _) in self.entries()]
+
+    def dict(self):
+        """
+        :return: the cmudict lexicon as a dictionary, whose keys are
+        lowercase words and whose values are lists of pronunciations.
+        """
+        return dict(Index(self.entries()))
+
+
+def read_cmudict_block(stream):
+    """
+    Read up to 100 dictionary entries from ``stream``.
+
+    Each non-blank line is split on whitespace: the first field is the
+    word (lowercased), the second is skipped, and the remaining fields
+    form the transcription.  Blank lines are ignored.
+
+    :param stream: an object with a ``readline()`` method returning str.
+    :return: a list of ``(word, transcription)`` tuples; shorter than
+        100 (possibly empty) only when the end of the stream is reached.
+    """
+    entries = []
+    while len(entries) < 100:  # Read 100 at a time.
+        line = stream.readline()
+        if line == '':
+            return entries  # end of file.
+        pieces = line.split()
+        if not pieces:
+            continue  # blank line: there is no word field to index.
+        entries.append((pieces[0].lower(), pieces[2:]))
+    return entries
--- a/venv/lib/python3.7/site-packages/nltk/corpus/reader/comparative_sents.py
+++ b/venv/lib/python3.7/site-packages/nltk/corpus/reader/comparative_sents.py
@@ -0,0 +1,328 @@
+# Natural Language Toolkit: Comparative Sentence Corpus Reader
+#
+# Copyright (C) 2001-2019 NLTK Project
+# Author: Pierpaolo Pantone <24alsecondo@gmail.com>
+# URL: <http://nltk.org/>
+# For license information, see LICENSE.TXT
+
+"""
+CorpusReader for the Comparative Sentence Dataset.
+
+- Comparative Sentence Dataset information -
+
+Annotated by: Nitin Jindal and Bing Liu, 2006.
+              Department of Computer Science
+              University of Illinois at Chicago
+
+Contact: Nitin Jindal, njindal@cs.uic.edu
+         Bing Liu, liub@cs.uic.edu (http://www.cs.uic.edu/~liub)
+
+Distributed with permission.
+
+Related papers:
+
+- Nitin Jindal and Bing Liu. "Identifying Comparative Sentences in Text Documents".
+   Proceedings of the ACM SIGIR International Conference on Information Retrieval
+   (SIGIR-06), 2006.
+
+- Nitin Jindal and Bing Liu. "Mining Comparative Sentences and Relations".
+   Proceedings of Twenty First National Conference on Artificial Intelligence
+   (AAAI-2006), 2006.
+
+- Murthy Ganapathibhotla and Bing Liu. "Mining Opinions in Comparative Sentences".
+    Proceedings of the 22nd International Conference on Computational Linguistics
+    (Coling-2008), Manchester, 18-22 August, 2008.
+"""
+import re
+
+from six import string_types
+
+from nltk.corpus.reader.api import *
+from nltk.tokenize import *
+
+# Regular expressions for dataset components
+# A line made up only of one or more asterisks.
+STARS = re.compile(r'^\*+$')
+# Opening comparison tag of any type, <cs-1> through <cs-4>.
+COMPARISON = re.compile(r'<cs-[1234]>')
+# Closing comparison tag of any type, </cs-1> through </cs-4>.
+CLOSE_COMPARISON = re.compile(r'</cs-[1234]>')
+# Opening tag for types 1-3 (the reader treats these as gradable).
+GRAD_COMPARISON = re.compile(r'<cs-[123]>')
+# Opening tag for type 4 (the reader treats this as non-gradable).
+NON_GRAD_COMPARISON = re.compile(r'<cs-4>')
+# A "<digit>_<text>" item: group 1 is the digit code, group 2 the text, which
+# stops before the next "<digit>_" so consecutive items are split apart.
+ENTITIES_FEATS = re.compile(r"(\d)_((?:[\.\w\s/-](?!\d_))+)")
+# The contents of the last parenthesised group, which must end the line.
+KEYWORD = re.compile(r'\((?!.*\()(.*)\)$')
+
+
+class Comparison(object):
+    """
+    Plain record holding one comparative sentence and the parts of its
+    comparison relation.
+    """
+
+    def __init__(
+        self,
+        text=None,
+        comp_type=None,
+        entity_1=None,
+        entity_2=None,
+        feature=None,
+        keyword=None,
+    ):
+        """
+        :param text: the sentence containing the comparison, either as a
+            string or as a list of tokens.
+        :param comp_type: integer code for the kind of comparison:
+            1 (Non-equal gradable), 2 (Equative), 3 (Superlative),
+            4 (Non-gradable).
+        :param entity_1: first entity taking part in the comparison.
+        :param entity_2: second entity taking part in the comparison.
+        :param feature: the feature on which the entities are compared.
+        :param keyword: the word or phrase that signals the comparison.
+        """
+        self.text, self.comp_type = text, comp_type
+        self.entity_1, self.entity_2 = entity_1, entity_2
+        self.feature, self.keyword = feature, keyword
+
+    def __repr__(self):
+        template = (
+            "Comparison(text=\"{}\", comp_type={}, entity_1=\"{}\", entity_2=\"{}\", "
+            "feature=\"{}\", keyword=\"{}\")"
+        )
+        values = (
+            self.text,
+            self.comp_type,
+            self.entity_1,
+            self.entity_2,
+            self.feature,
+            self.keyword,
+        )
+        return template.format(*values)
+
+
+class ComparativeSentencesCorpusReader(CorpusReader):
+    """
+    Reader for the Comparative Sentence Dataset by Jindal and Liu (2006).
+
+        >>> from nltk.corpus import comparative_sentences
+        >>> comparison = comparative_sentences.comparisons()[0]
+        >>> comparison.text
+        ['its', 'fast-forward', 'and', 'rewind', 'work', 'much', 'more', 'smoothly',
+        'and', 'consistently', 'than', 'those', 'of', 'other', 'models', 'i', "'ve",
+        'had', '.']
+        >>> comparison.entity_2
+        'models'
+        >>> (comparison.feature, comparison.keyword)
+        ('rewind', 'more')
+        >>> len(comparative_sentences.comparisons())
+        853
+    """
+
+    CorpusView = StreamBackedCorpusView
+
+    def __init__(
+        self,
+        root,
+        fileids,
+        word_tokenizer=WhitespaceTokenizer(),
+        sent_tokenizer=None,
+        encoding='utf8',
+    ):
+        """
+        :param root: The root directory for this corpus.
+        :param fileids: a list or regexp specifying the fileids in this corpus.
+        :param word_tokenizer: tokenizer for breaking sentences or paragraphs
+            into words. Default: `WhitespaceTokenizer`
+        :param sent_tokenizer: tokenizer for breaking paragraphs into sentences.
+        :param encoding: the encoding that should be used to read the corpus.
+        """
+
+        CorpusReader.__init__(self, root, fileids, encoding)
+        self._word_tokenizer = word_tokenizer
+        self._sent_tokenizer = sent_tokenizer
+
+    def comparisons(self, fileids=None):
+        """
+        Return all comparisons in the corpus.
+
+        :param fileids: a list or regexp specifying the ids of the files whose
+            comparisons have to be returned.
+        :return: the given file(s) as a list of Comparison objects.
+        :rtype: list(Comparison)
+        """
+        if fileids is None:
+            fileids = self._fileids
+        elif isinstance(fileids, string_types):
+            fileids = [fileids]
+        return concat(
+            [
+                self.CorpusView(path, self._read_comparison_block, encoding=enc)
+                for (path, enc, fileid) in self.abspaths(fileids, True, True)
+            ]
+        )
+
+    def keywords(self, fileids=None):
+        """
+        Return a set of all keywords used in the corpus.
+
+        :param fileids: a list or regexp specifying the ids of the files whose
+            keywords have to be returned.
+        :return: the set of keywords and comparative phrases used in the corpus.
+        :rtype: set(str)
+        """
+        all_keywords = concat(
+            [
+                self.CorpusView(path, self._read_keyword_block, encoding=enc)
+                for (path, enc, fileid) in self.abspaths(fileids, True, True)
+            ]
+        )
+
+        # Comparisons without a keyword contribute None; drop those.
+        keywords_set = set(keyword.lower() for keyword in all_keywords if keyword)
+        return keywords_set
+
+    def keywords_readme(self):
+        """
+        Return the list of words and constituents considered as clues of a
+        comparison (from listOfkeywords.txt).
+        """
+        keywords = []
+        raw_text = self._read_and_close("listOfkeywords.txt")
+        for line in raw_text.split("\n"):
+            # Skip empty lines and "//" comment lines.
+            if not line or line.startswith("//"):
+                continue
+            keywords.append(line.strip())
+        return keywords
+
+    def raw(self, fileids=None):
+        """
+        :param fileids: a list or regexp specifying the fileids that have to be
+            returned as a raw string.
+        :return: the given file(s) as a single string.
+        :rtype: str
+        """
+        if fileids is None:
+            fileids = self._fileids
+        elif isinstance(fileids, string_types):
+            fileids = [fileids]
+        return concat([self._read_and_close(f) for f in fileids])
+
+    def readme(self):
+        """
+        Return the contents of the corpus readme file.
+        """
+        return self._read_and_close("README.txt")
+
+    def sents(self, fileids=None):
+        """
+        Return all sentences in the corpus.
+
+        :param fileids: a list or regexp specifying the ids of the files whose
+            sentences have to be returned.
+        :return: all sentences of the corpus as lists of tokens (or as plain
+            strings, if no word tokenizer is specified).
+        :rtype: list(list(str)) or list(str)
+        """
+        return concat(
+            [
+                self.CorpusView(path, self._read_sent_block, encoding=enc)
+                for (path, enc, fileid) in self.abspaths(fileids, True, True)
+            ]
+        )
+
+    def words(self, fileids=None):
+        """
+        Return all words and punctuation symbols in the corpus.
+
+        :param fileids: a list or regexp specifying the ids of the files whose
+            words have to be returned.
+        :return: the given file(s) as a list of words and punctuation symbols.
+        :rtype: list(str)
+        """
+        return concat(
+            [
+                self.CorpusView(path, self._read_word_block, encoding=enc)
+                for (path, enc, fileid) in self.abspaths(fileids, True, True)
+            ]
+        )
+
+    def _read_and_close(self, fileid):
+        """
+        Read the whole of ``fileid`` and close its stream afterwards, even
+        if reading fails.
+        """
+        stream = self.open(fileid)
+        try:
+            return stream.read()
+        finally:
+            stream.close()
+
+    def _read_comparison_block(self, stream):
+        """
+        Advance ``stream`` to the next line carrying an opening comparison
+        tag and return the Comparison objects described there.
+
+        :return: a list with one Comparison per tag found on that line
+            (tags of types 1-3 first, then type 4), or an empty list when
+            the end of the stream is reached first.
+        """
+        while True:
+            line = stream.readline()
+            if not line:
+                return []  # end of file.
+            comparison_tags = re.findall(COMPARISON, line)
+            if comparison_tags:
+                grad_comparisons = re.findall(GRAD_COMPARISON, line)
+                non_grad_comparisons = re.findall(NON_GRAD_COMPARISON, line)
+                # Advance to the next line (it contains the comparative sentence)
+                comparison_text = stream.readline().strip()
+                if self._word_tokenizer:
+                    comparison_text = self._word_tokenizer.tokenize(comparison_text)
+                # Skip the next line (it contains closing comparison tags)
+                stream.readline()
+                # If gradable comparisons are found, create Comparison instances
+                # and populate their fields
+                comparison_bundle = []
+                if grad_comparisons:
+                    # Each comparison tag has its own relations on a separate line
+                    for comp in grad_comparisons:
+                        comp_type = int(re.match(r'<cs-(\d)>', comp).group(1))
+                        comparison = Comparison(
+                            text=comparison_text, comp_type=comp_type
+                        )
+                        line = stream.readline()
+                        entities_feats = ENTITIES_FEATS.findall(line)
+                        if entities_feats:
+                            for (code, entity_feat) in entities_feats:
+                                if code == '1':
+                                    comparison.entity_1 = entity_feat.strip()
+                                elif code == '2':
+                                    comparison.entity_2 = entity_feat.strip()
+                                elif code == '3':
+                                    comparison.feature = entity_feat.strip()
+                        keyword = KEYWORD.findall(line)
+                        if keyword:
+                            comparison.keyword = keyword[0]
+                        comparison_bundle.append(comparison)
+                # If non-gradable comparisons are found, create a simple Comparison
+                # instance for each one
+                if non_grad_comparisons:
+                    for comp in non_grad_comparisons:
+                        # comp_type in this case should always be 4.
+                        comp_type = int(re.match(r'<cs-(\d)>', comp).group(1))
+                        comparison = Comparison(
+                            text=comparison_text, comp_type=comp_type
+                        )
+                        comparison_bundle.append(comparison)
+                return comparison_bundle
+
+    def _read_keyword_block(self, stream):
+        """
+        Return the keyword (possibly None) of every comparison in the next
+        comparison block of ``stream``.
+        """
+        return [
+            comparison.keyword for comparison in self._read_comparison_block(stream)
+        ]
+
+    def _read_sent_block(self, stream):
+        """
+        Return the sentence(s) found on the next plain-text line of
+        ``stream``.
+
+        Sections delimited by lines of asterisks are skipped, as are lines
+        holding comparison tags or entity/feature relations.
+
+        :return: a list of sentences, each a list of tokens, or a plain
+            string when the reader has no word tokenizer.
+        """
+        while True:
+            line = stream.readline()
+            if re.match(STARS, line):
+                # Skip everything up to the closing line of asterisks.  Also
+                # stop at end of file, otherwise an unterminated section
+                # would make this loop spin forever on empty reads.
+                while True:
+                    line = stream.readline()
+                    if not line or re.match(STARS, line):
+                        break
+                continue
+            if (
+                not re.findall(COMPARISON, line)
+                and not ENTITIES_FEATS.findall(line)
+                and not re.findall(CLOSE_COMPARISON, line)
+            ):
+                if self._sent_tokenizer:
+                    sentences = self._sent_tokenizer.tokenize(line)
+                else:
+                    sentences = [line]
+                # Without a word tokenizer the sentences stay plain strings,
+                # as documented by sents().
+                if self._word_tokenizer:
+                    return [self._word_tokenizer.tokenize(sent) for sent in sentences]
+                return sentences
+
+    def _read_word_block(self, stream):
+        """
+        Return the tokens of the next sentence block of ``stream`` as one
+        flat list.
+        """
+        words = []
+        for sent in self._read_sent_block(stream):
+            if isinstance(sent, string_types):
+                # Untokenized sentence (no word tokenizer): keep it whole
+                # rather than splitting it into single characters.
+                words.append(sent)
+            else:
+                words.extend(sent)
+        return words
--- a/venv/lib/python3.7/site-packages/nltk/corpus/reader/conll.py
+++ b/venv/lib/python3.7/site-packages/nltk/corpus/reader/conll.py
@@ -0,0 +1,592 @@
+# Natural Language Toolkit: CONLL Corpus Reader
+#
+# Copyright (C) 2001-2019 NLTK Project
+# Author: Steven Bird <stevenbird1@gmail.com>
+#         Edward Loper <edloper@gmail.com>
+# URL: <http://nltk.org/>
+# For license information, see LICENSE.TXT
+
+"""
+Read CoNLL-style chunk fileids.
+"""
+
+from __future__ import unicode_literals
+
+import textwrap
+
+from nltk import compat
+from nltk.tree import Tree
+from nltk.util import LazyMap, LazyConcatenation
+from nltk.tag import map_tag
+
+from nltk.corpus.reader.util import *
+from nltk.corpus.reader.api import *
+
+
+class ConllCorpusReader(CorpusReader):
+    """
+    A corpus reader for CoNLL-style files.  These files consist of a
+    series of sentences, separated by blank lines.  Each sentence is
+    encoded using a table (or "grid") of values, where each line
+    corresponds to a single word, and each column corresponds to an
+    annotation type.  The set of columns used by CoNLL-style files can
+    vary from corpus to corpus; the ``ConllCorpusReader`` constructor
+    therefore takes an argument, ``columntypes``, which is used to
+    specify the columns that are used by a given corpus. By default
+    columns are split by consecutive whitespaces, with the
+    ``separator`` argument you can set a string to split by (e.g.
+    ``\'\t\'``).
+
+
+    @todo: Add support for reading from corpora where different
+        parallel files contain different columns.
+    @todo: Possibly add caching of the grid corpus view?  This would
+        allow the same grid view to be used by different data access
+        methods (eg words() and parsed_sents() could both share the
+        same grid corpus view object).
+    @todo: Better support for -DOCSTART-.  Currently, we just ignore
+        it, but it could be used to define methods that retrieve a
+        document at a time (eg parsed_documents()).
+    """
+
+    # /////////////////////////////////////////////////////////////////
+    # Column Types
+    # /////////////////////////////////////////////////////////////////
+
+    WORDS = 'words'  #: column type for words
+    POS = 'pos'  #: column type for part-of-speech tags
+    TREE = 'tree'  #: column type for parse trees
+    CHUNK = 'chunk'  #: column type for chunk structures
+    NE = 'ne'  #: column type for named entities
+    SRL = 'srl'  #: column type for semantic role labels
+    IGNORE = 'ignore'  #: column type for column that should be ignored
+
+    #: A list of all column types supported by the conll corpus reader.
+    COLUMN_TYPES = (WORDS, POS, TREE, CHUNK, NE, SRL, IGNORE)
+
+    # /////////////////////////////////////////////////////////////////
+    # Constructor
+    # /////////////////////////////////////////////////////////////////
+
+    def __init__(
+        self,
+        root,
+        fileids,
+        columntypes,
+        chunk_types=None,
+        root_label='S',
+        pos_in_tree=False,
+        srl_includes_roleset=True,
+        encoding='utf8',
+        tree_class=Tree,
+        tagset=None,
+        separator=None,
+    ):
+        """
+        :param root: the root directory for this corpus.
+        :param fileids: a list or regexp specifying the fileids in this corpus.
+        :param columntypes: the column types of the corpus files, in file
+            order; each must be one of ``COLUMN_TYPES``.
+        :param chunk_types: chunk type(s) to keep in chunk trees (a single
+            string is wrapped in a list); None keeps all of them.
+        :param root_label: label of the root node of chunk trees, also used
+            to wrap parse strings that cannot be parsed on their own.
+        :param pos_in_tree: default for whether part-of-speech tags stay as
+            tree nodes in parse trees.
+        :param srl_includes_roleset: whether the SRL data starts with a
+            roleset column followed by the predicate column.
+        :param encoding: the encoding used to read the corpus.
+        :param tree_class: the class used to build parse trees.
+        :param tagset: the name of the tagset used by the corpus.
+        :param separator: the string passed to ``str.split`` to split each
+            line into columns; None splits on runs of whitespace.
+        :raise ValueError: if ``columntypes`` contains an unknown type.
+        """
+        for columntype in columntypes:
+            if columntype not in self.COLUMN_TYPES:
+                raise ValueError('Bad column type %r' % columntype)
+        if isinstance(chunk_types, string_types):
+            chunk_types = [chunk_types]
+        self._chunk_types = chunk_types
+        # Maps column type -> column index.
+        self._colmap = dict((c, i) for (i, c) in enumerate(columntypes))
+        self._pos_in_tree = pos_in_tree
+        self._root_label = root_label  # for chunks
+        self._srl_includes_roleset = srl_includes_roleset
+        self._tree_class = tree_class
+        CorpusReader.__init__(self, root, fileids, encoding)
+        self._tagset = tagset
+        self.sep = separator
+
+    # /////////////////////////////////////////////////////////////////
+    # Data Access Methods
+    # /////////////////////////////////////////////////////////////////
+
+    def raw(self, fileids=None):
+        """
+        :return: the given file(s) as a single string.
+        """
+        if fileids is None:
+            fileids = self._fileids
+        elif isinstance(fileids, string_types):
+            fileids = [fileids]
+        contents = []
+        for f in fileids:
+            stream = self.open(f)
+            # Close each stream explicitly instead of leaving the handle
+            # to be reclaimed by the garbage collector.
+            try:
+                contents.append(stream.read())
+            finally:
+                stream.close()
+        return concat(contents)
+
+    def words(self, fileids=None):
+        """
+        :return: a lazy flat sequence of the words of the given file(s).
+        """
+        self._require(self.WORDS)
+        return LazyConcatenation(LazyMap(self._get_words, self._grids(fileids)))
+
+    def sents(self, fileids=None):
+        """
+        :return: a lazy sequence of sentences, each a list of words.
+        """
+        self._require(self.WORDS)
+        return LazyMap(self._get_words, self._grids(fileids))
+
+    def tagged_words(self, fileids=None, tagset=None):
+        """
+        :return: a lazy flat sequence of ``(word, pos_tag)`` tuples.
+        :param tagset: if given and different from the corpus tagset, the
+            tags are mapped to this tagset.
+        """
+        self._require(self.WORDS, self.POS)
+
+        def get_tagged_words(grid):
+            return self._get_tagged_words(grid, tagset)
+
+        return LazyConcatenation(LazyMap(get_tagged_words, self._grids(fileids)))
+
+    def tagged_sents(self, fileids=None, tagset=None):
+        """
+        :return: a lazy sequence of sentences, each a list of
+            ``(word, pos_tag)`` tuples.
+        :param tagset: if given and different from the corpus tagset, the
+            tags are mapped to this tagset.
+        """
+        self._require(self.WORDS, self.POS)
+
+        def get_tagged_words(grid):
+            return self._get_tagged_words(grid, tagset)
+
+        return LazyMap(get_tagged_words, self._grids(fileids))
+
+    def chunked_words(self, fileids=None, chunk_types=None, tagset=None):
+        """
+        :return: a lazy flat sequence of the top-level children of each
+            sentence's chunk tree: chunk subtrees and ``(word, pos_tag)``
+            tuples.
+        :param chunk_types: chunk types to keep; defaults to the value
+            given to the constructor.
+        """
+        self._require(self.WORDS, self.POS, self.CHUNK)
+        if chunk_types is None:
+            chunk_types = self._chunk_types
+
+        def get_chunked_words(grid):  # capture chunk_types as local var
+            return self._get_chunked_words(grid, chunk_types, tagset)
+
+        return LazyConcatenation(LazyMap(get_chunked_words, self._grids(fileids)))
+
+    def chunked_sents(self, fileids=None, chunk_types=None, tagset=None):
+        """
+        :return: a lazy sequence of chunk trees, one per sentence.
+        :param chunk_types: chunk types to keep; defaults to the value
+            given to the constructor.
+        """
+        self._require(self.WORDS, self.POS, self.CHUNK)
+        if chunk_types is None:
+            chunk_types = self._chunk_types
+
+        def get_chunked_words(grid):  # capture chunk_types as local var
+            return self._get_chunked_words(grid, chunk_types, tagset)
+
+        return LazyMap(get_chunked_words, self._grids(fileids))
+
+    def parsed_sents(self, fileids=None, pos_in_tree=None, tagset=None):
+        """
+        :return: a lazy sequence of parse trees, one per sentence.
+        :param pos_in_tree: whether part-of-speech tags stay as tree nodes;
+            defaults to the value given to the constructor.
+        """
+        self._require(self.WORDS, self.POS, self.TREE)
+        if pos_in_tree is None:
+            pos_in_tree = self._pos_in_tree
+
+        def get_parsed_sent(grid):  # capture pos_in_tree as local var
+            return self._get_parsed_sent(grid, pos_in_tree, tagset)
+
+        return LazyMap(get_parsed_sent, self._grids(fileids))
+
+    def srl_spans(self, fileids=None):
+        """
+        :return: a lazy sequence with, for each sentence, one list of
+            ``((start, end), tag)`` spans per predicate.
+        """
+        self._require(self.SRL)
+        return LazyMap(self._get_srl_spans, self._grids(fileids))
+
+    def srl_instances(self, fileids=None, pos_in_tree=None, flatten=True):
+        """
+        :return: the SRL instances of the given file(s): one flat lazy
+            sequence if ``flatten`` is true, otherwise one
+            ``ConllSRLInstanceList`` per sentence.
+        :param pos_in_tree: whether part-of-speech tags stay as tree nodes;
+            defaults to the value given to the constructor.
+        """
+        self._require(self.WORDS, self.POS, self.TREE, self.SRL)
+        if pos_in_tree is None:
+            pos_in_tree = self._pos_in_tree
+
+        def get_srl_instances(grid):  # capture pos_in_tree as local var
+            return self._get_srl_instances(grid, pos_in_tree)
+
+        result = LazyMap(get_srl_instances, self._grids(fileids))
+        if flatten:
+            result = LazyConcatenation(result)
+        return result
+
+    def iob_words(self, fileids=None, tagset=None):
+        """
+        :return: a list of word/tag/IOB tuples
+        :rtype: list(tuple)
+        :param fileids: the list of fileids that make up this corpus
+        :type fileids: None or str or list
+        """
+        self._require(self.WORDS, self.POS, self.CHUNK)
+
+        def get_iob_words(grid):
+            return self._get_iob_words(grid, tagset)
+
+        return LazyConcatenation(LazyMap(get_iob_words, self._grids(fileids)))
+
+    def iob_sents(self, fileids=None, tagset=None):
+        """
+        :return: a list of lists of word/tag/IOB tuples
+        :rtype: list(list)
+        :param fileids: the list of fileids that make up this corpus
+        :type fileids: None or str or list
+        """
+        self._require(self.WORDS, self.POS, self.CHUNK)
+
+        def get_iob_words(grid):
+            return self._get_iob_words(grid, tagset)
+
+        return LazyMap(get_iob_words, self._grids(fileids))
+
+    # /////////////////////////////////////////////////////////////////
+    # Grid Reading
+    # /////////////////////////////////////////////////////////////////
+
+    def _grids(self, fileids=None):
+        """
+        Return a corpus view over the grids (one per sentence) of the
+        given file(s).
+        """
+        # n.b.: we could cache the object returned here (keyed on
+        # fileids), which would let us reuse the same corpus view for
+        # different things (eg srl and parse trees).
+        return concat(
+            [
+                StreamBackedCorpusView(fileid, self._read_grid_block, encoding=enc)
+                for (fileid, enc) in self.abspaths(fileids, True)
+            ]
+        )
+
+    def _read_grid_block(self, stream):
+        """
+        Read the next blank-line-delimited block from ``stream`` and turn
+        it into a grid: a list of rows, each a list of column values.
+
+        :raise ValueError: if the rows of a grid differ in length.
+        """
+        grids = []
+        for block in read_blankline_block(stream):
+            block = block.strip()
+            if not block:
+                continue
+
+            grid = [line.split(self.sep) for line in block.split('\n')]
+
+            # If there's a docstart row, then discard. ([xx] eventually it
+            # would be good to actually use it)
+            if grid[0][self._colmap.get('words', 0)] == '-DOCSTART-':
+                del grid[0]
+
+            # Check that the grid is consistent.
+            for row in grid:
+                if len(row) != len(grid[0]):
+                    raise ValueError('Inconsistent number of columns:\n%s' % block)
+            grids.append(grid)
+        return grids
+
+    # /////////////////////////////////////////////////////////////////
+    # Transforms
+    # /////////////////////////////////////////////////////////////////
+    # given a grid, transform it into some representation (e.g.,
+    # a list of words or a parse tree).
+
+    def _get_words(self, grid):
+        """Return the words column of ``grid``."""
+        return self._get_column(grid, self._colmap['words'])
+
+    def _get_tagged_words(self, grid, tagset=None):
+        """Return ``grid`` as a list of ``(word, pos_tag)`` tuples."""
+        pos_tags = self._get_column(grid, self._colmap['pos'])
+        if tagset and tagset != self._tagset:
+            pos_tags = [map_tag(self._tagset, tagset, t) for t in pos_tags]
+        return list(zip(self._get_column(grid, self._colmap['words']), pos_tags))
+
+    def _get_iob_words(self, grid, tagset=None):
+        """Return ``grid`` as a list of ``(word, pos_tag, chunk_tag)`` tuples."""
+        pos_tags = self._get_column(grid, self._colmap['pos'])
+        if tagset and tagset != self._tagset:
+            pos_tags = [map_tag(self._tagset, tagset, t) for t in pos_tags]
+        return list(
+            zip(
+                self._get_column(grid, self._colmap['words']),
+                pos_tags,
+                self._get_column(grid, self._colmap['chunk']),
+            )
+        )
+
+    def _get_chunked_words(self, grid, chunk_types, tagset=None):
+        """
+        Build a two-level chunk tree from the IOB chunk column of ``grid``.
+
+        :param chunk_types: chunk types to keep; tokens of any other chunk
+            type are attached to the root as if tagged ``O``.  None keeps
+            every chunk type.
+        :return: a ``Tree`` labelled with the root label whose children are
+            ``(word, pos_tag)`` tuples and chunk subtrees.
+        """
+        # n.b.: this method is very similar to conllstr2tree.
+        words = self._get_column(grid, self._colmap['words'])
+        pos_tags = self._get_column(grid, self._colmap['pos'])
+        if tagset and tagset != self._tagset:
+            pos_tags = [map_tag(self._tagset, tagset, t) for t in pos_tags]
+        chunk_tags = self._get_column(grid, self._colmap['chunk'])
+
+        # stack[0] is the root; stack[1], when present, is the open chunk.
+        stack = [Tree(self._root_label, [])]
+
+        for (word, pos_tag, chunk_tag) in zip(words, pos_tags, chunk_tags):
+            if chunk_tag == 'O':
+                state, chunk_type = 'O', ''
+            else:
+                (state, chunk_type) = chunk_tag.split('-')
+            # If it's a chunk we don't care about, treat it as O.
+            if chunk_types is not None and chunk_type not in chunk_types:
+                state = 'O'
+            # Treat a mismatching I like a B.
+            if state == 'I' and chunk_type != stack[-1].label():
+                state = 'B'
+            # For B or O: close any open chunks
+            if state in 'BO' and len(stack) == 2:
+                stack.pop()
+            # For B: start a new chunk.
+            if state == 'B':
+                new_chunk = Tree(chunk_type, [])
+                stack[-1].append(new_chunk)
+                stack.append(new_chunk)
+            # Add the word token.
+            stack[-1].append((word, pos_tag))
+
+        return stack[0]
+
+    def _get_parsed_sent(self, grid, pos_in_tree, tagset=None):
+        """
+        Build the parse tree encoded in the tree column of ``grid``.
+
+        :param pos_in_tree: if false, every subtree holding a single string
+            leaf is replaced by a ``(word, pos_tag)`` tuple.
+        :return: an instance of the reader's tree class.
+        """
+        words = self._get_column(grid, self._colmap['words'])
+        pos_tags = self._get_column(grid, self._colmap['pos'])
+        if tagset and tagset != self._tagset:
+            pos_tags = [map_tag(self._tagset, tagset, t) for t in pos_tags]
+        parse_tags = self._get_column(grid, self._colmap['tree'])
+
+        treestr = ''
+        for (word, pos_tag, parse_tag) in zip(words, pos_tags, parse_tags):
+            # Literal brackets would be read as tree structure, so swap
+            # them for the usual bracket tokens.
+            if word == '(':
+                word = '-LRB-'
+            if word == ')':
+                word = '-RRB-'
+            if pos_tag == '(':
+                pos_tag = '-LRB-'
+            if pos_tag == ')':
+                pos_tag = '-RRB-'
+            (left, right) = parse_tag.split('*')
+            right = right.count(')') * ')'  # only keep ')'.
+            treestr += '%s (%s %s) %s' % (left, pos_tag, word, right)
+        try:
+            tree = self._tree_class.fromstring(treestr)
+        except (ValueError, IndexError):
+            # Retry with the string wrapped in a single root node.
+            tree = self._tree_class.fromstring('(%s %s)' % (self._root_label, treestr))
+
+        if not pos_in_tree:
+            for subtree in tree.subtrees():
+                for i, child in enumerate(subtree):
+                    if (
+                        isinstance(child, Tree)
+                        and len(child) == 1
+                        and isinstance(child[0], string_types)
+                    ):
+                        subtree[i] = (child[0], child.label())
+
+        return tree
+
+    def _get_srl_spans(self, grid):
+        """
+        Return one span list per predicate of ``grid``; each span list is
+        a list of ``((start, end), tag)`` tuples, where ``end`` is one past
+        the index of the last word of the span.
+        """
+        if self._srl_includes_roleset:
+            predicates = self._get_column(grid, self._colmap['srl'] + 1)
+            start_col = self._colmap['srl'] + 2
+        else:
+            predicates = self._get_column(grid, self._colmap['srl'])
+            start_col = self._colmap['srl'] + 1
+
+        # Count how many predicates there are.  This tells us how many
+        # columns to expect for SRL data.
+        num_preds = len([p for p in predicates if p != '-'])
+
+        spanlists = []
+        for i in range(num_preds):
+            col = self._get_column(grid, start_col + i)
+            spanlist = []
+            stack = []  # open spans as (tag, start word index)
+            for wordnum, srl_tag in enumerate(col):
+                (left, right) = srl_tag.split('*')
+                for tag in left.split('('):
+                    if tag:
+                        stack.append((tag, wordnum))
+                # Each ')' closes the innermost open span at this word.
+                for _ in range(right.count(')')):
+                    (tag, start) = stack.pop()
+                    spanlist.append(((start, wordnum + 1), tag))
+            spanlists.append(spanlist)
+
+        return spanlists
+
+    def _get_srl_instances(self, grid, pos_in_tree):
+        """
+        Return a ``ConllSRLInstanceList`` holding one ``ConllSRLInstance``
+        per predicate of ``grid``.
+
+        :raise ValueError: if no span list has a verb span covering a
+            predicate's word.
+        """
+        tree = self._get_parsed_sent(grid, pos_in_tree)
+        spanlists = self._get_srl_spans(grid)
+        if self._srl_includes_roleset:
+            predicates = self._get_column(grid, self._colmap['srl'] + 1)
+            rolesets = self._get_column(grid, self._colmap['srl'])
+        else:
+            predicates = self._get_column(grid, self._colmap['srl'])
+            rolesets = [None] * len(predicates)
+
+        instances = ConllSRLInstanceList(tree)
+        for wordnum, predicate in enumerate(predicates):
+            if predicate == '-':
+                continue
+            # Decide which spanlist to use.  Don't assume that they're
+            # sorted in the same order as the predicates (even though
+            # they usually are).
+            for spanlist in spanlists:
+                for (start, end), tag in spanlist:
+                    if wordnum in range(start, end) and tag in ('V', 'C-V'):
+                        break
+                else:
+                    continue
+                break
+            else:
+                raise ValueError('No srl column found for %r' % predicate)
+            instances.append(
+                ConllSRLInstance(tree, wordnum, predicate, rolesets[wordnum], spanlist)
+            )
+
+        return instances
+
+    # /////////////////////////////////////////////////////////////////
+    # Helper Methods
+    # /////////////////////////////////////////////////////////////////
+
+    def _require(self, *columntypes):
+        """
+        :raise ValueError: if any of ``columntypes`` is not a column of
+            this corpus.
+        """
+        for columntype in columntypes:
+            if columntype not in self._colmap:
+                raise ValueError(
+                    'This corpus does not contain a %s column.' % columntype
+                )
+
+    @staticmethod
+    def _get_column(grid, column_index):
+        """Return the values found at ``column_index`` in every row of ``grid``."""
+        return [row[column_index] for row in grid]
+
+
+@compat.python_2_unicode_compatible
+class ConllSRLInstance(object):
+    """
+    An SRL instance from a CoNLL corpus, which identifies and
+    provides labels for the arguments of a single verb.
+    """
+
+    # [xx] add inst.core_arguments, inst.argm_arguments?
+
+    def __init__(self, tree, verb_head, verb_stem, roleset, tagged_spans):
+        """
+        :param tree: the parse tree of the sentence; its leaves become
+            ``self.words``.
+        :param verb_head: the word index of the verb's head word.
+        :param verb_stem: stored as ``self.verb_stem``.
+        :param roleset: stored as ``self.roleset``.
+        :param tagged_spans: a list of ``((start, end), tag)`` tuples; spans
+            tagged ``'V'`` or ``'C-V'`` make up the verb, all others are
+            arguments.
+        """
+        self.verb = []
+        """A list of the word indices of the words that compose the
+           verb whose arguments are identified by this instance.
+           This will contain multiple word indices when multi-word
+           verbs are used (e.g. 'turn on')."""
+
+        self.verb_head = verb_head
+        """The word index of the head word of the verb whose arguments
+           are identified by this instance.  E.g., for a sentence that
+           uses the verb 'turn on,' ``verb_head`` will be the word index
+           of the word 'turn'."""
+
+        self.verb_stem = verb_stem
+
+        self.roleset = roleset
+
+        self.arguments = []
+        """A list of ``(argspan, argid)`` tuples, specifying the location
+           and type for each of the arguments identified by this
+           instance.  ``argspan`` is a tuple ``start, end``, indicating
+           that the argument consists of the ``words[start:end]``."""
+
+        self.tagged_spans = tagged_spans
+        """A list of ``(span, id)`` tuples, specifying the location and
+           type for each of the arguments, as well as the verb pieces,
+           that make up this instance."""
+
+        self.tree = tree
+        """The parse tree for the sentence containing this instance."""
+
+        self.words = tree.leaves()
+        """A list of the words in the sentence containing this
+           instance."""
+
+        # Fill in the self.verb and self.arguments values.
+        for (start, end), tag in tagged_spans:
+            if tag in ('V', 'C-V'):
+                self.verb += list(range(start, end))
+            else:
+                self.arguments.append(((start, end), tag))
+
+    def __repr__(self):
+        plural = 's' if len(self.arguments) != 1 else ''
+        return '<ConllSRLInstance for %r with %d argument%s>' % (
+            (self.verb_stem, len(self.arguments), plural)
+        )
+
+    def pprint(self):
+        """
+        Return a wrapped, indented rendering of the sentence in which each
+        argument is enclosed in ``[argid ...]`` and each verb word in
+        ``<<...>>``, preceded by a header line naming the verb and its stem.
+        """
+
+        def token(leaf):
+            # Leaves are either plain words or (word, pos) tuples.
+            return leaf[0] if isinstance(leaf, tuple) else leaf
+
+        # Indexing a plain-string leaf with [0] would keep only its first
+        # character, so go through token() instead.
+        verbstr = ' '.join(token(self.words[i]) for i in self.verb)
+        hdr = 'SRL for %r (stem=%r):\n' % (verbstr, self.verb_stem)
+        s = ''
+        for i, word in enumerate(self.words):
+            word = token(word)
+            for (start, end), argid in self.arguments:
+                if i == start:
+                    s += '[%s ' % argid
+                if i == end:
+                    s += '] '
+            if i in self.verb:
+                word = '<<%s>>' % word
+            s += word + ' '
+        # ``end`` is exclusive, so an argument reaching the last word has
+        # end == len(words) and is never closed inside the loop above.
+        for (start, end), argid in self.arguments:
+            if end == len(self.words):
+                s += '] '
+        return hdr + textwrap.fill(
+            s.replace(' ]', ']'), initial_indent='    ', subsequent_indent='    '
+        )
+
+
+@compat.python_2_unicode_compatible
+class ConllSRLInstanceList(list):
+    """
+    Set of instances for a single sentence
+    """
+
+    def __init__(self, tree, instances=()):
+        """
+        :param tree: the parse tree shared by all instances in the list.
+        :param instances: the initial contents of the list.
+        """
+        self.tree = tree
+        list.__init__(self, instances)
+
+    def __str__(self):
+        return self.pprint()
+
+    def pprint(self, include_tree=False):
+        """
+        Return the instances as CoNLL-style columns, one line per word: a
+        verb-stem column (``-`` where no instance has its head) followed by
+        one argument column per instance.
+
+        :param include_tree: if true, prepend word, part-of-speech and
+            syntax columns derived from ``self.tree``.
+        :raise ValueError: if an instance's tree differs from ``self.tree``.
+        """
+        # Sanity check: trees should be the same
+        for inst in self:
+            if inst.tree != self.tree:
+                raise ValueError('Tree mismatch!')
+
+        # The word list drives the row loop below, so it is needed whether
+        # or not the tree columns are requested.
+        words = self.tree.leaves()
+
+        # If desired, add trees:
+        if include_tree:
+            pos = [None] * len(words)
+            synt = ['*'] * len(words)
+            self._tree2conll(self.tree, 0, words, pos, synt)
+
+        s = ''
+        for i in range(len(words)):
+            # optional tree columns
+            if include_tree:
+                s += '%-20s ' % words[i]
+                s += '%-8s ' % pos[i]
+                s += '%15s*%-8s ' % tuple(synt[i].split('*'))
+
+            # verb head column
+            for inst in self:
+                if i == inst.verb_head:
+                    s += '%-20s ' % inst.verb_stem
+                    break
+            else:
+                s += '%-20s ' % '-'
+            # Remaining columns: self
+            for inst in self:
+                argstr = '*'
+                for (start, end), argid in inst.tagged_spans:
+                    if i == start:
+                        argstr = '(%s%s' % (argid, argstr)
+                    if i == (end - 1):
+                        argstr += ')'
+                s += '%-12s ' % argstr
+            s += '\n'
+        return s
+
+    def _tree2conll(self, tree, wordnum, words, pos, synt):
+        """
+        Fill ``pos`` and ``synt`` (and, for tuple leaves, ``words``) in
+        place from ``tree``, starting at word index ``wordnum``.
+
+        :return: the index of the first word after ``tree``.
+        """
+        assert isinstance(tree, Tree)
+        if len(tree) == 1 and isinstance(tree[0], string_types):
+            pos[wordnum] = tree.label()
+            assert words[wordnum] == tree[0]
+            return wordnum + 1
+        elif len(tree) == 1 and isinstance(tree[0], tuple):
+            assert len(tree[0]) == 2
+            # The leaf is a (word, pos) tuple: split it over both columns.
+            words[wordnum], pos[wordnum] = tree[0]
+            return wordnum + 1
+        else:
+            # NOTE(review): children that are not Tree instances trip the
+            # assert above on recursion -- confirm whether trees built with
+            # pos_in_tree=False are meant to be supported here.
+            synt[wordnum] = '(%s%s' % (tree.label(), synt[wordnum])
+            for child in tree:
+                wordnum = self._tree2conll(child, wordnum, words, pos, synt)
+            synt[wordnum - 1] += ')'
+            return wordnum
+
+
+class ConllChunkCorpusReader(ConllCorpusReader):
+    """
+    ConllCorpusReader specialised for data files with exactly three
+    columns, in this order: words, pos, chunk.
+    """
+
+    def __init__(
+        self, root, fileids, chunk_types, encoding='utf8', tagset=None, separator=None
+    ):
+        # The column layout is fixed; everything else is passed through.
+        columntypes = ('words', 'pos', 'chunk')
+        options = dict(
+            chunk_types=chunk_types,
+            encoding=encoding,
+            tagset=tagset,
+            separator=separator,
+        )
+        ConllCorpusReader.__init__(self, root, fileids, columntypes, **options)
--- a/venv/lib/python3.7/site-packages/nltk/corpus/reader/crubadan.py
+++ b/venv/lib/python3.7/site-packages/nltk/corpus/reader/crubadan.py
@@ -0,0 +1,119 @@
+# -*- coding: utf-8 -*-
+# Natural Language Toolkit: An Crubadan N-grams Reader
+#
+# Copyright (C) 2001-2019 NLTK Project
+# Author: Avital Pekker <avital.pekker@utoronto.ca>
+#
+# URL: <http://nltk.org/>
+# For license information, see LICENSE.TXT
+
+"""
+An NLTK interface for the n-gram statistics gathered from
+the corpora for each language using An Crubadan.
+
+There are multiple potential applications for the data but
+this reader was created with the goal of using it in the
+context of language identification.
+
+For details about An Crubadan, this data, and its potential uses, see:
+http://borel.slu.edu/crubadan/index.html
+"""
+
+from __future__ import print_function, unicode_literals
+
+import re
+from os import path
+
+from nltk.compat import PY3
+from nltk.corpus.reader import CorpusReader
+from nltk.probability import FreqDist
+from nltk.data import ZipFilePathPointer
+
+
+class CrubadanCorpusReader(CorpusReader):
+    """
+    A corpus reader used to access language An Crubadan n-gram files.
+
+    The language table (``table.txt``) is loaded when the reader is
+    constructed; per-language n-gram counts are loaded on first request
+    and cached.
+    """
+
+    _LANG_MAPPER_FILE = 'table.txt'
+    # NOTE: this cache is a class attribute, so it is shared by every
+    # instance of the reader.
+    _all_lang_freq = {}
+
+    def __init__(self, root, fileids, encoding='utf8', tagset=None):
+        # ``encoding`` and ``tagset`` are accepted but not used: the base
+        # class is always initialised with 'utf8'.
+        super(CrubadanCorpusReader, self).__init__(root, fileids, encoding='utf8')
+        self._lang_mapping_data = []
+        self._load_lang_mapping_data()
+
+    def lang_freq(self, lang):
+        ''' Return n-gram FreqDist for a specific language
+            given ISO 639-3 language code '''
+
+        if lang not in self._all_lang_freq:
+            self._all_lang_freq[lang] = self._load_lang_ngrams(lang)
+
+        return self._all_lang_freq[lang]
+
+    def langs(self):
+        ''' Return a list of supported languages as ISO 639-3 codes '''
+        return [row[1] for row in self._lang_mapping_data]
+
+    def iso_to_crubadan(self, lang):
+        ''' Return internal Crubadan code based on ISO 639-3 code
+            (case-insensitive), or None if the code is unknown '''
+        for i in self._lang_mapping_data:
+            if i[1].lower() == lang.lower():
+                return i[0]
+
+    def crubadan_to_iso(self, lang):
+        ''' Return ISO 639-3 code given internal Crubadan code
+            (case-insensitive), or None if the code is unknown '''
+        for i in self._lang_mapping_data:
+            if i[0].lower() == lang.lower():
+                return i[1]
+
+    def _load_lang_mapping_data(self):
+        ''' Load language mappings between codes and description from table.txt
+
+            Raises RuntimeError if the corpus root is a zip file pointer or
+            if the mapper file is not among the reader's fileids. '''
+        if isinstance(self.root, ZipFilePathPointer):
+            raise RuntimeError(
+                "Please install the 'crubadan' corpus first, use nltk.download()"
+            )
+
+        mapper_file = path.join(self.root, self._LANG_MAPPER_FILE)
+        if self._LANG_MAPPER_FILE not in self.fileids():
+            raise RuntimeError("Could not find language mapper file: " + mapper_file)
+
+        # Use context managers so the file handle is closed promptly
+        # instead of being left to the garbage collector.
+        if PY3:
+            with open(mapper_file, 'r', encoding='utf-8') as infile:
+                raw = infile.read().strip()
+        else:
+            with open(mapper_file, 'rU') as infile:
+                raw = infile.read().decode('utf-8').strip()
+
+        # One tab-separated row per line; rows are indexed as
+        # row[0] = Crubadan code, row[1] = ISO 639-3 code by the accessors.
+        self._lang_mapping_data = [row.split('\t') for row in raw.split('\n')]
+
+    def _load_lang_ngrams(self, lang):
+        ''' Load single n-gram language file given the ISO 639-3 language code
+            and return its FreqDist
+
+            Raises RuntimeError if the language is not listed in the mapping
+            table or if its ``<code>-3grams.txt`` file does not exist. '''
+
+        if lang not in self.langs():
+            raise RuntimeError("Unsupported language.")
+
+        crubadan_code = self.iso_to_crubadan(lang)
+        ngram_file = path.join(self.root, crubadan_code + '-3grams.txt')
+
+        if not path.isfile(ngram_file):
+            raise RuntimeError("No N-gram file found for requested language.")
+
+        counts = FreqDist()
+        if PY3:
+            f = open(ngram_file, 'r', encoding='utf-8')
+        else:
+            f = open(ngram_file, 'rU')
+
+        # ``with`` guarantees the file is closed even if a line is malformed.
+        with f:
+            for line in f:
+                if not PY3:
+                    line = line.decode('utf8')
+
+                # Each line is "<frequency> <ngram>".
+                data = line.split(' ')
+                ngram = data[1].strip('\n')
+                freq = int(data[0])
+
+                counts[ngram] = freq
+
+        return counts
--- a/venv/lib/python3.7/site-packages/nltk/corpus/reader/dependency.py
+++ b/venv/lib/python3.7/site-packages/nltk/corpus/reader/dependency.py
@@ -0,0 +1,134 @@
+# Natural Language Toolkit: Dependency Corpus Reader
+#
+# Copyright (C) 2001-2019 NLTK Project
+# Author: Kepa Sarasola <kepa.sarasola@ehu.es>
+#         Iker Manterola <returntothehangar@hotmail.com>
+#
+# URL: <http://nltk.org/>
+# For license information, see LICENSE.TXT
+
+import codecs
+
+from nltk.parse import DependencyGraph
+from nltk.tokenize import *
+
+from nltk.corpus.reader.util import *
+from nltk.corpus.reader.api import *
+
+
+class DependencyCorpusReader(SyntaxCorpusReader):
+    """
+    Corpus reader for files of dependency-annotated sentences.
+
+    Every accessor builds one ``DependencyCorpusView`` per file and
+    concatenates them; ``parsed_sents`` additionally wraps each sentence
+    in a ``DependencyGraph``.
+    """
+
+    def __init__(
+        self,
+        root,
+        fileids,
+        encoding='utf8',
+        word_tokenizer=TabTokenizer(),
+        sent_tokenizer=RegexpTokenizer('\n', gaps=True),
+        para_block_reader=read_blankline_block,
+    ):
+        # NOTE: ``word_tokenizer``, ``sent_tokenizer`` and
+        # ``para_block_reader`` are accepted but never stored or used here.
+        # FIXME: Why is it inheriting from SyntaxCorpusReader but initializing
+        #       from CorpusReader?
+        CorpusReader.__init__(self, root, fileids, encoding)
+
+    #########################################################
+
+    def raw(self, fileids=None):
+        """
+        :return: the given file(s) as a single string.
+        :rtype: str
+        """
+        result = []
+        for fileid, encoding in self.abspaths(fileids, include_encoding=True):
+            if isinstance(fileid, PathPointer):
+                # Close the stream explicitly rather than leaking it.
+                stream = fileid.open(encoding=encoding)
+                try:
+                    result.append(stream.read())
+                finally:
+                    stream.close()
+            else:
+                with codecs.open(fileid, "r", encoding) as fp:
+                    result.append(fp.read())
+        return concat(result)
+
+    def _dependency_views(self, fileids, tagged, group_by_sent, dependencies):
+        """
+        Build one ``DependencyCorpusView`` per file with the given flags
+        and return their concatenation.
+        """
+        return concat(
+            [
+                DependencyCorpusView(
+                    fileid, tagged, group_by_sent, dependencies, encoding=enc
+                )
+                for fileid, enc in self.abspaths(fileids, include_encoding=True)
+            ]
+        )
+
+    def words(self, fileids=None):
+        """:return: the words of the given file(s), untagged and ungrouped."""
+        return self._dependency_views(fileids, False, False, False)
+
+    def tagged_words(self, fileids=None):
+        """:return: the ``(word, tag)`` pairs of the given file(s)."""
+        return self._dependency_views(fileids, True, False, False)
+
+    def sents(self, fileids=None):
+        """:return: the given file(s) as sentences, each a list of words."""
+        return self._dependency_views(fileids, False, True, False)
+
+    def tagged_sents(self, fileids=None):
+        """:return: the given file(s) as sentences of ``(word, tag)`` pairs."""
+        return self._dependency_views(fileids, True, True, False)
+
+    def parsed_sents(self, fileids=None):
+        """
+        :return: a list with one ``DependencyGraph`` per sentence, built
+            from the unprocessed text of each sentence block.
+        """
+        sents = self._dependency_views(fileids, False, True, True)
+        return [DependencyGraph(sent) for sent in sents]
+
+
+class DependencyCorpusView(StreamBackedCorpusView):
+    """
+    Stream-backed view that yields one blank-line-delimited sentence per
+    block, optionally reduced to words or ``(word, tag)`` pairs.
+    """
+
+    _DOCSTART = '-DOCSTART- -DOCSTART- O\n'  # marks the start of a document
+
+    def __init__(
+        self,
+        corpus_file,
+        tagged,
+        group_by_sent,
+        dependencies,
+        chunk_types=None,
+        encoding='utf8',
+    ):
+        self._tagged = tagged
+        self._dependencies = dependencies
+        self._group_by_sent = group_by_sent
+        self._chunk_types = chunk_types
+        StreamBackedCorpusView.__init__(self, corpus_file, encoding=encoding)
+
+    def read_block(self, stream):
+        """
+        Read the next sentence from ``stream``.
+
+        When dependencies are requested the sentence text is returned
+        as-is; otherwise it is split into tab-separated rows and reduced to
+        words or ``(word, tag)`` pairs.
+
+        :raise ValueError: if the first row has an unsupported field count.
+        """
+        block = read_blankline_block(stream)[0].strip()
+        marker = self._DOCSTART
+        if block.startswith(marker):
+            # Drop the document-start marker.
+            block = block[len(marker) :].lstrip()
+
+        if not self._dependencies:
+            rows = [row.split('\t') for row in block.split('\n')]
+            width = len(rows[0])
+            # The field count of the first row selects the column layout.
+            if width in (3, 4):
+                word_col, tag_col = 0, 1
+            elif width == 10:
+                word_col, tag_col = 1, 4
+            else:
+                raise ValueError('Unexpected number of fields in dependency tree file')
+            block = [(row[word_col], row[tag_col]) for row in rows]
+
+            if not self._tagged:
+                # Tags were not requested; keep only the words.
+                block = [word for (word, _tag) in block]
+
+        return [block] if self._group_by_sent else list(block)
--- a/venv/lib/python3.7/site-packages/nltk/corpus/reader/framenet.py
+++ b/venv/lib/python3.7/site-packages/nltk/corpus/reader/framenet.py
--- a/venv/lib/python3.7/site-packages/nltk/corpus/reader/ieer.py
+++ b/venv/lib/python3.7/site-packages/nltk/corpus/reader/ieer.py
@@ -0,0 +1,129 @@
+# Natural Language Toolkit: IEER Corpus Reader
+#
+# Copyright (C) 2001-2019 NLTK Project
+# Author: Steven Bird <stevenbird1@gmail.com>
+#         Edward Loper <edloper@gmail.com>
+# URL: <http://nltk.org/>
+# For license information, see LICENSE.TXT
+
+"""
+Corpus reader for the Information Extraction and Entity Recognition Corpus.
+
+NIST 1999 Information Extraction: Entity Recognition Evaluation
+http://www.itl.nist.gov/iad/894.01/tests/ie-er/er_99/er_99.htm
+
+This corpus contains the NEWSWIRE development test data for the
+NIST 1999 IE-ER Evaluation.  The files were taken from the
+subdirectory: /ie_er_99/english/devtest/newswire/*.ref.nwt
+and filenames were shortened.
+
+The corpus contains the following files: APW_19980314, APW_19980424,
+APW_19980429, NYT_19980315, NYT_19980403, and NYT_19980407.
+"""
+from __future__ import unicode_literals
+
+from six import string_types
+
+import nltk
+from nltk import compat
+from nltk.corpus.reader.api import *
+
+#: A dictionary whose keys are the names of documents in this corpus;
+#: and whose values are descriptions of those documents' contents
+#: (source and publication date).
+titles = {
+    'APW_19980314': 'Associated Press Weekly, 14 March 1998',
+    'APW_19980424': 'Associated Press Weekly, 24 April 1998',
+    'APW_19980429': 'Associated Press Weekly, 29 April 1998',
+    'NYT_19980315': 'New York Times, 15 March 1998',
+    'NYT_19980403': 'New York Times, 3 April 1998',
+    'NYT_19980407': 'New York Times, 7 April 1998',
+}
+
+#: A list of all documents in this corpus: the keys of ``titles``, sorted.
+documents = sorted(titles)
+
+
+@compat.python_2_unicode_compatible
+class IEERDocument(object):
+    """
+    A single IEER document: its text plus optional metadata (document
+    number, document type, date/time and headline).
+    """
+
+    def __init__(self, text, docno=None, doctype=None, date_time=None, headline=''):
+        self.text = text
+        self.docno = docno
+        self.doctype = doctype
+        self.date_time = date_time
+        self.headline = headline
+
+    def __repr__(self):
+        # Summarise with the headline when there is one; otherwise with the
+        # first twelve leaves of the text that do not start with '<'.
+        if self.headline:
+            summary = ' '.join(self.headline.leaves())
+        else:
+            leading = [tok for tok in self.text.leaves() if tok[:1] != '<'][:12]
+            summary = ' '.join(leading) + '...'
+
+        if self.docno is None:
+            return '<IEERDocument: %r>' % summary
+        return '<IEERDocument %s: %r>' % (self.docno, summary)
+
+
+class IEERCorpusReader(CorpusReader):
+    """
+    Corpus reader for the IEER corpus.
+
+    ``docs`` yields each ``<DOC>`` ... ``</DOC>`` block as a string;
+    ``parsed_docs`` yields the same blocks parsed into ``IEERDocument``
+    objects.
+    """
+
+    def raw(self, fileids=None):
+        """
+        :return: the given file(s) as a single string.
+        """
+        if fileids is None:
+            fileids = self._fileids
+        elif isinstance(fileids, string_types):
+            fileids = [fileids]
+        contents = []
+        for f in fileids:
+            # Close each stream explicitly rather than leaking it.
+            stream = self.open(f)
+            try:
+                contents.append(stream.read())
+            finally:
+                stream.close()
+        return concat(contents)
+
+    def docs(self, fileids=None):
+        """
+        :return: the documents of the given file(s), each as a string.
+        """
+        return concat(
+            [
+                StreamBackedCorpusView(fileid, self._read_block, encoding=enc)
+                for (fileid, enc) in self.abspaths(fileids, True)
+            ]
+        )
+
+    def parsed_docs(self, fileids=None):
+        """
+        :return: the documents of the given file(s) as ``IEERDocument``
+            objects; documents without a ``docno`` are dropped.
+        """
+        return concat(
+            [
+                StreamBackedCorpusView(fileid, self._read_parsed_block, encoding=enc)
+                for (fileid, enc) in self.abspaths(fileids, True)
+            ]
+        )
+
+    def _read_parsed_block(self, stream):
+        # TODO: figure out why empty documents are being returned
+        # Parse each document once, then filter on the parsed result
+        # (previously every document was parsed twice).
+        parsed = [self._parse(doc) for doc in self._read_block(stream)]
+        return [doc for doc in parsed if doc.docno is not None]
+
+    def _parse(self, doc):
+        """
+        Parse one document string into an ``IEERDocument``.
+        """
+        val = nltk.chunk.ieerstr2tree(doc, root_label="DOCUMENT")
+        if isinstance(val, dict):
+            # A dict result is unpacked into IEERDocument's keyword
+            # arguments.
+            return IEERDocument(**val)
+        else:
+            return IEERDocument(val)
+
+    def _read_block(self, stream):
+        """
+        Read one ``<DOC>`` ... ``</DOC>`` block from ``stream`` and return
+        it as a one-element list.
+        """
+        out = []
+        # Skip any preamble.
+        while True:
+            line = stream.readline()
+            if not line:
+                break
+            if line.strip() == '<DOC>':
+                break
+        # ``line`` is the '<DOC>' line, or '' if the stream ended first.
+        out.append(line)
+        # Read the document
+        while True:
+            line = stream.readline()
+            if not line:
+                break
+            out.append(line)
+            if line.strip() == '</DOC>':
+                break
+        # Return the document
+        return ['\n'.join(out)]
--- a/venv/lib/python3.7/site-packages/nltk/corpus/reader/indian.py
+++ b/venv/lib/python3.7/site-packages/nltk/corpus/reader/indian.py
@@ -0,0 +1,103 @@
+# Natural Language Toolkit: Indian Language POS-Tagged Corpus Reader
+#
+# Copyright (C) 2001-2019 NLTK Project
+# Author: Steven Bird <stevenbird1@gmail.com>
+#         Edward Loper <edloper@gmail.com>
+# URL: <http://nltk.org/>
+# For license information, see LICENSE.TXT
+
+"""
+Indian Language POS-Tagged Corpus
+Collected by A Kumaran, Microsoft Research, India
+Distributed with permission
+
+Contents:
+  - Bangla: IIT Kharagpur
+  - Hindi: Microsoft Research India
+  - Marathi: IIT Bombay
+  - Telugu: IIIT Hyderabad
+"""
+
+from six import string_types
+
+from nltk.tag import str2tuple, map_tag
+
+from nltk.corpus.reader.util import *
+from nltk.corpus.reader.api import *
+
+
+class IndianCorpusReader(CorpusReader):
+    """
+    Reader for the Indian Language POS-tagged corpus.
+
+    Each line is split on whitespace into ``word_tag`` tokens by
+    ``IndianCorpusView``; lines starting with ``<`` contribute nothing.
+    """
+
+    def _tag_mapping_function(self, tagset):
+        """
+        Return a function mapping this corpus's tags to ``tagset``, or
+        None when no mapping is needed (no tagset requested, or it is
+        already the corpus's own tagset).
+        """
+        if tagset and tagset != self._tagset:
+            return lambda t: map_tag(self._tagset, tagset, t)
+        return None
+
+    def _indian_views(self, fileids, tagged, group_by_sent, tag_mapping_function=None):
+        """
+        Build one ``IndianCorpusView`` per file with the given flags and
+        return their concatenation.
+        """
+        return concat(
+            [
+                IndianCorpusView(
+                    fileid, enc, tagged, group_by_sent, tag_mapping_function
+                )
+                for (fileid, enc) in self.abspaths(fileids, True)
+            ]
+        )
+
+    def words(self, fileids=None):
+        """:return: the words of the given file(s)."""
+        return self._indian_views(fileids, False, False)
+
+    def tagged_words(self, fileids=None, tagset=None):
+        """
+        :return: the ``(word, tag)`` pairs of the given file(s), with tags
+            mapped to ``tagset`` when one is requested.
+        """
+        return self._indian_views(
+            fileids, True, False, self._tag_mapping_function(tagset)
+        )
+
+    def sents(self, fileids=None):
+        """:return: the given file(s) as sentences, each a list of words."""
+        return self._indian_views(fileids, False, True)
+
+    def tagged_sents(self, fileids=None, tagset=None):
+        """
+        :return: the given file(s) as sentences of ``(word, tag)`` pairs,
+            with tags mapped to ``tagset`` when one is requested.
+        """
+        return self._indian_views(
+            fileids, True, True, self._tag_mapping_function(tagset)
+        )
+
+    def raw(self, fileids=None):
+        """:return: the given file(s) as a single string."""
+        if fileids is None:
+            fileids = self._fileids
+        elif isinstance(fileids, string_types):
+            fileids = [fileids]
+        contents = []
+        for f in fileids:
+            # Close each stream explicitly rather than leaking it.
+            stream = self.open(f)
+            try:
+                contents.append(stream.read())
+            finally:
+                stream.close()
+        return concat(contents)
+
+
+class IndianCorpusView(StreamBackedCorpusView):
+    """
+    Stream-backed view that turns each line of a corpus file into a list
+    of words or ``(word, tag)`` pairs.
+    """
+
+    def __init__(
+        self, corpus_file, encoding, tagged, group_by_sent, tag_mapping_function=None
+    ):
+        self._tagged = tagged
+        self._group_by_sent = group_by_sent
+        self._tag_mapping_function = tag_mapping_function
+        StreamBackedCorpusView.__init__(self, corpus_file, encoding=encoding)
+
+    def read_block(self, stream):
+        """
+        Read one line from ``stream`` and return its tokens.
+
+        Lines starting with ``<`` yield an empty block.  Tokens are split
+        on ``_`` into ``(word, tag)`` pairs, optionally tag-mapped, and
+        reduced to bare words when tags were not requested.
+        """
+        line = stream.readline()
+        if line.startswith('<'):
+            return []
+
+        tokens = [str2tuple(item, sep='_') for item in line.split()]
+        mapper = self._tag_mapping_function
+        if mapper:
+            tokens = [(word, mapper(tag)) for (word, tag) in tokens]
+        if not self._tagged:
+            tokens = [word for (word, _tag) in tokens]
+
+        return [tokens] if self._group_by_sent else tokens
--- a/venv/lib/python3.7/site-packages/nltk/corpus/reader/ipipan.py
+++ b/venv/lib/python3.7/site-packages/nltk/corpus/reader/ipipan.py
@@ -0,0 +1,368 @@
+# Natural Language Toolkit: IPI PAN Corpus Reader
+#
+# Copyright (C) 2001-2019 NLTK Project
+# Author: Konrad Goluchowski <kodie@mimuw.edu.pl>
+# URL: <http://nltk.org/>
+# For license information, see LICENSE.TXT
+
+import functools
+
+from six import string_types
+
+from nltk.corpus.reader.util import StreamBackedCorpusView, concat
+from nltk.corpus.reader.api import CorpusReader
+
+
+def _parse_args(fun):
+    """
+    Decorate a reader method so that a ``tags`` keyword is discarded and a
+    missing or empty ``fileids`` is replaced by ``self.fileids()``.
+    """
+
+    @functools.wraps(fun)
+    def wrapper(self, fileids=None, **kwargs):
+        # Callers may not override ``tags``; each method sets it itself.
+        kwargs.pop('tags', None)
+        selected = fileids if fileids else self.fileids()
+        return fun(self, selected, **kwargs)
+
+    return wrapper
+
+
+class IPIPANCorpusReader(CorpusReader):
+    """
+    Corpus reader designed to work with corpus created by IPI PAN.
+    See http://korpus.pl/en/ for more details about IPI PAN corpus.
+
+    The corpus includes information about text domain, channel and categories.
+    You can access possible values using ``domains()``, ``channels()`` and
+    ``categories()``. You can use also this metadata to filter files, e.g.:
+    ``fileids(channels='prasa')``, ``fileids(categories='publicystyczny')``.
+    Only one of these filters may be given per call.
+
+    The reader supports methods: words, sents, paras and their tagged versions.
+    You can get part of speech instead of full tag by giving "simplify_tags=True"
+    parameter, e.g.: ``tagged_sents(simplify_tags=True)``.
+
+    You can also get all disambiguated tags by specifying the parameter
+    "one_tag=False", e.g.: ``tagged_paras(one_tag=False)``.
+
+    You can get all tags that were assigned by a morphological analyzer specifying
+    parameter "disamb_only=False", e.g. ``tagged_words(disamb_only=False)``.
+
+    The IPIPAN Corpus contains tags indicating if there is a space between two
+    tokens. To add special "no space" markers, you should specify parameter
+    "append_no_space=True", e.g. ``tagged_words(append_no_space=True)``.
+    As a result in place where there should be no space between two tokens new
+    pair ('', 'no-space') will be inserted (for tagged data) and just '' for
+    methods without tags.
+
+    The corpus reader can also try to append spaces between words. To enable this
+    option, specify parameter "append_space=True", e.g. ``words(append_space=True)``.
+    As a result either ' ' or (' ', 'space') will be inserted between tokens.
+
+    By default, xml entities like &quot; and &amp; are replaced by corresponding
+    characters. You can turn off this feature, specifying parameter
+    "replace_xmlentities=False", e.g. ``words(replace_xmlentities=False)``.
+    """
+
+    def __init__(self, root, fileids):
+        CorpusReader.__init__(self, root, fileids, None, None)
+
+    def raw(self, fileids=None):
+        """Return the contents of the given morph files as one string."""
+        if not fileids:
+            fileids = self.fileids()
+
+        filecontents = []
+        for fileid in self._list_morph_files(fileids):
+            with open(fileid, 'r') as infile:
+                filecontents.append(infile.read())
+        return ''.join(filecontents)
+
+    def channels(self, fileids=None):
+        """Return the distinct ``channel`` header values of the given files."""
+        if not fileids:
+            fileids = self.fileids()
+        return self._parse_header(fileids, 'channel')
+
+    def domains(self, fileids=None):
+        """Return the distinct ``domain`` header values of the given files."""
+        if not fileids:
+            fileids = self.fileids()
+        return self._parse_header(fileids, 'domain')
+
+    def categories(self, fileids=None):
+        """Return the distinct ``keyTerm`` header values of the given files,
+        each passed through ``_map_category``."""
+        if not fileids:
+            fileids = self.fileids()
+        return [
+            self._map_category(cat) for cat in self._parse_header(fileids, 'keyTerm')
+        ]
+
+    def fileids(self, channels=None, domains=None, categories=None):
+        """
+        Return the corpus file ids, optionally filtered by header metadata.
+
+        :param channels: a channel name or list of names to filter by.
+        :param domains: a domain name or list of names to filter by.
+        :param categories: a category name or list of names to filter by.
+        :raise ValueError: if more than one filter is given.
+        """
+        # Reject any combination of filters, as the message promises; the
+        # previous check only fired when all three were given and silently
+        # ignored the extra filter otherwise.
+        given = [f for f in (channels, domains, categories) if f is not None]
+        if len(given) > 1:
+            raise ValueError(
+                'You can specify only one of channels, domains '
+                'and categories parameter at once'
+            )
+        if not given:
+            return CorpusReader.fileids(self)
+        if isinstance(channels, string_types):
+            channels = [channels]
+        if isinstance(domains, string_types):
+            domains = [domains]
+        if isinstance(categories, string_types):
+            categories = [categories]
+        # Dispatch on which filter was supplied (not on its truthiness), so
+        # an empty list selects nothing instead of falling through to the
+        # categories branch with ``categories=None``.
+        if channels is not None:
+            return self._list_morph_files_by('channel', channels)
+        elif domains is not None:
+            return self._list_morph_files_by('domain', domains)
+        else:
+            return self._list_morph_files_by(
+                'keyTerm', categories, map=self._map_category
+            )
+
+    @_parse_args
+    def sents(self, fileids=None, **kwargs):
+        return concat(
+            [
+                self._view(
+                    fileid, mode=IPIPANCorpusView.SENTS_MODE, tags=False, **kwargs
+                )
+                for fileid in self._list_morph_files(fileids)
+            ]
+        )
+
+    @_parse_args
+    def paras(self, fileids=None, **kwargs):
+        return concat(
+            [
+                self._view(
+                    fileid, mode=IPIPANCorpusView.PARAS_MODE, tags=False, **kwargs
+                )
+                for fileid in self._list_morph_files(fileids)
+            ]
+        )
+
+    @_parse_args
+    def words(self, fileids=None, **kwargs):
+        return concat(
+            [
+                self._view(fileid, tags=False, **kwargs)
+                for fileid in self._list_morph_files(fileids)
+            ]
+        )
+
+    @_parse_args
+    def tagged_sents(self, fileids=None, **kwargs):
+        return concat(
+            [
+                self._view(fileid, mode=IPIPANCorpusView.SENTS_MODE, **kwargs)
+                for fileid in self._list_morph_files(fileids)
+            ]
+        )
+
+    @_parse_args
+    def tagged_paras(self, fileids=None, **kwargs):
+        return concat(
+            [
+                self._view(fileid, mode=IPIPANCorpusView.PARAS_MODE, **kwargs)
+                for fileid in self._list_morph_files(fileids)
+            ]
+        )
+
+    @_parse_args
+    def tagged_words(self, fileids=None, **kwargs):
+        return concat(
+            [self._view(fileid, **kwargs) for fileid in self._list_morph_files(fileids)]
+        )
+
+    def _list_morph_files(self, fileids):
+        """Return the absolute paths of the given file ids as a list."""
+        return list(self.abspaths(fileids))
+
+    def _list_header_files(self, fileids):
+        """Return the header-file path that corresponds to each morph file."""
+        return [
+            f.replace('morph.xml', 'header.xml')
+            for f in self._list_morph_files(fileids)
+        ]
+
+    def _parse_header(self, fileids, tag):
+        """Collect the distinct values of ``tag`` across the header files."""
+        values = set()
+        for f in self._list_header_files(fileids):
+            values.update(self._get_tag(f, tag))
+        return list(values)
+
+    def _list_morph_files_by(self, tag, values, map=None):
+        """
+        Return the file ids whose header has a ``tag`` value (after the
+        optional ``map`` function is applied) that is in ``values``.
+        """
+        fileids = self.fileids()
+        ret_fileids = set()
+        for f in fileids:
+            fp = self.abspath(f).replace('morph.xml', 'header.xml')
+            values_list = self._get_tag(fp, tag)
+            for value in values_list:
+                if map is not None:
+                    value = map(value)
+                if value in values:
+                    ret_fileids.add(f)
+        return list(ret_fileids)
+
+    def _get_tag(self, f, tag):
+        """
+        Return the text found between each ``<tag`` opening and the next
+        ``</tag>`` in file ``f``.
+
+        This is a plain substring scan, not an XML parse: the slice starts
+        ``len(tag) + 2`` characters after the ``<``, i.e. just past
+        ``<tag>`` when the element has no attributes.
+        """
+        tags = []
+        with open(f, 'r') as infile:
+            header = infile.read()
+        tag_end = 0
+        while True:
+            tag_pos = header.find('<' + tag, tag_end)
+            if tag_pos < 0:
+                return tags
+            tag_end = header.find('</' + tag + '>', tag_pos)
+            tags.append(header[tag_pos + len(tag) + 2 : tag_end])
+
+    def _map_category(self, cat):
+        """Return the part of ``cat`` after its first '>', or ``cat``
+        unchanged when it contains none."""
+        pos = cat.find('>')
+        if pos == -1:
+            return cat
+        else:
+            return cat[pos + 1 :]
+
+    def _view(self, filename, **kwargs):
+        """
+        Validate the view options in ``kwargs`` and build an
+        ``IPIPANCorpusView`` for ``filename``.
+
+        :raise ValueError: on unknown options or on option combinations
+            that are not supported.
+        """
+        tags = kwargs.pop('tags', True)
+        mode = kwargs.pop('mode', 0)
+        simplify_tags = kwargs.pop('simplify_tags', False)
+        one_tag = kwargs.pop('one_tag', True)
+        disamb_only = kwargs.pop('disamb_only', True)
+        append_no_space = kwargs.pop('append_no_space', False)
+        append_space = kwargs.pop('append_space', False)
+        replace_xmlentities = kwargs.pop('replace_xmlentities', True)
+
+        if len(kwargs) > 0:
+            # Format a plain list so the message does not show a
+            # ``dict_keys(...)`` wrapper on Python 3.
+            raise ValueError('Unexpected arguments: %s' % sorted(kwargs))
+        if not one_tag and not disamb_only:
+            raise ValueError(
+                'You cannot specify both one_tag=False and ' 'disamb_only=False'
+            )
+        if not tags and (simplify_tags or not one_tag or not disamb_only):
+            raise ValueError(
+                'You cannot specify simplify_tags, one_tag or '
+                'disamb_only with functions other than tagged_*'
+            )
+
+        return IPIPANCorpusView(
+            filename,
+            tags=tags,
+            mode=mode,
+            simplify_tags=simplify_tags,
+            one_tag=one_tag,
+            disamb_only=disamb_only,
+            append_no_space=append_no_space,
+            append_space=append_space,
+            replace_xmlentities=replace_xmlentities,
+        )
+
+
+class IPIPANCorpusView(StreamBackedCorpusView):
+    """
+    Stream-backed view over one IPI PAN morph file.
+
+    ``read_block`` scans the file line by line, reacting to the prefix of
+    each line (``<chunk``, ``<tok``, ``<orth``, ``<lex``, ``<ns/>`` ...),
+    and returns tokens, one sentence or one paragraph depending on the
+    mode.
+    """
+
+    # Values for the ``mode`` option: what one block contains.
+    WORDS_MODE = 0
+    SENTS_MODE = 1
+    PARAS_MODE = 2
+
+    def __init__(self, filename, startpos=0, **kwargs):
+        # NOTE(review): the two ``None`` positionals are presumably the
+        # block reader and the encoding -- confirm against
+        # StreamBackedCorpusView's signature.
+        StreamBackedCorpusView.__init__(self, filename, None, startpos, None)
+        # True while the scanner is inside a <chunk type="s"> element.
+        self.in_sentence = False
+        # Stream offset just past the last line consumed by read_block.
+        self.position = 0
+
+        # Options; any keyword not listed here is left in ``kwargs`` and
+        # ignored.
+        self.show_tags = kwargs.pop('tags', True)
+        self.disamb_only = kwargs.pop('disamb_only', True)
+        self.mode = kwargs.pop('mode', IPIPANCorpusView.WORDS_MODE)
+        self.simplify_tags = kwargs.pop('simplify_tags', False)
+        self.one_tag = kwargs.pop('one_tag', True)
+        self.append_no_space = kwargs.pop('append_no_space', False)
+        self.append_space = kwargs.pop('append_space', False)
+        self.replace_xmlentities = kwargs.pop('replace_xmlentities', True)
+
+    def read_block(self, stream):
+        """
+        Read lines from ``stream`` until one block is complete.
+
+        :return: the tokens of one sentence (WORDS_MODE), a one-element
+            list holding one sentence (SENTS_MODE), a one-element list
+            holding the sentences of one paragraph (PARAS_MODE), or ``[]``
+            when a read returns no data.
+        """
+        sentence = []
+        sentences = []
+        space = False
+        no_space = False
+
+        tags = set()
+
+        lines = self._read_data(stream)
+
+        while True:
+
+            # we may have only part of last line
+            # (so rewind to the start of that line and read a fresh chunk)
+            if len(lines) <= 1:
+                self._seek(stream)
+                lines = self._read_data(stream)
+
+            # An empty read splits to [''], which is treated as end of input.
+            if lines == ['']:
+                assert not sentences
+                return []
+
+            line = lines.pop()
+            # +1 for the newline that split() removed.
+            self.position += len(line) + 1
+
+            if line.startswith('<chunk type="s"'):
+                self.in_sentence = True
+            elif line.startswith('<chunk type="p"'):
+                pass
+            elif line.startswith('<tok'):
+                # Emit a space before this token unless a <ns/> marker was
+                # seen since the previous token.
+                if self.append_space and space and not no_space:
+                    self._append_space(sentence)
+                space = True
+                no_space = False
+                orth = ""
+                tags = set()
+            elif line.startswith('</chunk'):
+                if self.in_sentence:
+                    # End of a sentence: leave the stream positioned just
+                    # after the consumed lines before returning.
+                    self.in_sentence = False
+                    self._seek(stream)
+                    if self.mode == self.SENTS_MODE:
+                        return [sentence]
+                    elif self.mode == self.WORDS_MODE:
+                        if self.append_space:
+                            self._append_space(sentence)
+                        return sentence
+                    else:
+                        # NOTE(review): ``sentence`` is not reset here, so
+                        # in PARAS_MODE the same list object keeps growing
+                        # across sentences -- confirm this is intended.
+                        sentences.append(sentence)
+                elif self.mode == self.PARAS_MODE:
+                    # A closing chunk outside a sentence ends the paragraph.
+                    self._seek(stream)
+                    return [sentences]
+            elif line.startswith('<orth'):
+                # Drop the 6 leading and 7 trailing characters; assumes the
+                # line is exactly '<orth>...</orth>' -- TODO confirm.
+                orth = line[6:-7]
+                if self.replace_xmlentities:
+                    orth = orth.replace('&quot;', '"').replace('&amp;', '&')
+            elif line.startswith('<lex'):
+                # With disamb_only, keep only <lex> lines that carry a
+                # 'disamb=' attribute.
+                if not self.disamb_only or line.find('disamb=') != -1:
+                    # Text between '<ctag>' (6 characters) and '</ctag'.
+                    tag = line[line.index('<ctag') + 6 : line.index('</ctag')]
+                    tags.add(tag)
+            elif line.startswith('</tok'):
+                if self.show_tags:
+                    if self.simplify_tags:
+                        # Keep only the part of each tag before the first ':'.
+                        tags = [t.split(':')[0] for t in tags]
+                    if not self.one_tag or not self.disamb_only:
+                        sentence.append((orth, tuple(tags)))
+                    else:
+                        sentence.append((orth, tags.pop()))
+                else:
+                    sentence.append(orth)
+            elif line.startswith('<ns/>'):
+                if self.append_space:
+                    no_space = True
+                if self.append_no_space:
+                    if self.show_tags:
+                        sentence.append(('', 'no-space'))
+                    else:
+                        sentence.append('')
+            elif line.startswith('</cesAna'):
+                pass
+
+    def _read_data(self, stream):
+        """
+        Record the current stream offset, read up to 4096 units from the
+        stream and return its lines in reverse order, so that
+        ``lines.pop()`` yields them in file order.
+        """
+        self.position = stream.tell()
+        buff = stream.read(4096)
+        lines = buff.split('\n')
+        lines.reverse()
+        return lines
+
+    def _seek(self, stream):
+        """Reposition ``stream`` at the recorded offset."""
+        stream.seek(self.position)
+
+    def _append_space(self, sentence):
+        """Append a space marker to ``sentence`` in the current output
+        format (tagged pair or bare string)."""
+        if self.show_tags:
+            sentence.append((' ', 'space'))
+        else:
+            sentence.append(' ')
--- a/venv/lib/python3.7/site-packages/nltk/corpus/reader/knbc.py
+++ b/venv/lib/python3.7/site-packages/nltk/corpus/reader/knbc.py
@@ -0,0 +1,194 @@
+#! /usr/bin/env python
+# KNB Corpus reader
+# Copyright (C) 2001-2019 NLTK Project
+# Author: Masato Hagiwara <hagisan@gmail.com>
+# URL: <http://nltk.org/>
+# For license information, see LICENSE.TXT
+
+# For more information, see http://lilyx.net/pages/nltkjapanesecorpus.html
+from __future__ import print_function
+
+import re
+from six import string_types
+
+from nltk.parse import DependencyGraph
+
+from nltk.corpus.reader.util import (
+    FileSystemPathPointer,
+    find_corpus_fileids,
+    read_blankline_block,
+)
+from nltk.corpus.reader.api import SyntaxCorpusReader, CorpusReader
+
+# default function to convert morphlist to str for tree representation
+_morphs2str_default = lambda morphs: '/'.join(m[0] for m in morphs if m[0] != 'EOS')
+
+
+class KNBCorpusReader(SyntaxCorpusReader):
+    """
+    This class implements:
+      - ``__init__``, which specifies the location of the corpus
+        and a method for detecting the sentence blocks in corpus files.
+      - ``_read_block``, which reads a block from the input stream.
+      - ``_word``, which takes a block and returns a list of list of words.
+      - ``_tag``, which takes a block and returns a list of list of tagged
+        words.
+      - ``_parse``, which takes a block and returns a list of parsed
+        sentences.
+
+    The structure of tagged words:
+      tagged_word = (word(str), tags(tuple))
+      tags = (surface, reading, lemma, pos1, posid1, pos2, posid2, pos3, posid3, others ...)
+
+    Usage example
+    -------------
+
+    >>> from nltk.corpus.util import LazyCorpusLoader
+    >>> knbc = LazyCorpusLoader(
+    ...     'knbc/corpus1',
+    ...     KNBCorpusReader,
+    ...     r'.*/KN.*',
+    ...     encoding='euc-jp',
+    ... )
+
+    >>> len(knbc.sents()[0])
+    9
+
+    """
+
+    def __init__(self, root, fileids, encoding='utf8', morphs2str=_morphs2str_default):
+        """
+        Initialize KNBCorpusReader
+        morphs2str is a function to convert morphlist to str for tree representation
+        for _parse()
+        """
+        # FIXME: Why is it inheritting from SyntaxCorpusReader but initializing
+        #       from CorpusReader?
+        CorpusReader.__init__(self, root, fileids, encoding)
+        self.morphs2str = morphs2str
+
+    def _read_block(self, stream):
+        # blocks are split by blankline (or EOF) - default
+        return read_blankline_block(stream)
+
+    def _word(self, t):
+        res = []
+        for line in t.splitlines():
+            # ignore the Bunsets headers
+            if not re.match(r"EOS|\*|\#|\+", line):
+                cells = line.strip().split(" ")
+                res.append(cells[0])
+
+        return res
+
+    # ignores tagset argument
+    def _tag(self, t, tagset=None):
+        res = []
+        for line in t.splitlines():
+            # ignore the Bunsets headers
+            if not re.match(r"EOS|\*|\#|\+", line):
+                cells = line.strip().split(" ")
+                # convert cells to morph tuples
+                res.append((cells[0], ' '.join(cells[1:])))
+
+        return res
+
+    def _parse(self, t):
+        dg = DependencyGraph()
+        i = 0
+        for line in t.splitlines():
+            if line[0] in '*+':
+                # start of bunsetsu or tag
+
+                cells = line.strip().split(" ", 3)
+                m = re.match(r"([\-0-9]*)([ADIP])", cells[1])
+
+                assert m is not None
+
+                node = dg.nodes[i]
+                node.update({'address': i, 'rel': m.group(2), 'word': []})
+
+                dep_parent = int(m.group(1))
+
+                if dep_parent == -1:
+                    dg.root = node
+                else:
+                    dg.nodes[dep_parent]['deps'].append(i)
+
+                i += 1
+            elif line[0] != '#':
+                # normal morph
+                cells = line.strip().split(" ")
+                # convert cells to morph tuples
+                morph = cells[0], ' '.join(cells[1:])
+                dg.nodes[i - 1]['word'].append(morph)
+
+        if self.morphs2str:
+            for node in dg.nodes.values():
+                node['word'] = self.morphs2str(node['word'])
+
+        return dg.tree()
+
+
+######################################################################
+# Demo
+######################################################################
+
+
+def demo():
+
+    import nltk
+    from nltk.corpus.util import LazyCorpusLoader
+
+    root = nltk.data.find('corpora/knbc/corpus1')
+    fileids = [
+        f
+        for f in find_corpus_fileids(FileSystemPathPointer(root), ".*")
+        if re.search(r"\d\-\d\-[\d]+\-[\d]+", f)
+    ]
+
+    def _knbc_fileids_sort(x):
+        cells = x.split('-')
+        return (cells[0], int(cells[1]), int(cells[2]), int(cells[3]))
+
+    knbc = LazyCorpusLoader(
+        'knbc/corpus1',
+        KNBCorpusReader,
+        sorted(fileids, key=_knbc_fileids_sort),
+        encoding='euc-jp',
+    )
+
+    print(knbc.fileids()[:10])
+    print(''.join(knbc.words()[:100]))
+
+    print('\n\n'.join(str(tree) for tree in knbc.parsed_sents()[:2]))
+
+    knbc.morphs2str = lambda morphs: '/'.join(
+        "%s(%s)" % (m[0], m[1].split(' ')[2]) for m in morphs if m[0] != 'EOS'
+    ).encode('utf-8')
+
+    print('\n\n'.join('%s' % tree for tree in knbc.parsed_sents()[:2]))
+
+    print(
+        '\n'.join(
+            ' '.join("%s/%s" % (w[0], w[1].split(' ')[2]) for w in sent)
+            for sent in knbc.tagged_sents()[0:2]
+        )
+    )
+
+
+def test():
+
+    from nltk.corpus.util import LazyCorpusLoader
+
+    knbc = LazyCorpusLoader(
+        'knbc/corpus1', KNBCorpusReader, r'.*/KN.*', encoding='euc-jp'
+    )
+    assert isinstance(knbc.words()[0], string_types)
+    assert isinstance(knbc.sents()[0][0], string_types)
+    assert isinstance(knbc.tagged_words()[0], tuple)
+    assert isinstance(knbc.tagged_sents()[0][0], tuple)
+
+
+# Run the demo when this file is executed as a script.
+if __name__ == '__main__':
+    demo()
--- a/venv/lib/python3.7/site-packages/nltk/corpus/reader/lin.py
+++ b/venv/lib/python3.7/site-packages/nltk/corpus/reader/lin.py
@@ -0,0 +1,184 @@
+# Natural Language Toolkit: Lin's Thesaurus
+#
+# Copyright (C) 2001-2019 NLTK Project
+# Author: Dan Blanchard <dblanchard@ets.org>
+# URL: <http://nltk.org/>
+# For license information, see LICENSE.txt
+from __future__ import print_function
+
+import re
+from collections import defaultdict
+from functools import reduce
+
+from nltk.corpus.reader import CorpusReader
+
+
+class LinThesaurusCorpusReader(CorpusReader):
+    """ Wrapper for the LISP-formatted thesauruses distributed by Dekang Lin. """
+
+    # Compiled regular expression for extracting the key from the first line of each
+    # thesaurus entry
+    _key_re = re.compile(r'\("?([^"]+)"? \(desc [0-9.]+\).+')
+
+    @staticmethod
+    def __defaultdict_factory():
+        ''' Factory for creating defaultdict of defaultdict(dict)s '''
+        return defaultdict(dict)
+
+    def __init__(self, root, badscore=0.0):
+        '''
+        Initialize the thesaurus.
+
+        :param root: root directory containing thesaurus LISP files
+        :type root: C{string}
+        :param badscore: the score to give to words which do not appear in each other's sets of synonyms
+        :type badscore: C{float}
+        '''
+
+        super(LinThesaurusCorpusReader, self).__init__(root, r'sim[A-Z]\.lsp')
+        self._thesaurus = defaultdict(LinThesaurusCorpusReader.__defaultdict_factory)
+        self._badscore = badscore
+        for path, encoding, fileid in self.abspaths(
+            include_encoding=True, include_fileid=True
+        ):
+            with open(path) as lin_file:
+                first = True
+                for line in lin_file:
+                    line = line.strip()
+                    # Start of entry
+                    if first:
+                        key = LinThesaurusCorpusReader._key_re.sub(r'\1', line)
+                        first = False
+                    # End of entry
+                    elif line == '))':
+                        first = True
+                    # Lines with pairs of ngrams and scores
+                    else:
+                        split_line = line.split('\t')
+                        if len(split_line) == 2:
+                            ngram, score = split_line
+                            self._thesaurus[fileid][key][ngram.strip('"')] = float(
+                                score
+                            )
+
+    def similarity(self, ngram1, ngram2, fileid=None):
+        '''
+        Returns the similarity score for two ngrams.
+
+        :param ngram1: first ngram to compare
+        :type ngram1: C{string}
+        :param ngram2: second ngram to compare
+        :type ngram2: C{string}
+        :param fileid: thesaurus fileid to search in. If None, search all fileids.
+        :type fileid: C{string}
+        :return: If fileid is specified, just the score for the two ngrams; otherwise,
+                 list of tuples of fileids and scores.
+        '''
+        # Entries don't contain themselves, so make sure similarity between item and itself is 1.0
+        if ngram1 == ngram2:
+            if fileid:
+                return 1.0
+            else:
+                return [(fid, 1.0) for fid in self._fileids]
+        else:
+            if fileid:
+                return (
+                    self._thesaurus[fileid][ngram1][ngram2]
+                    if ngram2 in self._thesaurus[fileid][ngram1]
+                    else self._badscore
+                )
+            else:
+                return [
+                    (
+                        fid,
+                        (
+                            self._thesaurus[fid][ngram1][ngram2]
+                            if ngram2 in self._thesaurus[fid][ngram1]
+                            else self._badscore
+                        ),
+                    )
+                    for fid in self._fileids
+                ]
+
+    def scored_synonyms(self, ngram, fileid=None):
+        '''
+        Returns a list of scored synonyms (tuples of synonyms and scores) for the current ngram
+
+        :param ngram: ngram to lookup
+        :type ngram: C{string}
+        :param fileid: thesaurus fileid to search in. If None, search all fileids.
+        :type fileid: C{string}
+        :return: If fileid is specified, list of tuples of scores and synonyms; otherwise,
+                 list of tuples of fileids and lists, where inner lists consist of tuples of
+                 scores and synonyms.
+        '''
+        if fileid:
+            return self._thesaurus[fileid][ngram].items()
+        else:
+            return [
+                (fileid, self._thesaurus[fileid][ngram].items())
+                for fileid in self._fileids
+            ]
+
+    def synonyms(self, ngram, fileid=None):
+        '''
+        Returns a list of synonyms for the current ngram.
+
+        :param ngram: ngram to lookup
+        :type ngram: C{string}
+        :param fileid: thesaurus fileid to search in. If None, search all fileids.
+        :type fileid: C{string}
+        :return: If fileid is specified, list of synonyms; otherwise, list of tuples of fileids and
+                 lists, where inner lists contain synonyms.
+        '''
+        if fileid:
+            return self._thesaurus[fileid][ngram].keys()
+        else:
+            return [
+                (fileid, self._thesaurus[fileid][ngram].keys())
+                for fileid in self._fileids
+            ]
+
+    def __contains__(self, ngram):
+        '''
+        Determines whether or not the given ngram is in the thesaurus.
+
+        :param ngram: ngram to lookup
+        :type ngram: C{string}
+        :return: whether the given ngram is in the thesaurus.
+        '''
+        return reduce(
+            lambda accum, fileid: accum or (ngram in self._thesaurus[fileid]),
+            self._fileids,
+            False,
+        )
+
+
+######################################################################
+# Demo
+######################################################################
+
+
+def demo():
+    from nltk.corpus import lin_thesaurus as thes
+
+    word1 = "business"
+    word2 = "enterprise"
+    print("Getting synonyms for " + word1)
+    print(thes.synonyms(word1))
+
+    print("Getting scored synonyms for " + word1)
+    print(thes.scored_synonyms(word1))
+
+    print("Getting synonyms from simN.lsp (noun subsection) for " + word1)
+    print(thes.synonyms(word1, fileid="simN.lsp"))
+
+    print("Getting synonyms from simN.lsp (noun subsection) for " + word1)
+    print(thes.synonyms(word1, fileid="simN.lsp"))
+
+    print("Similarity score for %s and %s:" % (word1, word2))
+    print(thes.similarity(word1, word2))
+
+
+# Run the demo when this file is executed as a script.
+if __name__ == '__main__':
+    demo()
--- a/venv/lib/python3.7/site-packages/nltk/corpus/reader/mte.py
+++ b/venv/lib/python3.7/site-packages/nltk/corpus/reader/mte.py
@@ -0,0 +1,414 @@
+"""
+A reader for corpora whose documents are in MTE format.
+"""
+import os
+import re
+from functools import reduce
+
+from six import string_types
+
+from nltk.corpus.reader import concat, TaggedCorpusReader
+from nltk.corpus.reader.xmldocs import XMLCorpusView
+
+
+def xpath(root, path, ns):
+    return root.findall(path, ns)
+
+
+class MTECorpusView(XMLCorpusView):
+    """
+    Class for lazy viewing the MTE Corpus.
+    """
+
+    def __init__(self, fileid, tagspec, elt_handler=None):
+        XMLCorpusView.__init__(self, fileid, tagspec, elt_handler)
+
+    def read_block(self, stream, tagspec=None, elt_handler=None):
+        return list(
+            filter(
+                lambda x: x is not None,
+                XMLCorpusView.read_block(self, stream, tagspec, elt_handler),
+            )
+        )
+
+
+class MTEFileReader:
+    """
+    Class for loading the content of the multext-east corpus. It
+    parses the xml files and does some tag-filtering depending on the
+    given method parameters.
+    """
+
+    ns = {
+        'tei': 'http://www.tei-c.org/ns/1.0',
+        'xml': 'http://www.w3.org/XML/1998/namespace',
+    }
+    tag_ns = '{http://www.tei-c.org/ns/1.0}'
+    xml_ns = '{http://www.w3.org/XML/1998/namespace}'
+    word_path = "TEI/text/body/div/div/p/s/(w|c)"
+    sent_path = "TEI/text/body/div/div/p/s"
+    para_path = "TEI/text/body/div/div/p"
+
+    def __init__(self, file_path):
+        self.__file_path = file_path
+
+    @classmethod
+    def _word_elt(cls, elt, context):
+        return elt.text
+
+    @classmethod
+    def _sent_elt(cls, elt, context):
+        return [cls._word_elt(w, None) for w in xpath(elt, '*', cls.ns)]
+
+    @classmethod
+    def _para_elt(cls, elt, context):
+        return [cls._sent_elt(s, None) for s in xpath(elt, '*', cls.ns)]
+
+    @classmethod
+    def _tagged_word_elt(cls, elt, context):
+        if 'ana' not in elt.attrib:
+            return (elt.text, '')
+
+        if cls.__tags == "" and cls.__tagset == "msd":
+            return (elt.text, elt.attrib['ana'])
+        elif cls.__tags == "" and cls.__tagset == "universal":
+            return (elt.text, MTETagConverter.msd_to_universal(elt.attrib['ana']))
+        else:
+            tags = re.compile('^' + re.sub("-", ".", cls.__tags) + '.*$')
+            if tags.match(elt.attrib['ana']):
+                if cls.__tagset == "msd":
+                    return (elt.text, elt.attrib['ana'])
+                else:
+                    return (
+                        elt.text,
+                        MTETagConverter.msd_to_universal(elt.attrib['ana']),
+                    )
+            else:
+                return None
+
+    @classmethod
+    def _tagged_sent_elt(cls, elt, context):
+        return list(
+            filter(
+                lambda x: x is not None,
+                [cls._tagged_word_elt(w, None) for w in xpath(elt, '*', cls.ns)],
+            )
+        )
+
+    @classmethod
+    def _tagged_para_elt(cls, elt, context):
+        return list(
+            filter(
+                lambda x: x is not None,
+                [cls._tagged_sent_elt(s, None) for s in xpath(elt, '*', cls.ns)],
+            )
+        )
+
+    @classmethod
+    def _lemma_word_elt(cls, elt, context):
+        if 'lemma' not in elt.attrib:
+            return (elt.text, '')
+        else:
+            return (elt.text, elt.attrib['lemma'])
+
+    @classmethod
+    def _lemma_sent_elt(cls, elt, context):
+        return [cls._lemma_word_elt(w, None) for w in xpath(elt, '*', cls.ns)]
+
+    @classmethod
+    def _lemma_para_elt(cls, elt, context):
+        return [cls._lemma_sent_elt(s, None) for s in xpath(elt, '*', cls.ns)]
+
+    def words(self):
+        return MTECorpusView(
+            self.__file_path, MTEFileReader.word_path, MTEFileReader._word_elt
+        )
+
+    def sents(self):
+        return MTECorpusView(
+            self.__file_path, MTEFileReader.sent_path, MTEFileReader._sent_elt
+        )
+
+    def paras(self):
+        return MTECorpusView(
+            self.__file_path, MTEFileReader.para_path, MTEFileReader._para_elt
+        )
+
+    def lemma_words(self):
+        return MTECorpusView(
+            self.__file_path, MTEFileReader.word_path, MTEFileReader._lemma_word_elt
+        )
+
+    def tagged_words(self, tagset, tags):
+        MTEFileReader.__tagset = tagset
+        MTEFileReader.__tags = tags
+        return MTECorpusView(
+            self.__file_path, MTEFileReader.word_path, MTEFileReader._tagged_word_elt
+        )
+
+    def lemma_sents(self):
+        return MTECorpusView(
+            self.__file_path, MTEFileReader.sent_path, MTEFileReader._lemma_sent_elt
+        )
+
+    def tagged_sents(self, tagset, tags):
+        MTEFileReader.__tagset = tagset
+        MTEFileReader.__tags = tags
+        return MTECorpusView(
+            self.__file_path, MTEFileReader.sent_path, MTEFileReader._tagged_sent_elt
+        )
+
+    def lemma_paras(self):
+        return MTECorpusView(
+            self.__file_path, MTEFileReader.para_path, MTEFileReader._lemma_para_elt
+        )
+
+    def tagged_paras(self, tagset, tags):
+        MTEFileReader.__tagset = tagset
+        MTEFileReader.__tags = tags
+        return MTECorpusView(
+            self.__file_path, MTEFileReader.para_path, MTEFileReader._tagged_para_elt
+        )
+
+
+class MTETagConverter:
+    """
+    Class for converting msd tags to universal tags, more conversion
+    options are currently not implemented.
+    """
+
+    mapping_msd_universal = {
+        'A': 'ADJ',
+        'S': 'ADP',
+        'R': 'ADV',
+        'C': 'CONJ',
+        'D': 'DET',
+        'N': 'NOUN',
+        'M': 'NUM',
+        'Q': 'PRT',
+        'P': 'PRON',
+        'V': 'VERB',
+        '.': '.',
+        '-': 'X',
+    }
+
+    @staticmethod
+    def msd_to_universal(tag):
+        """
+        This function converts the annotation from the Multex-East to the universal tagset
+        as described in Chapter 5 of the NLTK-Book
+
+        Unknown Tags will be mapped to X. Punctuation marks are not supported in MSD tags, so
+        """
+        indicator = tag[0] if not tag[0] == "#" else tag[1]
+
+        if not indicator in MTETagConverter.mapping_msd_universal:
+            indicator = '-'
+
+        return MTETagConverter.mapping_msd_universal[indicator]
+
+
+class MTECorpusReader(TaggedCorpusReader):
+    """
+    Reader for corpora following the TEI-p5 xml scheme, such as MULTEXT-East.
+    MULTEXT-East contains part-of-speech-tagged words with a quite precise tagging
+    scheme. These tags can be converted to the Universal tagset
+    """
+
+    def __init__(self, root=None, fileids=None, encoding='utf8'):
+        """
+        Construct a new MTECorpusreader for a set of documents
+        located at the given root directory.  Example usage:
+
+            >>> root = '/...path to corpus.../'
+            >>> reader = MTECorpusReader(root, 'oana-*.xml', 'utf8') # doctest: +SKIP
+
+        :param root: The root directory for this corpus. (default points to location in multext config file)
+        :param fileids: A list or regexp specifying the fileids in this corpus. (default is oana-en.xml)
+        :param enconding: The encoding of the given files (default is utf8)
+        """
+        TaggedCorpusReader.__init__(self, root, fileids, encoding)
+
+    def __fileids(self, fileids):
+        if fileids is None:
+            fileids = self._fileids
+        elif isinstance(fileids, string_types):
+            fileids = [fileids]
+        # filter wrong userinput
+        fileids = filter(lambda x: x in self._fileids, fileids)
+        # filter multext-east sourcefiles that are not compatible to the teip5 specification
+        fileids = filter(lambda x: x not in ["oana-bg.xml", "oana-mk.xml"], fileids)
+        if not fileids:
+            print("No valid multext-east file specified")
+        return fileids
+
+    def readme(self):
+        """
+        Prints some information about this corpus.
+        :return: the content of the attached README file
+        :rtype: str
+        """
+        return self.open("00README.txt").read()
+
+    def raw(self, fileids=None):
+        """
+	    :param fileids: A list specifying the fileids that should be used.
+        :return: the given file(s) as a single string.
+        :rtype: str
+        """
+        return reduce([self.open(f).read() for f in self.__fileids(fileids)], [])
+
+    def words(self, fileids=None):
+        """
+	    :param fileids: A list specifying the fileids that should be used.
+        :return: the given file(s) as a list of words and punctuation symbols.
+        :rtype: list(str)
+        """
+        return concat(
+            [
+                MTEFileReader(os.path.join(self._root, f)).words()
+                for f in self.__fileids(fileids)
+            ]
+        )
+
+    def sents(self, fileids=None):
+        """
+	    :param fileids: A list specifying the fileids that should be used.
+        :return: the given file(s) as a list of sentences or utterances,
+                 each encoded as a list of word strings
+        :rtype: list(list(str))
+        """
+        return concat(
+            [
+                MTEFileReader(os.path.join(self._root, f)).sents()
+                for f in self.__fileids(fileids)
+            ]
+        )
+
+    def paras(self, fileids=None):
+        """
+	    :param fileids: A list specifying the fileids that should be used.
+        :return: the given file(s) as a list of paragraphs, each encoded as a list
+                 of sentences, which are in turn encoded as lists of word string
+        :rtype: list(list(list(str)))
+        """
+        return concat(
+            [
+                MTEFileReader(os.path.join(self._root, f)).paras()
+                for f in self.__fileids(fileids)
+            ]
+        )
+
+    def lemma_words(self, fileids=None):
+        """
+	    :param fileids: A list specifying the fileids that should be used.
+        :return: the given file(s) as a list of words, the corresponding lemmas
+                 and punctuation symbols, encoded as tuples (word, lemma)
+        :rtype: list(tuple(str,str))
+        """
+        return concat(
+            [
+                MTEFileReader(os.path.join(self._root, f)).lemma_words()
+                for f in self.__fileids(fileids)
+            ]
+        )
+
+    def tagged_words(self, fileids=None, tagset="msd", tags=""):
+        """
+	    :param fileids: A list specifying the fileids that should be used.
+        :param tagset: The tagset that should be used in the returned object,
+                       either "universal" or "msd", "msd" is the default
+        :param tags: An MSD Tag that is used to filter all parts of the used corpus
+                     that are not more precise or at least equal to the given tag
+        :return: the given file(s) as a list of tagged words and punctuation symbols
+                 encoded as tuples (word, tag)
+        :rtype: list(tuple(str, str))
+        """
+        if tagset == "universal" or tagset == "msd":
+            return concat(
+                [
+                    MTEFileReader(os.path.join(self._root, f)).tagged_words(
+                        tagset, tags
+                    )
+                    for f in self.__fileids(fileids)
+                ]
+            )
+        else:
+            print("Unknown tagset specified.")
+
+    def lemma_sents(self, fileids=None):
+        """
+	    :param fileids: A list specifying the fileids that should be used.
+        :return: the given file(s) as a list of sentences or utterances, each
+                 encoded as a list of tuples of the word and the corresponding
+                 lemma (word, lemma)
+        :rtype: list(list(tuple(str, str)))
+        """
+        return concat(
+            [
+                MTEFileReader(os.path.join(self._root, f)).lemma_sents()
+                for f in self.__fileids(fileids)
+            ]
+        )
+
+    def tagged_sents(self, fileids=None, tagset="msd", tags=""):
+        """
+	    :param fileids: A list specifying the fileids that should be used.
+        :param tagset: The tagset that should be used in the returned object,
+                       either "universal" or "msd", "msd" is the default
+        :param tags: An MSD Tag that is used to filter all parts of the used corpus
+                     that are not more precise or at least equal to the given tag
+        :return: the given file(s) as a list of sentences or utterances, each
+                 each encoded as a list of (word,tag) tuples
+        :rtype: list(list(tuple(str, str)))
+        """
+        if tagset == "universal" or tagset == "msd":
+            return concat(
+                [
+                    MTEFileReader(os.path.join(self._root, f)).tagged_sents(
+                        tagset, tags
+                    )
+                    for f in self.__fileids(fileids)
+                ]
+            )
+        else:
+            print("Unknown tagset specified.")
+
+    def lemma_paras(self, fileids=None):
+        """
+	    :param fileids: A list specifying the fileids that should be used.
+        :return: the given file(s) as a list of paragraphs, each encoded as a
+                 list of sentences, which are in turn encoded as a list of
+                 tuples of the word and the corresponding lemma (word, lemma)
+        :rtype: list(List(List(tuple(str, str))))
+        """
+        return concat(
+            [
+                MTEFileReader(os.path.join(self._root, f)).lemma_paras()
+                for f in self.__fileids(fileids)
+            ]
+        )
+
+    def tagged_paras(self, fileids=None, tagset="msd", tags=""):
+        """
+	    :param fileids: A list specifying the fileids that should be used.
+        :param tagset: The tagset that should be used in the returned object,
+                       either "universal" or "msd", "msd" is the default
+        :param tags: An MSD Tag that is used to filter all parts of the used corpus
+                     that are not more precise or at least equal to the given tag
+        :return: the given file(s) as a list of paragraphs, each encoded as a
+                 list of sentences, which are in turn encoded as a list
+                 of (word,tag) tuples
+        :rtype: list(list(list(tuple(str, str))))
+        """
+        if tagset == "universal" or tagset == "msd":
+            return concat(
+                [
+                    MTEFileReader(os.path.join(self._root, f)).tagged_paras(
+                        tagset, tags
+                    )
+                    for f in self.__fileids(fileids)
+                ]
+            )
+        else:
+            print("Unknown tagset specified.")
--- a/venv/lib/python3.7/site-packages/nltk/corpus/reader/nkjp.py
+++ b/venv/lib/python3.7/site-packages/nltk/corpus/reader/nkjp.py
@@ -0,0 +1,489 @@
+# Natural Language Toolkit: NKJP Corpus Reader
+#
+# Copyright (C) 2001-2019 NLTK Project
+# Author: Gabriela Kaczka
+# URL: <http://nltk.org/>
+# For license information, see LICENSE.TXT
+
+import functools
+import os
+import re
+import tempfile
+
+from six import string_types
+
+from nltk.corpus.reader.util import concat
+from nltk.corpus.reader.xmldocs import XMLCorpusReader, XMLCorpusView
+
+
+def _parse_args(fun):
+    """
+    Decorator for reader methods taking ``(self, fileids, **kwargs)``.
+
+    When the caller passes no ``fileids`` (or any falsy value), the
+    wrapped method receives the reader's ``_paths`` instead.
+    """
+
+    @functools.wraps(fun)
+    def wrapper(self, fileids=None, **kwargs):
+        selected = fileids if fileids else self._paths
+        return fun(self, selected, **kwargs)
+
+    return wrapper
+
+
+class NKJPCorpusReader(XMLCorpusReader):
+    """
+    Corpus reader designed to work with National Corpus of Polish.
+    See http://nkjp.pl/ for more details about NKJP.
+
+    Documents are located through their ``header.xml`` file; the
+    ``*_MODE`` constants select which view ``_view`` builds for a
+    document.
+    """
+
+    WORDS_MODE = 0
+    SENTS_MODE = 1
+    HEADER_MODE = 2
+    RAW_MODE = 3
+
+    def __init__(self, root, fileids='.*'):
+        """
+        Corpus reader designed to work with National Corpus of Polish.
+        See http://nkjp.pl/ for more details about NKJP.
+        use example:
+        import nltk
+        import nkjp
+        from nkjp import NKJPCorpusReader
+        x = NKJPCorpusReader(root='/home/USER/nltk_data/corpora/nkjp/', fileids='') # obtain the whole corpus
+        x.header()
+        x.raw()
+        x.words()
+        x.tagged_words(tags=['subst', 'comp'])  #Link to find more tags: nkjp.pl/poliqarp/help/ense2.html
+        x.sents()
+        x = NKJPCorpusReader(root='/home/USER/nltk_data/corpora/nkjp/', fileids='Wilk*') # obtain particular file(s)
+        x.header(fileids=['WilkDom', '/home/USER/nltk_data/corpora/nkjp/WilkWilczy'])
+        x.tagged_words(fileids=['WilkDom', '/home/USER/nltk_data/corpora/nkjp/WilkWilczy'], tags=['subst', 'comp'])
+
+        :param root: The root directory of the corpus.
+        :param fileids: Either a string, which is extended with
+            ``'.*/header.xml'`` before being handed to ``XMLCorpusReader``,
+            or an iterable of document names, each extended with
+            ``'/header.xml'``.
+        """
+        if isinstance(fileids, string_types):
+            XMLCorpusReader.__init__(self, root, fileids + '.*/header.xml')
+        else:
+            XMLCorpusReader.__init__(
+                self, root, [fileid + '/header.xml' for fileid in fileids]
+            )
+        self._paths = self.get_paths()
+
+    def get_paths(self):
+        """
+        Returns the document directories of this corpus joined onto the
+        corpus root (the part of each fileid before ``header.xml``).
+        """
+        return [
+            os.path.join(str(self._root), f.split("header.xml")[0])
+            for f in self._fileids
+        ]
+
+    def fileids(self):
+        """
+        Returns a list of file identifiers for the fileids that make up
+        this corpus.
+        """
+        return [f.split("header.xml")[0] for f in self._fileids]
+
+    def _view(self, filename, tags=None, **kwargs):
+        """
+        Returns a view specialised for use with particular corpus file.
+
+        :param filename: The document path handed to the view.
+        :param tags: Tag filter forwarded to the view.
+        :param kwargs: ``mode`` selects the view (one of the ``*_MODE``
+            class constants, ``WORDS_MODE`` by default); any other
+            keyword is ignored.
+        :raise NameError: if ``mode`` is not one of the known modes.
+        """
+        mode = kwargs.pop('mode', NKJPCorpusReader.WORDS_MODE)
+        # The modes are plain ints, so compare by value: identity (``is``)
+        # only happens to work because CPython caches small integers.
+        if mode == NKJPCorpusReader.WORDS_MODE:
+            return NKJPCorpus_Morph_View(filename, tags=tags)
+        if mode == NKJPCorpusReader.SENTS_MODE:
+            return NKJPCorpus_Segmentation_View(filename, tags=tags)
+        if mode == NKJPCorpusReader.HEADER_MODE:
+            return NKJPCorpus_Header_View(filename, tags=tags)
+        if mode == NKJPCorpusReader.RAW_MODE:
+            return NKJPCorpus_Text_View(
+                filename, tags=tags, mode=NKJPCorpus_Text_View.RAW_MODE
+            )
+        raise NameError('No such mode!')
+
+    def add_root(self, fileid):
+        """
+        Add root if necessary to specified fileid.
+        """
+        if self.root in fileid:
+            return fileid
+        return self.root + fileid
+
+    def _query(self, fileids, mode, **kwargs):
+        """
+        Build a ``mode`` view for every fileid, run its ``handle_query()``
+        and concatenate the per-document results.
+        """
+        return concat(
+            [
+                self._view(self.add_root(fileid), mode=mode, **kwargs).handle_query()
+                for fileid in fileids
+            ]
+        )
+
+    @_parse_args
+    def header(self, fileids=None, **kwargs):
+        """
+        Returns header(s) of specified fileids.
+        """
+        return self._query(fileids, NKJPCorpusReader.HEADER_MODE, **kwargs)
+
+    @_parse_args
+    def sents(self, fileids=None, **kwargs):
+        """
+        Returns sentences in specified fileids.
+        """
+        return self._query(fileids, NKJPCorpusReader.SENTS_MODE, **kwargs)
+
+    @_parse_args
+    def words(self, fileids=None, **kwargs):
+        """
+        Returns words in specified fileids.
+        """
+        return self._query(fileids, NKJPCorpusReader.WORDS_MODE, **kwargs)
+
+    @_parse_args
+    def tagged_words(self, fileids=None, **kwargs):
+        """
+        Call with specified tags as a list, e.g. tags=['subst', 'comp'].
+        Returns tagged words in specified fileids.
+        """
+        tags = kwargs.pop('tags', [])
+        return self._query(
+            fileids, NKJPCorpusReader.WORDS_MODE, tags=tags, **kwargs
+        )
+
+    @_parse_args
+    def raw(self, fileids=None, **kwargs):
+        """
+        Returns the text of specified fileids: for each document, the
+        contents of its ``text.xml`` joined into a single string.
+        """
+        return self._query(fileids, NKJPCorpusReader.RAW_MODE, **kwargs)
+
+
+class NKJPCorpus_Header_View(XMLCorpusView):
+    """
+    HEADER_MODE
+    A stream backed corpus view specialized for use with
+    header.xml files in NKJP corpus.
+    """
+
+    # Children of ``bibl`` that are reported, in output order.
+    _BIBL_FIELDS = ('title', 'author', 'date', 'publisher', 'idno', 'note')
+
+    def __init__(self, filename, **kwargs):
+        """
+        :param filename: Document path; ``'header.xml'`` is appended to it
+            directly, so it is expected to end with a path separator.
+        :param kwargs: Accepted for interface compatibility and ignored.
+        """
+        self.tagspec = ".*/sourceDesc$"
+        XMLCorpusView.__init__(self, filename + 'header.xml', self.tagspec)
+
+    def handle_query(self):
+        """
+        Read the whole header file and return the list of dictionaries
+        produced by ``handle_elt`` for every matching element.
+        """
+        self._open()
+        header = []
+        try:
+            while True:
+                segm = XMLCorpusView.read_block(self, self._stream)
+                if not segm:
+                    break
+                header.extend(segm)
+        finally:
+            # Release the stream even when reading or parsing fails.
+            self.close()
+        return header
+
+    @staticmethod
+    def _joined_text(elt, path):
+        """
+        Return the stripped texts of all ``path`` matches under ``elt``
+        joined by newlines, or an empty list when nothing matches.
+        """
+        found = elt.findall(path)
+        if not found:
+            # Historical "missing" value; kept so existing callers see
+            # the same result as before.
+            return []
+        # An empty element has ``text is None``; treat it as ''.
+        return '\n'.join((node.text or '').strip() for node in found)
+
+    def handle_elt(self, elt, context):
+        """
+        Summarise one matched element as a dictionary with the keys
+        ``title``, ``author``, ``date``, ``publisher``, ``idno`` and
+        ``note``, each taken from the corresponding ``bibl/<key>``
+        children.
+        """
+        summary = {}
+        for field in self._BIBL_FIELDS:
+            summary[field] = self._joined_text(elt, 'bibl/' + field)
+        return summary
+
+
+class XML_Tool:
+    """
+    Helper class creating xml file to one without references to nkjp: namespace.
+    That's needed because the XMLCorpusView assumes that one can find short substrings
+    of XML that are valid XML, which is not true if a namespace is declared at top level
+
+    The input is processed as bytes, so no text encoding is assumed and
+    the output is written unchanged apart from the stripped markup.
+    """
+
+    # Literal tags replaced by a single space (used in ann_segmentation.xml).
+    _STRIPPED_TAGS = (b'<nkjp:paren>', b'</nkjp:paren>', b'<choice>', b'</choice>')
+
+    def __init__(self, root, filename):
+        """
+        :param root: Directory containing the file to preprocess.
+        :param filename: Name of the XML file inside ``root``.
+        """
+        self.read_file = os.path.join(root, filename)
+        # Opened in the default binary mode; the caller removes it through
+        # ``remove_preprocessed_file`` once it is no longer needed.
+        self.write_file = tempfile.NamedTemporaryFile(delete=False)
+
+    def build_preprocessed_file(self):
+        """
+        Copy ``read_file`` into the temporary file with every
+        ``nkjp:...`` token (up to and including the next space) and the
+        ``nkjp:paren`` / ``choice`` tags replaced by a single space.
+
+        :return: the name of the temporary file.
+        :raise Exception: whatever error occurred while reading or
+            writing; the temporary file is removed before re-raising.
+        """
+        try:
+            fw = self.write_file
+            try:
+                # Bytes in, bytes out: the temp file is binary, so writing
+                # ``str`` to it fails on Python 3.
+                with open(self.read_file, 'rb') as fr:
+                    for line in fr:
+                        ret = re.sub(br'nkjp:[^ ]* ', b' ', line)  # in all files
+                        for tag in self._STRIPPED_TAGS:
+                            ret = ret.replace(tag, b' ')
+                        fw.write(ret)
+            finally:
+                fw.close()
+            return self.write_file.name
+        except Exception:
+            self.remove_preprocessed_file()
+            # Re-raise the original error instead of a bare ``Exception``.
+            raise
+
+    def remove_preprocessed_file(self):
+        """Delete the temporary file created by this tool."""
+        os.remove(self.write_file.name)
+
+
+class NKJPCorpus_Segmentation_View(XMLCorpusView):
+    """
+    A stream backed corpus view specialized for use with
+    ann_segmentation.xml files in NKJP corpus.
+
+    Each segment is referenced through its ``corresp`` attribute, which
+    the helpers below parse as ``...(segment_id,offset,length)``.
+    """
+
+    def __init__(self, filename, **kwargs):
+        """
+        :param filename: Document directory holding ``text.xml`` and
+            ``ann_segmentation.xml``.
+        :param kwargs: Accepted for interface compatibility and ignored.
+        """
+        self.tagspec = '.*p/.*s'
+        # intersperse NKJPCorpus_Text_View
+        self.text_view = NKJPCorpus_Text_View(
+            filename, mode=NKJPCorpus_Text_View.SENTS_MODE
+        )
+        # Reading the text fills ``text_view.segm_dict``, which
+        # ``get_sentences`` looks segments up in.
+        self.text_view.handle_query()
+        # xml preprocessing
+        self.xml_tool = XML_Tool(filename, 'ann_segmentation.xml')
+        # base class init
+        XMLCorpusView.__init__(
+            self, self.xml_tool.build_preprocessed_file(), self.tagspec
+        )
+
+    def get_segm_id(self, example_word):
+        # returns the id of the text segment the reference points into
+        return example_word.split('(')[1].split(',')[0]
+
+    def get_sent_beg(self, beg_word):
+        # returns index of beginning letter in sentence
+        return int(beg_word.split(',')[1])
+
+    def get_sent_end(self, end_word):
+        # returns index of end letter in sentence
+        splitted = end_word.split(')')[0].split(',')
+        return int(splitted[1]) + int(splitted[2])
+
+    def get_sentences(self, sent_segm):
+        # returns one sentence
+        segm_id = self.get_segm_id(sent_segm[0])
+        segm = self.text_view.segm_dict[segm_id]  # text segment
+        beg = self.get_sent_beg(sent_segm[0])
+        end = self.get_sent_end(sent_segm[-1])
+        return segm[beg:end]
+
+    def remove_choice(self, segm):
+        """
+        Drop references that overlap the previously kept one within the
+        same text segment, so that only the first alternative survives.
+        """
+        ret = []
+        prev_txt_end = -1
+        prev_txt_nr = -1
+        for word in segm:
+            txt_nr = self.get_segm_id(word)
+            # get increasing sequence of ids: in case of choice get first possibility
+            if self.get_sent_beg(word) > prev_txt_end - 1 or prev_txt_nr != txt_nr:
+                ret.append(word)
+                prev_txt_end = self.get_sent_end(word)
+            prev_txt_nr = txt_nr
+
+        return ret
+
+    def handle_query(self):
+        """
+        Read the whole segmentation file and return the list of
+        sentences (substrings of the text segments).
+
+        The preprocessed temporary file is removed exactly once, whether
+        or not reading succeeds, and any error propagates unchanged.
+        """
+        try:
+            self._open()
+            try:
+                sentences = []
+                while True:
+                    sent_segm = XMLCorpusView.read_block(self, self._stream)
+                    if not sent_segm:
+                        break
+                    for segm in sent_segm:
+                        segm = self.remove_choice(segm)
+                        sentences.append(self.get_sentences(segm))
+            finally:
+                self.close()
+            return sentences
+        finally:
+            self.xml_tool.remove_preprocessed_file()
+
+    def handle_elt(self, elt, context):
+        # one sentence element -> the ``corresp`` references of its children
+        return [seg.get('corresp') for seg in elt]
+
+
+class NKJPCorpus_Text_View(XMLCorpusView):
+    """
+    A stream backed corpus view specialized for use with
+    text.xml files in NKJP corpus.
+    """
+
+    SENTS_MODE = 0
+    RAW_MODE = 1
+
+    def __init__(self, filename, **kwargs):
+        """
+        :param filename: Document directory holding ``text.xml``.
+        :param kwargs: ``mode`` is ``SENTS_MODE`` (default) or
+            ``RAW_MODE``; other keywords are ignored.
+        """
+        self.mode = kwargs.pop('mode', NKJPCorpus_Text_View.SENTS_MODE)
+        self.tagspec = '.*/div/ab'
+        # segment id -> segment text, filled by ``handle_elt`` in SENTS_MODE
+        self.segm_dict = dict()
+        # xml preprocessing
+        self.xml_tool = XML_Tool(filename, 'text.xml')
+        # base class init
+        XMLCorpusView.__init__(
+            self, self.xml_tool.build_preprocessed_file(), self.tagspec
+        )
+
+    def handle_query(self):
+        """
+        Read the whole text file and return the result of ``read_block``.
+
+        The preprocessed temporary file is removed exactly once, whether
+        or not reading succeeds, and any error propagates unchanged.
+        """
+        try:
+            self._open()
+            try:
+                return self.read_block(self._stream)
+            finally:
+                self.close()
+        finally:
+            self.xml_tool.remove_preprocessed_file()
+
+    def read_block(self, stream, tagspec=None, elt_handler=None):
+        """
+        Returns text as a list of sentences.
+
+        All blocks are read until the stream is exhausted and their
+        texts are joined with single spaces into a one-element list.
+        ``tagspec`` and ``elt_handler`` are accepted for signature
+        compatibility and not used.
+        """
+        txt = []
+        while True:
+            segm = XMLCorpusView.read_block(self, stream)
+            if not segm:
+                break
+            txt.extend(segm)
+
+        return [' '.join(txt)]
+
+    def get_segm_id(self, elt):
+        # value of the first attribute whose name ends with 'id', else None
+        for attr in elt.attrib:
+            if attr.endswith('id'):
+                return elt.get(attr)
+
+    def handle_elt(self, elt, context):
+        # fill dictionary to use later in sents mode
+        # (modes are ints: compare by value rather than identity)
+        if self.mode == NKJPCorpus_Text_View.SENTS_MODE:
+            self.segm_dict[self.get_segm_id(elt)] = elt.text
+        return elt.text
+
+
+class NKJPCorpus_Morph_View(XMLCorpusView):
+    """
+    A stream backed corpus view specialized for use with
+    ann_morphosyntax.xml files in NKJP corpus.
+    """
+
+    def __init__(self, filename, **kwargs):
+        """
+        :param filename: Document directory holding
+            ``ann_morphosyntax.xml``.
+        :param kwargs: ``tags`` is an optional collection of ctag values
+            used as a filter; ``None`` (default) accepts every word.
+        """
+        self.tags = kwargs.pop('tags', None)
+        self.tagspec = '.*/seg/fs'
+        self.xml_tool = XML_Tool(filename, 'ann_morphosyntax.xml')
+        XMLCorpusView.__init__(
+            self, self.xml_tool.build_preprocessed_file(), self.tagspec
+        )
+
+    def handle_query(self):
+        """
+        Read the whole file and return the list of words kept by
+        ``handle_elt`` (elements it rejects yield ``None`` and are
+        dropped here).
+
+        The preprocessed temporary file is removed exactly once, whether
+        or not reading succeeds, and any error propagates unchanged.
+        """
+        try:
+            self._open()
+            try:
+                words = []
+                while True:
+                    segm = XMLCorpusView.read_block(self, self._stream)
+                    if not segm:
+                        break
+                    words.extend(part for part in segm if part is not None)
+            finally:
+                self.close()
+            return words
+        finally:
+            self.xml_tool.remove_preprocessed_file()
+
+    @staticmethod
+    def _ctag_values(interps):
+        """
+        Yield the ``value`` attribute of every element found under a
+        ``type='lex'`` child and, inside it, a ``name='ctag'`` child.
+        """
+        for lex in interps:
+            if lex.get('type') != 'lex':
+                continue
+            for feature in lex:
+                if feature.get('name') != 'ctag':
+                    continue
+                for symbol in feature:
+                    value = symbol.get('value')
+                    if value is not None:
+                        yield value
+
+    def handle_elt(self, elt, context):
+        """
+        Return the orthographic form of one segment, or ``None`` when
+        the segment is filtered out.
+
+        A segment is kept when no tag filter is set or one of its ctag
+        values is in ``self.tags``, and it is not marked ``interp``
+        (an ``interp`` value that is itself in ``self.tags`` counts as
+        a match, not as an exclusion).
+        """
+        word = ''
+        # if tags not specified, then always return word
+        tag_matched = self.tags is None
+        is_interp = False
+
+        for child in elt:
+            name = child.get('name')
+            if name == 'orth':
+                # get word
+                for symbol in child:
+                    if symbol.tag == 'string':
+                        word = symbol.text
+            elif name == 'interps':
+                for value in self._ctag_values(child):
+                    if self.tags is not None and value in self.tags:
+                        tag_matched = True
+                    elif value == 'interp':
+                        is_interp = True
+
+        if tag_matched and not is_interp:
+            return word
+        return None
--- a/venv/lib/python3.7/site-packages/nltk/corpus/reader/nombank.py
+++ b/venv/lib/python3.7/site-packages/nltk/corpus/reader/nombank.py
@@ -0,0 +1,485 @@
+# Natural Language Toolkit: NomBank Corpus Reader
+#
+# Copyright (C) 2001-2019 NLTK Project
+# Authors: Paul Bedaride <paul.bedaride@gmail.com>
+#          Edward Loper <edloper@gmail.com>
+# URL: <http://nltk.org/>
+# For license information, see LICENSE.TXT
+
+from __future__ import unicode_literals
+from xml.etree import ElementTree
+from functools import total_ordering
+
+from six import string_types
+
+from nltk.tree import Tree
+from nltk.internals import raise_unorderable_types
+from nltk.compat import python_2_unicode_compatible
+
+from nltk.corpus.reader.util import *
+from nltk.corpus.reader.api import *
+
+
+class NombankCorpusReader(CorpusReader):
+    """
+    Corpus reader for the nombank corpus, which augments the Penn
+    Treebank with information about the predicate argument structure
+    of every noun instance.  The corpus consists of two parts: the
+    predicate-argument annotations themselves, and a set of "frameset
+    files" which define the argument labels used by the annotations,
+    on a per-noun basis.  Each "frameset file" contains one or more
+    predicates, such as ``'turn'`` or ``'turn_on'``, each of which is
+    divided into coarse-grained word senses called "rolesets".  For
+    each "roleset", the frameset file provides descriptions of the
+    argument roles, along with examples.
+    """
+
+    def __init__(
+        self,
+        root,
+        nomfile,
+        framefiles='',
+        nounsfile=None,
+        parse_fileid_xform=None,
+        parse_corpus=None,
+        encoding='utf8',
+    ):
+        """
+        :param root: The root directory for this corpus.
+        :param nomfile: The name of the file containing the predicate-
+            argument annotations (relative to ``root``).
+        :param framefiles: A list or regexp specifying the frameset
+            fileids for this corpus.
+        :param nounsfile: The name of the file read by ``nouns()``.
+        :param parse_fileid_xform: A transform that should be applied
+            to the fileids in this corpus.  This should be a function
+            of one argument (a fileid) that returns a string (the new
+            fileid).
+        :param parse_corpus: The corpus containing the parse trees
+            corresponding to this corpus.  These parse trees are
+            necessary to resolve the tree pointers used by nombank.
+        :param encoding: Passed on to ``CorpusReader.__init__``.
+        """
+        # If framefiles is specified as a regexp, expand it; otherwise
+        # take the given names as they are.  (The expansion used to be
+        # overwritten by ``list(framefiles)``, i.e. by the characters of
+        # the regexp string.)
+        if isinstance(framefiles, string_types):
+            self._fileids = find_corpus_fileids(root, framefiles)
+        else:
+            self._fileids = list(framefiles)
+        # Initialize the corpus reader.
+        # NOTE(review): CorpusReader.__init__ presumably sets _fileids
+        # itself as well -- not visible from this file.
+        CorpusReader.__init__(self, root, framefiles, encoding)
+
+        # Record our nom file & nouns file.
+        self._nomfile = nomfile
+        self._nounsfile = nounsfile
+        self._parse_fileid_xform = parse_fileid_xform
+        self._parse_corpus = parse_corpus
+
+    def raw(self, fileids=None):
+        """
+        :param fileids: A single fileid, a list of fileids, or None for
+            all frameset files of this corpus.
+        :return: the text contents of the given fileids, as a single string.
+        """
+        if fileids is None:
+            fileids = self._fileids
+        elif isinstance(fileids, string_types):
+            fileids = [fileids]
+        return concat([self.open(f).read() for f in fileids])
+
+    def instances(self, baseform=None):
+        """
+        :param baseform: If given, only instances whose ``baseform``
+            equals this value are included.
+        :return: a corpus view that acts as a list of
+        ``NombankInstance`` objects, one for each noun in the corpus.
+        """
+        kwargs = {}
+        if baseform is not None:
+            kwargs['instance_filter'] = lambda inst: inst.baseform == baseform
+        return StreamBackedCorpusView(
+            self.abspath(self._nomfile),
+            lambda stream: self._read_instance_block(stream, **kwargs),
+            encoding=self.encoding(self._nomfile),
+        )
+
+    def lines(self):
+        """
+        :return: a corpus view that acts as a list of strings, one for
+        each line in the predicate-argument annotation file.
+        """
+        return StreamBackedCorpusView(
+            self.abspath(self._nomfile),
+            read_line_block,
+            encoding=self.encoding(self._nomfile),
+        )
+
+    def roleset(self, roleset_id):
+        """
+        :param roleset_id: A roleset identifier of the form
+            ``'<baseform>.<sense>'``.
+        :return: the xml description for the given roleset.
+        :raise ValueError: if the frameset file or the roleset itself
+            cannot be found.
+        """
+        # Map the escaped baseform used in roleset ids onto the name
+        # used for the frameset file.
+        baseform = roleset_id.split('.')[0]
+        baseform = baseform.replace('perc-sign', '%')
+        baseform = baseform.replace('oneslashonezero', '1/10').replace(
+            '1/10', '1-slash-10'
+        )
+        framefile = 'frames/%s.xml' % baseform
+        if framefile not in self.fileids():
+            raise ValueError('Frameset file for %s not found' % roleset_id)
+
+        # n.b.: The encoding for XML fileids is specified by the file
+        # itself; so we ignore self._encoding here.
+        etree = ElementTree.parse(self.abspath(framefile).open()).getroot()
+        for roleset in etree.findall('predicate/roleset'):
+            if roleset.attrib['id'] == roleset_id:
+                return roleset
+        raise ValueError('Roleset %s not found in %s' % (roleset_id, framefile))
+
+    def rolesets(self, baseform=None):
+        """
+        :param baseform: If given, only the rolesets of
+            ``frames/<baseform>.xml`` are returned; otherwise those of
+            every frameset file.
+        :return: list of xml descriptions for rolesets.
+        :raise ValueError: if ``baseform`` has no frameset file.
+        """
+        if baseform is not None:
+            framefile = 'frames/%s.xml' % baseform
+            if framefile not in self.fileids():
+                raise ValueError('Frameset file for %s not found' % baseform)
+            framefiles = [framefile]
+        else:
+            framefiles = self.fileids()
+
+        rsets = []
+        for framefile in framefiles:
+            # n.b.: The encoding for XML fileids is specified by the file
+            # itself; so we ignore self._encoding here.
+            etree = ElementTree.parse(self.abspath(framefile).open()).getroot()
+            rsets.append(etree.findall('predicate/roleset'))
+        return LazyConcatenation(rsets)
+
+    def nouns(self):
+        """
+        :return: a corpus view that acts as a list of all noun lemmas
+        in this corpus (from the nombank.1.0.words file).
+        """
+        return StreamBackedCorpusView(
+            self.abspath(self._nounsfile),
+            read_line_block,
+            encoding=self.encoding(self._nounsfile),
+        )
+
+    def _read_instance_block(self, stream, instance_filter=lambda inst: True):
+        """
+        Read up to 100 lines from ``stream`` and return the parsed
+        ``NombankInstance`` objects accepted by ``instance_filter``;
+        blank lines are skipped.
+        """
+        block = []
+
+        # Read 100 at a time.
+        for _ in range(100):
+            line = stream.readline().strip()
+            if line:
+                inst = NombankInstance.parse(
+                    line, self._parse_fileid_xform, self._parse_corpus
+                )
+                if instance_filter(inst):
+                    block.append(inst)
+
+        return block
+
+
+######################################################################
+# { Nombank Instance & related datatypes
+######################################################################
+
+
+@python_2_unicode_compatible
+class NombankInstance(object):
+    """
+    One annotated noun instance from the nombank annotation file.
+
+    :ivar fileid: The name of the file containing the parse tree for
+        this instance's sentence.
+    :ivar sentnum: The sentence number of this sentence within
+        ``fileid``.  Indexing starts from zero.
+    :ivar wordnum: The word number of this instance's predicate within
+        its containing sentence.  Word numbers are indexed starting from
+        zero, and include traces and other empty parse elements.
+    :ivar baseform: The baseform of the predicate.
+    :ivar sensenumber: The sense number of the predicate.
+    :ivar predicate: A ``NombankTreePointer`` indicating the position of
+        this instance's predicate within its containing sentence.
+    :ivar predid: Identifier of the predicate.
+    :ivar arguments: A tuple of tuples (argloc, argid), specifying the
+        location and identifier for each of the predicate's argument in
+        the containing sentence.  Argument identifiers are strings such
+        as ``'ARG0'`` or ``'ARGM-TMP'``.  This does *not* contain the
+        predicate.
+    :ivar parse_corpus: A corpus reader for the parse trees
+        corresponding to the instances in this nombank corpus.
+    """
+
+    def __init__(
+        self,
+        fileid,
+        sentnum,
+        wordnum,
+        baseform,
+        sensenumber,
+        predicate,
+        predid,
+        arguments,
+        parse_corpus=None,
+    ):
+        self.fileid = fileid
+        self.sentnum = sentnum
+        self.wordnum = wordnum
+        self.baseform = baseform
+        self.sensenumber = sensenumber
+        self.predicate = predicate
+        self.predid = predid
+        self.arguments = tuple(arguments)
+        self.parse_corpus = parse_corpus
+
+    @property
+    def roleset(self):
+        """The name of the roleset used by this instance's predicate.
+        Use ``nombank.roleset() <NombankCorpusReader.roleset>`` to
+        look up information about the roleset."""
+        r = self.baseform.replace('%', 'perc-sign')
+        r = r.replace('1/10', '1-slash-10').replace('1-slash-10', 'oneslashonezero')
+        return '%s.%s' % (r, self.sensenumber)
+
+    def __repr__(self):
+        return '<NombankInstance: %s, sent %s, word %s>' % (
+            self.fileid,
+            self.sentnum,
+            self.wordnum,
+        )
+
+    def __str__(self):
+        # Same layout as an annotation line: the five header fields
+        # followed by the sorted arguments, predicate included as 'rel'.
+        s = '%s %s %s %s %s' % (
+            self.fileid,
+            self.sentnum,
+            self.wordnum,
+            self.baseform,
+            self.sensenumber,
+        )
+        items = self.arguments + ((self.predicate, 'rel'),)
+        for (argloc, argid) in sorted(items):
+            s += ' %s-%s' % (argloc, argid)
+        return s
+
+    def _get_tree(self):
+        if self.parse_corpus is None:
+            return None
+        if self.fileid not in self.parse_corpus.fileids():
+            return None
+        return self.parse_corpus.parsed_sents(self.fileid)[self.sentnum]
+
+    tree = property(
+        _get_tree,
+        doc="""
+        The parse tree corresponding to this instance, or None if
+        the corresponding tree is not available.""",
+    )
+
+    @staticmethod
+    def parse(s, parse_fileid_xform=None, parse_corpus=None):
+        """
+        Build a ``NombankInstance`` from one annotation line.
+
+        :param s: The line: five whitespace-separated header fields
+            (fileid, sentence number, word number, baseform, sense
+            number) followed by ``location-label`` entries, exactly one
+            of which must contain ``-rel``.
+        :param parse_fileid_xform: Optional function applied to the
+            fileid before it is stored.
+        :param parse_corpus: Stored on the instance as ``parse_corpus``.
+        :raise ValueError: if the line has fewer than six fields or does
+            not contain exactly one ``-rel`` entry.
+        """
+        pieces = s.split()
+        if len(pieces) < 6:
+            raise ValueError('Badly formatted nombank line: %r' % s)
+
+        # Divide the line into its basic pieces.
+        (fileid, sentnum, wordnum, baseform, sensenumber) = pieces[:5]
+
+        # Separate the predicate entry from the arguments without
+        # mutating the list being scanned: popping while enumerating
+        # skips the entry right after the match, so a second '-rel'
+        # entry could slip past the check below.
+        entries = pieces[5:]
+        rel = [entry for entry in entries if '-rel' in entry]
+        if len(rel) != 1:
+            raise ValueError('Badly formatted nombank line: %r' % s)
+        args = [entry for entry in entries if '-rel' not in entry]
+
+        # Apply the fileid selector, if any.
+        if parse_fileid_xform is not None:
+            fileid = parse_fileid_xform(fileid)
+
+        # Convert sentence & word numbers to ints.
+        sentnum = int(sentnum)
+        wordnum = int(wordnum)
+
+        # Parse the predicate location.
+        predloc, predid = rel[0].split('-', 1)
+        predicate = NombankTreePointer.parse(predloc)
+
+        # Parse the arguments.
+        arguments = []
+        for arg in args:
+            argloc, argid = arg.split('-', 1)
+            arguments.append((NombankTreePointer.parse(argloc), argid))
+
+        # Put it all together.
+        return NombankInstance(
+            fileid,
+            sentnum,
+            wordnum,
+            baseform,
+            sensenumber,
+            predicate,
+            predid,
+            arguments,
+            parse_corpus,
+        )
+
+
+class NombankPointer(object):
+    """
+    Abstract base class for the pointers nombank uses to identify one
+    or more constituents in a parse tree.  It has three concrete
+    subclasses:
+
+    - ``NombankTreePointer`` is used to point to single constituents.
+    - ``NombankSplitTreePointer`` is used to point to 'split'
+      constituents, which consist of a sequence of two or more
+      ``NombankTreePointer`` pointers.
+    - ``NombankChainTreePointer`` is used to point to entire trace
+      chains in a tree.  It consists of a sequence of pieces, which
+      can be ``NombankTreePointer`` or ``NombankSplitTreePointer`` pointers.
+    """
+
+    def __init__(self):
+        # Only subclasses may be instantiated.
+        is_abstract_base = self.__class__ == NombankPointer
+        if is_abstract_base:
+            raise NotImplementedError()
+
+
+@python_2_unicode_compatible
+class NombankChainTreePointer(NombankPointer):
+    """
+    Pointer to a trace chain: a sequence of pieces written as
+    ``piece*piece*...``.
+    """
+
+    def __init__(self, pieces):
+        # A list of the pieces that make up this chain.  Elements may
+        # be either ``NombankSplitTreePointer`` or
+        # ``NombankTreePointer`` pointers.
+        self.pieces = pieces
+
+    def __str__(self):
+        rendered = ['%s' % piece for piece in self.pieces]
+        return '*'.join(rendered)
+
+    def __repr__(self):
+        return '<NombankChainTreePointer: %s>' % self
+
+    def select(self, tree):
+        """
+        Return a ``*CHAIN*`` tree whose children are the constituents
+        each piece selects from ``tree``.
+
+        :raise ValueError: if ``tree`` is None.
+        """
+        if tree is None:
+            raise ValueError('Parse tree not avaialable')
+        children = []
+        for piece in self.pieces:
+            children.append(piece.select(tree))
+        return Tree('*CHAIN*', children)
+
+
+@python_2_unicode_compatible
+class NombankSplitTreePointer(NombankPointer):
+    """
+    Pointer to a 'split' constituent: a sequence of pieces written as
+    ``piece,piece,...``.
+    """
+
+    def __init__(self, pieces):
+        # A list of the pieces that make up this chain.  Elements are
+        # all ``NombankTreePointer`` pointers.
+        self.pieces = pieces
+
+    def __str__(self):
+        rendered = ['%s' % piece for piece in self.pieces]
+        return ','.join(rendered)
+
+    def __repr__(self):
+        return '<NombankSplitTreePointer: %s>' % self
+
+    def select(self, tree):
+        """
+        Return a ``*SPLIT*`` tree whose children are the constituents
+        each piece selects from ``tree``.
+
+        :raise ValueError: if ``tree`` is None.
+        """
+        if tree is None:
+            raise ValueError('Parse tree not avaialable')
+        children = []
+        for piece in self.pieces:
+            children.append(piece.select(tree))
+        return Tree('*SPLIT*', children)
+
+
+@total_ordering
+@python_2_unicode_compatible
+class NombankTreePointer(NombankPointer):
+    """
+    Pointer to a single constituent, written ``wordnum:height``.
+
+    ``parse`` also understands the composite notations built from it:
+
+    - ``wordnum:height*wordnum:height*...`` (a chain), and
+    - ``wordnum:height,wordnum:height,...`` (a split constituent).
+    """
+
+    def __init__(self, wordnum, height):
+        self.wordnum = wordnum
+        self.height = height
+
+    @staticmethod
+    def parse(s):
+        """
+        Parse pointer notation into a pointer object.
+
+        :return: a ``NombankChainTreePointer`` if ``s`` contains ``*``,
+            else a ``NombankSplitTreePointer`` if it contains ``,``,
+            else a ``NombankTreePointer``.
+        :raise ValueError: if a simple pointer is not of the form
+            ``int:int``.
+        """
+        # Composite notations, outermost first: chains (xx*yy*zz), then
+        # split args (xx,yy,zz).
+        composites = (
+            ('*', NombankChainTreePointer),
+            (',', NombankSplitTreePointer),
+        )
+        for separator, pointer_class in composites:
+            parts = s.split(separator)
+            if len(parts) > 1:
+                return pointer_class(
+                    [NombankTreePointer.parse(part) for part in parts]
+                )
+
+        # Deal with normal pointers.
+        fields = s.split(':')
+        if len(fields) != 2:
+            raise ValueError('bad nombank pointer %r' % s)
+        wordnum, height = fields
+        return NombankTreePointer(int(wordnum), int(height))
+
+    def __str__(self):
+        return '%s:%s' % (self.wordnum, self.height)
+
+    def __repr__(self):
+        return 'NombankTreePointer(%d, %d)' % (self.wordnum, self.height)
+
+    @staticmethod
+    def _leading_pointer(other):
+        """Unwrap chain/split pointers down to their first piece."""
+        while isinstance(other, (NombankChainTreePointer, NombankSplitTreePointer)):
+            other = other.pieces[0]
+        return other
+
+    def __eq__(self, other):
+        # Composite pointers are compared through their first piece.
+        other = self._leading_pointer(other)
+
+        if isinstance(other, NombankTreePointer):
+            return self.wordnum == other.wordnum and self.height == other.height
+
+        return self is other
+
+    def __ne__(self, other):
+        return not self == other
+
+    def __lt__(self, other):
+        # Composite pointers are compared through their first piece.
+        other = self._leading_pointer(other)
+
+        if isinstance(other, NombankTreePointer):
+            # Order by word number; at equal word number the greater
+            # height sorts first.
+            return (self.wordnum, -self.height) < (other.wordnum, -other.height)
+
+        return id(self) < id(other)
+
+    def select(self, tree):
+        """
+        Return the constituent of ``tree`` this pointer refers to.
+
+        :raise ValueError: if ``tree`` is None.
+        """
+        if tree is None:
+            raise ValueError('Parse tree not avaialable')
+        return tree[self.treepos(tree)]
+
+    def treepos(self, tree):
+        """
+        Convert this pointer to a standard 'tree position' pointer,
+        given that it points to the given tree.
+
+        :raise ValueError: if ``tree`` is None.
+        """
+        if tree is None:
+            raise ValueError('Parse tree not avaialable')
+
+        # Depth-first walk: ``ancestors`` holds the nodes on the current
+        # path and ``path`` the child index taken at each of them.
+        ancestors = [tree]
+        path = []
+        leaves_seen = 0
+
+        while True:
+            current = ancestors[-1]
+            if isinstance(current, Tree):
+                # tree node: select the next child.
+                if len(path) < len(ancestors):
+                    path.append(0)
+                else:
+                    path[-1] += 1
+                child_index = path[-1]
+                # Update the stack.
+                if child_index < len(current):
+                    ancestors.append(current[child_index])
+                else:
+                    # End of node's child list: pop up a level.
+                    ancestors.pop()
+                    path.pop()
+            elif leaves_seen == self.wordnum:
+                # word node we were looking for: climb ``height`` levels
+                # above the leaf's parent.
+                return tuple(path[: len(path) - self.height - 1])
+            else:
+                # some other word node: count it and move on.
+                leaves_seen += 1
+                ancestors.pop()
--- a/venv/lib/python3.7/site-packages/nltk/corpus/reader/nps_chat.py
+++ b/venv/lib/python3.7/site-packages/nltk/corpus/reader/nps_chat.py
@@ -0,0 +1,92 @@
+# Natural Language Toolkit: NPS Chat Corpus Reader
+#
+# Copyright (C) 2001-2019 NLTK Project
+# Author: Edward Loper <edloper@gmail.com>
+# URL: <http://nltk.org/>
+# For license information, see LICENSE.TXT
+from __future__ import unicode_literals
+
+import re
+import textwrap
+
+from nltk.util import LazyConcatenation
+from nltk.internals import ElementWrapper
+from nltk.tag import map_tag
+
+from nltk.corpus.reader.util import *
+from nltk.corpus.reader.api import *
+from nltk.corpus.reader.xmldocs import *
+
+
+class NPSChatCorpusReader(XMLCorpusReader):
+    """
+    Corpus reader for XML chat-session files in which each post is found at
+    ``Session/Posts/Post`` and its tokens are ``<t>`` elements, carrying
+    ``word`` and ``pos`` attributes, under a ``terminals`` child.
+    """
+
+    def __init__(self, root, fileids, wrap_etree=False, tagset=None):
+        """
+        :param root: the root directory of the corpus.
+        :param fileids: a list or regexp specifying the fileids of the corpus.
+        :param wrap_etree: forwarded to ``XMLCorpusReader``; when true,
+            ``xml_posts()`` wraps each post in an ``ElementWrapper``.
+        :param tagset: the name of the tagset the corpus is annotated with;
+            used as the source tagset when tags are mapped.
+        """
+        XMLCorpusReader.__init__(self, root, fileids, wrap_etree)
+        self._tagset = tagset
+
+    def xml_posts(self, fileids=None):
+        """
+        Return the ``Post`` elements of the given files, wrapped in
+        ``ElementWrapper`` objects if the reader was built with
+        ``wrap_etree`` set.
+        """
+        if self._wrap_etree:
+            return concat(
+                [
+                    XMLCorpusView(fileid, 'Session/Posts/Post', self._wrap_elt)
+                    for fileid in self.abspaths(fileids)
+                ]
+            )
+        else:
+            return concat(
+                [
+                    XMLCorpusView(fileid, 'Session/Posts/Post')
+                    for fileid in self.abspaths(fileids)
+                ]
+            )
+
+    def posts(self, fileids=None):
+        """
+        Return the posts of the given files, each as a list of word strings.
+        """
+        return concat(
+            [
+                XMLCorpusView(
+                    fileid, 'Session/Posts/Post/terminals', self._elt_to_words
+                )
+                for fileid in self.abspaths(fileids)
+            ]
+        )
+
+    def tagged_posts(self, fileids=None, tagset=None):
+        """
+        Return the posts of the given files, each as a list of
+        ``(word, tag)`` tuples.
+
+        :param tagset: if given and different from the reader's own tagset,
+            every tag is converted with ``map_tag``.
+        """
+
+        def reader(elt, handler):
+            # Bind the requested tagset into the two-argument handler
+            # signature that XMLCorpusView calls.
+            return self._elt_to_tagged_words(elt, handler, tagset)
+
+        return concat(
+            [
+                XMLCorpusView(fileid, 'Session/Posts/Post/terminals', reader)
+                for fileid in self.abspaths(fileids)
+            ]
+        )
+
+    def words(self, fileids=None):
+        """Return the words of all posts as one lazily concatenated sequence."""
+        return LazyConcatenation(self.posts(fileids))
+
+    def tagged_words(self, fileids=None, tagset=None):
+        """
+        Return the ``(word, tag)`` tuples of all posts as one lazily
+        concatenated sequence.
+        """
+        return LazyConcatenation(self.tagged_posts(fileids, tagset))
+
+    def _wrap_elt(self, elt, handler):
+        """XMLCorpusView handler: wrap ``elt`` in an ``ElementWrapper``."""
+        return ElementWrapper(elt)
+
+    def _elt_to_words(self, elt, handler):
+        """XMLCorpusView handler: the simplified ``word`` of every ``<t>`` child."""
+        return [self._simplify_username(t.attrib['word']) for t in elt.findall('t')]
+
+    def _elt_to_tagged_words(self, elt, handler, tagset=None):
+        """
+        XMLCorpusView handler: ``(word, pos)`` for every ``<t>`` child, with
+        the tags mapped to ``tagset`` when it is given and differs from the
+        reader's own tagset.
+        """
+        tagged_post = [
+            (self._simplify_username(t.attrib['word']), t.attrib['pos'])
+            for t in elt.findall('t')
+        ]
+        if tagset and tagset != self._tagset:
+            tagged_post = [
+                (w, map_tag(self._tagset, tagset, t)) for (w, t) in tagged_post
+            ]
+        return tagged_post
+
+    @staticmethod
+    def _simplify_username(word):
+        """
+        Shorten a username token: everything up to and including the first
+        ``'User'`` is replaced by ``'U'``.  Other tokens are returned
+        unchanged, except that byte strings are decoded as ASCII.
+
+        :param word: the token text (text or ASCII byte string).
+        :return: the simplified token, as text.
+        """
+        # Decode before the substring test: on Python 3, ``'User' in word``
+        # raises TypeError for bytes, so the decode has to happen first.
+        if isinstance(word, bytes):
+            word = word.decode('ascii')
+        if 'User' in word:
+            word = 'U' + word.split('User', 1)[1]
+        return word
--- a/venv/lib/python3.7/site-packages/nltk/corpus/reader/opinion_lexicon.py
+++ b/venv/lib/python3.7/site-packages/nltk/corpus/reader/opinion_lexicon.py
@@ -0,0 +1,123 @@
+# Natural Language Toolkit: Opinion Lexicon Corpus Reader
+#
+# Copyright (C) 2001-2019 NLTK Project
+# Author: Pierpaolo Pantone <24alsecondo@gmail.com>
+# URL: <http://nltk.org/>
+# For license information, see LICENSE.TXT
+
+"""
+CorpusReader for the Opinion Lexicon.
+
+- Opinion Lexicon information -
+Authors: Minqing Hu and Bing Liu, 2004.
+    Department of Computer Science
+    University of Illinois at Chicago
+
+Contact: Bing Liu, liub@cs.uic.edu
+        http://www.cs.uic.edu/~liub
+
+Distributed with permission.
+
+Related papers:
+- Minqing Hu and Bing Liu. "Mining and summarizing customer reviews".
+    Proceedings of the ACM SIGKDD International Conference on Knowledge Discovery
+    & Data Mining (KDD-04), Aug 22-25, 2004, Seattle, Washington, USA.
+
+- Bing Liu, Minqing Hu and Junsheng Cheng. "Opinion Observer: Analyzing and
+    Comparing Opinions on the Web". Proceedings of the 14th International World
+    Wide Web conference (WWW-2005), May 10-14, 2005, Chiba, Japan.
+"""
+from six import string_types
+
+from nltk.corpus.reader import WordListCorpusReader
+from nltk.corpus.reader.api import *
+
+
+class IgnoreReadmeCorpusView(StreamBackedCorpusView):
+    """
+    A stream-backed corpus view whose contents start after the readme block
+    at the top of the corpus file.
+    """
+
+    def __init__(self, *args, **kwargs):
+        super(IgnoreReadmeCorpusView, self).__init__(*args, **kwargs)
+        # The stream has to be open before anything can be consumed from it.
+        self._open()
+        stream = self._stream
+        # Read and discard the readme block.
+        read_blankline_block(stream)
+        # The view begins at whatever position the stream has reached now.
+        self._filepos = [stream.tell()]
+
+
+class OpinionLexiconCorpusReader(WordListCorpusReader):
+    """
+    Reader for the Liu and Hu opinion lexicon.  The readme block at the top of
+    each file is skipped.
+
+        >>> from nltk.corpus import opinion_lexicon
+        >>> opinion_lexicon.words()
+        ['2-faced', '2-faces', 'abnormal', 'abolish', ...]
+
+    Shortcuts are provided for retrieving the positive or the negative words
+    only:
+
+        >>> opinion_lexicon.negative()
+        ['2-faced', '2-faces', 'abnormal', 'abolish', ...]
+
+    The result of `words()` is ordered by file id, not alphabetically:
+
+        >>> opinion_lexicon.words()[0:10]
+        ['2-faced', '2-faces', 'abnormal', 'abolish', 'abominable', 'abominably',
+        'abominate', 'abomination', 'abort', 'aborted']
+        >>> sorted(opinion_lexicon.words())[0:10]
+        ['2-faced', '2-faces', 'a+', 'abnormal', 'abolish', 'abominable', 'abominably',
+        'abominate', 'abomination', 'abort']
+    """
+
+    # View class used for every file; it skips the leading readme block.
+    CorpusView = IgnoreReadmeCorpusView
+
+    def words(self, fileids=None):
+        """
+        Return the words of the opinion lexicon, file after file (so the
+        overall result is not in alphabetical order).
+
+        :param fileids: a list or regexp specifying the ids of the files whose
+            words have to be returned; all files if omitted.
+        :return: the given file(s) as a list of words and punctuation symbols.
+        :rtype: list(str)
+        """
+        if isinstance(fileids, string_types):
+            fileids = [fileids]
+        elif fileids is None:
+            fileids = self._fileids
+
+        views = []
+        for path, enc, _fileid in self.abspaths(fileids, True, True):
+            views.append(self.CorpusView(path, self._read_word_block, encoding=enc))
+        return concat(views)
+
+    def positive(self):
+        """
+        Return the words listed in ``positive-words.txt``.
+
+        :return: a list of positive words.
+        :rtype: list(str)
+        """
+        return self.words('positive-words.txt')
+
+    def negative(self):
+        """
+        Return the words listed in ``negative-words.txt``.
+
+        :return: a list of negative words.
+        :rtype: list(str)
+        """
+        return self.words('negative-words.txt')
+
+    def _read_word_block(self, stream):
+        """
+        Block reader: read up to 20 lines from ``stream`` and return them
+        with surrounding whitespace stripped.  Empty reads (end of file)
+        contribute nothing.
+        """
+        lines = (stream.readline() for _ in range(20))
+        return [line.strip() for line in lines if line]
--- a/venv/lib/python3.7/site-packages/nltk/corpus/reader/panlex_lite.py
+++ b/venv/lib/python3.7/site-packages/nltk/corpus/reader/panlex_lite.py
@@ -0,0 +1,174 @@
+# Natural Language Toolkit: PanLex Corpus Reader
+#
+# Copyright (C) 2001-2019 NLTK Project
+# Author: David Kamholz <kamholz@panlex.org>
+# URL: <http://nltk.org/>
+# For license information, see LICENSE.TXT
+
+"""
+CorpusReader for PanLex Lite, a stripped down version of PanLex distributed
+as an SQLite database. See the README.txt in the panlex_lite corpus directory
+for more information on PanLex Lite.
+"""
+
+import os
+import sqlite3
+
+from nltk.corpus.reader.api import CorpusReader
+
+
+class PanLexLiteCorpusReader(CorpusReader):
+    """
+    Reader that answers queries against the ``db.sqlite`` database found in
+    the corpus root directory.
+    """
+
+    # Rows of every meaning that contains the expression (text, variety)
+    # given as parameters, ordered by descending ``uq``.
+    MEANING_Q = """
+        SELECT dnx2.mn, dnx2.uq, dnx2.ap, dnx2.ui, ex2.tt, ex2.lv
+        FROM dnx
+        JOIN ex ON (ex.ex = dnx.ex)
+        JOIN dnx dnx2 ON (dnx2.mn = dnx.mn)
+        JOIN ex ex2 ON (ex2.ex = dnx2.ex)
+        WHERE dnx.ex != dnx2.ex AND ex.tt = ? AND ex.lv = ?
+        ORDER BY dnx2.uq DESC
+    """
+
+    # Translations of an expression into one target variety, with a summed
+    # quality score, best first.
+    TRANSLATION_Q = """
+        SELECT s.tt, sum(s.uq) AS trq FROM (
+            SELECT ex2.tt, max(dnx.uq) AS uq
+            FROM dnx
+            JOIN ex ON (ex.ex = dnx.ex)
+            JOIN dnx dnx2 ON (dnx2.mn = dnx.mn)
+            JOIN ex ex2 ON (ex2.ex = dnx2.ex)
+            WHERE dnx.ex != dnx2.ex AND ex.lv = ? AND ex.tt = ? AND ex2.lv = ?
+            GROUP BY ex2.tt, dnx.ui
+        ) s
+        GROUP BY s.tt
+        ORDER BY trq DESC, s.tt
+    """
+
+    def __init__(self, root):
+        """
+        Open ``db.sqlite`` under ``root`` and load the two-way mapping
+        between uniform identifiers and ``lv`` values from the ``lv`` table.
+
+        :param root: the directory that contains ``db.sqlite``.
+        """
+        db_path = os.path.join(root, 'db.sqlite')
+        self._c = sqlite3.connect(db_path).cursor()
+
+        self._uid_lv = {}
+        self._lv_uid = {}
+
+        for uid, lv in self._c.execute('SELECT uid, lv FROM lv'):
+            self._uid_lv[uid] = lv
+            self._lv_uid[lv] = uid
+
+    def language_varieties(self, lc=None):
+        """
+        Return a list of PanLex language varieties.
+
+        :param lc: ISO 639 alpha-3 code. If specified, filters returned varieties
+            by this code. If unspecified, all varieties are returned.
+        :return: the specified language varieties as a list of tuples. The first
+            element is the language variety's seven-character uniform identifier,
+            and the second element is its default name.
+        :rtype: list(tuple)
+        """
+
+        if lc is not None:
+            cursor = self._c.execute(
+                'SELECT uid, tt FROM lv WHERE lc = ? ORDER BY uid', (lc,)
+            )
+            return cursor.fetchall()
+        return self._c.execute('SELECT uid, tt FROM lv ORDER BY uid').fetchall()
+
+    def meanings(self, expr_uid, expr_tt):
+        """
+        Return a list of meanings for an expression.
+
+        :param expr_uid: the expression's language variety, as a seven-character
+            uniform identifier.
+        :param expr_tt: the expression's text.
+        :return: a list of Meaning objects.
+        :rtype: list(Meaning)
+        """
+
+        expr_lv = self._uid_lv[expr_uid]
+        mn_info = {}
+
+        rows = self._c.execute(self.MEANING_Q, (expr_tt, expr_lv))
+        for mn, uq, ap, ui, tt, lv in rows:
+            uid = self._lv_uid[lv]
+
+            # First row of a meaning: record its attributes and seed the
+            # expression table with the queried expression itself.
+            if mn not in mn_info:
+                mn_info[mn] = {
+                    'uq': uq,
+                    'ap': ap,
+                    'ui': ui,
+                    'ex': {expr_uid: [expr_tt]},
+                }
+
+            mn_info[mn]['ex'].setdefault(uid, []).append(tt)
+
+        return [Meaning(mn, info) for mn, info in mn_info.items()]
+
+    def translations(self, from_uid, from_tt, to_uid):
+        """
+        Return a list of translations for an expression into a single language
+            variety.
+
+        :param from_uid: the source expression's language variety, as a
+            seven-character uniform identifier.
+        :param from_tt: the source expression's text.
+        :param to_uid: the target language variety, as a seven-character
+            uniform identifier.
+        :return: a list of translation tuples. The first element is the expression
+            text and the second element is the translation quality.
+        :rtype: list(tuple)
+        """
+
+        from_lv = self._uid_lv[from_uid]
+        to_lv = self._uid_lv[to_uid]
+
+        params = (from_lv, from_tt, to_lv)
+        return self._c.execute(self.TRANSLATION_Q, params).fetchall()
+
+
+class Meaning(dict):
+    """
+    A single PanLex meaning: a translation set derived from a single
+    source.  The data is stored as dictionary items under the keys
+    ``'mn'``, ``'uq'``, ``'ap'``, ``'ui'`` and ``'ex'``.
+    """
+
+    def __init__(self, mn, attr):
+        """
+        :param mn: the meaning's id, stored under ``'mn'``.
+        :param attr: a mapping with string keys whose items become the
+            remaining entries of this dictionary.
+        """
+        dict.__init__(self, **attr)
+        self['mn'] = mn
+
+    def id(self):
+        """
+        :return: the meaning's id.
+        :rtype: int
+        """
+        meaning_id = self['mn']
+        return meaning_id
+
+    def quality(self):
+        """
+        :return: the meaning's source's quality (0=worst, 9=best).
+        :rtype: int
+        """
+        source_quality = self['uq']
+        return source_quality
+
+    def source(self):
+        """
+        :return: the meaning's source id.
+        :rtype: int
+        """
+        source_id = self['ap']
+        return source_id
+
+    def source_group(self):
+        """
+        :return: the meaning's source group id.
+        :rtype: int
+        """
+        group_id = self['ui']
+        return group_id
+
+    def expressions(self):
+        """
+        :return: the meaning's expressions as a dictionary whose keys are language
+            variety uniform identifiers and whose values are lists of expression
+            texts.
+        :rtype: dict
+        """
+        by_variety = self['ex']
+        return by_variety
--- a/venv/lib/python3.7/site-packages/nltk/corpus/reader/panlex_swadesh.py
+++ b/venv/lib/python3.7/site-packages/nltk/corpus/reader/panlex_swadesh.py
@@ -0,0 +1,94 @@
+# -*- coding: utf-8 -*-
+# Natural Language Toolkit: Word List Corpus Reader
+#
+# Copyright (C) 2001-2019 NLTK Project
+# Author: Steven Bird <stevenbird1@gmail.com>
+#         Edward Loper <edloper@gmail.com>
+# URL: <http://nltk.org/>
+# For license information, see LICENSE.TXT
+
+
+from __future__ import print_function
+from collections import namedtuple, defaultdict
+import re
+from six import string_types
+
+
+from nltk.tokenize import line_tokenize
+
+from nltk.corpus.reader.wordlist import WordListCorpusReader
+from nltk.corpus.reader.util import *
+from nltk.corpus.reader.api import *
+
+# One record of the languages file.  Fields, in order:
+#   (1) panlex_uid  -- PanLex UID
+#   (2) iso639      -- ISO 639 language code
+#   (3) iso639_type -- ISO 639 language type, see README
+#   (4) script      -- normal scripts of expressions
+#   (5) name        -- PanLex default name
+#   (6) langvar_uid -- UID of the language variety in which the default
+#                      name is an expression
+PanlexLanguage = namedtuple(
+    'PanlexLanguage',
+    'panlex_uid iso639 iso639_type script name langvar_uid',
+)
+
+class PanlexSwadeshCorpusReader(WordListCorpusReader):
+    """
+    This is a class to read the PanLex Swadesh list from
+
+    David Kamholz, Jonathan Pool, and Susan M. Colowick (2014).
+    PanLex: Building a Resource for Panlingual Lexical Translation.
+    In LREC. http://www.lrec-conf.org/proceedings/lrec2014/pdf/1029_Paper.pdf
+
+    License: CC0 1.0 Universal
+    https://creativecommons.org/publicdomain/zero/1.0/legalcode
+    """
+    def __init__(self, *args, **kwargs):
+        """
+        Build the reader, then derive the Swadesh list size from the first
+        fileid and index the languages described in the languages file.
+        """
+        super(PanlexSwadeshCorpusReader, self).__init__(*args, **kwargs)
+        # The size is the part of the first fileid between 'swadesh' and '/'.
+        size_match = re.match(r'swadesh([0-9].*)\/', self.fileids()[0])
+        self.swadesh_size = size_match.group(1)
+        self._languages = dict(
+            (lang.panlex_uid, lang) for lang in self.get_languages()
+        )
+        # Must come after ``_languages`` is set: it is computed from it.
+        self._macro_langauges = self.get_macrolanguages()
+
+    def license(self):
+        """Print the name of the corpus license."""
+        print('CC0 1.0 Universal')
+
+    def readme(self):
+        """Print the raw contents of the ``README`` file."""
+        print(self.raw('README'))
+
+    def language_codes(self):
+        """
+        :return: the PanLex UIDs of all indexed languages.
+        """
+        return self._languages.keys()
+
+    def get_languages(self):
+        """
+        Yield one ``PanlexLanguage`` per non-blank line of the
+        ``langs<size>.txt`` file, splitting each line on tabs.
+        """
+        raw_text = self.raw('langs{}.txt'.format(self.swadesh_size))
+        for line in raw_text.split('\n'):
+            record = line.strip()
+            if record:  # Blank lines carry no language.
+                yield PanlexLanguage(*record.split('\t'))
+
+    def get_macrolanguages(self):
+        """
+        :return: a mapping from each ISO 639 code to the list of PanLex UIDs
+            of the indexed languages that carry that code.
+        """
+        uids_by_iso = defaultdict(list)
+        for language in self._languages.values():
+            uids_by_iso[language.iso639].append(language.panlex_uid)
+        return uids_by_iso
+
+    def words_by_lang(self, lang_code):
+        """
+        :return: a list of list(str)
+        """
+        fileid = 'swadesh{}/{}.txt'.format(self.swadesh_size, lang_code)
+        rows = []
+        for concept in self.words(fileid):
+            rows.append(concept.split('\t'))
+        return rows
+
+    def words_by_iso639(self, iso63_code):
+        """
+        :return: a list of list(str)
+        """
+        rows = []
+        for lang_code in self._macro_langauges[iso63_code]:
+            fileid = 'swadesh{}/{}.txt'.format(self.swadesh_size, lang_code)
+            for concept in self.words(fileid):
+                rows.append(concept.split('\t'))
+        return rows
+
+    def entries(self, fileids=None):
+        """
+        :return: a tuple of words for the specified fileids.
+        """
+        if not fileids:
+            fileids = self.fileids()
+
+        # Transpose: the i-th entry groups the i-th word of every file.
+        return list(zip(*map(self.words, fileids)))
--- a/venv/lib/python3.7/site-packages/nltk/corpus/reader/pl196x.py
+++ b/venv/lib/python3.7/site-packages/nltk/corpus/reader/pl196x.py
@@ -0,0 +1,383 @@
+# Natural Language Toolkit:
+#
+# Copyright (C) 2001-2019 NLTK Project
+# Author: Piotr Kasprzyk <p.j.kasprzyk@gmail.com>
+# URL: <http://nltk.org/>
+# For license information, see LICENSE.TXT
+
+from six import string_types
+
+from nltk.corpus.reader.api import *
+from nltk.corpus.reader.xmldocs import XMLCorpusReader
+
+
+# Paragraph (<p>) and sentence (<s>) elements, with or without attributes;
+# group 1 captures the element content.
+PARA = re.compile(r'<p(?: [^>]*){0,1}>(.*?)</p>')
+SENT = re.compile(r'<s(?: [^>]*){0,1}>(.*?)</s>')
+
+# Token elements (<w> or <c>).  TAGGEDWORD captures the opening tag text
+# (element name, attributes and the closing '>') as group 1 and the element
+# content as group 2; WORD captures the content only.
+TAGGEDWORD = re.compile(r'<([wc](?: [^>]*){0,1}>)(.*?)</[wc]>')
+WORD = re.compile(r'<[wc](?: [^>]*){0,1}>(.*?)</[wc]>')
+
+# Values of the type="..." and ana="..." attributes.
+TYPE = re.compile(r'type="(.*?)"')
+ANA = re.compile(r'ana="(.*?)"')
+
+# Value of the id attribute in 'text id="..."'.
+TEXTID = re.compile(r'text id="(.*?)"')
+
+
+class TEICorpusView(StreamBackedCorpusView):
+    """
+    Corpus view that extracts words from ``<p>``/``<s>``/``<w>``/``<c>``
+    markup with regular expressions, optionally tagged and optionally
+    grouped by sentence and/or paragraph.
+    """
+
+    # Size hint passed to ``readlines()`` for the first read of each block.
+    _pagesize = 4096
+
+    def __init__(
+        self,
+        corpus_file,
+        tagged,
+        group_by_sent,
+        group_by_para,
+        tagset=None,
+        head_len=0,
+        textids=None,
+    ):
+        """
+        :param corpus_file: the file to read.
+        :param tagged: if true, produce ``(word, tag)`` tuples instead of words.
+        :param group_by_sent: if true, group the tokens of each sentence in a list.
+        :param group_by_para: if true, group the contents of each paragraph in a list.
+        :param tagset: accepted but not used by this view.
+        :param head_len: start position in the file, used to skip the header.
+        :param textids: if non-empty, only texts with these ids are kept.
+        """
+        self._group_by_para = group_by_para
+        self._group_by_sent = group_by_sent
+        self._textids = textids
+        self._tagged = tagged
+        # WARNING -- skip header
+        StreamBackedCorpusView.__init__(self, corpus_file, startpos=head_len)
+
+    def read_block(self, stream):
+        """
+        Read about one page of lines, extended line by line until the block
+        holds at least one ``<text id`` and as many ``</text>`` as
+        ``<text id`` (or the stream is exhausted), and return the tokens found.
+        """
+        text = concat(stream.readlines(self._pagesize))
+        while True:
+            opened = text.count('<text id')
+            if opened != 0 and opened <= text.count('</text>'):
+                break
+            extra = stream.readline()
+            if not extra:
+                break
+            text += extra
+
+        text = text.replace('\n', '')
+
+        found_ids = TEXTID.findall(text)
+        if self._textids:
+            # Cut out every text whose id was not requested.
+            for text_id in found_ids:
+                if text_id in self._textids:
+                    continue
+                start = text.find(text_id) - 1
+                length = text[start:].find('</text>') + len('</text>')
+                text = text[:start] + text[start + length :]
+
+        result = []
+        for para_markup in PARA.findall(text):
+            paragraph = []
+            for sent_markup in SENT.findall(para_markup):
+                if self._tagged:
+                    tokens = [
+                        self._parse_tag(pair)
+                        for pair in TAGGEDWORD.findall(sent_markup)
+                    ]
+                else:
+                    tokens = WORD.findall(sent_markup)
+
+                if self._group_by_sent:
+                    paragraph.append(tokens)
+                else:
+                    paragraph.extend(tokens)
+
+            if self._group_by_para:
+                result.append(paragraph)
+            else:
+                result.extend(paragraph)
+        return result
+
+    def _parse_tag(self, tag_word_tuple):
+        """
+        Turn a ``(opening tag, word)`` match into ``(word, tag)``: the tag is
+        the ``ana`` attribute for ``w`` elements, the ``type`` attribute
+        otherwise.
+        """
+        opening_tag, word = tag_word_tuple
+        attribute = ANA if opening_tag.startswith('w') else TYPE
+        return word, attribute.search(opening_tag).group(1)
+
+
+class Pl196xCorpusReader(CategorizedCorpusReader, XMLCorpusReader):
+    """
+    Reader for the pl196x corpus.  Besides fileids and categories, documents
+    can be selected by text id when a text id mapping file is supplied.
+    """
+
+    # Start position handed to every TEICorpusView, used to skip the header.
+    head_len = 2770
+
+    def __init__(self, *args, **kwargs):
+        """
+        :param args: positional arguments for ``XMLCorpusReader``.
+        :param kwargs: arguments for ``CategorizedCorpusReader``; the
+            optional ``textid_file`` entry names a file that maps each
+            fileid to its text ids.
+        """
+        self._textids = kwargs.get('textid_file')
+
+        XMLCorpusReader.__init__(self, *args)
+        CategorizedCorpusReader.__init__(self, kwargs)
+
+        self._init_textids()
+
+    def _init_textids(self):
+        """
+        Build the fileid -> text ids and text id -> fileids tables from the
+        mapping file, if one was given.  Each line holds a fileid, a space,
+        and the text ids separated by ``self._delimiter``.
+
+        :raise ValueError: if the file mentions an unknown fileid.
+        """
+        self._f2t = defaultdict(list)
+        self._t2f = defaultdict(list)
+        if self._textids is not None:
+            with open(self._textids) as fp:
+                for line in fp:
+                    line = line.strip()
+                    file_id, text_ids = line.split(' ', 1)
+                    if file_id not in self.fileids():
+                        raise ValueError(
+                            'In text_id mapping file %s: %s not found'
+                            % (self._textids, file_id)
+                        )
+                    for text_id in text_ids.split(self._delimiter):
+                        self._add_textids(file_id, text_id)
+
+    def _add_textids(self, file_id, text_id):
+        """Record that ``text_id`` is found in ``file_id``, in both tables."""
+        self._f2t[file_id].append(text_id)
+        self._t2f[text_id].append(file_id)
+
+    def _resolve(self, fileids, categories, textids=None):
+        """
+        Turn the accessor arguments into ``(fileids, textids_by_file)``.
+
+        At most one of the three accessors may be given.  When none is
+        given, ``(None, None)`` is returned and the caller applies its own
+        default.
+
+        :return: a pair whose first item is the selected fileids (or None)
+            and whose second item is, for a selection by text id, a dict
+            mapping each fileid to the set of requested text ids it
+            contains (None otherwise).
+        :raise ValueError: if more than one accessor is given.
+        """
+        # The former check used ``len(filter(...))``, which raises TypeError
+        # on Python 3, and required exactly one accessor to be None, which
+        # rejected every ordinary call.
+        specified = sum(
+            accessor is not None for accessor in (fileids, categories, textids)
+        )
+        if specified > 1:
+            raise ValueError(
+                'Specify at most one of: fileids, categories or textids'
+            )
+
+        if fileids is not None:
+            return fileids, None
+
+        if categories is not None:
+            return self.fileids(categories), None
+
+        if textids is not None:
+            if isinstance(textids, string_types):
+                textids = [textids]
+            files = sum((self._t2f[t] for t in textids), [])
+            tdict = dict()
+            for f in files:
+                tdict[f] = set(self._f2t[f]) & set(textids)
+            return files, tdict
+
+        return None, None
+
+    def decode_tag(self, tag):
+        """Return ``tag`` unchanged (decoding is not implemented yet)."""
+        # to be implemented
+        return tag
+
+    def textids(self, fileids=None, categories=None):
+        """
+        In the pl196x corpus each category is stored in single
+        file and thus both methods provide identical functionality. In order
+        to accommodate finer granularity, a non-standard textids() method was
+        implemented. All the main functions can be supplied with a list
+        of required chunks---giving much more control to the user.
+
+        :return: the sorted text ids of the selected files, or of the whole
+            corpus when neither fileids nor categories are given.
+        """
+        fileids, _ = self._resolve(fileids, categories)
+        if fileids is None:
+            return sorted(self._t2f)
+
+        if isinstance(fileids, string_types):
+            fileids = [fileids]
+        return sorted(sum((self._f2t[d] for d in fileids), []))
+
+    def _tei_views(
+        self, fileids, categories, textids, tagged, group_by_sent, group_by_para
+    ):
+        """
+        Shared implementation of the word/sentence/paragraph accessors:
+        resolve the selection and concatenate one TEICorpusView per file.
+        """
+        fileids, textids = self._resolve(fileids, categories, textids)
+        if fileids is None:
+            fileids = self._fileids
+        elif isinstance(fileids, string_types):
+            fileids = [fileids]
+
+        return concat(
+            [
+                TEICorpusView(
+                    self.abspath(fileid),
+                    tagged,
+                    group_by_sent,
+                    group_by_para,
+                    head_len=self.head_len,
+                    # Restrict the view to the requested texts, if any.
+                    textids=textids[fileid] if textids else None,
+                )
+                for fileid in fileids
+            ]
+        )
+
+    def words(self, fileids=None, categories=None, textids=None):
+        """Return the selected documents as a flat sequence of words."""
+        return self._tei_views(fileids, categories, textids, False, False, False)
+
+    def sents(self, fileids=None, categories=None, textids=None):
+        """Return the selected documents as sentences (lists of words)."""
+        return self._tei_views(fileids, categories, textids, False, True, False)
+
+    def paras(self, fileids=None, categories=None, textids=None):
+        """Return the selected documents as paragraphs (lists of sentences)."""
+        return self._tei_views(fileids, categories, textids, False, True, True)
+
+    def tagged_words(self, fileids=None, categories=None, textids=None):
+        """Return the selected documents as a flat sequence of ``(word, tag)``."""
+        return self._tei_views(fileids, categories, textids, True, False, False)
+
+    def tagged_sents(self, fileids=None, categories=None, textids=None):
+        """Return the selected documents as sentences of ``(word, tag)`` tuples."""
+        return self._tei_views(fileids, categories, textids, True, True, False)
+
+    def tagged_paras(self, fileids=None, categories=None, textids=None):
+        """Return the selected documents as paragraphs of tagged sentences."""
+        return self._tei_views(fileids, categories, textids, True, True, True)
+
+    def xml(self, fileids=None, categories=None):
+        """
+        Return the parsed XML of the single selected file.
+
+        :raise TypeError: unless the selection is exactly one file.
+        """
+        fileids, _ = self._resolve(fileids, categories)
+        # Accept a bare fileid string, as the other accessors do.
+        if isinstance(fileids, string_types):
+            fileids = [fileids]
+        if fileids is not None and len(fileids) == 1:
+            return XMLCorpusReader.xml(self, fileids[0])
+        else:
+            raise TypeError('Expected a single file')
+
+    def raw(self, fileids=None, categories=None):
+        """Return the concatenated raw contents of the selected files."""
+        fileids, _ = self._resolve(fileids, categories)
+        if fileids is None:
+            fileids = self._fileids
+        elif isinstance(fileids, string_types):
+            fileids = [fileids]
+        return concat([self.open(f).read() for f in fileids])
--- a/venv/lib/python3.7/site-packages/nltk/corpus/reader/plaintext.py
+++ b/venv/lib/python3.7/site-packages/nltk/corpus/reader/plaintext.py
@@ -0,0 +1,263 @@
+# Natural Language Toolkit: Plaintext Corpus Reader
+#
+# Copyright (C) 2001-2019 NLTK Project
+# Author: Steven Bird <stevenbird1@gmail.com>
+#         Edward Loper <edloper@gmail.com>
+#         Nitin Madnani <nmadnani@umiacs.umd.edu>
+# URL: <http://nltk.org/>
+# For license information, see LICENSE.TXT
+
+"""
+A reader for corpora that consist of plaintext documents.
+"""
+
+import nltk.data
+from nltk.tokenize import *
+
+from nltk.corpus.reader.util import *
+from nltk.corpus.reader.api import *
+
+
+class PlaintextCorpusReader(CorpusReader):
+    """
+    Reader for corpora that consist of plaintext documents.  Paragraphs
+    are assumed to be split using blank lines.  Sentences and words can
+    be tokenized using the default tokenizers, or by custom tokenizers
+    specificed as parameters to the constructor.
+
+    This corpus reader can be customized (e.g., to skip preface
+    sections of specific document formats) by creating a subclass and
+    overriding the ``CorpusView`` class variable.
+    """
+
+    CorpusView = StreamBackedCorpusView
+    """The corpus view class used by this reader.  Subclasses of
+       ``PlaintextCorpusReader`` may specify alternative corpus view
+       classes (e.g., to skip the preface sections of documents.)"""
+
+    def __init__(
+        self,
+        root,
+        fileids,
+        word_tokenizer=WordPunctTokenizer(),
+        sent_tokenizer=nltk.data.LazyLoader('tokenizers/punkt/english.pickle'),
+        para_block_reader=read_blankline_block,
+        encoding='utf8',
+    ):
+        """
+        Construct a new plaintext corpus reader for a set of documents
+        located at the given root directory.  Example usage:
+
+            >>> root = '/usr/local/share/nltk_data/corpora/webtext/'
+            >>> reader = PlaintextCorpusReader(root, '.*\.txt') # doctest: +SKIP
+
+        :param root: The root directory for this corpus.
+        :param fileids: A list or regexp specifying the fileids in this corpus.
+        :param word_tokenizer: Tokenizer for breaking sentences or
+            paragraphs into words.
+        :param sent_tokenizer: Tokenizer for breaking paragraphs
+            into words.
+        :param para_block_reader: The block reader used to divide the
+            corpus into paragraph blocks.
+        """
+        CorpusReader.__init__(self, root, fileids, encoding)
+        self._word_tokenizer = word_tokenizer
+        self._sent_tokenizer = sent_tokenizer
+        self._para_block_reader = para_block_reader
+
+    def raw(self, fileids=None):
+        """
+        :return: the given file(s) as a single string.
+        :rtype: str
+        """
+        if fileids is None:
+            fileids = self._fileids
+        elif isinstance(fileids, string_types):
+            fileids = [fileids]
+        raw_texts = []
+        for f in fileids:
+            _fin = self.open(f)
+            raw_texts.append(_fin.read())
+            _fin.close()
+        return concat(raw_texts)
+
+    def words(self, fileids=None):
+        """
+        :return: the given file(s) as a list of words
+            and punctuation symbols.
+        :rtype: list(str)
+        """
+        return concat(
+            [
+                self.CorpusView(path, self._read_word_block, encoding=enc)
+                for (path, enc, fileid) in self.abspaths(fileids, True, True)
+            ]
+        )
+
+    def sents(self, fileids=None):
+        """
+        :return: the given file(s) as a list of
+            sentences or utterances, each encoded as a list of word
+            strings.
+        :rtype: list(list(str))
+        """
+        if self._sent_tokenizer is None:
+            raise ValueError('No sentence tokenizer for this corpus')
+
+        return concat(
+            [
+                self.CorpusView(path, self._read_sent_block, encoding=enc)
+                for (path, enc, fileid) in self.abspaths(fileids, True, True)
+            ]
+        )
+
+    def paras(self, fileids=None):
+        """
+        :return: the given file(s) as a list of
+            paragraphs, each encoded as a list of sentences, which are
+            in turn encoded as lists of word strings.
+        :rtype: list(list(list(str)))
+        """
+        if self._sent_tokenizer is None:
+            raise ValueError('No sentence tokenizer for this corpus')
+
+        return concat(
+            [
+                self.CorpusView(path, self._read_para_block, encoding=enc)
+                for (path, enc, fileid) in self.abspaths(fileids, True, True)
+            ]
+        )
+
+    def _read_word_block(self, stream):
+        words = []
+        for i in range(20):  # Read 20 lines at a time.
+            words.extend(self._word_tokenizer.tokenize(stream.readline()))
+        return words
+
+    def _read_sent_block(self, stream):
+        sents = []
+        for para in self._para_block_reader(stream):
+            sents.extend(
+                [
+                    self._word_tokenizer.tokenize(sent)
+                    for sent in self._sent_tokenizer.tokenize(para)
+                ]
+            )
+        return sents
+
+    def _read_para_block(self, stream):
+        paras = []
+        for para in self._para_block_reader(stream):
+            paras.append(
+                [
+                    self._word_tokenizer.tokenize(sent)
+                    for sent in self._sent_tokenizer.tokenize(para)
+                ]
+            )
+        return paras
+
+
+class CategorizedPlaintextCorpusReader(CategorizedCorpusReader, PlaintextCorpusReader):
+    """
+    A reader for plaintext corpora whose documents are divided into
+    categories based on their file identifiers.
+    """
+
+    def __init__(self, *args, **kwargs):
+        """
+        Initialize the corpus reader.  Categorization arguments
+        (``cat_pattern``, ``cat_map``, and ``cat_file``) are passed to
+        the ``CategorizedCorpusReader`` constructor.  The remaining arguments
+        are passed to the ``PlaintextCorpusReader`` constructor.
+        """
+        CategorizedCorpusReader.__init__(self, kwargs)
+        PlaintextCorpusReader.__init__(self, *args, **kwargs)
+
+    def _resolve(self, fileids, categories):
+        if fileids is not None and categories is not None:
+            raise ValueError('Specify fileids or categories, not both')
+        if categories is not None:
+            return self.fileids(categories)
+        else:
+            return fileids
+
+    def raw(self, fileids=None, categories=None):
+        return PlaintextCorpusReader.raw(self, self._resolve(fileids, categories))
+
+    def words(self, fileids=None, categories=None):
+        return PlaintextCorpusReader.words(self, self._resolve(fileids, categories))
+
+    def sents(self, fileids=None, categories=None):
+        return PlaintextCorpusReader.sents(self, self._resolve(fileids, categories))
+
+    def paras(self, fileids=None, categories=None):
+        return PlaintextCorpusReader.paras(self, self._resolve(fileids, categories))
+
+
+# FIXME: Is there a better way? How to not hardcode this?
+#       Possibly, add a language kwargs to CategorizedPlaintextCorpusReader to
+#       override the `sent_tokenizer`.
+class PortugueseCategorizedPlaintextCorpusReader(CategorizedPlaintextCorpusReader):
+    def __init__(self, *args, **kwargs):
+        CategorizedCorpusReader.__init__(self, kwargs)
+        kwargs['sent_tokenizer'] = nltk.data.LazyLoader(
+            'tokenizers/punkt/portuguese.pickle'
+        )
+        PlaintextCorpusReader.__init__(self, *args, **kwargs)
+
+
+class EuroparlCorpusReader(PlaintextCorpusReader):
+
+    """
+    Reader for Europarl corpora that consist of plaintext documents.
+    Documents are divided into chapters instead of paragraphs as
+    for regular plaintext documents. Chapters are separated using blank
+    lines. Everything is inherited from ``PlaintextCorpusReader`` except
+    that:
+      - Since the corpus is pre-processed and pre-tokenized, the
+        word tokenizer should just split the line at whitespaces.
+      - For the same reason, the sentence tokenizer should just
+        split the paragraph at line breaks.
+      - There is a new 'chapters()' method that returns chapters instead
+        instead of paragraphs.
+      - The 'paras()' method inherited from PlaintextCorpusReader is
+        made non-functional to remove any confusion between chapters
+        and paragraphs for Europarl.
+    """
+
+    def _read_word_block(self, stream):
+        words = []
+        for i in range(20):  # Read 20 lines at a time.
+            words.extend(stream.readline().split())
+        return words
+
+    def _read_sent_block(self, stream):
+        sents = []
+        for para in self._para_block_reader(stream):
+            sents.extend([sent.split() for sent in para.splitlines()])
+        return sents
+
+    def _read_para_block(self, stream):
+        paras = []
+        for para in self._para_block_reader(stream):
+            paras.append([sent.split() for sent in para.splitlines()])
+        return paras
+
+    def chapters(self, fileids=None):
+        """
+        :return: the given file(s) as a list of
+            chapters, each encoded as a list of sentences, which are
+            in turn encoded as lists of word strings.
+        :rtype: list(list(list(str)))
+        """
+        return concat(
+            [
+                self.CorpusView(fileid, self._read_para_block, encoding=enc)
+                for (fileid, enc) in self.abspaths(fileids, True)
+            ]
+        )
+
+    def paras(self, fileids=None):
+        raise NotImplementedError(
+            'The Europarl corpus reader does not support paragraphs. Please use chapters() instead.'
+        )
--- a/venv/lib/python3.7/site-packages/nltk/corpus/reader/ppattach.py
+++ b/venv/lib/python3.7/site-packages/nltk/corpus/reader/ppattach.py
@@ -0,0 +1,107 @@
+# Natural Language Toolkit: PP Attachment Corpus Reader
+#
+# Copyright (C) 2001-2019 NLTK Project
+# Author: Steven Bird <stevenbird1@gmail.com>
+#         Edward Loper <edloper@gmail.com>
+# URL: <http://nltk.org/>
+# For license information, see LICENSE.TXT
+
+"""
+Read lines from the Prepositional Phrase Attachment Corpus.
+
+The PP Attachment Corpus contains several files having the format:
+
+sentence_id verb noun1 preposition noun2 attachment
+
+For example:
+
+42960 gives authority to administration V
+46742 gives inventors of microchip N
+
+The PP attachment is to the verb phrase (V) or noun phrase (N), i.e.:
+
+(VP gives (NP authority) (PP to administration))
+(VP gives (NP inventors (PP of microchip)))
+
+The corpus contains the following files:
+
+training:   training set
+devset:     development test set, used for algorithm development.
+test:       test set, used to report results
+bitstrings: word classes derived from Mutual Information Clustering for the Wall Street Journal.
+
+Ratnaparkhi, Adwait (1994). A Maximum Entropy Model for Prepositional
+Phrase Attachment.  Proceedings of the ARPA Human Language Technology
+Conference.  [http://www.cis.upenn.edu/~adwait/papers/hlt94.ps]
+
+The PP Attachment Corpus is distributed with NLTK with the permission
+of the author.
+"""
+from __future__ import unicode_literals
+
+from six import string_types
+
+from nltk import compat
+from nltk.corpus.reader.util import *
+from nltk.corpus.reader.api import *
+
+
+@compat.python_2_unicode_compatible
+class PPAttachment(object):
+    def __init__(self, sent, verb, noun1, prep, noun2, attachment):
+        self.sent = sent
+        self.verb = verb
+        self.noun1 = noun1
+        self.prep = prep
+        self.noun2 = noun2
+        self.attachment = attachment
+
+    def __repr__(self):
+        return (
+            'PPAttachment(sent=%r, verb=%r, noun1=%r, prep=%r, '
+            'noun2=%r, attachment=%r)'
+            % (self.sent, self.verb, self.noun1, self.prep, self.noun2, self.attachment)
+        )
+
+
+class PPAttachmentCorpusReader(CorpusReader):
+    """
+    sentence_id verb noun1 preposition noun2 attachment
+    """
+
+    def attachments(self, fileids):
+        return concat(
+            [
+                StreamBackedCorpusView(fileid, self._read_obj_block, encoding=enc)
+                for (fileid, enc) in self.abspaths(fileids, True)
+            ]
+        )
+
+    def tuples(self, fileids):
+        return concat(
+            [
+                StreamBackedCorpusView(fileid, self._read_tuple_block, encoding=enc)
+                for (fileid, enc) in self.abspaths(fileids, True)
+            ]
+        )
+
+    def raw(self, fileids=None):
+        if fileids is None:
+            fileids = self._fileids
+        elif isinstance(fileids, string_types):
+            fileids = [fileids]
+        return concat([self.open(f).read() for f in fileids])
+
+    def _read_tuple_block(self, stream):
+        line = stream.readline()
+        if line:
+            return [tuple(line.split())]
+        else:
+            return []
+
+    def _read_obj_block(self, stream):
+        line = stream.readline()
+        if line:
+            return [PPAttachment(*line.split())]
+        else:
+            return []
--- a/venv/lib/python3.7/site-packages/nltk/corpus/reader/propbank.py
+++ b/venv/lib/python3.7/site-packages/nltk/corpus/reader/propbank.py
@@ -0,0 +1,539 @@
+# Natural Language Toolkit: PropBank Corpus Reader
+#
+# Copyright (C) 2001-2019 NLTK Project
+# Author: Edward Loper <edloper@gmail.com>
+# URL: <http://nltk.org/>
+# For license information, see LICENSE.TXT
+
+from __future__ import unicode_literals
+import re
+from functools import total_ordering
+from xml.etree import ElementTree
+
+from six import string_types
+
+from nltk.tree import Tree
+from nltk.internals import raise_unorderable_types
+
+from nltk.corpus.reader.util import *
+from nltk.corpus.reader.api import *
+
+
+class PropbankCorpusReader(CorpusReader):
+    """
+    Corpus reader for the propbank corpus, which augments the Penn
+    Treebank with information about the predicate argument structure
+    of every verb instance.  The corpus consists of two parts: the
+    predicate-argument annotations themselves, and a set of "frameset
+    files" which define the argument labels used by the annotations,
+    on a per-verb basis.  Each "frameset file" contains one or more
+    predicates, such as ``'turn'`` or ``'turn_on'``, each of which is
+    divided into coarse-grained word senses called "rolesets".  For
+    each "roleset", the frameset file provides descriptions of the
+    argument roles, along with examples.
+    """
+
+    def __init__(
+        self,
+        root,
+        propfile,
+        framefiles='',
+        verbsfile=None,
+        parse_fileid_xform=None,
+        parse_corpus=None,
+        encoding='utf8',
+    ):
+        """
+        :param root: The root directory for this corpus.
+        :param propfile: The name of the file containing the predicate-
+            argument annotations (relative to ``root``).
+        :param framefiles: A list or regexp specifying the frameset
+            fileids for this corpus.
+        :param parse_fileid_xform: A transform that should be applied
+            to the fileids in this corpus.  This should be a function
+            of one argument (a fileid) that returns a string (the new
+            fileid).
+        :param parse_corpus: The corpus containing the parse trees
+            corresponding to this corpus.  These parse trees are
+            necessary to resolve the tree pointers used by propbank.
+        """
+        # If framefiles is specified as a regexp, expand it.
+        if isinstance(framefiles, string_types):
+            framefiles = find_corpus_fileids(root, framefiles)
+        framefiles = list(framefiles)
+        # Initialze the corpus reader.
+        CorpusReader.__init__(self, root, [propfile, verbsfile] + framefiles, encoding)
+
+        # Record our frame fileids & prop file.
+        self._propfile = propfile
+        self._framefiles = framefiles
+        self._verbsfile = verbsfile
+        self._parse_fileid_xform = parse_fileid_xform
+        self._parse_corpus = parse_corpus
+
+    def raw(self, fileids=None):
+        """
+        :return: the text contents of the given fileids, as a single string.
+        """
+        if fileids is None:
+            fileids = self._fileids
+        elif isinstance(fileids):
+            fileids = [fileids]
+        return concat([self.open(f).read() for f in fileids])
+
+    def instances(self, baseform=None):
+        """
+        :return: a corpus view that acts as a list of
+        ``PropBankInstance`` objects, one for each noun in the corpus.
+        """
+        kwargs = {}
+        if baseform is not None:
+            kwargs['instance_filter'] = lambda inst: inst.baseform == baseform
+        return StreamBackedCorpusView(
+            self.abspath(self._propfile),
+            lambda stream: self._read_instance_block(stream, **kwargs),
+            encoding=self.encoding(self._propfile),
+        )
+
+    def lines(self):
+        """
+        :return: a corpus view that acts as a list of strings, one for
+        each line in the predicate-argument annotation file.
+        """
+        return StreamBackedCorpusView(
+            self.abspath(self._propfile),
+            read_line_block,
+            encoding=self.encoding(self._propfile),
+        )
+
+    def roleset(self, roleset_id):
+        """
+        :return: the xml description for the given roleset.
+        """
+        baseform = roleset_id.split('.')[0]
+        framefile = 'frames/%s.xml' % baseform
+        if framefile not in self._framefiles:
+            raise ValueError('Frameset file for %s not found' % roleset_id)
+
+        # n.b.: The encoding for XML fileids is specified by the file
+        # itself; so we ignore self._encoding here.
+        etree = ElementTree.parse(self.abspath(framefile).open()).getroot()
+        for roleset in etree.findall('predicate/roleset'):
+            if roleset.attrib['id'] == roleset_id:
+                return roleset
+        raise ValueError('Roleset %s not found in %s' % (roleset_id, framefile))
+
+    def rolesets(self, baseform=None):
+        """
+        :return: list of xml descriptions for rolesets.
+        """
+        if baseform is not None:
+            framefile = 'frames/%s.xml' % baseform
+            if framefile not in self._framefiles:
+                raise ValueError('Frameset file for %s not found' % baseform)
+            framefiles = [framefile]
+        else:
+            framefiles = self._framefiles
+
+        rsets = []
+        for framefile in framefiles:
+            # n.b.: The encoding for XML fileids is specified by the file
+            # itself; so we ignore self._encoding here.
+            etree = ElementTree.parse(self.abspath(framefile).open()).getroot()
+            rsets.append(etree.findall('predicate/roleset'))
+        return LazyConcatenation(rsets)
+
+    def verbs(self):
+        """
+        :return: a corpus view that acts as a list of all verb lemmas
+        in this corpus (from the verbs.txt file).
+        """
+        return StreamBackedCorpusView(
+            self.abspath(self._verbsfile),
+            read_line_block,
+            encoding=self.encoding(self._verbsfile),
+        )
+
+    def _read_instance_block(self, stream, instance_filter=lambda inst: True):
+        block = []
+
+        # Read 100 at a time.
+        for i in range(100):
+            line = stream.readline().strip()
+            if line:
+                inst = PropbankInstance.parse(
+                    line, self._parse_fileid_xform, self._parse_corpus
+                )
+                if instance_filter(inst):
+                    block.append(inst)
+
+        return block
+
+
+######################################################################
+# { Propbank Instance & related datatypes
+######################################################################
+
+
+@compat.python_2_unicode_compatible
+class PropbankInstance(object):
+    def __init__(
+        self,
+        fileid,
+        sentnum,
+        wordnum,
+        tagger,
+        roleset,
+        inflection,
+        predicate,
+        arguments,
+        parse_corpus=None,
+    ):
+
+        self.fileid = fileid
+        """The name of the file containing the parse tree for this
+        instance's sentence."""
+
+        self.sentnum = sentnum
+        """The sentence number of this sentence within ``fileid``.
+        Indexing starts from zero."""
+
+        self.wordnum = wordnum
+        """The word number of this instance's predicate within its
+        containing sentence.  Word numbers are indexed starting from
+        zero, and include traces and other empty parse elements."""
+
+        self.tagger = tagger
+        """An identifier for the tagger who tagged this instance; or
+        ``'gold'`` if this is an adjuticated instance."""
+
+        self.roleset = roleset
+        """The name of the roleset used by this instance's predicate.
+        Use ``propbank.roleset() <PropbankCorpusReader.roleset>`` to
+        look up information about the roleset."""
+
+        self.inflection = inflection
+        """A ``PropbankInflection`` object describing the inflection of
+        this instance's predicate."""
+
+        self.predicate = predicate
+        """A ``PropbankTreePointer`` indicating the position of this
+        instance's predicate within its containing sentence."""
+
+        self.arguments = tuple(arguments)
+        """A list of tuples (argloc, argid), specifying the location
+        and identifier for each of the predicate's argument in the
+        containing sentence.  Argument identifiers are strings such as
+        ``'ARG0'`` or ``'ARGM-TMP'``.  This list does *not* contain
+        the predicate."""
+
+        self.parse_corpus = parse_corpus
+        """A corpus reader for the parse trees corresponding to the
+        instances in this propbank corpus."""
+
+    @property
+    def baseform(self):
+        """The baseform of the predicate."""
+        return self.roleset.split('.')[0]
+
+    @property
+    def sensenumber(self):
+        """The sense number of the predicate."""
+        return self.roleset.split('.')[1]
+
+    @property
+    def predid(self):
+        """Identifier of the predicate."""
+        return 'rel'
+
+    def __repr__(self):
+        return '<PropbankInstance: %s, sent %s, word %s>' % (
+            self.fileid,
+            self.sentnum,
+            self.wordnum,
+        )
+
+    def __str__(self):
+        s = '%s %s %s %s %s %s' % (
+            self.fileid,
+            self.sentnum,
+            self.wordnum,
+            self.tagger,
+            self.roleset,
+            self.inflection,
+        )
+        items = self.arguments + ((self.predicate, 'rel'),)
+        for (argloc, argid) in sorted(items):
+            s += ' %s-%s' % (argloc, argid)
+        return s
+
+    def _get_tree(self):
+        if self.parse_corpus is None:
+            return None
+        if self.fileid not in self.parse_corpus.fileids():
+            return None
+        return self.parse_corpus.parsed_sents(self.fileid)[self.sentnum]
+
+    tree = property(
+        _get_tree,
+        doc="""
+        The parse tree corresponding to this instance, or None if
+        the corresponding tree is not available.""",
+    )
+
+    @staticmethod
+    def parse(s, parse_fileid_xform=None, parse_corpus=None):
+        pieces = s.split()
+        if len(pieces) < 7:
+            raise ValueError('Badly formatted propbank line: %r' % s)
+
+        # Divide the line into its basic pieces.
+        (fileid, sentnum, wordnum, tagger, roleset, inflection) = pieces[:6]
+        rel = [p for p in pieces[6:] if p.endswith('-rel')]
+        args = [p for p in pieces[6:] if not p.endswith('-rel')]
+        if len(rel) != 1:
+            raise ValueError('Badly formatted propbank line: %r' % s)
+
+        # Apply the fileid selector, if any.
+        if parse_fileid_xform is not None:
+            fileid = parse_fileid_xform(fileid)
+
+        # Convert sentence & word numbers to ints.
+        sentnum = int(sentnum)
+        wordnum = int(wordnum)
+
+        # Parse the inflection
+        inflection = PropbankInflection.parse(inflection)
+
+        # Parse the predicate location.
+        predicate = PropbankTreePointer.parse(rel[0][:-4])
+
+        # Parse the arguments.
+        arguments = []
+        for arg in args:
+            argloc, argid = arg.split('-', 1)
+            arguments.append((PropbankTreePointer.parse(argloc), argid))
+
+        # Put it all together.
+        return PropbankInstance(
+            fileid,
+            sentnum,
+            wordnum,
+            tagger,
+            roleset,
+            inflection,
+            predicate,
+            arguments,
+            parse_corpus,
+        )
+
+
+class PropbankPointer(object):
+    """
+    A pointer used by propbank to identify one or more constituents in
+    a parse tree.  ``PropbankPointer`` is an abstract base class with
+    three concrete subclasses:
+
+      - ``PropbankTreePointer`` is used to point to single constituents.
+      - ``PropbankSplitTreePointer`` is used to point to 'split'
+        constituents, which consist of a sequence of two or more
+        ``PropbankTreePointer`` pointers.
+      - ``PropbankChainTreePointer`` is used to point to entire trace
+        chains in a tree.  It consists of a sequence of pieces, which
+        can be ``PropbankTreePointer`` or ``PropbankSplitTreePointer`` pointers.
+    """
+
+    def __init__(self):
+        if self.__class__ == PropbankPointer:
+            raise NotImplementedError()
+
+
+@compat.python_2_unicode_compatible
+class PropbankChainTreePointer(PropbankPointer):
+    def __init__(self, pieces):
+        self.pieces = pieces
+        """A list of the pieces that make up this chain.  Elements may
+           be either ``PropbankSplitTreePointer`` or
+           ``PropbankTreePointer`` pointers."""
+
+    def __str__(self):
+        return '*'.join('%s' % p for p in self.pieces)
+
+    def __repr__(self):
+        return '<PropbankChainTreePointer: %s>' % self
+
+    def select(self, tree):
+        if tree is None:
+            raise ValueError('Parse tree not avaialable')
+        return Tree('*CHAIN*', [p.select(tree) for p in self.pieces])
+
+
+@compat.python_2_unicode_compatible
+class PropbankSplitTreePointer(PropbankPointer):
+    def __init__(self, pieces):
+        self.pieces = pieces
+        """A list of the pieces that make up this chain.  Elements are
+           all ``PropbankTreePointer`` pointers."""
+
+    def __str__(self):
+        return ','.join('%s' % p for p in self.pieces)
+
+    def __repr__(self):
+        return '<PropbankSplitTreePointer: %s>' % self
+
+    def select(self, tree):
+        if tree is None:
+            raise ValueError('Parse tree not avaialable')
+        return Tree('*SPLIT*', [p.select(tree) for p in self.pieces])
+
+
+@total_ordering
+@compat.python_2_unicode_compatible
+class PropbankTreePointer(PropbankPointer):
+    """
+    Pointer to a single constituent, written ``wordnum:height``.
+
+    ``parse`` also accepts the composite notations and returns the
+    matching pointer class for them:
+
+      - ``wordnum:height*wordnum:height*...`` (a chain)
+      - ``wordnum:height,wordnum:height,...`` (a split constituent)
+    """
+
+    def __init__(self, wordnum, height):
+        self.wordnum = wordnum
+        self.height = height
+
+    @staticmethod
+    def parse(s):
+        """
+        Parse pointer text into a pointer object.
+
+        ``*`` is checked before ``,`` so a chain may contain split
+        pieces; each piece is parsed recursively.
+
+        :raise ValueError: if a simple pointer is not of the form
+            ``int:int``.
+        """
+        # Deal with chains (xx*yy*zz)
+        pieces = s.split('*')
+        if len(pieces) > 1:
+            return PropbankChainTreePointer(
+                [PropbankTreePointer.parse(elt) for elt in pieces]
+            )
+
+        # Deal with split args (xx,yy,zz)
+        pieces = s.split(',')
+        if len(pieces) > 1:
+            return PropbankSplitTreePointer(
+                [PropbankTreePointer.parse(elt) for elt in pieces]
+            )
+
+        # Deal with normal pointers.
+        pieces = s.split(':')
+        if len(pieces) != 2:
+            raise ValueError('bad propbank pointer %r' % s)
+        return PropbankTreePointer(int(pieces[0]), int(pieces[1]))
+
+    def __str__(self):
+        return '%s:%s' % (self.wordnum, self.height)
+
+    def __repr__(self):
+        return 'PropbankTreePointer(%d, %d)' % (self.wordnum, self.height)
+
+    def __eq__(self, other):
+        """
+        Compare by (wordnum, height).  A chain or split pointer on the
+        right-hand side is first reduced to its first piece; any other
+        kind of object is equal only if it is this very object.
+        """
+        while isinstance(other, (PropbankChainTreePointer, PropbankSplitTreePointer)):
+            other = other.pieces[0]
+
+        if not isinstance(other, PropbankTreePointer):
+            return self is other
+
+        return self.wordnum == other.wordnum and self.height == other.height
+
+    def __ne__(self, other):
+        return not self == other
+
+    def __lt__(self, other):
+        """
+        Order by ascending wordnum, then by descending height.  A chain
+        or split pointer on the right-hand side is first reduced to its
+        first piece; for any other kind of object the two ``id()``
+        values are compared instead.
+        """
+        while isinstance(other, (PropbankChainTreePointer, PropbankSplitTreePointer)):
+            other = other.pieces[0]
+
+        if not isinstance(other, PropbankTreePointer):
+            return id(self) < id(other)
+
+        return (self.wordnum, -self.height) < (other.wordnum, -other.height)
+
+    def select(self, tree):
+        """
+        Return the subtree of ``tree`` at this pointer's tree position.
+
+        :raise ValueError: if ``tree`` is None.
+        """
+        if tree is None:
+            raise ValueError('Parse tree not avaialable')
+        return tree[self.treepos(tree)]
+
+    def treepos(self, tree):
+        """
+        Convert this pointer to a standard 'tree position' pointer,
+        given that it points to the given tree.
+
+        The tree is walked depth-first with an explicit stack, counting
+        non-``Tree`` nodes (words) as they are reached.  When the count
+        reaches ``self.wordnum`` the current path, minus its last
+        ``self.height + 1`` indices, is returned as a tuple.
+
+        :raise ValueError: if ``tree`` is None.
+        """
+        if tree is None:
+            raise ValueError('Parse tree not avaialable')
+        # stack holds the nodes on the current path; treepos holds the
+        # child index taken at each level of that path.
+        stack = [tree]
+        treepos = []
+
+        wordnum = 0
+        # NOTE(review): there is no explicit exit for a wordnum beyond
+        # the last word; the stack then empties and stack[-1] raises
+        # IndexError -- confirm callers never pass such a pointer.
+        while True:
+            # tree node:
+            if isinstance(stack[-1], Tree):
+                # Select the next child.
+                if len(treepos) < len(stack):
+                    treepos.append(0)
+                else:
+                    treepos[-1] += 1
+                # Update the stack.
+                if treepos[-1] < len(stack[-1]):
+                    stack.append(stack[-1][treepos[-1]])
+                else:
+                    # End of node's child list: pop up a level.
+                    stack.pop()
+                    treepos.pop()
+            # word node:
+            else:
+                if wordnum == self.wordnum:
+                    return tuple(treepos[: len(treepos) - self.height - 1])
+                else:
+                    wordnum += 1
+                    stack.pop()
+
+
+@compat.python_2_unicode_compatible
+class PropbankInflection(object):
+    # { Inflection Form
+    INFINITIVE = 'i'
+    GERUND = 'g'
+    PARTICIPLE = 'p'
+    FINITE = 'v'
+    # { Inflection Tense
+    FUTURE = 'f'
+    PAST = 'p'
+    PRESENT = 'n'
+    # { Inflection Aspect
+    PERFECT = 'p'
+    PROGRESSIVE = 'o'
+    PERFECT_AND_PROGRESSIVE = 'b'
+    # { Inflection Person
+    THIRD_PERSON = '3'
+    # { Inflection Voice
+    ACTIVE = 'a'
+    PASSIVE = 'p'
+    # { Inflection
+    NONE = '-'
+    # }
+
+    def __init__(self, form='-', tense='-', aspect='-', person='-', voice='-'):
+        self.form = form
+        self.tense = tense
+        self.aspect = aspect
+        self.person = person
+        self.voice = voice
+
+    def __str__(self):
+        return self.form + self.tense + self.aspect + self.person + self.voice
+
+    def __repr__(self):
+        return '<PropbankInflection: %s>' % self
+
+    _VALIDATE = re.compile(r'[igpv\-][fpn\-][pob\-][3\-][ap\-]$')
+
+    @staticmethod
+    def parse(s):
+        if not isinstance(s, string_types):
+            raise TypeError('expected a string')
+        if len(s) != 5 or not PropbankInflection._VALIDATE.match(s):
+            raise ValueError('Bad propbank inflection string %r' % s)
+        return PropbankInflection(*s)
--- a/venv/lib/python3.7/site-packages/nltk/corpus/reader/pros_cons.py
+++ b/venv/lib/python3.7/site-packages/nltk/corpus/reader/pros_cons.py
@@ -0,0 +1,143 @@
+# Natural Language Toolkit: Pros and Cons Corpus Reader
+#
+# Copyright (C) 2001-2019 NLTK Project
+# Author: Pierpaolo Pantone <24alsecondo@gmail.com>
+# URL: <http://nltk.org/>
+# For license information, see LICENSE.TXT
+
+"""
+CorpusReader for the Pros and Cons dataset.
+
+- Pros and Cons dataset information -
+
+Contact: Bing Liu, liub@cs.uic.edu
+        http://www.cs.uic.edu/~liub
+
+Distributed with permission.
+
+Related papers:
+
+- Murthy Ganapathibhotla and Bing Liu. "Mining Opinions in Comparative Sentences".
+    Proceedings of the 22nd International Conference on Computational Linguistics
+    (Coling-2008), Manchester, 18-22 August, 2008.
+
+- Bing Liu, Minqing Hu and Junsheng Cheng. "Opinion Observer: Analyzing and Comparing
+    Opinions on the Web". Proceedings of the 14th international World Wide Web
+    conference (WWW-2005), May 10-14, 2005, in Chiba, Japan.
+"""
+import re
+
+from six import string_types
+
+from nltk.corpus.reader.api import *
+from nltk.tokenize import *
+
+
+class ProsConsCorpusReader(CategorizedCorpusReader, CorpusReader):
+    """
+    Reader for the Pros and Cons sentence dataset.
+
+    A line contributes a sentence only when it matches
+    ``<Pros>...</Pros>`` or ``<Cons>...</Cons>`` (optionally preceded by
+    whitespace); every other line is ignored.
+
+        >>> from nltk.corpus import pros_cons
+        >>> pros_cons.sents(categories='Cons')
+        [['East', 'batteries', '!', 'On', '-', 'off', 'switch', 'too', 'easy',
+        'to', 'maneuver', '.'], ['Eats', '...', 'no', ',', 'GULPS', 'batteries'],
+        ...]
+        >>> pros_cons.words('IntegratedPros.txt')
+        ['Easy', 'to', 'use', ',', 'economical', '!', ...]
+    """
+
+    CorpusView = StreamBackedCorpusView
+
+    def __init__(
+        self,
+        root,
+        fileids,
+        word_tokenizer=WordPunctTokenizer(),
+        encoding='utf8',
+        **kwargs
+    ):
+        """
+        :param root: The root directory for the corpus.
+        :param fileids: a list or regexp specifying the fileids in the corpus.
+        :param word_tokenizer: a tokenizer for breaking sentences or paragraphs
+            into words. Default: `WordPunctTokenizer`
+        :param encoding: the encoding that should be used to read the corpus.
+        :param kwargs: additional parameters passed to CategorizedCorpusReader.
+        """
+
+        CorpusReader.__init__(self, root, fileids, encoding)
+        # CategorizedCorpusReader takes the keyword dict as a single argument.
+        CategorizedCorpusReader.__init__(self, kwargs)
+        self._word_tokenizer = word_tokenizer
+
+    def sents(self, fileids=None, categories=None):
+        """
+        Return all sentences in the corpus or in the specified files/categories.
+
+        :param fileids: a list or regexp specifying the ids of the files whose
+            sentences have to be returned.
+        :param categories: a list specifying the categories whose sentences
+            have to be returned.
+        :return: the given file(s) as a list of sentences. Each sentence is
+            tokenized using the specified word_tokenizer.
+        :rtype: list(list(str))
+        :raise ValueError: if both ``fileids`` and ``categories`` are given.
+        """
+        return self._views(fileids, categories, self._read_sent_block)
+
+    def words(self, fileids=None, categories=None):
+        """
+        Return all words and punctuation symbols in the corpus or in the specified
+        files/categories.
+
+        :param fileids: a list or regexp specifying the ids of the files whose
+            words have to be returned.
+        :param categories: a list specifying the categories whose words have
+            to be returned.
+        :return: the given file(s) as a list of words and punctuation symbols.
+        :rtype: list(str)
+        :raise ValueError: if both ``fileids`` and ``categories`` are given.
+        """
+        return self._views(fileids, categories, self._read_word_block)
+
+    def _views(self, fileids, categories, block_reader):
+        """
+        Build one corpus view per selected file, each reading its blocks with
+        ``block_reader``, and concatenate the views.
+        """
+        fileids = self._resolve(fileids, categories)
+        if fileids is None:
+            fileids = self._fileids
+        elif isinstance(fileids, string_types):
+            fileids = [fileids]
+        return concat(
+            [
+                self.CorpusView(path, block_reader, encoding=enc)
+                for (path, enc, fileid) in self.abspaths(fileids, True, True)
+            ]
+        )
+
+    def _read_sent_block(self, stream):
+        """
+        Read up to 20 lines from ``stream`` and return the tokenized sentences
+        found on the lines that carry a ``<Pros>`` or ``<Cons>`` element.
+        """
+        sents = []
+        for _ in range(20):  # Read 20 lines at a time.
+            line = stream.readline()
+            if not line:
+                # readline() yields '' only at end of file (blank lines come
+                # back as '\n'), so there is nothing left to read.
+                break
+            sent = re.match(r"^(?!\n)\s*<(Pros|Cons)>(.*)</(?:Pros|Cons)>", line)
+            if sent:
+                sents.append(self._word_tokenizer.tokenize(sent.group(2).strip()))
+        return sents
+
+    def _read_word_block(self, stream):
+        """
+        Same block as ``_read_sent_block``, flattened into a single word list.
+        """
+        words = []
+        for sent in self._read_sent_block(stream):
+            words.extend(sent)
+        return words
+
+    def _resolve(self, fileids, categories):
+        """
+        Turn a ``fileids``/``categories`` selection into fileids.
+
+        :raise ValueError: if both selectors are given.
+        """
+        if fileids is not None and categories is not None:
+            raise ValueError('Specify fileids or categories, not both')
+        if categories is not None:
+            return self.fileids(categories)
+        else:
+            return fileids
--- a/venv/lib/python3.7/site-packages/nltk/corpus/reader/reviews.py
+++ b/venv/lib/python3.7/site-packages/nltk/corpus/reader/reviews.py
@@ -0,0 +1,355 @@
+# Natural Language Toolkit: Product Reviews Corpus Reader
+#
+# Copyright (C) 2001-2019 NLTK Project
+# Author: Pierpaolo Pantone <24alsecondo@gmail.com>
+# URL: <http://nltk.org/>
+# For license information, see LICENSE.TXT
+
+"""
+CorpusReader for reviews corpora (syntax based on Customer Review Corpus).
+
+- Customer Review Corpus information -
+Annotated by: Minqing Hu and Bing Liu, 2004.
+    Department of Computer Science
+    University of Illinois at Chicago
+
+Contact: Bing Liu, liub@cs.uic.edu
+        http://www.cs.uic.edu/~liub
+
+Distributed with permission.
+
+The "product_reviews_1" and "product_reviews_2" datasets respectively contain
+annotated customer reviews of 5 and 9 products from amazon.com.
+
+Related papers:
+
+- Minqing Hu and Bing Liu. "Mining and summarizing customer reviews".
+    Proceedings of the ACM SIGKDD International Conference on Knowledge
+    Discovery & Data Mining (KDD-04), 2004.
+
+- Minqing Hu and Bing Liu. "Mining Opinion Features in Customer Reviews".
+    Proceedings of Nineteenth National Conference on Artificial Intelligence
+    (AAAI-2004), 2004.
+
+- Xiaowen Ding, Bing Liu and Philip S. Yu. "A Holistic Lexicon-Based Approach to
+    Opinion Mining." Proceedings of First ACM International Conference on Web
+    Search and Data Mining (WSDM-2008), Feb 11-12, 2008, Stanford University,
+    Stanford, California, USA.
+
+Symbols used in the annotated reviews:
+
+    [t] : the title of the review: Each [t] tag starts a review.
+    xxxx[+|-n]: xxxx is a product feature.
+    [+n]: Positive opinion, n is the opinion strength: 3 strongest, and 1 weakest.
+          Note that the strength is quite subjective.
+          You may want to ignore it and consider only + and -
+    [-n]: Negative opinion
+    ##  : start of each sentence. Each line is a sentence.
+    [u] : the feature does not appear in the sentence.
+    [p] : the feature does not appear in the sentence. Pronoun resolution is needed.
+    [s] : suggestion or recommendation.
+    [cc]: comparison with a competing product from a different brand.
+    [cs]: comparison with a competing product from the same brand.
+
+Note: Some of the files (e.g. "ipod.txt", "Canon PowerShot SD500.txt") do not
+    provide separation between different reviews. This is due to the fact that
+    the dataset was specifically designed for aspect/feature-based sentiment
+    analysis, for which sentence-level annotation is sufficient. For document-
+    level classification and analysis, this peculiarity should be taken into
+    consideration.
+"""
+
+from __future__ import division
+
+import re
+
+from six import string_types
+
+from nltk.corpus.reader.api import *
+from nltk.tokenize import *
+
+# Patterns for the annotation markup used in the review files.
+#
+# TITLE: a line starting with "[t]"; group 1 is the remainder of the line.
+TITLE = re.compile(r'^\[t\](.*)$')  # [t] Title
+# FEATURES: one or more words (separated by single whitespace characters)
+# immediately followed by a bracketed sign and digit, e.g.
+# "picture quality[+2]".  Group 1 is the words, group 2 the signed digit.
+FEATURES = re.compile(
+    r'((?:(?:\w+\s)+)?\w+)\[((?:\+|\-)\d)\]'
+)  # find 'feature' in feature[+3]
+# NOTES: a bracketed p, u, s, cc or cs tag; group 1 is the tag name.
+NOTES = re.compile(r'\[(?!t)(p|u|s|cc|cs)\]')  # find 'p' in camera[+2][p]
+# SENT: everything between "##" and the end of the line (group 1).
+SENT = re.compile(r'##(.*)$')  # find tokenized sentence
+
+
+@compat.python_2_unicode_compatible
+class Review(object):
+    """
+    A single review: a title plus the ReviewLines that follow it.  This is
+    the unit returned by ``ReviewsCorpusReader.reviews()``.
+    """
+
+    def __init__(self, title=None, review_lines=None):
+        """
+        :param title: the title of the review.
+        :param review_lines: the ReviewLines belonging to the Review; a new
+            empty list is created when omitted.
+        """
+        self.title = title
+        self.review_lines = [] if review_lines is None else review_lines
+
+    def add_line(self, review_line):
+        """
+        Append a ReviewLine to this review.
+
+        :param review_line: the ReviewLine instance to append.
+        """
+        assert isinstance(review_line, ReviewLine)
+        self.review_lines.append(review_line)
+
+    def features(self):
+        """
+        Collect the features of every line of the review, in line order.
+        Each feature is a tuple made of the specific item feature and the
+        opinion strength about that feature.
+
+        :return: all features of the review as a list of tuples (feat, score).
+        :rtype: list(tuple)
+        """
+        return [
+            feature
+            for review_line in self.review_lines
+            for feature in review_line.features
+        ]
+
+    def sents(self):
+        """
+        Collect the tokenized sentence of every line of the review.
+
+        :return: all sentences of the review as lists of tokens.
+        :rtype: list(list(str))
+        """
+        sentences = []
+        for review_line in self.review_lines:
+            sentences.append(review_line.sent)
+        return sentences
+
+    def __repr__(self):
+        template = 'Review(title=\"{}\", review_lines={})'
+        return template.format(self.title, self.review_lines)
+
+
+@compat.python_2_unicode_compatible
+class ReviewLine(object):
+    """
+    One sentence of a review, bundled with the (optional) feature
+    annotations and notes found on the same line.
+    """
+
+    def __init__(self, sent, features=None, notes=None):
+        """
+        :param sent: the sentence of this line.
+        :param features: the features annotated on the line; a new empty
+            list is created when omitted.
+        :param notes: the notes annotated on the line; a new empty list is
+            created when omitted.
+        """
+        self.sent = sent
+        self.features = [] if features is None else features
+        self.notes = [] if notes is None else notes
+
+    def __repr__(self):
+        template = 'ReviewLine(features={}, notes={}, sent={})'
+        return template.format(self.features, self.notes, self.sent)
+
+
+class ReviewsCorpusReader(CorpusReader):
+    """
+    Reader for the Customer Review Data dataset by Hu, Liu (2004).
+    Note: we are not applying any sentence tokenization at the moment, just word
+    tokenization.
+
+        >>> from nltk.corpus import product_reviews_1
+        >>> camera_reviews = product_reviews_1.reviews('Canon_G3.txt')
+        >>> review = camera_reviews[0]
+        >>> review.sents()[0]
+        ['i', 'recently', 'purchased', 'the', 'canon', 'powershot', 'g3', 'and', 'am',
+        'extremely', 'satisfied', 'with', 'the', 'purchase', '.']
+        >>> review.features()
+        [('canon powershot g3', '+3'), ('use', '+2'), ('picture', '+2'),
+        ('picture quality', '+1'), ('picture quality', '+1'), ('camera', '+2'),
+        ('use', '+2'), ('feature', '+1'), ('picture quality', '+3'), ('use', '+1'),
+        ('option', '+1')]
+
+    We can also reach the same information directly from the stream:
+
+        >>> product_reviews_1.features('Canon_G3.txt')
+        [('canon powershot g3', '+3'), ('use', '+2'), ...]
+
+    We can compute stats for specific product features:
+
+        >>> from __future__ import division
+        >>> n_reviews = len([(feat,score) for (feat,score) in product_reviews_1.features('Canon_G3.txt') if feat=='picture'])
+        >>> tot = sum([int(score) for (feat,score) in product_reviews_1.features('Canon_G3.txt') if feat=='picture'])
+        >>> # We use float for backward compatibility with division in Python2.7
+        >>> mean = tot / n_reviews
+        >>> print(n_reviews, tot, mean)
+        15 24 1.6
+    """
+
+    CorpusView = StreamBackedCorpusView
+
+    def __init__(
+        self, root, fileids, word_tokenizer=WordPunctTokenizer(), encoding='utf8'
+    ):
+        """
+        :param root: The root directory for the corpus.
+        :param fileids: a list or regexp specifying the fileids in the corpus.
+        :param word_tokenizer: a tokenizer for breaking sentences or paragraphs
+            into words. Default: `WordPunctTokenizer`
+        :param encoding: the encoding that should be used to read the corpus.
+        """
+
+        CorpusReader.__init__(self, root, fileids, encoding)
+        self._word_tokenizer = word_tokenizer
+
+    def features(self, fileids=None):
+        """
+        Return a list of features. Each feature is a tuple made of the specific
+        item feature and the opinion strength about that feature.
+
+        :param fileids: a list or regexp specifying the ids of the files whose
+            features have to be returned.
+        :return: all features for the item(s) in the given file(s).
+        :rtype: list(tuple)
+        """
+        if fileids is None:
+            fileids = self._fileids
+        elif isinstance(fileids, string_types):
+            fileids = [fileids]
+        return concat(
+            [
+                self.CorpusView(fileid, self._read_features, encoding=enc)
+                for (fileid, enc) in self.abspaths(fileids, True)
+            ]
+        )
+
+    def raw(self, fileids=None):
+        """
+        :param fileids: a list or regexp specifying the fileids of the files that
+            have to be returned as a raw string.
+        :return: the given file(s) as a single string.
+        :rtype: str
+        """
+        if fileids is None:
+            fileids = self._fileids
+        elif isinstance(fileids, string_types):
+            fileids = [fileids]
+        contents = []
+        for f in fileids:
+            stream = self.open(f)
+            try:
+                contents.append(stream.read())
+            finally:
+                # Close every stream explicitly rather than leaving the
+                # handle open until garbage collection.
+                stream.close()
+        return concat(contents)
+
+    def readme(self):
+        """
+        Return the contents of the corpus README.txt file.
+        """
+        stream = self.open("README.txt")
+        try:
+            return stream.read()
+        finally:
+            stream.close()
+
+    def reviews(self, fileids=None):
+        """
+        Return all the reviews as a list of Review objects. If `fileids` is
+        specified, return all the reviews from each of the specified files.
+
+        :param fileids: a list or regexp specifying the ids of the files whose
+            reviews have to be returned.
+        :return: the given file(s) as a list of reviews.
+        """
+        if fileids is None:
+            fileids = self._fileids
+        return concat(
+            [
+                self.CorpusView(fileid, self._read_review_block, encoding=enc)
+                for (fileid, enc) in self.abspaths(fileids, True)
+            ]
+        )
+
+    def sents(self, fileids=None):
+        """
+        Return all sentences in the corpus or in the specified files.
+
+        :param fileids: a list or regexp specifying the ids of the files whose
+            sentences have to be returned.
+        :return: the given file(s) as a list of sentences, each encoded as a
+            list of word strings.
+        :rtype: list(list(str))
+        """
+        return concat(
+            [
+                self.CorpusView(path, self._read_sent_block, encoding=enc)
+                for (path, enc, fileid) in self.abspaths(fileids, True, True)
+            ]
+        )
+
+    def words(self, fileids=None):
+        """
+        Return all words and punctuation symbols in the corpus or in the specified
+        files.
+
+        :param fileids: a list or regexp specifying the ids of the files whose
+            words have to be returned.
+        :return: the given file(s) as a list of words and punctuation symbols.
+        :rtype: list(str)
+        """
+        return concat(
+            [
+                self.CorpusView(path, self._read_word_block, encoding=enc)
+                for (path, enc, fileid) in self.abspaths(fileids, True, True)
+            ]
+        )
+
+    def _read_features(self, stream):
+        """
+        Read up to 20 lines and return every (feature, score) pair that the
+        FEATURES pattern finds on them; stops early at end of file.
+        """
+        features = []
+        for i in range(20):
+            line = stream.readline()
+            if not line:
+                return features
+            features.extend(re.findall(FEATURES, line))
+        return features
+
+    def _read_review_block(self, stream):
+        """
+        Read one review from ``stream``.
+
+        Lines are skipped until a title line (TITLE pattern) is found; the
+        review then runs up to, but not including, the next title line or
+        the end of the file.
+
+        :return: a one-element list holding the Review, or an empty list if
+            the end of the file is reached before any title line.
+        """
+        while True:
+            line = stream.readline()
+            if not line:
+                return []  # end of file.
+            title_match = re.match(TITLE, line)
+            if title_match:
+                review = Review(
+                    title=title_match.group(1).strip()
+                )  # We create a new review
+                break
+
+        # Scan until we find another line matching the regexp, or EOF.
+        while True:
+            oldpos = stream.tell()
+            line = stream.readline()
+            # End of file:
+            if not line:
+                return [review]
+            # Start of a new review: backup to just before it starts, and
+            # return the review we've already collected.
+            if re.match(TITLE, line):
+                stream.seek(oldpos)
+                return [review]
+            # Anything else is part of the review line.
+            feats = re.findall(FEATURES, line)
+            notes = re.findall(NOTES, line)
+            sent = re.findall(SENT, line)
+            if sent:
+                sent = self._word_tokenizer.tokenize(sent[0])
+            # When the line has no "##" part, ``sent`` stays an empty list.
+            review_line = ReviewLine(sent=sent, features=feats, notes=notes)
+            review.add_line(review_line)
+
+    def _read_sent_block(self, stream):
+        """
+        Read one review block and return the sentences of its lines.
+        """
+        sents = []
+        for review in self._read_review_block(stream):
+            sents.extend(review.sents())
+        return sents
+
+    def _read_word_block(self, stream):
+        """
+        Read up to 20 lines and return the tokens of every "##" sentence
+        found on them, as one flat list.
+        """
+        words = []
+        for i in range(20):  # Read 20 lines at a time.
+            line = stream.readline()
+            sent = re.findall(SENT, line)
+            if sent:
+                words.extend(self._word_tokenizer.tokenize(sent[0]))
+        return words
--- a/venv/lib/python3.7/site-packages/nltk/corpus/reader/rte.py
+++ b/venv/lib/python3.7/site-packages/nltk/corpus/reader/rte.py
@@ -0,0 +1,151 @@
+# Natural Language Toolkit: RTE Corpus Reader
+#
+# Copyright (C) 2001-2019 NLTK Project
+# Author:  Ewan Klein <ewan@inf.ed.ac.uk>
+# URL: <http://nltk.org/>
+# For license information, see LICENSE.TXT
+
+"""
+Corpus reader for the Recognizing Textual Entailment (RTE) Challenge Corpora.
+
+The files were taken from the RTE1, RTE2 and RTE3 datasets and the files
+were regularized.
+
+Filenames are of the form rte*_dev.xml and rte*_test.xml. The latter are the
+gold standard annotated files.
+
+Each entailment corpus is a list of 'text'/'hypothesis' pairs. The following
+example is taken from RTE3::
+
+ <pair id="1" entailment="YES" task="IE" length="short" >
+
+    <t>The sale was made to pay Yukos' US$ 27.5 billion tax bill,
+    Yuganskneftegaz was originally sold for US$ 9.4 billion to a little known
+    company Baikalfinansgroup which was later bought by the Russian
+    state-owned oil company Rosneft .</t>
+
+   <h>Baikalfinansgroup was sold to Rosneft.</h>
+ </pair>
+
+In order to provide globally unique IDs for each pair, a new attribute
+``challenge`` has been added to the root element ``entailment-corpus`` of each
+file, taking values 1, 2 or 3. The GID is formatted 'm-n', where 'm' is the
+challenge number and 'n' is the pair ID.
+"""
+from __future__ import unicode_literals
+
+from six import string_types
+
+from nltk import compat
+from nltk.corpus.reader.util import *
+from nltk.corpus.reader.api import *
+from nltk.corpus.reader.xmldocs import *
+
+
+def norm(value_string):
+    """
+    Convert the label stored in an RTE pair's ``value`` or ``entailment``
+    attribute into an integer.  The comparison is case-insensitive:
+    "TRUE"/"YES" give 1 and "FALSE"/"NO" give 0.
+
+    :param value_string: the label used to classify a text/hypothesis pair
+    :type value_string: str
+    :raise KeyError: if the label is none of the four known values.
+    :rtype: int
+    """
+
+    label = value_string.upper()
+    if label in ("TRUE", "YES"):
+        return 1
+    if label in ("FALSE", "NO"):
+        return 0
+    raise KeyError(label)
+
+
+@compat.python_2_unicode_compatible
+class RTEPair(object):
+    """
+    Container for RTE text-hypothesis pairs.
+
+    The entailment relation is signalled by the ``value`` attribute in RTE1, and by
+    ``entailment`` in RTE2 and RTE3.  Whichever is present is normalized with
+    ``norm()`` and stored in the ``value`` attribute of this class.
+    """
+
+    def __init__(
+        self,
+        pair,
+        challenge=None,
+        id=None,
+        text=None,
+        hyp=None,
+        value=None,
+        task=None,
+        length=None,
+    ):
+        """
+        :param pair: the ``<pair>`` XML element; its ``id`` attribute and its
+            first two children (text and hypothesis) are always read from it.
+        :param challenge: version of the RTE challenge (i.e., RTE1, RTE2 or RTE3)
+        :param id: accepted but not used; the id comes from ``pair``.
+        :param text: accepted but not used; the text comes from ``pair``.
+        :param hyp: accepted but not used; the hypothesis comes from ``pair``.
+        :param value: fallback classification label, used only when ``pair``
+            has neither a ``value`` nor an ``entailment`` attribute.
+        :param task: fallback task name, used only when ``pair`` has no
+            ``task`` attribute.
+        :param length: fallback length, used only when ``pair`` has no
+            ``length`` attribute.
+        """
+        attrs = pair.attrib
+
+        self.challenge = challenge
+        self.id = attrs["id"]
+        self.gid = "%s-%s" % (self.challenge, self.id)
+        self.text = pair[0].text
+        self.hyp = pair[1].text
+
+        # "value" (RTE1) takes precedence over "entailment" (RTE2/RTE3).
+        for label_key in ("value", "entailment"):
+            if label_key in attrs:
+                self.value = norm(attrs[label_key])
+                break
+        else:
+            self.value = value
+
+        self.task = attrs["task"] if "task" in attrs else task
+        self.length = attrs["length"] if "length" in attrs else length
+
+    def __repr__(self):
+        if not self.challenge:
+            return '<RTEPair: id=%s>' % self.id
+        return '<RTEPair: gid=%s-%s>' % (self.challenge, self.id)
+
+
+class RTECorpusReader(XMLCorpusReader):
+    """
+    Corpus reader for corpora in RTE challenges.
+
+    This is just a wrapper around the XMLCorpusReader. See module docstring above for the expected
+    structure of input documents.
+    """
+
+    def _read_etree(self, doc):
+        """
+        Map the XML input into a list of RTEPairs.
+
+        This uses the ``iter()`` method of the ElementTree element to find
+        all the ``<pair>`` elements (``getiterator()`` is deprecated and no
+        longer exists from Python 3.9 on).
+
+        :param doc: a parsed XML document (its root element); the optional
+            ``challenge`` attribute is passed on to every pair.
+        :rtype: list(RTEPair)
+        """
+        # A missing "challenge" attribute yields None.
+        challenge = doc.attrib.get('challenge')
+        return [RTEPair(pair, challenge=challenge) for pair in doc.iter("pair")]
+
+    def pairs(self, fileids):
+        """
+        Build a list of RTEPairs from a RTE corpus.
+
+        :param fileids: a list of RTE corpus fileids, or a single fileid
+            given as a string
+        :type: list
+        :rtype: list(RTEPair)
+        """
+        if isinstance(fileids, string_types):
+            fileids = [fileids]
+        return concat([self._read_etree(self.xml(fileid)) for fileid in fileids])
--- a/venv/lib/python3.7/site-packages/nltk/corpus/reader/semcor.py
+++ b/venv/lib/python3.7/site-packages/nltk/corpus/reader/semcor.py
@@ -0,0 +1,297 @@
+# Natural Language Toolkit: SemCor Corpus Reader
+#
+# Copyright (C) 2001-2019 NLTK Project
+# Author: Nathan Schneider <nschneid@cs.cmu.edu>
+# URL: <http://nltk.org/>
+# For license information, see LICENSE.TXT
+
+"""
+Corpus reader for the SemCor Corpus.
+"""
+from __future__ import absolute_import, unicode_literals
+
+__docformat__ = 'epytext en'
+
+from nltk.corpus.reader.api import *
+from nltk.corpus.reader.xmldocs import XMLCorpusReader, XMLCorpusView
+from nltk.tree import Tree
+
+
+class SemcorCorpusReader(XMLCorpusReader):
+    """
+    Corpus reader for the SemCor Corpus.
+    For access to the complete XML data structure, use the ``xml()``
+    method.  For access to simple word lists and tagged word lists, use
+    ``words()``, ``sents()``, ``tagged_words()``, and ``tagged_sents()``.
+    """
+
+    def __init__(self, root, fileids, wordnet, lazy=True):
+        """
+        :param root: The root directory for the corpus.
+        :param fileids: a list or regexp specifying the fileids in the corpus.
+        :param wordnet: the WordNet reader used to look up lemmas by sense key.
+        :param lazy: if true, items are read through ``SemcorWordView``;
+            otherwise each file is parsed completely by ``_words()``.
+        """
+        XMLCorpusReader.__init__(self, root, fileids)
+        self._lazy = lazy
+        self._wordnet = wordnet
+
+    def words(self, fileids=None):
+        """
+        :return: the given file(s) as a list of words and punctuation symbols.
+        :rtype: list(str)
+        """
+        return self._items(fileids, 'word', False, False, False)
+
+    def chunks(self, fileids=None):
+        """
+        :return: the given file(s) as a list of chunks,
+            each of which is a list of words and punctuation symbols
+            that form a unit.
+        :rtype: list(list(str))
+        """
+        return self._items(fileids, 'chunk', False, False, False)
+
+    def tagged_chunks(self, fileids=None, tag='pos'):
+        """
+        :return: the given file(s) as a list of tagged chunks, represented
+            in tree form.
+        :rtype: list(Tree)
+
+        :param tag: `'pos'` (part of speech), `'sem'` (semantic), or `'both'`
+            to indicate the kind of tags to include (default `'pos'`).
+            Semantic tags consist of
+            WordNet lemma IDs, plus an `'NE'` node if the chunk is a named entity
+            without a specific entry in WordNet.  (Named entities of type 'other'
+            have no lemma.  Other chunks not in WordNet have no semantic tag.
+            Punctuation tokens have `None` for their part of speech tag.)
+        """
+        return self._items(fileids, 'chunk', False, tag != 'sem', tag != 'pos')
+
+    def sents(self, fileids=None):
+        """
+        :return: the given file(s) as a list of sentences, each encoded
+            as a list of word strings.
+        :rtype: list(list(str))
+        """
+        return self._items(fileids, 'word', True, False, False)
+
+    def chunk_sents(self, fileids=None):
+        """
+        :return: the given file(s) as a list of sentences, each encoded
+            as a list of chunks.
+        :rtype: list(list(list(str)))
+        """
+        return self._items(fileids, 'chunk', True, False, False)
+
+    def tagged_sents(self, fileids=None, tag='pos'):
+        """
+        :return: the given file(s) as a list of sentences. Each sentence
+            is represented as a list of tagged chunks (in tree form).
+        :rtype: list(list(Tree))
+
+        :param tag: `'pos'` (part of speech), `'sem'` (semantic), or `'both'`
+            to indicate the kind of tags to include (default `'pos'`).
+            Semantic tags consist of
+            WordNet lemma IDs, plus an `'NE'` node if the chunk is a named entity
+            without a specific entry in WordNet.  (Named entities of type 'other'
+            have no lemma.  Other chunks not in WordNet have no semantic tag.
+            Punctuation tokens have `None` for their part of speech tag.)
+        """
+        return self._items(fileids, 'chunk', True, tag != 'sem', tag != 'pos')
+
+    def _items(self, fileids, unit, bracket_sent, pos_tag, sem_tag):
+        """
+        Build the per-file item sequences and concatenate them.
+
+        In lazy mode each file is wrapped in a ``SemcorWordView``; otherwise
+        it is parsed completely by ``_words()``.
+        """
+        # A lazy view yields one list per XML word (a multiword unit gives
+        # several words), so plain word output has to be flattened.
+        flatten_view = unit == 'word' and not bracket_sent
+
+        def read(fileid):
+            if self._lazy:
+                view = SemcorWordView(
+                    fileid, unit, bracket_sent, pos_tag, sem_tag, self._wordnet
+                )
+                return LazyConcatenation(view) if flatten_view else view
+            # _words() reads self._wordnet itself and already returns a flat
+            # list for unit == 'word', so it takes no wordnet argument and
+            # must not be flattened a second time.
+            return self._words(fileid, unit, bracket_sent, pos_tag, sem_tag)
+
+        return concat([read(fileid) for fileid in self.abspaths(fileids)])
+
+    def _words(self, fileid, unit, bracket_sent, pos_tag, sem_tag):
+        """
+        Helper used to implement the view methods -- returns a list of
+        tokens, (segmented) words, chunks, or sentences. The tokens
+        and chunks may optionally be tagged (with POS and sense
+        information).
+
+        :param fileid: The name of the underlying file.
+        :param unit: One of `'token'`, `'word'`, or `'chunk'`.
+        :param bracket_sent: If true, include sentence bracketing.
+        :param pos_tag: Whether to include part-of-speech tags.
+        :param sem_tag: Whether to include semantic tags, namely WordNet lemma
+            and OOV named entity status.
+        """
+        assert unit in ('token', 'word', 'chunk')
+        result = []
+
+        xmldoc = ElementTree.parse(fileid).getroot()
+        for xmlsent in xmldoc.findall('.//s'):
+            sent = []
+            for xmlword in _all_xmlwords_in(xmlsent):
+                itm = SemcorCorpusReader._word(
+                    xmlword, unit, pos_tag, sem_tag, self._wordnet
+                )
+                if unit == 'word':
+                    # _word() returns a list of words here: splice it in.
+                    sent.extend(itm)
+                else:
+                    sent.append(itm)
+
+            if bracket_sent:
+                result.append(SemcorSentence(xmlsent.attrib['snum'], sent))
+            else:
+                result.extend(sent)
+
+        assert None not in result
+        return result
+
+    @staticmethod
+    def _word(xmlword, unit, pos_tag, sem_tag, wordnet):
+        """
+        Convert one ``wf``/``punc`` XML element into the requested unit.
+
+        :param xmlword: the XML element to convert.
+        :param unit: `'token'` returns the element text, or a tuple starting
+            with it when tags are requested; `'word'` returns the text split
+            on underscores as a list; anything else returns a chunk (a list
+            of words or a ``Tree``, depending on the tag flags).
+        :param pos_tag: Whether to include part-of-speech tags.
+        :param sem_tag: Whether to include semantic tags.
+        :param wordnet: reader used for ``lemma_from_key()`` lookups.
+        """
+        tkn = xmlword.text
+        if not tkn:
+            tkn = ""  # fixes issue 337?
+
+        lemma = xmlword.get('lemma', tkn)  # lemma or NE class
+        lexsn = xmlword.get('lexsn')  # lex_sense (locator for the lemma's sense)
+        if lexsn is not None:
+            sense_key = lemma + '%' + lexsn
+            wnpos = ('n', 'v', 'a', 'r', 's')[
+                int(lexsn.split(':')[0]) - 1
+            ]  # see http://wordnet.princeton.edu/man/senseidx.5WN.html
+        else:
+            sense_key = wnpos = None
+        redef = xmlword.get(
+            'rdf', tkn
+        )  # redefinition--this indicates the lookup string
+        # does not exactly match the enclosed string, e.g. due to typographical adjustments
+        # or discontinuity of a multiword expression. If a redefinition has occurred,
+        # the "rdf" attribute holds its inflected form and "lemma" holds its lemma.
+        # For NEs, "rdf", "lemma", and "pn" all hold the same value (the NE class).
+        sensenum = xmlword.get('wnsn')  # WordNet sense number
+        isOOVEntity = 'pn' in xmlword.keys()  # a "personal name" (NE) not in WordNet
+        pos = xmlword.get(
+            'pos'
+        )  # part of speech for the whole chunk (None for punctuation)
+
+        if unit == 'token':
+            if not pos_tag and not sem_tag:
+                itm = tkn
+            else:
+                itm = (
+                    (tkn,)
+                    + ((pos,) if pos_tag else ())
+                    + ((lemma, wnpos, sensenum, isOOVEntity) if sem_tag else ())
+                )
+            return itm
+        else:
+            ww = tkn.split('_')  # TODO: case where punctuation intervenes in MWE
+            if unit == 'word':
+                return ww
+            else:
+                if sensenum is not None:
+                    try:
+                        sense = wordnet.lemma_from_key(sense_key)  # Lemma object
+                    except Exception:
+                        # cannot retrieve the wordnet.Lemma object. possible reasons:
+                        #  (a) the wordnet corpus is not downloaded;
+                        #  (b) a nonexistant sense is annotated: e.g., such.s.00 triggers:
+                        #  nltk.corpus.reader.wordnet.WordNetError: No synset found for key u'such%5:00:01:specified:00'
+                        # solution: just use the lemma name as a string
+                        try:
+                            sense = '%s.%s.%02d' % (
+                                lemma,
+                                wnpos,
+                                int(sensenum),
+                            )  # e.g.: reach.v.02
+                        except ValueError:
+                            sense = (
+                                lemma + '.' + wnpos + '.' + sensenum
+                            )  # e.g. the sense number may be "2;1"
+
+                bottom = [Tree(pos, ww)] if pos_tag else ww
+
+                # ``sense`` is only bound when sensenum is not None; every
+                # branch below that uses it checks that condition first.
+                if sem_tag and isOOVEntity:
+                    if sensenum is not None:
+                        return Tree(sense, [Tree('NE', bottom)])
+                    else:  # 'other' NE
+                        return Tree('NE', bottom)
+                elif sem_tag and sensenum is not None:
+                    return Tree(sense, bottom)
+                elif pos_tag:
+                    return bottom[0]
+                else:
+                    return bottom  # chunk as a list
+
+
+def _all_xmlwords_in(elt, result=None):
+    """
+    Collect, in document order, every ``wf`` and ``punc`` element below
+    ``elt``.  Matching elements are not searched further; any other element
+    is descended into.
+
+    :param elt: the XML element whose descendants are searched.
+    :param result: optional list to append to; a new list is created when
+        omitted.
+    :return: ``result`` with the matching elements appended.
+    """
+    if result is None:
+        result = []
+    # Depth-first walk with an explicit stack of child iterators.
+    pending = [iter(elt)]
+    while pending:
+        child = next(pending[-1], None)
+        if child is None:
+            pending.pop()
+        elif child.tag in ('wf', 'punc'):
+            result.append(child)
+        else:
+            pending.append(iter(child))
+    return result
+
+
+class SemcorSentence(list):
+    """
+    A plain list of sentence items that additionally carries the sentence
+    identifier in its ``num`` attribute.
+    """
+
+    def __init__(self, num, items):
+        """
+        :param num: the sentence identifier to store as ``self.num``.
+        :param items: the iterable of items the list is filled with.
+        """
+        super(SemcorSentence, self).__init__(items)
+        self.num = num
+
+
+class SemcorWordView(XMLCorpusView):
+    """
+    A stream backed corpus view over SemCor XML: it yields either one item
+    per ``wf``/``punc`` element or one ``SemcorSentence`` per ``s`` element.
+    """
+
+    def __init__(self, fileid, unit, bracket_sent, pos_tag, sem_tag, wordnet):
+        """
+        :param fileid: The name of the underlying file.
+        :param unit: One of `'token'`, `'word'`, or `'chunk'`.
+        :param bracket_sent: If true, include sentence bracketing.
+        :param pos_tag: Whether to include part-of-speech tags.
+        :param sem_tag: Whether to include semantic tags, namely WordNet lemma
+            and OOV named entity status.
+        :param wordnet: the WordNet reader handed to
+            ``SemcorCorpusReader._word()``.
+        """
+        # Select whole sentences, or the individual words inside them.
+        tagspec = '.*/s' if bracket_sent else '.*/s/(punc|wf)'
+
+        self._unit = unit
+        self._sent = bracket_sent
+        self._pos_tag = pos_tag
+        self._sem_tag = sem_tag
+        self._wordnet = wordnet
+
+        XMLCorpusView.__init__(self, fileid, tagspec)
+
+    def handle_elt(self, elt, context):
+        """Dispatch to the sentence or word handler, per ``bracket_sent``."""
+        handler = self.handle_sent if self._sent else self.handle_word
+        return handler(elt)
+
+    def handle_word(self, elt):
+        """Convert one ``wf``/``punc`` element with the configured options."""
+        return SemcorCorpusReader._word(
+            elt, self._unit, self._pos_tag, self._sem_tag, self._wordnet
+        )
+
+    def handle_sent(self, elt):
+        """
+        Convert an ``s`` element into a ``SemcorSentence``.
+
+        :raise ValueError: if a direct child is neither ``wf`` nor ``punc``.
+        """
+        items = []
+        for child in elt:
+            if child.tag not in ('wf', 'punc'):
+                raise ValueError('Unexpected element %s' % child.tag)
+            converted = self.handle_word(child)
+            if self._unit == 'word':
+                # A word element may expand to several words: splice them in.
+                items.extend(converted)
+            else:
+                items.append(converted)
+        return SemcorSentence(elt.attrib['snum'], items)
--- a/venv/lib/python3.7/site-packages/nltk/corpus/reader/senseval.py
+++ b/venv/lib/python3.7/site-packages/nltk/corpus/reader/senseval.py
@@ -0,0 +1,212 @@
+# Natural Language Toolkit: Senseval 2 Corpus Reader
+#
+# Copyright (C) 2001-2019 NLTK Project
+# Author: Trevor Cohn <tacohn@cs.mu.oz.au>
+#         Steven Bird <stevenbird1@gmail.com> (modifications)
+# URL: <http://nltk.org/>
+# For license information, see LICENSE.TXT
+
+"""
+Read from the Senseval 2 Corpus.
+
+SENSEVAL [http://www.senseval.org/]
+Evaluation exercises for Word Sense Disambiguation.
+Organized by ACL-SIGLEX [http://www.siglex.org/]
+
+Prepared by Ted Pedersen <tpederse@umn.edu>, University of Minnesota,
+http://www.d.umn.edu/~tpederse/data.html
+Distributed with permission.
+
+The NLTK version of the Senseval 2 files uses well-formed XML.
+Each instance of the ambiguous words "hard", "interest", "line", and "serve"
+is tagged with a sense identifier, and supplied with context.
+"""
+from __future__ import print_function, unicode_literals
+
+import re
+from xml.etree import ElementTree
+
+from six import string_types
+
+from nltk import compat
+from nltk.tokenize import *
+
+from nltk.corpus.reader.util import *
+from nltk.corpus.reader.api import *
+
+
+@compat.python_2_unicode_compatible
+class SensevalInstance(object):
+    def __init__(self, word, position, context, senses):
+        self.word = word
+        self.senses = tuple(senses)
+        self.position = position
+        self.context = context
+
+    def __repr__(self):
+        return 'SensevalInstance(word=%r, position=%r, ' 'context=%r, senses=%r)' % (
+            self.word,
+            self.position,
+            self.context,
+            self.senses,
+        )
+
+
+class SensevalCorpusReader(CorpusReader):
+    def instances(self, fileids=None):
+        return concat(
+            [
+                SensevalCorpusView(fileid, enc)
+                for (fileid, enc) in self.abspaths(fileids, True)
+            ]
+        )
+
+    def raw(self, fileids=None):
+        """
+        :return: the text contents of the given fileids, as a single string.
+        """
+        if fileids is None:
+            fileids = self._fileids
+        elif isinstance(fileids, string_types):
+            fileids = [fileids]
+        return concat([self.open(f).read() for f in fileids])
+
+    def _entry(self, tree):
+        elts = []
+        for lexelt in tree.findall('lexelt'):
+            for inst in lexelt.findall('instance'):
+                sense = inst[0].attrib['senseid']
+                context = [(w.text, w.attrib['pos']) for w in inst[1]]
+                elts.append((sense, context))
+        return elts
+
+
+class SensevalCorpusView(StreamBackedCorpusView):
+    def __init__(self, fileid, encoding):
+        StreamBackedCorpusView.__init__(self, fileid, encoding=encoding)
+
+        self._word_tokenizer = WhitespaceTokenizer()
+        self._lexelt_starts = [0]  # list of streampos
+        self._lexelts = [None]  # list of lexelt names
+
+    def read_block(self, stream):
+        # Decide which lexical element we're in.
+        lexelt_num = bisect.bisect_right(self._lexelt_starts, stream.tell()) - 1
+        lexelt = self._lexelts[lexelt_num]
+
+        instance_lines = []
+        in_instance = False
+        while True:
+            line = stream.readline()
+            if line == '':
+                assert instance_lines == []
+                return []
+
+            # Start of a lexical element?
+            if line.lstrip().startswith('<lexelt'):
+                lexelt_num += 1
+                m = re.search('item=("[^"]+"|\'[^\']+\')', line)
+                assert m is not None  # <lexelt> has no 'item=...'
+                lexelt = m.group(1)[1:-1]
+                if lexelt_num < len(self._lexelts):
+                    assert lexelt == self._lexelts[lexelt_num]
+                else:
+                    self._lexelts.append(lexelt)
+                    self._lexelt_starts.append(stream.tell())
+
+            # Start of an instance?
+            if line.lstrip().startswith('<instance'):
+                assert instance_lines == []
+                in_instance = True
+
+            # Body of an instance?
+            if in_instance:
+                instance_lines.append(line)
+
+            # End of an instance?
+            if line.lstrip().startswith('</instance'):
+                xml_block = '\n'.join(instance_lines)
+                xml_block = _fixXML(xml_block)
+                inst = ElementTree.fromstring(xml_block)
+                return [self._parse_instance(inst, lexelt)]
+
+    def _parse_instance(self, instance, lexelt):
+        senses = []
+        context = []
+        position = None
+        for child in instance:
+            if child.tag == 'answer':
+                senses.append(child.attrib['senseid'])
+            elif child.tag == 'context':
+                context += self._word_tokenizer.tokenize(child.text)
+                for cword in child:
+                    if cword.tag == 'compound':
+                        cword = cword[0]  # is this ok to do?
+
+                    if cword.tag == 'head':
+                        # Some santiy checks:
+                        assert position is None, 'head specified twice'
+                        assert cword.text.strip() or len(cword) == 1
+                        assert not (cword.text.strip() and len(cword) == 1)
+                        # Record the position of the head:
+                        position = len(context)
+                        # Addd on the head word itself:
+                        if cword.text.strip():
+                            context.append(cword.text.strip())
+                        elif cword[0].tag == 'wf':
+                            context.append((cword[0].text, cword[0].attrib['pos']))
+                            if cword[0].tail:
+                                context += self._word_tokenizer.tokenize(cword[0].tail)
+                        else:
+                            assert False, 'expected CDATA or wf in <head>'
+                    elif cword.tag == 'wf':
+                        context.append((cword.text, cword.attrib['pos']))
+                    elif cword.tag == 's':
+                        pass  # Sentence boundary marker.
+
+                    else:
+                        print('ACK', cword.tag)
+                        assert False, 'expected CDATA or <wf> or <head>'
+                    if cword.tail:
+                        context += self._word_tokenizer.tokenize(cword.tail)
+            else:
+                assert False, 'unexpected tag %s' % child.tag
+        return SensevalInstance(lexelt, position, context, senses)
+
+
+def _fixXML(text):
+    """
+    Fix the various issues with Senseval pseudo-XML.
+    """
+    # <~> or <^> => ~ or ^
+    text = re.sub(r'<([~\^])>', r'\1', text)
+    # fix lone &
+    text = re.sub(r'(\s+)\&(\s+)', r'\1&amp;\2', text)
+    # fix """
+    text = re.sub(r'"""', '\'"\'', text)
+    # fix <s snum=dd> => <s snum="dd"/>
+    text = re.sub(r'(<[^<]*snum=)([^">]+)>', r'\1"\2"/>', text)
+    # fix foreign word tag
+    text = re.sub(r'<\&frasl>\s*<p[^>]*>', 'FRASL', text)
+    # remove <&I .>
+    text = re.sub(r'<\&I[^>]*>', '', text)
+    # fix <{word}>
+    text = re.sub(r'<{([^}]+)}>', r'\1', text)
+    # remove <@>, <p>, </p>
+    text = re.sub(r'<(@|/?p)>', r'', text)
+    # remove <&M .> and <&T .> and <&Ms .>
+    text = re.sub(r'<&\w+ \.>', r'', text)
+    # remove <!DOCTYPE... > lines
+    text = re.sub(r'<!DOCTYPE[^>]*>', r'', text)
+    # remove <[hi]> and <[/p]> etc
+    text = re.sub(r'<\[\/?[^>]+\]*>', r'', text)
+    # take the thing out of the brackets: <&hellip;>
+    text = re.sub(r'<(\&\w+;)>', r'\1', text)
+    # and remove the & for those patterns that aren't regular XML
+    text = re.sub(r'&(?!amp|gt|lt|apos|quot)', r'', text)
+    # fix 'abc <p="foo"/>' style tags - now <wf pos="foo">abc</wf>
+    text = re.sub(
+        r'[ \t]*([^<>\s]+?)[ \t]*<p="([^"]*"?)"/>', r' <wf pos="\2">\1</wf>', text
+    )
+    text = re.sub(r'\s*"\s*<p=\'"\'/>', " <wf pos='\"'>\"</wf>", text)
+    return text
--- a/venv/lib/python3.7/site-packages/nltk/corpus/reader/sentiwordnet.py
+++ b/venv/lib/python3.7/site-packages/nltk/corpus/reader/sentiwordnet.py
@@ -0,0 +1,139 @@
+# -*- coding: utf-8 -*-
+# Natural Language Toolkit: SentiWordNet
+#
+# Copyright (C) 2001-2019 NLTK Project
+# Author: Christopher Potts <cgpotts@stanford.edu>
+# URL: <http://nltk.org/>
+# For license information, see LICENSE.TXT
+
+"""
+An NLTK interface for SentiWordNet
+
+SentiWordNet is a lexical resource for opinion mining.
+SentiWordNet assigns to each synset of WordNet three
+sentiment scores: positivity, negativity, and objectivity.
+
+For details about SentiWordNet see:
+http://sentiwordnet.isti.cnr.it/
+
+    >>> from nltk.corpus import sentiwordnet as swn
+    >>> print(swn.senti_synset('breakdown.n.03'))
+    <breakdown.n.03: PosScore=0.0 NegScore=0.25>
+    >>> list(swn.senti_synsets('slow'))
+    [SentiSynset('decelerate.v.01'), SentiSynset('slow.v.02'),
+    SentiSynset('slow.v.03'), SentiSynset('slow.a.01'),
+    SentiSynset('slow.a.02'), SentiSynset('dense.s.04'),
+    SentiSynset('slow.a.04'), SentiSynset('boring.s.01'),
+    SentiSynset('dull.s.08'), SentiSynset('slowly.r.01'),
+    SentiSynset('behind.r.03')]
+    >>> happy = swn.senti_synsets('happy', 'a')
+    >>> happy0 = list(happy)[0]
+    >>> happy0.pos_score()
+    0.875
+    >>> happy0.neg_score()
+    0.0
+    >>> happy0.obj_score()
+    0.125
+"""
+
+import re
+from nltk.compat import python_2_unicode_compatible
+from nltk.corpus.reader import CorpusReader
+
+
+@python_2_unicode_compatible
+class SentiWordNetCorpusReader(CorpusReader):
+    def __init__(self, root, fileids, encoding='utf-8'):
+        """
+        Construct a new SentiWordNet Corpus Reader, using data from
+   	the specified file.
+        """
+        super(SentiWordNetCorpusReader, self).__init__(root, fileids, encoding=encoding)
+        if len(self._fileids) != 1:
+            raise ValueError('Exactly one file must be specified')
+        self._db = {}
+        self._parse_src_file()
+
+    def _parse_src_file(self):
+        lines = self.open(self._fileids[0]).read().splitlines()
+        lines = filter((lambda x: not re.search(r"^\s*#", x)), lines)
+        for i, line in enumerate(lines):
+            fields = [field.strip() for field in re.split(r"\t+", line)]
+            try:
+                pos, offset, pos_score, neg_score, synset_terms, gloss = fields
+            except:
+                raise ValueError('Line %s formatted incorrectly: %s\n' % (i, line))
+            if pos and offset:
+                offset = int(offset)
+                self._db[(pos, offset)] = (float(pos_score), float(neg_score))
+
+    def senti_synset(self, *vals):
+        from nltk.corpus import wordnet as wn
+
+        if tuple(vals) in self._db:
+            pos_score, neg_score = self._db[tuple(vals)]
+            pos, offset = vals
+            if pos == 's':
+                pos = 'a'
+            synset = wn.synset_from_pos_and_offset(pos, offset)
+            return SentiSynset(pos_score, neg_score, synset)
+        else:
+            synset = wn.synset(vals[0])
+            pos = synset.pos()
+            if pos == 's':
+                pos = 'a'
+            offset = synset.offset()
+            if (pos, offset) in self._db:
+                pos_score, neg_score = self._db[(pos, offset)]
+                return SentiSynset(pos_score, neg_score, synset)
+            else:
+                return None
+
+    def senti_synsets(self, string, pos=None):
+        from nltk.corpus import wordnet as wn
+
+        sentis = []
+        synset_list = wn.synsets(string, pos)
+        for synset in synset_list:
+            sentis.append(self.senti_synset(synset.name()))
+        sentis = filter(lambda x: x, sentis)
+        return sentis
+
+    def all_senti_synsets(self):
+        from nltk.corpus import wordnet as wn
+
+        for key, fields in self._db.items():
+            pos, offset = key
+            pos_score, neg_score = fields
+            synset = wn.synset_from_pos_and_offset(pos, offset)
+            yield SentiSynset(pos_score, neg_score, synset)
+
+
+@python_2_unicode_compatible
+class SentiSynset(object):
+    def __init__(self, pos_score, neg_score, synset):
+        self._pos_score = pos_score
+        self._neg_score = neg_score
+        self._obj_score = 1.0 - (self._pos_score + self._neg_score)
+        self.synset = synset
+
+    def pos_score(self):
+        return self._pos_score
+
+    def neg_score(self):
+        return self._neg_score
+
+    def obj_score(self):
+        return self._obj_score
+
+    def __str__(self):
+        """Prints just the Pos/Neg scores for now."""
+        s = "<"
+        s += self.synset.name() + ": "
+        s += "PosScore=%s " % self._pos_score
+        s += "NegScore=%s" % self._neg_score
+        s += ">"
+        return s
+
+    def __repr__(self):
+        return "Senti" + repr(self.synset)
--- a/venv/lib/python3.7/site-packages/nltk/corpus/reader/sinica_treebank.py
+++ b/venv/lib/python3.7/site-packages/nltk/corpus/reader/sinica_treebank.py
@@ -0,0 +1,76 @@
+# Natural Language Toolkit: Sinica Treebank Reader
+#
+# Copyright (C) 2001-2019 NLTK Project
+# Author: Steven Bird <stevenbird1@gmail.com>
+# URL: <http://nltk.org/>
+# For license information, see LICENSE.TXT
+
+"""
+Sinica Treebank Corpus Sample
+
+http://rocling.iis.sinica.edu.tw/CKIP/engversion/treebank.htm
+
+10,000 parsed sentences, drawn from the Academia Sinica Balanced
+Corpus of Modern Chinese.  Parse tree notation is based on
+Information-based Case Grammar.  Tagset documentation is available
+at http://www.sinica.edu.tw/SinicaCorpus/modern_e_wordtype.html
+
+Language and Knowledge Processing Group, Institute of Information
+Science, Academia Sinica
+
+The data is distributed with the Natural Language Toolkit under the terms of
+the Creative Commons Attribution-NonCommercial-ShareAlike License
+[http://creativecommons.org/licenses/by-nc-sa/2.5/].
+
+References:
+
+Feng-Yi Chen, Pi-Fang Tsai, Keh-Jiann Chen, and Chu-Ren Huang (1999)
+The Construction of Sinica Treebank. Computational Linguistics and
+Chinese Language Processing, 4, pp 87-104.
+
+Huang Chu-Ren, Keh-Jiann Chen, Feng-Yi Chen, Keh-Jiann Chen, Zhao-Ming
+Gao, and Kuang-Yu Chen. 2000. Sinica Treebank: Design Criteria,
+Annotation Guidelines, and On-line Interface. Proceedings of 2nd
+Chinese Language Processing Workshop, Association for Computational
+Linguistics.
+
+Chen Keh-Jiann and Yu-Ming Hsieh (2004) Chinese Treebanks and Grammar
+Extraction, Proceedings of IJCNLP-04, pp560-565.
+"""
+
+from nltk.tree import sinica_parse
+from nltk.tag import map_tag
+
+from nltk.corpus.reader.util import *
+from nltk.corpus.reader.api import *
+
+# Leading ``#``-prefixed token plus the whitespace character after it
+# (presumably the sentence identifier -- confirm against the corpus).
+IDENTIFIER = re.compile(r'^#\S+\s')
+# Trailing ``#...`` text that immediately follows a closing parenthesis.
+APPENDIX = re.compile(r'(?<=\))#.*$')
+# Two consecutive ``:``-introduced fields; ``_tag`` reads the first group as
+# the tag and the second as the word.
+TAGWORD = re.compile(r':([^:()|]+):([^:()|]+)')
+# Same shape as TAGWORD, but only the second field is captured.
+WORD = re.compile(r':[^:()|]+:([^:()|]+)')
+
+
+class SinicaTreebankCorpusReader(SyntaxCorpusReader):
+    """
+    Reader for the sinica treebank.
+    """
+
+    def _read_block(self, stream):
+        sent = stream.readline()
+        sent = IDENTIFIER.sub('', sent)
+        sent = APPENDIX.sub('', sent)
+        return [sent]
+
+    def _parse(self, sent):
+        return sinica_parse(sent)
+
+    def _tag(self, sent, tagset=None):
+        tagged_sent = [(w, t) for (t, w) in TAGWORD.findall(sent)]
+        if tagset and tagset != self._tagset:
+            tagged_sent = [
+                (w, map_tag(self._tagset, tagset, t)) for (w, t) in tagged_sent
+            ]
+        return tagged_sent
+
+    def _word(self, sent):
+        return WORD.findall(sent)
--- a/venv/lib/python3.7/site-packages/nltk/corpus/reader/string_category.py
+++ b/venv/lib/python3.7/site-packages/nltk/corpus/reader/string_category.py
@@ -0,0 +1,67 @@
+# Natural Language Toolkit: String Category Corpus Reader
+#
+# Copyright (C) 2001-2019 NLTK Project
+# Author: Steven Bird <stevenbird1@gmail.com>
+#         Edward Loper <edloper@gmail.com>
+# URL: <http://nltk.org/>
+# For license information, see LICENSE.TXT
+
+"""
+Read tuples from a corpus consisting of categorized strings.
+For example, from the question classification corpus:
+
+NUM:dist How far is it from Denver to Aspen ?
+LOC:city What county is Modesto , California in ?
+HUM:desc Who was Galileo ?
+DESC:def What is an atom ?
+NUM:date When did Hawaii become a state ?
+"""
+
+# based on PPAttachmentCorpusReader
+from six import string_types
+
+from nltk import compat
+from nltk.corpus.reader.util import *
+from nltk.corpus.reader.api import *
+
+# [xx] Should the order of the tuple be reversed -- in most other places
+# in nltk, we use the form (data, tag) -- e.g., tagged words and
+# labeled texts for classifiers.
+class StringCategoryCorpusReader(CorpusReader):
+    def __init__(self, root, fileids, delimiter=' ', encoding='utf8'):
+        """
+        :param root: The root directory for this corpus.
+        :param fileids: A list or regexp specifying the fileids in this corpus.
+        :param delimiter: Field delimiter
+        """
+        CorpusReader.__init__(self, root, fileids, encoding)
+        self._delimiter = delimiter
+
+    def tuples(self, fileids=None):
+        if fileids is None:
+            fileids = self._fileids
+        elif isinstance(fileids, string_types):
+            fileids = [fileids]
+        return concat(
+            [
+                StreamBackedCorpusView(fileid, self._read_tuple_block, encoding=enc)
+                for (fileid, enc) in self.abspaths(fileids, True)
+            ]
+        )
+
+    def raw(self, fileids=None):
+        """
+        :return: the text contents of the given fileids, as a single string.
+        """
+        if fileids is None:
+            fileids = self._fileids
+        elif isinstance(fileids, string_types):
+            fileids = [fileids]
+        return concat([self.open(f).read() for f in fileids])
+
+    def _read_tuple_block(self, stream):
+        line = stream.readline().strip()
+        if line:
+            return [tuple(line.split(self._delimiter, 1))]
+        else:
+            return []
--- a/venv/lib/python3.7/site-packages/nltk/corpus/reader/switchboard.py
+++ b/venv/lib/python3.7/site-packages/nltk/corpus/reader/switchboard.py
@@ -0,0 +1,129 @@
+# Natural Language Toolkit: Switchboard Corpus Reader
+#
+# Copyright (C) 2001-2019 NLTK Project
+# Author: Edward Loper <edloper@gmail.com>
+# URL: <http://nltk.org/>
+# For license information, see LICENSE.TXT
+from __future__ import unicode_literals
+import re
+
+from nltk.tag import str2tuple, map_tag
+from nltk import compat
+
+from nltk.corpus.reader.util import *
+from nltk.corpus.reader.api import *
+
+
+@compat.python_2_unicode_compatible
+class SwitchboardTurn(list):
+    """
+    A specialized list object used to encode switchboard utterances.
+    The elements of the list are the words in the utterance; and two
+    attributes, ``speaker`` and ``id``, are provided to retrieve the
+    spearker identifier and utterance id.  Note that utterance ids
+    are only unique within a given discourse.
+    """
+
+    def __init__(self, words, speaker, id):
+        list.__init__(self, words)
+        self.speaker = speaker
+        self.id = int(id)
+
+    def __repr__(self):
+        if len(self) == 0:
+            text = ''
+        elif isinstance(self[0], tuple):
+            text = ' '.join('%s/%s' % w for w in self)
+        else:
+            text = ' '.join(self)
+        return '<%s.%s: %r>' % (self.speaker, self.id, text)
+
+
+class SwitchboardCorpusReader(CorpusReader):
+    _FILES = ['tagged']
+    # Use the "tagged" file even for non-tagged data methods, since
+    # it's tokenized.
+
+    def __init__(self, root, tagset=None):
+        CorpusReader.__init__(self, root, self._FILES)
+        self._tagset = tagset
+
+    def words(self):
+        return StreamBackedCorpusView(self.abspath('tagged'), self._words_block_reader)
+
+    def tagged_words(self, tagset=None):
+        def tagged_words_block_reader(stream):
+            return self._tagged_words_block_reader(stream, tagset)
+
+        return StreamBackedCorpusView(self.abspath('tagged'), tagged_words_block_reader)
+
+    def turns(self):
+        return StreamBackedCorpusView(self.abspath('tagged'), self._turns_block_reader)
+
+    def tagged_turns(self, tagset=None):
+        def tagged_turns_block_reader(stream):
+            return self._tagged_turns_block_reader(stream, tagset)
+
+        return StreamBackedCorpusView(self.abspath('tagged'), tagged_turns_block_reader)
+
+    def discourses(self):
+        return StreamBackedCorpusView(
+            self.abspath('tagged'), self._discourses_block_reader
+        )
+
+    def tagged_discourses(self, tagset=False):
+        def tagged_discourses_block_reader(stream):
+            return self._tagged_discourses_block_reader(stream, tagset)
+
+        return StreamBackedCorpusView(
+            self.abspath('tagged'), tagged_discourses_block_reader
+        )
+
+    def _discourses_block_reader(self, stream):
+        # returns at most 1 discourse.  (The other methods depend on this.)
+        return [
+            [
+                self._parse_utterance(u, include_tag=False)
+                for b in read_blankline_block(stream)
+                for u in b.split('\n')
+                if u.strip()
+            ]
+        ]
+
+    def _tagged_discourses_block_reader(self, stream, tagset=None):
+        # returns at most 1 discourse.  (The other methods depend on this.)
+        return [
+            [
+                self._parse_utterance(u, include_tag=True, tagset=tagset)
+                for b in read_blankline_block(stream)
+                for u in b.split('\n')
+                if u.strip()
+            ]
+        ]
+
+    def _turns_block_reader(self, stream):
+        return self._discourses_block_reader(stream)[0]
+
+    def _tagged_turns_block_reader(self, stream, tagset=None):
+        return self._tagged_discourses_block_reader(stream, tagset)[0]
+
+    def _words_block_reader(self, stream):
+        return sum(self._discourses_block_reader(stream)[0], [])
+
+    def _tagged_words_block_reader(self, stream, tagset=None):
+        return sum(self._tagged_discourses_block_reader(stream, tagset)[0], [])
+
+    _UTTERANCE_RE = re.compile('(\w+)\.(\d+)\:\s*(.*)')
+    _SEP = '/'
+
+    def _parse_utterance(self, utterance, include_tag, tagset=None):
+        m = self._UTTERANCE_RE.match(utterance)
+        if m is None:
+            raise ValueError('Bad utterance %r' % utterance)
+        speaker, id, text = m.groups()
+        words = [str2tuple(s, self._SEP) for s in text.split()]
+        if not include_tag:
+            words = [w for (w, t) in words]
+        elif tagset and tagset != self._tagset:
+            words = [(w, map_tag(self._tagset, tagset, t)) for (w, t) in words]
+        return SwitchboardTurn(words, speaker, id)
--- a/venv/lib/python3.7/site-packages/nltk/corpus/reader/tagged.py
+++ b/venv/lib/python3.7/site-packages/nltk/corpus/reader/tagged.py
@@ -0,0 +1,394 @@
+# Natural Language Toolkit: Tagged Corpus Reader
+#
+# Copyright (C) 2001-2019 NLTK Project
+# Author: Edward Loper <edloper@gmail.com>
+#         Steven Bird <stevenbird1@gmail.com>
+#         Jacob Perkins <japerk@gmail.com>
+# URL: <http://nltk.org/>
+# For license information, see LICENSE.TXT
+
+"""
+A reader for corpora whose documents contain part-of-speech-tagged words.
+"""
+
+import os
+
+from six import string_types
+
+from nltk.tag import str2tuple, map_tag
+from nltk.tokenize import *
+
+from nltk.corpus.reader.api import *
+from nltk.corpus.reader.util import *
+from nltk.corpus.reader.timit import read_timit_block
+
+
+class TaggedCorpusReader(CorpusReader):
+    """
+    Reader for simple part-of-speech tagged corpora.  Paragraphs are
+    assumed to be split using blank lines.  Sentences and words can be
+    tokenized using the default tokenizers, or by custom tokenizers
+    specified as parameters to the constructor.  Words are parsed
+    using ``nltk.tag.str2tuple``.  By default, ``'/'`` is used as the
+    separator.  I.e., words should have the form::
+
+       word1/tag1 word2/tag2 word3/tag3 ...
+
+    But custom separators may be specified as parameters to the
+    constructor.  Part of speech tags are case-normalized to upper
+    case.
+    """
+
+    def __init__(
+        self,
+        root,
+        fileids,
+        sep='/',
+        word_tokenizer=WhitespaceTokenizer(),
+        sent_tokenizer=RegexpTokenizer('\n', gaps=True),
+        para_block_reader=read_blankline_block,
+        encoding='utf8',
+        tagset=None,
+    ):
+        """
+        Construct a new Tagged Corpus reader for a set of documents
+        located at the given root directory.  Example usage:
+
+            >>> root = '/...path to corpus.../'
+            >>> reader = TaggedCorpusReader(root, '.*', '.txt') # doctest: +SKIP
+
+        :param root: The root directory for this corpus.
+        :param fileids: A list or regexp specifying the fileids in this corpus.
+        """
+        CorpusReader.__init__(self, root, fileids, encoding)
+        self._sep = sep
+        self._word_tokenizer = word_tokenizer
+        self._sent_tokenizer = sent_tokenizer
+        self._para_block_reader = para_block_reader
+        self._tagset = tagset
+
+    def raw(self, fileids=None):
+        """
+        :return: the given file(s) as a single string.
+        :rtype: str
+        """
+        if fileids is None:
+            fileids = self._fileids
+        elif isinstance(fileids, string_types):
+            fileids = [fileids]
+        return concat([self.open(f).read() for f in fileids])
+
+    def words(self, fileids=None):
+        """
+        :return: the given file(s) as a list of words
+            and punctuation symbols.
+        :rtype: list(str)
+        """
+        return concat(
+            [
+                TaggedCorpusView(
+                    fileid,
+                    enc,
+                    False,
+                    False,
+                    False,
+                    self._sep,
+                    self._word_tokenizer,
+                    self._sent_tokenizer,
+                    self._para_block_reader,
+                    None,
+                )
+                for (fileid, enc) in self.abspaths(fileids, True)
+            ]
+        )
+
+    def sents(self, fileids=None):
+        """
+        :return: the given file(s) as a list of
+            sentences or utterances, each encoded as a list of word
+            strings.
+        :rtype: list(list(str))
+        """
+        return concat(
+            [
+                TaggedCorpusView(
+                    fileid,
+                    enc,
+                    False,
+                    True,
+                    False,
+                    self._sep,
+                    self._word_tokenizer,
+                    self._sent_tokenizer,
+                    self._para_block_reader,
+                    None,
+                )
+                for (fileid, enc) in self.abspaths(fileids, True)
+            ]
+        )
+
+    def paras(self, fileids=None):
+        """
+        :return: the given file(s) as a list of
+            paragraphs, each encoded as a list of sentences, which are
+            in turn encoded as lists of word strings.
+        :rtype: list(list(list(str)))
+        """
+        return concat(
+            [
+                TaggedCorpusView(
+                    fileid,
+                    enc,
+                    False,
+                    True,
+                    True,
+                    self._sep,
+                    self._word_tokenizer,
+                    self._sent_tokenizer,
+                    self._para_block_reader,
+                    None,
+                )
+                for (fileid, enc) in self.abspaths(fileids, True)
+            ]
+        )
+
+    def tagged_words(self, fileids=None, tagset=None):
+        """
+        :return: the given file(s) as a list of tagged
+            words and punctuation symbols, encoded as tuples
+            ``(word,tag)``.
+        :rtype: list(tuple(str,str))
+        """
+        if tagset and tagset != self._tagset:
+            tag_mapping_function = lambda t: map_tag(self._tagset, tagset, t)
+        else:
+            tag_mapping_function = None
+        return concat(
+            [
+                TaggedCorpusView(
+                    fileid,
+                    enc,
+                    True,
+                    False,
+                    False,
+                    self._sep,
+                    self._word_tokenizer,
+                    self._sent_tokenizer,
+                    self._para_block_reader,
+                    tag_mapping_function,
+                )
+                for (fileid, enc) in self.abspaths(fileids, True)
+            ]
+        )
+
+    def tagged_sents(self, fileids=None, tagset=None):
+        """
+        :return: the given file(s) as a list of
+            sentences, each encoded as a list of ``(word,tag)`` tuples.
+
+        :rtype: list(list(tuple(str,str)))
+        """
+        if tagset and tagset != self._tagset:
+            tag_mapping_function = lambda t: map_tag(self._tagset, tagset, t)
+        else:
+            tag_mapping_function = None
+        return concat(
+            [
+                TaggedCorpusView(
+                    fileid,
+                    enc,
+                    True,
+                    True,
+                    False,
+                    self._sep,
+                    self._word_tokenizer,
+                    self._sent_tokenizer,
+                    self._para_block_reader,
+                    tag_mapping_function,
+                )
+                for (fileid, enc) in self.abspaths(fileids, True)
+            ]
+        )
+
+    def tagged_paras(self, fileids=None, tagset=None):
+        """
+        :return: the given file(s) as a list of
+            paragraphs, each encoded as a list of sentences, which are
+            in turn encoded as lists of ``(word,tag)`` tuples.
+        :rtype: list(list(list(tuple(str,str))))
+        """
+        if tagset and tagset != self._tagset:
+            tag_mapping_function = lambda t: map_tag(self._tagset, tagset, t)
+        else:
+            tag_mapping_function = None
+        return concat(
+            [
+                TaggedCorpusView(
+                    fileid,
+                    enc,
+                    True,
+                    True,
+                    True,
+                    self._sep,
+                    self._word_tokenizer,
+                    self._sent_tokenizer,
+                    self._para_block_reader,
+                    tag_mapping_function,
+                )
+                for (fileid, enc) in self.abspaths(fileids, True)
+            ]
+        )
+
+
+class CategorizedTaggedCorpusReader(CategorizedCorpusReader, TaggedCorpusReader):
+    """
+    A part-of-speech tagged corpus reader whose documents are grouped
+    into categories according to their file identifiers.  Every access
+    method takes either ``fileids`` or ``categories``, never both.
+    """
+
+    def __init__(self, *args, **kwargs):
+        """
+        Initialize the corpus reader.  The categorization arguments
+        (``cat_pattern``, ``cat_map``, and ``cat_file``) are meant for
+        the ``CategorizedCorpusReader`` constructor; everything else is
+        handed to the ``TaggedCorpusReader`` constructor.
+        """
+        # The keyword dict is passed as ONE positional argument (it is not
+        # unpacked).  NOTE(review): presumably the categorized base removes
+        # its own keys from the dict in place -- confirm against
+        # CategorizedCorpusReader.
+        CategorizedCorpusReader.__init__(self, kwargs)
+        TaggedCorpusReader.__init__(self, *args, **kwargs)
+
+    def _resolve(self, fileids, categories):
+        """
+        Turn a ``fileids``/``categories`` pair into a single file selection.
+
+        :return: ``fileids`` unchanged when no categories are given,
+            otherwise ``self.fileids(categories)``.
+        :raise ValueError: if both arguments are given.
+        """
+        if categories is None:
+            return fileids
+        if fileids is not None:
+            raise ValueError('Specify fileids or categories, not both')
+        return self.fileids(categories)
+
+    def raw(self, fileids=None, categories=None):
+        selected = self._resolve(fileids, categories)
+        return TaggedCorpusReader.raw(self, selected)
+
+    def words(self, fileids=None, categories=None):
+        selected = self._resolve(fileids, categories)
+        return TaggedCorpusReader.words(self, selected)
+
+    def sents(self, fileids=None, categories=None):
+        selected = self._resolve(fileids, categories)
+        return TaggedCorpusReader.sents(self, selected)
+
+    def paras(self, fileids=None, categories=None):
+        selected = self._resolve(fileids, categories)
+        return TaggedCorpusReader.paras(self, selected)
+
+    def tagged_words(self, fileids=None, categories=None, tagset=None):
+        selected = self._resolve(fileids, categories)
+        return TaggedCorpusReader.tagged_words(self, selected, tagset)
+
+    def tagged_sents(self, fileids=None, categories=None, tagset=None):
+        selected = self._resolve(fileids, categories)
+        return TaggedCorpusReader.tagged_sents(self, selected, tagset)
+
+    def tagged_paras(self, fileids=None, categories=None, tagset=None):
+        selected = self._resolve(fileids, categories)
+        return TaggedCorpusReader.tagged_paras(self, selected, tagset)
+
+
+class TaggedCorpusView(StreamBackedCorpusView):
+    """
+    A corpus view specialized for tagged documents.  Flags passed to
+    the constructor decide whether tokens are grouped by sentence
+    and/or paragraph and whether part of speech tags are kept.
+    ``TaggedCorpusView`` objects are typically created by
+    ``TaggedCorpusReader`` (not directly by nltk users).
+    """
+
+    def __init__(
+        self,
+        corpus_file,
+        encoding,
+        tagged,
+        group_by_sent,
+        group_by_para,
+        sep,
+        word_tokenizer,
+        sent_tokenizer,
+        para_block_reader,
+        tag_mapping_function=None,
+    ):
+        """
+        :param corpus_file: the file to view; forwarded to the base view.
+        :param encoding: forwarded to the base view as ``encoding``.
+        :param tagged: if false, tags are dropped and only words are kept.
+        :param group_by_sent: if true, each sentence is its own list.
+        :param group_by_para: if true, each paragraph is its own list.
+        :param sep: separator handed to ``str2tuple`` for every token.
+        :param word_tokenizer: object whose ``tokenize`` splits a
+            sentence string into token strings.
+        :param sent_tokenizer: object whose ``tokenize`` splits a
+            paragraph string into sentence strings.
+        :param para_block_reader: callable that takes the stream and
+            returns an iterable of paragraph strings.
+        :param tag_mapping_function: optional callable applied to each tag.
+        """
+        # Output shaping flags.
+        self._tagged = tagged
+        self._group_by_sent = group_by_sent
+        self._group_by_para = group_by_para
+        # Tokenization helpers.
+        self._sep = sep
+        self._word_tokenizer = word_tokenizer
+        self._sent_tokenizer = sent_tokenizer
+        self._para_block_reader = para_block_reader
+        self._tag_mapping_function = tag_mapping_function
+        StreamBackedCorpusView.__init__(self, corpus_file, encoding=encoding)
+
+    def read_block(self, stream):
+        """Reads one paragraph at a time."""
+        convert_tag = self._tag_mapping_function
+        result = []
+        for para_text in self._para_block_reader(stream):
+            para_items = []
+            # Either nest each sentence as its own list or flatten it
+            # into the paragraph.
+            if self._group_by_sent:
+                add_sent = para_items.append
+            else:
+                add_sent = para_items.extend
+            for sent_text in self._sent_tokenizer.tokenize(para_text):
+                tokens = [
+                    str2tuple(tok, self._sep)
+                    for tok in self._word_tokenizer.tokenize(sent_text)
+                ]
+                if convert_tag:
+                    tokens = [(word, convert_tag(tag)) for (word, tag) in tokens]
+                if not self._tagged:
+                    tokens = [word for (word, tag) in tokens]
+                add_sent(tokens)
+            # Same choice one level up: nest or flatten the paragraph.
+            if self._group_by_para:
+                result.append(para_items)
+            else:
+                result.extend(para_items)
+        return result
+
+
+# needs to implement simplified tags
+class MacMorphoCorpusReader(TaggedCorpusReader):
+    """
+    Corpus reader for MAC_MORPHO.  The corpus stores one tagged word
+    per line, with '_' separating word and tag, and marks the end of a
+    sentence with the end-sentence tag ('_.').  The corpus carries no
+    paragraph information, so every paragraph produced by
+    ``self.paras()`` and ``self.tagged_paras()`` holds a single
+    sentence.
+    """
+
+    def __init__(self, root, fileids, encoding='utf8', tagset=None):
+        """
+        Configure the underlying ``TaggedCorpusReader`` for the
+        MAC_MORPHO layout; ``root``, ``fileids``, ``encoding`` and
+        ``tagset`` are forwarded unchanged.
+        """
+        options = dict(
+            sep='_',
+            word_tokenizer=LineTokenizer(),
+            sent_tokenizer=RegexpTokenizer('.*\n'),
+            para_block_reader=self._read_block,
+            encoding=encoding,
+            tagset=tagset,
+        )
+        TaggedCorpusReader.__init__(self, root, fileids, **options)
+
+    def _read_block(self, stream):
+        """Read one block from ``stream`` using ``read_regexp_block``."""
+        start_re = r'.*'
+        end_re = r'.*_\.'
+        return read_regexp_block(stream, start_re, end_re)
+
+
+class TimitTaggedCorpusReader(TaggedCorpusReader):
+    """
+    A corpus reader for tagged sentences that are included in the TIMIT corpus.
+
+    Paragraph-level access is not supported: ``paras()`` and
+    ``tagged_paras()`` always raise ``NotImplementedError``.
+    """
+
+    def __init__(self, *args, **kwargs):
+        """
+        Forward all arguments to ``TaggedCorpusReader``, forcing
+        ``read_timit_block`` as the paragraph block reader.
+        """
+        TaggedCorpusReader.__init__(
+            self, para_block_reader=read_timit_block, *args, **kwargs
+        )
+
+    def paras(self, fileids=None):
+        """
+        Not supported for this corpus.
+
+        The parameter exists only so that calls written against the
+        inherited signature reach the explanatory error below instead of
+        failing with a ``TypeError``.
+
+        :raise NotImplementedError: always.
+        """
+        raise NotImplementedError('use sents() instead')
+
+    def tagged_paras(self, fileids=None, tagset=None):
+        """
+        Not supported for this corpus.
+
+        The parameters exist only so that calls written against the
+        inherited signature reach the explanatory error below instead of
+        failing with a ``TypeError``.
+
+        :raise NotImplementedError: always.
+        """
+        raise NotImplementedError('use tagged_sents() instead')
--- a/venv/lib/python3.7/site-packages/nltk/corpus/reader/timit.py
+++ b/venv/lib/python3.7/site-packages/nltk/corpus/reader/timit.py
@@ -0,0 +1,499 @@
+# Natural Language Toolkit: TIMIT Corpus Reader
+#
+# Copyright (C) 2001-2007 NLTK Project
+# Author: Haejoong Lee <haejoong@ldc.upenn.edu>
+#         Steven Bird <stevenbird1@gmail.com>
+#         Jacob Perkins <japerk@gmail.com>
+# URL: <http://nltk.org/>
+# For license information, see LICENSE.TXT
+
+# [xx] this docstring is out-of-date:
+"""
+Read tokens, phonemes and audio data from the NLTK TIMIT Corpus.
+
+This corpus contains a selected portion of the TIMIT corpus.
+
+ - 16 speakers from 8 dialect regions
+ - 1 male and 1 female from each dialect region
+ - total 130 sentences (10 sentences per speaker.  Note that some
+   sentences are shared among other speakers, especially sa1 and sa2
+   are spoken by all speakers.)
+ - total 160 recordings of sentences (10 recordings per speaker)
+ - audio format: NIST Sphere, single channel, 16kHz sampling,
+  16 bit sample, PCM encoding
+
+
+Module contents
+===============
+
+The timit corpus reader provides 4 functions and 4 data items.
+
+ - utterances
+
+   List of utterances in the corpus.  There are total 160 utterances,
+   each of which corresponds to a unique utterance of a speaker.
+   Here's an example of an utterance identifier in the list::
+
+       dr1-fvmh0/sx206
+         - _----  _---
+         | |  |   | |
+         | |  |   | |
+         | |  |   | `--- sentence number
+         | |  |   `----- sentence type (a:all, i:shared, x:exclusive)
+         | |  `--------- speaker ID
+         | `------------ sex (m:male, f:female)
+         `-------------- dialect region (1..8)
+
+ - speakers
+
+   List of speaker IDs.  An example of speaker ID::
+
+       dr1-fvmh0
+
+   Note that if you split an item ID on the slash and take the first element of
+   the result, you will get a speaker ID.
+
+       >>> itemid = 'dr1-fvmh0/sx206'
+       >>> spkrid , sentid = itemid.split('/')
+       >>> spkrid
+       'dr1-fvmh0'
+
+   The second element of the result is a sentence ID.
+
+ - dictionary()
+
+   Phonetic dictionary of words contained in this corpus.  This is a Python
+   dictionary from words to phoneme lists.
+
+ - spkrinfo()
+
+   Speaker information table.  It's a Python dictionary from speaker IDs to
+   records of 10 fields.  Speaker IDs are the same as the ones in timit.speakers.
+   Each record is a dictionary from field names to values, and the fields are
+   as follows::
+
+     id         speaker ID as defined in the original TIMIT speaker info table
+     sex        speaker gender (M:male, F:female)
+     dr         speaker dialect region (1:new england, 2:northern,
+                3:north midland, 4:south midland, 5:southern, 6:new york city,
+                7:western, 8:army brat (moved around))
+     use        corpus type (TRN:training, TST:test)
+                in this sample corpus only TRN is available
+     recdate    recording date
+     birthdate  speaker birth date
+     ht         speaker height
+     race       speaker race (WHT:white, BLK:black, AMR:american indian,
+                SPN:spanish-american, ORN:oriental,???:unknown)
+     edu        speaker education level (HS:high school, AS:associate degree,
+                BS:bachelor's degree (BS or BA), MS:master's degree (MS or MA),
+                PHD:doctorate degree (PhD,JD,MD), ??:unknown)
+     comments   comments by the recorder
+
+The 4 functions are as follows.
+
+ - tokenized(sentences=items, offset=False)
+
+   Given a list of items, returns an iterator of a list of word lists,
+   each of which corresponds to an item (sentence).  If offset is set to True,
+   each element of the word list is a tuple of word(string), start offset and
+   end offset, where offset is represented as a number of 16kHz samples.
+
+ - phonetic(sentences=items, offset=False)
+
+   Given a list of items, returns an iterator of a list of phoneme lists,
+   each of which corresponds to an item (sentence).  If offset is set to True,
+   each element of the phoneme list is a tuple of phoneme(string), start offset
+   and end offset, where offset is represented as a number of 16kHz samples.
+
+ - audiodata(item, start=0, end=None)
+
+   Given an item, returns a chunk of audio samples formatted into a string.
+   When the function is called, if start and end are omitted, the entire
+   samples of the recording will be returned.  If only end is omitted,
+   samples from the start offset to the end of the recording will be returned.
+
+ - play(data)
+
+   Play the given audio samples. The audio samples can be obtained from the
+   timit.audiodata function.
+
+"""
+from __future__ import print_function, unicode_literals
+
+import sys
+import os
+import re
+import tempfile
+import time
+
+from six import string_types
+
+from nltk import compat
+from nltk.tree import Tree
+from nltk.internals import import_from_stdlib
+
+from nltk.corpus.reader.util import *
+from nltk.corpus.reader.api import *
+
+
+class TimitCorpusReader(CorpusReader):
+    """
+    Reader for the TIMIT corpus (or any other corpus with the same
+    file layout and use of file formats).  The corpus root directory
+    should contain the following files:
+
+      - timitdic.txt: dictionary of standard transcriptions
+      - spkrinfo.txt: table of speaker information
+
+    In addition, the root directory should contain one subdirectory
+    for each speaker, containing four files for each utterance:
+
+      - <utterance-id>.txt: text content of utterances
+      - <utterance-id>.wrd: tokenized text content of utterances
+      - <utterance-id>.phn: phonetic transcription of utterances
+      - <utterance-id>.wav: utterance sound file
+
+    Utterance identifiers have the form ``dr1-fvmh0/sx206``, i.e.
+    ``<speaker-id>/<sentence-id>``.
+    """
+
+    # A regexp matching fileids that are used by this corpus reader.
+    _FILE_RE = r'(\w+-\w+/\w+\.(phn|txt|wav|wrd))|' + r'timitdic\.txt|spkrinfo\.txt'
+    # A regexp matching the ``.txt`` file of every utterance.
+    _UTTERANCE_RE = r'\w+-\w+/\w+\.txt'
+
+    def __init__(self, root, encoding='utf8'):
+        """
+        Construct a new TIMIT corpus reader in the given directory.
+
+        :param root: The root directory for this corpus.
+        :param encoding: The encoding of the corpus files.  When a single
+            string is given it is applied to every file except ``.wav``
+            files, which are mapped to ``None``.
+        """
+        # Ensure that wave files don't get treated as unicode data:
+        if isinstance(encoding, string_types):
+            encoding = [(r'.*\.wav', None), ('.*', encoding)]
+
+        CorpusReader.__init__(
+            self, root, find_corpus_fileids(root, self._FILE_RE), encoding=encoding
+        )
+
+        # A list of the utterance identifiers for all utterances in this
+        # corpus (the ``.txt`` fileids with their extension stripped).
+        self._utterances = [
+            name[:-4] for name in find_corpus_fileids(root, self._UTTERANCE_RE)
+        ]
+
+        # Speaker table; filled on the first call to ``spkrinfo``.
+        self._speakerinfo = None
+        self._root = root
+        # Sorted, de-duplicated speaker IDs (the part before the '/').
+        self.speakers = sorted(set(u.split('/')[0] for u in self._utterances))
+
+    def fileids(self, filetype=None):
+        """
+        Return a list of file identifiers for the files that make up
+        this corpus.
+
+        :param filetype: If specified, then ``filetype`` indicates that
+            only the files that have the given type should be
+            returned.  Accepted values are: ``txt``, ``wrd``, ``phn``,
+            ``wav``, or ``metadata``,
+        :raise ValueError: if ``filetype`` is any other value.
+        """
+        if filetype is None:
+            return CorpusReader.fileids(self)
+        elif filetype in ('txt', 'wrd', 'phn', 'wav'):
+            return ['%s.%s' % (u, filetype) for u in self._utterances]
+        elif filetype == 'metadata':
+            return ['timitdic.txt', 'spkrinfo.txt']
+        else:
+            raise ValueError('Bad value for filetype: %r' % filetype)
+
+    def utteranceids(
+        self, dialect=None, sex=None, spkrid=None, sent_type=None, sentid=None
+    ):
+        """
+        Each filter may be a single string or a collection of strings;
+        a filter left as ``None`` is not applied.  The filters rely on
+        the fixed character layout of identifiers such as
+        ``dr1-fvmh0/sx206``.
+
+        :param dialect: dialect region character(s), e.g. ``'1'``.
+        :param sex: sex character(s), e.g. ``'f'``.
+        :param spkrid: speaker ID(s), e.g. ``'dr1-fvmh0'``.
+        :param sent_type: sentence type character(s), e.g. ``'x'``.
+        :param sentid: sentence ID(s), e.g. ``'sx206'``.
+        :return: A list of the utterance identifiers for all
+            utterances in this corpus, or for the given speaker, dialect
+            region, gender, sentence type, or sentence number, if
+            specified.
+        """
+        if isinstance(dialect, string_types):
+            dialect = [dialect]
+        if isinstance(sex, string_types):
+            sex = [sex]
+        if isinstance(spkrid, string_types):
+            spkrid = [spkrid]
+        if isinstance(sent_type, string_types):
+            sent_type = [sent_type]
+        if isinstance(sentid, string_types):
+            sentid = [sentid]
+
+        utterances = self._utterances[:]
+        if dialect is not None:
+            utterances = [u for u in utterances if u[2] in dialect]
+        if sex is not None:
+            utterances = [u for u in utterances if u[4] in sex]
+        if spkrid is not None:
+            utterances = [u for u in utterances if u[:9] in spkrid]
+        if sent_type is not None:
+            utterances = [u for u in utterances if u[11] in sent_type]
+        if sentid is not None:
+            # Compare against ``sentid``; comparing the sentence part with
+            # ``spkrid`` (as this line used to) either raised a TypeError
+            # when no speaker filter was given or matched nothing.
+            utterances = [u for u in utterances if u[10:] in sentid]
+        return utterances
+
+    def transcription_dict(self):
+        """
+        :return: A dictionary giving the 'standard' transcription for
+            each word, as a list of phoneme strings read from
+            ``timitdic.txt``.
+        :raise ValueError: if a non-blank, non-comment line does not have
+            the form ``word /phonemes/``.
+        """
+        _transcriptions = {}
+        for line in self.open('timitdic.txt'):
+            # Skip blank lines and ';' comment lines.
+            if not line.strip() or line[0] == ';':
+                continue
+            m = re.match(r'\s*(\S+)\s+/(.*)/\s*$', line)
+            if not m:
+                raise ValueError('Bad line: %r' % line)
+            _transcriptions[m.group(1)] = m.group(2).split()
+        return _transcriptions
+
+    def spkrid(self, utterance):
+        """Return the speaker ID part (before the '/') of an utterance ID."""
+        return utterance.split('/')[0]
+
+    def sentid(self, utterance):
+        """Return the sentence ID part (after the '/') of an utterance ID."""
+        return utterance.split('/')[1]
+
+    def utterance(self, spkrid, sentid):
+        """Build an utterance ID from a speaker ID and a sentence ID."""
+        return '%s/%s' % (spkrid, sentid)
+
+    def spkrutteranceids(self, speaker):
+        """
+        :return: A list of all utterances associated with a given
+            speaker.
+        """
+        return [
+            utterance
+            for utterance in self._utterances
+            if utterance.startswith(speaker + '/')
+        ]
+
+    def spkrinfo(self, speaker):
+        """
+        :param speaker: a speaker ID, or an utterance ID (in which case
+            the speaker of that utterance is looked up).
+        :return: The ``SpeakerInfo`` record for the speaker, built from
+            ``spkrinfo.txt``.  The table is parsed on the first call and
+            reused afterwards.
+        :raise KeyError: if the speaker is not in the table.
+        """
+        if speaker in self._utterances:
+            speaker = self.spkrid(speaker)
+
+        if self._speakerinfo is None:
+            self._speakerinfo = {}
+            for line in self.open('spkrinfo.txt'):
+                # Skip blank lines and ';' comment lines.
+                if not line.strip() or line[0] == ';':
+                    continue
+                # At most 10 fields; the last one keeps its whitespace.
+                rec = line.strip().split(None, 9)
+                # Key in the same form as ``self.speakers``:
+                # 'dr' + region + '-' + sex + id, lower-cased.
+                key = "dr%s-%s%s" % (rec[2], rec[1].lower(), rec[0].lower())
+                self._speakerinfo[key] = SpeakerInfo(*rec)
+
+        return self._speakerinfo[speaker]
+
+    def phones(self, utterances=None):
+        """
+        :return: the last field of every non-blank line of the ``.phn``
+            files of the given utterances (all utterances if ``None``).
+        """
+        return [
+            line.split()[-1]
+            for fileid in self._utterance_fileids(utterances, '.phn')
+            for line in self.open(fileid)
+            if line.strip()
+        ]
+
+    def phone_times(self, utterances=None):
+        """
+        offset is represented as a number of 16kHz samples!
+
+        :return: a list of ``(phone, start, end)`` tuples read from the
+            ``.phn`` files.
+        """
+        return [
+            (line.split()[2], int(line.split()[0]), int(line.split()[1]))
+            for fileid in self._utterance_fileids(utterances, '.phn')
+            for line in self.open(fileid)
+            if line.strip()
+        ]
+
+    def words(self, utterances=None):
+        """
+        :return: the last field of every non-blank line of the ``.wrd``
+            files of the given utterances (all utterances if ``None``).
+        """
+        return [
+            line.split()[-1]
+            for fileid in self._utterance_fileids(utterances, '.wrd')
+            for line in self.open(fileid)
+            if line.strip()
+        ]
+
+    def word_times(self, utterances=None):
+        """
+        :return: a list of ``(word, start, end)`` tuples read from the
+            ``.wrd`` files.
+        """
+        return [
+            (line.split()[2], int(line.split()[0]), int(line.split()[1]))
+            for fileid in self._utterance_fileids(utterances, '.wrd')
+            for line in self.open(fileid)
+            if line.strip()
+        ]
+
+    def sents(self, utterances=None):
+        """
+        :return: one list of words per ``.wrd`` file of the given
+            utterances (all utterances if ``None``).
+        """
+        return [
+            [line.split()[-1] for line in self.open(fileid) if line.strip()]
+            for fileid in self._utterance_fileids(utterances, '.wrd')
+        ]
+
+    def sent_times(self, utterances=None):
+        """
+        :return: a list of ``(sentence text, start, end)`` tuples read
+            from the ``.txt`` files; the text is everything after the
+            first two whitespace-separated fields.
+        """
+        return [
+            (
+                line.split(None, 2)[-1].strip(),
+                int(line.split()[0]),
+                int(line.split()[1]),
+            )
+            for fileid in self._utterance_fileids(utterances, '.txt')
+            for line in self.open(fileid)
+            if line.strip()
+        ]
+
+    def phone_trees(self, utterances=None):
+        """
+        Build one ``Tree`` labelled ``'S'`` per sentence, whose children
+        are word subtrees (labelled with the word, containing its phones)
+        and bare phones that fall outside any word.
+
+        :param utterances: an utterance ID, a list of them, or ``None``
+            for all utterances.
+        :return: a list of trees.
+        """
+        if utterances is None:
+            utterances = self._utterances
+        if isinstance(utterances, string_types):
+            utterances = [utterances]
+
+        trees = []
+        for utterance in utterances:
+            word_times = self.word_times(utterance)
+            phone_times = self.phone_times(utterance)
+            sent_times = self.sent_times(utterance)
+
+            while sent_times:
+                (sent, sent_start, sent_end) = sent_times.pop(0)
+                trees.append(Tree('S', []))
+                # Phones ending before the first word starts hang
+                # directly off the sentence node.
+                while (
+                    word_times and phone_times and phone_times[0][2] <= word_times[0][1]
+                ):
+                    trees[-1].append(phone_times.pop(0)[0])
+                # Each word ending inside the sentence becomes a subtree
+                # holding the phones that end no later than the word.
+                while word_times and word_times[0][2] <= sent_end:
+                    (word, word_start, word_end) = word_times.pop(0)
+                    trees[-1].append(Tree(word, []))
+                    while phone_times and phone_times[0][2] <= word_end:
+                        trees[-1][-1].append(phone_times.pop(0)[0])
+                # Remaining phones inside the sentence go on the sentence node.
+                while phone_times and phone_times[0][2] <= sent_end:
+                    trees[-1].append(phone_times.pop(0)[0])
+        return trees
+
+    # [xx] NOTE: This is currently broken -- we're assuming that the
+    # fileids are WAV fileids (aka RIFF), but they're actually NIST SPHERE
+    # fileids.
+    def wav(self, utterance, start=0, end=None):
+        """
+        Return the frames ``start``..``end`` of the utterance's sound
+        file, re-packaged with the ``wave`` module.
+
+        :param utterance: the utterance ID.
+        :param start: index of the first frame to include.
+        :param end: index one past the last frame; ``None`` means all
+            remaining frames.
+        :return: the contents of the newly written wave file.
+        """
+        # nltk.chunk conflicts with the stdlib module 'chunk'
+        wave = import_from_stdlib('wave')
+
+        w = wave.open(self.open(utterance + '.wav'), 'rb')
+
+        if end is None:
+            end = w.getnframes()
+
+        # Skip past frames before start, then read the frames we want
+        w.readframes(start)
+        frames = w.readframes(end - start)
+
+        # Open a new temporary file -- the wave module requires
+        # an actual file, and won't work w/ stringio. :(
+        tf = tempfile.TemporaryFile()
+        out = wave.open(tf, 'w')
+
+        # Write the parameters & data to the new file.
+        out.setparams(w.getparams())
+        out.writeframes(frames)
+        out.close()
+
+        # Read the data back from the file, and return it.  The
+        # file will automatically be deleted when we return.
+        tf.seek(0)
+        return tf.read()
+
+    def audiodata(self, utterance, start=0, end=None):
+        """
+        Return the raw sample data of the utterance's sound file.
+
+        The first 44 bytes of the file are skipped as header and every
+        sample is taken to occupy 2 bytes.
+
+        :param utterance: the utterance ID.
+        :param start: index of the first sample to include.
+        :param end: index one past the last sample; ``None`` means up to
+            the end of the file.
+        """
+        assert end is None or end > start
+        headersize = 44
+        stream = self.open(utterance + '.wav')
+        try:
+            if end is None:
+                data = stream.read()
+            else:
+                data = stream.read(headersize + end * 2)
+        finally:
+            # Do not leave the sound file open after reading it.
+            stream.close()
+        return data[headersize + start * 2 :]
+
+    def _utterance_fileids(self, utterances, extension):
+        """
+        Map utterance IDs to fileids by appending ``extension``.
+
+        :param utterances: an utterance ID, a list of them, or ``None``
+            for all utterances.
+        :param extension: the file extension, including the dot.
+        """
+        if utterances is None:
+            utterances = self._utterances
+        if isinstance(utterances, string_types):
+            utterances = [utterances]
+        return ['%s%s' % (u, extension) for u in utterances]
+
+    def play(self, utterance, start=0, end=None):
+        """
+        Play the given audio sample.
+
+        ``ossaudiodev`` is tried first, then ``pygame``; if neither can
+        be imported, a message is printed to stderr.
+
+        :param utterance: The utterance id of the sample to play
+        :param start: index of the first sample to play.
+        :param end: index one past the last sample; ``None`` plays to
+            the end.
+        """
+        # Method 1: os audio dev.
+        try:
+            import ossaudiodev
+
+            try:
+                dsp = ossaudiodev.open('w')
+                dsp.setfmt(ossaudiodev.AFMT_S16_LE)
+                dsp.channels(1)
+                dsp.speed(16000)
+                dsp.write(self.audiodata(utterance, start, end))
+                dsp.close()
+            except IOError as e:
+                print(
+                    (
+                        "can't acquire the audio device; please "
+                        "activate your audio device."
+                    ),
+                    file=sys.stderr,
+                )
+                print("system error message:", str(e), file=sys.stderr)
+            return
+        except ImportError:
+            pass
+
+        # Method 2: pygame
+        try:
+            import pygame.mixer
+
+            # io.BytesIO exists on both Python 2 and 3; the Python-2-only
+            # StringIO module made this branch fail with ImportError on
+            # Python 3 even when pygame was installed.
+            from io import BytesIO
+
+            pygame.mixer.init(16000)
+            f = BytesIO(self.wav(utterance, start, end))
+            pygame.mixer.Sound(f).play()
+            while pygame.mixer.get_busy():
+                time.sleep(0.01)
+            return
+        except ImportError:
+            pass
+
+        # Method 3: complain. :)
+        print(
+            ("you must install pygame or ossaudiodev " "for audio playback."),
+            file=sys.stderr,
+        )
+
+
+@compat.python_2_unicode_compatible
+class SpeakerInfo(object):
+    """
+    One record of the speaker table: a plain holder for the fields
+    passed to the constructor.
+    """
+
+    def __init__(
+        self, id, sex, dr, use, recdate, birthdate, ht, race, edu, comments=None
+    ):
+        # Identity of the speaker.
+        self.id = id
+        self.sex = sex
+        self.dr = dr
+        self.use = use
+        # Dates.
+        self.recdate = recdate
+        self.birthdate = birthdate
+        # Remaining descriptive fields.
+        self.ht = ht
+        self.race = race
+        self.edu = edu
+        self.comments = comments
+
+    def __repr__(self):
+        """Return ``SpeakerInfo(name=value, ...)`` listing every field."""
+        names = 'id sex dr use recdate birthdate ht race edu comments'.split()
+        pairs = []
+        for name in names:
+            pairs.append('%s=%r' % (name, getattr(self, name)))
+        return 'SpeakerInfo(%s)' % (', '.join(pairs))
+
+
+def read_timit_block(stream):
+    """
+    Block reader for timit tagged sentences.  Reads one line from
+    ``stream``, discards the sentence number in front of the first
+    space, and returns the rest of the line as a one-element list.
+    Returns an empty list at end of stream.
+    """
+    line = stream.readline()
+    if line:
+        _number, sent = line.split(' ', 1)
+        return [sent]
+    return []
--- a/venv/lib/python3.7/site-packages/nltk/corpus/reader/toolbox.py
+++ b/venv/lib/python3.7/site-packages/nltk/corpus/reader/toolbox.py
@@ -0,0 +1,83 @@
+# Natural Language Toolkit: Toolbox Reader
+#
+# Copyright (C) 2001-2019 NLTK Project
+# Author: Greg Aumann <greg_aumann@sil.org>
+#         Stuart Robinson <Stuart.Robinson@mpi.nl>
+#         Steven Bird <stevenbird1@gmail.com>
+# URL: <http://nltk.org/>
+# For license information, see LICENSE.TXT
+
+"""
+Module for reading, writing and manipulating
+Toolbox databases and settings fileids.
+"""
+
+from nltk.toolbox import ToolboxData
+from nltk.corpus.reader.util import *
+from nltk.corpus.reader.api import *
+
+
+class ToolboxCorpusReader(CorpusReader):
+    """
+    Corpus reader that exposes the contents of Toolbox files through
+    ``ToolboxData``.
+    """
+
+    def xml(self, fileids, key=None):
+        """
+        :return: the concatenation of ``ToolboxData(...).parse(key=key)``
+            for each of the given files.
+        """
+        return concat(
+            [
+                ToolboxData(path, enc).parse(key=key)
+                for (path, enc) in self.abspaths(fileids, True)
+            ]
+        )
+
+    def fields(
+        self,
+        fileids,
+        strip=True,
+        unwrap=True,
+        encoding='utf8',
+        errors='strict',
+        unicode_fields=None,
+    ):
+        """
+        :return: the concatenated output of ``ToolboxData(...).fields``
+            for each of the given files.  All parameters other than
+            ``fileids`` are forwarded to that method unchanged.
+        """
+        return concat(
+            [
+                list(
+                    ToolboxData(fileid, enc).fields(
+                        strip, unwrap, encoding, errors, unicode_fields
+                    )
+                )
+                for (fileid, enc) in self.abspaths(fileids, include_encoding=True)
+            ]
+        )
+
+    # should probably be done lazily:
+    def entries(self, fileids, **kwargs):
+        """
+        Group the fields of the given files into entries.
+
+        :param kwargs: ``key`` names the marker that starts a new entry
+            (default ``'lx'``); every other keyword is forwarded to
+            ``fields``.
+        :return: a list of ``(contents, fields)`` tuples, where
+            ``contents`` is the value of the key field and ``fields`` is
+            the list of ``(marker, contents)`` pairs that follow it.
+        """
+        key = kwargs.pop('key', 'lx')  # 'lx' is the default key in MDF
+        entries = []
+        for marker, contents in self.fields(fileids, **kwargs):
+            if marker == key:
+                entries.append((contents, []))
+            elif entries:
+                entries[-1][-1].append((marker, contents))
+            # Fields that appear before the first key marker belong to
+            # no entry and are dropped.
+        return entries
+
+    def words(self, fileids, key='lx'):
+        """
+        :return: the contents of every field whose marker equals ``key``.
+        """
+        return [contents for marker, contents in self.fields(fileids) if marker == key]
+
+    def raw(self, fileids):
+        """
+        :param fileids: a fileid, a list of fileids, or ``None`` for all
+            files of this corpus.
+        :return: the concatenated contents of the given files.
+        """
+        if fileids is None:
+            fileids = self._fileids
+        elif isinstance(fileids, string_types):
+            fileids = [fileids]
+        contents = []
+        for f in fileids:
+            stream = self.open(f)
+            try:
+                contents.append(stream.read())
+            finally:
+                # Close each file as soon as it has been read.
+                stream.close()
+        return concat(contents)
+
+
+def demo():
+    """Placeholder demonstration entry point; does nothing."""
+    return None
+
+
+if __name__ == '__main__':
+    demo()
--- a/venv/lib/python3.7/site-packages/nltk/corpus/reader/twitter.py
+++ b/venv/lib/python3.7/site-packages/nltk/corpus/reader/twitter.py
@@ -0,0 +1,153 @@
+# Natural Language Toolkit: Twitter Corpus Reader
+#
+# Copyright (C) 2001-2019 NLTK Project
+# Author: Ewan Klein <ewan@inf.ed.ac.uk>
+# URL: <http://nltk.org/>
+# For license information, see LICENSE.TXT
+
+"""
+A reader for corpora that consist of Tweets. It is assumed that the Tweets
+have been serialised into line-delimited JSON.
+"""
+
+import json
+import os
+
+from six import string_types
+
+from nltk.tokenize import TweetTokenizer
+
+from nltk.corpus.reader.util import StreamBackedCorpusView, concat, ZipFilePathPointer
+from nltk.corpus.reader.api import CorpusReader
+
+
+class TwitterCorpusReader(CorpusReader):
+    r"""
+    Reader for corpora that consist of Tweets represented as a list of line-delimited JSON.
+
+    Individual Tweets can be tokenized using the default tokenizer, or by a
+    custom tokenizer specified as a parameter to the constructor.
+
+    Construct a new Tweet corpus reader for a set of documents
+    located at the given root directory.
+
+    If you made your own tweet collection in a directory called
+    `twitter-files`, then you can initialise the reader as::
+
+        from nltk.corpus import TwitterCorpusReader
+        reader = TwitterCorpusReader('/path/to/twitter-files', '.*\.json')
+
+    However, the recommended approach is to set the relevant directory as the
+    value of the environmental variable `TWITTER`, and then invoke the reader
+    as follows::
+
+       root = os.environ['TWITTER']
+       reader = TwitterCorpusReader(root, '.*\.json')
+
+    If you want to work directly with the raw Tweets, the `json` library can
+    be used::
+
+       import json
+       for tweet in reader.docs():
+           print(json.dumps(tweet, indent=1, sort_keys=True))
+
+    """
+
+    # The corpus view class used by this reader.
+    CorpusView = StreamBackedCorpusView
+
+    def __init__(
+        self, root, fileids=None, word_tokenizer=TweetTokenizer(), encoding='utf8'
+    ):
+        """
+
+        :param root: The root directory for this corpus.
+
+        :param fileids: A list or regexp specifying the fileids in this corpus.
+
+        :param word_tokenizer: Tokenizer for breaking the text of Tweets into
+            smaller units, including but not limited to words.  The default
+            instance is created once, when the class is defined, and is
+            shared by every reader that does not pass its own.
+
+        :param encoding: forwarded to the ``CorpusReader`` constructor.
+
+        :raise ValueError: if a corpus file that is not inside a zip
+            archive is empty.
+        """
+        CorpusReader.__init__(self, root, fileids, encoding)
+
+        # Check that all user-created corpus files are non-empty.
+        for path in self.abspaths(self._fileids):
+            if isinstance(path, ZipFilePathPointer):
+                # Files inside a zip archive are not size-checked.
+                continue
+            if os.path.getsize(path) == 0:
+                raise ValueError("File {} is empty".format(path))
+
+        self._word_tokenizer = word_tokenizer
+
+    def docs(self, fileids=None):
+        """
+        Returns the full Tweet objects, as specified by `Twitter
+        documentation on Tweets
+        <https://dev.twitter.com/docs/platform-objects/tweets>`_
+
+        :return: the given file(s) as a list of dictionaries deserialised
+            from JSON.
+        :rtype: list(dict)
+        """
+        return concat(
+            [
+                self.CorpusView(path, self._read_tweets, encoding=enc)
+                for (path, enc, _fileid) in self.abspaths(fileids, True, True)
+            ]
+        )
+
+    def strings(self, fileids=None):
+        """
+        Returns only the text content of Tweets in the file(s).  Tweets
+        without a ``'text'`` key are skipped.
+
+        :return: the given file(s) as a list of Tweets.
+        :rtype: list(str)
+        """
+        fulltweets = self.docs(fileids)
+        tweets = []
+        for jsono in fulltweets:
+            try:
+                text = jsono['text']
+            except KeyError:
+                # Not every JSON object carries a text field; skip it.
+                continue
+            if isinstance(text, bytes):
+                # NOTE(review): ``self.encoding`` is not assigned anywhere
+                # in this class; confirm what the base class exposes under
+                # that name before relying on this branch.
+                text = text.decode(self.encoding)
+            tweets.append(text)
+        return tweets
+
+    def tokenized(self, fileids=None):
+        """
+        :return: the given file(s) as a list of the text content of Tweets,
+            each as a list of words, screen names, hashtags, URLs and
+            punctuation symbols.
+
+        :rtype: list(list(str))
+        """
+        tweets = self.strings(fileids)
+        tokenizer = self._word_tokenizer
+        return [tokenizer.tokenize(t) for t in tweets]
+
+    def raw(self, fileids=None):
+        """
+        Return the corpora in their raw form.
+
+        :param fileids: a fileid, a list of fileids, or ``None`` for all
+            files of this corpus.
+        """
+        if fileids is None:
+            fileids = self._fileids
+        elif isinstance(fileids, string_types):
+            fileids = [fileids]
+        contents = []
+        for f in fileids:
+            stream = self.open(f)
+            try:
+                contents.append(stream.read())
+            finally:
+                # Close each file as soon as it has been read.
+                stream.close()
+        return concat(contents)
+
+    def _read_tweets(self, stream):
+        """
+        Assumes that each line in ``stream`` is a JSON-serialised object.
+
+        Reads at most 10 lines per call and stops early at end of stream.
+
+        :return: the list of deserialised objects read by this call.
+        """
+        tweets = []
+        for i in range(10):
+            line = stream.readline()
+            if not line:
+                return tweets
+            tweet = json.loads(line)
+            tweets.append(tweet)
+        return tweets
--- a/Show More
+++ b/Show More