Initial commit
This commit is contained in:
373
venv/lib/python3.7/site-packages/nltk/compat.py
Normal file
373
venv/lib/python3.7/site-packages/nltk/compat.py
Normal file
@@ -0,0 +1,373 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# Natural Language Toolkit: Compatibility
|
||||
#
|
||||
# Copyright (C) 2001-2019 NLTK Project
|
||||
#
|
||||
# URL: <http://nltk.org/>
|
||||
# For license information, see LICENSE.TXT
|
||||
|
||||
from __future__ import absolute_import, print_function
|
||||
import os
|
||||
import sys
|
||||
from functools import update_wrapper, wraps
|
||||
import fractions
|
||||
import unicodedata
|
||||
|
||||
from six import string_types, text_type
|
||||
|
||||
# Python 2/3 compatibility layer. Based on six.
|
||||
|
||||
# True when the running interpreter is a Python 3 release.
PY3 = sys.version_info.major == 3
|
||||
|
||||
if PY3:
|
||||
|
||||
def get_im_class(meth):
    """Return the class of the object that the bound method ``meth`` is bound to."""
    owner = meth.__self__
    return owner.__class__
|
||||
|
||||
import io
from datetime import timezone
from tempfile import TemporaryDirectory

# In-memory text and byte streams, exposed under version-neutral names.
StringIO, BytesIO = io.StringIO, io.BytesIO

# The standard library's UTC timezone object.
UTC = timezone.utc
|
||||
|
||||
else:
|
||||
|
||||
def get_im_class(meth):
    """Return the class stored in the ``im_class`` attribute of ``meth``."""
    owner = meth.im_class
    return owner
|
||||
|
||||
try:
|
||||
from cStringIO import StringIO
|
||||
except ImportError:
|
||||
from StringIO import StringIO
|
||||
BytesIO = StringIO
|
||||
|
||||
from datetime import timedelta, tzinfo

# Fixed durations used by the timezone helper below.
ZERO = timedelta()
HOUR = timedelta(seconds=3600)


# A UTC class for python 2.7
class UTC(tzinfo):
    """Time zone with a zero UTC offset and no daylight saving time."""

    def tzname(self, dt):
        return "UTC"

    def utcoffset(self, dt):
        return ZERO

    def dst(self, dt):
        return ZERO


# Rebind the name to an instance of the class defined above.
UTC = UTC()
|
||||
|
||||
import csv
|
||||
import codecs
|
||||
import cStringIO
|
||||
|
||||
class UnicodeWriter:
    """
    A CSV writer which will write rows to CSV file "f",
    which is encoded in the given encoding.
    see https://docs.python.org/2/library/csv.html

    Each row is first rendered as UTF-8 into an in-memory queue by the
    ``csv`` module and then re-encoded into the target encoding before
    being written to the underlying stream.

    :param f: stream object whose ``write`` method receives the encoded rows
    :param dialect: CSV dialect handed to ``csv.writer``
    :param encoding: name of the target encoding of ``f``
    :param errors: error handling scheme given to the incremental encoder
    :param kwds: extra formatting parameters passed on to ``csv.writer``
    """

    def __init__(
        self, f, dialect=csv.excel, encoding="utf-8", errors='replace', **kwds
    ):
        # Redirect output to a queue
        self.queue = cStringIO.StringIO()
        self.writer = csv.writer(self.queue, dialect=dialect, **kwds)
        self.stream = f
        encoder_cls = codecs.getincrementalencoder(encoding)
        self.encoder = encoder_cls(errors=errors)

    def encode(self, data):
        """Return ``data`` as UTF-8 bytes if it is text, otherwise unchanged."""
        # Only unicode text needs encoding.  Calling ``.encode`` on a byte
        # string would first decode it implicitly as ASCII and fail on any
        # non-ASCII byte, so byte strings are passed through untouched.
        if isinstance(data, text_type):
            return data.encode("utf-8")
        return data

    def writerow(self, row):
        """Write one row (an iterable of cell values) to the target stream."""
        self.writer.writerow([self.encode(s) for s in row])
        # Fetch UTF-8 output from the queue ...
        data = self.queue.getvalue()
        data = data.decode("utf-8")
        # ... and reencode it into the target encoding.  The second
        # parameter of ``IncrementalEncoder.encode`` is the ``final`` flag,
        # not an error mode (the error mode was fixed in ``__init__``); the
        # row is complete, so the encoder is flushed after each row.
        data = self.encoder.encode(data, True)
        # write to the target stream
        self.stream.write(data)
        # empty queue
        self.queue.truncate(0)
|
||||
|
||||
import warnings as _warnings
|
||||
import os as _os
|
||||
from tempfile import mkdtemp
|
||||
|
||||
class TemporaryDirectory(object):
    """Create and return a temporary directory.  This has the same
    behavior as mkdtemp but can be used as a context manager.  For
    example:

        with TemporaryDirectory() as tmpdir:
            ...

    Upon exiting the context, the directory and everything contained
    in it are removed.

    http://stackoverflow.com/questions/19296146/tempfile-temporarydirectory-context-manager-in-python-2-7

    :param suffix: suffix of the directory name, passed to ``mkdtemp``
    :param prefix: prefix of the directory name, passed to ``mkdtemp``
    :param dir: parent directory, passed to ``mkdtemp``
    """

    def __init__(self, suffix="", prefix="tmp", dir=None):
        self._closed = False
        self.name = None  # Handle mkdtemp raising an exception
        self.name = mkdtemp(suffix, prefix, dir)

    def __repr__(self):
        return "<{} {!r}>".format(self.__class__.__name__, self.name)

    def __enter__(self):
        return self.name

    def cleanup(self, _warn=False):
        """Remove the directory and its contents; later calls do nothing.

        :param _warn: when true, emit a warning after the directory has
            been removed (used for implicit cleanup from ``__del__``)
        """
        if self.name and not self._closed:
            try:
                self._rmtree(self.name)
            except (TypeError, AttributeError) as ex:
                # Issue #10188: Emit a warning on stderr
                # if the directory could not be cleaned
                # up due to missing globals
                if "None" not in str(ex):
                    raise
                print(
                    "ERROR: {!r} while cleaning up {!r}".format(ex, self),
                    file=sys.stderr,
                )
                return
            self._closed = True
            if _warn:
                self._warn("Implicitly cleaning up {!r}".format(self), Warning)

    def __exit__(self, exc, value, tb):
        self.cleanup()

    def __del__(self):
        # Issue a Warning if implicit cleanup needed
        self.cleanup(_warn=True)

    # XXX (ncoghlan): The following code attempts to make
    # this class tolerant of the module nulling out process
    # that happens during CPython interpreter shutdown
    # Alas, it doesn't actually manage it. See issue #10188
    _listdir = staticmethod(_os.listdir)
    _path_join = staticmethod(_os.path.join)
    _isdir = staticmethod(_os.path.isdir)
    _islink = staticmethod(_os.path.islink)
    _remove = staticmethod(_os.remove)
    _rmdir = staticmethod(_os.rmdir)
    # Wrapped like the aliases above so that ``self._warn(...)`` never
    # passes ``self`` as the message when ``warnings.warn`` is a plain
    # Python function rather than a builtin.
    _warn = staticmethod(_warnings.warn)

    def _rmtree(self, path):
        """Best-effort recursive removal of ``path``; OSErrors are ignored."""
        # Essentially a stripped down version of shutil.rmtree.  We can't
        # use globals because they may be None'ed out at shutdown.
        for name in self._listdir(path):
            fullname = self._path_join(path, name)
            try:
                isdir = self._isdir(fullname) and not self._islink(fullname)
            except OSError:
                isdir = False
            if isdir:
                self._rmtree(fullname)
            else:
                try:
                    self._remove(fullname)
                except OSError:
                    pass
        try:
            self._rmdir(path)
        except OSError:
            pass
|
||||
|
||||
|
||||
# ======= Compatibility for datasets that care about Python versions ========
|
||||
|
||||
# Each (category, package) pair below names a dataset that ships a /PY3
# subdirectory holding a full copy of its data, re-encoded or repickled.
DATA_UPDATES = [
    ("chunkers", "maxent_ne_chunker"),
    ("help", "tagsets"),
    ("taggers", "maxent_treebank_pos_tagger"),
    ("tokenizers", "punkt"),
]

# The same pairs joined into relative paths with the platform separator.
_PY3_DATA_UPDATES = [
    os.path.join(category, package) for category, package in DATA_UPDATES
]
|
||||
|
||||
|
||||
def add_py3_data(path):
    """
    Return ``path`` with a ``/PY3`` component inserted after the first
    dataset from ``_PY3_DATA_UPDATES`` that occurs in it.

    The component is placed directly after the dataset name, or after
    the ``.zip`` extension when one immediately follows the name.

    ``path`` is returned unchanged when not running under Python 3, when
    it is not a string, when it already contains ``/PY3``, or when no
    listed dataset occurs in it.

    :param path: resource path to adjust
    :return: the adjusted path, or ``path`` itself if nothing applies
    """
    # Only real strings can be searched and sliced; any other object is
    # handed back untouched instead of failing on ``index``/slicing.
    if not PY3 or not isinstance(path, string_types):
        return path
    if "/PY3" in path:
        return path

    for item in _PY3_DATA_UPDATES:
        start = path.find(item)
        if start < 0:
            continue
        pos = start + len(item)
        # Keep a ".zip" extension attached to the dataset name.
        if path[pos : pos + 4] == ".zip":
            pos += 4
        # Only the first matching dataset is rewritten.
        return path[:pos] + "/PY3" + path[pos:]
    return path
|
||||
|
||||
|
||||
# for use in adding /PY3 to the second (filename) argument
|
||||
# of the file pointers in data.py
|
||||
def py3_data(init_func):
    """
    Decorate ``init_func`` so that its second positional argument is
    passed through ``add_py3_data`` before the call.

    Calls that supply fewer than two positional arguments (for example
    when the filename is given by keyword) are forwarded unchanged.

    :param init_func: callable to wrap
    :return: wrapper carrying the metadata of ``init_func``
    """

    @wraps(init_func)
    def _decorator(*args, **kwargs):
        # Guard against a missing second positional argument instead of
        # raising IndexError before init_func is even reached.
        if len(args) > 1:
            args = (args[0], add_py3_data(args[1])) + args[2:]
        return init_func(*args, **kwargs)

    return _decorator
|
||||
|
||||
|
||||
# ======= Compatibility layer for __str__ and __repr__ ==========
|
||||
def remove_accents(text):
    """
    Return ``text`` with nonspacing marks (category ``Mn``) removed after
    NFKD normalization.  Byte strings are first decoded as ASCII.
    """
    if isinstance(text, bytes):
        text = text.decode('ascii')

    category = unicodedata.category  # this gives a small (~10%) speedup
    decomposed = unicodedata.normalize('NFKD', text)
    kept = [ch for ch in decomposed if category(ch) != 'Mn']
    return ''.join(kept)
|
||||
|
||||
|
||||
# Choose the transliteration backend, best candidate first:
#   1. Unidecode -- older versions are licensed under Artistic License;
#      assume an older version is installed.
#   2. text-unidecode -- its implementation is worse than Unidecode's,
#      which is why Unidecode is preferred.
#   3. remove_accents -- should be enough for many Western languages.
transliterate = None
try:
    from unidecode import unidecode as transliterate
except ImportError:
    pass

if transliterate is None:
    try:
        from text_unidecode import unidecode as transliterate
    except ImportError:
        transliterate = remove_accents
|
||||
|
||||
|
||||
def python_2_unicode_compatible(klass):
    """
    Class decorator that defines ``__unicode__`` and fixes the
    ``__repr__`` and ``__str__`` methods under Python 2.

    To support Python 2 and 3 with a single code base, define
    ``__str__`` and ``__repr__`` methods returning unicode text and
    apply this decorator to the class.

    The original ``__repr__`` and ``__str__`` stay available as
    ``unicode_repr`` and ``__unicode__`` (under both Python 2 and
    Python 3).

    :raises ValueError: if ``klass`` is not a subclass of ``object``
    """
    if not issubclass(klass, object):
        raise ValueError("This decorator doesn't work for old-style classes")

    # Both __unicode__ and unicode_repr are public because they
    # may be useful in a console under Python 2.x.

    # If __str__ or __repr__ is not overridden in a subclass, it may
    # already have been fixed by this decorator in a parent class,
    # and we shouldn't fix it again.

    if not _was_fixed(klass.__str__):
        klass.__unicode__ = klass.__str__
        if not PY3:
            transliterated_str = _transliterated(klass.__unicode__)
            klass.__str__ = _7bit(transliterated_str)

    if not _was_fixed(klass.__repr__):
        klass.unicode_repr = klass.__repr__
        if not PY3:
            klass.__repr__ = _7bit(klass.unicode_repr)

    return klass
|
||||
|
||||
|
||||
def unicode_repr(obj):
    """
    Return a representation of ``obj`` that reads the same under
    Python 2.x and Python 3.x.

    For instances of classes fixed with @python_2_unicode_compatible,
    ``obj.unicode_repr()`` is returned; for unicode strings the result
    is given without the leading "u" letter; for any other object it is
    the same as ``repr``.
    """
    if not PY3:
        # Python 2.x
        if hasattr(obj, 'unicode_repr'):
            return obj.unicode_repr()
        if isinstance(obj, text_type):
            return repr(obj)[1:]  # strip "u" letter from output

    return repr(obj)
|
||||
|
||||
|
||||
def _transliterated(method):
    """
    Wrap ``method`` so that its result is passed through ``transliterate``.

    The wrapper is flagged with ``_nltk_compat_transliterated`` and keeps
    the ``_nltk_compat_7bit`` flag of ``method`` when it has one.
    """

    @wraps(method, assigned=["__name__", "__doc__"])
    def wrapper(self):
        return transliterate(method(self))

    if hasattr(method, "_nltk_compat_7bit"):
        wrapper._nltk_compat_7bit = method._nltk_compat_7bit

    wrapper._nltk_compat_transliterated = True
    return wrapper
|
||||
|
||||
|
||||
def _7bit(method):
    """
    Wrap ``method`` so that its text result is encoded to ASCII, with
    non-ASCII characters written as backslash escapes.

    The wrapper is flagged with ``_nltk_compat_7bit`` and keeps the
    ``_nltk_compat_transliterated`` flag of ``method`` when it has one.
    """

    @wraps(method, assigned=["__name__", "__doc__"])
    def wrapper(self):
        text = method(self)
        return text.encode('ascii', 'backslashreplace')

    if hasattr(method, "_nltk_compat_transliterated"):
        wrapper._nltk_compat_transliterated = method._nltk_compat_transliterated

    wrapper._nltk_compat_7bit = True
    return wrapper
|
||||
|
||||
|
||||
def _was_fixed(method):
    """
    Return the first truthy compat flag set on ``method``
    (``_nltk_compat_7bit``, then ``_nltk_compat_transliterated``);
    a falsy value is returned when neither flag is set.
    """
    seven_bit = getattr(method, "_nltk_compat_7bit", False)
    if seven_bit:
        return seven_bit
    return getattr(method, "_nltk_compat_transliterated", False)
|
||||
|
||||
|
||||
class Fraction(fractions.Fraction):
    """
    This is a simplified backwards compatible version of fractions.Fraction
    from Python >=3.5. It adds the `_normalize` parameter such that it does
    not normalize the denominator to the Greatest Common Divisor (gcd) when
    the numerator is 0.

    This is most probably only used by the nltk.translate.bleu_score.py where
    numerator and denominator of the different ngram precisions are mutable.
    But the idea of "mutable" fraction might not be applicable to other usages,
    See http://stackoverflow.com/questions/34561265

    This object should be deprecated once NLTK stops supporting Python < 3.5
    See https://github.com/nltk/nltk/issues/1330
    """

    def __new__(cls, numerator=0, denominator=None, _normalize=True):
        """
        Build the fraction; with ``_normalize=False`` the given numerator
        and denominator are stored without reduction.

        The unreduced form is kept only when ``numerator`` is exactly an
        ``int`` and ``denominator`` is a non-zero integer; every other
        combination yields the normalized fraction from the base class.
        """
        import numbers  # local import: used only by this check

        self = super(Fraction, cls).__new__(cls, numerator, denominator)
        # To emulate fraction.Fraction.from_float across Python >=2.7,
        # check that numerator is an integer and denominator is not None.
        # The denominator must be integral as well: storing a non-integral
        # rational (e.g. another Fraction) verbatim would leave a
        # non-integer in the ``denominator`` attribute.
        keep_unreduced = (
            not _normalize
            and type(numerator) is int
            and isinstance(denominator, numbers.Integral)
            and denominator
        )
        if keep_unreduced:
            self._numerator = numerator
            self._denominator = denominator
        return self
|
||||
Reference in New Issue
Block a user