Initial commit
This commit is contained in:
44
venv/lib/python3.7/site-packages/nltk/tbl/erroranalysis.py
Normal file
44
venv/lib/python3.7/site-packages/nltk/tbl/erroranalysis.py
Normal file
@@ -0,0 +1,44 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
# Natural Language Toolkit: Transformation-based learning
|
||||
#
|
||||
# Copyright (C) 2001-2019 NLTK Project
|
||||
# Author: Marcus Uneson <marcus.uneson@gmail.com>
|
||||
# based on previous (nltk2) version by
|
||||
# Christopher Maloof, Edward Loper, Steven Bird
|
||||
# URL: <http://nltk.org/>
|
||||
# For license information, see LICENSE.TXT
|
||||
|
||||
from __future__ import print_function
|
||||
|
||||
|
||||
# returns a list of errors in string format
|
||||
|
||||
|
||||
def error_list(train_sents, test_sents):
|
||||
"""
|
||||
Returns a list of human-readable strings indicating the errors in the
|
||||
given tagging of the corpus.
|
||||
|
||||
:param train_sents: The correct tagging of the corpus
|
||||
:type train_sents: list(tuple)
|
||||
:param test_sents: The tagged corpus
|
||||
:type test_sents: list(tuple)
|
||||
"""
|
||||
hdr = ('%25s | %s | %s\n' + '-' * 26 + '+' + '-' * 24 + '+' + '-' * 26) % (
|
||||
'left context',
|
||||
'word/test->gold'.center(22),
|
||||
'right context',
|
||||
)
|
||||
errors = [hdr]
|
||||
for (train_sent, test_sent) in zip(train_sents, test_sents):
|
||||
for wordnum, (word, train_pos) in enumerate(train_sent):
|
||||
test_pos = test_sent[wordnum][1]
|
||||
if train_pos != test_pos:
|
||||
left = ' '.join('%s/%s' % w for w in train_sent[:wordnum])
|
||||
right = ' '.join('%s/%s' % w for w in train_sent[wordnum + 1 :])
|
||||
mid = '%s/%s->%s' % (word, test_pos, train_pos)
|
||||
errors.append(
|
||||
'%25s | %s | %s' % (left[-25:], mid.center(22), right[:25])
|
||||
)
|
||||
|
||||
return errors
|
||||
Reference in New Issue
Block a user