Initial commit
This commit is contained in:
261
venv/lib/python3.7/site-packages/nltk/sem/lfg.py
Normal file
261
venv/lib/python3.7/site-packages/nltk/sem/lfg.py
Normal file
@@ -0,0 +1,261 @@
|
||||
# Natural Language Toolkit: Lexical Functional Grammar
|
||||
#
|
||||
# Author: Dan Garrette <dhgarrette@gmail.com>
|
||||
#
|
||||
# Copyright (C) 2001-2019 NLTK Project
|
||||
# URL: <http://nltk.org/>
|
||||
# For license information, see LICENSE.TXT
|
||||
from __future__ import print_function, division, unicode_literals
|
||||
|
||||
from itertools import chain
|
||||
|
||||
from nltk.internals import Counter
|
||||
from nltk.compat import python_2_unicode_compatible
|
||||
|
||||
|
||||
@python_2_unicode_compatible
|
||||
class FStructure(dict):
|
||||
def safeappend(self, key, item):
|
||||
"""
|
||||
Append 'item' to the list at 'key'. If no list exists for 'key', then
|
||||
construct one.
|
||||
"""
|
||||
if key not in self:
|
||||
self[key] = []
|
||||
self[key].append(item)
|
||||
|
||||
def __setitem__(self, key, value):
|
||||
dict.__setitem__(self, key.lower(), value)
|
||||
|
||||
def __getitem__(self, key):
|
||||
return dict.__getitem__(self, key.lower())
|
||||
|
||||
def __contains__(self, key):
|
||||
return dict.__contains__(self, key.lower())
|
||||
|
||||
def to_glueformula_list(self, glue_dict):
|
||||
depgraph = self.to_depgraph()
|
||||
return glue_dict.to_glueformula_list(depgraph)
|
||||
|
||||
def to_depgraph(self, rel=None):
|
||||
from nltk.parse.dependencygraph import DependencyGraph
|
||||
|
||||
depgraph = DependencyGraph()
|
||||
nodes = depgraph.nodes
|
||||
|
||||
self._to_depgraph(nodes, 0, 'ROOT')
|
||||
|
||||
# Add all the dependencies for all the nodes
|
||||
for address, node in nodes.items():
|
||||
for n2 in (n for n in nodes.values() if n['rel'] != 'TOP'):
|
||||
if n2['head'] == address:
|
||||
relation = n2['rel']
|
||||
node['deps'].setdefault(relation, [])
|
||||
node['deps'][relation].append(n2['address'])
|
||||
|
||||
depgraph.root = nodes[1]
|
||||
|
||||
return depgraph
|
||||
|
||||
def _to_depgraph(self, nodes, head, rel):
|
||||
index = len(nodes)
|
||||
|
||||
nodes[index].update(
|
||||
{
|
||||
'address': index,
|
||||
'word': self.pred[0],
|
||||
'tag': self.pred[1],
|
||||
'head': head,
|
||||
'rel': rel,
|
||||
}
|
||||
)
|
||||
|
||||
for feature in sorted(self):
|
||||
for item in sorted(self[feature]):
|
||||
if isinstance(item, FStructure):
|
||||
item._to_depgraph(nodes, index, feature)
|
||||
elif isinstance(item, tuple):
|
||||
new_index = len(nodes)
|
||||
nodes[new_index].update(
|
||||
{
|
||||
'address': new_index,
|
||||
'word': item[0],
|
||||
'tag': item[1],
|
||||
'head': index,
|
||||
'rel': feature,
|
||||
}
|
||||
)
|
||||
elif isinstance(item, list):
|
||||
for n in item:
|
||||
n._to_depgraph(nodes, index, feature)
|
||||
else:
|
||||
raise Exception(
|
||||
'feature %s is not an FStruct, a list, or a tuple' % feature
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def read_depgraph(depgraph):
|
||||
return FStructure._read_depgraph(depgraph.root, depgraph)
|
||||
|
||||
@staticmethod
|
||||
def _read_depgraph(node, depgraph, label_counter=None, parent=None):
|
||||
if not label_counter:
|
||||
label_counter = Counter()
|
||||
|
||||
if node['rel'].lower() in ['spec', 'punct']:
|
||||
# the value of a 'spec' entry is a word, not an FStructure
|
||||
return (node['word'], node['tag'])
|
||||
|
||||
else:
|
||||
fstruct = FStructure()
|
||||
fstruct.pred = None
|
||||
fstruct.label = FStructure._make_label(label_counter.get())
|
||||
|
||||
fstruct.parent = parent
|
||||
|
||||
word, tag = node['word'], node['tag']
|
||||
if tag[:2] == 'VB':
|
||||
if tag[2:3] == 'D':
|
||||
fstruct.safeappend('tense', ('PAST', 'tense'))
|
||||
fstruct.pred = (word, tag[:2])
|
||||
|
||||
if not fstruct.pred:
|
||||
fstruct.pred = (word, tag)
|
||||
|
||||
children = [depgraph.nodes[idx] for idx in chain(*node['deps'].values())]
|
||||
for child in children:
|
||||
fstruct.safeappend(
|
||||
child['rel'],
|
||||
FStructure._read_depgraph(child, depgraph, label_counter, fstruct),
|
||||
)
|
||||
|
||||
return fstruct
|
||||
|
||||
@staticmethod
|
||||
def _make_label(value):
|
||||
"""
|
||||
Pick an alphabetic character as identifier for an entity in the model.
|
||||
|
||||
:param value: where to index into the list of characters
|
||||
:type value: int
|
||||
"""
|
||||
letter = [
|
||||
'f',
|
||||
'g',
|
||||
'h',
|
||||
'i',
|
||||
'j',
|
||||
'k',
|
||||
'l',
|
||||
'm',
|
||||
'n',
|
||||
'o',
|
||||
'p',
|
||||
'q',
|
||||
'r',
|
||||
's',
|
||||
't',
|
||||
'u',
|
||||
'v',
|
||||
'w',
|
||||
'x',
|
||||
'y',
|
||||
'z',
|
||||
'a',
|
||||
'b',
|
||||
'c',
|
||||
'd',
|
||||
'e',
|
||||
][value - 1]
|
||||
num = int(value) // 26
|
||||
if num > 0:
|
||||
return letter + str(num)
|
||||
else:
|
||||
return letter
|
||||
|
||||
def __repr__(self):
|
||||
return self.__unicode__().replace('\n', '')
|
||||
|
||||
def __str__(self):
|
||||
return self.pretty_format()
|
||||
|
||||
def pretty_format(self, indent=3):
|
||||
try:
|
||||
accum = '%s:[' % self.label
|
||||
except NameError:
|
||||
accum = '['
|
||||
try:
|
||||
accum += 'pred \'%s\'' % (self.pred[0])
|
||||
except NameError:
|
||||
pass
|
||||
|
||||
for feature in sorted(self):
|
||||
for item in self[feature]:
|
||||
if isinstance(item, FStructure):
|
||||
next_indent = indent + len(feature) + 3 + len(self.label)
|
||||
accum += '\n%s%s %s' % (
|
||||
' ' * (indent),
|
||||
feature,
|
||||
item.pretty_format(next_indent),
|
||||
)
|
||||
elif isinstance(item, tuple):
|
||||
accum += '\n%s%s \'%s\'' % (' ' * (indent), feature, item[0])
|
||||
elif isinstance(item, list):
|
||||
accum += '\n%s%s {%s}' % (
|
||||
' ' * (indent),
|
||||
feature,
|
||||
('\n%s' % (' ' * (indent + len(feature) + 2))).join(item),
|
||||
)
|
||||
else: # ERROR
|
||||
raise Exception(
|
||||
'feature %s is not an FStruct, a list, or a tuple' % feature
|
||||
)
|
||||
return accum + ']'
|
||||
|
||||
|
||||
def demo_read_depgraph():
|
||||
from nltk.parse.dependencygraph import DependencyGraph
|
||||
|
||||
dg1 = DependencyGraph(
|
||||
"""\
|
||||
Esso NNP 2 SUB
|
||||
said VBD 0 ROOT
|
||||
the DT 5 NMOD
|
||||
Whiting NNP 5 NMOD
|
||||
field NN 6 SUB
|
||||
started VBD 2 VMOD
|
||||
production NN 6 OBJ
|
||||
Tuesday NNP 6 VMOD
|
||||
"""
|
||||
)
|
||||
dg2 = DependencyGraph(
|
||||
"""\
|
||||
John NNP 2 SUB
|
||||
sees VBP 0 ROOT
|
||||
Mary NNP 2 OBJ
|
||||
"""
|
||||
)
|
||||
dg3 = DependencyGraph(
|
||||
"""\
|
||||
a DT 2 SPEC
|
||||
man NN 3 SUBJ
|
||||
walks VB 0 ROOT
|
||||
"""
|
||||
)
|
||||
dg4 = DependencyGraph(
|
||||
"""\
|
||||
every DT 2 SPEC
|
||||
girl NN 3 SUBJ
|
||||
chases VB 0 ROOT
|
||||
a DT 5 SPEC
|
||||
dog NN 3 OBJ
|
||||
"""
|
||||
)
|
||||
|
||||
depgraphs = [dg1, dg2, dg3, dg4]
|
||||
for dg in depgraphs:
|
||||
print(FStructure.read_depgraph(dg))
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
demo_read_depgraph()
|
||||
Reference in New Issue
Block a user