Initial commit
This commit is contained in:
33
venv/lib/python3.7/site-packages/nltk/draw/__init__.py
Normal file
33
venv/lib/python3.7/site-packages/nltk/draw/__init__.py
Normal file
@@ -0,0 +1,33 @@
|
||||
# Natural Language Toolkit: graphical representations package
|
||||
#
|
||||
# Copyright (C) 2001-2019 NLTK Project
|
||||
# Author: Edward Loper <edloper@gmail.com>
|
||||
# Steven Bird <stevenbird1@gmail.com>
|
||||
# URL: <http://nltk.org/>
|
||||
# For license information, see LICENSE.TXT
|
||||
|
||||
# Import Tkinter-based modules if Tkinter is installed
|
||||
try:
|
||||
from six.moves import tkinter
|
||||
except ImportError:
|
||||
import warnings
|
||||
|
||||
warnings.warn("nltk.draw package not loaded " "(please install Tkinter library).")
|
||||
else:
|
||||
from nltk.draw.cfg import ProductionList, CFGEditor, CFGDemo
|
||||
from nltk.draw.tree import (
|
||||
TreeSegmentWidget,
|
||||
tree_to_treesegment,
|
||||
TreeWidget,
|
||||
TreeView,
|
||||
draw_trees,
|
||||
)
|
||||
from nltk.draw.table import Table
|
||||
|
||||
from nltk.draw.dispersion import dispersion_plot
|
||||
|
||||
# skip doctests from this package
|
||||
def setup_module(module):
|
||||
from nose import SkipTest
|
||||
|
||||
raise SkipTest("nltk.draw examples are not doctests")
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
861
venv/lib/python3.7/site-packages/nltk/draw/cfg.py
Normal file
861
venv/lib/python3.7/site-packages/nltk/draw/cfg.py
Normal file
@@ -0,0 +1,861 @@
|
||||
# Natural Language Toolkit: CFG visualization
|
||||
#
|
||||
# Copyright (C) 2001-2019 NLTK Project
|
||||
# Author: Edward Loper <edloper@gmail.com>
|
||||
# URL: <http://nltk.org/>
|
||||
# For license information, see LICENSE.TXT
|
||||
|
||||
"""
|
||||
Visualization tools for CFGs.
|
||||
"""
|
||||
|
||||
# Idea for a nice demo:
|
||||
# - 3 panes: grammar, treelet, working area
|
||||
# - grammar is a list of productions
|
||||
# - when you select a production, the treelet that it licenses appears
|
||||
# in the treelet area
|
||||
# - the working area has the text on the bottom, and S at top. When
|
||||
# you select a production, it shows (ghosted) the locations where
|
||||
# that production's treelet could be attached to either the text
|
||||
# or the tree rooted at S.
|
||||
# - the user can drag the treelet onto one of those (or click on them?)
|
||||
# - the user can delete pieces of the tree from the working area
|
||||
# (right click?)
|
||||
# - connecting top to bottom? drag one NP onto another?
|
||||
#
|
||||
# +-------------------------------------------------------------+
|
||||
# | S -> NP VP | S |
|
||||
# |[NP -> Det N ]| / \ |
|
||||
# | ... | NP VP |
|
||||
# | N -> 'dog' | |
|
||||
# | N -> 'cat' | |
|
||||
# | ... | |
|
||||
# +--------------+ |
|
||||
# | NP | Det N |
|
||||
# | / \ | | | |
|
||||
# | Det N | the cat saw the dog |
|
||||
# | | |
|
||||
# +--------------+----------------------------------------------+
|
||||
#
|
||||
# Operations:
|
||||
# - connect a new treelet -- drag or click shadow
|
||||
# - delete a treelet -- right click
|
||||
# - if only connected to top, delete everything below
|
||||
# - if only connected to bottom, delete everything above
|
||||
# - connect top & bottom -- drag a leaf to a root or a root to a leaf
|
||||
# - disconnect top & bottom -- right click
|
||||
# - if connected to top & bottom, then disconnect
|
||||
|
||||
import re
|
||||
|
||||
from six import string_types
|
||||
from six.moves.tkinter import (
|
||||
Button,
|
||||
Canvas,
|
||||
Entry,
|
||||
Frame,
|
||||
IntVar,
|
||||
Label,
|
||||
Scrollbar,
|
||||
Text,
|
||||
Tk,
|
||||
Toplevel,
|
||||
)
|
||||
|
||||
from nltk.grammar import CFG, _read_cfg_production, Nonterminal, nonterminals
|
||||
from nltk.tree import Tree
|
||||
from nltk.draw.tree import TreeSegmentWidget, tree_to_treesegment
|
||||
from nltk.draw.util import (
|
||||
CanvasFrame,
|
||||
ColorizedList,
|
||||
ShowText,
|
||||
SymbolWidget,
|
||||
TextWidget,
|
||||
)
|
||||
|
||||
######################################################################
|
||||
# Production List
|
||||
######################################################################
|
||||
|
||||
|
||||
class ProductionList(ColorizedList):
|
||||
ARROW = SymbolWidget.SYMBOLS['rightarrow']
|
||||
|
||||
def _init_colortags(self, textwidget, options):
|
||||
textwidget.tag_config('terminal', foreground='#006000')
|
||||
textwidget.tag_config('arrow', font='symbol', underline='0')
|
||||
textwidget.tag_config(
|
||||
'nonterminal', foreground='blue', font=('helvetica', -12, 'bold')
|
||||
)
|
||||
|
||||
def _item_repr(self, item):
|
||||
contents = []
|
||||
contents.append(('%s\t' % item.lhs(), 'nonterminal'))
|
||||
contents.append((self.ARROW, 'arrow'))
|
||||
for elt in item.rhs():
|
||||
if isinstance(elt, Nonterminal):
|
||||
contents.append((' %s' % elt.symbol(), 'nonterminal'))
|
||||
else:
|
||||
contents.append((' %r' % elt, 'terminal'))
|
||||
return contents
|
||||
|
||||
|
||||
######################################################################
|
||||
# CFG Editor
|
||||
######################################################################
|
||||
|
||||
_CFGEditor_HELP = """
|
||||
|
||||
The CFG Editor can be used to create or modify context free grammars.
|
||||
A context free grammar consists of a start symbol and a list of
|
||||
productions. The start symbol is specified by the text entry field in
|
||||
the upper right hand corner of the editor; and the list of productions
|
||||
are specified in the main text editing box.
|
||||
|
||||
Every non-blank line specifies a single production. Each production
|
||||
has the form "LHS -> RHS," where LHS is a single nonterminal, and RHS
|
||||
is a list of nonterminals and terminals.
|
||||
|
||||
Nonterminals must be a single word, such as S or NP or NP_subj.
|
||||
Currently, nonterminals must consists of alphanumeric characters and
|
||||
underscores (_). Nonterminals are colored blue. If you place the
|
||||
mouse over any nonterminal, then all occurrences of that nonterminal
|
||||
will be highlighted.
|
||||
|
||||
Terminals must be surrounded by single quotes (') or double
|
||||
quotes(\"). For example, "dog" and "New York" are terminals.
|
||||
Currently, the string within the quotes must consist of alphanumeric
|
||||
characters, underscores, and spaces.
|
||||
|
||||
To enter a new production, go to a blank line, and type a nonterminal,
|
||||
followed by an arrow (->), followed by a sequence of terminals and
|
||||
nonterminals. Note that "->" (dash + greater-than) is automatically
|
||||
converted to an arrow symbol. When you move your cursor to a
|
||||
different line, your production will automatically be colorized. If
|
||||
there are any errors, they will be highlighted in red.
|
||||
|
||||
Note that the order of the productions is significant for some
|
||||
algorithms. To re-order the productions, use cut and paste to move
|
||||
them.
|
||||
|
||||
Use the buttons at the bottom of the window when you are done editing
|
||||
the CFG:
|
||||
- Ok: apply the new CFG, and exit the editor.
|
||||
- Apply: apply the new CFG, and do not exit the editor.
|
||||
- Reset: revert to the original CFG, and do not exit the editor.
|
||||
- Cancel: revert to the original CFG, and exit the editor.
|
||||
|
||||
"""
|
||||
|
||||
|
||||
class CFGEditor(object):
|
||||
"""
|
||||
A dialog window for creating and editing context free grammars.
|
||||
``CFGEditor`` imposes the following restrictions:
|
||||
|
||||
- All nonterminals must be strings consisting of word
|
||||
characters.
|
||||
- All terminals must be strings consisting of word characters
|
||||
and space characters.
|
||||
"""
|
||||
|
||||
# Regular expressions used by _analyze_line. Precompile them, so
|
||||
# we can process the text faster.
|
||||
ARROW = SymbolWidget.SYMBOLS['rightarrow']
|
||||
_LHS_RE = re.compile(r"(^\s*\w+\s*)(->|(" + ARROW + "))")
|
||||
_ARROW_RE = re.compile("\s*(->|(" + ARROW + "))\s*")
|
||||
_PRODUCTION_RE = re.compile(
|
||||
r"(^\s*\w+\s*)"
|
||||
+ "(->|(" # LHS
|
||||
+ ARROW
|
||||
+ "))\s*"
|
||||
+ r"((\w+|'[\w ]*'|\"[\w ]*\"|\|)\s*)*$" # arrow
|
||||
) # RHS
|
||||
_TOKEN_RE = re.compile("\\w+|->|'[\\w ]+'|\"[\\w ]+\"|(" + ARROW + ")")
|
||||
_BOLD = ('helvetica', -12, 'bold')
|
||||
|
||||
def __init__(self, parent, cfg=None, set_cfg_callback=None):
|
||||
self._parent = parent
|
||||
if cfg is not None:
|
||||
self._cfg = cfg
|
||||
else:
|
||||
self._cfg = CFG(Nonterminal('S'), [])
|
||||
self._set_cfg_callback = set_cfg_callback
|
||||
|
||||
self._highlight_matching_nonterminals = 1
|
||||
|
||||
# Create the top-level window.
|
||||
self._top = Toplevel(parent)
|
||||
self._init_bindings()
|
||||
|
||||
self._init_startframe()
|
||||
self._startframe.pack(side='top', fill='x', expand=0)
|
||||
self._init_prodframe()
|
||||
self._prodframe.pack(side='top', fill='both', expand=1)
|
||||
self._init_buttons()
|
||||
self._buttonframe.pack(side='bottom', fill='x', expand=0)
|
||||
|
||||
self._textwidget.focus()
|
||||
|
||||
def _init_startframe(self):
|
||||
frame = self._startframe = Frame(self._top)
|
||||
self._start = Entry(frame)
|
||||
self._start.pack(side='right')
|
||||
Label(frame, text='Start Symbol:').pack(side='right')
|
||||
Label(frame, text='Productions:').pack(side='left')
|
||||
self._start.insert(0, self._cfg.start().symbol())
|
||||
|
||||
def _init_buttons(self):
|
||||
frame = self._buttonframe = Frame(self._top)
|
||||
Button(frame, text='Ok', command=self._ok, underline=0, takefocus=0).pack(
|
||||
side='left'
|
||||
)
|
||||
Button(frame, text='Apply', command=self._apply, underline=0, takefocus=0).pack(
|
||||
side='left'
|
||||
)
|
||||
Button(frame, text='Reset', command=self._reset, underline=0, takefocus=0).pack(
|
||||
side='left'
|
||||
)
|
||||
Button(
|
||||
frame, text='Cancel', command=self._cancel, underline=0, takefocus=0
|
||||
).pack(side='left')
|
||||
Button(frame, text='Help', command=self._help, underline=0, takefocus=0).pack(
|
||||
side='right'
|
||||
)
|
||||
|
||||
def _init_bindings(self):
|
||||
self._top.title('CFG Editor')
|
||||
self._top.bind('<Control-q>', self._cancel)
|
||||
self._top.bind('<Alt-q>', self._cancel)
|
||||
self._top.bind('<Control-d>', self._cancel)
|
||||
# self._top.bind('<Control-x>', self._cancel)
|
||||
self._top.bind('<Alt-x>', self._cancel)
|
||||
self._top.bind('<Escape>', self._cancel)
|
||||
# self._top.bind('<Control-c>', self._cancel)
|
||||
self._top.bind('<Alt-c>', self._cancel)
|
||||
|
||||
self._top.bind('<Control-o>', self._ok)
|
||||
self._top.bind('<Alt-o>', self._ok)
|
||||
self._top.bind('<Control-a>', self._apply)
|
||||
self._top.bind('<Alt-a>', self._apply)
|
||||
self._top.bind('<Control-r>', self._reset)
|
||||
self._top.bind('<Alt-r>', self._reset)
|
||||
self._top.bind('<Control-h>', self._help)
|
||||
self._top.bind('<Alt-h>', self._help)
|
||||
self._top.bind('<F1>', self._help)
|
||||
|
||||
def _init_prodframe(self):
|
||||
self._prodframe = Frame(self._top)
|
||||
|
||||
# Create the basic Text widget & scrollbar.
|
||||
self._textwidget = Text(
|
||||
self._prodframe, background='#e0e0e0', exportselection=1
|
||||
)
|
||||
self._textscroll = Scrollbar(self._prodframe, takefocus=0, orient='vertical')
|
||||
self._textwidget.config(yscrollcommand=self._textscroll.set)
|
||||
self._textscroll.config(command=self._textwidget.yview)
|
||||
self._textscroll.pack(side='right', fill='y')
|
||||
self._textwidget.pack(expand=1, fill='both', side='left')
|
||||
|
||||
# Initialize the colorization tags. Each nonterminal gets its
|
||||
# own tag, so they aren't listed here.
|
||||
self._textwidget.tag_config('terminal', foreground='#006000')
|
||||
self._textwidget.tag_config('arrow', font='symbol')
|
||||
self._textwidget.tag_config('error', background='red')
|
||||
|
||||
# Keep track of what line they're on. We use that to remember
|
||||
# to re-analyze a line whenever they leave it.
|
||||
self._linenum = 0
|
||||
|
||||
# Expand "->" to an arrow.
|
||||
self._top.bind('>', self._replace_arrows)
|
||||
|
||||
# Re-colorize lines when appropriate.
|
||||
self._top.bind('<<Paste>>', self._analyze)
|
||||
self._top.bind('<KeyPress>', self._check_analyze)
|
||||
self._top.bind('<ButtonPress>', self._check_analyze)
|
||||
|
||||
# Tab cycles focus. (why doesn't this work??)
|
||||
def cycle(e, textwidget=self._textwidget):
|
||||
textwidget.tk_focusNext().focus()
|
||||
|
||||
self._textwidget.bind('<Tab>', cycle)
|
||||
|
||||
prod_tuples = [(p.lhs(), [p.rhs()]) for p in self._cfg.productions()]
|
||||
for i in range(len(prod_tuples) - 1, 0, -1):
|
||||
if prod_tuples[i][0] == prod_tuples[i - 1][0]:
|
||||
if () in prod_tuples[i][1]:
|
||||
continue
|
||||
if () in prod_tuples[i - 1][1]:
|
||||
continue
|
||||
print(prod_tuples[i - 1][1])
|
||||
print(prod_tuples[i][1])
|
||||
prod_tuples[i - 1][1].extend(prod_tuples[i][1])
|
||||
del prod_tuples[i]
|
||||
|
||||
for lhs, rhss in prod_tuples:
|
||||
print(lhs, rhss)
|
||||
s = '%s ->' % lhs
|
||||
for rhs in rhss:
|
||||
for elt in rhs:
|
||||
if isinstance(elt, Nonterminal):
|
||||
s += ' %s' % elt
|
||||
else:
|
||||
s += ' %r' % elt
|
||||
s += ' |'
|
||||
s = s[:-2] + '\n'
|
||||
self._textwidget.insert('end', s)
|
||||
|
||||
self._analyze()
|
||||
|
||||
# # Add the producitons to the text widget, and colorize them.
|
||||
# prod_by_lhs = {}
|
||||
# for prod in self._cfg.productions():
|
||||
# if len(prod.rhs()) > 0:
|
||||
# prod_by_lhs.setdefault(prod.lhs(),[]).append(prod)
|
||||
# for (lhs, prods) in prod_by_lhs.items():
|
||||
# self._textwidget.insert('end', '%s ->' % lhs)
|
||||
# self._textwidget.insert('end', self._rhs(prods[0]))
|
||||
# for prod in prods[1:]:
|
||||
# print '\t|'+self._rhs(prod),
|
||||
# self._textwidget.insert('end', '\t|'+self._rhs(prod))
|
||||
# print
|
||||
# self._textwidget.insert('end', '\n')
|
||||
# for prod in self._cfg.productions():
|
||||
# if len(prod.rhs()) == 0:
|
||||
# self._textwidget.insert('end', '%s' % prod)
|
||||
# self._analyze()
|
||||
|
||||
# def _rhs(self, prod):
|
||||
# s = ''
|
||||
# for elt in prod.rhs():
|
||||
# if isinstance(elt, Nonterminal): s += ' %s' % elt.symbol()
|
||||
# else: s += ' %r' % elt
|
||||
# return s
|
||||
|
||||
def _clear_tags(self, linenum):
|
||||
"""
|
||||
Remove all tags (except ``arrow`` and ``sel``) from the given
|
||||
line of the text widget used for editing the productions.
|
||||
"""
|
||||
start = '%d.0' % linenum
|
||||
end = '%d.end' % linenum
|
||||
for tag in self._textwidget.tag_names():
|
||||
if tag not in ('arrow', 'sel'):
|
||||
self._textwidget.tag_remove(tag, start, end)
|
||||
|
||||
def _check_analyze(self, *e):
|
||||
"""
|
||||
Check if we've moved to a new line. If we have, then remove
|
||||
all colorization from the line we moved to, and re-colorize
|
||||
the line that we moved from.
|
||||
"""
|
||||
linenum = int(self._textwidget.index('insert').split('.')[0])
|
||||
if linenum != self._linenum:
|
||||
self._clear_tags(linenum)
|
||||
self._analyze_line(self._linenum)
|
||||
self._linenum = linenum
|
||||
|
||||
def _replace_arrows(self, *e):
|
||||
"""
|
||||
Replace any ``'->'`` text strings with arrows (char \\256, in
|
||||
symbol font). This searches the whole buffer, but is fast
|
||||
enough to be done anytime they press '>'.
|
||||
"""
|
||||
arrow = '1.0'
|
||||
while True:
|
||||
arrow = self._textwidget.search('->', arrow, 'end+1char')
|
||||
if arrow == '':
|
||||
break
|
||||
self._textwidget.delete(arrow, arrow + '+2char')
|
||||
self._textwidget.insert(arrow, self.ARROW, 'arrow')
|
||||
self._textwidget.insert(arrow, '\t')
|
||||
|
||||
arrow = '1.0'
|
||||
while True:
|
||||
arrow = self._textwidget.search(self.ARROW, arrow + '+1char', 'end+1char')
|
||||
if arrow == '':
|
||||
break
|
||||
self._textwidget.tag_add('arrow', arrow, arrow + '+1char')
|
||||
|
||||
def _analyze_token(self, match, linenum):
|
||||
"""
|
||||
Given a line number and a regexp match for a token on that
|
||||
line, colorize the token. Note that the regexp match gives us
|
||||
the token's text, start index (on the line), and end index (on
|
||||
the line).
|
||||
"""
|
||||
# What type of token is it?
|
||||
if match.group()[0] in "'\"":
|
||||
tag = 'terminal'
|
||||
elif match.group() in ('->', self.ARROW):
|
||||
tag = 'arrow'
|
||||
else:
|
||||
# If it's a nonterminal, then set up new bindings, so we
|
||||
# can highlight all instances of that nonterminal when we
|
||||
# put the mouse over it.
|
||||
tag = 'nonterminal_' + match.group()
|
||||
if tag not in self._textwidget.tag_names():
|
||||
self._init_nonterminal_tag(tag)
|
||||
|
||||
start = '%d.%d' % (linenum, match.start())
|
||||
end = '%d.%d' % (linenum, match.end())
|
||||
self._textwidget.tag_add(tag, start, end)
|
||||
|
||||
def _init_nonterminal_tag(self, tag, foreground='blue'):
|
||||
self._textwidget.tag_config(tag, foreground=foreground, font=CFGEditor._BOLD)
|
||||
if not self._highlight_matching_nonterminals:
|
||||
return
|
||||
|
||||
def enter(e, textwidget=self._textwidget, tag=tag):
|
||||
textwidget.tag_config(tag, background='#80ff80')
|
||||
|
||||
def leave(e, textwidget=self._textwidget, tag=tag):
|
||||
textwidget.tag_config(tag, background='')
|
||||
|
||||
self._textwidget.tag_bind(tag, '<Enter>', enter)
|
||||
self._textwidget.tag_bind(tag, '<Leave>', leave)
|
||||
|
||||
def _analyze_line(self, linenum):
|
||||
"""
|
||||
Colorize a given line.
|
||||
"""
|
||||
# Get rid of any tags that were previously on the line.
|
||||
self._clear_tags(linenum)
|
||||
|
||||
# Get the line line's text string.
|
||||
line = self._textwidget.get(repr(linenum) + '.0', repr(linenum) + '.end')
|
||||
|
||||
# If it's a valid production, then colorize each token.
|
||||
if CFGEditor._PRODUCTION_RE.match(line):
|
||||
# It's valid; Use _TOKEN_RE to tokenize the production,
|
||||
# and call analyze_token on each token.
|
||||
def analyze_token(match, self=self, linenum=linenum):
|
||||
self._analyze_token(match, linenum)
|
||||
return ''
|
||||
|
||||
CFGEditor._TOKEN_RE.sub(analyze_token, line)
|
||||
elif line.strip() != '':
|
||||
# It's invalid; show the user where the error is.
|
||||
self._mark_error(linenum, line)
|
||||
|
||||
def _mark_error(self, linenum, line):
|
||||
"""
|
||||
Mark the location of an error in a line.
|
||||
"""
|
||||
arrowmatch = CFGEditor._ARROW_RE.search(line)
|
||||
if not arrowmatch:
|
||||
# If there's no arrow at all, highlight the whole line.
|
||||
start = '%d.0' % linenum
|
||||
end = '%d.end' % linenum
|
||||
elif not CFGEditor._LHS_RE.match(line):
|
||||
# Otherwise, if the LHS is bad, highlight it.
|
||||
start = '%d.0' % linenum
|
||||
end = '%d.%d' % (linenum, arrowmatch.start())
|
||||
else:
|
||||
# Otherwise, highlight the RHS.
|
||||
start = '%d.%d' % (linenum, arrowmatch.end())
|
||||
end = '%d.end' % linenum
|
||||
|
||||
# If we're highlighting 0 chars, highlight the whole line.
|
||||
if self._textwidget.compare(start, '==', end):
|
||||
start = '%d.0' % linenum
|
||||
end = '%d.end' % linenum
|
||||
self._textwidget.tag_add('error', start, end)
|
||||
|
||||
def _analyze(self, *e):
|
||||
"""
|
||||
Replace ``->`` with arrows, and colorize the entire buffer.
|
||||
"""
|
||||
self._replace_arrows()
|
||||
numlines = int(self._textwidget.index('end').split('.')[0])
|
||||
for linenum in range(1, numlines + 1): # line numbers start at 1.
|
||||
self._analyze_line(linenum)
|
||||
|
||||
def _parse_productions(self):
|
||||
"""
|
||||
Parse the current contents of the textwidget buffer, to create
|
||||
a list of productions.
|
||||
"""
|
||||
productions = []
|
||||
|
||||
# Get the text, normalize it, and split it into lines.
|
||||
text = self._textwidget.get('1.0', 'end')
|
||||
text = re.sub(self.ARROW, '->', text)
|
||||
text = re.sub('\t', ' ', text)
|
||||
lines = text.split('\n')
|
||||
|
||||
# Convert each line to a CFG production
|
||||
for line in lines:
|
||||
line = line.strip()
|
||||
if line == '':
|
||||
continue
|
||||
productions += _read_cfg_production(line)
|
||||
# if line.strip() == '': continue
|
||||
# if not CFGEditor._PRODUCTION_RE.match(line):
|
||||
# raise ValueError('Bad production string %r' % line)
|
||||
#
|
||||
# (lhs_str, rhs_str) = line.split('->')
|
||||
# lhs = Nonterminal(lhs_str.strip())
|
||||
# rhs = []
|
||||
# def parse_token(match, rhs=rhs):
|
||||
# token = match.group()
|
||||
# if token[0] in "'\"": rhs.append(token[1:-1])
|
||||
# else: rhs.append(Nonterminal(token))
|
||||
# return ''
|
||||
# CFGEditor._TOKEN_RE.sub(parse_token, rhs_str)
|
||||
#
|
||||
# productions.append(Production(lhs, *rhs))
|
||||
|
||||
return productions
|
||||
|
||||
def _destroy(self, *e):
|
||||
if self._top is None:
|
||||
return
|
||||
self._top.destroy()
|
||||
self._top = None
|
||||
|
||||
def _ok(self, *e):
|
||||
self._apply()
|
||||
self._destroy()
|
||||
|
||||
def _apply(self, *e):
|
||||
productions = self._parse_productions()
|
||||
start = Nonterminal(self._start.get())
|
||||
cfg = CFG(start, productions)
|
||||
if self._set_cfg_callback is not None:
|
||||
self._set_cfg_callback(cfg)
|
||||
|
||||
def _reset(self, *e):
|
||||
self._textwidget.delete('1.0', 'end')
|
||||
for production in self._cfg.productions():
|
||||
self._textwidget.insert('end', '%s\n' % production)
|
||||
self._analyze()
|
||||
if self._set_cfg_callback is not None:
|
||||
self._set_cfg_callback(self._cfg)
|
||||
|
||||
def _cancel(self, *e):
|
||||
try:
|
||||
self._reset()
|
||||
except:
|
||||
pass
|
||||
self._destroy()
|
||||
|
||||
def _help(self, *e):
|
||||
# The default font's not very legible; try using 'fixed' instead.
|
||||
try:
|
||||
ShowText(
|
||||
self._parent,
|
||||
'Help: Chart Parser Demo',
|
||||
(_CFGEditor_HELP).strip(),
|
||||
width=75,
|
||||
font='fixed',
|
||||
)
|
||||
except:
|
||||
ShowText(
|
||||
self._parent,
|
||||
'Help: Chart Parser Demo',
|
||||
(_CFGEditor_HELP).strip(),
|
||||
width=75,
|
||||
)
|
||||
|
||||
|
||||
######################################################################
|
||||
# New Demo (built tree based on cfg)
|
||||
######################################################################
|
||||
|
||||
|
||||
class CFGDemo(object):
|
||||
def __init__(self, grammar, text):
|
||||
self._grammar = grammar
|
||||
self._text = text
|
||||
|
||||
# Set up the main window.
|
||||
self._top = Tk()
|
||||
self._top.title('Context Free Grammar Demo')
|
||||
|
||||
# Base font size
|
||||
self._size = IntVar(self._top)
|
||||
self._size.set(12) # = medium
|
||||
|
||||
# Set up the key bindings
|
||||
self._init_bindings(self._top)
|
||||
|
||||
# Create the basic frames
|
||||
frame1 = Frame(self._top)
|
||||
frame1.pack(side='left', fill='y', expand=0)
|
||||
self._init_menubar(self._top)
|
||||
self._init_buttons(self._top)
|
||||
self._init_grammar(frame1)
|
||||
self._init_treelet(frame1)
|
||||
self._init_workspace(self._top)
|
||||
|
||||
# //////////////////////////////////////////////////
|
||||
# Initialization
|
||||
# //////////////////////////////////////////////////
|
||||
|
||||
def _init_bindings(self, top):
|
||||
top.bind('<Control-q>', self.destroy)
|
||||
|
||||
def _init_menubar(self, parent):
|
||||
pass
|
||||
|
||||
def _init_buttons(self, parent):
|
||||
pass
|
||||
|
||||
def _init_grammar(self, parent):
|
||||
self._prodlist = ProductionList(parent, self._grammar, width=20)
|
||||
self._prodlist.pack(side='top', fill='both', expand=1)
|
||||
self._prodlist.focus()
|
||||
self._prodlist.add_callback('select', self._selectprod_cb)
|
||||
self._prodlist.add_callback('move', self._selectprod_cb)
|
||||
|
||||
def _init_treelet(self, parent):
|
||||
self._treelet_canvas = Canvas(parent, background='white')
|
||||
self._treelet_canvas.pack(side='bottom', fill='x')
|
||||
self._treelet = None
|
||||
|
||||
def _init_workspace(self, parent):
|
||||
self._workspace = CanvasFrame(parent, background='white')
|
||||
self._workspace.pack(side='right', fill='both', expand=1)
|
||||
self._tree = None
|
||||
self.reset_workspace()
|
||||
|
||||
# //////////////////////////////////////////////////
|
||||
# Workspace
|
||||
# //////////////////////////////////////////////////
|
||||
|
||||
def reset_workspace(self):
|
||||
c = self._workspace.canvas()
|
||||
fontsize = int(self._size.get())
|
||||
node_font = ('helvetica', -(fontsize + 4), 'bold')
|
||||
leaf_font = ('helvetica', -(fontsize + 2))
|
||||
|
||||
# Remove the old tree
|
||||
if self._tree is not None:
|
||||
self._workspace.remove_widget(self._tree)
|
||||
|
||||
# The root of the tree.
|
||||
start = self._grammar.start().symbol()
|
||||
rootnode = TextWidget(c, start, font=node_font, draggable=1)
|
||||
|
||||
# The leaves of the tree.
|
||||
leaves = []
|
||||
for word in self._text:
|
||||
leaves.append(TextWidget(c, word, font=leaf_font, draggable=1))
|
||||
|
||||
# Put it all together into one tree
|
||||
self._tree = TreeSegmentWidget(c, rootnode, leaves, color='white')
|
||||
|
||||
# Add it to the workspace.
|
||||
self._workspace.add_widget(self._tree)
|
||||
|
||||
# Move the leaves to the bottom of the workspace.
|
||||
for leaf in leaves:
|
||||
leaf.move(0, 100)
|
||||
|
||||
# self._nodes = {start:1}
|
||||
# self._leaves = dict([(l,1) for l in leaves])
|
||||
|
||||
def workspace_markprod(self, production):
|
||||
pass
|
||||
|
||||
def _markproduction(self, prod, tree=None):
|
||||
if tree is None:
|
||||
tree = self._tree
|
||||
for i in range(len(tree.subtrees()) - len(prod.rhs())):
|
||||
if tree['color', i] == 'white':
|
||||
self._markproduction # FIXME: Is this necessary at all?
|
||||
|
||||
for j, node in enumerate(prod.rhs()):
|
||||
widget = tree.subtrees()[i + j]
|
||||
if (
|
||||
isinstance(node, Nonterminal)
|
||||
and isinstance(widget, TreeSegmentWidget)
|
||||
and node.symbol == widget.label().text()
|
||||
):
|
||||
pass # matching nonterminal
|
||||
elif (
|
||||
isinstance(node, string_types)
|
||||
and isinstance(widget, TextWidget)
|
||||
and node == widget.text()
|
||||
):
|
||||
pass # matching nonterminal
|
||||
else:
|
||||
break
|
||||
else:
|
||||
# Everything matched!
|
||||
print('MATCH AT', i)
|
||||
|
||||
# //////////////////////////////////////////////////
|
||||
# Grammar
|
||||
# //////////////////////////////////////////////////
|
||||
|
||||
def _selectprod_cb(self, production):
|
||||
canvas = self._treelet_canvas
|
||||
|
||||
self._prodlist.highlight(production)
|
||||
if self._treelet is not None:
|
||||
self._treelet.destroy()
|
||||
|
||||
# Convert the production to a tree.
|
||||
rhs = production.rhs()
|
||||
for (i, elt) in enumerate(rhs):
|
||||
if isinstance(elt, Nonterminal):
|
||||
elt = Tree(elt)
|
||||
tree = Tree(production.lhs().symbol(), *rhs)
|
||||
|
||||
# Draw the tree in the treelet area.
|
||||
fontsize = int(self._size.get())
|
||||
node_font = ('helvetica', -(fontsize + 4), 'bold')
|
||||
leaf_font = ('helvetica', -(fontsize + 2))
|
||||
self._treelet = tree_to_treesegment(
|
||||
canvas, tree, node_font=node_font, leaf_font=leaf_font
|
||||
)
|
||||
self._treelet['draggable'] = 1
|
||||
|
||||
# Center the treelet.
|
||||
(x1, y1, x2, y2) = self._treelet.bbox()
|
||||
w, h = int(canvas['width']), int(canvas['height'])
|
||||
self._treelet.move((w - x1 - x2) / 2, (h - y1 - y2) / 2)
|
||||
|
||||
# Mark the places where we can add it to the workspace.
|
||||
self._markproduction(production)
|
||||
|
||||
def destroy(self, *args):
|
||||
self._top.destroy()
|
||||
|
||||
def mainloop(self, *args, **kwargs):
|
||||
self._top.mainloop(*args, **kwargs)
|
||||
|
||||
|
||||
def demo2():
|
||||
from nltk import Nonterminal, Production, CFG
|
||||
|
||||
nonterminals = 'S VP NP PP P N Name V Det'
|
||||
(S, VP, NP, PP, P, N, Name, V, Det) = [Nonterminal(s) for s in nonterminals.split()]
|
||||
productions = (
|
||||
# Syntactic Productions
|
||||
Production(S, [NP, VP]),
|
||||
Production(NP, [Det, N]),
|
||||
Production(NP, [NP, PP]),
|
||||
Production(VP, [VP, PP]),
|
||||
Production(VP, [V, NP, PP]),
|
||||
Production(VP, [V, NP]),
|
||||
Production(PP, [P, NP]),
|
||||
Production(PP, []),
|
||||
Production(PP, ['up', 'over', NP]),
|
||||
# Lexical Productions
|
||||
Production(NP, ['I']),
|
||||
Production(Det, ['the']),
|
||||
Production(Det, ['a']),
|
||||
Production(N, ['man']),
|
||||
Production(V, ['saw']),
|
||||
Production(P, ['in']),
|
||||
Production(P, ['with']),
|
||||
Production(N, ['park']),
|
||||
Production(N, ['dog']),
|
||||
Production(N, ['statue']),
|
||||
Production(Det, ['my']),
|
||||
)
|
||||
grammar = CFG(S, productions)
|
||||
|
||||
text = 'I saw a man in the park'.split()
|
||||
d = CFGDemo(grammar, text)
|
||||
d.mainloop()
|
||||
|
||||
|
||||
######################################################################
|
||||
# Old Demo
|
||||
######################################################################
|
||||
|
||||
|
||||
def demo():
|
||||
from nltk import Nonterminal, CFG
|
||||
|
||||
nonterminals = 'S VP NP PP P N Name V Det'
|
||||
(S, VP, NP, PP, P, N, Name, V, Det) = [Nonterminal(s) for s in nonterminals.split()]
|
||||
|
||||
grammar = CFG.fromstring(
|
||||
"""
|
||||
S -> NP VP
|
||||
PP -> P NP
|
||||
NP -> Det N
|
||||
NP -> NP PP
|
||||
VP -> V NP
|
||||
VP -> VP PP
|
||||
Det -> 'a'
|
||||
Det -> 'the'
|
||||
Det -> 'my'
|
||||
NP -> 'I'
|
||||
N -> 'dog'
|
||||
N -> 'man'
|
||||
N -> 'park'
|
||||
N -> 'statue'
|
||||
V -> 'saw'
|
||||
P -> 'in'
|
||||
P -> 'up'
|
||||
P -> 'over'
|
||||
P -> 'with'
|
||||
"""
|
||||
)
|
||||
|
||||
def cb(grammar):
|
||||
print(grammar)
|
||||
|
||||
top = Tk()
|
||||
editor = CFGEditor(top, grammar, cb)
|
||||
Label(top, text='\nTesting CFG Editor\n').pack()
|
||||
Button(top, text='Quit', command=top.destroy).pack()
|
||||
top.mainloop()
|
||||
|
||||
|
||||
def demo3():
|
||||
from nltk import Production
|
||||
|
||||
(S, VP, NP, PP, P, N, Name, V, Det) = nonterminals(
|
||||
'S, VP, NP, PP, P, N, Name, V, Det'
|
||||
)
|
||||
|
||||
productions = (
|
||||
# Syntactic Productions
|
||||
Production(S, [NP, VP]),
|
||||
Production(NP, [Det, N]),
|
||||
Production(NP, [NP, PP]),
|
||||
Production(VP, [VP, PP]),
|
||||
Production(VP, [V, NP, PP]),
|
||||
Production(VP, [V, NP]),
|
||||
Production(PP, [P, NP]),
|
||||
Production(PP, []),
|
||||
Production(PP, ['up', 'over', NP]),
|
||||
# Lexical Productions
|
||||
Production(NP, ['I']),
|
||||
Production(Det, ['the']),
|
||||
Production(Det, ['a']),
|
||||
Production(N, ['man']),
|
||||
Production(V, ['saw']),
|
||||
Production(P, ['in']),
|
||||
Production(P, ['with']),
|
||||
Production(N, ['park']),
|
||||
Production(N, ['dog']),
|
||||
Production(N, ['statue']),
|
||||
Production(Det, ['my']),
|
||||
)
|
||||
|
||||
t = Tk()
|
||||
|
||||
def destroy(e, t=t):
|
||||
t.destroy()
|
||||
|
||||
t.bind('q', destroy)
|
||||
p = ProductionList(t, productions)
|
||||
p.pack(expand=1, fill='both')
|
||||
p.add_callback('select', p.markonly)
|
||||
p.add_callback('move', p.markonly)
|
||||
p.focus()
|
||||
p.mark(productions[2])
|
||||
p.mark(productions[8])
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
demo()
|
||||
66
venv/lib/python3.7/site-packages/nltk/draw/dispersion.py
Normal file
66
venv/lib/python3.7/site-packages/nltk/draw/dispersion.py
Normal file
@@ -0,0 +1,66 @@
|
||||
# Natural Language Toolkit: Dispersion Plots
|
||||
#
|
||||
# Copyright (C) 2001-2019 NLTK Project
|
||||
# Author: Steven Bird <stevenbird1@gmail.com>
|
||||
# URL: <http://nltk.org/>
|
||||
# For license information, see LICENSE.TXT
|
||||
|
||||
"""
|
||||
A utility for displaying lexical dispersion.
|
||||
"""
|
||||
|
||||
|
||||
def dispersion_plot(text, words, ignore_case=False, title="Lexical Dispersion Plot"):
|
||||
"""
|
||||
Generate a lexical dispersion plot.
|
||||
|
||||
:param text: The source text
|
||||
:type text: list(str) or enum(str)
|
||||
:param words: The target words
|
||||
:type words: list of str
|
||||
:param ignore_case: flag to set if case should be ignored when searching text
|
||||
:type ignore_case: bool
|
||||
"""
|
||||
|
||||
try:
|
||||
from matplotlib import pylab
|
||||
except ImportError:
|
||||
raise ValueError(
|
||||
'The plot function requires matplotlib to be installed.'
|
||||
'See http://matplotlib.org/'
|
||||
)
|
||||
|
||||
text = list(text)
|
||||
words.reverse()
|
||||
|
||||
if ignore_case:
|
||||
words_to_comp = list(map(str.lower, words))
|
||||
text_to_comp = list(map(str.lower, text))
|
||||
else:
|
||||
words_to_comp = words
|
||||
text_to_comp = text
|
||||
|
||||
points = [
|
||||
(x, y)
|
||||
for x in range(len(text_to_comp))
|
||||
for y in range(len(words_to_comp))
|
||||
if text_to_comp[x] == words_to_comp[y]
|
||||
]
|
||||
if points:
|
||||
x, y = list(zip(*points))
|
||||
else:
|
||||
x = y = ()
|
||||
pylab.plot(x, y, "b|", scalex=0.1)
|
||||
pylab.yticks(list(range(len(words))), words, color="b")
|
||||
pylab.ylim(-1, len(words))
|
||||
pylab.title(title)
|
||||
pylab.xlabel("Word Offset")
|
||||
pylab.show()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
import nltk.compat
|
||||
from nltk.corpus import gutenberg
|
||||
|
||||
words = ['Elinor', 'Marianne', 'Edward', 'Willoughby']
|
||||
dispersion_plot(gutenberg.words('austen-sense.txt'), words)
|
||||
1183
venv/lib/python3.7/site-packages/nltk/draw/table.py
Normal file
1183
venv/lib/python3.7/site-packages/nltk/draw/table.py
Normal file
File diff suppressed because it is too large
Load Diff
1129
venv/lib/python3.7/site-packages/nltk/draw/tree.py
Normal file
1129
venv/lib/python3.7/site-packages/nltk/draw/tree.py
Normal file
File diff suppressed because it is too large
Load Diff
2574
venv/lib/python3.7/site-packages/nltk/draw/util.py
Normal file
2574
venv/lib/python3.7/site-packages/nltk/draw/util.py
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user