/nltk/draw/cfg.py
Python | 777 lines | 683 code | 22 blank | 72 comment | 13 complexity | d03358941f6a2398d4b99e8b4dd540d5 MD5 | raw file
Possible License(s): Apache-2.0
- # Natural Language Toolkit: CFG visualization
- #
- # Copyright (C) 2001-2013 NLTK Project
- # Author: Edward Loper <edloper@gradient.cis.upenn.edu>
- # URL: <http://www.nltk.org/>
- # For license information, see LICENSE.TXT
- """
- Visualization tools for CFGs.
- """
- # Idea for a nice demo:
- # - 3 panes: grammar, treelet, working area
- # - grammar is a list of productions
- # - when you select a production, the treelet that it licenses appears
- # in the treelet area
- # - the working area has the text on the bottom, and S at top. When
- # you select a production, it shows (ghosted) the locations where
- # that production's treelet could be attached to either the text
- # or the tree rooted at S.
- # - the user can drag the treelet onto one of those (or click on them?)
- # - the user can delete pieces of the tree from the working area
- # (right click?)
- # - connecting top to bottom? drag one NP onto another?
- #
- #   +-------------------------------------------------------------+
- #   | S -> NP VP   |                 S                            |
- #   |[NP -> Det N ]|                / \                           |
- #   |     ...      |              NP  VP                          |
- #   | N -> 'dog'   |                                              |
- #   | N -> 'cat'   |                                              |
- #   |     ...      |                                              |
- #   +--------------+                                              |
- #   |      NP      |                      Det     N               |
- #   |     /  \     |                       |      |               |
- #   |   Det   N    |  the    cat    saw   the    dog              |
- #   |              |                                              |
- #   +--------------+----------------------------------------------+
- #
- # Operations:
- # - connect a new treelet -- drag or click shadow
- # - delete a treelet -- right click
- # - if only connected to top, delete everything below
- # - if only connected to bottom, delete everything above
- # - connect top & bottom -- drag a leaf to a root or a root to a leaf
- # - disconnect top & bottom -- right click
- # - if connected to top & bottom, then disconnect
- import nltk.compat
- import re
- from tkinter import (Button, Canvas, Entry, Frame, IntVar, Label,
- Scrollbar, Text, Tk, Toplevel)
- from nltk.grammar import (ContextFreeGrammar, parse_cfg_production,
- Nonterminal, nonterminals)
- from nltk.tree import Tree
- from nltk.draw.tree import TreeSegmentWidget, tree_to_treesegment
- from nltk.draw.util import (CanvasFrame, ColorizedList, ShowText,
- SymbolWidget, TextWidget)
- from nltk import compat
- ######################################################################
- # Production List
- ######################################################################
class ProductionList(ColorizedList):
    """
    A ``ColorizedList`` whose items are grammar productions.

    Each production is rendered as its left-hand side, an arrow symbol,
    and its right-hand side elements, with separate color tags for
    nonterminals, the arrow, and terminals.
    """
    ARROW = SymbolWidget.SYMBOLS['rightarrow']

    def _init_colortags(self, textwidget, options):
        """
        Configure the ``terminal``, ``arrow`` and ``nonterminal`` tags
        on ``textwidget``.  ``options`` is accepted but not used here.
        """
        tag_styles = (
            ('terminal', dict(foreground='#006000')),
            ('arrow', dict(font='symbol', underline='0')),
            ('nonterminal', dict(foreground='blue',
                                 font=('helvetica', -12, 'bold'))),
        )
        for tag_name, style in tag_styles:
            textwidget.tag_config(tag_name, **style)

    def _item_repr(self, item):
        """
        Return a list of ``(text, tag)`` pairs describing how the
        production ``item`` should be displayed.
        """
        # The left-hand side and the arrow always come first.
        pieces = [('%s\t' % item.lhs(), 'nonterminal'),
                  (self.ARROW, 'arrow')]
        for symbol in item.rhs():
            if isinstance(symbol, Nonterminal):
                pieces.append((' %s' % symbol.symbol(), 'nonterminal'))
            else:
                # Terminals are shown using their repr (i.e. quoted).
                pieces.append((' %r' % symbol, 'terminal'))
        return pieces
- ######################################################################
- # CFG Editor
- ######################################################################
# Help text displayed by CFGEditor._help().
_CFGEditor_HELP = """
The CFG Editor can be used to create or modify context free grammars.
A context free grammar consists of a start symbol and a list of
productions. The start symbol is specified by the text entry field in
the upper right hand corner of the editor; and the list of productions
are specified in the main text editing box.
Every non-blank line specifies a single production. Each production
has the form "LHS -> RHS," where LHS is a single nonterminal, and RHS
is a list of nonterminals and terminals.
Nonterminals must be a single word, such as S or NP or NP_subj.
Currently, nonterminals must consist of alphanumeric characters and
underscores (_). Nonterminals are colored blue. If you place the
mouse over any nonterminal, then all occurrences of that nonterminal
will be highlighted.
Terminals must be surrounded by single quotes (') or double
quotes(\"). For example, "dog" and "New York" are terminals.
Currently, the string within the quotes must consist of alphanumeric
characters, underscores, and spaces.
To enter a new production, go to a blank line, and type a nonterminal,
followed by an arrow (->), followed by a sequence of terminals and
nonterminals. Note that "->" (dash + greater-than) is automatically
converted to an arrow symbol. When you move your cursor to a
different line, your production will automatically be colorized. If
there are any errors, they will be highlighted in red.
Note that the order of the productions is significant for some
algorithms. To re-order the productions, use cut and paste to move
them.
Use the buttons at the bottom of the window when you are done editing
the CFG:
- Ok: apply the new CFG, and exit the editor.
- Apply: apply the new CFG, and do not exit the editor.
- Reset: revert to the original CFG, and do not exit the editor.
- Cancel: revert to the original CFG, and exit the editor.
"""
class CFGEditor(object):
    """
    A dialog window for creating and editing context free grammars.
    ``CFGEditor`` imposes the following restrictions:

    - All nonterminals must be strings consisting of word
      characters.
    - All terminals must be strings consisting of word characters
      and space characters.
    """

    # Regular expressions used by _analyze_line and _mark_error.  They
    # are precompiled so the text can be processed faster, and written
    # as raw strings so that regex escapes are never interpreted as
    # string escapes.
    ARROW = SymbolWidget.SYMBOLS['rightarrow']
    _LHS_RE = re.compile(r"(^\s*\w+\s*)(->|(" + ARROW + r"))")
    _ARROW_RE = re.compile(r"\s*(->|(" + ARROW + r"))\s*")
    _PRODUCTION_RE = re.compile(
        r"(^\s*\w+\s*)"                              # LHS
        + r"(->|(" + ARROW + r"))\s*"                # arrow
        + r"((\w+|'[\w ]*'|\"[\w ]*\"|\|)\s*)*$")    # RHS
    _TOKEN_RE = re.compile(
        r"\w+|->|'[\w ]+'|\"[\w ]+\"|(" + ARROW + r")")
    _BOLD = ('helvetica', -12, 'bold')

    def __init__(self, parent, cfg=None, set_cfg_callback=None):
        """
        Create the editor window as a ``Toplevel`` child of ``parent``.

        :param parent: The Tk widget that owns the editor window.
        :param cfg: The grammar to edit.  If None, an empty grammar
            with start symbol ``S`` is used.
        :param set_cfg_callback: If not None, a callable that is
            invoked with a grammar whenever the grammar is applied
            or reset.
        """
        self._parent = parent
        if cfg is not None:
            self._cfg = cfg
        else:
            self._cfg = ContextFreeGrammar(Nonterminal('S'), [])
        self._set_cfg_callback = set_cfg_callback

        self._highlight_matching_nonterminals = 1

        # Create the top-level window.
        self._top = Toplevel(parent)
        self._init_bindings()

        self._init_startframe()
        self._startframe.pack(side='top', fill='x', expand=0)

        self._init_prodframe()
        self._prodframe.pack(side='top', fill='both', expand=1)

        self._init_buttons()
        self._buttonframe.pack(side='bottom', fill='x', expand=0)

        self._textwidget.focus()

    def _init_startframe(self):
        """
        Create the top frame, holding the start symbol entry field
        (pre-filled from the current grammar) and the field labels.
        """
        frame = self._startframe = Frame(self._top)
        self._start = Entry(frame)
        self._start.pack(side='right')
        Label(frame, text='Start Symbol:').pack(side='right')
        Label(frame, text='Productions:').pack(side='left')
        self._start.insert(0, self._cfg.start().symbol())

    def _init_buttons(self):
        """
        Create the bottom frame with the Ok/Apply/Reset/Cancel buttons
        on the left and the Help button on the right.
        """
        frame = self._buttonframe = Frame(self._top)
        button_specs = (
            ('Ok', self._ok, 'left'),
            ('Apply', self._apply, 'left'),
            ('Reset', self._reset, 'left'),
            ('Cancel', self._cancel, 'left'),
            ('Help', self._help, 'right'),
        )
        for label, command, side in button_specs:
            Button(frame, text=label, command=command,
                   underline=0, takefocus=0).pack(side=side)

    def _init_bindings(self):
        """
        Set the window title and bind the keyboard shortcuts for
        cancel, ok, apply, reset and help on the top-level window.
        """
        self._top.title('CFG Editor')
        # <Control-x> and <Control-c> are intentionally not bound to
        # cancel (presumably so they stay available for cut/copy).
        shortcuts = (
            (self._cancel, ('<Control-q>', '<Alt-q>', '<Control-d>',
                            '<Alt-x>', '<Escape>', '<Alt-c>')),
            (self._ok, ('<Control-o>', '<Alt-o>')),
            (self._apply, ('<Control-a>', '<Alt-a>')),
            (self._reset, ('<Control-r>', '<Alt-r>')),
            (self._help, ('<Control-h>', '<Alt-h>', '<F1>')),
        )
        for handler, sequences in shortcuts:
            for sequence in sequences:
                self._top.bind(sequence, handler)

    def _init_prodframe(self):
        """
        Create the frame containing the production text widget and its
        scrollbar, install the colorization tags and event bindings,
        and fill the text widget with the current grammar's
        productions (one line per left-hand side where possible).
        """
        self._prodframe = Frame(self._top)

        # Create the basic Text widget & scrollbar.
        self._textwidget = Text(self._prodframe, background='#e0e0e0',
                                exportselection=1)
        self._textscroll = Scrollbar(self._prodframe, takefocus=0,
                                     orient='vertical')
        self._textwidget.config(yscrollcommand=self._textscroll.set)
        self._textscroll.config(command=self._textwidget.yview)
        self._textscroll.pack(side='right', fill='y')
        self._textwidget.pack(expand=1, fill='both', side='left')

        # Initialize the colorization tags.  Each nonterminal gets its
        # own tag, so they aren't listed here.
        self._textwidget.tag_config('terminal', foreground='#006000')
        self._textwidget.tag_config('arrow', font='symbol')
        self._textwidget.tag_config('error', background='red')

        # Keep track of what line they're on.  We use that to remember
        # to re-analyze a line whenever they leave it.
        self._linenum = 0

        # Expand "->" to an arrow.
        self._top.bind('>', self._replace_arrows)

        # Re-colorize lines when appropriate.
        self._top.bind('<<Paste>>', self._analyze)
        self._top.bind('<KeyPress>', self._check_analyze)
        self._top.bind('<ButtonPress>', self._check_analyze)

        # Tab cycles focus. (why doesn't this work??)
        # NOTE(review): the handler does not return 'break', so Tk
        # presumably still runs the default <Tab> binding -- confirm.
        def cycle(e, textwidget=self._textwidget):
            textwidget.tk_focusNext().focus()
        self._textwidget.bind('<Tab>', cycle)

        # Group each production's RHS under its LHS, then merge
        # adjacent entries that share a LHS so they can be shown on a
        # single line separated by '|'.  Entries containing an empty
        # RHS are never merged.
        prod_tuples = [(p.lhs(), [p.rhs()])
                       for p in self._cfg.productions()]
        for i in range(len(prod_tuples) - 1, 0, -1):
            if prod_tuples[i][0] == prod_tuples[i - 1][0]:
                if () in prod_tuples[i][1]:
                    continue
                if () in prod_tuples[i - 1][1]:
                    continue
                prod_tuples[i - 1][1].extend(prod_tuples[i][1])
                del prod_tuples[i]

        # Add the productions to the text widget, and colorize them.
        for lhs, rhss in prod_tuples:
            alternatives = []
            for rhs in rhss:
                alternatives.append(''.join(
                    (' %s' % elt) if isinstance(elt, Nonterminal)
                    else (' %r' % elt)
                    for elt in rhs))
            self._textwidget.insert(
                'end', '%s ->%s\n' % (lhs, ' |'.join(alternatives)))

        self._analyze()

    def _clear_tags(self, linenum):
        """
        Remove all tags (except ``arrow`` and ``sel``) from the given
        line of the text widget used for editing the productions.
        """
        start = '%d.0' % linenum
        end = '%d.end' % linenum
        for tag in self._textwidget.tag_names():
            if tag not in ('arrow', 'sel'):
                self._textwidget.tag_remove(tag, start, end)

    def _check_analyze(self, *e):
        """
        Check if we've moved to a new line.  If we have, then remove
        all colorization from the line we moved to, and re-colorize
        the line that we moved from.
        """
        linenum = int(self._textwidget.index('insert').split('.')[0])
        if linenum != self._linenum:
            self._clear_tags(linenum)
            self._analyze_line(self._linenum)
            self._linenum = linenum

    def _replace_arrows(self, *e):
        """
        Replace any ``'->'`` text strings with arrows (char \\256, in
        symbol font).  This searches the whole buffer, but is fast
        enough to be done anytime they press '>'.
        """
        # First pass: swap every literal "->" for a tab followed by
        # the arrow symbol.
        arrow = '1.0'
        while True:
            arrow = self._textwidget.search('->', arrow, 'end+1char')
            if arrow == '':
                break
            self._textwidget.delete(arrow, arrow + '+2char')
            self._textwidget.insert(arrow, self.ARROW, 'arrow')
            self._textwidget.insert(arrow, '\t')

        # Second pass: make sure every arrow symbol carries the
        # 'arrow' tag.  Each search starts one character past the
        # previous position so the same arrow is not found twice.
        arrow = '1.0'
        while True:
            arrow = self._textwidget.search(self.ARROW, arrow + '+1char',
                                            'end+1char')
            if arrow == '':
                break
            self._textwidget.tag_add('arrow', arrow, arrow + '+1char')

    def _analyze_token(self, match, linenum):
        """
        Given a line number and a regexp match for a token on that
        line, colorize the token.  Note that the regexp match gives us
        the token's text, start index (on the line), and end index (on
        the line).
        """
        token = match.group()
        # What type of token is it?
        if token[0] in "'\"":
            tag = 'terminal'
        elif token in ('->', self.ARROW):
            tag = 'arrow'
        else:
            # If it's a nonterminal, then set up new bindings, so we
            # can highlight all instances of that nonterminal when we
            # put the mouse over it.
            tag = 'nonterminal_' + token
            if tag not in self._textwidget.tag_names():
                self._init_nonterminal_tag(tag)

        start = '%d.%d' % (linenum, match.start())
        end = '%d.%d' % (linenum, match.end())
        self._textwidget.tag_add(tag, start, end)

    def _init_nonterminal_tag(self, tag, foreground='blue'):
        """
        Configure the text tag ``tag`` for a nonterminal: bold text in
        the given ``foreground`` color.  If matching-nonterminal
        highlighting is enabled, also bind mouse enter/leave events
        that set and clear a background color on the tag.
        """
        self._textwidget.tag_config(tag, foreground=foreground,
                                    font=CFGEditor._BOLD)
        if not self._highlight_matching_nonterminals:
            return

        def enter(e, textwidget=self._textwidget, tag=tag):
            textwidget.tag_config(tag, background='#80ff80')

        def leave(e, textwidget=self._textwidget, tag=tag):
            textwidget.tag_config(tag, background='')

        self._textwidget.tag_bind(tag, '<Enter>', enter)
        self._textwidget.tag_bind(tag, '<Leave>', leave)

    def _analyze_line(self, linenum):
        """
        Colorize a given line.
        """
        # Get rid of any tags that were previously on the line.
        self._clear_tags(linenum)

        # Get the line's text string.
        line = self._textwidget.get('%d.0' % linenum, '%d.end' % linenum)

        # If it's a valid production, then colorize each token.
        if CFGEditor._PRODUCTION_RE.match(line):
            # It's valid; use _TOKEN_RE to tokenize the production,
            # and call _analyze_token on each token.
            for match in CFGEditor._TOKEN_RE.finditer(line):
                self._analyze_token(match, linenum)
        elif line.strip() != '':
            # It's invalid; show the user where the error is.
            self._mark_error(linenum, line)

    def _mark_error(self, linenum, line):
        """
        Mark the location of an error in a line.
        """
        arrowmatch = CFGEditor._ARROW_RE.search(line)
        if not arrowmatch:
            # If there's no arrow at all, highlight the whole line.
            start = '%d.0' % linenum
            end = '%d.end' % linenum
        elif not CFGEditor._LHS_RE.match(line):
            # Otherwise, if the LHS is bad, highlight it.
            start = '%d.0' % linenum
            end = '%d.%d' % (linenum, arrowmatch.start())
        else:
            # Otherwise, highlight the RHS.
            start = '%d.%d' % (linenum, arrowmatch.end())
            end = '%d.end' % linenum

        # If we're highlighting 0 chars, highlight the whole line.
        if self._textwidget.compare(start, '==', end):
            start = '%d.0' % linenum
            end = '%d.end' % linenum
        self._textwidget.tag_add('error', start, end)

    def _analyze(self, *e):
        """
        Replace ``->`` with arrows, and colorize the entire buffer.
        """
        self._replace_arrows()
        numlines = int(self._textwidget.index('end').split('.')[0])
        for linenum in range(1, numlines + 1):  # line numbers start at 1.
            self._analyze_line(linenum)

    def _parse_productions(self):
        """
        Parse the current contents of the textwidget buffer, to create
        a list of productions.

        :return: The productions obtained by passing each non-blank
            line of the buffer to ``parse_cfg_production``.
        """
        # Get the text and normalize it: arrow symbols become "->" and
        # tabs become spaces.  These are literal replacements, so
        # plain string replacement is used rather than a regexp.
        text = self._textwidget.get('1.0', 'end')
        text = text.replace(self.ARROW, '->').replace('\t', ' ')

        # Convert each non-blank line to CFG production(s).
        productions = []
        for line in text.split('\n'):
            line = line.strip()
            if line:
                productions += parse_cfg_production(line)
        return productions

    def _destroy(self, *e):
        """Destroy the editor window, if it has not been destroyed yet."""
        if self._top is None:
            return
        self._top.destroy()
        self._top = None

    def _ok(self, *e):
        """Apply the edited grammar, then close the editor."""
        self._apply()
        self._destroy()

    def _apply(self, *e):
        """
        Build a grammar from the start symbol field and the production
        text, and pass it to the callback (if one was given).
        """
        productions = self._parse_productions()
        start = Nonterminal(self._start.get())
        cfg = ContextFreeGrammar(start, productions)
        if self._set_cfg_callback is not None:
            self._set_cfg_callback(cfg)

    def _reset(self, *e):
        """
        Replace the text with the productions of the original grammar
        (one per line), re-colorize it, and pass the original grammar
        to the callback (if one was given).
        """
        self._textwidget.delete('1.0', 'end')
        for production in self._cfg.productions():
            self._textwidget.insert('end', '%s\n' % production)
        self._analyze()
        if self._set_cfg_callback is not None:
            self._set_cfg_callback(self._cfg)

    def _cancel(self, *e):
        """Revert to the original grammar, then close the editor."""
        # Reverting is best-effort: the window is closed even if the
        # reset fails.  Only ordinary exceptions are suppressed, so
        # KeyboardInterrupt and SystemExit still propagate.
        try:
            self._reset()
        except Exception:
            pass
        self._destroy()

    def _help(self, *e):
        """Show the editor's help text in a separate window."""
        # The default font's not very legible; try using 'fixed' instead.
        try:
            ShowText(self._parent, 'Help: CFG Editor',
                     (_CFGEditor_HELP).strip(), width=75, font='fixed')
        except Exception:
            # Fall back to the default font if 'fixed' can't be used.
            ShowText(self._parent, 'Help: CFG Editor',
                     (_CFGEditor_HELP).strip(), width=75)
- ######################################################################
- # New Demo (built tree based on cfg)
- ######################################################################
class CFGDemo(object):
    """
    A Tk window for experimenting with a grammar and a text.

    The window contains a list of the grammar's productions, a canvas
    showing a "treelet" for the selected production, and a workspace
    holding the grammar's start symbol above the words of the text.
    """

    def __init__(self, grammar, text):
        """
        :param grammar: The grammar to display.  Its ``start()`` symbol
            labels the workspace root and its ``productions()`` fill
            the production list.
        :param text: An iterable of words; each one becomes a leaf
            widget in the workspace.
        """
        self._grammar = grammar
        self._text = text

        # Set up the main window.
        self._top = Tk()
        self._top.title('Context Free Grammar Demo')

        # Base font size
        self._size = IntVar(self._top)
        self._size.set(12)  # = medium

        # Set up the key bindings
        self._init_bindings(self._top)

        # Create the basic frames
        frame1 = Frame(self._top)
        frame1.pack(side='left', fill='y', expand=0)
        self._init_menubar(self._top)
        self._init_buttons(self._top)
        self._init_grammar(frame1)
        self._init_treelet(frame1)
        self._init_workspace(self._top)

    #//////////////////////////////////////////////////
    # Initialization
    #//////////////////////////////////////////////////

    def _init_bindings(self, top):
        """Bind Control-q on ``top`` to close the demo."""
        top.bind('<Control-q>', self.destroy)

    def _init_menubar(self, parent):
        """Placeholder; no menu bar is created."""
        pass

    def _init_buttons(self, parent):
        """Placeholder; no buttons are created."""
        pass

    def _init_grammar(self, parent):
        """
        Create the production list inside ``parent`` and register
        ``_selectprod_cb`` for its 'select' and 'move' callbacks.
        """
        # ProductionList is given a sequence of productions (as in
        # demo3), not the grammar object itself.
        self._prodlist = ProductionList(parent, self._grammar.productions(),
                                        width=20)
        self._prodlist.pack(side='top', fill='both', expand=1)
        self._prodlist.focus()
        self._prodlist.add_callback('select', self._selectprod_cb)
        self._prodlist.add_callback('move', self._selectprod_cb)

    def _init_treelet(self, parent):
        """Create the (initially empty) treelet canvas inside ``parent``."""
        self._treelet_canvas = Canvas(parent, background='white')
        self._treelet_canvas.pack(side='bottom', fill='x')
        self._treelet = None

    def _init_workspace(self, parent):
        """Create the workspace canvas frame and populate it."""
        self._workspace = CanvasFrame(parent, background='white')
        self._workspace.pack(side='right', fill='both', expand=1)
        self._tree = None
        self.reset_workspace()

    #//////////////////////////////////////////////////
    # Workspace
    #//////////////////////////////////////////////////

    def reset_workspace(self):
        """
        Rebuild the workspace: discard the current tree widget (if
        any) and create a new one whose root is the grammar's start
        symbol and whose children are the words of the text.
        """
        c = self._workspace.canvas()
        fontsize = int(self._size.get())
        node_font = ('helvetica', -(fontsize + 4), 'bold')
        leaf_font = ('helvetica', -(fontsize + 2))

        # Remove the old tree
        if self._tree is not None:
            self._workspace.remove_widget(self._tree)

        # The root of the tree.
        start = self._grammar.start().symbol()
        rootnode = TextWidget(c, start, font=node_font, draggable=1)

        # The leaves of the tree.
        leaves = [TextWidget(c, word, font=leaf_font, draggable=1)
                  for word in self._text]

        # Put it all together into one tree.  The connecting lines are
        # drawn in white, the same color as the workspace background.
        self._tree = TreeSegmentWidget(c, rootnode, leaves,
                                       color='white')

        # Add it to the workspace.
        self._workspace.add_widget(self._tree)

        # Move the leaves to the bottom of the workspace.
        for leaf in leaves:
            leaf.move(0, 100)

    def workspace_markprod(self, production):
        """Placeholder; currently does nothing."""
        pass

    def _markproduction(self, prod, tree=None):
        """
        Report every position among the direct children of ``tree``
        where the right-hand side of ``prod`` matches a consecutive
        run of children.

        :param prod: The production whose right-hand side is matched.
        :param tree: The tree segment widget to examine.  Defaults to
            the workspace tree.

        A nonterminal matches a ``TreeSegmentWidget`` whose node text
        equals the nonterminal's symbol; a string matches a
        ``TextWidget`` with the same text.  Each match is printed as
        ``MATCH AT <index>``.
        """
        if tree is None:
            tree = self._tree
        subtrees = tree.subtrees()
        rhs = prod.rhs()

        # Slide a window of len(rhs) children over the subtrees; the
        # "+ 1" makes sure the final window position is also checked.
        for i in range(len(subtrees) - len(rhs) + 1):
            for j, node in enumerate(rhs):
                widget = subtrees[i + j]
                if (isinstance(node, Nonterminal) and
                        isinstance(widget, TreeSegmentWidget) and
                        node.symbol() == widget.node().text()):
                    continue  # matching nonterminal
                if (isinstance(node, compat.string_types) and
                        isinstance(widget, TextWidget) and
                        node == widget.text()):
                    continue  # matching terminal
                break
            else:
                # Everything matched!
                print('MATCH AT', i)

    #//////////////////////////////////////////////////
    # Grammar
    #//////////////////////////////////////////////////

    def _selectprod_cb(self, production):
        """
        Callback for the production list: highlight ``production``,
        draw its treelet centered in the treelet canvas, and look for
        places in the workspace where it matches.
        """
        canvas = self._treelet_canvas

        self._prodlist.highlight(production)
        if self._treelet is not None:
            self._treelet.destroy()

        # Convert the production to a tree.  Nonterminals on the
        # right-hand side become childless subtrees; terminals stay
        # plain leaves.  Tree takes its children as a single list.
        children = [Tree(elt.symbol(), []) if isinstance(elt, Nonterminal)
                    else elt
                    for elt in production.rhs()]
        tree = Tree(production.lhs().symbol(), children)

        # Draw the tree in the treelet area.
        fontsize = int(self._size.get())
        node_font = ('helvetica', -(fontsize + 4), 'bold')
        leaf_font = ('helvetica', -(fontsize + 2))
        self._treelet = tree_to_treesegment(canvas, tree,
                                            node_font=node_font,
                                            leaf_font=leaf_font)
        self._treelet['draggable'] = 1

        # Center the treelet.
        (x1, y1, x2, y2) = self._treelet.bbox()
        w, h = int(canvas['width']), int(canvas['height'])
        self._treelet.move((w - x1 - x2) / 2, (h - y1 - y2) / 2)

        # Mark the places where we can add it to the workspace.
        self._markproduction(production)

    def destroy(self, *args):
        """Destroy the demo's main window."""
        self._top.destroy()

    def mainloop(self, *args, **kwargs):
        """Enter the Tk main loop, forwarding any arguments to it."""
        self._top.mainloop(*args, **kwargs)
def demo2():
    """
    Open a ``CFGDemo`` window for a small hand-built grammar and the
    sentence "I saw a man in the park", then run its main loop.
    """
    from nltk import Nonterminal, Production, ContextFreeGrammar

    symbol_names = 'S VP NP PP P N Name V Det'.split()
    S, VP, NP, PP, P, N, Name, V, Det = [Nonterminal(name)
                                         for name in symbol_names]

    syntactic = (
        Production(S, [NP, VP]),
        Production(NP, [Det, N]),
        Production(NP, [NP, PP]),
        Production(VP, [VP, PP]),
        Production(VP, [V, NP, PP]),
        Production(VP, [V, NP]),
        Production(PP, [P, NP]),
        Production(PP, []),
        Production(PP, ['up', 'over', NP]),
    )
    lexical = (
        Production(NP, ['I']),
        Production(Det, ['the']),
        Production(Det, ['a']),
        Production(N, ['man']),
        Production(V, ['saw']),
        Production(P, ['in']),
        Production(P, ['with']),
        Production(N, ['park']),
        Production(N, ['dog']),
        Production(N, ['statue']),
        Production(Det, ['my']),
    )

    # Syntactic productions first, then lexical ones.
    grammar = ContextFreeGrammar(S, syntactic + lexical)
    words = 'I saw a man in the park'.split()

    window = CFGDemo(grammar, words)
    window.mainloop()
- ######################################################################
- # Old Demo
- ######################################################################
def demo():
    """
    Open a ``CFGEditor`` on a small example grammar.  Every grammar
    the editor applies or resets to is printed.  The root window
    offers a Quit button; this function blocks in the Tk main loop
    until that window is closed.
    """
    from nltk import parse_cfg

    grammar = parse_cfg("""
    S -> NP VP
    PP -> P NP
    NP -> Det N
    NP -> NP PP
    VP -> V NP
    VP -> VP PP
    Det -> 'a'
    Det -> 'the'
    Det -> 'my'
    NP -> 'I'
    N -> 'dog'
    N -> 'man'
    N -> 'park'
    N -> 'statue'
    V -> 'saw'
    P -> 'in'
    P -> 'up'
    P -> 'over'
    P -> 'with'
    """)

    def cb(grammar):
        print(grammar)

    top = Tk()
    # The editor registers its own callbacks on the Tk widgets, so no
    # local reference to it is needed here.
    CFGEditor(top, grammar, cb)
    Label(top, text='\nTesting CFG Editor\n').pack()
    Button(top, text='Quit', command=top.destroy).pack()
    top.mainloop()
def demo3():
    """
    Show a ``ProductionList`` for a small hand-built set of
    productions, with two of them marked.  Pressing ``q`` closes the
    window; this function blocks in the Tk main loop until then.
    """
    from nltk import Production

    (S, VP, NP, PP, P, N, Name, V, Det) = \
        nonterminals('S, VP, NP, PP, P, N, Name, V, Det')

    productions = (
        # Syntactic Productions
        Production(S, [NP, VP]),
        Production(NP, [Det, N]),
        Production(NP, [NP, PP]),
        Production(VP, [VP, PP]),
        Production(VP, [V, NP, PP]),
        Production(VP, [V, NP]),
        Production(PP, [P, NP]),
        Production(PP, []),
        Production(PP, ['up', 'over', NP]),

        # Lexical Productions
        Production(NP, ['I']), Production(Det, ['the']),
        Production(Det, ['a']), Production(N, ['man']),
        Production(V, ['saw']), Production(P, ['in']),
        Production(P, ['with']), Production(N, ['park']),
        Production(N, ['dog']), Production(N, ['statue']),
        Production(Det, ['my']),
    )

    t = Tk()

    def destroy(e, t=t):
        t.destroy()
    t.bind('q', destroy)

    p = ProductionList(t, productions)
    p.pack(expand=1, fill='both')
    p.add_callback('select', p.markonly)
    p.add_callback('move', p.markonly)
    p.focus()
    p.mark(productions[2])
    p.mark(productions[8])

    # Run the event loop, as demo() and demo2() do, so the window
    # stays open when this is run as a script.
    t.mainloop()
# Run the CFG editor demo when this module is executed as a script.
if __name__ == '__main__':
    demo()