cfg.py - Natural Language Toolkit: CFG visualization Copyri…

/nltk/nltk/draw/cfg.py

http://nltk.googlecode.com/ · Python · 776 lines · 509 code · 99 blank · 168 comment · 60 complexity · 9b746820dfdd23d529a37b0c305e6eec MD5 · raw file

# Natural Language Toolkit: CFG visualization
#
# Copyright (C) 2001-2011 NLTK Project
# Author: Edward Loper <edloper@gradient.cis.upenn.edu>
# URL: <http://www.nltk.org/>
# For license information, see LICENSE.TXT
#
# $Id: cfg.py 8730 2011-03-08 04:49:46Z StevenBird1 $

"""
Visualization tools for CFGs.
"""

import re


"""
Idea for a nice demo:
  - 3 panes: grammar, treelet, working area
    - grammar is a list of productions
    - when you select a production, the treelet that it licenses appears
      in the treelet area
    - the working area has the text on the bottom, and S at top.  When
      you select a production, it shows (ghosted) the locations where
      that production's treelet could be attached to either the text
      or the tree rooted at S.
    - the user can drag the treelet onto one of those (or click on them?)
    - the user can delete pieces of the tree from the working area
      (right click?)
    - connecting top to bottom? drag one NP onto another?

+-------------------------------------------------------------+
| S -> NP VP   |                 S                            |
|[NP -> Det N ]|                / \                           |
|     ...      |              NP  VP                          |
| N -> 'dog'   |                                              |
| N -> 'cat'   |                                              |
|     ...      |                                              |
+--------------+                                              |
|      NP      |                      Det     N               |
|     /  \     |                       |      |               |
|   Det   N    |  the    cat    saw   the    dog              |
|              |                                              |
+--------------+----------------------------------------------+

Operations:
  - connect a new treelet -- drag or click shadow
  - delete a treelet -- right click
    - if only connected to top, delete everything below
    - if only connected to bottom, delete everything above
  - connect top & bottom -- drag a leaf to a root or a root to a leaf
  - disconnect top & bottom -- right click
    - if connected to top & bottom, then disconnect
"""

from nltk.grammar import ContextFreeGrammar, Nonterminal, parse_cfg_production
from nltk.tree import Tree

from util import *
from tree import *


######################################################################
# Production List
######################################################################

class ProductionList(ColorizedList):
    ARROW = SymbolWidget.SYMBOLS['rightarrow']
    
    def _init_colortags(self, textwidget, options):
        textwidget.tag_config('terminal', foreground='#006000')
        textwidget.tag_config('arrow', font='symbol', underline='0')
        textwidget.tag_config('nonterminal', foreground='blue',
                              font=('helvetica', -12, 'bold'))

    def _item_repr(self, item):
        contents = []
        contents.append(('%s\t' % item.lhs(), 'nonterminal'))
        contents.append((self.ARROW, 'arrow'))
        for elt in item.rhs():
            if isinstance(elt, Nonterminal):
                contents.append((' %s' % elt.symbol(), 'nonterminal'))
            else:
                contents.append((' %r' % elt, 'terminal'))
        return contents

######################################################################
# CFG Editor
######################################################################

_CFGEditor_HELP = """

The CFG Editor can be used to create or modify context free grammars.
A context free grammar consists of a start symbol and a list of
productions.  The start symbol is specified by the text entry field in
the upper right hand corner of the editor; and the list of productions
are specified in the main text editing box.

Every non-blank line specifies a single production.  Each production
has the form "LHS -> RHS," where LHS is a single nonterminal, and RHS
is a list of nonterminals and terminals.

Nonterminals must be a single word, such as S or NP or NP_subj.
Currently, nonterminals must consists of alphanumeric characters and
underscores (_).  Nonterminals are colored blue.  If you place the
mouse over any nonterminal, then all occurances of that nonterminal
will be highlighted.

Termianals must be surrounded by single quotes (') or double
quotes(\").  For example, "dog" and "New York" are terminals.
Currently, the string within the quotes must consist of alphanumeric
characters, underscores, and spaces.

To enter a new production, go to a blank line, and type a nonterminal,
followed by an arrow (->), followed by a sequence of terminals and
nonterminals.  Note that "->" (dash + greater-than) is automatically
converted to an arrow symbol.  When you move your cursor to a
different line, your production will automatically be colorized.  If
there are any errors, they will be highlighted in red.

Note that the order of the productions is signifigant for some
algorithms.  To re-order the productions, use cut and paste to move
them.

Use the buttons at the bottom of the window when you are done editing
the CFG:
  - Ok: apply the new CFG, and exit the editor.
  - Apply: apply the new CFG, and do not exit the editor.
  - Reset: revert to the original CFG, and do not exit the editor.
  - Cancel: revert to the original CFG, and exit the editor.

"""

class CFGEditor(object):
    """
    A dialog window for creating and editing context free grammars.
    C{CFGEditor} places the following restrictions on what C{CFG}s can
    be edited:
        - All nonterminals must be strings consisting of word
          characters.
        - All terminals must be strings consisting of word characters
          and space characters.
    """
    # Regular expressions used by _analyze_line.  Precompile them, so
    # we can process the text faster.
    ARROW = SymbolWidget.SYMBOLS['rightarrow']
    _LHS_RE = re.compile(r"(^\s*\w+\s*)(->|("+ARROW+"))")
    _ARROW_RE = re.compile("\s*(->|("+ARROW+"))\s*")
    _PRODUCTION_RE = re.compile(r"(^\s*\w+\s*)" +              # LHS
                                "(->|("+ARROW+"))\s*" +        # arrow
                                r"((\w+|'[\w ]*'|\"[\w ]*\"|\|)\s*)*$") # RHS
    _TOKEN_RE = re.compile("\\w+|->|'[\\w ]+'|\"[\\w ]+\"|("+ARROW+")")
    _BOLD = ('helvetica', -12, 'bold')
    
    def __init__(self, parent, cfg=None, set_cfg_callback=None):
        self._parent = parent
        if cfg is not None: self._cfg = cfg
        else: self._cfg = ContextFreeGrammar(Nonterminal('S'), [])
        self._set_cfg_callback = set_cfg_callback

        self._highlight_matching_nonterminals = 1

        # Create the top-level window.
        self._top = Toplevel(parent)
        self._init_bindings()

        self._init_startframe()
        self._startframe.pack(side='top', fill='x', expand=0)
        self._init_prodframe()
        self._prodframe.pack(side='top', fill='both', expand=1)
        self._init_buttons()
        self._buttonframe.pack(side='bottom', fill='x', expand=0)

        self._textwidget.focus()

    def _init_startframe(self):
        frame = self._startframe = Frame(self._top)
        self._start = Entry(frame)
        self._start.pack(side='right')
        Label(frame, text='Start Symbol:').pack(side='right')
        Label(frame, text='Productions:').pack(side='left')
        self._start.insert(0, self._cfg.start().symbol())

    def _init_buttons(self):
        frame = self._buttonframe = Frame(self._top)
        Button(frame, text='Ok', command=self._ok,
               underline=0, takefocus=0).pack(side='left')
        Button(frame, text='Apply', command=self._apply,
               underline=0, takefocus=0).pack(side='left')
        Button(frame, text='Reset', command=self._reset,
               underline=0, takefocus=0,).pack(side='left')
        Button(frame, text='Cancel', command=self._cancel,
               underline=0, takefocus=0).pack(side='left')
        Button(frame, text='Help', command=self._help,
               underline=0, takefocus=0).pack(side='right')

    def _init_bindings(self):
        self._top.title('CFG Editor')
        self._top.bind('<Control-q>', self._cancel)
        self._top.bind('<Alt-q>', self._cancel)
        self._top.bind('<Control-d>', self._cancel)
        #self._top.bind('<Control-x>', self._cancel)
        self._top.bind('<Alt-x>', self._cancel)
        self._top.bind('<Escape>', self._cancel)
        #self._top.bind('<Control-c>', self._cancel)
        self._top.bind('<Alt-c>', self._cancel)
        
        self._top.bind('<Control-o>', self._ok)
        self._top.bind('<Alt-o>', self._ok)
        self._top.bind('<Control-a>', self._apply)
        self._top.bind('<Alt-a>', self._apply)
        self._top.bind('<Control-r>', self._reset)
        self._top.bind('<Alt-r>', self._reset)
        self._top.bind('<Control-h>', self._help)
        self._top.bind('<Alt-h>', self._help)
        self._top.bind('<F1>', self._help)

    def _init_prodframe(self):
        self._prodframe = Frame(self._top)

        # Create the basic Text widget & scrollbar.
        self._textwidget = Text(self._prodframe, background='#e0e0e0',
                                exportselection=1)
        self._textscroll = Scrollbar(self._prodframe, takefocus=0,
                                     orient='vertical')
        self._textwidget.config(yscrollcommand = self._textscroll.set)
        self._textscroll.config(command=self._textwidget.yview)
        self._textscroll.pack(side='right', fill='y')
        self._textwidget.pack(expand=1, fill='both', side='left')
        
        # Initialize the colorization tags.  Each nonterminal gets its
        # own tag, so they aren't listed here.  
        self._textwidget.tag_config('terminal', foreground='#006000')
        self._textwidget.tag_config('arrow', font='symbol')
        self._textwidget.tag_config('error', background='red')

        # Keep track of what line they're on.  We use that to remember
        # to re-analyze a line whenever they leave it.
        self._linenum = 0

        # Expand "->" to an arrow.
        self._top.bind('>', self._replace_arrows)

        # Re-colorize lines when appropriate.
        self._top.bind('<<Paste>>', self._analyze)
        self._top.bind('<KeyPress>', self._check_analyze)
        self._top.bind('<ButtonPress>', self._check_analyze)

        # Tab cycles focus. (why doesn't this work??)
        def cycle(e, textwidget=self._textwidget):
            textwidget.tk_focusNext().focus()
        self._textwidget.bind('<Tab>', cycle)

        prod_tuples = [(p.lhs(),[p.rhs()]) for p in self._cfg.productions()]
        for i in range(len(prod_tuples)-1,0,-1):
            if (prod_tuples[i][0] == prod_tuples[i-1][0]):
                if () in prod_tuples[i][1]: continue
                if () in prod_tuples[i-1][1]: continue
                print prod_tuples[i-1][1]
                print prod_tuples[i][1]
                prod_tuples[i-1][1].extend(prod_tuples[i][1])
                del prod_tuples[i]

        for lhs, rhss in prod_tuples:
            print lhs, rhss
            s = '%s ->' % lhs
            for rhs in rhss:
                for elt in rhs:
                    if isinstance(elt, Nonterminal): s += ' %s' % elt
                    else: s += ' %r' % elt
                s += ' |'
            s = s[:-2] + '\n'
            self._textwidget.insert('end', s)
                       
        self._analyze()
            
#         # Add the producitons to the text widget, and colorize them.
#         prod_by_lhs = {}
#         for prod in self._cfg.productions():
#             if len(prod.rhs()) > 0:
#                 prod_by_lhs.setdefault(prod.lhs(),[]).append(prod)
#         for (lhs, prods) in prod_by_lhs.items():
#             self._textwidget.insert('end', '%s ->' % lhs)
#             self._textwidget.insert('end', self._rhs(prods[0]))
#             for prod in prods[1:]:
#                 print '\t|'+self._rhs(prod),
#                 self._textwidget.insert('end', '\t|'+self._rhs(prod))
#             print
#             self._textwidget.insert('end', '\n')
#         for prod in self._cfg.productions():
#             if len(prod.rhs()) == 0:
#                 self._textwidget.insert('end', '%s' % prod)
#         self._analyze()

#     def _rhs(self, prod):
#         s = ''
#         for elt in prod.rhs():
#             if isinstance(elt, Nonterminal): s += ' %s' % elt.symbol()
#             else: s += ' %r' % elt
#         return s

    def _clear_tags(self, linenum):
        """
        Remove all tags (except C{arrow} and C{sel}) from the given
        line of the text widget used for editing the productions.
        """
        start = '%d.0'%linenum
        end = '%d.end'%linenum
        for tag in self._textwidget.tag_names():
            if tag not in ('arrow', 'sel'):
                self._textwidget.tag_remove(tag, start, end)

    def _check_analyze(self, *e):
        """
        Check if we've moved to a new line.  If we have, then remove
        all colorization from the line we moved to, and re-colorize
        the line that we moved from.
        """
        linenum = int(self._textwidget.index('insert').split('.')[0])
        if linenum != self._linenum:
            self._clear_tags(linenum)
            self._analyze_line(self._linenum)
            self._linenum = linenum

    def _replace_arrows(self, *e):
        """
        Replace any C{'->'} text strings with arrows (char \\256, in
        symbol font).  This searches the whole buffer, but is fast
        enough to be done anytime they press '>'.
        """
        arrow = '1.0'
        while 1:
            arrow = self._textwidget.search('->', arrow, 'end+1char')
            if arrow == '': break
            self._textwidget.delete(arrow, arrow+'+2char')
            self._textwidget.insert(arrow, self.ARROW, 'arrow')
            self._textwidget.insert(arrow, '\t')

        arrow = '1.0'
        while 1:
            arrow = self._textwidget.search(self.ARROW, arrow+'+1char',
                                            'end+1char')
            if arrow == '': break
            self._textwidget.tag_add('arrow', arrow, arrow+'+1char')

    def _analyze_token(self, match, linenum):
        """
        Given a line number and a regexp match for a token on that
        line, colorize the token.  Note that the regexp match gives us
        the token's text, start index (on the line), and end index (on
        the line).
        """
        # What type of token is it?
        if match.group()[0] in "'\"": tag = 'terminal'
        elif match.group() in ('->', self.ARROW): tag = 'arrow'
        else:
            # If it's a nonterminal, then set up new bindings, so we
            # can highlight all instances of that nonterminal when we
            # put the mouse over it.
            tag = 'nonterminal_'+match.group()
            if tag not in self._textwidget.tag_names():
                self._init_nonterminal_tag(tag)

        start = '%d.%d' % (linenum, match.start())
        end = '%d.%d' % (linenum, match.end())
        self._textwidget.tag_add(tag, start, end)

    def _init_nonterminal_tag(self, tag, foreground='blue'):
        self._textwidget.tag_config(tag, foreground=foreground,
                                    font=CFGEditor._BOLD)
        if not self._highlight_matching_nonterminals:
            return
        def enter(e, textwidget=self._textwidget, tag=tag):
            textwidget.tag_config(tag, background='#80ff80')
        def leave(e, textwidget=self._textwidget, tag=tag):
            textwidget.tag_config(tag, background='')
        self._textwidget.tag_bind(tag, '<Enter>', enter)
        self._textwidget.tag_bind(tag, '<Leave>', leave)

    def _analyze_line(self, linenum):
        """
        Colorize a given line.
        """
        # Get rid of any tags that were previously on the line.
        self._clear_tags(linenum)

        # Get the line line's text string.
        line = self._textwidget.get(`linenum`+'.0', `linenum`+'.end')

        # If it's a valid production, then colorize each token.
        if CFGEditor._PRODUCTION_RE.match(line):
            # It's valid; Use _TOKEN_RE to tokenize the production,
            # and call analyze_token on each token.
            def analyze_token(match, self=self, linenum=linenum):
                self._analyze_token(match, linenum)
                return ''
            CFGEditor._TOKEN_RE.sub(analyze_token, line)
        elif line.strip() != '':
            # It's invalid; show the user where the error is.
            self._mark_error(linenum, line)

    def _mark_error(self, linenum, line):
        """
        Mark the location of an error in a line.
        """
        arrowmatch = CFGEditor._ARROW_RE.search(line)
        if not arrowmatch:
            # If there's no arrow at all, highlight the whole line.
            start = '%d.0' % linenum
            end = '%d.end' % linenum
        elif not CFGEditor._LHS_RE.match(line):
            # Otherwise, if the LHS is bad, highlight it.
            start = '%d.0' % linenum
            end = '%d.%d' % (linenum, arrowmatch.start())
        else:
            # Otherwise, highlight the RHS.
            start = '%d.%d' % (linenum, arrowmatch.end())
            end = '%d.end' % linenum
            
        # If we're highlighting 0 chars, highlight the whole line.
        if self._textwidget.compare(start, '==', end):
            start = '%d.0' % linenum
            end = '%d.end' % linenum
        self._textwidget.tag_add('error', start, end)

    def _analyze(self, *e):
        """
        Replace C{->} with arrows, and colorize the entire buffer.
        """
        self._replace_arrows()
        numlines = int(self._textwidget.index('end').split('.')[0])
        for linenum in range(1, numlines+1):  # line numbers start at 1.
            self._analyze_line(linenum)

    def _parse_productions(self):
        """
        Parse the current contents of the textwidget buffer, to create
        a list of productions.
        """
        productions = []

        # Get the text, normalize it, and split it into lines.
        text = self._textwidget.get('1.0', 'end')
        text = re.sub(self.ARROW, '->', text)
        text = re.sub('\t', ' ', text)
        lines = text.split('\n')

        # Convert each line to a CFG production
        for line in lines:
            line = line.strip()
            if line=='': continue
            productions += parse_cfg_production(line)
            #if line.strip() == '': continue
            #if not CFGEditor._PRODUCTION_RE.match(line):
            #    raise ValueError('Bad production string %r' % line)
            #
            #(lhs_str, rhs_str) = line.split('->')
            #lhs = Nonterminal(lhs_str.strip())
            #rhs = []
            #def parse_token(match, rhs=rhs):
            #    token = match.group()
            #    if token[0] in "'\"": rhs.append(token[1:-1])
            #    else: rhs.append(Nonterminal(token))
            #    return ''
            #CFGEditor._TOKEN_RE.sub(parse_token, rhs_str)
            #
            #productions.append(Production(lhs, *rhs))

        return productions

    def _destroy(self, *e):
        if self._top is None: return
        self._top.destroy()
        self._top = None

    def _ok(self, *e):
        self._apply()
        self._destroy()
        
    def _apply(self, *e):
        productions = self._parse_productions()
        start = Nonterminal(self._start.get())
        cfg = ContextFreeGrammar(start, productions)
        if self._set_cfg_callback is not None:
            self._set_cfg_callback(cfg)
        
    def _reset(self, *e):
        self._textwidget.delete('1.0', 'end')
        for production in self._cfg.productions():
            self._textwidget.insert('end', '%s\n' % production)
        self._analyze()
        if self._set_cfg_callback is not None:
            self._set_cfg_callback(self._cfg)
    
    def _cancel(self, *e):
        try: self._reset()
        except: pass
        self._destroy()

    def _help(self, *e):
        # The default font's not very legible; try using 'fixed' instead. 
        try:
            ShowText(self._parent, 'Help: Chart Parser Demo',
                     (_CFGEditor_HELP).strip(), width=75, font='fixed')
        except:
            ShowText(self._parent, 'Help: Chart Parser Demo',
                     (_CFGEditor_HELP).strip(), width=75)

######################################################################
# New Demo (built tree based on cfg)
######################################################################

class CFGDemo(object):
    def __init__(self, grammar, text):
        self._grammar = grammar
        self._text = text

        # Set up the main window.
        self._top = Tk()
        self._top.title('Context Free Grammar Demo')

        # Base font size
        self._size = IntVar(self._top)
        self._size.set(12) # = medium

        # Set up the key bindings
        self._init_bindings(self._top)

        # Create the basic frames
        frame1 = Frame(self._top)
        frame1.pack(side='left', fill='y', expand=0)
        self._init_menubar(self._top)
        self._init_buttons(self._top)
        self._init_grammar(frame1)
        self._init_treelet(frame1)
        self._init_workspace(self._top)

    #//////////////////////////////////////////////////
    # Initialization
    #//////////////////////////////////////////////////

    def _init_bindings(self, top):
        top.bind('<Control-q>', self.destroy)
    
    def _init_menubar(self, parent): pass
    
    def _init_buttons(self, parent): pass
    
    def _init_grammar(self, parent):
        self._prodlist = ProductionList(parent, self._grammar, width=20)
        self._prodlist.pack(side='top', fill='both', expand=1)
        self._prodlist.focus()
        self._prodlist.add_callback('select', self._selectprod_cb)
        self._prodlist.add_callback('move', self._selectprod_cb)

    def _init_treelet(self, parent):
        self._treelet_canvas = Canvas(parent, background='white')
        self._treelet_canvas.pack(side='bottom', fill='x')
        self._treelet = None

    def _init_workspace(self, parent):
        self._workspace = CanvasFrame(parent, background='white')
        self._workspace.pack(side='right', fill='both', expand=1)
        self._tree = None
        self.reset_workspace()

    #//////////////////////////////////////////////////
    # Workspace
    #//////////////////////////////////////////////////

    def reset_workspace(self):
        c = self._workspace.canvas()
        fontsize = int(self._size.get())
        node_font = ('helvetica', -(fontsize+4), 'bold')
        leaf_font = ('helvetica', -(fontsize+2))
        
        # Remove the old tree
        if self._tree is not None:
            self._workspace.remove_widget(self._tree)

        # The root of the tree.
        start = self._grammar.start().symbol()
        rootnode = TextWidget(c, start, font=node_font, draggable=1)

        # The leaves of the tree.
        leaves = []
        for word in self._text:
            if isinstance(word, Token): word = word.type()
            leaves.append(TextWidget(c, word, font=leaf_font, draggable=1))

        # Put it all together into one tree
        self._tree = TreeSegmentWidget(c, rootnode, leaves,
                                       color='white')

        # Add it to the workspace.
        self._workspace.add_widget(self._tree)

        # Move the leaves to the bottom of the workspace.
        for leaf in leaves: leaf.move(0,100)
        
        #self._nodes = {start:1}
        #self._leaves = dict([(l,1) for l in leaves])

    def workspace_markprod(self, production):
        pass

    def _markproduction(self, prod, tree=None):
        if tree is None: tree = self._tree
        for i in range(len(tree.subtrees())-len(prod.rhs())):
            if tree['color', i] == 'white':
                self._markproduction
            
            for j, node in enumerate(prod.rhs()):
                widget = tree.subtrees()[i+j]
                if (isinstance(node, Nonterminal) and
                    isinstance(widget, TreeSegmentWidget) and
                    node.symbol == widget.node().text()):
                    pass # matching nonterminal
                elif (isinstance(node, (str, unicode)) and
                      isinstance(widget, TextWidget) and
                      node == widget.text()):
                    pass # matching nonterminal
                else: break
            else:
                # Everything matched!
                print 'MATCH AT', i

    #//////////////////////////////////////////////////
    # Grammar 
    #//////////////////////////////////////////////////

    def _selectprod_cb(self, production):
        canvas = self._treelet_canvas
        
        self._prodlist.highlight(production)
        if self._treelet is not None: self._treelet.destroy()

        # Convert the production to a tree.
        rhs = production.rhs()
        for (i, elt) in enumerate(rhs):
            if isinstance(elt, Nonterminal): elt = Tree(elt)
        tree = Tree(production.lhs().symbol(), *rhs)

        # Draw the tree in the treelet area.
        fontsize = int(self._size.get())
        node_font = ('helvetica', -(fontsize+4), 'bold')
        leaf_font = ('helvetica', -(fontsize+2))
        self._treelet = tree_to_treesegment(canvas, tree,
                                            node_font=node_font,
                                            leaf_font=leaf_font)
        self._treelet['draggable'] = 1

        # Center the treelet.
        (x1, y1, x2, y2) = self._treelet.bbox()
        w, h = int(canvas['width']), int(canvas['height'])
        self._treelet.move((w-x1-x2)/2, (h-y1-y2)/2)

        # Mark the places where we can add it to the workspace.
        self._markproduction(production)
    
    def destroy(self, *args):
        self._top.destroy()

    def mainloop(self, *args, **kwargs):
        self._top.mainloop(*args, **kwargs)

def demo2():
    from nltk import Nonterminal, Production, ContextFreeGrammar
    nonterminals = 'S VP NP PP P N Name V Det'
    (S, VP, NP, PP, P, N, Name, V, Det) = [Nonterminal(s)
                                           for s in nonterminals.split()]
    productions = (
        # Syntactic Productions
        Production(S, [NP, VP]),
        Production(NP, [Det, N]),
        Production(NP, [NP, PP]),
        Production(VP, [VP, PP]),
        Production(VP, [V, NP, PP]),
        Production(VP, [V, NP]),
        Production(PP, [P, NP]),
        Production(PP, []),

        Production(PP, ['up', 'over', NP]),
        
        # Lexical Productions
        Production(NP, ['I']),   Production(Det, ['the']),
        Production(Det, ['a']),  Production(N, ['man']),
        Production(V, ['saw']),  Production(P, ['in']),
        Production(P, ['with']), Production(N, ['park']),
        Production(N, ['dog']),  Production(N, ['statue']),
        Production(Det, ['my']),
        )
    grammar = ContextFreeGrammar(S, productions)

    text = 'I saw a man in the park'.split()
    d=CFGDemo(grammar, text)
    d.mainloop()

######################################################################
# Old Demo
######################################################################

def demo():
    from nltk import Nonterminal, parse_cfg
    nonterminals = 'S VP NP PP P N Name V Det'
    (S, VP, NP, PP, P, N, Name, V, Det) = [Nonterminal(s)
                                           for s in nonterminals.split()]
    
    grammar = parse_cfg("""
    S -> NP VP
    PP -> P NP
    NP -> Det N
    NP -> NP PP
    VP -> V NP
    VP -> VP PP
    Det -> 'a'
    Det -> 'the'
    Det -> 'my'
    NP -> 'I'
    N -> 'dog'
    N -> 'man'
    N -> 'park'
    N -> 'statue'
    V -> 'saw'
    P -> 'in'
    P -> 'up'
    P -> 'over'
    P -> 'with'
    """)

    def cb(grammar): print grammar
    top = Tk()
    editor = CFGEditor(top, grammar, cb)
    Label(top, text='\nTesting CFG Editor\n').pack()
    Button(top, text='Quit', command=top.destroy).pack()
    top.mainloop()

def demo3():
    from nltk import Production
    (S, VP, NP, PP, P, N, Name, V, Det) = \
        nonterminals('S, VP, NP, PP, P, N, Name, V, Det')
    
    productions = (
        # Syntactic Productions
        Production(S, [NP, VP]),
        Production(NP, [Det, N]),
        Production(NP, [NP, PP]),
        Production(VP, [VP, PP]),
        Production(VP, [V, NP, PP]),
        Production(VP, [V, NP]),
        Production(PP, [P, NP]),
        Production(PP, []),

        Production(PP, ['up', 'over', NP]),
        
        # Lexical Productions
        Production(NP, ['I']),   Production(Det, ['the']),
        Production(Det, ['a']),  Production(N, ['man']),
        Production(V, ['saw']),  Production(P, ['in']),
        Production(P, ['with']), Production(N, ['park']),
        Production(N, ['dog']),  Production(N, ['statue']),
        Production(Det, ['my']),
        )
    
    t = Tk()
    def destroy(e, t=t): t.destroy()
    t.bind('q', destroy)
    p = ProductionList(t, productions)
    p.pack(expand=1, fill='both')
    p.add_callback('select', p.markonly)
    p.add_callback('move', p.markonly)
    p.focus()
    p.mark(productions[2])
    p.mark(productions[8])

if __name__ == '__main__': demo()
Tech Fingerprint

Alerts (31)

'import *' Avoid to prevent namespace pollution; import specific names or use aliases
59 60
'isinstance(' Overuse may indicate design issues; consider polymorphism
81 269 298 588 616 620 641
'def' Ensure functions have docstrings for documentation
250 373 375 394 460 571 604 661 664 667 703 731 738 766
Complexity hotspot; lines 254 to 258 (total complexity: 5)
254 255 256 257 258
'del' Avoid unless necessary; Python's garbage collector typically handles object deletion
262
'except:' Avoid catching all exceptions; specify exception types to catch only expected errors
497 505