PageRenderTime 57ms CodeModel.GetById 8ms RepoModel.GetById 1ms app.codeStats 0ms

/nltk/nltk/draw/cfg.py

http://nltk.googlecode.com/
Python | 776 lines | 701 code | 18 blank | 57 comment | 19 complexity | 9b746820dfdd23d529a37b0c305e6eec MD5 | raw file
Possible License(s): Apache-2.0, AGPL-1.0
  1. # Natural Language Toolkit: CFG visualization
  2. #
  3. # Copyright (C) 2001-2011 NLTK Project
  4. # Author: Edward Loper <edloper@gradient.cis.upenn.edu>
  5. # URL: <http://www.nltk.org/>
  6. # For license information, see LICENSE.TXT
  7. #
  8. # $Id: cfg.py 8730 2011-03-08 04:49:46Z StevenBird1 $
  9. """
  10. Visualization tools for CFGs.
  11. """
  12. import re
"""
Idea for a nice demo:
  - 3 panes: grammar, treelet, working area
    - grammar is a list of productions
    - when you select a production, the treelet that it licenses appears
      in the treelet area
    - the working area has the text on the bottom, and S at top.  When
      you select a production, it shows (ghosted) the locations where
      that production's treelet could be attached to either the text
      or the tree rooted at S.
  - the user can drag the treelet onto one of those (or click on them?)
  - the user can delete pieces of the tree from the working area
    (right click?)
  - connecting top to bottom? drag one NP onto another?

+-------------------------------------------------------------+
| S -> NP VP   |                 S                            |
|[NP -> Det N ]|                / \                           |
|     ...      |              NP  VP                          |
| N -> 'dog'   |                                              |
| N -> 'cat'   |                                              |
|     ...      |                                              |
+--------------+                                              |
|      NP      |                      Det     N               |
|     /  \     |                       |      |               |
|   Det    N   |  the    cat    saw   the    dog              |
|              |                                              |
+--------------+----------------------------------------------+

Operations:
  - connect a new treelet -- drag or click shadow
  - delete a treelet -- right click
    - if only connected to top, delete everything below
    - if only connected to bottom, delete everything above
  - connect top & bottom -- drag a leaf to a root or a root to a leaf
  - disconnect top & bottom -- right click
    - if connected to top & bottom, then disconnect
"""
  49. from nltk.grammar import ContextFreeGrammar, Nonterminal, parse_cfg_production
  50. from nltk.tree import Tree
  51. from util import *
  52. from tree import *
  53. ######################################################################
  54. # Production List
  55. ######################################################################
  56. class ProductionList(ColorizedList):
  57. ARROW = SymbolWidget.SYMBOLS['rightarrow']
  58. def _init_colortags(self, textwidget, options):
  59. textwidget.tag_config('terminal', foreground='#006000')
  60. textwidget.tag_config('arrow', font='symbol', underline='0')
  61. textwidget.tag_config('nonterminal', foreground='blue',
  62. font=('helvetica', -12, 'bold'))
  63. def _item_repr(self, item):
  64. contents = []
  65. contents.append(('%s\t' % item.lhs(), 'nonterminal'))
  66. contents.append((self.ARROW, 'arrow'))
  67. for elt in item.rhs():
  68. if isinstance(elt, Nonterminal):
  69. contents.append((' %s' % elt.symbol(), 'nonterminal'))
  70. else:
  71. contents.append((' %r' % elt, 'terminal'))
  72. return contents
  73. ######################################################################
  74. # CFG Editor
  75. ######################################################################
  76. _CFGEditor_HELP = """
  77. The CFG Editor can be used to create or modify context free grammars.
  78. A context free grammar consists of a start symbol and a list of
  79. productions. The start symbol is specified by the text entry field in
  80. the upper right hand corner of the editor; and the list of productions
  81. are specified in the main text editing box.
  82. Every non-blank line specifies a single production. Each production
  83. has the form "LHS -> RHS," where LHS is a single nonterminal, and RHS
  84. is a list of nonterminals and terminals.
  85. Nonterminals must be a single word, such as S or NP or NP_subj.
  86. Currently, nonterminals must consists of alphanumeric characters and
  87. underscores (_). Nonterminals are colored blue. If you place the
  88. mouse over any nonterminal, then all occurances of that nonterminal
  89. will be highlighted.
  90. Termianals must be surrounded by single quotes (') or double
  91. quotes(\"). For example, "dog" and "New York" are terminals.
  92. Currently, the string within the quotes must consist of alphanumeric
  93. characters, underscores, and spaces.
  94. To enter a new production, go to a blank line, and type a nonterminal,
  95. followed by an arrow (->), followed by a sequence of terminals and
  96. nonterminals. Note that "->" (dash + greater-than) is automatically
  97. converted to an arrow symbol. When you move your cursor to a
  98. different line, your production will automatically be colorized. If
  99. there are any errors, they will be highlighted in red.
  100. Note that the order of the productions is signifigant for some
  101. algorithms. To re-order the productions, use cut and paste to move
  102. them.
  103. Use the buttons at the bottom of the window when you are done editing
  104. the CFG:
  105. - Ok: apply the new CFG, and exit the editor.
  106. - Apply: apply the new CFG, and do not exit the editor.
  107. - Reset: revert to the original CFG, and do not exit the editor.
  108. - Cancel: revert to the original CFG, and exit the editor.
  109. """
  110. class CFGEditor(object):
  111. """
  112. A dialog window for creating and editing context free grammars.
  113. C{CFGEditor} places the following restrictions on what C{CFG}s can
  114. be edited:
  115. - All nonterminals must be strings consisting of word
  116. characters.
  117. - All terminals must be strings consisting of word characters
  118. and space characters.
  119. """
  120. # Regular expressions used by _analyze_line. Precompile them, so
  121. # we can process the text faster.
  122. ARROW = SymbolWidget.SYMBOLS['rightarrow']
  123. _LHS_RE = re.compile(r"(^\s*\w+\s*)(->|("+ARROW+"))")
  124. _ARROW_RE = re.compile("\s*(->|("+ARROW+"))\s*")
  125. _PRODUCTION_RE = re.compile(r"(^\s*\w+\s*)" + # LHS
  126. "(->|("+ARROW+"))\s*" + # arrow
  127. r"((\w+|'[\w ]*'|\"[\w ]*\"|\|)\s*)*$") # RHS
  128. _TOKEN_RE = re.compile("\\w+|->|'[\\w ]+'|\"[\\w ]+\"|("+ARROW+")")
  129. _BOLD = ('helvetica', -12, 'bold')
  130. def __init__(self, parent, cfg=None, set_cfg_callback=None):
  131. self._parent = parent
  132. if cfg is not None: self._cfg = cfg
  133. else: self._cfg = ContextFreeGrammar(Nonterminal('S'), [])
  134. self._set_cfg_callback = set_cfg_callback
  135. self._highlight_matching_nonterminals = 1
  136. # Create the top-level window.
  137. self._top = Toplevel(parent)
  138. self._init_bindings()
  139. self._init_startframe()
  140. self._startframe.pack(side='top', fill='x', expand=0)
  141. self._init_prodframe()
  142. self._prodframe.pack(side='top', fill='both', expand=1)
  143. self._init_buttons()
  144. self._buttonframe.pack(side='bottom', fill='x', expand=0)
  145. self._textwidget.focus()
  146. def _init_startframe(self):
  147. frame = self._startframe = Frame(self._top)
  148. self._start = Entry(frame)
  149. self._start.pack(side='right')
  150. Label(frame, text='Start Symbol:').pack(side='right')
  151. Label(frame, text='Productions:').pack(side='left')
  152. self._start.insert(0, self._cfg.start().symbol())
  153. def _init_buttons(self):
  154. frame = self._buttonframe = Frame(self._top)
  155. Button(frame, text='Ok', command=self._ok,
  156. underline=0, takefocus=0).pack(side='left')
  157. Button(frame, text='Apply', command=self._apply,
  158. underline=0, takefocus=0).pack(side='left')
  159. Button(frame, text='Reset', command=self._reset,
  160. underline=0, takefocus=0,).pack(side='left')
  161. Button(frame, text='Cancel', command=self._cancel,
  162. underline=0, takefocus=0).pack(side='left')
  163. Button(frame, text='Help', command=self._help,
  164. underline=0, takefocus=0).pack(side='right')
  165. def _init_bindings(self):
  166. self._top.title('CFG Editor')
  167. self._top.bind('<Control-q>', self._cancel)
  168. self._top.bind('<Alt-q>', self._cancel)
  169. self._top.bind('<Control-d>', self._cancel)
  170. #self._top.bind('<Control-x>', self._cancel)
  171. self._top.bind('<Alt-x>', self._cancel)
  172. self._top.bind('<Escape>', self._cancel)
  173. #self._top.bind('<Control-c>', self._cancel)
  174. self._top.bind('<Alt-c>', self._cancel)
  175. self._top.bind('<Control-o>', self._ok)
  176. self._top.bind('<Alt-o>', self._ok)
  177. self._top.bind('<Control-a>', self._apply)
  178. self._top.bind('<Alt-a>', self._apply)
  179. self._top.bind('<Control-r>', self._reset)
  180. self._top.bind('<Alt-r>', self._reset)
  181. self._top.bind('<Control-h>', self._help)
  182. self._top.bind('<Alt-h>', self._help)
  183. self._top.bind('<F1>', self._help)
  184. def _init_prodframe(self):
  185. self._prodframe = Frame(self._top)
  186. # Create the basic Text widget & scrollbar.
  187. self._textwidget = Text(self._prodframe, background='#e0e0e0',
  188. exportselection=1)
  189. self._textscroll = Scrollbar(self._prodframe, takefocus=0,
  190. orient='vertical')
  191. self._textwidget.config(yscrollcommand = self._textscroll.set)
  192. self._textscroll.config(command=self._textwidget.yview)
  193. self._textscroll.pack(side='right', fill='y')
  194. self._textwidget.pack(expand=1, fill='both', side='left')
  195. # Initialize the colorization tags. Each nonterminal gets its
  196. # own tag, so they aren't listed here.
  197. self._textwidget.tag_config('terminal', foreground='#006000')
  198. self._textwidget.tag_config('arrow', font='symbol')
  199. self._textwidget.tag_config('error', background='red')
  200. # Keep track of what line they're on. We use that to remember
  201. # to re-analyze a line whenever they leave it.
  202. self._linenum = 0
  203. # Expand "->" to an arrow.
  204. self._top.bind('>', self._replace_arrows)
  205. # Re-colorize lines when appropriate.
  206. self._top.bind('<<Paste>>', self._analyze)
  207. self._top.bind('<KeyPress>', self._check_analyze)
  208. self._top.bind('<ButtonPress>', self._check_analyze)
  209. # Tab cycles focus. (why doesn't this work??)
  210. def cycle(e, textwidget=self._textwidget):
  211. textwidget.tk_focusNext().focus()
  212. self._textwidget.bind('<Tab>', cycle)
  213. prod_tuples = [(p.lhs(),[p.rhs()]) for p in self._cfg.productions()]
  214. for i in range(len(prod_tuples)-1,0,-1):
  215. if (prod_tuples[i][0] == prod_tuples[i-1][0]):
  216. if () in prod_tuples[i][1]: continue
  217. if () in prod_tuples[i-1][1]: continue
  218. print prod_tuples[i-1][1]
  219. print prod_tuples[i][1]
  220. prod_tuples[i-1][1].extend(prod_tuples[i][1])
  221. del prod_tuples[i]
  222. for lhs, rhss in prod_tuples:
  223. print lhs, rhss
  224. s = '%s ->' % lhs
  225. for rhs in rhss:
  226. for elt in rhs:
  227. if isinstance(elt, Nonterminal): s += ' %s' % elt
  228. else: s += ' %r' % elt
  229. s += ' |'
  230. s = s[:-2] + '\n'
  231. self._textwidget.insert('end', s)
  232. self._analyze()
  233. # # Add the producitons to the text widget, and colorize them.
  234. # prod_by_lhs = {}
  235. # for prod in self._cfg.productions():
  236. # if len(prod.rhs()) > 0:
  237. # prod_by_lhs.setdefault(prod.lhs(),[]).append(prod)
  238. # for (lhs, prods) in prod_by_lhs.items():
  239. # self._textwidget.insert('end', '%s ->' % lhs)
  240. # self._textwidget.insert('end', self._rhs(prods[0]))
  241. # for prod in prods[1:]:
  242. # print '\t|'+self._rhs(prod),
  243. # self._textwidget.insert('end', '\t|'+self._rhs(prod))
  244. # print
  245. # self._textwidget.insert('end', '\n')
  246. # for prod in self._cfg.productions():
  247. # if len(prod.rhs()) == 0:
  248. # self._textwidget.insert('end', '%s' % prod)
  249. # self._analyze()
  250. # def _rhs(self, prod):
  251. # s = ''
  252. # for elt in prod.rhs():
  253. # if isinstance(elt, Nonterminal): s += ' %s' % elt.symbol()
  254. # else: s += ' %r' % elt
  255. # return s
  256. def _clear_tags(self, linenum):
  257. """
  258. Remove all tags (except C{arrow} and C{sel}) from the given
  259. line of the text widget used for editing the productions.
  260. """
  261. start = '%d.0'%linenum
  262. end = '%d.end'%linenum
  263. for tag in self._textwidget.tag_names():
  264. if tag not in ('arrow', 'sel'):
  265. self._textwidget.tag_remove(tag, start, end)
  266. def _check_analyze(self, *e):
  267. """
  268. Check if we've moved to a new line. If we have, then remove
  269. all colorization from the line we moved to, and re-colorize
  270. the line that we moved from.
  271. """
  272. linenum = int(self._textwidget.index('insert').split('.')[0])
  273. if linenum != self._linenum:
  274. self._clear_tags(linenum)
  275. self._analyze_line(self._linenum)
  276. self._linenum = linenum
  277. def _replace_arrows(self, *e):
  278. """
  279. Replace any C{'->'} text strings with arrows (char \\256, in
  280. symbol font). This searches the whole buffer, but is fast
  281. enough to be done anytime they press '>'.
  282. """
  283. arrow = '1.0'
  284. while 1:
  285. arrow = self._textwidget.search('->', arrow, 'end+1char')
  286. if arrow == '': break
  287. self._textwidget.delete(arrow, arrow+'+2char')
  288. self._textwidget.insert(arrow, self.ARROW, 'arrow')
  289. self._textwidget.insert(arrow, '\t')
  290. arrow = '1.0'
  291. while 1:
  292. arrow = self._textwidget.search(self.ARROW, arrow+'+1char',
  293. 'end+1char')
  294. if arrow == '': break
  295. self._textwidget.tag_add('arrow', arrow, arrow+'+1char')
  296. def _analyze_token(self, match, linenum):
  297. """
  298. Given a line number and a regexp match for a token on that
  299. line, colorize the token. Note that the regexp match gives us
  300. the token's text, start index (on the line), and end index (on
  301. the line).
  302. """
  303. # What type of token is it?
  304. if match.group()[0] in "'\"": tag = 'terminal'
  305. elif match.group() in ('->', self.ARROW): tag = 'arrow'
  306. else:
  307. # If it's a nonterminal, then set up new bindings, so we
  308. # can highlight all instances of that nonterminal when we
  309. # put the mouse over it.
  310. tag = 'nonterminal_'+match.group()
  311. if tag not in self._textwidget.tag_names():
  312. self._init_nonterminal_tag(tag)
  313. start = '%d.%d' % (linenum, match.start())
  314. end = '%d.%d' % (linenum, match.end())
  315. self._textwidget.tag_add(tag, start, end)
  316. def _init_nonterminal_tag(self, tag, foreground='blue'):
  317. self._textwidget.tag_config(tag, foreground=foreground,
  318. font=CFGEditor._BOLD)
  319. if not self._highlight_matching_nonterminals:
  320. return
  321. def enter(e, textwidget=self._textwidget, tag=tag):
  322. textwidget.tag_config(tag, background='#80ff80')
  323. def leave(e, textwidget=self._textwidget, tag=tag):
  324. textwidget.tag_config(tag, background='')
  325. self._textwidget.tag_bind(tag, '<Enter>', enter)
  326. self._textwidget.tag_bind(tag, '<Leave>', leave)
  327. def _analyze_line(self, linenum):
  328. """
  329. Colorize a given line.
  330. """
  331. # Get rid of any tags that were previously on the line.
  332. self._clear_tags(linenum)
  333. # Get the line line's text string.
  334. line = self._textwidget.get(`linenum`+'.0', `linenum`+'.end')
  335. # If it's a valid production, then colorize each token.
  336. if CFGEditor._PRODUCTION_RE.match(line):
  337. # It's valid; Use _TOKEN_RE to tokenize the production,
  338. # and call analyze_token on each token.
  339. def analyze_token(match, self=self, linenum=linenum):
  340. self._analyze_token(match, linenum)
  341. return ''
  342. CFGEditor._TOKEN_RE.sub(analyze_token, line)
  343. elif line.strip() != '':
  344. # It's invalid; show the user where the error is.
  345. self._mark_error(linenum, line)
  346. def _mark_error(self, linenum, line):
  347. """
  348. Mark the location of an error in a line.
  349. """
  350. arrowmatch = CFGEditor._ARROW_RE.search(line)
  351. if not arrowmatch:
  352. # If there's no arrow at all, highlight the whole line.
  353. start = '%d.0' % linenum
  354. end = '%d.end' % linenum
  355. elif not CFGEditor._LHS_RE.match(line):
  356. # Otherwise, if the LHS is bad, highlight it.
  357. start = '%d.0' % linenum
  358. end = '%d.%d' % (linenum, arrowmatch.start())
  359. else:
  360. # Otherwise, highlight the RHS.
  361. start = '%d.%d' % (linenum, arrowmatch.end())
  362. end = '%d.end' % linenum
  363. # If we're highlighting 0 chars, highlight the whole line.
  364. if self._textwidget.compare(start, '==', end):
  365. start = '%d.0' % linenum
  366. end = '%d.end' % linenum
  367. self._textwidget.tag_add('error', start, end)
  368. def _analyze(self, *e):
  369. """
  370. Replace C{->} with arrows, and colorize the entire buffer.
  371. """
  372. self._replace_arrows()
  373. numlines = int(self._textwidget.index('end').split('.')[0])
  374. for linenum in range(1, numlines+1): # line numbers start at 1.
  375. self._analyze_line(linenum)
  376. def _parse_productions(self):
  377. """
  378. Parse the current contents of the textwidget buffer, to create
  379. a list of productions.
  380. """
  381. productions = []
  382. # Get the text, normalize it, and split it into lines.
  383. text = self._textwidget.get('1.0', 'end')
  384. text = re.sub(self.ARROW, '->', text)
  385. text = re.sub('\t', ' ', text)
  386. lines = text.split('\n')
  387. # Convert each line to a CFG production
  388. for line in lines:
  389. line = line.strip()
  390. if line=='': continue
  391. productions += parse_cfg_production(line)
  392. #if line.strip() == '': continue
  393. #if not CFGEditor._PRODUCTION_RE.match(line):
  394. # raise ValueError('Bad production string %r' % line)
  395. #
  396. #(lhs_str, rhs_str) = line.split('->')
  397. #lhs = Nonterminal(lhs_str.strip())
  398. #rhs = []
  399. #def parse_token(match, rhs=rhs):
  400. # token = match.group()
  401. # if token[0] in "'\"": rhs.append(token[1:-1])
  402. # else: rhs.append(Nonterminal(token))
  403. # return ''
  404. #CFGEditor._TOKEN_RE.sub(parse_token, rhs_str)
  405. #
  406. #productions.append(Production(lhs, *rhs))
  407. return productions
  408. def _destroy(self, *e):
  409. if self._top is None: return
  410. self._top.destroy()
  411. self._top = None
  412. def _ok(self, *e):
  413. self._apply()
  414. self._destroy()
  415. def _apply(self, *e):
  416. productions = self._parse_productions()
  417. start = Nonterminal(self._start.get())
  418. cfg = ContextFreeGrammar(start, productions)
  419. if self._set_cfg_callback is not None:
  420. self._set_cfg_callback(cfg)
  421. def _reset(self, *e):
  422. self._textwidget.delete('1.0', 'end')
  423. for production in self._cfg.productions():
  424. self._textwidget.insert('end', '%s\n' % production)
  425. self._analyze()
  426. if self._set_cfg_callback is not None:
  427. self._set_cfg_callback(self._cfg)
  428. def _cancel(self, *e):
  429. try: self._reset()
  430. except: pass
  431. self._destroy()
  432. def _help(self, *e):
  433. # The default font's not very legible; try using 'fixed' instead.
  434. try:
  435. ShowText(self._parent, 'Help: Chart Parser Demo',
  436. (_CFGEditor_HELP).strip(), width=75, font='fixed')
  437. except:
  438. ShowText(self._parent, 'Help: Chart Parser Demo',
  439. (_CFGEditor_HELP).strip(), width=75)
  440. ######################################################################
  441. # New Demo (built tree based on cfg)
  442. ######################################################################
  443. class CFGDemo(object):
  444. def __init__(self, grammar, text):
  445. self._grammar = grammar
  446. self._text = text
  447. # Set up the main window.
  448. self._top = Tk()
  449. self._top.title('Context Free Grammar Demo')
  450. # Base font size
  451. self._size = IntVar(self._top)
  452. self._size.set(12) # = medium
  453. # Set up the key bindings
  454. self._init_bindings(self._top)
  455. # Create the basic frames
  456. frame1 = Frame(self._top)
  457. frame1.pack(side='left', fill='y', expand=0)
  458. self._init_menubar(self._top)
  459. self._init_buttons(self._top)
  460. self._init_grammar(frame1)
  461. self._init_treelet(frame1)
  462. self._init_workspace(self._top)
  463. #//////////////////////////////////////////////////
  464. # Initialization
  465. #//////////////////////////////////////////////////
  466. def _init_bindings(self, top):
  467. top.bind('<Control-q>', self.destroy)
  468. def _init_menubar(self, parent): pass
  469. def _init_buttons(self, parent): pass
  470. def _init_grammar(self, parent):
  471. self._prodlist = ProductionList(parent, self._grammar, width=20)
  472. self._prodlist.pack(side='top', fill='both', expand=1)
  473. self._prodlist.focus()
  474. self._prodlist.add_callback('select', self._selectprod_cb)
  475. self._prodlist.add_callback('move', self._selectprod_cb)
  476. def _init_treelet(self, parent):
  477. self._treelet_canvas = Canvas(parent, background='white')
  478. self._treelet_canvas.pack(side='bottom', fill='x')
  479. self._treelet = None
  480. def _init_workspace(self, parent):
  481. self._workspace = CanvasFrame(parent, background='white')
  482. self._workspace.pack(side='right', fill='both', expand=1)
  483. self._tree = None
  484. self.reset_workspace()
  485. #//////////////////////////////////////////////////
  486. # Workspace
  487. #//////////////////////////////////////////////////
  488. def reset_workspace(self):
  489. c = self._workspace.canvas()
  490. fontsize = int(self._size.get())
  491. node_font = ('helvetica', -(fontsize+4), 'bold')
  492. leaf_font = ('helvetica', -(fontsize+2))
  493. # Remove the old tree
  494. if self._tree is not None:
  495. self._workspace.remove_widget(self._tree)
  496. # The root of the tree.
  497. start = self._grammar.start().symbol()
  498. rootnode = TextWidget(c, start, font=node_font, draggable=1)
  499. # The leaves of the tree.
  500. leaves = []
  501. for word in self._text:
  502. if isinstance(word, Token): word = word.type()
  503. leaves.append(TextWidget(c, word, font=leaf_font, draggable=1))
  504. # Put it all together into one tree
  505. self._tree = TreeSegmentWidget(c, rootnode, leaves,
  506. color='white')
  507. # Add it to the workspace.
  508. self._workspace.add_widget(self._tree)
  509. # Move the leaves to the bottom of the workspace.
  510. for leaf in leaves: leaf.move(0,100)
  511. #self._nodes = {start:1}
  512. #self._leaves = dict([(l,1) for l in leaves])
  513. def workspace_markprod(self, production):
  514. pass
  515. def _markproduction(self, prod, tree=None):
  516. if tree is None: tree = self._tree
  517. for i in range(len(tree.subtrees())-len(prod.rhs())):
  518. if tree['color', i] == 'white':
  519. self._markproduction
  520. for j, node in enumerate(prod.rhs()):
  521. widget = tree.subtrees()[i+j]
  522. if (isinstance(node, Nonterminal) and
  523. isinstance(widget, TreeSegmentWidget) and
  524. node.symbol == widget.node().text()):
  525. pass # matching nonterminal
  526. elif (isinstance(node, (str, unicode)) and
  527. isinstance(widget, TextWidget) and
  528. node == widget.text()):
  529. pass # matching nonterminal
  530. else: break
  531. else:
  532. # Everything matched!
  533. print 'MATCH AT', i
  534. #//////////////////////////////////////////////////
  535. # Grammar
  536. #//////////////////////////////////////////////////
  537. def _selectprod_cb(self, production):
  538. canvas = self._treelet_canvas
  539. self._prodlist.highlight(production)
  540. if self._treelet is not None: self._treelet.destroy()
  541. # Convert the production to a tree.
  542. rhs = production.rhs()
  543. for (i, elt) in enumerate(rhs):
  544. if isinstance(elt, Nonterminal): elt = Tree(elt)
  545. tree = Tree(production.lhs().symbol(), *rhs)
  546. # Draw the tree in the treelet area.
  547. fontsize = int(self._size.get())
  548. node_font = ('helvetica', -(fontsize+4), 'bold')
  549. leaf_font = ('helvetica', -(fontsize+2))
  550. self._treelet = tree_to_treesegment(canvas, tree,
  551. node_font=node_font,
  552. leaf_font=leaf_font)
  553. self._treelet['draggable'] = 1
  554. # Center the treelet.
  555. (x1, y1, x2, y2) = self._treelet.bbox()
  556. w, h = int(canvas['width']), int(canvas['height'])
  557. self._treelet.move((w-x1-x2)/2, (h-y1-y2)/2)
  558. # Mark the places where we can add it to the workspace.
  559. self._markproduction(production)
  560. def destroy(self, *args):
  561. self._top.destroy()
  562. def mainloop(self, *args, **kwargs):
  563. self._top.mainloop(*args, **kwargs)
  564. def demo2():
  565. from nltk import Nonterminal, Production, ContextFreeGrammar
  566. nonterminals = 'S VP NP PP P N Name V Det'
  567. (S, VP, NP, PP, P, N, Name, V, Det) = [Nonterminal(s)
  568. for s in nonterminals.split()]
  569. productions = (
  570. # Syntactic Productions
  571. Production(S, [NP, VP]),
  572. Production(NP, [Det, N]),
  573. Production(NP, [NP, PP]),
  574. Production(VP, [VP, PP]),
  575. Production(VP, [V, NP, PP]),
  576. Production(VP, [V, NP]),
  577. Production(PP, [P, NP]),
  578. Production(PP, []),
  579. Production(PP, ['up', 'over', NP]),
  580. # Lexical Productions
  581. Production(NP, ['I']), Production(Det, ['the']),
  582. Production(Det, ['a']), Production(N, ['man']),
  583. Production(V, ['saw']), Production(P, ['in']),
  584. Production(P, ['with']), Production(N, ['park']),
  585. Production(N, ['dog']), Production(N, ['statue']),
  586. Production(Det, ['my']),
  587. )
  588. grammar = ContextFreeGrammar(S, productions)
  589. text = 'I saw a man in the park'.split()
  590. d=CFGDemo(grammar, text)
  591. d.mainloop()
  592. ######################################################################
  593. # Old Demo
  594. ######################################################################
  595. def demo():
  596. from nltk import Nonterminal, parse_cfg
  597. nonterminals = 'S VP NP PP P N Name V Det'
  598. (S, VP, NP, PP, P, N, Name, V, Det) = [Nonterminal(s)
  599. for s in nonterminals.split()]
  600. grammar = parse_cfg("""
  601. S -> NP VP
  602. PP -> P NP
  603. NP -> Det N
  604. NP -> NP PP
  605. VP -> V NP
  606. VP -> VP PP
  607. Det -> 'a'
  608. Det -> 'the'
  609. Det -> 'my'
  610. NP -> 'I'
  611. N -> 'dog'
  612. N -> 'man'
  613. N -> 'park'
  614. N -> 'statue'
  615. V -> 'saw'
  616. P -> 'in'
  617. P -> 'up'
  618. P -> 'over'
  619. P -> 'with'
  620. """)
  621. def cb(grammar): print grammar
  622. top = Tk()
  623. editor = CFGEditor(top, grammar, cb)
  624. Label(top, text='\nTesting CFG Editor\n').pack()
  625. Button(top, text='Quit', command=top.destroy).pack()
  626. top.mainloop()
  627. def demo3():
  628. from nltk import Production
  629. (S, VP, NP, PP, P, N, Name, V, Det) = \
  630. nonterminals('S, VP, NP, PP, P, N, Name, V, Det')
  631. productions = (
  632. # Syntactic Productions
  633. Production(S, [NP, VP]),
  634. Production(NP, [Det, N]),
  635. Production(NP, [NP, PP]),
  636. Production(VP, [VP, PP]),
  637. Production(VP, [V, NP, PP]),
  638. Production(VP, [V, NP]),
  639. Production(PP, [P, NP]),
  640. Production(PP, []),
  641. Production(PP, ['up', 'over', NP]),
  642. # Lexical Productions
  643. Production(NP, ['I']), Production(Det, ['the']),
  644. Production(Det, ['a']), Production(N, ['man']),
  645. Production(V, ['saw']), Production(P, ['in']),
  646. Production(P, ['with']), Production(N, ['park']),
  647. Production(N, ['dog']), Production(N, ['statue']),
  648. Production(Det, ['my']),
  649. )
  650. t = Tk()
  651. def destroy(e, t=t): t.destroy()
  652. t.bind('q', destroy)
  653. p = ProductionList(t, productions)
  654. p.pack(expand=1, fill='both')
  655. p.add_callback('select', p.markonly)
  656. p.add_callback('move', p.markonly)
  657. p.focus()
  658. p.mark(productions[2])
  659. p.mark(productions[8])
  660. if __name__ == '__main__': demo()