PageRenderTime 54ms CodeModel.GetById 20ms RepoModel.GetById 1ms app.codeStats 0ms

/nltk/draw/cfg.py

https://github.com/BrucePHill/nltk
Python | 777 lines | 683 code | 22 blank | 72 comment | 13 complexity | d03358941f6a2398d4b99e8b4dd540d5 MD5 | raw file
Possible License(s): Apache-2.0
  1. # Natural Language Toolkit: CFG visualization
  2. #
  3. # Copyright (C) 2001-2013 NLTK Project
  4. # Author: Edward Loper <edloper@gradient.cis.upenn.edu>
  5. # URL: <http://www.nltk.org/>
  6. # For license information, see LICENSE.TXT
  7. """
  8. Visualization tools for CFGs.
  9. """
  10. # Idea for a nice demo:
  11. # - 3 panes: grammar, treelet, working area
  12. # - grammar is a list of productions
  13. # - when you select a production, the treelet that it licenses appears
  14. # in the treelet area
  15. # - the working area has the text on the bottom, and S at top. When
  16. # you select a production, it shows (ghosted) the locations where
  17. # that production's treelet could be attached to either the text
  18. # or the tree rooted at S.
  19. # - the user can drag the treelet onto one of those (or click on them?)
  20. # - the user can delete pieces of the tree from the working area
  21. # (right click?)
  22. # - connecting top to bottom? drag one NP onto another?
  23. #
  24. #   +-------------------------------------------------------------+
  25. #   | S -> NP VP   |                 S                            |
  26. #   |[NP -> Det N ]|                / \                           |
  27. #   |     ...      |              NP  VP                          |
  28. #   | N -> 'dog'   |                                              |
  29. #   | N -> 'cat'   |                                              |
  30. #   |     ...      |                                              |
  31. #   +--------------+                                              |
  32. #   |      NP      |                      Det     N               |
  33. #   |     /  \     |                       |      |               |
  34. #   |   Det    N   |  the    cat    saw   the    dog              |
  35. #   |              |                                              |
  36. #   +--------------+----------------------------------------------+
  37. #
  38. # Operations:
  39. # - connect a new treelet -- drag or click shadow
  40. # - delete a treelet -- right click
  41. # - if only connected to top, delete everything below
  42. # - if only connected to bottom, delete everything above
  43. # - connect top & bottom -- drag a leaf to a root or a root to a leaf
  44. # - disconnect top & bottom -- right click
  45. # - if connected to top & bottom, then disconnect
  46. import nltk.compat
  47. import re
  48. from tkinter import (Button, Canvas, Entry, Frame, IntVar, Label,
  49. Scrollbar, Text, Tk, Toplevel)
  50. from nltk.grammar import (ContextFreeGrammar, parse_cfg_production,
  51. Nonterminal, nonterminals)
  52. from nltk.tree import Tree
  53. from nltk.draw.tree import TreeSegmentWidget, tree_to_treesegment
  54. from nltk.draw.util import (CanvasFrame, ColorizedList, ShowText,
  55. SymbolWidget, TextWidget)
  56. from nltk import compat
  57. ######################################################################
  58. # Production List
  59. ######################################################################
  60. class ProductionList(ColorizedList):
  61. ARROW = SymbolWidget.SYMBOLS['rightarrow']
  62. def _init_colortags(self, textwidget, options):
  63. textwidget.tag_config('terminal', foreground='#006000')
  64. textwidget.tag_config('arrow', font='symbol', underline='0')
  65. textwidget.tag_config('nonterminal', foreground='blue',
  66. font=('helvetica', -12, 'bold'))
  67. def _item_repr(self, item):
  68. contents = []
  69. contents.append(('%s\t' % item.lhs(), 'nonterminal'))
  70. contents.append((self.ARROW, 'arrow'))
  71. for elt in item.rhs():
  72. if isinstance(elt, Nonterminal):
  73. contents.append((' %s' % elt.symbol(), 'nonterminal'))
  74. else:
  75. contents.append((' %r' % elt, 'terminal'))
  76. return contents
  77. ######################################################################
  78. # CFG Editor
  79. ######################################################################
  80. _CFGEditor_HELP = """
  81. The CFG Editor can be used to create or modify context free grammars.
  82. A context free grammar consists of a start symbol and a list of
  83. productions. The start symbol is specified by the text entry field in
  84. the upper right hand corner of the editor; and the list of productions
  85. are specified in the main text editing box.
  86. Every non-blank line specifies a single production. Each production
  87. has the form "LHS -> RHS," where LHS is a single nonterminal, and RHS
  88. is a list of nonterminals and terminals.
  89. Nonterminals must be a single word, such as S or NP or NP_subj.
  90. Currently, nonterminals must consists of alphanumeric characters and
  91. underscores (_). Nonterminals are colored blue. If you place the
  92. mouse over any nonterminal, then all occurrences of that nonterminal
  93. will be highlighted.
  94. Termianals must be surrounded by single quotes (') or double
  95. quotes(\"). For example, "dog" and "New York" are terminals.
  96. Currently, the string within the quotes must consist of alphanumeric
  97. characters, underscores, and spaces.
  98. To enter a new production, go to a blank line, and type a nonterminal,
  99. followed by an arrow (->), followed by a sequence of terminals and
  100. nonterminals. Note that "->" (dash + greater-than) is automatically
  101. converted to an arrow symbol. When you move your cursor to a
  102. different line, your production will automatically be colorized. If
  103. there are any errors, they will be highlighted in red.
  104. Note that the order of the productions is significant for some
  105. algorithms. To re-order the productions, use cut and paste to move
  106. them.
  107. Use the buttons at the bottom of the window when you are done editing
  108. the CFG:
  109. - Ok: apply the new CFG, and exit the editor.
  110. - Apply: apply the new CFG, and do not exit the editor.
  111. - Reset: revert to the original CFG, and do not exit the editor.
  112. - Cancel: revert to the original CFG, and exit the editor.
  113. """
  114. class CFGEditor(object):
  115. """
  116. A dialog window for creating and editing context free grammars.
  117. ``CFGEditor`` imposes the following restrictions:
  118. - All nonterminals must be strings consisting of word
  119. characters.
  120. - All terminals must be strings consisting of word characters
  121. and space characters.
  122. """
  123. # Regular expressions used by _analyze_line. Precompile them, so
  124. # we can process the text faster.
  125. ARROW = SymbolWidget.SYMBOLS['rightarrow']
  126. _LHS_RE = re.compile(r"(^\s*\w+\s*)(->|("+ARROW+"))")
  127. _ARROW_RE = re.compile("\s*(->|("+ARROW+"))\s*")
  128. _PRODUCTION_RE = re.compile(r"(^\s*\w+\s*)" + # LHS
  129. "(->|("+ARROW+"))\s*" + # arrow
  130. r"((\w+|'[\w ]*'|\"[\w ]*\"|\|)\s*)*$") # RHS
  131. _TOKEN_RE = re.compile("\\w+|->|'[\\w ]+'|\"[\\w ]+\"|("+ARROW+")")
  132. _BOLD = ('helvetica', -12, 'bold')
  133. def __init__(self, parent, cfg=None, set_cfg_callback=None):
  134. self._parent = parent
  135. if cfg is not None: self._cfg = cfg
  136. else: self._cfg = ContextFreeGrammar(Nonterminal('S'), [])
  137. self._set_cfg_callback = set_cfg_callback
  138. self._highlight_matching_nonterminals = 1
  139. # Create the top-level window.
  140. self._top = Toplevel(parent)
  141. self._init_bindings()
  142. self._init_startframe()
  143. self._startframe.pack(side='top', fill='x', expand=0)
  144. self._init_prodframe()
  145. self._prodframe.pack(side='top', fill='both', expand=1)
  146. self._init_buttons()
  147. self._buttonframe.pack(side='bottom', fill='x', expand=0)
  148. self._textwidget.focus()
  149. def _init_startframe(self):
  150. frame = self._startframe = Frame(self._top)
  151. self._start = Entry(frame)
  152. self._start.pack(side='right')
  153. Label(frame, text='Start Symbol:').pack(side='right')
  154. Label(frame, text='Productions:').pack(side='left')
  155. self._start.insert(0, self._cfg.start().symbol())
  156. def _init_buttons(self):
  157. frame = self._buttonframe = Frame(self._top)
  158. Button(frame, text='Ok', command=self._ok,
  159. underline=0, takefocus=0).pack(side='left')
  160. Button(frame, text='Apply', command=self._apply,
  161. underline=0, takefocus=0).pack(side='left')
  162. Button(frame, text='Reset', command=self._reset,
  163. underline=0, takefocus=0,).pack(side='left')
  164. Button(frame, text='Cancel', command=self._cancel,
  165. underline=0, takefocus=0).pack(side='left')
  166. Button(frame, text='Help', command=self._help,
  167. underline=0, takefocus=0).pack(side='right')
  168. def _init_bindings(self):
  169. self._top.title('CFG Editor')
  170. self._top.bind('<Control-q>', self._cancel)
  171. self._top.bind('<Alt-q>', self._cancel)
  172. self._top.bind('<Control-d>', self._cancel)
  173. #self._top.bind('<Control-x>', self._cancel)
  174. self._top.bind('<Alt-x>', self._cancel)
  175. self._top.bind('<Escape>', self._cancel)
  176. #self._top.bind('<Control-c>', self._cancel)
  177. self._top.bind('<Alt-c>', self._cancel)
  178. self._top.bind('<Control-o>', self._ok)
  179. self._top.bind('<Alt-o>', self._ok)
  180. self._top.bind('<Control-a>', self._apply)
  181. self._top.bind('<Alt-a>', self._apply)
  182. self._top.bind('<Control-r>', self._reset)
  183. self._top.bind('<Alt-r>', self._reset)
  184. self._top.bind('<Control-h>', self._help)
  185. self._top.bind('<Alt-h>', self._help)
  186. self._top.bind('<F1>', self._help)
  187. def _init_prodframe(self):
  188. self._prodframe = Frame(self._top)
  189. # Create the basic Text widget & scrollbar.
  190. self._textwidget = Text(self._prodframe, background='#e0e0e0',
  191. exportselection=1)
  192. self._textscroll = Scrollbar(self._prodframe, takefocus=0,
  193. orient='vertical')
  194. self._textwidget.config(yscrollcommand = self._textscroll.set)
  195. self._textscroll.config(command=self._textwidget.yview)
  196. self._textscroll.pack(side='right', fill='y')
  197. self._textwidget.pack(expand=1, fill='both', side='left')
  198. # Initialize the colorization tags. Each nonterminal gets its
  199. # own tag, so they aren't listed here.
  200. self._textwidget.tag_config('terminal', foreground='#006000')
  201. self._textwidget.tag_config('arrow', font='symbol')
  202. self._textwidget.tag_config('error', background='red')
  203. # Keep track of what line they're on. We use that to remember
  204. # to re-analyze a line whenever they leave it.
  205. self._linenum = 0
  206. # Expand "->" to an arrow.
  207. self._top.bind('>', self._replace_arrows)
  208. # Re-colorize lines when appropriate.
  209. self._top.bind('<<Paste>>', self._analyze)
  210. self._top.bind('<KeyPress>', self._check_analyze)
  211. self._top.bind('<ButtonPress>', self._check_analyze)
  212. # Tab cycles focus. (why doesn't this work??)
  213. def cycle(e, textwidget=self._textwidget):
  214. textwidget.tk_focusNext().focus()
  215. self._textwidget.bind('<Tab>', cycle)
  216. prod_tuples = [(p.lhs(),[p.rhs()]) for p in self._cfg.productions()]
  217. for i in range(len(prod_tuples)-1,0,-1):
  218. if (prod_tuples[i][0] == prod_tuples[i-1][0]):
  219. if () in prod_tuples[i][1]: continue
  220. if () in prod_tuples[i-1][1]: continue
  221. print(prod_tuples[i-1][1])
  222. print(prod_tuples[i][1])
  223. prod_tuples[i-1][1].extend(prod_tuples[i][1])
  224. del prod_tuples[i]
  225. for lhs, rhss in prod_tuples:
  226. print(lhs, rhss)
  227. s = '%s ->' % lhs
  228. for rhs in rhss:
  229. for elt in rhs:
  230. if isinstance(elt, Nonterminal): s += ' %s' % elt
  231. else: s += ' %r' % elt
  232. s += ' |'
  233. s = s[:-2] + '\n'
  234. self._textwidget.insert('end', s)
  235. self._analyze()
  236. # # Add the producitons to the text widget, and colorize them.
  237. # prod_by_lhs = {}
  238. # for prod in self._cfg.productions():
  239. # if len(prod.rhs()) > 0:
  240. # prod_by_lhs.setdefault(prod.lhs(),[]).append(prod)
  241. # for (lhs, prods) in prod_by_lhs.items():
  242. # self._textwidget.insert('end', '%s ->' % lhs)
  243. # self._textwidget.insert('end', self._rhs(prods[0]))
  244. # for prod in prods[1:]:
  245. # print '\t|'+self._rhs(prod),
  246. # self._textwidget.insert('end', '\t|'+self._rhs(prod))
  247. # print
  248. # self._textwidget.insert('end', '\n')
  249. # for prod in self._cfg.productions():
  250. # if len(prod.rhs()) == 0:
  251. # self._textwidget.insert('end', '%s' % prod)
  252. # self._analyze()
  253. # def _rhs(self, prod):
  254. # s = ''
  255. # for elt in prod.rhs():
  256. # if isinstance(elt, Nonterminal): s += ' %s' % elt.symbol()
  257. # else: s += ' %r' % elt
  258. # return s
  259. def _clear_tags(self, linenum):
  260. """
  261. Remove all tags (except ``arrow`` and ``sel``) from the given
  262. line of the text widget used for editing the productions.
  263. """
  264. start = '%d.0'%linenum
  265. end = '%d.end'%linenum
  266. for tag in self._textwidget.tag_names():
  267. if tag not in ('arrow', 'sel'):
  268. self._textwidget.tag_remove(tag, start, end)
  269. def _check_analyze(self, *e):
  270. """
  271. Check if we've moved to a new line. If we have, then remove
  272. all colorization from the line we moved to, and re-colorize
  273. the line that we moved from.
  274. """
  275. linenum = int(self._textwidget.index('insert').split('.')[0])
  276. if linenum != self._linenum:
  277. self._clear_tags(linenum)
  278. self._analyze_line(self._linenum)
  279. self._linenum = linenum
  280. def _replace_arrows(self, *e):
  281. """
  282. Replace any ``'->'`` text strings with arrows (char \\256, in
  283. symbol font). This searches the whole buffer, but is fast
  284. enough to be done anytime they press '>'.
  285. """
  286. arrow = '1.0'
  287. while True:
  288. arrow = self._textwidget.search('->', arrow, 'end+1char')
  289. if arrow == '': break
  290. self._textwidget.delete(arrow, arrow+'+2char')
  291. self._textwidget.insert(arrow, self.ARROW, 'arrow')
  292. self._textwidget.insert(arrow, '\t')
  293. arrow = '1.0'
  294. while True:
  295. arrow = self._textwidget.search(self.ARROW, arrow+'+1char',
  296. 'end+1char')
  297. if arrow == '': break
  298. self._textwidget.tag_add('arrow', arrow, arrow+'+1char')
  299. def _analyze_token(self, match, linenum):
  300. """
  301. Given a line number and a regexp match for a token on that
  302. line, colorize the token. Note that the regexp match gives us
  303. the token's text, start index (on the line), and end index (on
  304. the line).
  305. """
  306. # What type of token is it?
  307. if match.group()[0] in "'\"": tag = 'terminal'
  308. elif match.group() in ('->', self.ARROW): tag = 'arrow'
  309. else:
  310. # If it's a nonterminal, then set up new bindings, so we
  311. # can highlight all instances of that nonterminal when we
  312. # put the mouse over it.
  313. tag = 'nonterminal_'+match.group()
  314. if tag not in self._textwidget.tag_names():
  315. self._init_nonterminal_tag(tag)
  316. start = '%d.%d' % (linenum, match.start())
  317. end = '%d.%d' % (linenum, match.end())
  318. self._textwidget.tag_add(tag, start, end)
  319. def _init_nonterminal_tag(self, tag, foreground='blue'):
  320. self._textwidget.tag_config(tag, foreground=foreground,
  321. font=CFGEditor._BOLD)
  322. if not self._highlight_matching_nonterminals:
  323. return
  324. def enter(e, textwidget=self._textwidget, tag=tag):
  325. textwidget.tag_config(tag, background='#80ff80')
  326. def leave(e, textwidget=self._textwidget, tag=tag):
  327. textwidget.tag_config(tag, background='')
  328. self._textwidget.tag_bind(tag, '<Enter>', enter)
  329. self._textwidget.tag_bind(tag, '<Leave>', leave)
  330. def _analyze_line(self, linenum):
  331. """
  332. Colorize a given line.
  333. """
  334. # Get rid of any tags that were previously on the line.
  335. self._clear_tags(linenum)
  336. # Get the line line's text string.
  337. line = self._textwidget.get(repr(linenum)+'.0', repr(linenum)+'.end')
  338. # If it's a valid production, then colorize each token.
  339. if CFGEditor._PRODUCTION_RE.match(line):
  340. # It's valid; Use _TOKEN_RE to tokenize the production,
  341. # and call analyze_token on each token.
  342. def analyze_token(match, self=self, linenum=linenum):
  343. self._analyze_token(match, linenum)
  344. return ''
  345. CFGEditor._TOKEN_RE.sub(analyze_token, line)
  346. elif line.strip() != '':
  347. # It's invalid; show the user where the error is.
  348. self._mark_error(linenum, line)
  349. def _mark_error(self, linenum, line):
  350. """
  351. Mark the location of an error in a line.
  352. """
  353. arrowmatch = CFGEditor._ARROW_RE.search(line)
  354. if not arrowmatch:
  355. # If there's no arrow at all, highlight the whole line.
  356. start = '%d.0' % linenum
  357. end = '%d.end' % linenum
  358. elif not CFGEditor._LHS_RE.match(line):
  359. # Otherwise, if the LHS is bad, highlight it.
  360. start = '%d.0' % linenum
  361. end = '%d.%d' % (linenum, arrowmatch.start())
  362. else:
  363. # Otherwise, highlight the RHS.
  364. start = '%d.%d' % (linenum, arrowmatch.end())
  365. end = '%d.end' % linenum
  366. # If we're highlighting 0 chars, highlight the whole line.
  367. if self._textwidget.compare(start, '==', end):
  368. start = '%d.0' % linenum
  369. end = '%d.end' % linenum
  370. self._textwidget.tag_add('error', start, end)
  371. def _analyze(self, *e):
  372. """
  373. Replace ``->`` with arrows, and colorize the entire buffer.
  374. """
  375. self._replace_arrows()
  376. numlines = int(self._textwidget.index('end').split('.')[0])
  377. for linenum in range(1, numlines+1): # line numbers start at 1.
  378. self._analyze_line(linenum)
  379. def _parse_productions(self):
  380. """
  381. Parse the current contents of the textwidget buffer, to create
  382. a list of productions.
  383. """
  384. productions = []
  385. # Get the text, normalize it, and split it into lines.
  386. text = self._textwidget.get('1.0', 'end')
  387. text = re.sub(self.ARROW, '->', text)
  388. text = re.sub('\t', ' ', text)
  389. lines = text.split('\n')
  390. # Convert each line to a CFG production
  391. for line in lines:
  392. line = line.strip()
  393. if line=='': continue
  394. productions += parse_cfg_production(line)
  395. #if line.strip() == '': continue
  396. #if not CFGEditor._PRODUCTION_RE.match(line):
  397. # raise ValueError('Bad production string %r' % line)
  398. #
  399. #(lhs_str, rhs_str) = line.split('->')
  400. #lhs = Nonterminal(lhs_str.strip())
  401. #rhs = []
  402. #def parse_token(match, rhs=rhs):
  403. # token = match.group()
  404. # if token[0] in "'\"": rhs.append(token[1:-1])
  405. # else: rhs.append(Nonterminal(token))
  406. # return ''
  407. #CFGEditor._TOKEN_RE.sub(parse_token, rhs_str)
  408. #
  409. #productions.append(Production(lhs, *rhs))
  410. return productions
  411. def _destroy(self, *e):
  412. if self._top is None: return
  413. self._top.destroy()
  414. self._top = None
  415. def _ok(self, *e):
  416. self._apply()
  417. self._destroy()
  418. def _apply(self, *e):
  419. productions = self._parse_productions()
  420. start = Nonterminal(self._start.get())
  421. cfg = ContextFreeGrammar(start, productions)
  422. if self._set_cfg_callback is not None:
  423. self._set_cfg_callback(cfg)
  424. def _reset(self, *e):
  425. self._textwidget.delete('1.0', 'end')
  426. for production in self._cfg.productions():
  427. self._textwidget.insert('end', '%s\n' % production)
  428. self._analyze()
  429. if self._set_cfg_callback is not None:
  430. self._set_cfg_callback(self._cfg)
  431. def _cancel(self, *e):
  432. try: self._reset()
  433. except: pass
  434. self._destroy()
  435. def _help(self, *e):
  436. # The default font's not very legible; try using 'fixed' instead.
  437. try:
  438. ShowText(self._parent, 'Help: Chart Parser Demo',
  439. (_CFGEditor_HELP).strip(), width=75, font='fixed')
  440. except:
  441. ShowText(self._parent, 'Help: Chart Parser Demo',
  442. (_CFGEditor_HELP).strip(), width=75)
  443. ######################################################################
  444. # New Demo (built tree based on cfg)
  445. ######################################################################
  446. class CFGDemo(object):
  447. def __init__(self, grammar, text):
  448. self._grammar = grammar
  449. self._text = text
  450. # Set up the main window.
  451. self._top = Tk()
  452. self._top.title('Context Free Grammar Demo')
  453. # Base font size
  454. self._size = IntVar(self._top)
  455. self._size.set(12) # = medium
  456. # Set up the key bindings
  457. self._init_bindings(self._top)
  458. # Create the basic frames
  459. frame1 = Frame(self._top)
  460. frame1.pack(side='left', fill='y', expand=0)
  461. self._init_menubar(self._top)
  462. self._init_buttons(self._top)
  463. self._init_grammar(frame1)
  464. self._init_treelet(frame1)
  465. self._init_workspace(self._top)
  466. #//////////////////////////////////////////////////
  467. # Initialization
  468. #//////////////////////////////////////////////////
  469. def _init_bindings(self, top):
  470. top.bind('<Control-q>', self.destroy)
  471. def _init_menubar(self, parent): pass
  472. def _init_buttons(self, parent): pass
  473. def _init_grammar(self, parent):
  474. self._prodlist = ProductionList(parent, self._grammar, width=20)
  475. self._prodlist.pack(side='top', fill='both', expand=1)
  476. self._prodlist.focus()
  477. self._prodlist.add_callback('select', self._selectprod_cb)
  478. self._prodlist.add_callback('move', self._selectprod_cb)
  479. def _init_treelet(self, parent):
  480. self._treelet_canvas = Canvas(parent, background='white')
  481. self._treelet_canvas.pack(side='bottom', fill='x')
  482. self._treelet = None
  483. def _init_workspace(self, parent):
  484. self._workspace = CanvasFrame(parent, background='white')
  485. self._workspace.pack(side='right', fill='both', expand=1)
  486. self._tree = None
  487. self.reset_workspace()
  488. #//////////////////////////////////////////////////
  489. # Workspace
  490. #//////////////////////////////////////////////////
  491. def reset_workspace(self):
  492. c = self._workspace.canvas()
  493. fontsize = int(self._size.get())
  494. node_font = ('helvetica', -(fontsize+4), 'bold')
  495. leaf_font = ('helvetica', -(fontsize+2))
  496. # Remove the old tree
  497. if self._tree is not None:
  498. self._workspace.remove_widget(self._tree)
  499. # The root of the tree.
  500. start = self._grammar.start().symbol()
  501. rootnode = TextWidget(c, start, font=node_font, draggable=1)
  502. # The leaves of the tree.
  503. leaves = []
  504. for word in self._text:
  505. leaves.append(TextWidget(c, word, font=leaf_font, draggable=1))
  506. # Put it all together into one tree
  507. self._tree = TreeSegmentWidget(c, rootnode, leaves,
  508. color='white')
  509. # Add it to the workspace.
  510. self._workspace.add_widget(self._tree)
  511. # Move the leaves to the bottom of the workspace.
  512. for leaf in leaves: leaf.move(0,100)
  513. #self._nodes = {start:1}
  514. #self._leaves = dict([(l,1) for l in leaves])
  515. def workspace_markprod(self, production):
  516. pass
  517. def _markproduction(self, prod, tree=None):
  518. if tree is None: tree = self._tree
  519. for i in range(len(tree.subtrees())-len(prod.rhs())):
  520. if tree['color', i] == 'white':
  521. self._markproduction
  522. for j, node in enumerate(prod.rhs()):
  523. widget = tree.subtrees()[i+j]
  524. if (isinstance(node, Nonterminal) and
  525. isinstance(widget, TreeSegmentWidget) and
  526. node.symbol == widget.node().text()):
  527. pass # matching nonterminal
  528. elif (isinstance(node, compat.string_types) and
  529. isinstance(widget, TextWidget) and
  530. node == widget.text()):
  531. pass # matching nonterminal
  532. else: break
  533. else:
  534. # Everything matched!
  535. print('MATCH AT', i)
  536. #//////////////////////////////////////////////////
  537. # Grammar
  538. #//////////////////////////////////////////////////
  539. def _selectprod_cb(self, production):
  540. canvas = self._treelet_canvas
  541. self._prodlist.highlight(production)
  542. if self._treelet is not None: self._treelet.destroy()
  543. # Convert the production to a tree.
  544. rhs = production.rhs()
  545. for (i, elt) in enumerate(rhs):
  546. if isinstance(elt, Nonterminal): elt = Tree(elt)
  547. tree = Tree(production.lhs().symbol(), *rhs)
  548. # Draw the tree in the treelet area.
  549. fontsize = int(self._size.get())
  550. node_font = ('helvetica', -(fontsize+4), 'bold')
  551. leaf_font = ('helvetica', -(fontsize+2))
  552. self._treelet = tree_to_treesegment(canvas, tree,
  553. node_font=node_font,
  554. leaf_font=leaf_font)
  555. self._treelet['draggable'] = 1
  556. # Center the treelet.
  557. (x1, y1, x2, y2) = self._treelet.bbox()
  558. w, h = int(canvas['width']), int(canvas['height'])
  559. self._treelet.move((w-x1-x2)/2, (h-y1-y2)/2)
  560. # Mark the places where we can add it to the workspace.
  561. self._markproduction(production)
  562. def destroy(self, *args):
  563. self._top.destroy()
  564. def mainloop(self, *args, **kwargs):
  565. self._top.mainloop(*args, **kwargs)
  566. def demo2():
  567. from nltk import Nonterminal, Production, ContextFreeGrammar
  568. nonterminals = 'S VP NP PP P N Name V Det'
  569. (S, VP, NP, PP, P, N, Name, V, Det) = [Nonterminal(s)
  570. for s in nonterminals.split()]
  571. productions = (
  572. # Syntactic Productions
  573. Production(S, [NP, VP]),
  574. Production(NP, [Det, N]),
  575. Production(NP, [NP, PP]),
  576. Production(VP, [VP, PP]),
  577. Production(VP, [V, NP, PP]),
  578. Production(VP, [V, NP]),
  579. Production(PP, [P, NP]),
  580. Production(PP, []),
  581. Production(PP, ['up', 'over', NP]),
  582. # Lexical Productions
  583. Production(NP, ['I']), Production(Det, ['the']),
  584. Production(Det, ['a']), Production(N, ['man']),
  585. Production(V, ['saw']), Production(P, ['in']),
  586. Production(P, ['with']), Production(N, ['park']),
  587. Production(N, ['dog']), Production(N, ['statue']),
  588. Production(Det, ['my']),
  589. )
  590. grammar = ContextFreeGrammar(S, productions)
  591. text = 'I saw a man in the park'.split()
  592. d=CFGDemo(grammar, text)
  593. d.mainloop()
  594. ######################################################################
  595. # Old Demo
  596. ######################################################################
  597. def demo():
  598. from nltk import Nonterminal, parse_cfg
  599. nonterminals = 'S VP NP PP P N Name V Det'
  600. (S, VP, NP, PP, P, N, Name, V, Det) = [Nonterminal(s)
  601. for s in nonterminals.split()]
  602. grammar = parse_cfg("""
  603. S -> NP VP
  604. PP -> P NP
  605. NP -> Det N
  606. NP -> NP PP
  607. VP -> V NP
  608. VP -> VP PP
  609. Det -> 'a'
  610. Det -> 'the'
  611. Det -> 'my'
  612. NP -> 'I'
  613. N -> 'dog'
  614. N -> 'man'
  615. N -> 'park'
  616. N -> 'statue'
  617. V -> 'saw'
  618. P -> 'in'
  619. P -> 'up'
  620. P -> 'over'
  621. P -> 'with'
  622. """)
  623. def cb(grammar): print(grammar)
  624. top = Tk()
  625. editor = CFGEditor(top, grammar, cb)
  626. Label(top, text='\nTesting CFG Editor\n').pack()
  627. Button(top, text='Quit', command=top.destroy).pack()
  628. top.mainloop()
  629. def demo3():
  630. from nltk import Production
  631. (S, VP, NP, PP, P, N, Name, V, Det) = \
  632. nonterminals('S, VP, NP, PP, P, N, Name, V, Det')
  633. productions = (
  634. # Syntactic Productions
  635. Production(S, [NP, VP]),
  636. Production(NP, [Det, N]),
  637. Production(NP, [NP, PP]),
  638. Production(VP, [VP, PP]),
  639. Production(VP, [V, NP, PP]),
  640. Production(VP, [V, NP]),
  641. Production(PP, [P, NP]),
  642. Production(PP, []),
  643. Production(PP, ['up', 'over', NP]),
  644. # Lexical Productions
  645. Production(NP, ['I']), Production(Det, ['the']),
  646. Production(Det, ['a']), Production(N, ['man']),
  647. Production(V, ['saw']), Production(P, ['in']),
  648. Production(P, ['with']), Production(N, ['park']),
  649. Production(N, ['dog']), Production(N, ['statue']),
  650. Production(Det, ['my']),
  651. )
  652. t = Tk()
  653. def destroy(e, t=t): t.destroy()
  654. t.bind('q', destroy)
  655. p = ProductionList(t, productions)
  656. p.pack(expand=1, fill='both')
  657. p.add_callback('select', p.markonly)
  658. p.add_callback('move', p.markonly)
  659. p.focus()
  660. p.mark(productions[2])
  661. p.mark(productions[8])
  662. if __name__ == '__main__': demo()