PageRenderTime 51ms CodeModel.GetById 18ms RepoModel.GetById 0ms app.codeStats 0ms

/nltk-old/contrib/nltk_contrib/upenn/cis530/spring2005/chenh/chart_fs.py

http://nltk.googlecode.com/
Python | 654 lines | 524 code | 31 blank | 99 comment | 37 complexity | 06d05ba082ad20fe0ad804477cfb3c08 MD5 | raw file
Possible License(s): Apache-2.0, AGPL-1.0
  1. # CIS530 final project
  2. #
  3. # Topic: A Chart Parser with Feature Structures
  4. #
  5. # Author: Huang-Wen Chen <chenh@seas.upenn.edu>
  6. #
  7. """
  8. This module includes the following:
  9. 1. An interface 'FeatureStructureI', and an implementation utilizing
  10. existing nltk.featurestructure
  11. 2. An extended context-free grammar 'CFG_fs' that can store a list
  12. of FeatureStructureI
  13. 3. An extended TreeEdge and LeafEdge that can store FeatureStructureI
  14. 4. An extended Chart that allows edges with same grammar rules
  15. but different feature structures. It also checks for subsumption
  16. of feature structures of edges. It rejects the insertion of a
  17. new edge if its feature structure is subsumed by edges already
  18. in the chart.
  19. 5. Three modified rules: PredictorRule, ScannerRule, and CompleterRule
  20. 6. The modified EarleyChartParser
  21. 7. demo code
  22. Developing Environment:
  23. python 2.3.5
  24. Numeric 23.8
  25. nltk 1.4.3 win32
  26. Filename & path:
  27. nltk\parser\chart_fs.py
  28. Some examples of output:
  29. |. these . dogs . eat . the . cookie.|
  30. Predictor |> . . . . .| S -> * NP VP//[NP=[AGR=?x1], VP=[AGR=?x1]]
  31. Predictor |> . . . . .| NP -> * Det N//[Det=[AGR=?x3], N=[AGR=?<x2=x3>], NP=[AGR=?x2]]
  32. Predictor |> . . . . .| NP -> * NP PP//[]
  33. Scanner |[-------] . . . .| 'these'.//[Det=[AGR=[Num='pl', Person='3rd']]]
  34. Scanner |[-------] . . . .| Det -> 'these' *//[Det=[AGR=[Num='pl', Person='3rd']]]
  35. Completer |[-------> . . . .| NP -> Det * N//[Det=[AGR=(1)[Num='pl', Person='3rd']], N=[AGR->(1)], NP=[AGR->(1)]]
  36. Scanner |. [-------] . . .| 'dogs'.//[N=[AGR=[Num='pl', Person='3rd']]]
  37. ***Summary***
  38. "this dog eats the cookie": Parsing Succeed. 0.875sec
  39. "this dogs eats the cookie": Parsing Fail. 0.157sec
  40. "these dog eats the cookie": Parsing Fail. 0.156sec
  41. "these dogs eat the cookie": Parsing Succeed. 1.047sec
  42. "these dogs eats the cookie": Parsing Fail. 0.657sec
  43. "I eat the cookie": Parsing Succeed. 1.000sec
  44. "I eats the cookie": Parsing Fail. 0.578sec
  45. Some Comments:
  46. This project consumes much more time than I've planned for the following reasons:
  47. 1. I have to thoroughly study both feature structure and earley parser algorithms.
  48. Especially for feature structure, since the slides are not enough, I have to
  49. search and read more literatures about this topic.
  50. 2. I have to read existing code for both feature structure and earley parser.
  51. Especially for EarleyChartParser, since it uses lots of advanced python techniques,
  52. such as inherence, generator, regular expression and customizations.
  53. 3. I need to design a good interface for feature structure and a concise class hierarchy
  54. for modified EarleyChartParser
  55. 4. The core idea about how to enhance Earley parser to support feature structures takes time
  56. (The only thing I've planed in advance)
  57. 5. Distinguishing 'I eat the cookie' and 'these dogs eat the cookie' is quite complicated.
  58. The current NLTK code should be examine and modified.
  59. """
  60. import re
  61. from nltk.chktype import chktype
  62. from nltk.token import Token
  63. from nltk.parser import AbstractParser
  64. from nltk.cfg import CFG, CFGProduction, Nonterminal, nonterminals
  65. from nltk import cfg
  66. from nltk import featurestructure
  67. from nltk.parser import chart
  68. from nltk.parser.chart import AbstractChartRule
  69. #################################################################
  70. # FeatureStructureI and FeatureStructure
  71. #################################################################
  72. class FeatureStructureI:
  73. """
  74. This interface support the following operations:
  75. 1. FeatureStructureI() : Construct an empty feature sturcture '[]'
  76. 2. FeatureStructureI(['<VP AGR>=<V=AGR>']) : Construct a feature
  77. structure with a set of "unification constraints".
  78. The unification constraints is defined as:
  79. B0 -> B1 ... Bn
  80. {set of constraints}
  81. <Bi feature path>=atomic value
  82. <Bi feature path>=<Bk feature path>
  83. 3. unify() : Unification of two feature structures
  84. 4. issubsumed() : Is feature sturcture A subsumed (included)
  85. by feature structure B ?
  86. """
  87. def __init__(self, consts=None, fs=None, bindings=None):
  88. if self.__class__ == FeatureStructureI:
  89. raise TypeError('FeatureStructureI is an abstract interface')
  90. def unify(self, fs2):
  91. raise AssertionError('FeatureStructureI is an abstract interface')
  92. def issubsumed(self, fs2):
  93. raise AssertionError('FeatureStructureI is an abstract interface')
  94. class FeatureStructure(FeatureStructureI):
  95. """
  96. The implementation of FeatureStructureI utilizing nltk.featurestructure.
  97. It is quite simple, except for the parsing of unification constraints.
  98. """
  99. _next_numbered_id = 1;
  100. def __init__(self, consts=None, fs=None, bindings=None):
  101. if fs == None:
  102. self._fs = featurestructure.FeatureStructure()
  103. if bindings == None:
  104. self._bindings = featurestructure.FeatureBindings()
  105. # empty feature stcutres : '[]'
  106. if consts == None:
  107. return
  108. # Convert unification constraints into string representation
  109. # , then pass the string to FeatureStructure.parse()
  110. for s in consts:
  111. #e.g. <a b c>=<d e f>
  112. r = re.compile('<(.+)>\s*=\s*<(.+)>')
  113. m = r.match(s)
  114. if (m != None):
  115. myvar = '?x' + '%d' % FeatureStructure._next_numbered_id
  116. FeatureStructure._next_numbered_id += 1
  117. str = FeatureStructure._featurepath2str(self, m.group(1), myvar)
  118. fs2 = featurestructure.FeatureStructure.parse(str)
  119. self._fs = self._fs.unify(fs2)
  120. str = FeatureStructure._featurepath2str(self, m.group(2), myvar)
  121. fs2 = featurestructure.FeatureStructure.parse(str)
  122. self._fs = self._fs.unify(fs2, self._bindings)
  123. else:
  124. #e.g. <a b c>=some_value
  125. r = re.compile('<(.+)>\s*=\s*(.+)')
  126. m = r.match(s)
  127. if(m != None):
  128. str = FeatureStructure._featurepath2str(self, m.group(1), m.group(2))
  129. fs2 = featurestructure.FeatureStructure.parse(str)
  130. self._fs = self._fs.unify(fs2, self._bindings)
  131. else:
  132. raise AssertionError("Bad Format:"+s)
  133. def _featurepath2str(self, path, val):
  134. """
  135. Generate '[path1=[path2=val]]'
  136. """
  137. path = path.strip().split()
  138. str = '[' + path.pop() + '=' + val + ']'
  139. while len(path) != 0:
  140. str = '[' + path.pop() + '=' + str + ']'
  141. return str;
  142. def unify(self, fs2):
  143. """
  144. unify two FeatureSturctureI and generate a new one
  145. """
  146. bindings = self._bindings.copy()
  147. fs3 = self._fs.unify(fs2._fs, bindings)
  148. if fs3 == None:
  149. return None
  150. #==A strange bug, I can't use the following line:
  151. #return FeatureStructure(fs=fs3, bindings=bindings)
  152. #==Instead, the following lines work
  153. fs = FeatureStructure(fs=fs3, bindings=self._bindings)
  154. fs._fs = fs3
  155. fs._bindings = bindings
  156. return fs
  157. def issubsumed(self, fs2):
  158. """
  159. Is fs1 subsumed (included) by fs2 ?
  160. """
  161. if repr(fs2._fs) == '[]':
  162. return False
  163. fs3 = self._fs.unify(fs2._fs, self._bindings)
  164. if repr(fs3) == repr(self._fs):
  165. return True
  166. else:
  167. return False
  168. def __repr__(self):
  169. return repr(self._fs)
  170. #################################################################
  171. # CFG_fs
  172. #################################################################
  173. class CFG_fs(CFG):
  174. """
  175. A CFG with feature sturcture.
  176. This class inherents existing nltk.cfg.CFG.
  177. """
  178. def __init__(self, start, productions, fs):
  179. """
  180. Store the list of feature structures
  181. """
  182. assert chktype(3, fs, list)
  183. self._fs = fs;
  184. CFG.__init__(self, start, productions)
  185. def fs(self):
  186. """
  187. Retrieve the list of feature structures
  188. """
  189. return self._fs
  190. ########################################################################
  191. ## Edges (TreeEdge & LeafEdge)
  192. ########################################################################
  193. class TreeEdge(chart.TreeEdge):
  194. """
  195. A TreeEdge with feature structures.
  196. This class inherents nltk.parser.chart.TreeEdge.
  197. """
  198. def __init__(self, span, lhs, rhs, fs=FeatureStructure(), dot=0):
  199. """
  200. Store the of feature structure
  201. """
  202. assert chktype(4, fs, FeatureStructure)
  203. self._fs = fs
  204. chart.TreeEdge.__init__(self, span, lhs, rhs, dot)
  205. # Accessors
  206. def fs(self): return self._fs
  207. # Comparisons & hashing
  208. def __cmp__(self, other):
  209. """
  210. Two edges are consided identical if they have the same feature structure
  211. """
  212. if not isinstance(other, TreeEdge): return -1
  213. return cmp((self._span, self._lhs, self._rhs, self._dot, self._fs),
  214. (other._span, other._lhs, other._rhs, other._dot, other._fs))
  215. def __hash__(self):
  216. """
  217. Two edges are consided identical if they have the same feature structure
  218. """
  219. return hash((self._lhs, self._rhs, self._span, self._dot, self._fs))
  220. # [staticmethod]
  221. def from_production(production, index, fs=FeatureStructure()):
  222. """
  223. Store the of feature structure if given
  224. """
  225. return TreeEdge(span=(index, index), lhs=production.lhs(),
  226. rhs=production.rhs(), fs=fs, dot=0)
  227. from_production = staticmethod(from_production)
  228. # String representation
  229. def __str__(self):
  230. str = '%-2s ->' % (self._lhs.symbol(),)
  231. for i in range(len(self._rhs)):
  232. if i == self._dot: str += ' *'
  233. if isinstance(self._rhs[i], Nonterminal):
  234. str += ' %s' % (self._rhs[i].symbol(),)
  235. else:
  236. str += ' %r' % (self._rhs[i],)
  237. if len(self._rhs) == self._dot: str += ' *'
  238. #Append feature structure, Modified by howard
  239. str += '//' + repr(self._fs)
  240. return str
  241. class LeafEdge(chart.LeafEdge):
  242. """
  243. A LeafEdge with feature structures.
  244. This class inherents nltk.parser.chart.LeafEdge.
  245. """
  246. def __init__(self, leaf, index, fs=FeatureStructure()):
  247. """
  248. Store the of feature structure
  249. """
  250. assert chktype(3, fs, FeatureStructure)
  251. self._fs = fs
  252. chart.LeafEdge.__init__(self, leaf, index)
  253. # Accessors
  254. def fs(self): return self._fs
  255. # Comparisons & hasing
  256. def __cmp__(self, other):
  257. if not isinstance(other, LeafEdge): return -1
  258. return cmp((self._index, self._leaf, self._fs), (other._index, other._leaf, other._fs))
  259. def __hash__(self):
  260. return hash((self._index, self._leaf, self._fs))
  261. # String representations
  262. def __str__(self): return '%r.//' % self._leaf + repr(self._fs) #Append feature structure, Modified by howard
  263. ########################################################################
  264. ## Chart
  265. ########################################################################
  266. class Chart(chart.Chart):
  267. """
  268. A Chart which checks for subsumation when an edge is inserted
  269. This class inherents nltk.parser.chart.Chart.
  270. """
  271. #////////////////////////////////////////////////////////////
  272. # Edge Insertion
  273. #////////////////////////////////////////////////////////////
  274. def insert(self, edge, child_pointer_list):
  275. """
  276. Reject insertion of the new edge if it is subsumed by edges
  277. already in chart
  278. """
  279. # Is it a new edge?
  280. if not self._edge_to_cpls.has_key(edge):
  281. # Is this edge subsumed by edges in chart. Modified by howard
  282. issubsumed = False
  283. for e in self._edges:
  284. if (edge == e) and (edge.fs().issubsumed(e.fs())):
  285. issubsumed = True
  286. break
  287. #DEBUG
  288. if issubsumed:
  289. print '****issubsumed %s' % issubsumed + str(edge) +'/' + str(e)
  290. if not issubsumed:
  291. # Add it to the list of edges.
  292. self._edges.append(edge)
  293. # Register with indexes
  294. for (restr_keys, index) in self._indexes.items():
  295. vals = [getattr(edge, k)() for k in restr_keys]
  296. index = self._indexes[restr_keys]
  297. index.setdefault(tuple(vals),[]).append(edge)
  298. # Get the set of child pointer lists for this edge.
  299. cpls = self._edge_to_cpls.setdefault(edge,{})
  300. child_pointer_list = tuple(child_pointer_list)
  301. if cpls.has_key(child_pointer_list):
  302. # We've already got this CPL; return false.
  303. return False
  304. else:
  305. # It's a new CPL; register it, and return true.
  306. cpls[child_pointer_list] = True
  307. return True
  308. #////////////////////////////////////////////////////////////
  309. # Earley Parsing Rules
  310. #////////////////////////////////////////////////////////////
  311. class FundamentalRule(AbstractChartRule):
  312. """
  313. The FundamentalRule (used by CompleterRule) is modified for:
  314. 1. Check for compatibility of two edges (unification)
  315. 2. Supply the new edge with the feature structure unified from
  316. two joining edges
  317. """
  318. NUM_EDGES=2
  319. def apply_iter(self, chart, grammar, left_edge, right_edge):
  320. # Make sure the rule is applicable.
  321. if not (left_edge.end() == right_edge.start() and
  322. left_edge.next() == right_edge.lhs() and
  323. left_edge.is_incomplete() and right_edge.is_complete()):
  324. return
  325. # Make sure feature structures are compatiable, Modified by howard
  326. fs = left_edge.fs().unify(right_edge.fs())
  327. if (fs == None):
  328. return
  329. # Construct the new edge.
  330. new_edge = TreeEdge(span=(left_edge.start(), right_edge.end()),
  331. lhs=left_edge.lhs(), rhs=left_edge.rhs(),
  332. fs = fs, #Modified by howard
  333. dot=left_edge.dot()+1)
  334. # Add it to the chart, with appropraite child pointers.
  335. changed_chart = False
  336. for cpl1 in chart.child_pointer_lists(left_edge):
  337. if chart.insert(new_edge, cpl1+(right_edge,)):
  338. changed_chart = True
  339. # If we changed the chart, then generate the edge.
  340. if changed_chart: yield new_edge
  341. class CompleterRule(chart.CompleterRule):
  342. """
  343. This CompleterRule extends nltk.parser.chart.CompleterRule.
  344. The only modification is setting the member variable
  345. '_fundamental_rule' to FundeamentalRule() in this module.
  346. See FundamentalRule for the details.
  347. """
  348. NUM_EDGES=1
  349. _fundamental_rule = FundamentalRule()
  350. class ScannerRule(AbstractChartRule):
  351. """
  352. This ScannerRule is modified for:
  353. 1. Supply the newly generated LeafEdge with feature structure
  354. 2. If there are lexcion rules that having the same rule but
  355. different feature structures, insert all of them into chart
  356. """
  357. NUM_EDGES=1
  358. def __init__(self, word_to_pos_lexicon, lexicon_fs):
  359. self._word_to_pos = word_to_pos_lexicon
  360. self._lexicon_fs = lexicon_fs #Modified by howard
  361. def apply_iter(self, chart, gramar, edge):
  362. if edge.is_complete() or edge.end()>=chart.num_leaves(): return
  363. index = edge.end()
  364. leaf = chart.leaf(index)
  365. #Generate all LeafEdges with same leaf (string) but different
  366. #feature structures, Modified by howard
  367. for i in range(len(self._word_to_pos.get(leaf, []))):
  368. #a leaf points to a list of fs and rules
  369. literal = self._word_to_pos.get(leaf, [])[i]
  370. fs = self._lexicon_fs.get(leaf, [FeatureStructure()])[i]
  371. if edge.next() == literal:
  372. new_leaf_edge = LeafEdge(leaf, index, fs) #Modified by howard
  373. if chart.insert(new_leaf_edge, ()):
  374. yield new_leaf_edge
  375. new_pos_edge = TreeEdge((index,index+1), edge.next(),
  376. [leaf], fs, 1) #Modified by howard
  377. if chart.insert(new_pos_edge, (new_leaf_edge,)):
  378. yield new_pos_edge
  379. class PredictorRule(TopDownExpandRule):
  380. """
  381. This PredictorRule is modified for:
  382. 1. Supply the newly generated TreeEdge with feature structure
  383. 2. If there are grammatical rules that having the same rule but
  384. different feature structures, insert all of them into chart
  385. """
  386. NUM_EDGES=1
  387. def apply_iter(self, chart, grammar, edge):
  388. if edge.is_complete(): return
  389. #Retrieve both productions and fs, modified by howard
  390. for i in range(len(grammar.productions())):
  391. prod = grammar.productions()[i]
  392. fs = grammar.fs()[i]
  393. if edge.next() == prod.lhs():
  394. new_edge = TreeEdge.from_production(prod, edge.end(), fs)
  395. if chart.insert(new_edge, ()):
  396. yield new_edge
  397. ########################################################################
  398. ## Simple Earley Chart Parser
  399. ########################################################################
  400. class EarleyChartParser(AbstractParser):
  401. """
  402. This EarleyChartParser is modified for:
  403. 1. Store the feature structure of lexicon
  404. 2. Use PredictorRule, CompleterRule, ScannerRule in this module
  405. My suggestion to nltk.chart.EarleyChartParser:
  406. Rule variables can be promoted as member variables (in 'self'),
  407. so that I can set these variables in a child class.
  408. for example:
  409. predictor -> self._predictor
  410. completer -> self._completer
  411. scanner -> self._scanner
  412. By doing this, the new EarleyChartParser can simply extend
  413. chart.EarleyChartParser and override only __init__ to set
  414. these variables. This can reduce the trouble of overriding
  415. get_parser_list().
  416. """
  417. def __init__(self, grammar, lexicon, lexicon_fs, trace=0, **property_names): #Modified by howard
  418. """
  419. Store the feature structure of lexicon
  420. """
  421. self._grammar = grammar
  422. self._lexicon = lexicon
  423. self._lexicon_fs = lexicon_fs #Modified by howard
  424. self._trace = trace
  425. AbstractParser.__init__(self, **property_names)
  426. def get_parse_list(self, token):
  427. chart = Chart(token, **self.property_names())
  428. grammar = self._grammar
  429. # Width, for printing trace edges.
  430. w = 50/(chart.num_leaves()+1)
  431. if self._trace > 0: print ' ', chart.pp_leaves(w)
  432. # Initialize the chart with a special "starter" edge.
  433. root = Nonterminal('[INIT]')
  434. edge = TreeEdge((0,0), root, (grammar.start(),))
  435. chart.insert(edge, ())
  436. # Create the 3 rules:
  437. predictor = PredictorRule()
  438. completer = CompleterRule()
  439. scanner = ScannerRule(self._lexicon, self._lexicon_fs) #Modified by howard
  440. for end in range(chart.num_leaves()+1):
  441. if self._trace > 1: print 'Processing queue %d' % end
  442. for edge in chart.select(end=end):
  443. if edge.is_incomplete():
  444. for e in predictor.apply(chart, grammar, edge):
  445. if self._trace > 0:
  446. print 'Predictor', chart.pp_edge(e,w)
  447. if edge.is_incomplete():
  448. for e in scanner.apply(chart, grammar, edge):
  449. if self._trace > 0:
  450. print 'Scanner ', chart.pp_edge(e,w)
  451. if edge.is_complete():
  452. for e in completer.apply(chart, grammar, edge):
  453. if self._trace > 0:
  454. print 'Completer', chart.pp_edge(e,w)
  455. # Output a list of complete parses.
  456. return chart.parses(grammar.start())
  457. ########################################################################
  458. ## Demo Code
  459. ########################################################################
  460. def demo():
  461. """
  462. A demonstration of the chart parsers.
  463. """
  464. import sys, time
  465. # Define some nonterminals
  466. S, VP, NP, PP = nonterminals('S, VP, NP, PP')
  467. V, N, P, Name, Det = nonterminals('V, N, P, Name, Det')
  468. # Define some grammatical productions.
  469. grammatical_productions = [
  470. CFGProduction(S, [NP, VP]), #1
  471. CFGProduction(PP, [P, NP]), #2
  472. CFGProduction(NP, [Det, N]), #3
  473. CFGProduction(NP, [NP, PP]), #4
  474. CFGProduction(VP, [VP, PP]), #5
  475. CFGProduction(VP, [V, NP]), #6
  476. CFGProduction(VP, [V]) #7
  477. ]
  478. # Define feature structures for productions
  479. grammatical_fs = [
  480. FeatureStructure(['<NP AGR>=<VP AGR>']), #1
  481. FeatureStructure(), #2
  482. FeatureStructure(['<NP AGR>=<N AGR>',
  483. '<Det AGR>=<N AGR>']), #3
  484. FeatureStructure(), #4
  485. FeatureStructure(), #5
  486. FeatureStructure(['<VP AGR>=<V AGR>']), #6
  487. FeatureStructure(['<VP AGR>=<V AGR>']) #7
  488. ]
  489. # Define some lexical productions.
  490. lexical_productions = [
  491. CFGProduction(Det, ['this']), #1
  492. CFGProduction(Det, ['these']), #2
  493. CFGProduction(N, ['dog']), #3
  494. CFGProduction(N, ['dogs']), #4
  495. CFGProduction(NP, ['I']), #5
  496. CFGProduction(V, ['eat']), #6
  497. CFGProduction(V, ['eat']), #7
  498. CFGProduction(V, ['eats']), #8
  499. CFGProduction(Det, ['the']), #9
  500. CFGProduction(N, ['cookie']), #10
  501. ]
  502. lexical_fs = [
  503. FeatureStructure(['<Det AGR Num>=sg', '<Det AGR Person>=3rd']), #1
  504. FeatureStructure(['<Det AGR Num>=pl', '<Det AGR Person>=3rd']), #2
  505. FeatureStructure(['<N AGR Num>=sg', '<N AGR Person>=3rd']), #3
  506. FeatureStructure(['<N AGR Num>=pl', '<N AGR Person>=3rd']), #4
  507. FeatureStructure(['<NP AGR Num>=sg', '<NP AGR Person>=1st']), #5
  508. FeatureStructure(['<V AGR Num>=sg', '<V AGR Person>=1st']), #6
  509. FeatureStructure(['<V AGR Num>=pl', '<V AGR Person>=3rd']), #7
  510. FeatureStructure(['<V AGR Num>=sg', '<V AGR Person>=3rd']), #8
  511. FeatureStructure(), #9
  512. FeatureStructure(), #10
  513. ]
  514. # Convert the grammar productions to an earley-style lexicon.
  515. earley_lexicon = {}
  516. earley_lexicon_fs = {}
  517. for i in range(len(lexical_productions)):
  518. prod = lexical_productions[i]
  519. fs = lexical_fs[i]
  520. earley_lexicon.setdefault(prod.rhs()[0], []).append(prod.lhs())
  521. earley_lexicon_fs.setdefault(prod.rhs()[0], []).append(fs)
  522. # The grammar for EarleyChartParser:
  523. earley_grammar = CFG_fs(S, grammatical_productions, grammatical_fs)
  524. # Tokenize a sample sentence.
  525. test_strings = [
  526. #Det<->Noun Agreement
  527. 'this dog eats the cookie',
  528. 'this dogs eats the cookie',
  529. 'these dog eats the cookie',
  530. #Subject<->Verb Agreement
  531. 'these dogs eat the cookie',
  532. 'these dogs eats the cookie',
  533. #Subject<->Verb Agreement, eat could be either [1st, sg] or [3rd, pl]
  534. 'I eat the cookie',
  535. 'I eats the cookie',
  536. ]
  537. times = {}
  538. result = {}
  539. for txt in test_strings:
  540. sent = Token(TEXT=txt)
  541. print "Sentence:\n", sent
  542. from nltk.tokenizer import WhitespaceTokenizer
  543. WhitespaceTokenizer(SUBTOKENS='WORDS').tokenize(sent)
  544. # Keep track of how long each parser takes.
  545. cp = EarleyChartParser(earley_grammar, earley_lexicon, earley_lexicon_fs,
  546. LEAF='TEXT', SUBTOKENS='WORDS', trace=1)
  547. t = time.time()
  548. parses = cp.get_parse_list(sent)
  549. times[txt] = time.time()-t
  550. result[txt] = len(parses)
  551. print '----------------------------------------------------------------------------------'
  552. # Print the times of all parsers:
  553. maxlen = max([len(key) for key in times.keys()])
  554. format = '%' + `maxlen+2` + 's: %15s. %6.3fsec'
  555. print "***Summary***"
  556. for txt in test_strings:
  557. if(result[txt]>0):
  558. print format % ('"'+txt+'"', 'Parsing Succeed', times[txt])
  559. else:
  560. print format % ('"'+txt+'"', 'Parsing Fail', times[txt])
  561. if __name__ == '__main__': demo()