PageRenderTime 52ms CodeModel.GetById 18ms RepoModel.GetById 0ms app.codeStats 0ms

/projects/antlr-3.4/runtime/ObjC/Framework/ANTLRBaseRecognizer.m

https://gitlab.com/essere.lab.public/qualitas.class-corpus
Objective C | 1129 lines | 641 code | 76 blank | 412 comment | 140 complexity | ba76a637ad9324e64e7b7ba9d1cd973c MD5 | raw file
  1. //
  2. // ANTLRBaseRecognizer.m
  3. // ANTLR
  4. //
  5. // Created by Alan Condit on 6/16/10.
  6. // [The "BSD licence"]
  7. // Copyright (c) 2010 Alan Condit
  8. // All rights reserved.
  9. //
  10. // Redistribution and use in source and binary forms, with or without
  11. // modification, are permitted provided that the following conditions
  12. // are met:
  13. // 1. Redistributions of source code must retain the above copyright
  14. // notice, this list of conditions and the following disclaimer.
  15. // 2. Redistributions in binary form must reproduce the above copyright
  16. // notice, this list of conditions and the following disclaimer in the
  17. // documentation and/or other materials provided with the distribution.
  18. // 3. The name of the author may not be used to endorse or promote products
  19. // derived from this software without specific prior written permission.
  20. //
  21. // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  22. // IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  23. // OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  24. // IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  25. // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  26. // NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  27. // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  28. // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  29. // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  30. // THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  31. #import "ANTLRBaseRecognizer.h"
  32. #import "ANTLRHashRule.h"
  33. #import "ANTLRRuleMemo.h"
  34. #import "ANTLRCommonToken.h"
  35. #import "ANTLRMap.h"
  36. extern NSInteger debug;
  37. @implementation ANTLRBaseRecognizer
  // Class-wide token name table: returned by +getTokenNames, replaced by
  // +setTokenNames:, and copied into each instance's tokenNames by the initializers.
  38. static AMutableArray *_tokenNames;
  // Class-wide grammar file name, managed by +setGrammarFileName: and copied
  // into each instance's grammarFileName by the initializers.
  39. static NSString *_grammarFileName;
  // Set to @"nextToken" (and retained) by +initialize.
  40. static NSString *NEXT_TOKEN_RULE_NAME;
  41. @synthesize state;
  42. @synthesize grammarFileName;
  43. //@synthesize failed;
  44. @synthesize sourceName;
  45. //@synthesize numberOfSyntaxErrors;
  46. @synthesize tokenNames;
  /** One-time class setup: creates the shared NEXT_TOKEN_RULE_NAME string. */
  + (void) initialize
  {
      // The runtime also sends +initialize on behalf of every subclass that
      // does not override it, so guard against repeating the setup (and
      // stacking up extra retains) for each such subclass.
      if ( self == [ANTLRBaseRecognizer class] ) {
          NEXT_TOKEN_RULE_NAME = [NSString stringWithString:@"nextToken"];
          [NEXT_TOKEN_RULE_NAME retain];
      }
  }
  /** Allocate a recognizer and set it up with -init. */
  + (ANTLRBaseRecognizer *) newANTLRBaseRecognizer
  {
      ANTLRBaseRecognizer *recognizer = [ANTLRBaseRecognizer alloc];
      return [recognizer init];
  }
  /** Allocate a recognizer and set it up with -initWithLen:, forwarding aLen. */
  + (ANTLRBaseRecognizer *) newANTLRBaseRecognizerWithRuleLen:(NSInteger)aLen
  {
      ANTLRBaseRecognizer *recognizer = [ANTLRBaseRecognizer alloc];
      return [recognizer initWithLen:aLen];
  }
  /** Allocate a recognizer and set it up with -initWithState:, forwarding aState. */
  + (ANTLRBaseRecognizer *) newANTLRBaseRecognizer:(ANTLRRecognizerSharedState *)aState
  {
      ANTLRBaseRecognizer *recognizer = [ANTLRBaseRecognizer alloc];
      return [recognizer initWithState:aState];
  }
  /** Return the class-wide token name table (nil until +setTokenNames: is called). */
  + (AMutableArray *)getTokenNames
  {
      AMutableArray *sharedNames = _tokenNames;
      return sharedNames;
  }
  /** Replace the class-wide token name table, releasing the old one and
   *  retaining the new one when they differ.
   */
  + (void)setTokenNames:(AMutableArray *)theTokNams
  {
      BOOL isDifferentTable = ( theTokNams != _tokenNames );
      if ( isDifferentTable ) {
          // Messaging nil is a no-op, so no explicit nil test is needed.
          [_tokenNames release];
          [theTokNams retain];
      }
      _tokenNames = theTokNams;
  }
  /** Replace the class-wide grammar file name.
   *
   *  The new name is retained and the previous one released; passing the
   *  value that is already stored is a no-op. Mirrors +setTokenNames:.
   */
  + (void)setGrammarFileName:(NSString *)aFileName
  {
      if ( _grammarFileName != aFileName ) {
          // Retain the incoming value before releasing the old one, then
          // actually store it. The previous code never assigned the static
          // and instead re-retained the old (possibly just released) string.
          [aFileName retain];
          [_grammarFileName release];
          _grammarFileName = aFileName;
      }
  }
  /** Default initializer: creates a shared state object if none exists yet,
   *  copies the class-wide token names and grammar file name, and puts the
   *  state into its initial (no error, not backtracking) configuration.
   */
  - (id) init
  {
      self = [super init];
      if ( self == nil ) {
          return nil;
      }
      if ( state == nil ) {
          state = [[ANTLRRecognizerSharedState newANTLRRecognizerSharedState] retain];
      }
      // -retain returns its receiver, and messaging nil yields nil.
      tokenNames = [_tokenNames retain];
      grammarFileName = [_grammarFileName retain];

      state._fsp = -1;                   // follow stack is empty
      state.errorRecovery = NO;          // not recovering from an error
      state.lastErrorIndex = -1;
      state.failed = NO;                 // no match has failed yet
      state.syntaxErrors = 0;
      state.backtracking = 0;            // backtracking depth
      state.tokenStartCharIndex = -1;
      return self;
  }
  /** Like -init, but a missing shared state is created through
   *  +newANTLRRecognizerSharedStateWithRuleLen: using aLen.
   */
  - (id) initWithLen:(NSInteger)aLen
  {
      self = [super init];
      if ( self == nil ) {
          return nil;
      }
      if ( state == nil ) {
          state = [[ANTLRRecognizerSharedState newANTLRRecognizerSharedStateWithRuleLen:aLen] retain];
      }
      // -retain returns its receiver, and messaging nil yields nil.
      tokenNames = [_tokenNames retain];
      grammarFileName = [_grammarFileName retain];

      state._fsp = -1;                   // follow stack is empty
      state.errorRecovery = NO;          // not recovering from an error
      state.lastErrorIndex = -1;
      state.failed = NO;                 // no match has failed yet
      state.syntaxErrors = 0;
      state.backtracking = 0;            // backtracking depth
      state.tokenStartCharIndex = -1;
      return self;
  }
  /** Initialize with an externally supplied shared state (a fresh one is
   *  created when aState is nil). The state is retained and then reset to
   *  its initial configuration, exactly as in -init.
   */
  - (id) initWithState:(ANTLRRecognizerSharedState *)aState
  {
      self = [super init];
      if ( self == nil ) {
          return nil;
      }
      state = ( aState != nil ) ? aState
                                : [ANTLRRecognizerSharedState newANTLRRecognizerSharedState];
      [state retain];
      // -retain returns its receiver, and messaging nil yields nil.
      tokenNames = [_tokenNames retain];
      grammarFileName = [_grammarFileName retain];

      state._fsp = -1;                   // follow stack is empty
      state.errorRecovery = NO;          // not recovering from an error
      state.lastErrorIndex = -1;
      state.failed = NO;                 // no match has failed yet
      state.syntaxErrors = 0;
      state.backtracking = 0;            // backtracking depth
      state.tokenStartCharIndex = -1;
      return self;
  }
  /** Release the grammar file name, token names and shared state held by this instance. */
  - (void)dealloc
  {
  #ifdef DEBUG_DEALLOC
      NSLog( @"called dealloc in ANTLRBaseRecognizer" );
  #endif
      // Sending -release to nil does nothing, so the ivars need no nil tests.
      [grammarFileName release];
      [tokenNames release];
      [state release];
      [super dealloc];
  }
  // Put the recognizer back into its initial state; the token source is left
  // untouched. Grammar writers may extend this to reset their own ivars.
  - (void) reset
  {
      if ( state == nil ) {
          return;
      }
      // Empty the follow stack if it exists and holds anything.
      if ( state.following != nil && [state.following count] ) {
          [state.following removeAllObjects];
      }
      state._fsp = -1;
      state.errorRecovery = NO;          // not recovering from an error
      state.lastErrorIndex = -1;
      state.failed = NO;                 // no match has failed yet
      state.syntaxErrors = 0;
      state.backtracking = 0;            // backtracking depth
      state.tokenStartCharIndex = -1;
      // Drop any memoized rule results.
      if ( state.ruleMemo != nil && [state.ruleMemo count] ) {
          [state.ruleMemo removeAllObjects];
      }
  }
  /** Return the failed flag of the shared state. */
  - (BOOL) getFailed
  {
      BOOL didFail = [state getFailed];
      return didFail;
  }
  /** Store flag as the failed flag of the shared state. */
  - (void) setFailed:(BOOL)flag
  {
      ANTLRRecognizerSharedState *sharedState = state;
      [sharedState setFailed:flag];
  }
  /** Return the shared state object this recognizer operates on. */
  - (ANTLRRecognizerSharedState *) getState
  {
      ANTLRRecognizerSharedState *current = state;
      return current;
  }
  /** Swap in a new shared state: the old one is released and the new one
   *  retained. Setting the state that is already installed does nothing.
   */
  - (void) setState:(ANTLRRecognizerSharedState *) theState
  {
      if ( state == theState ) {
          return;
      }
      [state release];   // no-op when there was no previous state
      state = theState;
      [state retain];
  }
  /** Placeholder accessor for the input stream; the base class has none and
   *  returns nil. Inheriting classes must override this.
   */
  - (id)input
  {
      id noInput = nil;
      return noInput;
  }
  /** Does nothing in the base class; override in an inheriting class. */
  - (void)skip
  {
  }
  /** Match the current input symbol against ttype.
   *
   *  On success the symbol is consumed, the errorRecovery and failed flags are
   *  cleared, and the symbol obtained from -getCurrentInputSymbol: is returned.
   *  On a mismatch while backtracking, only the failed flag is set. Otherwise
   *  the result of -recoverFromMismatchedToken:TokenType:Follow: is returned.
   */
  -(id) match:(id<ANTLRIntStream>)anInput TokenType:(NSInteger)ttype Follow:(ANTLRBitSet *)follow
  {
      id currentSymbol = [self getCurrentInputSymbol:anInput];
      if ( [anInput LA:1] != ttype ) {
          if ( state.backtracking > 0 ) {
              // Just record the failure; no recovery while backtracking.
              state.failed = YES;
              return currentSymbol;
          }
          return [self recoverFromMismatchedToken:anInput TokenType:ttype Follow:follow];
      }
      [anInput consume];
      state.errorRecovery = NO;
      state.failed = NO;
      return currentSymbol;
  }
  /** Accept whatever symbol is current: clear the errorRecovery and failed
   *  flags, then consume one symbol from anInput.
   */
  -(void) matchAny:(id<ANTLRIntStream>)anInput
  {
      ANTLRRecognizerSharedState *sharedState = state;
      sharedState.errorRecovery = NO;
      sharedState.failed = NO;
      [anInput consume];
  }
  /** YES when the symbol after the current one, LA(2), has type ttype, i.e.
   *  the current symbol looks like a single extra token.
   */
  -(BOOL) mismatchIsUnwantedToken:(id<ANTLRIntStream>)anInput TokenType:(NSInteger)ttype
  {
      BOOL nextIsExpected = ( [anInput LA:2] == ttype );
      return nextIsExpected;
  }
  /** Decide whether a mismatch can be treated as a single missing token.
   *
   *  Returns YES when LA(1) is a member of what may follow the expected
   *  element, or when EOR is still in that set. Returns NO when no follow
   *  information is available.
   */
  -(BOOL) mismatchIsMissingToken:(id<ANTLRIntStream>)anInput Follow:(ANTLRBitSet *) follow
  {
      // Without follow information we can only consume a single token and
      // hope for the best.
      if ( follow == nil ) {
          return NO;
      }
      // If the end of the rule is visible, widen the set with what can follow
      // this rule in the current call chain.
      if ( [follow member:ANTLRTokenTypeEOR] ) {
          ANTLRBitSet *contextFollow = [self computeContextSensitiveRuleFOLLOW];
          follow = [follow or:contextFollow];
          // Remove EOR unless we are the start symbol.
          if ( state._fsp >= 0 ) {
              [follow remove:(ANTLRTokenTypeEOR)];
          }
      }
      // If the current token is consistent with what could come next, a token
      // is missing and error recovery is free to "insert" it.
      if ( [follow member:[anInput LA:1]] ) {
          return YES;
      }
      // The bit set cannot hold negative numbers such as -1 (EOF), so EOR is
      // left in the set to signal that the follow of the start symbol (EOF)
      // is allowed.
      if ( [follow member:ANTLRTokenTypeEOR] ) {
          return YES;
      }
      return NO;
  }
  /** Report a recognition problem.
   *
   *  Sets errorRecovery to mark the parser as recovering rather than parsing.
   *  While in recovery mode no further errors are reported; the parser leaves
   *  recovery mode by successfully matching a token after a resync:
   *
   *   1. an error occurs
   *   2. recovery mode is entered and the error is reported
   *   3. input is consumed until a token in the resynch set is found
   *   4. parsing is resumed
   *   5. the next successful match resets errorRecovery
   *
   *  If you override this, update syntaxErrors yourself if you care about it.
   */
  -(void) reportError:(ANTLRRecognitionException *) e
  {
      // Already reported an error and nothing matched since: stay silent so
      // spurious follow-up errors are neither shown nor counted.
      if ( state.errorRecovery ) {
          return;
      }
      state.syntaxErrors = state.syntaxErrors + 1;
      state.errorRecovery = YES;
      [self displayRecognitionError:[self getTokenNames] Exception:e];
  }
  /** Build the header and message text for e and pass them, joined in one
   *  string, to -emitErrorMessage:.
   */
  -(void) displayRecognitionError:(AMutableArray *)theTokNams Exception:(ANTLRRecognitionException *)e
  {
      NSString *header = [self getErrorHeader:e];
      NSString *body = [self getErrorMessage:e TokenNames:theTokNams];
      NSString *line = [NSString stringWithFormat:@" %@ %@", header, body];
      [self emitErrorMessage:line];
  }
  /** Map a token type to the name shown in error messages.
   *
   *  Returns "EOF" for ANTLRTokenTypeEOF, the entry of theTokNams at index
   *  ttype when that index lies inside the table, and "<unknown>" otherwise
   *  (nil table, negative type, or type past the end) instead of indexing
   *  outside the table while an error is being reported.
   */
  static NSString *ANTLRErrorDisplayNameForTokenType(AMutableArray *theTokNams, NSInteger ttype)
  {
      if ( ttype == ANTLRTokenTypeEOF ) {
          return @"EOF";
      }
      if ( theTokNams != nil && ttype >= 0 && ttype < (NSInteger)[theTokNams count] ) {
          return (NSString *)[theTokNams objectAtIndex:ttype];
      }
      return @"<unknown>";
  }

  /** What error message should be generated for the various exception types?
   *
   *  Not very object-oriented code, but all error message generation lives in
   *  one method rather than being spread among the exception classes. This
   *  keeps the exception classes free of pointers back to this object for
   *  utility routines. It also keeps messages easy to change: otherwise you
   *  would have to subclass an exception and then somehow get ANTLR to create
   *  that subclass instead of the default.
   *
   *  For grammar debugging you will want to override this to add more
   *  information, such as the rule invocation stack and, for no viable
   *  alternatives, the decision description and state.
   *
   *  Override this to change the message generated for one or more
   *  exception types.
   *
   *  @param e           the exception to describe
   *  @param theTokNams  token name table indexed by token type
   *  @return the message text (without the line/position header)
   */
  - (NSString *)getErrorMessage:(ANTLRRecognitionException *)e TokenNames:(AMutableArray *)theTokNams
  {
      NSString *msg;
      // The unwanted/missing token tests come before the general mismatched
      // token test; that order is kept from the original code.
      if ( [e isKindOfClass:[ANTLRUnwantedTokenException class]] ) {
          ANTLRUnwantedTokenException *ute = (ANTLRUnwantedTokenException *)e;
          NSString *tokenName = ANTLRErrorDisplayNameForTokenType(theTokNams, ute.expecting);
          msg = [NSString stringWithFormat:@"extraneous input %@ expecting %@", [self getTokenErrorDisplay:[ute getUnexpectedToken]],
                 tokenName];
      }
      else if ( [e isKindOfClass:[ANTLRMissingTokenException class] ] ) {
          ANTLRMissingTokenException *mte = (ANTLRMissingTokenException *)e;
          NSString *tokenName = ANTLRErrorDisplayNameForTokenType(theTokNams, mte.expecting);
          msg = [NSString stringWithFormat:@"missing %@ at %@", tokenName, [self getTokenErrorDisplay:(e.token)] ];
      }
      else if ( [e isKindOfClass:[ANTLRMismatchedTokenException class]] ) {
          ANTLRMismatchedTokenException *mte = (ANTLRMismatchedTokenException *)e;
          NSString *tokenName = ANTLRErrorDisplayNameForTokenType(theTokNams, mte.expecting);
          msg = [NSString stringWithFormat:@"mismatched input %@ expecting %@",[self getTokenErrorDisplay:(e.token)], tokenName];
      }
      else if ( [e isKindOfClass:[ANTLRMismatchedTreeNodeException class]] ) {
          ANTLRMismatchedTreeNodeException *mtne = (ANTLRMismatchedTreeNodeException *)e;
          NSString *tokenName = ANTLRErrorDisplayNameForTokenType(theTokNams, mtne.expecting);
          msg = [NSString stringWithFormat:@"mismatched tree node: %@ expecting %@", mtne.node, tokenName];
      }
      else if ( [e isKindOfClass:[ANTLRNoViableAltException class]] ) {
          // For development, the decision description, decision number and
          // state number of the exception could be added here.
          msg = [NSString stringWithFormat:@"no viable alternative at input %@", [self getTokenErrorDisplay:e.token]];
      }
      else if ( [e isKindOfClass:[ANTLREarlyExitException class]] ) {
          // For development, the decision number could be added here.
          // The format previously had no %@, so the token display that was
          // passed as an argument never appeared in the message.
          msg = [NSString stringWithFormat:@"required (...)+ loop did not match anything at input %@", [self getTokenErrorDisplay:e.token]];
      }
      else if ( [e isKindOfClass:[ANTLRMismatchedSetException class]] ) {
          ANTLRMismatchedSetException *mse = (ANTLRMismatchedSetException *)e;
          msg = [NSString stringWithFormat:@"mismatched input %@ expecting set %@",
                 [self getTokenErrorDisplay:(e.token)],
                 mse.expecting];
      }
  #pragma warning NotSet not yet implemented.
      // NOTE(review): if ANTLRMismatchedNotSetException derives from
      // ANTLRMismatchedSetException, the branch above already catches it and
      // this one is unreachable. Confirm against the class hierarchy.
      else if ( [e isKindOfClass:[ANTLRMismatchedNotSetException class] ] ) {
          ANTLRMismatchedNotSetException *mse = (ANTLRMismatchedNotSetException *)e;
          msg = [NSString stringWithFormat:@"mismatched input %@ expecting set %@",
                 [self getTokenErrorDisplay:(e.token)],
                 mse.expecting];
      }
      else if ( [e isKindOfClass:[ANTLRFailedPredicateException class]] ) {
          ANTLRFailedPredicateException *fpe = (ANTLRFailedPredicateException *)e;
          msg = [NSString stringWithFormat:@"rule %@ failed predicate: { %@ }?", fpe.ruleName, fpe.predicate];
      }
      else {
          msg = [NSString stringWithFormat:@"Exception= %@\n", e.name];
      }
      return msg;
  }
  /** Number of recognition errors counted so far (lexer, parser, tree parser).
   *  Every recognizer keeps its own count, so a parser and its lexer report
   *  separate numbers. Spurious errors found between an error and the next
   *  valid token match are not counted.
   *
   *  See also -reportError:.
   */
  - (NSInteger) getNumberOfSyntaxErrors
  {
      NSInteger errorCount = state.syntaxErrors;
      return errorCount;
  }
  /** What is the error header, normally line/character position information?
   *
   *  @param e  the exception whose line and charPositionInLine are shown
   *  @return a string of the form "line L:C"
   */
  - (NSString *)getErrorHeader:(ANTLRRecognitionException *)e
  {
      // Cast to long and print with %ld so the output is well defined whether
      // the properties are int or NSInteger/NSUInteger (64-bit on LP64);
      // passing a 64-bit value to %d is undefined.
      return [NSString stringWithFormat:@"line %ld:%ld", (long)e.line, (long)e.charPositionInLine];
  }
  /** How should a token be displayed in an error message? The default is to
   *  display just its text, with newline, carriage return and tab shown as
   *  the two-character escapes \n, \r and \t, wrapped in single quotes.
   *  A token without text is shown as <EOF> or as its numeric type in angle
   *  brackets. During development you might want much more information;
   *  override this method in that case rather than changing your token
   *  objects or your lexer.
   *
   *  @param t  the token to describe
   *  @return the quoted display string
   */
  - (NSString *)getTokenErrorDisplay:(id<ANTLRToken>)t
  {
      NSString *s = t.text;
      if ( s == nil ) {
          if ( t.type == ANTLRTokenTypeEOF ) {
              s = @"<EOF>";
          }
          else {
              // t.type is an integer, so it must not be formatted with %@,
              // which would treat the number as an object pointer.
              s = [NSString stringWithFormat:@"<%ld>", (long)t.type];
          }
      }
      // stringByReplacingOccurrencesOfString: substitutes literally, so a
      // single escaped backslash is all that is needed here. The doubled
      // form came from Java's regex-based replaceAll and printed "\\n".
      s = [s stringByReplacingOccurrencesOfString:@"\n" withString:@"\\n"];
      s = [s stringByReplacingOccurrencesOfString:@"\r" withString:@"\\r"];
      s = [s stringByReplacingOccurrencesOfString:@"\t" withString:@"\\t"];
      return [NSString stringWithFormat:@"\'%@\'", s];
  }
  /** Destination for error messages; the default writes msg with NSLog.
   *  Override this method to send error messages somewhere else.
   */
  - (void) emitErrorMessage:(NSString *) msg
  {
      NSString *text = msg;
      NSLog(@"%@", text);
  }
  /** Recover from an error found on the input stream. This is for NoViableAlt
   *  and mismatched symbol exceptions. With single token insertion and
   *  deletion enabled this will usually not see mismatched symbol exceptions,
   *  but there can still be a mismatched token that the match routine could
   *  not recover from.
   */
  - (void)recover:(id<ANTLRIntStream>)anInput Exception:(ANTLRRecognitionException *)re
  {
      // A second error at the same token index means LT(1) is in the recovery
      // set and nothing was consumed last time. Consume one token as a
      // failsafe against looping forever.
      if ( state.lastErrorIndex == [anInput index] ) {
          [anInput consume];
      }
      state.lastErrorIndex = [anInput index];

      ANTLRBitSet *resyncSet = [self computeErrorRecoverySet];
      [self beginResync];
      [self consumeUntilFollow:anInput Follow:resyncSet];
      [self endResync];
  }
  /** Hook invoked just before tokens are consumed during resynchronization
   *  (see -recover:Exception: and -recoverFromMismatchedToken:TokenType:Follow:).
   *  Does nothing in the base class.
   */
  - (void) beginResync
  {
      // intentionally empty
  }
  /** Hook invoked right after tokens were consumed during resynchronization
   *  (see -recover:Exception: and -recoverFromMismatchedToken:TokenType:Follow:).
   *  Does nothing in the base class.
   */
  - (void) endResync
  {
      // intentionally empty
  }
  472. /* Compute the error recovery set for the current rule. During
  473. * rule invocation, the parser pushes the set of tokens that can
  474. * follow that rule reference on the stack; this amounts to
  475. * computing FIRST of what follows the rule reference in the
  476. * enclosing rule. This local follow set only includes tokens
  477. * from within the rule; i.e., the FIRST computation done by
  478. * ANTLR stops at the end of a rule.
  479. *
  480. * EXAMPLE
  481. *
  482. * When you find a "no viable alt exception", the input is not
  483. * consistent with any of the alternatives for rule r. The best
  484. * thing to do is to consume tokens until you see something that
  485. * can legally follow a call to r *or* any rule that called r.
  486. * You don't want the exact set of viable next tokens because the
  487. * input might just be missing a token--you might consume the
  488. * rest of the input looking for one of the missing tokens.
  489. *
  490. * Consider grammar:
  491. *
  492. * a : '[' b ']'
  493. * | '(' b ')'
  494. * ;
  495. * b : c '^' INT ;
  496. * c : ID
  497. * | INT
  498. * ;
  499. *
  500. * At each rule invocation, the set of tokens that could follow
  501. * that rule is pushed on a stack. Here are the various "local"
  502. * follow sets:
  503. *
  504. * FOLLOW(b1_in_a) = FIRST(']') = ']'
  505. * FOLLOW(b2_in_a) = FIRST(')') = ')'
  506. * FOLLOW(c_in_b) = FIRST('^') = '^'
  507. *
  508. * Upon erroneous input "[]", the call chain is
  509. *
  510. * a -> b -> c
  511. *
  512. * and, hence, the follow context stack is:
  513. *
  514. * depth local follow set after call to rule
  515. * 0 <EOF> a (from main())
  516. * 1 ']' b
  517. * 2 '^' c
  518. *
  519. * Notice that ')' is not included, because b would have to have
  520. * been called from a different context in rule a for ')' to be
  521. * included.
  522. *
  523. * For error recovery, we cannot consider FOLLOW(c)
  524. * (context-sensitive or otherwise). We need the combined set of
  525. * all context-sensitive FOLLOW sets--the set of all tokens that
  526. * could follow any reference in the call chain. We need to
  527. * resync to one of those tokens. Note that FOLLOW(c)='^' and if
  528. * we resync'd to that token, we'd consume until EOF. We need to
  529. * sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}.
  530. * In this case, for input "[]", LA(1) is in this set so we would
  531. * not consume anything and after printing an error rule c would
  532. * return normally. It would not find the required '^' though.
  533. * At this point, it gets a mismatched token error and throws an
  534. * exception (since LA(1) is not in the viable following token
  535. * set). The rule exception handler tries to recover, but finds
  536. * the same recovery set and doesn't consume anything. Rule b
  537. * exits normally returning to rule a. Now it finds the ']' (and
  538. * with the successful match exits errorRecovery mode).
  539. *
  540. * So, you can see that the parser walks up the call chain looking
  541. * for the token that was a member of the recovery set.
  542. *
  543. * Errors are not generated in errorRecovery mode.
  544. *
  545. * ANTLR's error recovery mechanism is based upon original ideas:
  546. *
  547. * "Algorithms + Data Structures = Programs" by Niklaus Wirth
  548. *
  549. * and
  550. *
  551. * "A note on error recovery in recursive descent parsers":
  552. * http://portal.acm.org/citation.cfm?id=947902.947905
  553. *
  554. * Later, Josef Grosch had some good ideas:
  555. *
  556. * "Efficient and Comfortable Error Recovery in Recursive Descent
  557. * Parsers":
  558. * ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip
  559. *
  560. * Like Grosch I implemented local FOLLOW sets that are combined
  561. * at run-time upon error to avoid overhead during parsing.
  562. */
  - (ANTLRBitSet *) computeErrorRecoverySet
  {
      // Non-exact combination: union of every local follow set on the stack.
      ANTLRBitSet *recoverySet = [self combineFollows:NO];
      return recoverySet;
  }
  567. /** Compute the context-sensitive FOLLOW set for current rule.
  568. * This is set of token types that can follow a specific rule
  569. * reference given a specific call chain. You get the set of
  570. * viable tokens that can possibly come next (lookahead depth 1)
  571. * given the current call chain. Contrast this with the
  572. * definition of plain FOLLOW for rule r:
  573. *
  574. * FOLLOW(r)={x | S=>*alpha r beta in G and x in FIRST(beta)}
  575. *
  576. * where x in T* and alpha, beta in V*; T is set of terminals and
  577. * V is the set of terminals and nonterminals. In other words,
  578. * FOLLOW(r) is the set of all tokens that can possibly follow
  579. * references to r in *any* sentential form (context). At
  580. * runtime, however, we know precisely which context applies as
  581. * we have the call chain. We may compute the exact (rather
  582. * than covering superset) set of following tokens.
  583. *
  584. * For example, consider grammar:
  585. *
  586. * stat : ID '=' expr ';' // FOLLOW(stat)=={EOF}
  587. * | "return" expr '.'
  588. * ;
  589. * expr : atom ('+' atom)* ; // FOLLOW(expr)=={';','.',')'}
  590. * atom : INT // FOLLOW(atom)=={'+',')',';','.'}
  591. * | '(' expr ')'
  592. * ;
  593. *
  594. * The FOLLOW sets are all inclusive whereas context-sensitive
  595. * FOLLOW sets are precisely what could follow a rule reference.
  596. * For input "i=(3);", here is the derivation:
  597. *
  598. * stat => ID '=' expr ';'
  599. * => ID '=' atom ('+' atom)* ';'
  600. * => ID '=' '(' expr ')' ('+' atom)* ';'
  601. * => ID '=' '(' atom ')' ('+' atom)* ';'
  602. * => ID '=' '(' INT ')' ('+' atom)* ';'
  603. * => ID '=' '(' INT ')' ';'
  604. *
  605. * At the "3" token, you'd have a call chain of
  606. *
  607. * stat -> expr -> atom -> expr -> atom
  608. *
  609. * What can follow that specific nested ref to atom? Exactly ')'
  610. * as you can see by looking at the derivation of this specific
  611. * input. Contrast this with the FOLLOW(atom)={'+',')',';','.'}.
  612. *
  613. * You want the exact viable token set when recovering from a
  614. * token mismatch. Upon token mismatch, if LA(1) is member of
  615. * the viable next token set, then you know there is most likely
  616. * a missing token in the input stream. "Insert" one by just not
  617. * throwing an exception.
  618. */
  - (ANTLRBitSet *)computeContextSensitiveRuleFOLLOW
  {
      // Exact combination: stop at the first local follow set without EOR.
      ANTLRBitSet *exactFollow = [self combineFollows:YES];
      return exactFollow;
  }
  // Union the local follow sets on the follow stack, from the top (index
  // state._fsp) down to index 0. With exact == YES, walking stops after the
  // first set that does not contain EOR, and EOR is removed from the result
  // unless it was seen at index 0. With exact == NO, every set is merged.
  // (Open question kept from the original author: why would we ever want
  // them all? Maybe for no viable alt instead of mismatched token?)
  - (ANTLRBitSet *)combineFollows:(BOOL) exact
  {
      NSInteger top = state._fsp;
      ANTLRBitSet *combined = [[ANTLRBitSet newANTLRBitSet] retain];
      int depth = (int)top;
      while ( depth >= 0 ) {
          ANTLRBitSet *localFollow = (ANTLRBitSet *)[state.following objectAtIndex:depth];
          [combined orInPlace:localFollow];
          if ( exact ) {
              // Cannot see the end of the rule from here: stop climbing.
              if ( ![localFollow member:ANTLRTokenTypeEOR] ) {
                  break;
              }
              // Only leave EOR in the set at the bottom (start rule), which
              // tells us whether follow(start rule), i.e. EOF, is included.
              if ( depth > 0 ) {
                  [combined remove:ANTLRTokenTypeEOR];
              }
          }
          depth--;
      }
      return combined;
  }
  654. /** Attempt to recover from a single missing or extra token.
  655. *
  656. * EXTRA TOKEN
  657. *
  658. * LA(1) is not what we are looking for. If LA(2) has the right token,
  659. * however, then assume LA(1) is some extra spurious token. Delete it
  660. * and LA(2) as if we were doing a normal match(), which advances the
  661. * input.
  662. *
  663. * MISSING TOKEN
  664. *
  665. * If current token is consistent with what could come after
  666. * ttype then it is ok to "insert" the missing token, else throw an
  667. * exception. For example, input "i=(3;" is clearly missing the
  668. * ')'. When the parser returns from the nested call to expr, it
  669. * will have call chain:
  670. *
  671. * stat -> expr -> atom
  672. *
  673. * and it will be trying to match the ')' at this point in the
  674. * derivation:
  675. *
  676. * => ID '=' '(' INT ')' ('+' atom)* ';'
  677. * ^
  678. * match() will see that ';' doesn't match ')' and report a
  679. * mismatched token error. To recover, it sees that LA(1)==';'
  680. * is in the set of tokens that can follow the ')' token
  681. * reference in rule atom. It can assume that you forgot the ')'.
  682. */
  - (id<ANTLRToken>)recoverFromMismatchedToken:(id<ANTLRIntStream>)anInput
                                     TokenType:(NSInteger)ttype
                                        Follow:(ANTLRBitSet *)follow
  {
      // Single token deletion: the token after the current one is what we
      // are looking for, so treat the current token as extra.
      if ( [self mismatchIsUnwantedToken:anInput TokenType:ttype] ) {
          ANTLRRecognitionException *unwanted =
              [ANTLRUnwantedTokenException newException:ttype Stream:anInput];
          [self beginResync];
          [anInput consume];              // simply delete the extra token
          [self endResync];
          // Reported after consuming; the exception was created beforehand
          // and so still refers to the extra token.
          [self reportError:unwanted];
          // Return the token we are actually matching.
          id wanted = [self getCurrentInputSymbol:anInput];
          [anInput consume];              // move past the ttype token as if all were ok
          return wanted;
      }

      // Deletion did not apply; try single token insertion.
      if ( [self mismatchIsMissingToken:anInput Follow:follow] ) {
          // No exception exists yet at this point, so nil is handed over.
          id<ANTLRToken> inserted = [self getMissingSymbol:anInput Exception:nil TokenType:ttype Follow:follow];
          ANTLRRecognitionException *missing =
              [ANTLRMissingTokenException newException:ttype Stream:anInput With:inserted];
          [self reportError:missing];     // report after inserting
          return inserted;
      }

      // Neither strategy worked; the mismatch has to be thrown.
      @throw [ANTLRMismatchedTokenException newException:ttype Stream:anInput];
  }
  /** Not currently used.
   *
   *  If the mismatch looks like a missing token, e is reported and the result
   *  of -getMissingSymbol:... (asked for ANTLRTokenTypeInvalid) is returned;
   *  otherwise e is thrown.
   */
  -(id) recoverFromMismatchedSet:(id<ANTLRIntStream>)anInput
                       Exception:(ANTLRRecognitionException *)e
                          Follow:(ANTLRBitSet *) follow
  {
      // TODO do single token deletion like above for Token mismatch
      if ( ![self mismatchIsMissingToken:anInput Follow:follow] ) {
          @throw e;
      }
      [self reportError:e];
      // We do not know how to conjure up a token for sets yet.
      return [self getMissingSymbol:anInput Exception:e TokenType:ANTLRTokenTypeInvalid Follow:follow];
  }
  /** The match routine has to return the current input symbol, which ends up
   *  in the label of the associated token reference, e.g. x=ID. Token and
   *  tree parsers need to return different objects. Instead of testing the
   *  input stream type or changing the IntStream interface, the recognizer
   *  is simply asked what the current input symbol is.
   *
   *  The base implementation returns nil. This is ignored for lexers.
   */
  - (id) getCurrentInputSymbol:(id<ANTLRIntStream>)anInput
  {
      id noSymbol = nil;
      return noSymbol;
  }
  /**
   * Conjure up a stand-in for a token that is missing from the input.
   *
   * The recognizer tries to recover from a single missing symbol, yet
   * grammar actions may still refer to that symbol. Take x=ID {f($x);}:
   * the action assumes an identifier was matched and that $x points at it.
   * When the token is absent but the next token in the stream is the one
   * wanted afterwards, the recognizer assumes the token is missing and
   * carries on, so some object has to be returned in its place.
   *
   * This method is the customisation point for what gets returned.
   * Mostly you will want something special for identifier tokens; for
   * literals such as '{' and ',' the default behaviour of the parser or
   * tree parser (a CommonToken of the expected type whose text is the
   * token) is enough. If you change which tokens the lexer must create,
   * override this method to build matching ones.
   *
   * This base implementation does not build anything and returns nil.
   */
  - (id)getMissingSymbol:(id<ANTLRIntStream>)anInput
               Exception:(ANTLRRecognitionException *)e
               TokenType:(NSInteger)expectedTokenType
                  Follow:(ANTLRBitSet *)follow
  {
      return nil;
  }
  /**
   * Discard input symbols until the lookahead symbol is either `tokenType`
   * or ANTLRTokenTypeEOF. The symbol that stops the loop is not consumed.
   */
  - (void)consumeUntilTType:(id<ANTLRIntStream>)anInput TokenType:(NSInteger)tokenType
  {
      //System.out.println("consumeUntil "+tokenType);
      for (int lookahead = [anInput LA:1];
           lookahead != ANTLRTokenTypeEOF && lookahead != tokenType;
           lookahead = [anInput LA:1]) {
          [anInput consume];
      }
  }
  /**
   * Discard input symbols until the lookahead symbol is a member of `set`
   * or is ANTLRTokenTypeEOF. The symbol that stops the loop is not consumed.
   */
  - (void)consumeUntilFollow:(id<ANTLRIntStream>)anInput Follow:(ANTLRBitSet *)set
  {
      //System.out.println("consumeUntil("+set.toString(getTokenNames())+")");
      for (;;) {
          int lookahead = [anInput LA:1];
          // EOF is tested first so the set is never queried with the EOF type.
          if ( lookahead == ANTLRTokenTypeEOF || [set member:lookahead] ) {
              break;
          }
          //System.out.println("consume during recover LA(1)="+getTokenNames()[input.LA(1)]);
          [anInput consume];
      }
  }
  /**
   * Push a rule's follow set onto the follow stack held in the shared state.
   *
   * The slot above the current top is `_fsp + 1`. If that slot already
   * exists inside `state.following` it is overwritten with `fset`;
   * otherwise `fset` is appended (and additionally retained). In both
   * cases `_fsp` ends up one higher than before.
   */
  - (void)pushFollow:(ANTLRBitSet *)fset
  {
      BOOL slotExists = ( (state._fsp + 1) < [state.following count] );
      if ( slotExists ) {
          [state.following replaceObjectAtIndex:++state._fsp withObject:fset];
          return;
      }
      // No slot above the top yet: grow the stack by one entry.
      [state.following addObject:fset];
      [fset retain];
      state._fsp++;
  }
  /**
   * Pop the top follow set off the follow stack held in the shared state.
   *
   * Reads the entry at index `_fsp`, decrements `_fsp`, drops the last
   * element of `state.following` and returns the entry that was read.
   * When the stack is empty a message is logged and nil is returned.
   */
  - (ANTLRBitSet *)popFollow
  {
      if ( state._fsp < 0 || [state.following count] <= 0 ) {
          NSLog( @"Attempted to pop a follow when none exists on the stack\n" );
          return nil;
      }
      ANTLRBitSet *top = [state.following objectAtIndex:state._fsp--];
      [state.following removeLastObject];
      return top;
  }
  /**
   * Return the list of rules in your parser instance leading up to a call
   * to this method. You could override it if you want more detail, such
   * as the file/line information of where a rule is invoked.
   *
   * This is very useful for error messages and for context-sensitive
   * error recovery.
   *
   * Implemented by building a fresh ANTLRRecognitionException and handing
   * it, together with this object's class name, to
   * -getRuleInvocationStack:Recognizer:.
   *
   * @return whatever -getRuleInvocationStack:Recognizer: returns.
   */
  - (AMutableArray *)getRuleInvocationStack
  {
      // The class-name string is not owned by this method and is only used
      // for the duration of the call below, so it must not be retained here:
      // an unbalanced retain would leak one string per call.
      NSString *parserClassName = [self className];
      // NOTE(review): +newException follows the "new" naming family, so the
      // exception may be owned by this method and never released; confirm
      // against ANTLRRecognitionException before changing its ownership.
      return [self getRuleInvocationStack:[ANTLRRecognitionException newException]
                               Recognizer:parserClassName];
  }
  /**
   * A more general version of -getRuleInvocationStack where you can pass
   * in, for example, a RecognitionException to get its rule stack trace.
   * This routine is shared by all recognizers.
   *
   * The exception's call stack symbols are walked from the outermost frame
   * to the innermost one. A frame is skipped when it starts with
   * "org.antlr.runtime.", when it equals NEXT_TOKEN_RULE_NAME, or when it
   * does not equal `recognizerClassName`; every remaining frame is added
   * to the result.
   *
   * NOTE(review): call stack symbol strings are whole frame descriptions,
   * so an exact match against a class name looks unlikely to ever succeed;
   * the matching rules probably still need a proper port from the Java
   * original kept under DONTUSEYET below.
   *
   * TODO: move to a utility class or something; weird having lexer call this
   *
   * @param e                   exception whose call stack is inspected.
   * @param recognizerClassName class name a frame must equal to be kept.
   * @return an array of the kept frames. The array is retained before it is
   *         returned, so the caller is presumably expected to release it.
   */
  - (AMutableArray *)getRuleInvocationStack:(ANTLRRecognitionException *)e
                                 Recognizer:(NSString *)recognizerClassName
  {
      AMutableArray *rules = [[AMutableArray arrayWithCapacity:20] retain];
      // Not owned by this method (no alloc/new/copy involved), so it must not
      // be released here. A nil stack simply yields no iterations.
      NSArray *stack = [e callStackSymbols];
      for (NSString *t in [stack reverseObjectEnumerator]) {
          // hasPrefix: answers a BOOL. commonPrefixWithString:options: answers
          // a string object, which is always true when used as a condition.
          if ( [t hasPrefix:@"org.antlr.runtime."] ) {
              continue; // skip support code such as this method
          }
          if ( [t isEqualTo:NEXT_TOKEN_RULE_NAME] ) {
              continue;
          }
          if ( ![t isEqualTo:recognizerClassName] ) {
              continue; // must not be part of this parser
          }
          [rules addObject:t];
      }
  #ifdef DONTUSEYET
      StackTraceElement[] stack = e.getStackTrace();
      int i = 0;
      for (i=stack.length-1; i>=0; i--) {
          StackTraceElement t = stack[i];
          if ( [t getClassName().startsWith("org.antlr.runtime.") ) {
              continue; // skip support code such as this method
          }
          if ( [[t getMethodName] equals:NEXT_TOKEN_RULE_NAME] ) {
              continue;
          }
          if ( ![[t getClassName] equals:recognizerClassName] ) {
              continue; // must not be part of this parser
          }
          [rules addObject:[t getMethodName]];
      }
  #endif
      return rules;
  }
  /** Backtracking level as reported by the shared state's -getBacktracking. */
  - (NSInteger)getBacktrackingLevel
  {
      NSInteger level = [state getBacktracking];
      return level;
  }
  /** Store `level` as the backtracking level through the shared state's -setBacktracking:. */
  - (void)setBacktrackingLevel:(NSInteger)level
  {
      [state setBacktracking:level];
  }
  /**
   * Token names, used to print names such as ID during debugging and
   * error reporting. Generated parsers implement an override of this
   * method that points to their own token name table; this version
   * returns `tokenNames`.
   */
  - (NSArray *)getTokenNames
  {
      return tokenNames;
  }
  /**
   * Grammar file name, wanted for debugging and other purposes. ANTLR is
   * meant to generate an implementation of this method; this version
   * returns `grammarFileName`.
   */
  - (NSString *)getGrammarFileName
  {
      return grammarFileName;
  }
  /**
   * Presumably the name of the input source being recognized.
   * This base implementation has no such name and returns nil.
   */
  - (NSString *)getSourceName
  {
      return nil;
  }
  /**
   * Convenience method, most often used with template rewrites: turn a
   * list of tokens into a list of their texts.
   *
   * @param tokens objects that each respond to -text; may be nil.
   * @return a new array holding the -text of every element in order, or
   *         nil when `tokens` is nil.
   */
  - (AMutableArray *)toStrings:(AMutableArray *)tokens
  {
      AMutableArray *texts = nil;
      if ( tokens != nil ) {
          texts = [AMutableArray arrayWithCapacity:[tokens count]];
          for (id aToken in tokens) {
              [texts addObject:[aToken text]];
          }
      }
      return texts;
  }
  /**
   * Look up where rule `ruleIndex` stopped when it previously parsed input
   * beginning at `ruleStartIndex`.
   *
   * Returns ANTLR_MEMO_RULE_UNKNOWN when nothing is recorded for that start
   * index; otherwise returns the recorded value, which is the index of the
   * last token matched by the rule.
   *
   * Side effect: if the memo has no table for the rule yet, a new
   * ANTLRHashRule (length 17) is created and inserted at `ruleIndex`.
   *
   * For now this uses the slow object-based hash table. Later there could
   * be a special one for ints, and one that tosses out data once input
   * position i has been committed past.
   */
  - (NSInteger)getRuleMemoization:(NSInteger)ruleIndex StartIndex:(NSInteger)ruleStartIndex
  {
      ANTLRHashRule *ruleEntries = [state.ruleMemo objectAtIndex:ruleIndex];
      if ( ruleEntries == nil ) {
          ruleEntries = [ANTLRHashRule newANTLRHashRuleWithLen:17];
          [state.ruleMemo insertObject:ruleEntries atIndex:ruleIndex];
      }
      NSNumber *recordedStop = [ruleEntries getRuleMemoStopIndex:ruleStartIndex];
      if ( recordedStop != nil ) {
          return [recordedStop integerValue];
      }
      return ANTLR_MEMO_RULE_UNKNOWN;
  }
  /**
   * Has this rule already parsed input at the current index in the input
   * stream?
   *
   * @param anInput   stream whose current index is looked up in the memo.
   * @param ruleIndex index of the rule being asked about.
   * @return NO when the memo holds ANTLR_MEMO_RULE_UNKNOWN for this rule
   *         and position; YES when an earlier attempt is recorded, whether
   *         it failed or succeeded.
   *
   * Side effects when YES is returned:
   *  - recorded failure (ANTLR_MEMO_RULE_FAILED): state.failed is set to YES;
   *  - recorded success: the stream is moved to one past the stop token
   *    matched for this rule last time.
   */
  - (BOOL)alreadyParsedRule:(id<ANTLRIntStream>)anInput RuleIndex:(NSInteger)ruleIndex
  {
      NSInteger aStopIndex = [self getRuleMemoization:ruleIndex StartIndex:anInput.index];
      if ( aStopIndex == ANTLR_MEMO_RULE_UNKNOWN ) {
          // NSLog(@"rule %d not yet encountered\n", ruleIndex);
          return NO;
      }
      if ( aStopIndex == ANTLR_MEMO_RULE_FAILED ) {
          // NSInteger is wider than int on 64-bit targets, so it is logged
          // as a long with %ld rather than with %d.
          if (debug) NSLog(@"rule %ld will never succeed\n", (long)ruleIndex);
          state.failed = YES;
      }
      else {
          if (debug) NSLog(@"seen rule %ld before; skipping ahead to %ld failed = %@\n",
                           (long)ruleIndex, (long)(aStopIndex+1), state.failed?@"YES":@"NO");
          [anInput seek:(aStopIndex+1)]; // jump to one past stop token
      }
      return YES;
  }
  /**
   * Record whether or not this rule parsed the input at this position
   * successfully.
   *
   * The value stored is ANTLR_MEMO_RULE_FAILED when state.failed is set,
   * otherwise the stream's current index minus one. Nothing is stored when
   * the memo is nil, when `ruleIndex` is at or beyond the memo's length, or
   * when the memo has no entry at `ruleIndex`.
   *
   * @param anInput        stream whose current index marks where the rule stopped.
   * @param ruleIndex      index of the rule being recorded.
   * @param ruleStartIndex input index at which the rule started.
   */
  - (void)memoize:(id<ANTLRIntStream>)anInput
        RuleIndex:(NSInteger)ruleIndex
       StartIndex:(NSInteger)ruleStartIndex
  {
      ANTLRRuleStack *aRuleStack;
      NSInteger stopTokenIndex;

      aRuleStack = state.ruleMemo;
      stopTokenIndex = (state.failed ? ANTLR_MEMO_RULE_FAILED : (anInput.index-1));
      if ( aRuleStack == nil ) {
          if (debug) NSLog(@"!!!!!!!!! memo array is nil for %@", [self getGrammarFileName]);
          return;
      }
      if ( ruleIndex >= [aRuleStack length] ) {
          // Integer arguments are cast to long and logged with %ld so the
          // format matches the argument width on 64-bit targets.
          if (debug) NSLog(@"!!!!!!!!! memo size is %ld, but rule index is %ld",
                           (long)[state.ruleMemo length], (long)ruleIndex);
          return;
      }
      if ( [aRuleStack objectAtIndex:ruleIndex] != nil ) {
          [aRuleStack putHashRuleAtRuleIndex:ruleIndex StartIndex:ruleStartIndex StopIndex:stopTokenIndex];
      }
      return;
  }
  /**
   * Return how many rule/input-index pairs are recorded in total, i.e. the
   * sum of -count over every non-nil entry of the rule memo. Returns 0 when
   * there is no memo.
   * TODO: this includes synpreds. :(
   */
  - (NSInteger)getRuleMemoizationCacheSize
  {
      ANTLRRuleStack *memo = state.ruleMemo;
      int total = 0;
      if ( memo == nil ) {
          return total;
      }
      NSUInteger slot = 0;
      while ( slot < [memo length] ) {
          ANTLRHashRule *entries = [memo objectAtIndex:slot];
          if ( entries != nil ) {
              total += [entries count]; // how many input indexes are recorded?
          }
          slot++;
      }
      return total;
  }
  1016. #pragma warning Have to fix traceIn and traceOut.
  /**
   * Log entry into a rule: the rule name and the current input symbol,
   * followed by a backtracking marker when the backtracking level is above
   * zero, and finally a newline.
   *
   * @param ruleName    name of the rule being entered.
   * @param ruleIndex   index of the rule (not used by this implementation).
   * @param inputSymbol object logged as the current input symbol.
   */
  - (void)traceIn:(NSString *)ruleName Index:(NSInteger)ruleIndex Object:(id)inputSymbol
  {
      NSLog(@"enter %@ %@", ruleName, inputSymbol);
      if ( state.backtracking > 0 ) {
          // backtracking is an integer level, not a BOOL: comparing it with
          // YES would only match level 1, so test it for non-zero instead.
          NSLog(@" backtracking=%s", (state.backtracking ? "YES" : "NO"));
      }
      NSLog(@"\n");
  }
  /**
   * Log exit from a rule: the rule name and the current input symbol,
   * followed, when the backtracking level is above zero, by a backtracking
   * marker and whether the rule failed or succeeded, and finally a newline.
   */
  - (void)traceOut:(NSString *)ruleName Index:(NSInteger)ruleIndex Object:(id)inputSymbol
  {
      NSLog(@"exit %@ -- %@", ruleName, inputSymbol);
      if ( state.backtracking > 0 ) {
          const char *backtrackingText = state.backtracking ? "YES" : "NO";
          const char *outcomeText = state.failed ? "failed" : "succeeded";
          NSLog(@" backtracking=%s %s", backtrackingText, outcomeText);
      }
      NSLog(@"\n");
  }
  /**
   * Run a syntactic predicate method through its selector, which lets the
   * recognizer support arbitrary synpreds.
   *
   * The backtracking level is raised for the duration of the call, the
   * input position is marked beforehand and rewound afterwards, and
   * state.failed is cleared before returning. A recognition exception
   * raised by the predicate is caught and logged.
   *
   * @param synpredFragment selector of the predicate method, sent to self.
   * @return YES if state.failed was not set once the predicate had run.
   */
  - (BOOL)evaluateSyntacticPredicate:(SEL)synpredFragment
  {
      state.backtracking++;
      // input = state.token.input;
      id<ANTLRIntStream> stream = self.input;
      int marker = [stream mark];
      @try {
          [self performSelector:synpredFragment];
      }
      @catch (ANTLRRecognitionException *re) {
          NSLog(@"impossible synpred: %@", re.name);
      }
      // Capture the outcome before the failure flag is reset below.
      BOOL success = !state.failed;
      [stream rewind:marker];
      state.backtracking--;
      state.failed = NO;
      return success;
  }
  1053. @end