ANTLRBaseRecognizer.m - Created by Alan Condit on 6/16/10. …

/projects/antlr-3.4/runtime/ObjC/Framework/ANTLRBaseRecognizer.m

https://gitlab.com/essere.lab.public/qualitas.class-corpus · Objective C · 1129 lines · 641 code · 76 blank · 412 comment · 140 complexity · ba76a637ad9324e64e7b7ba9d1cd973c MD5 · raw file

//
//  ANTLRBaseRecognizer.m
//  ANTLR
//
//  Created by Alan Condit on 6/16/10.
// [The "BSD licence"]
// Copyright (c) 2010 Alan Condit
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// 1. Redistributions of source code must retain the above copyright
//    notice, this list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright
//    notice, this list of conditions and the following disclaimer in the
//    documentation and/or other materials provided with the distribution.
// 3. The name of the author may not be used to endorse or promote products
//    derived from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
// IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
// OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
// IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
// NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
// THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#import "ANTLRBaseRecognizer.h"
#import "ANTLRHashRule.h"
#import "ANTLRRuleMemo.h"
#import "ANTLRCommonToken.h"
#import "ANTLRMap.h"

extern NSInteger debug;

@implementation ANTLRBaseRecognizer

static AMutableArray *_tokenNames;
static NSString *_grammarFileName;
static NSString *NEXT_TOKEN_RULE_NAME;

@synthesize state;
@synthesize grammarFileName;
//@synthesize failed;
@synthesize sourceName;
//@synthesize numberOfSyntaxErrors;
@synthesize tokenNames;

/** One-time class setup: creates the shared NEXT_TOKEN_RULE_NAME string.
 *
 *  The runtime sends +initialize again for every subclass that does not
 *  override it, so the nil check keeps the constant from being rebuilt
 *  (and the previous instance leaked) on each of those extra calls.
 */
+ (void) initialize
{
    if ( NEXT_TOKEN_RULE_NAME == nil ) {
        // alloc/init yields an owned (+1) string, matching the old
        // stringWithString: + retain pair.
        NEXT_TOKEN_RULE_NAME = [[NSString alloc] initWithString:@"nextToken"];
    }
}

/** Factory: allocates an ANTLRBaseRecognizer and runs plain -init on it. */
+ (ANTLRBaseRecognizer *) newANTLRBaseRecognizer
{
    ANTLRBaseRecognizer *recognizer = [ANTLRBaseRecognizer alloc];
    recognizer = [recognizer init];
    return recognizer;
}

/** Factory: allocates an ANTLRBaseRecognizer and initializes it with
 *  -initWithLen:, forwarding aLen unchanged.
 */
+ (ANTLRBaseRecognizer *) newANTLRBaseRecognizerWithRuleLen:(NSInteger)aLen
{
    ANTLRBaseRecognizer *recognizer = [ANTLRBaseRecognizer alloc];
    recognizer = [recognizer initWithLen:aLen];
    return recognizer;
}

/** Factory: allocates an ANTLRBaseRecognizer and initializes it with
 *  -initWithState:, forwarding aState unchanged (nil is accepted there).
 */
+ (ANTLRBaseRecognizer *) newANTLRBaseRecognizer:(ANTLRRecognizerSharedState *)aState
{
    ANTLRBaseRecognizer *recognizer = [ANTLRBaseRecognizer alloc];
    recognizer = [recognizer initWithState:aState];
    return recognizer;
}

/** Returns the file-static _tokenNames pointer as-is; it is neither
 *  retained nor copied for the caller.
 */
+ (AMutableArray *)getTokenNames
{
    return _tokenNames;
}

/** Replaces the class-level token-name array held in _tokenNames.
 *  The previous array is released and the new one retained; passing the
 *  array that is already stored changes nothing.
 */
+ (void)setTokenNames:(AMutableArray *)theTokNams
{
    if ( _tokenNames == theTokNams ) {
        return;     // same object: no ownership change needed
    }
    [_tokenNames release];              // messaging nil is a no-op
    _tokenNames = [theTokNams retain];  // -retain returns the receiver
}

/** Replaces the class-level grammar file name held in _grammarFileName.
 *
 *  The new string is retained before the old one is released, and the
 *  static is then updated to point at it.  (Previously the static was never
 *  reassigned and the old, possibly just-released, pointer was retained
 *  again.)
 *
 *  @param aFileName the new grammar file name; may be nil to clear it.
 */
+ (void)setGrammarFileName:(NSString *)aFileName
{
    if ( _grammarFileName != aFileName ) {
        [aFileName retain];
        [_grammarFileName release];     // messaging nil is a no-op
        _grammarFileName = aFileName;
    }
}

/** Default initializer: creates a fresh shared state when none is set,
 *  adopts the class-level token names and grammar file name, and puts the
 *  state's bookkeeping fields at their starting values.
 */
- (id) init
{
    self = [super init];
    if ( self == nil ) {
        return nil;
    }
    if ( state == nil ) {
        state = [[ANTLRRecognizerSharedState newANTLRRecognizerSharedState] retain];
    }
    // Take shared ownership of the class-level defaults; -retain on nil
    // simply returns nil.
    tokenNames = [_tokenNames retain];
    grammarFileName = [_grammarFileName retain];

    state._fsp = -1;                    // follow stack starts empty
    state.errorRecovery = NO;           // not currently recovering
    state.lastErrorIndex = -1;
    state.failed = NO;                  // no match has failed yet
    state.syntaxErrors = 0;
    state.backtracking = 0;             // backtracking depth
    state.tokenStartCharIndex = -1;
    return self;
}

/** Same as -init, except that a newly created shared state is built with
 *  newANTLRRecognizerSharedStateWithRuleLen:, passing aLen through.
 */
- (id) initWithLen:(NSInteger)aLen
{
    self = [super init];
    if ( self == nil ) {
        return nil;
    }
    if ( state == nil ) {
        state = [[ANTLRRecognizerSharedState newANTLRRecognizerSharedStateWithRuleLen:aLen] retain];
    }
    // Take shared ownership of the class-level defaults; -retain on nil
    // simply returns nil.
    tokenNames = [_tokenNames retain];
    grammarFileName = [_grammarFileName retain];

    state._fsp = -1;                    // follow stack starts empty
    state.errorRecovery = NO;           // not currently recovering
    state.lastErrorIndex = -1;
    state.failed = NO;                  // no match has failed yet
    state.syntaxErrors = 0;
    state.backtracking = 0;             // backtracking depth
    state.tokenStartCharIndex = -1;
    return self;
}

/** Initializes the recognizer around an existing shared state.
 *  When aState is nil a new shared state is created instead.  Either way
 *  the state is retained and its bookkeeping fields are set to their
 *  starting values.
 */
- (id) initWithState:(ANTLRRecognizerSharedState *)aState
{
    self = [super init];
    if ( self == nil ) {
        return nil;
    }
    state = (aState != nil) ? aState
                            : [ANTLRRecognizerSharedState newANTLRRecognizerSharedState];
    [state retain];
    // Take shared ownership of the class-level defaults; -retain on nil
    // simply returns nil.
    tokenNames = [_tokenNames retain];
    grammarFileName = [_grammarFileName retain];

    state._fsp = -1;                    // follow stack starts empty
    state.errorRecovery = NO;           // not currently recovering
    state.lastErrorIndex = -1;
    state.failed = NO;                  // no match has failed yet
    state.syntaxErrors = 0;
    state.backtracking = 0;             // backtracking depth
    state.tokenStartCharIndex = -1;
    return self;
}

/** Gives up ownership of the three ivars retained by the initializers. */
- (void)dealloc
{
#ifdef DEBUG_DEALLOC
    NSLog( @"called dealloc in ANTLRBaseRecognizer" );
#endif
    // -release sent to nil does nothing, so no nil checks are needed.
    [grammarFileName release];
    [tokenNames release];
    [state release];
    [super dealloc];
}

/** Puts the recognizer's shared state back to its initial values.
 *  The token source is not touched.  Grammar writers can extend this to
 *  reset their own ivars.  Does nothing when there is no state object.
 */
- (void) reset
{
    if ( state == nil ) {
        return;
    }
    // Empty the follow stack, but only if it exists and holds something.
    if ( state.following != nil && [state.following count] ) {
        [state.following removeAllObjects];
    }
    state._fsp = -1;
    state.errorRecovery = NO;           // not currently recovering
    state.lastErrorIndex = -1;
    state.failed = NO;                  // no match has failed yet
    state.syntaxErrors = 0;
    state.backtracking = 0;             // backtracking depth
    state.tokenStartCharIndex = -1;
    // Same treatment for the rule memo collection.
    if ( state.ruleMemo != nil && [state.ruleMemo count] ) {
        [state.ruleMemo removeAllObjects];
    }
}

/** Returns the failed flag as reported by the shared state object. */
- (BOOL) getFailed
{
	return [state getFailed];
}

/** Forwards the new failed flag to the shared state object. */
- (void) setFailed:(BOOL)flag
{
	[state setFailed:flag];
}

/** Returns the shared state ivar as-is (not retained for the caller). */
- (ANTLRRecognizerSharedState *) getState
{
	return state;
}

/** Swaps in a different shared state object: the old one is released and
 *  the new one retained.  Setting the state that is already held is a no-op.
 */
- (void) setState:(ANTLRRecognizerSharedState *) theState
{
    if ( state == theState ) {
        return;
    }
    [state release];            // messaging nil is a no-op
    state = [theState retain];  // -retain returns the receiver
}

/** Base implementation always answers nil; inheriting classes are expected
 *  to override this.
 */
- (id)input
{
    return nil; // Must be overridden in inheriting class
}

/** Base implementation does nothing; meant to be overridden. */
- (void)skip // override in inheriting class
{
    return;
}

/** Tries to match the current input symbol against ttype.
 *
 *  On a match the symbol is consumed and the errorRecovery and failed flags
 *  are cleared.  On a mismatch while backtracking, failed is set and the
 *  current symbol is returned without consuming.  Otherwise the work is
 *  handed to recoverFromMismatchedToken:TokenType:Follow:, whose result
 *  (or exception) becomes this method's.
 */
-(id) match:(id<ANTLRIntStream>)anInput TokenType:(NSInteger)ttype Follow:(ANTLRBitSet *)follow
{
    id currentSymbol = [self getCurrentInputSymbol:anInput];
    if ( [anInput LA:1] != ttype ) {
        if ( state.backtracking > 0 ) {
            // Just flag the failure; no recovery while backtracking.
            state.failed = YES;
            return currentSymbol;
        }
        return [self recoverFromMismatchedToken:anInput TokenType:ttype Follow:follow];
    }
    [anInput consume];
    state.errorRecovery = NO;
    state.failed = NO;
    return currentSymbol;
}

/** Accepts whatever the current symbol is: clears the errorRecovery and
 *  failed flags on the shared state, then consumes one symbol from anInput.
 */
-(void) matchAny:(id<ANTLRIntStream>)anInput
{
    state.errorRecovery = NO;
    state.failed = NO;
    [anInput consume];
}

/** YES when the symbol after the current one, LA(2), has the expected
 *  type, i.e. the current symbol looks like a single extra token.
 */
-(BOOL) mismatchIsUnwantedToken:(id<ANTLRIntStream>)anInput TokenType:(NSInteger)ttype
{
    BOOL nextIsExpected = ( [anInput LA:2] == ttype );
    return nextIsExpected;
}

/** Decides whether a mismatch looks like a single missing token.
 *
 *  Returns YES when LA(1) is a member of the follow set (widened by the
 *  context-sensitive rule FOLLOW if the set contains EOR), or when EOR is
 *  still in the set afterwards.  With a nil follow set the answer is NO.
 */
-(BOOL) mismatchIsMissingToken:(id<ANTLRIntStream>)anInput Follow:(ANTLRBitSet *) follow
{
    if ( follow == nil ) {
        // No follow information: the caller can only consume one token
        // and hope for the best.
        return NO;
    }
    // Work out what can follow this grammar element reference.
    if ( [follow member:ANTLRTokenTypeEOR] ) {
        ANTLRBitSet *contextFollow = [self computeContextSensitiveRuleFOLLOW];
        follow = [follow or:contextFollow];
        if ( state._fsp >= 0 ) {
            // Not the start symbol, so EOR must not remain in the set.
            [follow remove:(ANTLRTokenTypeEOR)];
        }
    }
    // If the current token fits what may come next, a token is missing and
    // recovery may "insert" it.
    if ( [follow member:[anInput LA:1]] ) {
        return YES;
    }
    // The bit set cannot hold negative values such as -1 (EOF), so EOR left
    // in the set stands for "end of the start symbol; EOF may follow".
    return [follow member:ANTLRTokenTypeEOR] ? YES : NO;
}

/** Report a recognition problem.
 *
 *  This method sets errorRecovery to indicate the parser is recovering
 *  not parsing.  Once in recovery mode, no errors are generated.
 *  To get out of recovery mode, the parser must successfully match
 *  a token (after a resync).  So it will go:
 *
 * 		1. error occurs
 * 		2. enter recovery mode, report error
 * 		3. consume until token found in resynch set
 * 		4. try to resume parsing
 * 		5. next match() will reset errorRecovery mode
 *
 *  If you override, make sure to update syntaxErrors if you care about that.
 */
-(void) reportError:(ANTLRRecognitionException *) e
{
    // While errorRecovery is set an error has already been reported and no
    // token has matched since; anything further is spurious and is dropped
    // without being counted.
    if ( !state.errorRecovery ) {
        state.syntaxErrors++;
        state.errorRecovery = YES;
        [self displayRecognitionError:[self getTokenNames] Exception:e];
    }
}

/** Builds the header and message text for e and passes the combined line
 *  to emitErrorMessage:.
 */
-(void) displayRecognitionError:(AMutableArray *)theTokNams Exception:(ANTLRRecognitionException *)e
{
    NSString *header = [self getErrorHeader:e];
    NSString *body = [self getErrorMessage:e TokenNames:theTokNams];
    NSString *line = [NSString stringWithFormat:@" %@ %@", header, body];
    [self emitErrorMessage:line];
}

/** Name to print for an expected token type in an error message.
 *
 *  EOF gets the fixed label "EOF".  A type that falls outside theTokNams
 *  (or a nil table) yields "<unknown>" rather than an out-of-range access
 *  while an error is being reported.
 */
static NSString *ANTLRExpectedTokenDisplayName(AMutableArray *theTokNams, NSInteger expecting)
{
    if ( expecting == ANTLRTokenTypeEOF ) {
        return @"EOF";
    }
    if ( theTokNams == nil || expecting < 0 || (NSUInteger)expecting >= (NSUInteger)[theTokNams count] ) {
        return @"<unknown>";
    }
    return (NSString *)[theTokNams objectAtIndex:expecting];
}

/** What error message should be generated for the various
 *  exception types?
 *
 *  Not very object-oriented code, but I like having all error message
 *  generation within one method rather than spread among all of the
 *  exception classes. This also makes it much easier for the exception
 *  handling because the exception classes do not have to have pointers back
 *  to this object to access utility routines and so on. Also, changing
 *  the message for an exception type would be difficult because you
 *  would have to subclass the exception, but then somehow get ANTLR
 *  to make those kinds of exception objects instead of the default.
 *  This looks weird, but trust me--it makes the most sense in terms
 *  of flexibility.
 *
 *  For grammar debugging, you will want to override this to add
 *  more information such as the rule invocation stack and,
 *  for no viable alts, the decision description and state etc...
 *
 *  Override this to change the message generated for one or more
 *  exception types.
 *
 *  The more specific exception classes are tested before the more general
 *  ones, so the order of the branches matters.
 *
 *  @param e           the recognition exception to describe.
 *  @param theTokNams  token-name table indexed by token type.
 *  @return the message text (without the line/column header).
 */
- (NSString *)getErrorMessage:(ANTLRRecognitionException *)e TokenNames:(AMutableArray *)theTokNams
{
    NSString *msg;
    if ( [e isKindOfClass:[ANTLRUnwantedTokenException class]] ) {
        ANTLRUnwantedTokenException *ute = (ANTLRUnwantedTokenException *)e;
        NSString *tokenName = ANTLRExpectedTokenDisplayName(theTokNams, ute.expecting);
        msg = [NSString stringWithFormat:@"extraneous input %@ expecting %@", [self getTokenErrorDisplay:[ute getUnexpectedToken]],
               tokenName];
    }
    else if ( [e isKindOfClass:[ANTLRMissingTokenException class] ] ) {
        ANTLRMissingTokenException *mte = (ANTLRMissingTokenException *)e;
        NSString *tokenName = ANTLRExpectedTokenDisplayName(theTokNams, mte.expecting);
        msg = [NSString stringWithFormat:@"missing %@ at %@", tokenName, [self getTokenErrorDisplay:(e.token)] ];
    }
    else if ( [e isKindOfClass:[ANTLRMismatchedTokenException class]] ) {
        ANTLRMismatchedTokenException *mte = (ANTLRMismatchedTokenException *)e;
        NSString *tokenName = ANTLRExpectedTokenDisplayName(theTokNams, mte.expecting);
        msg = [NSString stringWithFormat:@"mismatched input %@ expecting %@",[self getTokenErrorDisplay:(e.token)], tokenName];
    }
    else if ( [e isKindOfClass:[ANTLRMismatchedTreeNodeException class]] ) {
        ANTLRMismatchedTreeNodeException *mtne = (ANTLRMismatchedTreeNodeException *)e;
        NSString *tokenName = ANTLRExpectedTokenDisplayName(theTokNams, mtne.expecting);
        msg = [NSString stringWithFormat:@"mismatched tree node: %@ expecting %@", mtne.node, tokenName];
    }
    else if ( [e isKindOfClass:[ANTLRNoViableAltException class]] ) {
        // For development, the decision description, decision number and
        // state number of the exception could be appended here.
        msg = [NSString stringWithFormat:@"no viable alternative at input %@", [self getTokenErrorDisplay:e.token]];
    }
    else if ( [e isKindOfClass:[ANTLREarlyExitException class]] ) {
        // For development, the decision number could be appended here.
        // The %@ placeholder was missing, so the offending token was never
        // actually printed.
        msg = [NSString stringWithFormat:@"required (...)+ loop did not match anything at input %@", [self getTokenErrorDisplay:e.token]];
    }
    else if ( [e isKindOfClass:[ANTLRMismatchedSetException class]] ) {
        ANTLRMismatchedSetException *mse = (ANTLRMismatchedSetException *)e;
        msg = [NSString stringWithFormat:@"mismatched input %@ expecting set %@",
               [self getTokenErrorDisplay:(e.token)],
               mse.expecting];
    }
#pragma warning NotSet not yet implemented.
    else if ( [e isKindOfClass:[ANTLRMismatchedNotSetException class] ] ) {
        ANTLRMismatchedNotSetException *mse = (ANTLRMismatchedNotSetException *)e;
        msg = [NSString stringWithFormat:@"mismatched input %@ expecting set %@",
               [self getTokenErrorDisplay:(e.token)],
               mse.expecting];
    }
    else if ( [e isKindOfClass:[ANTLRFailedPredicateException class]] ) {
        ANTLRFailedPredicateException *fpe = (ANTLRFailedPredicateException *)e;
        msg = [NSString stringWithFormat:@"rule %@ failed predicate: { %@ }?", fpe.ruleName, fpe.predicate];
    }
    else {
        // Unrecognized exception class: fall back to the exception's name.
        msg = [NSString stringWithFormat:@"Exception= %@\n", e.name];
    }
    return msg;
}

/** Get number of recognition errors (lexer, parser, tree parser).  Each
 *  recognizer tracks its own number.  So parser and lexer each have
 *  separate count.  Does not count the spurious errors found between
 *  an error and next valid token match
 *
 *  See also reportError()
 */
- (NSInteger) getNumberOfSyntaxErrors
{
    // The count lives on the shared state; reportError: increments it and
    // reset zeroes it.
    return state.syntaxErrors;
}

/** What is the error header, normally line/character position information? */
- (NSString *)getErrorHeader:(ANTLRRecognitionException *)e
{
    // Cast to long and print with %ld so the format specifier is correct
    // whatever the integer width of the two fields is; a bare %d is wrong
    // for 64-bit values.
    // NOTE(review): the declared types of line/charPositionInLine are not
    // visible in this file — confirm they are integer types.
    return [NSString stringWithFormat:@"line %ld:%ld", (long)e.line, (long)e.charPositionInLine];
}

/** How should a token be displayed in an error message? The default
 *  is to display just the text, but during development you might
 *  want to have a lot of information spit out.  Override in that case
 *  to use the token's full description. This is better than forcing
 *  you to override a method in your token objects because you don't
 *  have to go modify your lexer so that it creates a new token type.
 *
 *  A token without text is shown as "<EOF>" or as its numeric type in
 *  angle brackets.  Newline, carriage return and tab characters are shown
 *  as the two-character escapes \n, \r and \t, and the result is wrapped
 *  in single quotes.
 *
 *  @param t the token to describe.
 *  @return the quoted display string.
 */
- (NSString *)getTokenErrorDisplay:(id<ANTLRToken>)t
{
    NSString *s = t.text;
    if ( s == nil ) {
        if ( t.type == ANTLRTokenTypeEOF ) {
            s = @"<EOF>";
        }
        else {
            // t.type is an integer, so it must be formatted with an integer
            // specifier; %@ would treat the number as an object pointer.
            s = [NSString stringWithFormat:@"<%ld>", (long)t.type];
        }
    }
    // The replacement is literal (not a regex template), so a single escaped
    // backslash is all that is needed to print "\n", "\r" and "\t".
    s = [s stringByReplacingOccurrencesOfString:@"\n" withString:@"\\n"];
    s = [s stringByReplacingOccurrencesOfString:@"\r" withString:@"\\r"];
    s = [s stringByReplacingOccurrencesOfString:@"\t" withString:@"\\t"];
    return [NSString stringWithFormat:@"\'%@\'", s];
}
                                        
/** Override this method to change where error messages go.
 *  The base implementation writes msg with NSLog.
 */
- (void) emitErrorMessage:(NSString *) msg
{
    // Passed as an argument to a fixed "%@" format, so any '%' characters
    // inside msg are printed literally.
    NSLog(@"%@", msg);
}

/** Recover from an error found on the input stream.  This is
 *  for NoViableAlt and mismatched symbol exceptions.  If you enable
 *  single token insertion and deletion, this will usually not
 *  handle mismatched symbol exceptions but there could be a mismatched
 *  token that the match() routine could not recover from.
 */
- (void)recover:(id<ANTLRIntStream>)anInput Exception:(ANTLRRecognitionException *)re
{
    // A second error at the same token index means LT(1) is in the recovery
    // set and nothing was consumed last time.  Consume one token as a
    // failsafe so the parser cannot loop forever.
    BOOL stuckAtSameIndex = ( state.lastErrorIndex == anInput.index );
    if ( stuckAtSameIndex ) {
        [anInput consume];
    }
    state.lastErrorIndex = anInput.index;

    // Skip input until something in the combined follow set shows up,
    // bracketing the skip with the resync hooks.
    ANTLRBitSet *resyncSet = [self computeErrorRecoverySet];
    [self beginResync];
    [self consumeUntilFollow:anInput Follow:resyncSet];
    [self endResync];
}

/** Hook invoked just before tokens are consumed during error recovery
 *  (see recover:Exception: and recoverFromMismatchedToken:TokenType:Follow:).
 *  The base implementation is intentionally empty.
 */
- (void) beginResync
{
    
}

/** Hook invoked right after tokens have been consumed during error recovery
 *  (see recover:Exception: and recoverFromMismatchedToken:TokenType:Follow:).
 *  The base implementation is intentionally empty.
 */
- (void) endResync
{
    
}
                            
/*  Compute the error recovery set for the current rule.  During
 *  rule invocation, the parser pushes the set of tokens that can
 *  follow that rule reference on the stack; this amounts to
 *  computing FIRST of what follows the rule reference in the
 *  enclosing rule. This local follow set only includes tokens
 *  from within the rule; i.e., the FIRST computation done by
 *  ANTLR stops at the end of a rule.
 *
 *  EXAMPLE
 *
 *  When you find a "no viable alt exception", the input is not
 *  consistent with any of the alternatives for rule r.  The best
 *  thing to do is to consume tokens until you see something that
 *  can legally follow a call to r *or* any rule that called r.
 *  You don't want the exact set of viable next tokens because the
 *  input might just be missing a token--you might consume the
 *  rest of the input looking for one of the missing tokens.
 *
 *  Consider grammar:
 *
 *  a : '[' b ']'
 *    | '(' b ')'
 *    ;
 *  b : c '^' INT ;
 *  c : ID
 *    | INT
 *    ;
 *
 *  At each rule invocation, the set of tokens that could follow
 *  that rule is pushed on a stack.  Here are the various "local"
 *  follow sets:
 *
 *  FOLLOW(b1_in_a) = FIRST(']') = ']'
 *  FOLLOW(b2_in_a) = FIRST(')') = ')'
 *  FOLLOW(c_in_b) = FIRST('^') = '^'
 *
 *  Upon erroneous input "[]", the call chain is
 *
 *  a -> b -> c
 *
 *  and, hence, the follow context stack is:
 *
 *  depth  local follow set     after call to rule
 *    0         <EOF>                    a (from main())
 *    1          ']'                     b
 *    2          '^'                     c
 *
 *  Notice that ')' is not included, because b would have to have
 *  been called from a different context in rule a for ')' to be
 *  included.
 *
 *  For error recovery, we cannot consider FOLLOW(c)
 *  (context-sensitive or otherwise).  We need the combined set of
 *  all context-sensitive FOLLOW sets--the set of all tokens that
 *  could follow any reference in the call chain.  We need to
 *  resync to one of those tokens.  Note that FOLLOW(c)='^' and if
 *  we resync'd to that token, we'd consume until EOF.  We need to
 *  sync to context-sensitive FOLLOWs for a, b, and c: {']','^'}.
 *  In this case, for input "[]", LA(1) is in this set so we would
 *  not consume anything and after printing an error rule c would
 *  return normally.  It would not find the required '^' though.
 *  At this point, it gets a mismatched token error and throws an
 *  exception (since LA(1) is not in the viable following token
 *  set).  The rule exception handler tries to recover, but finds
 *  the same recovery set and doesn't consume anything.  Rule b
 *  exits normally returning to rule a.  Now it finds the ']' (and
 *  with the successful match exits errorRecovery mode).
 *
 *  So, you can see that the parser walks up the call chain looking
 *  for the token that was a member of the recovery set.
 *
 *  Errors are not generated in errorRecovery mode.
 *
 *  ANTLR's error recovery mechanism is based upon original ideas:
 *
 *  "Algorithms + Data Structures = Programs" by Niklaus Wirth
 *
 *  and
 *
 *  "A note on error recovery in recursive descent parsers":
 *  http://portal.acm.org/citation.cfm?id=947902.947905
 *
 *  Later, Josef Grosch had some good ideas:
 *
 *  "Efficient and Comfortable Error Recovery in Recursive Descent
 *  Parsers":
 *  ftp://www.cocolab.com/products/cocktail/doca4.ps/ell.ps.zip
 *
 *  Like Grosch I implemented local FOLLOW sets that are combined
 *  at run-time upon error to avoid overhead during parsing.
 */
- (ANTLRBitSet *) computeErrorRecoverySet
{
    // Non-exact mode: every local follow set on the stack is merged.
    ANTLRBitSet *recoverySet = [self combineFollows:NO];
    return recoverySet;
}

/** Compute the context-sensitive FOLLOW set for current rule.
 *  This is set of token types that can follow a specific rule
 *  reference given a specific call chain.  You get the set of
 *  viable tokens that can possibly come next (lookahead depth 1)
 *  given the current call chain.  Contrast this with the
 *  definition of plain FOLLOW for rule r:
 *
 *   FOLLOW(r)={x | S=>*alpha r beta in G and x in FIRST(beta)}
 *
 *  where x in T* and alpha, beta in V*; T is set of terminals and
 *  V is the set of terminals and nonterminals.  In other words,
 *  FOLLOW(r) is the set of all tokens that can possibly follow
 *  references to r in *any* sentential form (context).  At
 *  runtime, however, we know precisely which context applies as
 *  we have the call chain.  We may compute the exact (rather
 *  than covering superset) set of following tokens.
 *
 *  For example, consider grammar:
 *
 *  stat : ID '=' expr ';'      // FOLLOW(stat)=={EOF}
 *       | "return" expr '.'
 *       ;
 *  expr : atom ('+' atom)* ;   // FOLLOW(expr)=={';','.',')'}
 *  atom : INT                  // FOLLOW(atom)=={'+',')',';','.'}
 *       | '(' expr ')'
 *       ;
 *
 *  The FOLLOW sets are all inclusive whereas context-sensitive
 *  FOLLOW sets are precisely what could follow a rule reference.
 *  For input "i=(3);", here is the derivation:
 *
 *  stat => ID '=' expr ';'
 *       => ID '=' atom ('+' atom)* ';'
 *       => ID '=' '(' expr ')' ('+' atom)* ';'
 *       => ID '=' '(' atom ')' ('+' atom)* ';'
 *       => ID '=' '(' INT ')' ('+' atom)* ';'
 *       => ID '=' '(' INT ')' ';'
 *
 *  At the "3" token, you'd have a call chain of
 *
 *    stat -> expr -> atom -> expr -> atom
 *
 *  What can follow that specific nested ref to atom?  Exactly ')'
 *  as you can see by looking at the derivation of this specific
 *  input.  Contrast this with the FOLLOW(atom)={'+',')',';','.'}.
 *
 *  You want the exact viable token set when recovering from a
 *  token mismatch.  Upon token mismatch, if LA(1) is member of
 *  the viable next token set, then you know there is most likely
 *  a missing token in the input stream.  "Insert" one by just not
 *  throwing an exception.
 */
- (ANTLRBitSet *)computeContextSensitiveRuleFOLLOW
{
    // Exact mode: merging stops at the first local follow set without EOR.
    ANTLRBitSet *contextFollow = [self combineFollows:YES];
    return contextFollow;
}

// what is exact? it seems to only add sets from above on stack
// if EOR is in set i.  When it sees a set w/o EOR, it stops adding.
// Why would we ever want them all?  Maybe no viable alt instead of
// mismatched token?
/** Merges the local follow sets on the follow stack, from the top entry
 *  (state._fsp) down to index 0, into one newly created, retained bit set.
 *
 *  In exact mode the walk stops after the first local set that does not
 *  contain EOR, and EOR is removed from the result whenever it was seen at
 *  a depth above 0.  In non-exact mode every set is merged.
 */
- (ANTLRBitSet *)combineFollows:(BOOL) exact
{
    NSInteger top = state._fsp;
    ANTLRBitSet *merged = [[ANTLRBitSet newANTLRBitSet] retain];
    for (int depth = top; depth >= 0; depth--) {
        ANTLRBitSet *local = (ANTLRBitSet *)[state.following objectAtIndex:depth];
        [merged orInPlace:local];
        if ( !exact ) {
            continue;       // take everything on the stack
        }
        if ( ![local member:ANTLRTokenTypeEOR] ) {
            break;          // end of rule not visible from here: stop
        }
        // EOR stays in the result only at depth 0 (the start rule), where
        // it signals that follow(start rule), i.e. EOF, is included.
        if ( depth > 0 ) {
            [merged remove:ANTLRTokenTypeEOR];
        }
    }
    return merged;
}

/** Attempt to recover from a single missing or extra token.
 *
 *  EXTRA TOKEN
 *
 *  LA(1) is not what we are looking for.  If LA(2) has the right token,
 *  however, then assume LA(1) is some extra spurious token.  Delete it
 *  and LA(2) as if we were doing a normal match(), which advances the
 *  input.
 *
 *  MISSING TOKEN
 *
 *  If current token is consistent with what could come after
 *  ttype then it is ok to "insert" the missing token, else throw
 *  exception For example, Input "i=(3;" is clearly missing the
 *  ')'.  When the parser returns from the nested call to expr, it
 *  will have call chain:
 *
 *    stat -> expr -> atom
 *
 *  and it will be trying to match the ')' at this point in the
 *  derivation:
 *
 *       => ID '=' '(' INT ')' ('+' atom)* ';'
 *                          ^
 *  match() will see that ';' doesn't match ')' and report a
 *  mismatched token error.  To recover, it sees that LA(1)==';'
 *  is in the set of tokens that can follow the ')' token
 *  reference in rule atom.  It can assume that you forgot the ')'.
 */
- (id<ANTLRToken>)recoverFromMismatchedToken:(id<ANTLRIntStream>)anInput
                       TokenType:(NSInteger)ttype
                          Follow:(ANTLRBitSet *)follow
{
    // Single-token deletion: LA(2) is what we want, so LA(1) is extra.
    if ( [self mismatchIsUnwantedToken:anInput TokenType:ttype] ) {
        ANTLRRecognitionException *unwanted = [ANTLRUnwantedTokenException newException:ttype Stream:anInput];
        [self beginResync];
        [anInput consume];              // drop the extra token
        [self endResync];
        // Reported after consuming, as in the original ordering.
        [self reportError:unwanted];
        // Return the token that is actually being matched.
        id wantedSymbol = [self getCurrentInputSymbol:anInput];
        [anInput consume];              // step past it as if all were fine
        return wantedSymbol;
    }

    // Deletion did not apply.  If insertion does not either, give up.
    if ( ![self mismatchIsMissingToken:anInput Follow:follow] ) {
        ANTLRRecognitionException *mismatch = [ANTLRMismatchedTokenException newException:ttype Stream:anInput];
        @throw mismatch;
    }

    // Single-token insertion: conjure up the missing symbol.  No exception
    // object exists yet at this point, so nil is passed for it.
    id<ANTLRToken> conjured = [self getMissingSymbol:anInput Exception:nil TokenType:ttype Follow:follow];
    ANTLRRecognitionException *missing = [ANTLRMissingTokenException newException:ttype Stream:anInput With:conjured];
    [self reportError:missing];         // reported after the insertion
    return conjured;
}

/** Not currently used */
-(id) recoverFromMismatchedSet:(id<ANTLRIntStream>)anInput
                     Exception:(ANTLRRecognitionException *)e
                        Follow:(ANTLRBitSet *) follow
{
    // TODO do single token deletion like the token-mismatch recovery does
    if ( ![self mismatchIsMissingToken:anInput Follow:follow] ) {
        @throw e;
    }
    [self reportError:e];
    // There is no way yet to conjure up a token for a set, so the invalid
    // token type is requested.
    id substitute = [self getMissingSymbol:anInput
                                 Exception:e
                                 TokenType:ANTLRTokenTypeInvalid
                                    Follow:follow];
    return substitute;
}

/** Match needs to return the current input symbol, which gets put
 *  into the label for the associated token ref; e.g., x=ID.  Token
 *  and tree parsers need to return different objects. Rather than test
 *  for input stream type or change the IntStream interface, I use
 *  a simple method to ask the recognizer to tell me what the current
 *  input symbol is.
 * 
 *  This is ignored for lexers.
 */
- (id) getCurrentInputSymbol:(id<ANTLRIntStream>)anInput
{
    // The base class has no symbol to offer and ignores anInput.
    return nil;
}

/** Conjure up a missing token during error recovery.
 *
 *  The recognizer attempts to recover from single missing
 *  symbols. But, actions might refer to that missing symbol.
 *  For example, x=ID {f($x);}. The action clearly assumes
 *  that there has been an identifier matched previously and that
 *  $x points at that token. If that token is missing, but
 *  the next token in the stream is what we want we assume that
 *  this token is missing and we keep going. Because we
 *  have to return some token to replace the missing token,
 *  we have to conjure one up. This method gives the user control
 *  over the tokens returned for missing tokens. Mostly,
 *  you will want to create something special for identifier
 *  tokens. For literals such as '{' and ',', the default
 *  action in the parser or tree parser works. It simply creates
 *  a CommonToken of the appropriate type. The text will be the token.
 *  If you change what tokens must be created by the lexer,
 *  override this method to create the appropriate tokens.
 */
- (id)getMissingSymbol:(id<ANTLRIntStream>)anInput
             Exception:(ANTLRRecognitionException *)e
             TokenType:(NSInteger)expectedTokenType
                Follow:(ANTLRBitSet *)follow
{
    // The base class conjures nothing: all arguments are ignored.
    return nil;
}


/** Consumes symbols from anInput until LA(1) is either EOF or tokenType.
 *  The stopping symbol itself is left unconsumed.
 */
-(void) consumeUntilTType:(id<ANTLRIntStream>)anInput TokenType:(NSInteger)tokenType
{
    for ( int lookahead = [anInput LA:1];
          lookahead != ANTLRTokenTypeEOF && lookahead != tokenType;
          lookahead = [anInput LA:1] ) {
        [anInput consume];
    }
}

/** Consume tokens until one matches the given token set */
-(void) consumeUntilFollow:(id<ANTLRIntStream>)anInput Follow:(ANTLRBitSet *)set
{
    // Stop at EOF or at the first symbol that is a member of the set; that
    // symbol is left unconsumed.
    for ( int lookahead = [anInput LA:1];
          lookahead != ANTLRTokenTypeEOF && ![set member:lookahead];
          lookahead = [anInput LA:1] ) {
        [anInput consume];
    }
}

/** Push a rule's follow set using our own hardcoded stack */
- (void)pushFollow:(ANTLRBitSet *)fset
{
    NSInteger nextSlot = state._fsp + 1;
    if ( nextSlot >= [state.following count] ) {
        // The stack array has no slot at that index yet: append one.
        [state.following addObject:fset];
        [fset retain];
        state._fsp = nextSlot;
    }
    else {
        // A slot already exists at that index: overwrite it in place.
        state._fsp = nextSlot;
        [state.following replaceObjectAtIndex:nextSlot withObject:fset];
    }
}

/** Pops the top follow set off the stack and returns it.
 *  When the stack is empty a message is logged and nil is returned.
 */
- (ANTLRBitSet *)popFollow
{
    if ( state._fsp < 0 || !([state.following count] > 0) ) {
        NSLog( @"Attempted to pop a follow when none exists on the stack\n" );
        return nil;
    }
    // Step the stack pointer down, then fetch the entry it pointed at and
    // drop the last array element.
    NSInteger topIndex = state._fsp;
    state._fsp = topIndex - 1;
    ANTLRBitSet *popped = [state.following objectAtIndex:topIndex];
    [state.following removeLastObject];
    return popped;
}

/** Return an array of the rule names in your parser instance
 *  leading up to a call to this method.  You could override if
 *  you want more details such as the file/line info of where
 *  in the parser code a rule is invoked.
 *
 *  This is very useful for error messages and for context-sensitive
 *  error recovery.
 *
 *  Implemented by passing a newly created ANTLRRecognitionException and
 *  this object's class name to getRuleInvocationStack:Recognizer:.
 */
- (AMutableArray *)getRuleInvocationStack
{
    // The class name is only borrowed for the duration of the call below.
    // It was previously retained here and never released, which leaked one
    // reference per call.
    NSString *parserClassName = [self className];
    return [self getRuleInvocationStack:[ANTLRRecognitionException newException] Recognizer:parserClassName];
}

/** A more general version of getRuleInvocationStack where you can
 *  pass in, for example, a RecognitionException to get its rule
 *  stack trace.
 *
 *  Walks the exception's callStackSymbols from the last entry to the first
 *  and collects the entries that do not start with "org.antlr.runtime.",
 *  are not the nextToken rule name, and are equal to recognizerClassName.
 *
 *  TODO: move to a utility class or something; weird having lexer call this
 *
 *  @param e                    exception whose call stack is inspected.
 *  @param recognizerClassName  class name that an entry must equal to be kept.
 *  @return a retained array of the matching entries (possibly empty); the
 *          caller is responsible for releasing it, as before.
 */
- (AMutableArray *)getRuleInvocationStack:(ANTLRRecognitionException *)e
                                Recognizer:(NSString *)recognizerClassName
{
    AMutableArray *rules = [[AMutableArray arrayWithCapacity:20] retain];
    // Not owned by this method, so it must not be released here.
    NSArray *stack = [e callStackSymbols];
    for (NSInteger i = (NSInteger)[stack count] - 1; i >= 0; i--) {
        NSString *t = [stack objectAtIndex:i];
        // commonPrefixWithString:options: returns a (possibly empty) string
        // that is never nil, so using it as a condition skipped every
        // entry.  hasPrefix: is the real "starts with" test.
        if ( [t hasPrefix:@"org.antlr.runtime."] ) {
            continue; // skip support code such as this method
        }
        // -isEqual: is available on every platform and tolerates a nil
        // argument; isEqualTo: is a scripting-support method.
        if ( [t isEqual:NEXT_TOKEN_RULE_NAME] ) {
            continue;
        }
        if ( ![t isEqual:recognizerClassName] ) {
            continue; // must not be part of this parser
        }
        [rules addObject:t];
    }
#ifdef DONTUSEYET
    StackTraceElement[] stack = e.getStackTrace();
    int i = 0;
    for (i=stack.length-1; i>=0; i--) {
        StackTraceElement t = stack[i];
        if ( [t getClassName().startsWith("org.antlr.runtime.") ) {
            continue; // skip support code such as this method
        }
              if ( [[t getMethodName] equals:NEXT_TOKEN_RULE_NAME] ) {
            continue;
        }
              if ( ![[t getClassName] equals:recognizerClassName] ) {
            continue; // must not be part of this parser
        }
              [rules addObject:[t getMethodName]];
    }
#endif
    return rules;
}

- (NSInteger) getBacktrackingLevel
{
    // Ask the recognizer state object for its backtracking value.
    NSInteger currentLevel = [state getBacktracking];
    return currentLevel;
}
      
- (void) setBacktrackingLevel:(NSInteger)level
{
    // Store the new backtracking value on the recognizer state object.
    state.backtracking = level;
}
      
/** Used to print out token names like ID during debugging and
 *  error reporting.  The generated parsers implement a method
 *  that overrides this to point to their own array of token names.
 */
- (NSArray *)getTokenNames
{
    // Expose the tokenNames value unchanged.
    NSArray *names = tokenNames;
    return names;
}

/** The grammar file name, wanted for debugging and other purposes.
 *  ANTLR is expected to generate an implementation of this method.
 */
- (NSString *)getGrammarFileName
{
    NSString *fileName = grammarFileName;
    return fileName;
}

- (NSString *)getSourceName
{
    // This implementation reports no source name.
    NSString *sourceName = nil;
    return sourceName;
}

/** A convenience method for use most often with template rewrites.
 *  Converts an array of tokens into an array of their text.
 *
 *  @param tokens the tokens to convert; may be nil
 *  @return nil when tokens is nil; otherwise a new array holding the
 *          result of -text for each token, in iteration order
 */
- (AMutableArray *)toStrings:(AMutableArray *)tokens
{
    if ( tokens == nil )
        return nil;
    AMutableArray *strings = [AMutableArray arrayWithCapacity:[tokens count]];
    // The loop variable is scoped to the loop; the counter that used to be
    // incremented here was never read and has been removed.
    for (id token in tokens) {
        [strings addObject:[token text]];
    }
    return strings;
}

/** Look up the memoized result for a rule at a given start index.
 *
 *  Returns ANTLR_MEMO_RULE_UNKNOWN when nothing has been recorded for
 *  ruleIndex at ruleStartIndex.  Otherwise returns the integer value
 *  that was recorded for that start index (the index of the last token
 *  matched by the rule).
 *
 *  Side effect: when state.ruleMemo has no entry for ruleIndex, a new
 *  ANTLRHashRule is created and inserted at that index before the lookup.
 *
 *  For now this uses the slow object-based hash table.  Later, a special
 *  one for ints could be made, and also one that tosses out data after
 *  we commit past input position i.
 */
- (NSInteger)getRuleMemoization:(NSInteger)ruleIndex StartIndex:(NSInteger)ruleStartIndex
{
    ANTLRHashRule *memoForRule = [state.ruleMemo objectAtIndex:ruleIndex];
    if ( memoForRule == nil ) {
        // first lookup for this rule: create its table on demand
        memoForRule = [ANTLRHashRule newANTLRHashRuleWithLen:17];
        [state.ruleMemo insertObject:memoForRule atIndex:ruleIndex];
    }

    NSNumber *recordedStop = [memoForRule getRuleMemoStopIndex:ruleStartIndex];
    if ( recordedStop != nil ) {
        return [recordedStop integerValue];
    }
    return ANTLR_MEMO_RULE_UNKNOWN;
}

/** Has this rule already parsed input at the current index in the
 *  input stream?
 *
 *  Returns NO when getRuleMemoization:StartIndex: reports
 *  ANTLR_MEMO_RULE_UNKNOWN for the stream's current index, and YES
 *  otherwise.
 *
 *  This method has side effects when it returns YES:
 *   - if the memoized value is ANTLR_MEMO_RULE_FAILED, state.failed is
 *     set to YES;
 *   - otherwise the input is moved to 1 past the stop token index that
 *     was recorded for this rule last time.
 */
- (BOOL)alreadyParsedRule:(id<ANTLRIntStream>)anInput RuleIndex:(NSInteger)ruleIndex
{
    NSInteger aStopIndex = [self getRuleMemoization:ruleIndex StartIndex:anInput.index];
    if ( aStopIndex == ANTLR_MEMO_RULE_UNKNOWN ) {
        // NSLog(@"rule %ld not yet encountered\n", (long)ruleIndex);
        return NO;
    }
    if ( aStopIndex == ANTLR_MEMO_RULE_FAILED ) {
        // NSInteger is logged as %ld with a (long) cast; %d is the wrong
        // width for NSInteger on 64-bit targets.
        if (debug) NSLog(@"rule %ld will never succeed\n", (long)ruleIndex);
        state.failed = YES;
    }
    else {
        if (debug) NSLog(@"seen rule %ld before; skipping ahead to %ld failed = %@\n", (long)ruleIndex, (long)(aStopIndex+1), state.failed?@"YES":@"NO");
        [anInput seek:(aStopIndex+1)]; // jump to one past stop token
    }
    return YES;
}
      
/** Record whether or not this rule parsed the input at this position
 *  successfully.  The result is stored in state.ruleMemo.
 *
 *  The recorded value is ANTLR_MEMO_RULE_FAILED when state.failed is
 *  set, otherwise the input's current index minus one.
 *
 *  Nothing is recorded when state.ruleMemo is nil, when ruleIndex is
 *  not below its length, or when it holds no entry at ruleIndex.
 */
- (void)memoize:(id<ANTLRIntStream>)anInput
      RuleIndex:(NSInteger)ruleIndex
     StartIndex:(NSInteger)ruleStartIndex
{
    ANTLRRuleStack *aRuleStack = state.ruleMemo;
    NSInteger stopTokenIndex = (state.failed ? ANTLR_MEMO_RULE_FAILED : (anInput.index-1));

    if ( aRuleStack == nil ) {
        if (debug) NSLog(@"!!!!!!!!! memo array is nil for %@", [self getGrammarFileName]);
        return;
    }
    if ( ruleIndex >= [aRuleStack length] ) {
        // Integer arguments are logged as %ld with (long) casts; %d is the
        // wrong width for NSInteger on 64-bit targets.
        if (debug) NSLog(@"!!!!!!!!! memo size is %ld, but rule index is %ld", (long)[aRuleStack length], (long)ruleIndex);
        return;
    }
    if ( [aRuleStack objectAtIndex:ruleIndex] != nil ) {
        [aRuleStack putHashRuleAtRuleIndex:ruleIndex StartIndex:ruleStartIndex StopIndex:stopTokenIndex];
    }
}
   
/** Return how many rule/input-index pairs are recorded in total,
 *  i.e. the sum of -count over every non-nil entry of state.ruleMemo.
 *  Returns 0 when state.ruleMemo is nil.
 *  TODO: this includes synpreds. :(
 */
- (NSInteger)getRuleMemoizationCacheSize
{
    ANTLRRuleStack *memoTable = state.ruleMemo;
    if ( memoTable == nil ) {
        return 0;
    }

    int total = 0;
    for (NSUInteger slot = 0; slot < [memoTable length]; slot++) {
        ANTLRHashRule *entry = [memoTable objectAtIndex:slot];
        if ( entry == nil ) {
            continue; // no table for this rule
        }
        total += [entry count]; // how many input indexes are recorded?
    }
    return total;
}

#pragma warning Have to fix traceIn and traceOut.
/** Log entry into a rule: the rule name and the current input symbol,
 *  plus a backtracking marker while state.backtracking is above zero.
 *  ruleIndex is accepted but not used here.
 */
- (void)traceIn:(NSString *)ruleName Index:(NSInteger)ruleIndex Object:(id)inputSymbol
{
    NSLog(@"enter %@ %@", ruleName, inputSymbol);
    if ( state.backtracking > 0 ) {
        // state.backtracking is a nesting count, so test it for non-zero.
        // Comparing it with YES only matched a count of exactly 1 and
        // printed "NO" for deeper nesting.
        NSLog(@" backtracking=%s", (state.backtracking ? "YES" : "NO"));
    }
    NSLog(@"\n");
}

/** Log exit from a rule: the rule name and the current input symbol.
 *  While state.backtracking is above zero, also log a backtracking
 *  marker and whether state.failed is set.  ruleIndex is not used here.
 */
- (void)traceOut:(NSString *)ruleName Index:(NSInteger)ruleIndex Object:(id)inputSymbol
{
    NSLog(@"exit %@ -- %@", ruleName, inputSymbol);
    if ( state.backtracking > 0 ) {
        const char *backtrackingFlag = state.backtracking ? "YES" : "NO";
        const char *outcome = state.failed ? "failed" : "succeeded";
        NSLog(@" backtracking=%s %s", backtrackingFlag, outcome);
    }
    NSLog(@"\n");
}


// Run a syntactic predicate method through its selector, so that arbitrary
// synpreds can be supported. The input is marked before the attempt and
// rewound afterwards, state.backtracking is raised for the duration of the
// attempt, and state.failed is cleared before returning. The result is YES
// when state.failed was not set after the attempt.
- (BOOL) evaluateSyntacticPredicate:(SEL)synpredFragment // stream:(id<ANTLRIntStream>)input
{
    state.backtracking++;

    id<ANTLRIntStream> stream = self.input;
    int marker = [stream mark];

    @try {
        [self performSelector:synpredFragment];
    }
    @catch (ANTLRRecognitionException *re) {
        NSLog(@"impossible synpred: %@", re.name);
    }

    // capture the outcome before the state is restored
    BOOL matched = !state.failed;

    [stream rewind:marker];
    state.backtracking--;
    state.failed = NO;

    return matched;
}
              
@end
Alerts (7)

Complexity hotspot; lines 99 to 100 (total complexity: 4)
99 100
Complexity hotspot; lines 120 to 121 (total complexity: 4)
120 121
Complexity hotspot; lines 462 to 463 (total complexity: 4)
462 463
Complexity hotspot; line 824 (total complexity: 4)
824