LexerCore.java - This Java code is a parser for a string in…

/servers/jain-sip-ext/src/main/java/gov/nist/javax/sip/parser/chars/LexerCore.java

http://mobicents.googlecode.com/ · Java · 800 lines · 552 code · 57 blank · 191 comment · 127 complexity · e3b1cf446ca84986c8ee93348f74735c MD5 · raw file

/*
 * JBoss, Home of Professional Open Source
 * Copyright 2011, Red Hat, Inc. and individual contributors
 * by the @authors tag. See the copyright.txt in the distribution for a
 * full listing of individual contributors.
 *
 * This is free software; you can redistribute it and/or modify it
 * under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation; either version 2.1 of
 * the License, or (at your option) any later version.
 *
 * This software is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this software; if not, write to the Free
 * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
 * 02110-1301 USA, or see the FSF site: http://www.fsf.org.
 */

/*
* Conditions Of Use
*
* This software was developed by employees of the National Institute of
* Standards and Technology (NIST), an agency of the Federal Government.
* Pursuant to title 15 Untied States Code Section 105, works of NIST
* employees are not subject to copyright protection in the United States
* and are considered to be in the public domain.  As a result, a formal
* license is not needed to use the software.
*
* This software is provided by NIST as a service and is expressly
* provided "AS IS."  NIST MAKES NO WARRANTY OF ANY KIND, EXPRESS, IMPLIED
* OR STATUTORY, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTY OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NON-INFRINGEMENT
* AND DATA ACCURACY.  NIST does not warrant or make any representations
* regarding the use of the software or the results thereof, including but
* not limited to the correctness, accuracy, reliability or usefulness of
* the software.
*
* Permission to use this software is contingent upon your acceptance
* of the terms of this agreement
*
* .
*
*/
package gov.nist.javax.sip.parser.chars;

import gov.nist.core.Debug;

import java.text.ParseException;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

/** A lexical analyzer that is used by all parsers in our implementation.
 *
 *@version 1.2
 *@since 1.1
 *
 *@author M. Ranganathan
 */
public class LexerCore extends StringTokenizer {

    // IMPORTANT - All keyword matches should be between START and END
    public static final int START = 2048;
    public static final int END = START + 2048;
    // IMPORTANT -- This should be < END
    public static final int ID = END - 1;
    public static final int SAFE = END - 2;
    // Individial token classes.
    public static final int WHITESPACE = END + 1;
    public static final int DIGIT = END + 2;
    public static final int ALPHA = END + 3;
    public static final int BACKSLASH = (int) '\\';
    public static final int QUOTE = (int) '\'';
    public static final int AT = (int) '@';
    public static final int SP = (int) ' ';
    public static final int HT = (int) '\t';
    public static final int COLON = (int) ':';
    public static final int STAR = (int) '*';
    public static final int DOLLAR = (int) '$';
    public static final int PLUS = (int) '+';
    public static final int POUND = (int) '#';
    public static final int MINUS = (int) '-';
    public static final int DOUBLEQUOTE = (int) '\"';
    public static final int TILDE = (int) '~';
    public static final int BACK_QUOTE = (int) '`';
    public static final int NULL = (int) '\0';
    public static final int EQUALS = (int) '=';
    public static final int SEMICOLON = (int) ';';
    public static final int SLASH = (int) '/';
    public static final int L_SQUARE_BRACKET = (int) '[';
    public static final int R_SQUARE_BRACKET = (int) ']';
    public static final int R_CURLY = (int) '}';
    public static final int L_CURLY = (int) '{';
    public static final int HAT = (int) '^';
    public static final int BAR = (int) '|';
    public static final int DOT = (int) '.';
    public static final int EXCLAMATION = (int) '!';
    public static final int LPAREN = (int) '(';
    public static final int RPAREN = (int) ')';
    public static final int GREATER_THAN = (int) '>';
    public static final int LESS_THAN = (int) '<';
    public static final int PERCENT = (int) '%';
    public static final int QUESTION = (int) '?';
    public static final int AND = (int) '&';
    public static final int UNDERSCORE = (int) '_';

    // jeand : using concurrent data structure to avoid excessive blocking
    protected static final ConcurrentHashMap<Integer, String> globalSymbolTable;
    protected static final ConcurrentHashMap<String, ConcurrentHashMap<String, Integer>> lexerTables;
    protected Map<String, Integer> currentLexer;
    protected String currentLexerName;
    protected Token currentMatch;

    static {
        globalSymbolTable = new ConcurrentHashMap<Integer, String>();        
        lexerTables = new ConcurrentHashMap<String, ConcurrentHashMap<String, Integer>>();
    }

    protected void addKeyword(String name, int value) {
        // System.out.println("addKeyword " + name + " value = " + value);
        // new Exception().printStackTrace();
        Integer val = Integer.valueOf(value);
        currentLexer.put(name, val);
//        if (!globalSymbolTable.containsKey(val))
        globalSymbolTable.putIfAbsent(val, name);
    }

    public String lookupToken(int value) {
        if (value > START) {
            return (String) globalSymbolTable.get(Integer.valueOf(value));
        } else {
            Character ch = Character.valueOf((char) value);
            return ch.toString();
        }
    }

//    protected Map<String, Integer> addLexer(String lexerName) {
//        currentLexer = (Map<String, Integer>) lexerTables.get(lexerName);
//        if (currentLexer == null) {
//            currentLexer = new Hashtable();
//            lexerTables.put(lexerName, currentLexer);
//        }
//        return currentLexer;
//    }

    //public abstract void selectLexer(String lexerName);

    public void selectLexer(String lexerName) {
        this.currentLexerName = lexerName;
    }

    protected LexerCore() {
        this.currentLexer = new ConcurrentHashMap<String, Integer>();
        this.currentLexerName = "charLexer";
    }

    /** Initialize the lexer with a buffer.
     */
    public LexerCore(String lexerName, char[] buffer) {
        super(buffer);
        this.currentLexerName = lexerName;
    }

    /** Peek the next id but dont move the buffer pointer forward.
     */

    public char[] peekNextId() {
        int oldPtr = ptr;
        char[] retval = ttoken();
        savedPtr = ptr;
        ptr = oldPtr;
        return retval;
    }

    /** Get the next id.
     */
    public char[] getNextId() {
        return ttoken();
    }

    // call this after you call match
    public Token getNextToken() {
        return this.currentMatch;

    }

    /** Look ahead for one token.
     */
    public Token peekNextToken() throws ParseException {
        return (Token) peekNextToken(1)[0];
    }

    public Token[] peekNextToken(int ntokens) throws ParseException {
        int old = ptr;
        Token[] retval = new Token[ntokens];
        for (int i = 0; i < ntokens; i++) {
            Token tok = new Token();
            if (startsId()) {
                char[] id = ttoken();
                tok.tokenValue = id;
                String idUppercase = String.valueOf(id).toUpperCase().intern();
                if (currentLexer.containsKey(idUppercase)) {
                    Integer type = (Integer) currentLexer.get(idUppercase);
                    tok.tokenType = type.intValue();
                } else
                    tok.tokenType = ID;
            } else {
                char nextChar = getNextChar();
                tok.tokenValue = new char[] {nextChar};
                if (isAlpha(nextChar)) {
                    tok.tokenType = ALPHA;
                } else if (isDigit(nextChar)) {
                    tok.tokenType = DIGIT;
                } else
                    tok.tokenType = (int) nextChar;
            }
            retval[i] = tok;
        }
        savedPtr = ptr;
        ptr = old;
        return retval;
    }

    /** Match the given token or throw an exception if no such token
     * can be matched.
     */
    public Token match(int tok) throws ParseException {
        if (Debug.parserDebug) {
            Debug.println("match " + tok);
        }
        if (tok > START && tok < END) {
            if (tok == ID) {
                // Generic ID sought.
                if (!startsId())
                    throw new ParseException(buffer + "\nID expected", ptr);
                char[] id = getNextId();
                this.currentMatch = new Token();
                this.currentMatch.tokenValue = id;
                this.currentMatch.tokenType = ID;
            } else if (tok == SAFE) {
                if (!startsSafeToken())
                    throw new ParseException(buffer + "\nID expected", ptr);
                char[] id = ttokenSafe();
                this.currentMatch = new Token();
                this.currentMatch.tokenValue = id;
                this.currentMatch.tokenType = SAFE;
            } else {
            	char[] nexttok = getNextId();
                Integer cur = (Integer) currentLexer.get(String.valueOf(nexttok).toUpperCase().intern());

                if (cur == null || cur.intValue() != tok)
                    throw new ParseException(
                        buffer + "\nUnexpected Token : " + nexttok,
                        ptr);
                this.currentMatch = new Token();
                this.currentMatch.tokenValue = nexttok;
                this.currentMatch.tokenType = tok;
            }
        } else if (tok > END) {
            // Character classes.
            char next = lookAhead(0);
            if (tok == DIGIT) {
                if (!isDigit(next))
                    throw new ParseException(buffer + "\nExpecting DIGIT", ptr);
                this.currentMatch = new Token();
                this.currentMatch.tokenValue = new char[] {next};
                this.currentMatch.tokenType = tok;
                consume(1);

            } else if (tok == ALPHA) {
                if (!isAlpha(next))
                    throw new ParseException(buffer + "\nExpecting ALPHA", ptr);
                this.currentMatch = new Token();
                this.currentMatch.tokenValue =
                	new char[] {next};
                this.currentMatch.tokenType = tok;
                consume(1);

            }

        } else {
            // This is a direct character spec.
            char ch = (char) tok;
            char next = lookAhead(0);
            if (next == ch) {
                /*this.currentMatch = new Token();
                this.currentMatch.tokenValue =
                    String.valueOf(ch);
                this.currentMatch.tokenType = tok;*/
                consume(1);
            } else
                throw new ParseException(
                    buffer + "\nExpecting  >>>" + ch + "<<< got >>>"
                    + next + "<<<", ptr);
        }
        return this.currentMatch;
    }

    public void SPorHT() {
        try {
            char c = lookAhead(0);
            while (c == ' ' || c == '\t') {
                consume(1);
                c = lookAhead(0);
            }
        } catch (ParseException ex) {
            // Ignore
        }
    }

    /**
     * JvB: utility function added to validate tokens
     *
     * @see RFC3261 section 25.1:
     * token       =  1*(alphanum / "-" / "." / "!" / "%" / "*"
                     / "_" / "+" / "`" / "'" / "~" )

     * @param c - character to check
     * @return true iff character c is a valid token character as per RFC3261
     */
    public static final boolean isTokenChar( char c ) {
        if ( isAlphaDigit(c) ) return true;
        else switch (c)
        {
            case '-':
            case '.':
            case '!':
            case '%':
            case '*':
            case '_':
            case '+':
            case '`':
            case '\'':
            case '~':
                return true;
            default:
                return false;
        }
    }


    public boolean startsId() {
        try {
            char nextChar = lookAhead(0);
            return isTokenChar(nextChar);
        } catch (ParseException ex) {
            return false;
        }
    }

    public boolean startsSafeToken() {
        try {
            char nextChar = lookAhead(0);
            if (isAlphaDigit(nextChar)) {
                return true;
            }
            else {
                switch (nextChar) {
                    case '_':
                    case '+':
                    case '-':
                    case '!':
                    case '`':
                    case '\'':
                    case '.':
                    case '/':
                    case '}':
                    case '{':
                    case ']':
                    case '[':
                    case '^':
                    case '|':
                    case '~':
                    case '%': // bug fix by Bruno Konik, JvB copied here
                    case '#':
                    case '@':
                    case '$':
                    case ':':
                    case ';':
                    case '?':
                    case '\"':
                    case '*':
                    case '=': // Issue 155 on java.net
                        return true;
                    default:
                        return false;
                }
            }
        } catch (ParseException ex) {
            return false;
        }
    }

    public char[] ttoken() {
        int startIdx = ptr;        
        try {
            while (hasMoreChars()) {
                char nextChar = lookAhead(0);
                if ( isTokenChar(nextChar) ) {
                    consume(1);                    
                } else {
                    break;
                }
            }
            char[] retval = new char[ptr - startIdx];
            System.arraycopy(buffer, startIdx, retval, 0, ptr - startIdx);
//            return String.valueOf(buffer, startIdx, ptr - startIdx);
            return retval;
        } catch (ParseException ex) {
            return null;
        }
    }

    /* JvB: unreferenced
    public String ttokenAllowSpace() {
        int startIdx = ptr;
        try {
            while (hasMoreChars()) {
                char nextChar = lookAhead(0);
                if (isAlphaDigit(nextChar)) {
                    consume(1);
                }
                else {
                    boolean isValidChar = false;
                    switch (nextChar) {
                        case '_':
                        case '+':
                        case '-':
                        case '!':
                        case '`':
                        case '\'':
                        case '~':
                        case '%': // bug fix by Bruno Konik, JvB copied here
                        case '.':
                        case ' ':
                        case '\t':
                        case '*':
                            isValidChar = true;
                    }
                    if (isValidChar) {
                        consume(1);
                    }
                    else {
                        break;
                    }
                }

            }
            return buffer.substring(startIdx, ptr);
        } catch (ParseException ex) {
            return null;
        }
    }*/

    public char[] ttokenSafe() {
        int startIdx = ptr;
        try {
            while (hasMoreChars()) {
                char nextChar = lookAhead(0);
                if (isAlphaDigit(nextChar)) {
                    consume(1);
                }
                else {
                    boolean isValidChar = false;
                    switch (nextChar) {
                        case '_':
                        case '+':
                        case '-':
                        case '!':
                        case '`':
                        case '\'':
                        case '.':
                        case '/':
                        case '}':
                        case '{':
                        case ']':
                        case '[':
                        case '^':
                        case '|':
                        case '~':
                        case '%': // bug fix by Bruno Konik, JvB copied here
                        case '#':
                        case '@':
                        case '$':
                        case ':':
                        case ';':
                        case '?':
                        case '\"':
                        case '*':
                            isValidChar = true;
                    }
                    if (isValidChar) {
                        consume(1);
                    }
                    else {
                        break;
                    }
                }
            }
            char[] retval = new char[ptr - startIdx];
            System.arraycopy(buffer, startIdx, retval, 0, ptr - startIdx);
//          	return String.valueOf(buffer, startIdx, ptr - startIdx);
            return retval;
            
        } catch (ParseException ex) {
            return null;
        }
    }

    public static final char ALPHA_VALID_CHARS = Character.MAX_VALUE;
    public static final char DIGIT_VALID_CHARS = Character.MAX_VALUE - 1;
    public static final char ALPHADIGIT_VALID_CHARS = Character.MAX_VALUE - 2;
    
    public void consumeValidChars(char[] validChars) {
        int validCharsLength = validChars.length;
        try {
            while (hasMoreChars()) {
                char nextChar = lookAhead(0);
                boolean isValid = false;
                for (int i = 0; i < validCharsLength; i++) {
                    char validChar = validChars[i];
                    switch(validChar) {
                        case ALPHA_VALID_CHARS:
                            isValid = isAlpha(nextChar);
                            break;
                        case DIGIT_VALID_CHARS:
                            isValid = isDigit(nextChar);
                            break;
                        case ALPHADIGIT_VALID_CHARS:
                            isValid = isAlphaDigit(nextChar);
                            break;
                        default:
                            isValid = nextChar == validChar;
                    }
                    if (isValid) {
                        break;
                    }
                }
                if (isValid) {
                    consume(1);
                }
                else {
                    break;
                }
            }
        } catch (ParseException ex) {

        }
    }

    /** Parse a comment string cursor is at a ". Leave cursor at closing "
    *@return the substring containing the quoted string excluding the
    * closing quote.
    */
    public String quotedString() throws ParseException {
        int startIdx = ptr + 1;        
        if (lookAhead(0) != '\"')
            return null;        
        consume(1);
        while (true) {
            char next = getNextChar();
            if (next == '\"') {
                // Got to the terminating quote.
                break;
            } else if (next == '\0') {
                throw new ParseException(
                    String.valueOf(this.buffer) + " :unexpected EOL",
                    this.ptr);
            } else if (next == '\\') {
                consume(1);
            }
        }
        return String.valueOf(buffer, startIdx, ptr - startIdx - 1);
    }

    /** Parse a comment string cursor is at a "(". Leave cursor at )
    *@return the substring containing the comment excluding the
    * closing brace.
    */
    public String comment() throws ParseException {
        StringBuilder retval = new StringBuilder();
        if (lookAhead(0) != '(')
            return null;
        consume(1);
        while (true) {
            char next = getNextChar();
            if (next == ')') {
                break;
            } else if (next == '\0') {
                throw new ParseException(
                    this.buffer + " :unexpected EOL",
                    this.ptr);
            } else if (next == '\\') {
                retval.append(next);
                next = getNextChar();
                if (next == '\0')
                    throw new ParseException(
                        this.buffer + " : unexpected EOL",
                        this.ptr);
                retval.append(next);
            } else {
                retval.append(next);
            }
        }
        return retval.toString();
    }

    /** Return a substring containing no semicolons.
    *@return a substring containing no semicolons.
    */
    public String byteStringNoSemicolon() {
        StringBuilder retval = new StringBuilder();
        try {
            while (true) {
                char next = lookAhead(0);
                // bug fix from Ben Evans.
                if (next == '\0' || next == '\n' || next == ';' || next == ',' ) {
                    break;
                } else {
                    consume(1);
                    retval.append(next);
                }
            }
        } catch (ParseException ex) {
            return retval.toString();
        }
        return retval.toString();
    }

    /**
     * Scan until you see a slash or an EOL.
     *
     * @return substring containing no slash.
     */
    public String byteStringNoSlash() {
        StringBuilder retval = new StringBuilder();
        try {
            while (true) {
                char next = lookAhead(0);
                // bug fix from Ben Evans.
                if (next == '\0' || next == '\n' || next == '/'  ) {
                    break;
                } else {
                    consume(1);
                    retval.append(next);
                }
            }
        } catch (ParseException ex) {
            return retval.toString();
        }
        return retval.toString();
    }

    /** Return a substring containing no commas
    *@return a substring containing no commas.
    */

    public String byteStringNoComma() {
        StringBuilder retval = new StringBuilder();
        try {
            while (true) {
                char next = lookAhead(0);
                if (next == '\n' || next == ',') {
                    break;
                } else {
                    consume(1);
                    retval.append(next);
                }
            }
        } catch (ParseException ex) {
        }
        return retval.toString();
    }

    public static String charAsString(char ch) {
        return String.valueOf(ch);
    }

    /** Lookahead in the inputBuffer for n chars and return as a string.
     * Do not consume the input.
     */
    public String charAsString(int nchars) {
        return String.valueOf(buffer, ptr, nchars -1);
    }

    /** Get and consume the next number.
     *@return a substring corresponding to a number
     *(i.e. sequence of digits).
     */
    public String number() throws ParseException {

        int startIdx = ptr;
        try {
            if (!isDigit(lookAhead(0))) {
                throw new ParseException(
                    buffer + ": Unexpected token at " + lookAhead(0),
                    ptr);
            }
            consume(1);
            while (true) {
                char next = lookAhead(0);
                if (isDigit(next)) {
                    consume(1);
                } else
                    break;
            }
            return String.valueOf(buffer, startIdx, ptr - startIdx);
        } catch (ParseException ex) {
            return String.valueOf(buffer, startIdx, ptr - startIdx);
        }
    }

    /** Mark the position for backtracking.
     *@return the current location of the pointer.
     */
    public int markInputPosition() {
        return ptr;
    }

    /** Rewind the input ptr to the marked position.
     *@param position - the position to rewind the parser to.
     */
    public void rewindInputPosition(int position) {
        this.ptr = position;
    }

    /** Get the rest of the String
     * @return rest of the buffer.
     */
    public String getRest() {
        if (ptr >= bufferLen)
            return null;
        else
            return String.valueOf(buffer, ptr, bufferLen - ptr - 1);
    }

    /** Get the sub-String until the character is encountered
     * @param c the character to match
     * @return the substring that matches.
     */
    public String getString(char c) throws ParseException {
        StringBuilder retval = new StringBuilder();
        while (true) {
            char next = lookAhead(0);
            //System.out.println(" next = [" + next + ']' + "ptr = " + ptr);
            //System.out.println(next == '\0');

            if (next == '\0') {
                throw new ParseException(
                    this.buffer + "unexpected EOL",
                    this.ptr);
            } else if (next == c) {
                consume(1);
                break;
            } else if (next == '\\') {
                consume(1);
                char nextchar = lookAhead(0);
                if (nextchar == '\0') {
                    throw new ParseException(
                        this.buffer + "unexpected EOL",
                        this.ptr);
                } else {
                    consume(1);
                    retval.append(nextchar);
                }
            } else {
                consume(1);
                retval.append(next);
            }
        }
        return retval.toString();
    }

    /** Get the read pointer.
     */
    public int getPtr() {
        return this.ptr;
    }

    /** Get the buffer.
     */
    public String getBuffer() {
        return String.valueOf(buffer);
    }
    
    public char[] getSubBuffer(int start, int end) {
    	char[] retval = new char[end - start];
    	System.arraycopy(buffer, start, retval, 0, end - start);
        return retval;
    }

    /** Create a parse exception.
     */
    public ParseException createParseException() {
        return new ParseException(getBuffer(), this.ptr);
    }
}
Summary ✨

This Java code is a parser for a string input buffer, designed to parse and extract various substrings from the input. It provides methods for getting specific characters, numbers, strings, and other substrings, as well as functions for rewinding the input pointer, creating parse exceptions, and getting the current read pointer and buffer contents.
Tech Fingerprint

Alerts (6)

'=' Maintainability Info: Avoid using unnamed 'magic' numbers directly in comparisons or assignments. Use named constants (static final variables) instead to improve readability and maintainability.
66 67 70 73 74
Complexity hotspot; line 620 (total complexity: 8)
620