/servers/jain-sip-ext/src/main/java/gov/nist/javax/sip/parser/chars/LexerCore.java
Java | 800 lines | 552 code | 57 blank | 191 comment | 127 complexity | e3b1cf446ca84986c8ee93348f74735c MD5 | raw file
Possible License(s): LGPL-3.0, GPL-3.0, LGPL-2.1, GPL-2.0, CC-BY-SA-3.0, CC0-1.0, Apache-2.0, BSD-3-Clause
- /*
- * JBoss, Home of Professional Open Source
- * Copyright 2011, Red Hat, Inc. and individual contributors
- * by the @authors tag. See the copyright.txt in the distribution for a
- * full listing of individual contributors.
- *
- * This is free software; you can redistribute it and/or modify it
- * under the terms of the GNU Lesser General Public License as
- * published by the Free Software Foundation; either version 2.1 of
- * the License, or (at your option) any later version.
- *
- * This software is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this software; if not, write to the Free
- * Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
- * 02110-1301 USA, or see the FSF site: http://www.fsf.org.
- */
- /*
- * Conditions Of Use
- *
- * This software was developed by employees of the National Institute of
- * Standards and Technology (NIST), an agency of the Federal Government.
- * Pursuant to title 15 Untied States Code Section 105, works of NIST
- * employees are not subject to copyright protection in the United States
- * and are considered to be in the public domain. As a result, a formal
- * license is not needed to use the software.
- *
- * This software is provided by NIST as a service and is expressly
- * provided "AS IS." NIST MAKES NO WARRANTY OF ANY KIND, EXPRESS, IMPLIED
- * OR STATUTORY, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTY OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, NON-INFRINGEMENT
- * AND DATA ACCURACY. NIST does not warrant or make any representations
- * regarding the use of the software or the results thereof, including but
- * not limited to the correctness, accuracy, reliability or usefulness of
- * the software.
- *
- * Permission to use this software is contingent upon your acceptance
- * of the terms of this agreement
- *
- * .
- *
- */
- package gov.nist.javax.sip.parser.chars;
- import gov.nist.core.Debug;
- import java.text.ParseException;
- import java.util.Map;
- import java.util.concurrent.ConcurrentHashMap;
- /** A lexical analyzer that is used by all parsers in our implementation.
- *
- *@version 1.2
- *@since 1.1
- *
- *@author M. Ranganathan
- */
- public class LexerCore extends StringTokenizer {
- // IMPORTANT - All keyword matches should be between START and END
- public static final int START = 2048;
- public static final int END = START + 2048;
- // IMPORTANT -- This should be < END
- public static final int ID = END - 1;
- public static final int SAFE = END - 2;
- // Individial token classes.
- public static final int WHITESPACE = END + 1;
- public static final int DIGIT = END + 2;
- public static final int ALPHA = END + 3;
- public static final int BACKSLASH = (int) '\\';
- public static final int QUOTE = (int) '\'';
- public static final int AT = (int) '@';
- public static final int SP = (int) ' ';
- public static final int HT = (int) '\t';
- public static final int COLON = (int) ':';
- public static final int STAR = (int) '*';
- public static final int DOLLAR = (int) '$';
- public static final int PLUS = (int) '+';
- public static final int POUND = (int) '#';
- public static final int MINUS = (int) '-';
- public static final int DOUBLEQUOTE = (int) '\"';
- public static final int TILDE = (int) '~';
- public static final int BACK_QUOTE = (int) '`';
- public static final int NULL = (int) '\0';
- public static final int EQUALS = (int) '=';
- public static final int SEMICOLON = (int) ';';
- public static final int SLASH = (int) '/';
- public static final int L_SQUARE_BRACKET = (int) '[';
- public static final int R_SQUARE_BRACKET = (int) ']';
- public static final int R_CURLY = (int) '}';
- public static final int L_CURLY = (int) '{';
- public static final int HAT = (int) '^';
- public static final int BAR = (int) '|';
- public static final int DOT = (int) '.';
- public static final int EXCLAMATION = (int) '!';
- public static final int LPAREN = (int) '(';
- public static final int RPAREN = (int) ')';
- public static final int GREATER_THAN = (int) '>';
- public static final int LESS_THAN = (int) '<';
- public static final int PERCENT = (int) '%';
- public static final int QUESTION = (int) '?';
- public static final int AND = (int) '&';
- public static final int UNDERSCORE = (int) '_';
- // jeand : using concurrent data structure to avoid excessive blocking
- protected static final ConcurrentHashMap<Integer, String> globalSymbolTable;
- protected static final ConcurrentHashMap<String, ConcurrentHashMap<String, Integer>> lexerTables;
- protected Map<String, Integer> currentLexer;
- protected String currentLexerName;
- protected Token currentMatch;
- static {
- globalSymbolTable = new ConcurrentHashMap<Integer, String>();
- lexerTables = new ConcurrentHashMap<String, ConcurrentHashMap<String, Integer>>();
- }
- protected void addKeyword(String name, int value) {
- // System.out.println("addKeyword " + name + " value = " + value);
- // new Exception().printStackTrace();
- Integer val = Integer.valueOf(value);
- currentLexer.put(name, val);
- // if (!globalSymbolTable.containsKey(val))
- globalSymbolTable.putIfAbsent(val, name);
- }
- public String lookupToken(int value) {
- if (value > START) {
- return (String) globalSymbolTable.get(Integer.valueOf(value));
- } else {
- Character ch = Character.valueOf((char) value);
- return ch.toString();
- }
- }
- // protected Map<String, Integer> addLexer(String lexerName) {
- // currentLexer = (Map<String, Integer>) lexerTables.get(lexerName);
- // if (currentLexer == null) {
- // currentLexer = new Hashtable();
- // lexerTables.put(lexerName, currentLexer);
- // }
- // return currentLexer;
- // }
- //public abstract void selectLexer(String lexerName);
- public void selectLexer(String lexerName) {
- this.currentLexerName = lexerName;
- }
- protected LexerCore() {
- this.currentLexer = new ConcurrentHashMap<String, Integer>();
- this.currentLexerName = "charLexer";
- }
- /** Initialize the lexer with a buffer.
- */
- public LexerCore(String lexerName, char[] buffer) {
- super(buffer);
- this.currentLexerName = lexerName;
- }
- /** Peek the next id but dont move the buffer pointer forward.
- */
- public char[] peekNextId() {
- int oldPtr = ptr;
- char[] retval = ttoken();
- savedPtr = ptr;
- ptr = oldPtr;
- return retval;
- }
- /** Get the next id.
- */
- public char[] getNextId() {
- return ttoken();
- }
- // call this after you call match
- public Token getNextToken() {
- return this.currentMatch;
- }
- /** Look ahead for one token.
- */
- public Token peekNextToken() throws ParseException {
- return (Token) peekNextToken(1)[0];
- }
- public Token[] peekNextToken(int ntokens) throws ParseException {
- int old = ptr;
- Token[] retval = new Token[ntokens];
- for (int i = 0; i < ntokens; i++) {
- Token tok = new Token();
- if (startsId()) {
- char[] id = ttoken();
- tok.tokenValue = id;
- String idUppercase = String.valueOf(id).toUpperCase().intern();
- if (currentLexer.containsKey(idUppercase)) {
- Integer type = (Integer) currentLexer.get(idUppercase);
- tok.tokenType = type.intValue();
- } else
- tok.tokenType = ID;
- } else {
- char nextChar = getNextChar();
- tok.tokenValue = new char[] {nextChar};
- if (isAlpha(nextChar)) {
- tok.tokenType = ALPHA;
- } else if (isDigit(nextChar)) {
- tok.tokenType = DIGIT;
- } else
- tok.tokenType = (int) nextChar;
- }
- retval[i] = tok;
- }
- savedPtr = ptr;
- ptr = old;
- return retval;
- }
- /** Match the given token or throw an exception if no such token
- * can be matched.
- */
- public Token match(int tok) throws ParseException {
- if (Debug.parserDebug) {
- Debug.println("match " + tok);
- }
- if (tok > START && tok < END) {
- if (tok == ID) {
- // Generic ID sought.
- if (!startsId())
- throw new ParseException(buffer + "\nID expected", ptr);
- char[] id = getNextId();
- this.currentMatch = new Token();
- this.currentMatch.tokenValue = id;
- this.currentMatch.tokenType = ID;
- } else if (tok == SAFE) {
- if (!startsSafeToken())
- throw new ParseException(buffer + "\nID expected", ptr);
- char[] id = ttokenSafe();
- this.currentMatch = new Token();
- this.currentMatch.tokenValue = id;
- this.currentMatch.tokenType = SAFE;
- } else {
- char[] nexttok = getNextId();
- Integer cur = (Integer) currentLexer.get(String.valueOf(nexttok).toUpperCase().intern());
- if (cur == null || cur.intValue() != tok)
- throw new ParseException(
- buffer + "\nUnexpected Token : " + nexttok,
- ptr);
- this.currentMatch = new Token();
- this.currentMatch.tokenValue = nexttok;
- this.currentMatch.tokenType = tok;
- }
- } else if (tok > END) {
- // Character classes.
- char next = lookAhead(0);
- if (tok == DIGIT) {
- if (!isDigit(next))
- throw new ParseException(buffer + "\nExpecting DIGIT", ptr);
- this.currentMatch = new Token();
- this.currentMatch.tokenValue = new char[] {next};
- this.currentMatch.tokenType = tok;
- consume(1);
- } else if (tok == ALPHA) {
- if (!isAlpha(next))
- throw new ParseException(buffer + "\nExpecting ALPHA", ptr);
- this.currentMatch = new Token();
- this.currentMatch.tokenValue =
- new char[] {next};
- this.currentMatch.tokenType = tok;
- consume(1);
- }
- } else {
- // This is a direct character spec.
- char ch = (char) tok;
- char next = lookAhead(0);
- if (next == ch) {
- /*this.currentMatch = new Token();
- this.currentMatch.tokenValue =
- String.valueOf(ch);
- this.currentMatch.tokenType = tok;*/
- consume(1);
- } else
- throw new ParseException(
- buffer + "\nExpecting >>>" + ch + "<<< got >>>"
- + next + "<<<", ptr);
- }
- return this.currentMatch;
- }
- public void SPorHT() {
- try {
- char c = lookAhead(0);
- while (c == ' ' || c == '\t') {
- consume(1);
- c = lookAhead(0);
- }
- } catch (ParseException ex) {
- // Ignore
- }
- }
- /**
- * JvB: utility function added to validate tokens
- *
- * @see RFC3261 section 25.1:
- * token = 1*(alphanum / "-" / "." / "!" / "%" / "*"
- / "_" / "+" / "`" / "'" / "~" )
- * @param c - character to check
- * @return true iff character c is a valid token character as per RFC3261
- */
- public static final boolean isTokenChar( char c ) {
- if ( isAlphaDigit(c) ) return true;
- else switch (c)
- {
- case '-':
- case '.':
- case '!':
- case '%':
- case '*':
- case '_':
- case '+':
- case '`':
- case '\'':
- case '~':
- return true;
- default:
- return false;
- }
- }
- public boolean startsId() {
- try {
- char nextChar = lookAhead(0);
- return isTokenChar(nextChar);
- } catch (ParseException ex) {
- return false;
- }
- }
- public boolean startsSafeToken() {
- try {
- char nextChar = lookAhead(0);
- if (isAlphaDigit(nextChar)) {
- return true;
- }
- else {
- switch (nextChar) {
- case '_':
- case '+':
- case '-':
- case '!':
- case '`':
- case '\'':
- case '.':
- case '/':
- case '}':
- case '{':
- case ']':
- case '[':
- case '^':
- case '|':
- case '~':
- case '%': // bug fix by Bruno Konik, JvB copied here
- case '#':
- case '@':
- case '$':
- case ':':
- case ';':
- case '?':
- case '\"':
- case '*':
- case '=': // Issue 155 on java.net
- return true;
- default:
- return false;
- }
- }
- } catch (ParseException ex) {
- return false;
- }
- }
- public char[] ttoken() {
- int startIdx = ptr;
- try {
- while (hasMoreChars()) {
- char nextChar = lookAhead(0);
- if ( isTokenChar(nextChar) ) {
- consume(1);
- } else {
- break;
- }
- }
- char[] retval = new char[ptr - startIdx];
- System.arraycopy(buffer, startIdx, retval, 0, ptr - startIdx);
- // return String.valueOf(buffer, startIdx, ptr - startIdx);
- return retval;
- } catch (ParseException ex) {
- return null;
- }
- }
- /* JvB: unreferenced
- public String ttokenAllowSpace() {
- int startIdx = ptr;
- try {
- while (hasMoreChars()) {
- char nextChar = lookAhead(0);
- if (isAlphaDigit(nextChar)) {
- consume(1);
- }
- else {
- boolean isValidChar = false;
- switch (nextChar) {
- case '_':
- case '+':
- case '-':
- case '!':
- case '`':
- case '\'':
- case '~':
- case '%': // bug fix by Bruno Konik, JvB copied here
- case '.':
- case ' ':
- case '\t':
- case '*':
- isValidChar = true;
- }
- if (isValidChar) {
- consume(1);
- }
- else {
- break;
- }
- }
- }
- return buffer.substring(startIdx, ptr);
- } catch (ParseException ex) {
- return null;
- }
- }*/
- public char[] ttokenSafe() {
- int startIdx = ptr;
- try {
- while (hasMoreChars()) {
- char nextChar = lookAhead(0);
- if (isAlphaDigit(nextChar)) {
- consume(1);
- }
- else {
- boolean isValidChar = false;
- switch (nextChar) {
- case '_':
- case '+':
- case '-':
- case '!':
- case '`':
- case '\'':
- case '.':
- case '/':
- case '}':
- case '{':
- case ']':
- case '[':
- case '^':
- case '|':
- case '~':
- case '%': // bug fix by Bruno Konik, JvB copied here
- case '#':
- case '@':
- case '$':
- case ':':
- case ';':
- case '?':
- case '\"':
- case '*':
- isValidChar = true;
- }
- if (isValidChar) {
- consume(1);
- }
- else {
- break;
- }
- }
- }
- char[] retval = new char[ptr - startIdx];
- System.arraycopy(buffer, startIdx, retval, 0, ptr - startIdx);
- // return String.valueOf(buffer, startIdx, ptr - startIdx);
- return retval;
-
- } catch (ParseException ex) {
- return null;
- }
- }
- public static final char ALPHA_VALID_CHARS = Character.MAX_VALUE;
- public static final char DIGIT_VALID_CHARS = Character.MAX_VALUE - 1;
- public static final char ALPHADIGIT_VALID_CHARS = Character.MAX_VALUE - 2;
-
- public void consumeValidChars(char[] validChars) {
- int validCharsLength = validChars.length;
- try {
- while (hasMoreChars()) {
- char nextChar = lookAhead(0);
- boolean isValid = false;
- for (int i = 0; i < validCharsLength; i++) {
- char validChar = validChars[i];
- switch(validChar) {
- case ALPHA_VALID_CHARS:
- isValid = isAlpha(nextChar);
- break;
- case DIGIT_VALID_CHARS:
- isValid = isDigit(nextChar);
- break;
- case ALPHADIGIT_VALID_CHARS:
- isValid = isAlphaDigit(nextChar);
- break;
- default:
- isValid = nextChar == validChar;
- }
- if (isValid) {
- break;
- }
- }
- if (isValid) {
- consume(1);
- }
- else {
- break;
- }
- }
- } catch (ParseException ex) {
- }
- }
- /** Parse a comment string cursor is at a ". Leave cursor at closing "
- *@return the substring containing the quoted string excluding the
- * closing quote.
- */
- public String quotedString() throws ParseException {
- int startIdx = ptr + 1;
- if (lookAhead(0) != '\"')
- return null;
- consume(1);
- while (true) {
- char next = getNextChar();
- if (next == '\"') {
- // Got to the terminating quote.
- break;
- } else if (next == '\0') {
- throw new ParseException(
- String.valueOf(this.buffer) + " :unexpected EOL",
- this.ptr);
- } else if (next == '\\') {
- consume(1);
- }
- }
- return String.valueOf(buffer, startIdx, ptr - startIdx - 1);
- }
- /** Parse a comment string cursor is at a "(". Leave cursor at )
- *@return the substring containing the comment excluding the
- * closing brace.
- */
- public String comment() throws ParseException {
- StringBuilder retval = new StringBuilder();
- if (lookAhead(0) != '(')
- return null;
- consume(1);
- while (true) {
- char next = getNextChar();
- if (next == ')') {
- break;
- } else if (next == '\0') {
- throw new ParseException(
- this.buffer + " :unexpected EOL",
- this.ptr);
- } else if (next == '\\') {
- retval.append(next);
- next = getNextChar();
- if (next == '\0')
- throw new ParseException(
- this.buffer + " : unexpected EOL",
- this.ptr);
- retval.append(next);
- } else {
- retval.append(next);
- }
- }
- return retval.toString();
- }
- /** Return a substring containing no semicolons.
- *@return a substring containing no semicolons.
- */
- public String byteStringNoSemicolon() {
- StringBuilder retval = new StringBuilder();
- try {
- while (true) {
- char next = lookAhead(0);
- // bug fix from Ben Evans.
- if (next == '\0' || next == '\n' || next == ';' || next == ',' ) {
- break;
- } else {
- consume(1);
- retval.append(next);
- }
- }
- } catch (ParseException ex) {
- return retval.toString();
- }
- return retval.toString();
- }
- /**
- * Scan until you see a slash or an EOL.
- *
- * @return substring containing no slash.
- */
- public String byteStringNoSlash() {
- StringBuilder retval = new StringBuilder();
- try {
- while (true) {
- char next = lookAhead(0);
- // bug fix from Ben Evans.
- if (next == '\0' || next == '\n' || next == '/' ) {
- break;
- } else {
- consume(1);
- retval.append(next);
- }
- }
- } catch (ParseException ex) {
- return retval.toString();
- }
- return retval.toString();
- }
- /** Return a substring containing no commas
- *@return a substring containing no commas.
- */
- public String byteStringNoComma() {
- StringBuilder retval = new StringBuilder();
- try {
- while (true) {
- char next = lookAhead(0);
- if (next == '\n' || next == ',') {
- break;
- } else {
- consume(1);
- retval.append(next);
- }
- }
- } catch (ParseException ex) {
- }
- return retval.toString();
- }
- public static String charAsString(char ch) {
- return String.valueOf(ch);
- }
- /** Lookahead in the inputBuffer for n chars and return as a string.
- * Do not consume the input.
- */
- public String charAsString(int nchars) {
- return String.valueOf(buffer, ptr, nchars -1);
- }
- /** Get and consume the next number.
- *@return a substring corresponding to a number
- *(i.e. sequence of digits).
- */
- public String number() throws ParseException {
- int startIdx = ptr;
- try {
- if (!isDigit(lookAhead(0))) {
- throw new ParseException(
- buffer + ": Unexpected token at " + lookAhead(0),
- ptr);
- }
- consume(1);
- while (true) {
- char next = lookAhead(0);
- if (isDigit(next)) {
- consume(1);
- } else
- break;
- }
- return String.valueOf(buffer, startIdx, ptr - startIdx);
- } catch (ParseException ex) {
- return String.valueOf(buffer, startIdx, ptr - startIdx);
- }
- }
- /** Mark the position for backtracking.
- *@return the current location of the pointer.
- */
- public int markInputPosition() {
- return ptr;
- }
- /** Rewind the input ptr to the marked position.
- *@param position - the position to rewind the parser to.
- */
- public void rewindInputPosition(int position) {
- this.ptr = position;
- }
- /** Get the rest of the String
- * @return rest of the buffer.
- */
- public String getRest() {
- if (ptr >= bufferLen)
- return null;
- else
- return String.valueOf(buffer, ptr, bufferLen - ptr - 1);
- }
- /** Get the sub-String until the character is encountered
- * @param c the character to match
- * @return the substring that matches.
- */
- public String getString(char c) throws ParseException {
- StringBuilder retval = new StringBuilder();
- while (true) {
- char next = lookAhead(0);
- //System.out.println(" next = [" + next + ']' + "ptr = " + ptr);
- //System.out.println(next == '\0');
- if (next == '\0') {
- throw new ParseException(
- this.buffer + "unexpected EOL",
- this.ptr);
- } else if (next == c) {
- consume(1);
- break;
- } else if (next == '\\') {
- consume(1);
- char nextchar = lookAhead(0);
- if (nextchar == '\0') {
- throw new ParseException(
- this.buffer + "unexpected EOL",
- this.ptr);
- } else {
- consume(1);
- retval.append(nextchar);
- }
- } else {
- consume(1);
- retval.append(next);
- }
- }
- return retval.toString();
- }
- /** Get the read pointer.
- */
- public int getPtr() {
- return this.ptr;
- }
- /** Get the buffer.
- */
- public String getBuffer() {
- return String.valueOf(buffer);
- }
-
- public char[] getSubBuffer(int start, int end) {
- char[] retval = new char[end - start];
- System.arraycopy(buffer, start, retval, 0, end - start);
- return retval;
- }
- /** Create a parse exception.
- */
- public ParseException createParseException() {
- return new ParseException(getBuffer(), this.ptr);
- }
- }