TokenMarker.java - This Java code is part of a parser gener…

/jEdit/tags/jedit-4-0-pre5/org/gjt/sp/jedit/syntax/TokenMarker.java

# · Java · 896 lines · 647 code · 114 blank · 135 comment · 163 complexity · 8a7536a95426b58b02378857f94e2928 MD5 · raw file


/*
 * TokenMarker.java - Tokenizes lines of text
 * :tabSize=8:indentSize=8:noTabs=false:
 * :folding=explicit:collapseFolds=1:
 *
 * Copyright (C) 1998, 1999, 2000, 2001 Slava Pestov
 * Copyright (C) 1999, 2000 mike dillon
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 */

package org.gjt.sp.jedit.syntax;

//{{{ Imports
import javax.swing.text.Segment;
import java.util.*;
import org.gjt.sp.jedit.*;
import org.gjt.sp.util.Log;
//}}}

/**
 * A token marker splits lines of text into tokens. Each token carries
 * a length field and an identification tag that can be mapped to a color
 * or font style for painting that token.
 *
 * @author Slava Pestov, mike dillon
 * @version $Id: TokenMarker.java 3954 2001-12-30 07:35:03Z spestov $
 *
 * @see org.gjt.sp.jedit.syntax.Token
 */
public class TokenMarker
{
	//{{{ Major actions (total: 8)
	public static final int MAJOR_ACTIONS = 0x000000FF;
	public static final int WHITESPACE = 1 << 0;
	public static final int SPAN = 1 << 1;
	public static final int MARK_PREVIOUS = 1 << 2;
	public static final int MARK_FOLLOWING = 1 << 3;
	public static final int EOL_SPAN = 1 << 4;
//	public static final int MAJOR_ACTION_5 = 1 << 5;
//	public static final int MAJOR_ACTION_6 = 1 << 6;
//	public static final int MAJOR_ACTION_7 = 1 << 7;
	//}}}

	//{{{ Action hints (total: 8)
	public static final int ACTION_HINTS = 0x0000FF00;
	public static final int EXCLUDE_MATCH = 1 << 8;
	public static final int AT_LINE_START = 1 << 9;
	public static final int NO_LINE_BREAK = 1 << 10;
	public static final int NO_WORD_BREAK = 1 << 11;
	public static final int IS_ESCAPE = 1 << 12;
	public static final int DELEGATE = 1 << 13;
//	public static final int ACTION_HINT_14 = 1 << 14;
//	public static final int ACTION_HINT_15 = 1 << 15;
	//}}}

	//{{{ TokenMarker constructor
	public TokenMarker()
	{
		ruleSets = new Hashtable(64);
	} //}}}

	//{{{ addRuleSet() method
	public void addRuleSet(String setName, ParserRuleSet rules)
	{
		if (rules == null) return;

		if (setName == null) setName = "MAIN";

		ruleSets.put(rulePfx.concat(setName), rules);

		if (setName.equals("MAIN"))
			mainRuleSet = rules;
	} //}}}

	//{{{ getMainRuleSet() method
	public ParserRuleSet getMainRuleSet()
	{
		return mainRuleSet;
	} //}}}

	//{{{ getRuleSet() method
	public ParserRuleSet getRuleSet(String setName)
	{
		ParserRuleSet rules;

		rules = (ParserRuleSet) ruleSets.get(setName);

		if (rules == null && !setName.startsWith(rulePfx))
		{
			int delim = setName.indexOf("::");

			String modeName = setName.substring(0, delim);

			Mode mode = jEdit.getMode(modeName);
			if(mode == null)
			{
				Log.log(Log.ERROR,TokenMarker.class,
					"Unknown edit mode: " + modeName);
				rules = null;
			}
			else
			{
				TokenMarker marker = mode.getTokenMarker();
				rules = marker.getRuleSet(setName);
			}

			// store external ParserRuleSet in the local hashtable for
			// faster lookups later
			ruleSets.put(setName, rules);
		}

		if (rules == null)
		{
			Log.log(Log.ERROR,this,"Unresolved delegate target: " + setName);
		}

		return rules;
	} //}}}

	//{{{ getName() method
	public String getName()
	{
		return name;
	} //}}}

	//{{{ setName() method
	public void setName(String name)
	{
		if (name == null) throw new NullPointerException();

		this.name = name;
		rulePfx = name.concat("::");
	} //}}}

	//{{{ markTokens() method
	/**
	 * Do not call this method directly; call Buffer.markTokens() instead.
	 */
	public LineContext markTokens(LineContext prevContext,
		Buffer.TokenList tokenList, Segment line)
	{
		this.context = new LineContext();

		//{{{ Set up context
		if(prevContext == null)
			context.rules = getMainRuleSet();
		else
		{
			context.parent = prevContext.parent;
			context.inRule = prevContext.inRule;
			context.rules = prevContext.rules;
		} //}}}

		lastOffset = lastKeyword = line.offset;
		lineLength = line.count + line.offset;

		int terminateChar = context.rules.getTerminateChar();
		int searchLimit = (terminateChar >= 0 && terminateChar < line.count)
			? line.offset + terminateChar : lineLength;

		escaped = false;

		boolean b;
		boolean tempEscaped;
		Segment tempPattern;
		ParserRule rule;
		LineContext tempContext;

		for(pos = line.offset; pos < searchLimit; pos++)
		{
			//{{{ if we are not in the top level context, we are delegated
			if (context.parent != null)
			{
				tempContext = context;

				context = context.parent;

				pattern.array = context.inRule.searchChars;
				pattern.count = context.inRule.sequenceLengths[1];
				pattern.offset = context.inRule.sequenceLengths[0];

				b = handleRule(tokenList, line, context.inRule);

				context = tempContext;

				if (!b)
				{
					if (escaped)
					{
						escaped = false;
					}
					else
					{
						if (pos != lastOffset)
						{
							if (context.inRule == null)
							{
								markKeyword(tokenList,line,lastKeyword,pos);

								tokenList.addToken(pos - lastOffset,
									context.rules.getDefault(),
									context.rules);
							}
							else if ((context.inRule.action & (NO_LINE_BREAK | NO_WORD_BREAK)) == 0)
							{
								tokenList.addToken(pos - lastOffset,
									context.inRule.token,
									context.rules);
							}
							else
							{
								tokenList.addToken(pos - lastOffset, Token.INVALID,
									context.rules);
							}
						}

						context = (LineContext)context.parent.clone();

						if ((context.inRule.action & EXCLUDE_MATCH) == EXCLUDE_MATCH)
						{
							tokenList.addToken(pattern.count,
								context.rules.getDefault(),
								context.rules);
						}
						else
						{
							tokenList.addToken(pattern.count,
								context.inRule.token,
								context.rules);
						}

						context.inRule = null;

						lastKeyword = lastOffset = pos + pattern.count;
					}

					pos += (pattern.count - 1); // move pos to last character of match sequence

					continue;
				}
			} //}}}

			//{{{ check the escape rule for the current context, if there is one
			if ((rule = context.rules.getEscapeRule()) != null)
			{
				// assign tempPattern to mutable "buffer" pattern
				tempPattern = pattern;

				// swap in the escape pattern
				pattern = context.rules.getEscapePattern();

				tempEscaped = escaped;

				b = handleRule(tokenList, line, rule);

				// swap back the buffer pattern
				pattern = tempPattern;

				if (!b)
				{
					if (tempEscaped) escaped = false;
					continue;
				}
			} //}}}

			//{{{ if we are inside a span, check for its end sequence
			rule = context.inRule;
			if(rule != null && (rule.action & SPAN) == SPAN)
			{
				pattern.array = rule.searchChars;
				pattern.count = rule.sequenceLengths[1];
				pattern.offset = rule.sequenceLengths[0];

				// if we match the end of the span, or if this is a "hard" span,
				// we continue to the next character; otherwise, we check all
				// applicable rules below
				if (!handleRule(tokenList,line,rule)
					|| (rule.action & SOFT_SPAN) == 0)
				{
					escaped = false;
					continue;
				}
			} //}}}

			//{{{ now check every rule
			rule = context.rules.getRules(line.array[pos]);
			while(rule != null)
			{
				pattern.array = rule.searchChars;

				if (context.inRule == rule && (rule.action & SPAN) == SPAN)
				{
					pattern.count = rule.sequenceLengths[1];
					pattern.offset = rule.sequenceLengths[0];
				}
				else
				{
					pattern.count = rule.sequenceLengths[0];
					pattern.offset = 0;
				}

				// stop checking rules if there was a match and go to next pos
				if (!handleRule(tokenList,line,rule))
					break;

				rule = rule.next;
			} //}}}

			escaped = false;
		}

		//{{{ check for keywords at the line's end
		if(context.inRule == null)
			markKeyword(tokenList, line, lastKeyword, lineLength);
		//}}}

		//{{{ mark all remaining characters
		if(lastOffset != lineLength)
		{
			if (context.inRule == null)
			{
				tokenList.addToken(lineLength - lastOffset,
					context.rules.getDefault(),
					context.rules);
			}
			else if (
				(context.inRule.action & SPAN) == SPAN &&
				(context.inRule.action & (NO_LINE_BREAK | NO_WORD_BREAK)) != 0
			)
			{
				tokenList.addToken(lineLength - lastOffset,Token.INVALID,
					context.rules);
				context.inRule = null;
			}
			else
			{
				tokenList.addToken(lineLength - lastOffset,
					context.inRule.token,
					context.rules);

				if((context.inRule.action & MARK_FOLLOWING) == MARK_FOLLOWING)
				{
					context.inRule = null;
				}
			}
		} //}}}

		tokenList.addToken(0,Token.END,context.rules);

		return context.intern();
	} //}}}

	//{{{ Private members
	private static final int SOFT_SPAN = MARK_FOLLOWING | NO_WORD_BREAK;

	//{{{ Instance variables
	private String name;
	private String rulePfx;
	private Hashtable ruleSets;
	private ParserRuleSet mainRuleSet;

	private LineContext context;
	private Segment pattern = new Segment(new char[0],0,0);
	private int lastOffset;
	private int lastKeyword;
	private int lineLength;
	private int pos;
	private boolean escaped;
	//}}}

	//{{{ handleRule() method
	/**
	 * Checks if the rule matches the line at the current position
	 * and handles the rule if it does match
	 * @param line Segment to check rule against
	 * @param checkRule ParserRule to check against line
	 * @return true,  keep checking other rules
	 *     <br>false, stop checking other rules
	 */
	private boolean handleRule(Buffer.TokenList tokenList, Segment line,
		ParserRule checkRule)
	{
		if (pattern.count == 0) return true;

		if (lineLength - pos < pattern.count) return true;

		char a, b;
		for (int k = 0; k < pattern.count; k++)
		{
			a = pattern.array[pattern.offset + k];
			b = line.array[pos + k];

			//{{{ break out and check the next rule if there is a mismatch
			if (
				!(
					a == b ||
					context.rules.getIgnoreCase() &&
					(
						Character.toLowerCase(a) == b ||
						a == Character.toLowerCase(b)
					)
				)
			) return true;
			//}}}
		}

		if (escaped)
		{
			pos += pattern.count - 1;
			return false;
		}
		else if ((checkRule.action & IS_ESCAPE) == IS_ESCAPE)
		{
			escaped = true;
			pos += pattern.count - 1;
			return false;
		}

		//{{{ handle soft spans
		if (context.inRule != checkRule && context.inRule != null
			&& (context.inRule.action & SOFT_SPAN) != 0)
		{
			if ((context.inRule.action & NO_WORD_BREAK) == NO_WORD_BREAK)
			{
				tokenList.addToken(pos - lastOffset, Token.INVALID,
					context.rules);
			}
			else
			{
				tokenList.addToken(pos - lastOffset,
					context.inRule.token,
					context.rules);
			}
			lastOffset = lastKeyword = pos;
			context.inRule = null;
		} //}}}

		//{{{ not inside a rule
		if (context.inRule == null)
		{
			if ((checkRule.action & AT_LINE_START) == AT_LINE_START)
			{
				if (
					(((checkRule.action & MARK_PREVIOUS) != 0) ?
					lastKeyword :
					pos) != line.offset
				)
				{
					return true;
				}
			}

			markKeyword(tokenList, line, lastKeyword, pos);

			if ((checkRule.action & MARK_PREVIOUS) != MARK_PREVIOUS)
			{
				lastKeyword = pos + pattern.count;

				if ((checkRule.action & WHITESPACE) == WHITESPACE)
				{
					return false; // break out of inner for loop to check next char
				}

				// mark previous sequence as NULL (plain text)
				if (lastOffset < pos)
				{
					tokenList.addToken(pos - lastOffset,
						context.rules.getDefault(),
						context.rules);
				}
			}

			switch(checkRule.action & MAJOR_ACTIONS)
			{
			//{{{ SEQ
			case 0:
				// this is a plain sequence rule
				tokenList.addToken(pattern.count,checkRule.token,
					context.rules);
				lastOffset = pos + pattern.count;

				break;
			//}}}
			//{{{ SPAN
			case SPAN:
				context.inRule = checkRule;

				//{{{ Non-delegated
				if ((checkRule.action & DELEGATE) != DELEGATE)
				{
					if ((checkRule.action & EXCLUDE_MATCH) == EXCLUDE_MATCH)
					{
						tokenList.addToken(pattern.count,
							context.rules.getDefault(),
							context.rules);
						lastOffset = pos + pattern.count;
					}
					else
					{
						lastOffset = pos;
					}
				} //}}}
				//{{{ Delegated
				else
				{
					String setName = new String(checkRule.searchChars,
						checkRule.sequenceLengths[0] + checkRule.sequenceLengths[1],
						checkRule.sequenceLengths[2]);

					ParserRuleSet delegateSet = getRuleSet(setName);

					if (delegateSet != null)
					{
						if ((checkRule.action & EXCLUDE_MATCH) == EXCLUDE_MATCH)
						{
							tokenList.addToken(pattern.count,
								context.rules.getDefault(),
								context.rules);
						}
						else
						{
							tokenList.addToken(pattern.count,
								checkRule.token,
								context.rules);
						}
						lastOffset = pos + pattern.count;

						context = new LineContext(delegateSet, context);
					}
				} //}}}

				break;
			//}}}
			//{{{ EOL_SPAN
			case EOL_SPAN:
				if ((checkRule.action & EXCLUDE_MATCH) == EXCLUDE_MATCH)
				{
					tokenList.addToken(pattern.count,
						context.rules.getDefault(),
						context.rules);
					tokenList.addToken(lineLength - (pos + pattern.count),
						checkRule.token,context.rules);
				}
				else
				{
					tokenList.addToken(lineLength - pos,
						checkRule.token,context.rules);
				}
				lastOffset = lineLength;
				lastKeyword = lineLength;
				pos = lineLength;

				return false;
			//}}}
			//{{{ MARK_PREVIOUS
			case MARK_PREVIOUS:
				if (lastKeyword > lastOffset)
				{
					tokenList.addToken(lastKeyword - lastOffset,
						context.rules.getDefault(),
						context.rules);
					lastOffset = lastKeyword;
				}

				if ((checkRule.action & EXCLUDE_MATCH) == EXCLUDE_MATCH)
				{
					tokenList.addToken(pos - lastOffset,
						checkRule.token,context.rules);
					tokenList.addToken(pattern.count,
						context.rules.getDefault(),
						context.rules);
				}
				else
				{
					tokenList.addToken(pos - lastOffset + pattern.count,
						checkRule.token,context.rules);
				}
				lastOffset = pos + pattern.count;

				break;
			//}}}
			//{{{ MARK_FOLLOWING
			case MARK_FOLLOWING:
				context.inRule = checkRule;
				if ((checkRule.action & EXCLUDE_MATCH) == EXCLUDE_MATCH)
				{
					tokenList.addToken(pattern.count,
						context.rules.getDefault(),
						context.rules);
					lastOffset = pos + pattern.count;
				}
				else
				{
					lastOffset = pos;
				}

				break;
			//}}}
			default:
				throw new InternalError("Unhandled major action");
			}

			lastKeyword = lastOffset;

			pos += (pattern.count - 1); // move pos to last character of match sequence
			return false; // break out of inner for loop to check next char
		}
		//}}}
		//{{{ inside a SPAN
		else if ((checkRule.action & SPAN) == SPAN)
		{
			if ((checkRule.action & DELEGATE) != DELEGATE)
			{
				context.inRule = null;
				if ((checkRule.action & EXCLUDE_MATCH) == EXCLUDE_MATCH)
				{
					tokenList.addToken(pos - lastOffset,
						checkRule.token,context.rules);
					tokenList.addToken(pattern.count,
						context.rules.getDefault(),
						context.rules);
				}
				else
				{
					tokenList.addToken((pos + pattern.count) - lastOffset,
						checkRule.token,context.rules);
				}
				lastKeyword = lastOffset = pos + pattern.count;

				pos += (pattern.count - 1); // move pos to last character of match sequence
			}

			return false; // break out of inner for loop to check next char
		}//}}}

		return true;
	} //}}}

	//{{{ markKeyword() method
	private void markKeyword(Buffer.TokenList tokenList, Segment line,
		int start, int end)
	{
		KeywordMap keywords = context.rules.getKeywords();

		int len = end - start;

		//{{{ do digits.

		/* right now, this is hardcoded to handle these cases:
		 * 1234
		 * 0x1234abcf
		 * 1234l
		 * 12.34f
		 * 12.34d
		 *
		 * in the future, we need some sort of regexp mechanism. */
		if(context.rules.getHighlightDigits())
		{
			boolean digit = true;
			char[] array = line.array;
			boolean octal = false;
			boolean hex = false;
			boolean seenSomeDigits = false;
loop:			for(int i = 0; i < len; i++)
			{
				char ch = array[start+i];
				switch(ch)
				{
				case '0':
					if(i == 0)
						octal = true;
					seenSomeDigits = true;
					continue loop;
				case '1': case '2': case '3':
				case '4': case '5': case '6':
				case '7': case '8': case '9':
					seenSomeDigits = true;
					continue loop;
				case 'x': case 'X':
					if(octal && i == 1)
					{
						hex = true;
						continue loop;
					}
					else
						break;
				case 'd': case 'D':
				case 'f': case 'F':
					if(hex)
						continue loop;
					else if(i == len -1 && seenSomeDigits)
						continue loop;
					else
						break;
				case 'l': case 'L':
					if(i == len -1 && seenSomeDigits)
						continue loop;
					else
						break;
				case 'e': case 'E':
					if(seenSomeDigits)
						continue loop;
					else
						break;
				case 'a': case 'A': case 'b': case 'B':
				case 'c': case 'C':
					if(hex)
						continue loop;
					else
						break;
				case '.': case '-':
					// normally, this shouldn't be
					// necessary, because most modes
					// define '.' and '-' SEQs. However,
					// in props mode, we can't define
					// such a SEQ because it would
					// break the AT_LINE_START
					// MARK_PREVIOUS rule.

					continue loop;
				default:
					break;
				}

				// if we ended up here, then we have found a
				// non-digit character.
				digit = false;
				break loop;
			}

			// if we got this far with digit = true, then the keyword
			// consists of all digits. Add it as such.
			if(digit && seenSomeDigits)
			{
				if(start != lastOffset)
				{
					tokenList.addToken(start - lastOffset,
						context.rules.getDefault(),
						context.rules);
				}
				tokenList.addToken(len,Token.DIGIT,context.rules);
				lastKeyword = lastOffset = end;

				return;
			}
		} //}}}

		if(keywords != null)
		{
			byte id = keywords.lookup(line, start, len);

			if(id != Token.NULL)
			{
				if(start != lastOffset)
				{
					tokenList.addToken(start - lastOffset,
						context.rules.getDefault(),
						context.rules);
				}
				tokenList.addToken(len,id,context.rules);
				lastKeyword = lastOffset = end;
			}
		}
	} //}}}

	//}}}

	//{{{ LineContext class
	public static class LineContext
	{
		//{{{ Debug code
		static int count;
		static int countGC;

		public String getAllocationStatistics()
		{
			return "total: " + count + ", in core: " +
				(count - countGC)
				+ ", interned: " + intern.size();
		} //}}}

		static Hashtable intern = new Hashtable();

		public LineContext parent;
		public ParserRule inRule;
		public ParserRuleSet rules;

		//{{{ LineContext constructor
		public LineContext(ParserRule r, ParserRuleSet rs)
		{
			this();
			inRule = r;
			rules = rs;
		} //}}}

		//{{{ LineContext constructor
		public LineContext(ParserRuleSet rs, LineContext lc)
		{
			this();
			rules = rs;
			parent = (lc == null ? null : (LineContext)lc.clone());
		} //}}}

		//{{{ LineContext constructor
		public LineContext(ParserRule r)
		{
			this();
			inRule = r;
		} //}}}

		//{{{ LineContext constructor
		public LineContext()
		{
			count++;
		} //}}}

		//{{{ intern() method
		public LineContext intern()
		{
			Object obj = intern.get(this);
			if(obj == null)
			{
				intern.put(this,this);
				return this;
			}
			else
				return (LineContext)obj;
		} //}}}

		//{{{ finalize() method
		public void finalize()
		{
			countGC++;
		} //}}}

		//{{{ hashCode() method
		public int hashCode()
		{
			if(inRule != null)
				return inRule.hashCode();
			else if(rules != null)
				return rules.hashCode();
			else
				return 0;
		} //}}}

		//{{{ equals() method
		public boolean equals(Object obj)
		{
			if(obj instanceof LineContext)
			{
				LineContext lc = (LineContext)obj;
				if(lc.parent == null)
				{
					if(parent != null)
						return false;
				}
				else //if(lc.parent != null)
				{
					if(parent == null)
						return false;
					else if(!lc.parent.equals(parent))
						return false;
				}

				return lc.inRule == inRule && lc.rules == rules;
			}
			else
				return false;
		} //}}}

		//{{{ clone() method
		public Object clone()
		{
			LineContext lc = new LineContext();
			lc.inRule = inRule;
			lc.rules = rules;
			lc.parent = (parent == null) ? null : (LineContext) parent.clone();

			return lc;
		} //}}}
	} //}}}
}

Summary ✨

This Java code is part of a parser generator tool, specifically for creating parsers for context-free grammars. It defines classes and methods for parsing lines of text according to predefined rules, including handling keywords, digits, and special characters. The LineContext class represents the current parsing state, allowing it to be cloned, interned, and compared with other contexts.

Tech Fingerprint

Standard Collections

Alerts (25)

'import' Maintainability Info: Wildcard imports (e.g., `import java.util.*;`) can obscure the origin of classes and lead to namespace collisions. Prefer importing specific classes explicitly.
28 29
'=' Maintainability Info: Avoid using unnamed 'magic' numbers directly in comparisons or assignments. Use named constants (static final variables) instead to improve readability and maintainability.
72
'Set' Raw collection type used. Specify generic type arguments (e.g., List<String>, Map<Integer, Client>) for type safety and clarity. Avoid raw types unless interacting with legacy code.
76 85 89 95 97 373 523 525 799 802 810
'List' Raw collection type used. Specify generic type arguments (e.g., List<String>, Map<Integer, Client>) for type safety and clarity. Avoid raw types unless interacting with legacy code.
153 393 653
Complexity hotspot; lines 433 to 434 (total complexity: 6)
433 434
'Map' Raw collection type used. Specify generic type arguments (e.g., List<String>, Map<Integer, Client>) for type safety and clarity. Avoid raw types unless interacting with legacy code.
656
'public' Maintainability Info: Public non-final fields violate encapsulation. Prefer making fields private and providing public getter/setter methods if access is needed.
797 798 799
'finalize()' Avoid finalize(). It's deprecated, unreliable for timely cleanup, and has performance overhead. Use try-with-resources for resource management or java.lang.ref.Cleaner (Java 9+) for more complex cleanup actions.
844
'instanceof' Frequent 'instanceof' checks can indicate a need for better polymorphism (using overridden methods in subclasses) or visitor pattern. Consider if the design can be improved.
863