PageRenderTime 354ms CodeModel.GetById 309ms app.highlight 38ms RepoModel.GetById 1ms app.codeStats 0ms

/jEdit/tags/jedit-4-3-pre5/org/gjt/sp/jedit/syntax/TokenMarker.java

#
Java | 877 lines | 633 code | 111 blank | 133 comment | 175 complexity | abc4c21bd6f0558116512b3188234f00 MD5 | raw file
  1/*
  2 * TokenMarker.java - Tokenizes lines of text
  3 * :tabSize=8:indentSize=8:noTabs=false:
  4 * :folding=explicit:collapseFolds=1:
  5 *
  6 * Copyright (C) 1998, 2003 Slava Pestov
  7 * Copyright (C) 1999, 2000 mike dillon
  8 *
  9 * This program is free software; you can redistribute it and/or
 10 * modify it under the terms of the GNU General Public License
 11 * as published by the Free Software Foundation; either version 2
 12 * of the License, or any later version.
 13 *
 14 * This program is distributed in the hope that it will be useful,
 15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 17 * GNU General Public License for more details.
 18 *
 19 * You should have received a copy of the GNU General Public License
 20 * along with this program; if not, write to the Free Software
 21 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 22 */
 23
 24package org.gjt.sp.jedit.syntax;
 25
 26//{{{ Imports
 27import javax.swing.text.Segment;
 28import java.util.*;
 29import java.util.regex.Matcher;
 30import java.util.regex.Pattern;
 31import org.gjt.sp.jedit.*;
 32import org.gjt.sp.util.SegmentCharSequence;
 33//}}}
 34
 35/**
 36 * A token marker splits lines of text into tokens. Each token carries
 37 * a length field and an identification tag that can be mapped to a color
 38 * or font style for painting that token.
 39 *
 40 * @author Slava Pestov, mike dillon
 41 * @version $Id: TokenMarker.java 5488 2006-06-24 04:01:01Z vanza $
 42 *
 43 * @see org.gjt.sp.jedit.syntax.Token
 44 * @see org.gjt.sp.jedit.syntax.TokenHandler
 45 */
 46public class TokenMarker
 47{
	//{{{ TokenMarker constructor
	/**
	 * Creates a token marker that has no rule sets registered yet.
	 */
	public TokenMarker()
	{
		// 64 is the initial capacity of the set name -> rule set table
		this.ruleSets = new Hashtable(64);
	} //}}}
 53
 54	//{{{ addRuleSet() method
 55	public void addRuleSet(ParserRuleSet rules)
 56	{
 57		ruleSets.put(rules.getSetName(), rules);
 58
 59		if (rules.getSetName().equals("MAIN"))
 60			mainRuleSet = rules;
 61	} //}}}
 62
 63	//{{{ getMainRuleSet() method
 64	public ParserRuleSet getMainRuleSet()
 65	{
 66		return mainRuleSet;
 67	} //}}}
 68
 69	//{{{ getRuleSet() method
 70	public ParserRuleSet getRuleSet(String setName)
 71	{
 72		return (ParserRuleSet) ruleSets.get(setName);
 73	} //}}}
 74
 75	//{{{ getRuleSets() method
 76	/**
 77	 * @since jEdit 4.2pre3
 78	 */
 79	public ParserRuleSet[] getRuleSets()
 80	{
 81		return (ParserRuleSet[])ruleSets.values().toArray(new ParserRuleSet[ruleSets.size()]);
 82	} //}}}
 83
  84	//{{{ markTokens() method
  85	/**
	 * Splits one line of text into tokens and reports each of them to
	 * the given token handler.
	 *
	 * Do not call this method directly; call Buffer.markTokens() instead.
	 *
	 * The parser state is kept in instance variables for the duration
	 * of the call, so this method is not thread-safe.
	 *
	 * @param prevContext the context the preceding line ended in, or
	 * null to start in the main rule set
	 * @param tokenHandler receives every token, the closing Token.END
	 * token and finally the resulting line context
	 * @param line the text of the line to tokenize
	 * @return the interned context in effect at the end of the line
	 */
  88	public LineContext markTokens(LineContext prevContext,
  89		TokenHandler tokenHandler, Segment line)
  90	{
  91		//{{{ Set up some instance variables
  92		// this is to avoid having to pass around lots and lots of
  93		// parameters.
  94		this.tokenHandler = tokenHandler;
  95		this.line = line;
  96
  97		lastOffset = line.offset;
		// note: despite its name this is the exclusive end index of the
		// line inside line.array, not a character count
  98		lineLength = line.count + line.offset;
  99
		// work on a new context object; the parent chain of prevContext
		// is shared with it, not cloned
 100		context = new LineContext();
 101
 102		if(prevContext == null)
 103			context.rules = getMainRuleSet();
 104		else
 105		{
 106			context.parent = prevContext.parent;
 107			context.inRule = prevContext.inRule;
 108			context.rules = prevContext.rules;
 109			context.spanEndSubst = prevContext.spanEndSubst;
 110		}
 111
 112		keywords = context.rules.getKeywords();
 113		escaped = false;
 114
 115		seenWhitespaceEnd = false;
 116		whitespaceEnd = line.offset;
 117		//}}}
 118
 119		//{{{ Main parser loop
 120		ParserRule rule;
 121		int terminateChar = context.rules.getTerminateChar();
 122		boolean terminated = false;
 123main_loop:	for(pos = line.offset; pos < lineLength; pos++)
 124		{
 125			//{{{ check if we have to stop parsing (happens if the terminateChar has been exceeded)
			// done at most once per line: from here on the text is handled
			// by the standard rule set for the current default token,
			// pushed as a child of the current context
 126			if(terminateChar >= 0 && pos - line.offset >= terminateChar
 127				&& !terminated)
 128			{
 129				terminated = true;
 130				context = new LineContext(ParserRuleSet
 131					.getStandardRuleSet(context.rules
 132					.getDefault()),context);
 133				keywords = context.rules.getKeywords();
 134			} //}}}
 135
 136			//{{{ check for end of delegate
			// inside a delegated rule set the parent's open rule is the
			// one whose end sequence closes the delegate
 137			if(context.parent != null)
 138			{
 139				rule = context.parent.inRule;
 140				if(rule != null)
 141				{
 142					if(checkDelegateEnd(rule))
 143					{
						// a match ends the line's leading whitespace
 144						seenWhitespaceEnd = true;
 145						continue main_loop;
 146					}
 147				}
 148			} //}}}
 149
 150			//{{{ check every rule
 151			char ch = line.array[pos];
 152
			// the rule set hands back a linked list of candidate rules
			// for this character
 153			rule = context.rules.getRules(ch);
 154			while(rule != null)
 155			{
 156				// stop checking rules if there was a match
 157				if (handleRule(rule,false))
 158				{
 159					seenWhitespaceEnd = true;
 160					continue main_loop;
 161				}
 162
 163				rule = rule.next;
 164			} //}}}
 165
 166			//{{{ check if current character is a word separator
 167			if(Character.isWhitespace(ch))
 168			{
				// still inside the leading whitespace of the line
 169				if(!seenWhitespaceEnd)
 170					whitespaceEnd = pos + 1;
 171
				// whitespace closes an open rule of the current
				// context (see the end branches of handleRule())
 172				if(context.inRule != null)
 173					handleRule(context.inRule,true);
 174
 175				handleNoWordBreak();
 176
 177				markKeyword(false);
 178
				// the word before the whitespace was neither a number
				// nor a keyword: emit it with the default token type
 179				if(lastOffset != pos)
 180				{
 181					tokenHandler.handleToken(line,
 182						context.rules.getDefault(),
 183						lastOffset - line.offset,
 184						pos - lastOffset,
 185						context);
 186				}
 187
				// the whitespace character itself is always emitted as
				// a one-character token
 188				tokenHandler.handleToken(line,
 189					context.rules.getDefault(),
 190					pos - line.offset,1,context);
 191				lastOffset = pos + 1;
 192
 193				escaped = false;
 194			}
 195			else
 196			{
				// word boundaries are only tracked when the rule set
				// has keywords or at least one rule
 197				if(keywords != null || context.rules.getRuleCount() != 0)
 198				{
 199					String noWordSep = context.rules.getNoWordSep();
 200
					// a character that is neither a letter/digit nor
					// listed in noWordSep ends the current word
 201					if(!Character.isLetterOrDigit(ch)
 202						&& noWordSep.indexOf(ch) == -1)
 203					{
 204						if(context.inRule != null)
 205							handleRule(context.inRule,true);
 206
 207						handleNoWordBreak();
 208
 209						markKeyword(true);
 210
						// markKeyword(true) leaves lastOffset equal
						// to pos, so this token is the separator
						// character itself
 211						tokenHandler.handleToken(line,
 212							context.rules.getDefault(),
 213							lastOffset - line.offset,1,
 214							context);
 215						lastOffset = pos + 1;
 216					}
 217				}
 218
 219				seenWhitespaceEnd = true;
 220				escaped = false;
 221			} //}}}
 222		} //}}}
 223
 224		//{{{ Mark all remaining characters
		// flush whatever is still pending between lastOffset and the
		// end of the line
 225		pos = lineLength;
 226
 227		if(context.inRule != null)
 228			handleRule(context.inRule,true);
 229
 230		handleNoWordBreak();
 231		markKeyword(true);
 232		//}}}
 233
 234		//{{{ Unwind any NO_LINE_BREAK parent delegates
		// pops contexts whose parent rule must not span lines; when the
		// line was cut off by the terminate char every parent is popped
 235unwind:		while(context.parent != null)
 236		{
 237			rule = context.parent.inRule;
 238			if((rule != null && (rule.action
 239				& ParserRule.NO_LINE_BREAK) == ParserRule.NO_LINE_BREAK)
 240				|| terminated)
 241			{
 242				context = context.parent;
 243				keywords = context.rules.getKeywords();
 244				context.inRule = null;
 245			}
 246			else
 247				break unwind;
 248		} //}}}
 249
		// zero-length marker token for the end of the line
 250		tokenHandler.handleToken(line,Token.END,
 251			pos - line.offset,0,context);
 252
		// equal contexts are shared through the intern table
 253		context = context.intern();
 254		tokenHandler.setLineContext(context);
 255
 256		/* for GC. */
 257		this.line = null;
 258
 259		return context;
 260	} //}}}
261
262	//{{{ Private members
263
 264	//{{{ Instance variables
	// rule sets registered through addRuleSet(), keyed by set name
 265	private Hashtable ruleSets;
	// the registered rule set whose name is "MAIN"
 266	private ParserRuleSet mainRuleSet;
 267
 268	// Instead of passing these around to each method, we just store them
 269	// as instance variables. Note that this is not thread-safe.
	// handler and text of the line being tokenized; set by markTokens()
 270	private TokenHandler tokenHandler;
 271	private Segment line;
	// the context currently in effect
 272	private LineContext context;
	// keyword map of context.rules; refreshed whenever context changes
 273	private KeywordMap keywords;
	// scratch segment: handleRule() points it at the literal start or
	// end sequence it is matching
 274	private Segment pattern = new Segment();
	// index into line.array of the first character that has not yet
	// been reported as part of a token
 275	private int lastOffset;
	// line.offset + line.count, i.e. the exclusive end index of the
	// line in line.array
 276	private int lineLength;
	// index into line.array of the character being examined
 277	private int pos;
	// toggled by IS_ESCAPE rules; while set, the next matching rule is
	// ignored
 278	private boolean escaped;
 279
	// index just past the leading whitespace of the line, and whether
	// that leading whitespace has already ended
 280	private int whitespaceEnd;
 281	private boolean seenWhitespaceEnd;
 282	//}}}
283
 284	//{{{ checkDelegateEnd() method
	/**
	 * Checks whether the end sequence of the parent context's open rule
	 * starts at the current position and, if so, leaves the delegated
	 * rule set.
	 *
	 * @param rule the rule that is open in the parent context
	 * @return true if the current position was consumed, either by the
	 * end sequence or by the parent rule set's escape rule
	 */
 285	private boolean checkDelegateEnd(ParserRule rule)
 286	{
		// rules without an end sequence cannot be closed here
 287		if(rule.end == null)
 288			return false;
 289
		// the end sequence belongs to the parent, so it is matched with
		// the parent context temporarily made current
 290		LineContext tempContext = context;
 291		context = context.parent;
 292		keywords = context.rules.getKeywords();
		// remember the escape state first: handleRule() clears the flag
		// when a match follows an escape
 293		boolean tempEscaped = escaped;
 294		boolean b = handleRule(rule,true);
 295		context = tempContext;
 296		keywords = context.rules.getKeywords();
 297
 298		if(b && !tempEscaped)
 299		{
			// flush what is pending in the delegate before leaving it
 300			if(context.inRule != null)
 301				handleRule(context.inRule,true);
 302
 303			markKeyword(true);
 304
			// continue in a copy of the parent context
 305			context = (LineContext)context.parent.clone();
 306
			// the end sequence gets the rule's token type, or the
			// default type when EXCLUDE_MATCH is set
 307			tokenHandler.handleToken(line,
 308				(context.inRule.action & ParserRule.EXCLUDE_MATCH)
 309				== ParserRule.EXCLUDE_MATCH
 310				? context.rules.getDefault()
 311				: context.inRule.token,
 312				pos - line.offset,pattern.count,context);
 313
 314			keywords = context.rules.getKeywords();
 315			context.inRule = null;
 316			lastOffset = pos + pattern.count;
 317
 318			// move pos to last character of match sequence
 319			pos += (pattern.count - 1);
 320
 321			return true;
 322		}
 323
 324		// check escape rule of parent
 325		if((rule.action & ParserRule.NO_ESCAPE) == 0)
 326		{
 327			ParserRule escape = context.parent.rules.getEscapeRule();
 328			if(escape != null && handleRule(escape,false))
 329				return true;
 330		}
 331
 332		return false;
 333	} //}}}
334
 335	//{{{ handleRule() method
 336	/**
	 * Checks if the rule matches the line at the current position
	 * and handles the rule if it does match.
	 *
	 * @param checkRule the rule to test
	 * @param end if true the rule is being closed and its end sequence
	 * is tested; otherwise its start sequence is tested
	 * @return true if the rule matched, false if it does not apply at
	 * the current position
	 */
 340	private boolean handleRule(ParserRule checkRule, boolean end)
 341	{
 342		//{{{ Some rules can only match in certain locations
 343		if(!end)
 344		{
			// quick reject: the rule's hash character must equal the
			// current character, ignoring case
 345			if(Character.toUpperCase(checkRule.hashChar)
 346				!= Character.toUpperCase(line.array[pos]))
 347			{
 348				return false;
 349			}
 350		}
 351
		// a MARK_PREVIOUS rule covers the text since lastOffset, so its
		// position constraints apply to where that text starts
 352		int offset = ((checkRule.action & ParserRule.MARK_PREVIOUS) != 0) ?
 353			lastOffset : pos;
 354		int posMatch = (end ? checkRule.endPosMatch : checkRule.startPosMatch);
 355
 356		if((posMatch & ParserRule.AT_LINE_START)
 357			== ParserRule.AT_LINE_START)
 358		{
 359			if(offset != line.offset)
 360				return false;
 361		}
 362		else if((posMatch & ParserRule.AT_WHITESPACE_END)
 363			== ParserRule.AT_WHITESPACE_END)
 364		{
 365			if(offset != whitespaceEnd)
 366				return false;
 367		}
 368		else if((posMatch & ParserRule.AT_WORD_START)
 369			== ParserRule.AT_WORD_START)
 370		{
 371			if(offset != lastOffset)
 372				return false;
 373		} //}}}
 374
 375		int matchedChars = 1;
 376		CharSequence charSeq = null;
 377		Matcher match = null;
 378
 379		//{{{ See if the rule's start or end sequence matches here
		// skipped only when closing a MARK_FOLLOWING rule, which has no
		// end sequence to compare against
 380		if(!end || (checkRule.action & ParserRule.MARK_FOLLOWING) == 0)
 381		{
 382			// the end cannot be a regular expression
 383			if((checkRule.action & ParserRule.REGEXP) == 0 || end)
 384			{
 385				if(end)
 386				{
					// an end sequence built from the groups of
					// the start match takes precedence
 387					if(context.spanEndSubst != null)
 388						pattern.array = context.spanEndSubst;
 389					else
 390						pattern.array = checkRule.end;
 391				}
 392				else
 393					pattern.array = checkRule.start;
 394				pattern.offset = 0;
 395				pattern.count = pattern.array.length;
 396				matchedChars = pattern.count;
 397
 398				if(!SyntaxUtilities.regionMatches(context.rules
 399					.getIgnoreCase(),line,pos,pattern.array))
 400				{
 401					return false;
 402				}
 403			}
 404			else
 405			{
 406				// note that all regexps start with \A so they only
 407				// match the start of the string
 408				//int matchStart = pos - line.offset;
				// the sequence covers the line from pos to its end
 409				charSeq = new SegmentCharSequence(line, pos - line.offset,
 410								  line.count - (pos - line.offset));
 411				match = checkRule.startRegexp.matcher(charSeq);
 412				if(!match.lookingAt())
 413					return false;
 414				else if(match.start() != 0)
 415					throw new InternalError("Can't happen");
 416				else
 417				{
 418					matchedChars = match.end();
 419					/* workaround for hang if match was
 420					 * zero-width. not sure if there is
 421					 * a better way to handle this */
 422					if(matchedChars == 0)
 423						matchedChars = 1;
 424				}
 425			}
 426		} //}}}
 427
 428		//{{{ Check for an escape sequence
		// NOTE(review): the escape branches and the MARK_FOLLOWING and
		// MARK_PREVIOUS cases below use pattern.count, which the regexp
		// path above does not update (matchedChars holds the length
		// there) -- confirm these paths are never reached with REGEXP
		// rules
 429		if((checkRule.action & ParserRule.IS_ESCAPE) == ParserRule.IS_ESCAPE)
 430		{
 431			if(context.inRule != null)
 432				handleRule(context.inRule,true);
 433
			// two escapes in a row cancel each other out
 434			escaped = !escaped;
 435			pos += pattern.count - 1;
 436		}
 437		else if(escaped)
 438		{
			// the match directly follows an escape: skip over it
			// without applying the rule
 439			escaped = false;
 440			pos += pattern.count - 1;
 441		} //}}}
 442		//{{{ Handle start of rule
 443		else if(!end)
 444		{
			// a new rule closes whatever rule is still open
 445			if(context.inRule != null)
 446				handleRule(context.inRule,true);
 447
			// for MARK_PREVIOUS the pending text is left unreported
			// unless it is a number or keyword, so the rule itself can
			// report it below
 448			markKeyword((checkRule.action & ParserRule.MARK_PREVIOUS)
 449				!= ParserRule.MARK_PREVIOUS);
 450
 451			switch(checkRule.action & ParserRule.MAJOR_ACTIONS)
 452			{
 453			//{{{ SEQ
 454			case ParserRule.SEQ:
 455				context.spanEndSubst = null;
 456
				// regexp matches are reported via
				// handleTokenWithSpaces(), which emits each
				// whitespace character as its own token
 457				if((checkRule.action & ParserRule.REGEXP) != 0)
 458				{
 459					handleTokenWithSpaces(tokenHandler,
 460						checkRule.token,
 461						pos - line.offset,
 462						matchedChars,
 463						context);
 464				}
 465				else
 466				{
 467					tokenHandler.handleToken(line,
 468						checkRule.token,
 469						pos - line.offset,
 470						matchedChars,context);
 471				}
 472
 473				// a DELEGATE attribute on a SEQ changes the
 474				// ruleset from the end of the SEQ onwards
 475				if(checkRule.delegate != null)
 476				{
					// replaces the current context: the new
					// one keeps the same parent
 477					context = new LineContext(
 478						checkRule.delegate,
 479						context.parent);
 480					keywords = context.rules.getKeywords();
 481				}
 482				break;
 483			//}}}
 484			//{{{ SPAN, EOL_SPAN
 485			case ParserRule.SPAN:
 486			case ParserRule.EOL_SPAN:
				// remember the open span on the current context
 487				context.inRule = checkRule;
 488
 489				byte tokenType = ((checkRule.action & ParserRule.EXCLUDE_MATCH)
 490					== ParserRule.EXCLUDE_MATCH
 491					? context.rules.getDefault() : checkRule.token);
 492
 493				if((checkRule.action & ParserRule.REGEXP) != 0)
 494				{
 495					handleTokenWithSpaces(tokenHandler,
 496						tokenType,
 497						pos - line.offset,
 498						matchedChars,
 499						context);
 500				}
 501				else
 502				{
 503					tokenHandler.handleToken(line,tokenType,
 504						pos - line.offset,
 505						matchedChars,context);
 506				}
 507
 508				char[] spanEndSubst = null;
 509				/* substitute result of matching the rule start
 510				 * into the end string.
 511				 *
 512				 * eg, in shell script mode, <<\s*(\w+) is
 513				 * matched into \<$1\> to construct rules for
 514				 * highlighting read-ins like this <<EOF
 515				 * ...
 516				 * EOF
 517				 */
				// charSeq is only set when the start was matched
				// as a regexp
 518				if(charSeq != null && checkRule.end != null)
 519				{
 520					spanEndSubst = substitute(match,
 521						checkRule.end);
 522				}
 523
 524				context.spanEndSubst = spanEndSubst;
				// push a child context for the delegate rule set;
				// its parent is a clone of the current context
 525				context = new LineContext(
 526					checkRule.delegate,
 527					context);
 528				keywords = context.rules.getKeywords();
 529
 530				break;
 531			//}}}
 532			//{{{ MARK_FOLLOWING
 533			case ParserRule.MARK_FOLLOWING:
 534				tokenHandler.handleToken(line,(checkRule.action
 535					& ParserRule.EXCLUDE_MATCH)
 536					== ParserRule.EXCLUDE_MATCH ?
 537					context.rules.getDefault()
 538					: checkRule.token,pos - line.offset,
 539					pattern.count,context);
 540
 541				context.spanEndSubst = null;
				// stays open until handleRule(inRule,true) is
				// called for it
 542				context.inRule = checkRule;
 543				break;
 544			//}}}
 545			//{{{ MARK_PREVIOUS
 546			case ParserRule.MARK_PREVIOUS:
 547				context.spanEndSubst = null;
 548
 549				if ((checkRule.action & ParserRule.EXCLUDE_MATCH)
 550					== ParserRule.EXCLUDE_MATCH)
 551				{
					// preceding text gets the rule's token,
					// the matched sequence the default one
 552					if(pos != lastOffset)
 553					{
 554						tokenHandler.handleToken(line,
 555							checkRule.token,
 556							lastOffset - line.offset,
 557							pos - lastOffset,
 558							context);
 559					}
 560
 561					tokenHandler.handleToken(line,
 562						context.rules.getDefault(),
 563						pos - line.offset,pattern.count,
 564						context);
 565				}
 566				else
 567				{
					// one token spanning the preceding text
					// and the matched sequence
 568					tokenHandler.handleToken(line,
 569						checkRule.token,
 570						lastOffset - line.offset,
 571						pos - lastOffset + pattern.count,
 572						context);
 573				}
 574
 575				break;
 576			//}}}
 577			default:
 578				throw new InternalError("Unhandled major action");
 579			}
 580
 581			// move pos to last character of match sequence
 582			pos += (matchedChars - 1);
 583			lastOffset = pos + 1;
 584
 585			// break out of inner for loop to check next char
 586		} //}}}
 587		//{{{ Handle end of MARK_FOLLOWING
 588		else if((context.inRule.action & ParserRule.MARK_FOLLOWING) != 0)
 589		{
			// the text collected since the rule started gets the
			// rule's token type
 590			if(pos != lastOffset)
 591			{
 592				tokenHandler.handleToken(line,
 593					context.inRule.token,
 594					lastOffset - line.offset,
 595					pos - lastOffset,context);
 596			}
 597
 598			lastOffset = pos;
 599			context.inRule = null;
 600		} //}}}
 601
 602		return true;
 603	} //}}}
604
605	//{{{ handleNoWordBreak() method
606	private void handleNoWordBreak()
607	{
608		if(context.parent != null)
609		{
610			ParserRule rule = context.parent.inRule;
611			if(rule != null && (context.parent.inRule.action
612				& ParserRule.NO_WORD_BREAK) != 0)
613			{
614				if(pos != lastOffset)
615				{
616					tokenHandler.handleToken(line,
617						rule.token,
618						lastOffset - line.offset,
619						pos - lastOffset,context);
620				}
621
622				lastOffset = pos;
623				context = context.parent;
624				keywords = context.rules.getKeywords();
625				context.inRule = null;
626			}
627		}
628	} //}}}
629
630	//{{{ handleTokenWithSpaces() method
631	private void handleTokenWithSpaces(TokenHandler tokenHandler,
632		byte tokenType, int start, int len, LineContext context)
633	{
634		int last = start;
635		int end = start + len;
636
637		for(int i = start; i < end; i++)
638		{
639			if(Character.isWhitespace(line.array[i + line.offset]))
640			{
641				if(last != i)
642				{
643					tokenHandler.handleToken(line,
644					tokenType,last,i - last,context);
645				}
646				tokenHandler.handleToken(line,tokenType,i,1,context);
647				last = i + 1;
648			}
649		}
650
651		if(last != end)
652		{
653			tokenHandler.handleToken(line,tokenType,last,
654				end - last,context);
655		}
656	} //}}}
657
658	//{{{ markKeyword() method
659	private void markKeyword(boolean addRemaining)
660	{
661		int len = pos - lastOffset;
662		if(len == 0)
663			return;
664
665		//{{{ Do digits
666		if(context.rules.getHighlightDigits())
667		{
668			boolean digit = false;
669			boolean mixed = false;
670
671			for(int i = lastOffset; i < pos; i++)
672			{
673				char ch = line.array[i];
674				if(Character.isDigit(ch))
675					digit = true;
676				else
677					mixed = true;
678			}
679
680			if(mixed)
681			{
682				Pattern digitRE = context.rules.getDigitRegexp();
683
684				// only match against regexp if its not all
685				// digits; if all digits, no point matching
686				if(digit)
687				{
688					if(digitRE == null)
689					{
690						// mixed digit/alpha keyword,
691						// and no regexp... don't
692						// highlight as DIGIT
693						digit = false;
694					}
695					else
696					{
697						int oldCount = line.count;
698						int oldOffset = line.offset;
699						line.offset = lastOffset;
700						line.count = len;
701						CharSequence seq = new SegmentCharSequence(line);
702						digit = digitRE.matcher(seq).matches();
703						line.offset = oldOffset;
704						line.count = oldCount;
705					}
706				}
707			}
708
709			if(digit)
710			{
711				tokenHandler.handleToken(line,Token.DIGIT,
712					lastOffset - line.offset,
713					len,context);
714				lastOffset = pos;
715
716				return;
717			}
718		} //}}}
719
720		//{{{ Do keywords
721		if(keywords != null)
722		{
723			byte id = keywords.lookup(line, lastOffset, len);
724
725			if(id != Token.NULL)
726			{
727				tokenHandler.handleToken(line,id,
728					lastOffset - line.offset,
729					len,context);
730				lastOffset = pos;
731				return;
732			}
733		} //}}}
734
735		//{{{ Handle any remaining crud
736		if(addRemaining)
737		{
738			tokenHandler.handleToken(line,context.rules.getDefault(),
739				lastOffset - line.offset,len,context);
740			lastOffset = pos;
741		} //}}}
742	} //}}}
743
744	//{{{ substitute() method
745	private static char[] substitute(Matcher match, char[] end)
746	{
747		StringBuffer buf = new StringBuffer();
748		for(int i = 0; i < end.length; i++)
749		{
750			char ch = end[i];
751			if(ch == '$')
752			{
753				if(i == end.length - 1)
754					buf.append(ch);
755				else
756				{
757					char digit = end[i + 1];
758					if(!Character.isDigit(digit))
759						buf.append(ch);
760					else
761					{
762						buf.append(match.group(
763							digit - '0'));
764						i++;
765					}
766				}
767			}
768			else
769				buf.append(ch);
770		}
771
772		char[] returnValue = new char[buf.length()];
773		buf.getChars(0,buf.length(),returnValue,0);
774		return returnValue;
775	} //}}}
776
777	//}}}
778
779	//{{{ LineContext class
780	/**
781	 * Stores persistent per-line syntax parser state.
782	 */
783	public static class LineContext
784	{
785		private static Hashtable intern = new Hashtable();
786
787		public LineContext parent;
788		public ParserRule inRule;
789		public ParserRuleSet rules;
790		// used for SPAN_REGEXP rules; otherwise null
791		public char[] spanEndSubst;
792
793		//{{{ LineContext constructor
794		public LineContext(ParserRuleSet rs, LineContext lc)
795		{
796			rules = rs;
797			parent = (lc == null ? null : (LineContext)lc.clone());
798		} //}}}
799
800		//{{{ LineContext constructor
801		public LineContext()
802		{
803		} //}}}
804
805		//{{{ intern() method
806		public LineContext intern()
807		{
808			Object obj = intern.get(this);
809			if(obj == null)
810			{
811				intern.put(this,this);
812				return this;
813			}
814			else
815				return (LineContext)obj;
816		} //}}}
817
818		//{{{ hashCode() method
819		public int hashCode()
820		{
821			if(inRule != null)
822				return inRule.hashCode();
823			else if(rules != null)
824				return rules.hashCode();
825			else
826				return 0;
827		} //}}}
828
829		//{{{ equals() method
830		public boolean equals(Object obj)
831		{
832			if(obj instanceof LineContext)
833			{
834				LineContext lc = (LineContext)obj;
835				return lc.inRule == inRule && lc.rules == rules
836					&& MiscUtilities.objectsEqual(parent,lc.parent)
837					&& charArraysEqual(spanEndSubst,lc.spanEndSubst);
838			}
839			else
840				return false;
841		} //}}}
842
843		//{{{ clone() method
844		public Object clone()
845		{
846			LineContext lc = new LineContext();
847			lc.inRule = inRule;
848			lc.rules = rules;
849			lc.parent = (parent == null) ? null : (LineContext) parent.clone();
850			lc.spanEndSubst = spanEndSubst;
851
852			return lc;
853		} //}}}
854
855		//{{{ charArraysEqual() method
856		private static boolean charArraysEqual(char[] c1, char[] c2)
857		{
858			if(c1 == null)
859				return c2 == null;
860
861			// c1 is not null
862			if(c2 == null)
863				return false;
864
865			if(c1.length != c2.length)
866				return false;
867
868			for(int i = 0; i < c1.length; i++)
869			{
870				if(c1[i] != c2[i])
871					return false;
872			}
873
874			return true;
875		} //}}}
876	} //}}}
877}