/jEdit/tags/jedit-4-3-pre5/org/gjt/sp/jedit/syntax/TokenMarker.java
Java | 877 lines | 633 code | 111 blank | 133 comment | 175 complexity | abc4c21bd6f0558116512b3188234f00 MD5 | raw file
Possible License(s): BSD-3-Clause, AGPL-1.0, Apache-2.0, LGPL-2.0, LGPL-3.0, GPL-2.0, CC-BY-SA-3.0, LGPL-2.1, GPL-3.0, MPL-2.0-no-copyleft-exception, IPL-1.0
1/*
2 * TokenMarker.java - Tokenizes lines of text
3 * :tabSize=8:indentSize=8:noTabs=false:
4 * :folding=explicit:collapseFolds=1:
5 *
6 * Copyright (C) 1998, 2003 Slava Pestov
7 * Copyright (C) 1999, 2000 mike dillon
8 *
9 * This program is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU General Public License
11 * as published by the Free Software Foundation; either version 2
12 * of the License, or any later version.
13 *
14 * This program is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
22 */
23
24package org.gjt.sp.jedit.syntax;
25
26//{{{ Imports
27import javax.swing.text.Segment;
28import java.util.*;
29import java.util.regex.Matcher;
30import java.util.regex.Pattern;
31import org.gjt.sp.jedit.*;
32import org.gjt.sp.util.SegmentCharSequence;
33//}}}
34
35/**
36 * A token marker splits lines of text into tokens. Each token carries
37 * a length field and an identification tag that can be mapped to a color
38 * or font style for painting that token.
39 *
40 * @author Slava Pestov, mike dillon
41 * @version $Id: TokenMarker.java 5488 2006-06-24 04:01:01Z vanza $
42 *
43 * @see org.gjt.sp.jedit.syntax.Token
44 * @see org.gjt.sp.jedit.syntax.TokenHandler
45 */
46public class TokenMarker
47{
//{{{ TokenMarker constructor
/**
 * Creates a token marker that has no rule sets registered yet.
 */
public TokenMarker()
{
	// 64 is only the initial capacity of the table
	this.ruleSets = new Hashtable(64);
} //}}}
53
54 //{{{ addRuleSet() method
55 public void addRuleSet(ParserRuleSet rules)
56 {
57 ruleSets.put(rules.getSetName(), rules);
58
59 if (rules.getSetName().equals("MAIN"))
60 mainRuleSet = rules;
61 } //}}}
62
63 //{{{ getMainRuleSet() method
64 public ParserRuleSet getMainRuleSet()
65 {
66 return mainRuleSet;
67 } //}}}
68
69 //{{{ getRuleSet() method
70 public ParserRuleSet getRuleSet(String setName)
71 {
72 return (ParserRuleSet) ruleSets.get(setName);
73 } //}}}
74
75 //{{{ getRuleSets() method
76 /**
77 * @since jEdit 4.2pre3
78 */
79 public ParserRuleSet[] getRuleSets()
80 {
81 return (ParserRuleSet[])ruleSets.values().toArray(new ParserRuleSet[ruleSets.size()]);
82 } //}}}
83
84 //{{{ markTokens() method
85 /**
86 * Do not call this method directly; call Buffer.markTokens() instead.
 *
 * Tokenizes one line. Parsing starts from the state recorded in
 * <code>prevContext</code> (or from the main rule set if that is
 * null), walks the line one character at a time, reports every token
 * to <code>tokenHandler</code>, and ends with a zero-length
 * <code>Token.END</code> token.
 *
 * The parse state lives in instance fields while this method runs, so
 * one TokenMarker instance cannot tokenize two lines concurrently.
 *
 * @param prevContext The context returned for the previous line, or
 * null to start in the main rule set
 * @param tokenHandler Receives the tokens and the final line context
 * @param line The text of the line to tokenize
 * @return The interned context describing the state at the end of
 * this line
87 */
88 public LineContext markTokens(LineContext prevContext,
89 TokenHandler tokenHandler, Segment line)
90 {
91 //{{{ Set up some instance variables
92 // this is to avoid having to pass around lots and lots of
93 // parameters.
94 this.tokenHandler = tokenHandler;
95 this.line = line;
96
 // both are absolute indexes into line.array; lineLength is really
 // the index just past the last character of the line
97 lastOffset = line.offset;
98 lineLength = line.count + line.offset;
99
 // copy the previous line's end state into a fresh context object
 // (the parent reference itself is shared, not cloned)
100 context = new LineContext();
101
102 if(prevContext == null)
103 context.rules = getMainRuleSet();
104 else
105 {
106 context.parent = prevContext.parent;
107 context.inRule = prevContext.inRule;
108 context.rules = prevContext.rules;
109 context.spanEndSubst = prevContext.spanEndSubst;
110 }
111
112 keywords = context.rules.getKeywords();
113 escaped = false;
114
115 seenWhitespaceEnd = false;
116 whitespaceEnd = line.offset;
117 //}}}
118
119 //{{{ Main parser loop
120 ParserRule rule;
121 int terminateChar = context.rules.getTerminateChar();
122 boolean terminated = false;
 // pos is an instance field: handleRule() and checkDelegateEnd()
 // advance it past multi-character matches, and the loop increment
 // then steps to the next unread character
123main_loop: for(pos = line.offset; pos < lineLength; pos++)
124 {
125 //{{{ check if we have to stop parsing (happens if the terminateChar has been exceeded)
126 if(terminateChar >= 0 && pos - line.offset >= terminateChar
127 && !terminated)
128 {
129 terminated = true;
 // push a context using the standard rule set for the current
 // default token type; the old context becomes its parent
130 context = new LineContext(ParserRuleSet
131 .getStandardRuleSet(context.rules
132 .getDefault()),context);
133 keywords = context.rules.getKeywords();
134 } //}}}
135
136 //{{{ check for end of delegate
 // if the parent context is inside a rule, that rule delegated to
 // the current rule set; see whether it ends here
137 if(context.parent != null)
138 {
139 rule = context.parent.inRule;
140 if(rule != null)
141 {
142 if(checkDelegateEnd(rule))
143 {
144 seenWhitespaceEnd = true;
145 continue main_loop;
146 }
147 }
148 } //}}}
149
150 //{{{ check every rule
151 char ch = line.array[pos];
152
 // getRules(ch) yields the head of a linked list of candidate
 // rules for this character; the first one that matches wins
153 rule = context.rules.getRules(ch);
154 while(rule != null)
155 {
156 // stop checking rules if there was a match
157 if (handleRule(rule,false))
158 {
159 seenWhitespaceEnd = true;
160 continue main_loop;
161 }
162
163 rule = rule.next;
164 } //}}}
165
166 //{{{ check if current character is a word separator
167 if(Character.isWhitespace(ch))
168 {
 // still inside the leading whitespace: move its end marker along
169 if(!seenWhitespaceEnd)
170 whitespaceEnd = pos + 1;
171
 // whitespace ends whatever rule is pending in this context
 // (presumably a MARK_FOLLOWING rule)
172 if(context.inRule != null)
173 handleRule(context.inRule,true);
174
175 handleNoWordBreak();
176
177 markKeyword(false);
178
 // emit the text before the whitespace that was not claimed as
 // a keyword or digit
179 if(lastOffset != pos)
180 {
181 tokenHandler.handleToken(line,
182 context.rules.getDefault(),
183 lastOffset - line.offset,
184 pos - lastOffset,
185 context);
186 }
187
 // the whitespace character itself is a one-character token
188 tokenHandler.handleToken(line,
189 context.rules.getDefault(),
190 pos - line.offset,1,context);
191 lastOffset = pos + 1;
192
193 escaped = false;
194 }
195 else
196 {
197 if(keywords != null || context.rules.getRuleCount() != 0)
198 {
199 String noWordSep = context.rules.getNoWordSep();
200
 // anything that is neither a letter, a digit, nor listed in
 // noWordSep separates words
201 if(!Character.isLetterOrDigit(ch)
202 && noWordSep.indexOf(ch) == -1)
203 {
204 if(context.inRule != null)
205 handleRule(context.inRule,true);
206
207 handleNoWordBreak();
208
209 markKeyword(true);
210
 // markKeyword(true) leaves lastOffset equal to pos, so this
 // token is the separator character itself
211 tokenHandler.handleToken(line,
212 context.rules.getDefault(),
213 lastOffset - line.offset,1,
214 context);
215 lastOffset = pos + 1;
216 }
217 }
218
219 seenWhitespaceEnd = true;
220 escaped = false;
221 } //}}}
222 } //}}}
223
224 //{{{ Mark all remaining characters
 // flush whatever is pending as if a separator followed the line
225 pos = lineLength;
226
227 if(context.inRule != null)
228 handleRule(context.inRule,true);
229
230 handleNoWordBreak();
231 markKeyword(true);
232 //}}}
233
234 //{{{ Unwind any NO_LINE_BREAK parent delegates
 // pop back to the parent while the parent's rule is flagged
 // NO_LINE_BREAK, or unconditionally if parsing was terminated
235unwind: while(context.parent != null)
236 {
237 rule = context.parent.inRule;
238 if((rule != null && (rule.action
239 & ParserRule.NO_LINE_BREAK) == ParserRule.NO_LINE_BREAK)
240 || terminated)
241 {
242 context = context.parent;
243 keywords = context.rules.getKeywords();
244 context.inRule = null;
245 }
246 else
247 break unwind;
248 } //}}}
249
250 tokenHandler.handleToken(line,Token.END,
251 pos - line.offset,0,context);
252
 // replace the context by the shared instance that equals it
253 context = context.intern();
254 tokenHandler.setLineContext(context);
255
256 /* for GC. */
257 this.line = null;
258
259 return context;
260 } //}}}
261
262 //{{{ Private members
263
264 //{{{ Instance variables
 // rule sets registered through addRuleSet(), keyed by set name
265 private Hashtable ruleSets;
 // the rule set registered under the name "MAIN"; markTokens() starts
 // from it when there is no previous line context
266 private ParserRuleSet mainRuleSet;
267
268 // Instead of passing these around to each method, we just store them
269 // as instance variables. Note that this is not thread-safe.
 // handler and line of the markTokens() call in progress
270 private TokenHandler tokenHandler;
271 private Segment line;
 // current parser state; replaced as delegate rule sets are entered
 // and left
272 private LineContext context;
 // keyword map of context.rules, refreshed whenever context changes
273 private KeywordMap keywords;
 // scratch segment describing the start or end sequence most recently
 // compared by handleRule()
274 private Segment pattern = new Segment();
 // index into line.array of the first character not yet reported as
 // part of a token
275 private int lastOffset;
 // index into line.array just past the last character of the line
 // (line.offset + line.count), despite the name
276 private int lineLength;
 // index into line.array of the character being examined
277 private int pos;
 // toggled by rules flagged IS_ESCAPE; while true, the next matching
 // sequence is consumed without being applied
278 private boolean escaped;
279
 // index into line.array just past the leading whitespace seen so far
280 private int whitespaceEnd;
 // set once a non-whitespace character or a rule match has been seen;
 // whitespaceEnd is not advanced after that
281 private boolean seenWhitespaceEnd;
282 //}}}
283
284 //{{{ checkDelegateEnd() method
285 private boolean checkDelegateEnd(ParserRule rule)
286 {
287 if(rule.end == null)
288 return false;
289
290 LineContext tempContext = context;
291 context = context.parent;
292 keywords = context.rules.getKeywords();
293 boolean tempEscaped = escaped;
294 boolean b = handleRule(rule,true);
295 context = tempContext;
296 keywords = context.rules.getKeywords();
297
298 if(b && !tempEscaped)
299 {
300 if(context.inRule != null)
301 handleRule(context.inRule,true);
302
303 markKeyword(true);
304
305 context = (LineContext)context.parent.clone();
306
307 tokenHandler.handleToken(line,
308 (context.inRule.action & ParserRule.EXCLUDE_MATCH)
309 == ParserRule.EXCLUDE_MATCH
310 ? context.rules.getDefault()
311 : context.inRule.token,
312 pos - line.offset,pattern.count,context);
313
314 keywords = context.rules.getKeywords();
315 context.inRule = null;
316 lastOffset = pos + pattern.count;
317
318 // move pos to last character of match sequence
319 pos += (pattern.count - 1);
320
321 return true;
322 }
323
324 // check escape rule of parent
325 if((rule.action & ParserRule.NO_ESCAPE) == 0)
326 {
327 ParserRule escape = context.parent.rules.getEscapeRule();
328 if(escape != null && handleRule(escape,false))
329 return true;
330 }
331
332 return false;
333 } //}}}
334
335 //{{{ handleRule() method
336 /**
337 * Checks if the rule matches the line at the current position
338 * and handles the rule if it does match
 *
 * "Handling" means reporting the matched text to the token handler
 * and updating <code>pos</code>, <code>lastOffset</code>,
 * <code>context</code>, <code>keywords</code> and
 * <code>escaped</code> as the rule's action requires.
 *
 * @param checkRule The rule to test
 * @param end true to test the rule's end, false to test its start
 * @return true if the rule matched at the current position; this
 * includes a match that was only consumed because it was escaped
339 */
340 private boolean handleRule(ParserRule checkRule, boolean end)
341 {
342 //{{{ Some rules can only match in certain locations
343 if(!end)
344 {
 // cheap pre-check against the rule's hashChar (presumably the
 // first character of its start sequence), ignoring case
345 if(Character.toUpperCase(checkRule.hashChar)
346 != Character.toUpperCase(line.array[pos]))
347 {
348 return false;
349 }
350 }
351
 // MARK_PREVIOUS rules are positioned by the start of the pending
 // word rather than by the match itself
352 int offset = ((checkRule.action & ParserRule.MARK_PREVIOUS) != 0) ?
353 lastOffset : pos;
354 int posMatch = (end ? checkRule.endPosMatch : checkRule.startPosMatch);
355
 // the tests are chained with else, so only the first position
 // flag that is set gets checked
356 if((posMatch & ParserRule.AT_LINE_START)
357 == ParserRule.AT_LINE_START)
358 {
359 if(offset != line.offset)
360 return false;
361 }
362 else if((posMatch & ParserRule.AT_WHITESPACE_END)
363 == ParserRule.AT_WHITESPACE_END)
364 {
365 if(offset != whitespaceEnd)
366 return false;
367 }
368 else if((posMatch & ParserRule.AT_WORD_START)
369 == ParserRule.AT_WORD_START)
370 {
371 if(offset != lastOffset)
372 return false;
373 } //}}}
374
 // number of characters the match covers; charSeq and match are
 // only set when a regular expression was used
375 int matchedChars = 1;
376 CharSequence charSeq = null;
377 Matcher match = null;
378
379 //{{{ See if the rule's start or end sequence matches here
 // the end of a MARK_FOLLOWING rule has no sequence to compare, so
 // it skips this section and counts as matched
380 if(!end || (checkRule.action & ParserRule.MARK_FOLLOWING) == 0)
381 {
382 // the end cannot be a regular expression
383 if((checkRule.action & ParserRule.REGEXP) == 0 || end)
384 {
385 if(end)
386 {
 // an end sequence built from the start match takes
 // precedence over the rule's literal end
387 if(context.spanEndSubst != null)
388 pattern.array = context.spanEndSubst;
389 else
390 pattern.array = checkRule.end;
391 }
392 else
393 pattern.array = checkRule.start;
394 pattern.offset = 0;
395 pattern.count = pattern.array.length;
396 matchedChars = pattern.count;
397
398 if(!SyntaxUtilities.regionMatches(context.rules
399 .getIgnoreCase(),line,pos,pattern.array))
400 {
401 return false;
402 }
403 }
404 else
405 {
406 // note that all regexps start with \A so they only
407 // match the start of the string
408 //int matchStart = pos - line.offset;
 // match against the rest of the line, starting at pos
409 charSeq = new SegmentCharSequence(line, pos - line.offset,
410 line.count - (pos - line.offset));
411 match = checkRule.startRegexp.matcher(charSeq);
412 if(!match.lookingAt())
413 return false;
414 else if(match.start() != 0)
415 throw new InternalError("Can't happen");
416 else
417 {
418 matchedChars = match.end();
419 /* workaround for hang if match was
420 * zero-width. not sure if there is
421 * a better way to handle this */
422 if(matchedChars == 0)
423 matchedChars = 1;
424 }
425 }
426 } //}}}
427
428 //{{{ Check for an escape sequence
429 if((checkRule.action & ParserRule.IS_ESCAPE) == ParserRule.IS_ESCAPE)
430 {
431 if(context.inRule != null)
432 handleRule(context.inRule,true);
433
 // two escapes in a row cancel each other out
434 escaped = !escaped;
435 pos += pattern.count - 1;
436 }
437 else if(escaped)
438 {
 // the matched sequence was escaped: skip over it without
 // applying the rule, but still report a match to the caller
 // NOTE(review): for a REGEXP start match, or the end of a
 // MARK_FOLLOWING rule, pattern was not updated by this call, so
 // pattern.count may be left over from an earlier call - confirm
 // whether matchedChars was intended here
439 escaped = false;
440 pos += pattern.count - 1;
441 } //}}}
442 //{{{ Handle start of rule
443 else if(!end)
444 {
445 if(context.inRule != null)
446 handleRule(context.inRule,true);
447
 // for MARK_PREVIOUS the pending word is emitted by the rule
 // itself below, so unclaimed text must not be flushed here
448 markKeyword((checkRule.action & ParserRule.MARK_PREVIOUS)
449 != ParserRule.MARK_PREVIOUS);
450
451 switch(checkRule.action & ParserRule.MAJOR_ACTIONS)
452 {
453 //{{{ SEQ
454 case ParserRule.SEQ:
455 context.spanEndSubst = null;
456
457 if((checkRule.action & ParserRule.REGEXP) != 0)
458 {
459 handleTokenWithSpaces(tokenHandler,
460 checkRule.token,
461 pos - line.offset,
462 matchedChars,
463 context);
464 }
465 else
466 {
467 tokenHandler.handleToken(line,
468 checkRule.token,
469 pos - line.offset,
470 matchedChars,context);
471 }
472
473 // a DELEGATE attribute on a SEQ changes the
474 // ruleset from the end of the SEQ onwards
475 if(checkRule.delegate != null)
476 {
 // the new context replaces the current one at the same
 // nesting level: it keeps the current parent
477 context = new LineContext(
478 checkRule.delegate,
479 context.parent);
480 keywords = context.rules.getKeywords();
481 }
482 break;
483 //}}}
484 //{{{ SPAN, EOL_SPAN
485 case ParserRule.SPAN:
486 case ParserRule.EOL_SPAN:
487 context.inRule = checkRule;
488
489 byte tokenType = ((checkRule.action & ParserRule.EXCLUDE_MATCH)
490 == ParserRule.EXCLUDE_MATCH
491 ? context.rules.getDefault() : checkRule.token);
492
493 if((checkRule.action & ParserRule.REGEXP) != 0)
494 {
495 handleTokenWithSpaces(tokenHandler,
496 tokenType,
497 pos - line.offset,
498 matchedChars,
499 context);
500 }
501 else
502 {
503 tokenHandler.handleToken(line,tokenType,
504 pos - line.offset,
505 matchedChars,context);
506 }
507
508 char[] spanEndSubst = null;
509 /* substitute result of matching the rule start
510 * into the end string.
511 *
512 * eg, in shell script mode, <<\s*(\w+) is
513 * matched into \<$1\> to construct rules for
514 * highlighting read-ins like this <<EOF
515 * ...
516 * EOF
517 */
518 if(charSeq != null && checkRule.end != null)
519 {
520 spanEndSubst = substitute(match,
521 checkRule.end);
522 }
523
 // the span's state stays on the current context, which becomes
 // the parent of a new context using the delegate rule set
524 context.spanEndSubst = spanEndSubst;
525 context = new LineContext(
526 checkRule.delegate,
527 context);
528 keywords = context.rules.getKeywords();
529
530 break;
531 //}}}
532 //{{{ MARK_FOLLOWING
533 case ParserRule.MARK_FOLLOWING:
 // NOTE(review): the length passed here is pattern.count while pos
 // is advanced by matchedChars below; for a REGEXP rule pattern is
 // not updated by this call, so the two may differ - confirm
534 tokenHandler.handleToken(line,(checkRule.action
535 & ParserRule.EXCLUDE_MATCH)
536 == ParserRule.EXCLUDE_MATCH ?
537 context.rules.getDefault()
538 : checkRule.token,pos - line.offset,
539 pattern.count,context);
540
541 context.spanEndSubst = null;
542 context.inRule = checkRule;
543 break;
544 //}}}
545 //{{{ MARK_PREVIOUS
546 case ParserRule.MARK_PREVIOUS:
547 context.spanEndSubst = null;
548
549 if ((checkRule.action & ParserRule.EXCLUDE_MATCH)
550 == ParserRule.EXCLUDE_MATCH)
551 {
 // the pending word gets the rule's token type, the matched
 // sequence itself the default type
552 if(pos != lastOffset)
553 {
554 tokenHandler.handleToken(line,
555 checkRule.token,
556 lastOffset - line.offset,
557 pos - lastOffset,
558 context);
559 }
560
561 tokenHandler.handleToken(line,
562 context.rules.getDefault(),
563 pos - line.offset,pattern.count,
564 context);
565 }
566 else
567 {
 // pending word and matched sequence form a single token
568 tokenHandler.handleToken(line,
569 checkRule.token,
570 lastOffset - line.offset,
571 pos - lastOffset + pattern.count,
572 context);
573 }
574
575 break;
576 //}}}
577 default:
578 throw new InternalError("Unhandled major action");
579 }
580
581 // move pos to last character of match sequence
582 pos += (matchedChars - 1);
583 lastOffset = pos + 1;
584
585 // break out of inner for loop to check next char
586 } //}}}
587 //{{{ Handle end of MARK_FOLLOWING
588 else if((context.inRule.action & ParserRule.MARK_FOLLOWING) != 0)
589 {
 // everything since the rule started gets the rule's token type
590 if(pos != lastOffset)
591 {
592 tokenHandler.handleToken(line,
593 context.inRule.token,
594 lastOffset - line.offset,
595 pos - lastOffset,context);
596 }
597
598 lastOffset = pos;
599 context.inRule = null;
600 } //}}}
601
602 return true;
603 } //}}}
604
605 //{{{ handleNoWordBreak() method
606 private void handleNoWordBreak()
607 {
608 if(context.parent != null)
609 {
610 ParserRule rule = context.parent.inRule;
611 if(rule != null && (context.parent.inRule.action
612 & ParserRule.NO_WORD_BREAK) != 0)
613 {
614 if(pos != lastOffset)
615 {
616 tokenHandler.handleToken(line,
617 rule.token,
618 lastOffset - line.offset,
619 pos - lastOffset,context);
620 }
621
622 lastOffset = pos;
623 context = context.parent;
624 keywords = context.rules.getKeywords();
625 context.inRule = null;
626 }
627 }
628 } //}}}
629
630 //{{{ handleTokenWithSpaces() method
631 private void handleTokenWithSpaces(TokenHandler tokenHandler,
632 byte tokenType, int start, int len, LineContext context)
633 {
634 int last = start;
635 int end = start + len;
636
637 for(int i = start; i < end; i++)
638 {
639 if(Character.isWhitespace(line.array[i + line.offset]))
640 {
641 if(last != i)
642 {
643 tokenHandler.handleToken(line,
644 tokenType,last,i - last,context);
645 }
646 tokenHandler.handleToken(line,tokenType,i,1,context);
647 last = i + 1;
648 }
649 }
650
651 if(last != end)
652 {
653 tokenHandler.handleToken(line,tokenType,last,
654 end - last,context);
655 }
656 } //}}}
657
658 //{{{ markKeyword() method
659 private void markKeyword(boolean addRemaining)
660 {
661 int len = pos - lastOffset;
662 if(len == 0)
663 return;
664
665 //{{{ Do digits
666 if(context.rules.getHighlightDigits())
667 {
668 boolean digit = false;
669 boolean mixed = false;
670
671 for(int i = lastOffset; i < pos; i++)
672 {
673 char ch = line.array[i];
674 if(Character.isDigit(ch))
675 digit = true;
676 else
677 mixed = true;
678 }
679
680 if(mixed)
681 {
682 Pattern digitRE = context.rules.getDigitRegexp();
683
684 // only match against regexp if its not all
685 // digits; if all digits, no point matching
686 if(digit)
687 {
688 if(digitRE == null)
689 {
690 // mixed digit/alpha keyword,
691 // and no regexp... don't
692 // highlight as DIGIT
693 digit = false;
694 }
695 else
696 {
697 int oldCount = line.count;
698 int oldOffset = line.offset;
699 line.offset = lastOffset;
700 line.count = len;
701 CharSequence seq = new SegmentCharSequence(line);
702 digit = digitRE.matcher(seq).matches();
703 line.offset = oldOffset;
704 line.count = oldCount;
705 }
706 }
707 }
708
709 if(digit)
710 {
711 tokenHandler.handleToken(line,Token.DIGIT,
712 lastOffset - line.offset,
713 len,context);
714 lastOffset = pos;
715
716 return;
717 }
718 } //}}}
719
720 //{{{ Do keywords
721 if(keywords != null)
722 {
723 byte id = keywords.lookup(line, lastOffset, len);
724
725 if(id != Token.NULL)
726 {
727 tokenHandler.handleToken(line,id,
728 lastOffset - line.offset,
729 len,context);
730 lastOffset = pos;
731 return;
732 }
733 } //}}}
734
735 //{{{ Handle any remaining crud
736 if(addRemaining)
737 {
738 tokenHandler.handleToken(line,context.rules.getDefault(),
739 lastOffset - line.offset,len,context);
740 lastOffset = pos;
741 } //}}}
742 } //}}}
743
744 //{{{ substitute() method
745 private static char[] substitute(Matcher match, char[] end)
746 {
747 StringBuffer buf = new StringBuffer();
748 for(int i = 0; i < end.length; i++)
749 {
750 char ch = end[i];
751 if(ch == '$')
752 {
753 if(i == end.length - 1)
754 buf.append(ch);
755 else
756 {
757 char digit = end[i + 1];
758 if(!Character.isDigit(digit))
759 buf.append(ch);
760 else
761 {
762 buf.append(match.group(
763 digit - '0'));
764 i++;
765 }
766 }
767 }
768 else
769 buf.append(ch);
770 }
771
772 char[] returnValue = new char[buf.length()];
773 buf.getChars(0,buf.length(),returnValue,0);
774 return returnValue;
775 } //}}}
776
777 //}}}
778
779 //{{{ LineContext class
780 /**
781 * Stores persistent per-line syntax parser state.
782 */
783 public static class LineContext
784 {
785 private static Hashtable intern = new Hashtable();
786
787 public LineContext parent;
788 public ParserRule inRule;
789 public ParserRuleSet rules;
790 // used for SPAN_REGEXP rules; otherwise null
791 public char[] spanEndSubst;
792
793 //{{{ LineContext constructor
794 public LineContext(ParserRuleSet rs, LineContext lc)
795 {
796 rules = rs;
797 parent = (lc == null ? null : (LineContext)lc.clone());
798 } //}}}
799
800 //{{{ LineContext constructor
801 public LineContext()
802 {
803 } //}}}
804
805 //{{{ intern() method
806 public LineContext intern()
807 {
808 Object obj = intern.get(this);
809 if(obj == null)
810 {
811 intern.put(this,this);
812 return this;
813 }
814 else
815 return (LineContext)obj;
816 } //}}}
817
818 //{{{ hashCode() method
819 public int hashCode()
820 {
821 if(inRule != null)
822 return inRule.hashCode();
823 else if(rules != null)
824 return rules.hashCode();
825 else
826 return 0;
827 } //}}}
828
829 //{{{ equals() method
830 public boolean equals(Object obj)
831 {
832 if(obj instanceof LineContext)
833 {
834 LineContext lc = (LineContext)obj;
835 return lc.inRule == inRule && lc.rules == rules
836 && MiscUtilities.objectsEqual(parent,lc.parent)
837 && charArraysEqual(spanEndSubst,lc.spanEndSubst);
838 }
839 else
840 return false;
841 } //}}}
842
843 //{{{ clone() method
844 public Object clone()
845 {
846 LineContext lc = new LineContext();
847 lc.inRule = inRule;
848 lc.rules = rules;
849 lc.parent = (parent == null) ? null : (LineContext) parent.clone();
850 lc.spanEndSubst = spanEndSubst;
851
852 return lc;
853 } //}}}
854
855 //{{{ charArraysEqual() method
856 private static boolean charArraysEqual(char[] c1, char[] c2)
857 {
858 if(c1 == null)
859 return c2 == null;
860
861 // c1 is not null
862 if(c2 == null)
863 return false;
864
865 if(c1.length != c2.length)
866 return false;
867
868 for(int i = 0; i < c1.length; i++)
869 {
870 if(c1[i] != c2[i])
871 return false;
872 }
873
874 return true;
875 } //}}}
876 } //}}}
877}