/jEdit/tags/jedit-4-3-pre5/org/gjt/sp/jedit/TextUtilities.java
Java | 828 lines | 534 code | 70 blank | 224 comment | 134 complexity | cfbd9d3f417729d5dd8478532373ccfb MD5 | raw file
Possible License(s): BSD-3-Clause, AGPL-1.0, Apache-2.0, LGPL-2.0, LGPL-3.0, GPL-2.0, CC-BY-SA-3.0, LGPL-2.1, GPL-3.0, MPL-2.0-no-copyleft-exception, IPL-1.0
1/*
2 * TextUtilities.java - Various text functions
3 * Copyright (C) 1998, 2005 Slava Pestov
4 * :tabSize=8:indentSize=8:noTabs=false:
5 * :folding=explicit:collapseFolds=1:
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License
9 * as published by the Free Software Foundation; either version 2
10 * of the License, or any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
20 */
21
22package org.gjt.sp.jedit;
23
24//{{{ Imports
25import java.util.*;
26import javax.swing.text.Segment;
27import org.gjt.sp.jedit.buffer.JEditBuffer;
28import org.gjt.sp.jedit.syntax.*;
29import org.gjt.sp.util.StandardUtilities;
30//}}}
31
32/**
33 * Contains several text manipulation methods.
34 *
35 * <ul>
36 * <li>Bracket matching
37 * <li>Word start and end offset calculation
38 * <li>String comparison
39 * <li>Converting tabs to spaces and vice versa
40 * <li>Wrapping text
41 * <li>String case conversion
42 * </ul>
43 *
44 * @author Slava Pestov
45 * @version $Id: TextUtilities.java 5485 2006-06-23 22:04:58Z kpouer $
46 */
47public class TextUtilities
48{
49 // to avoid slowdown with large files; only scan 10000 lines either way
// findMatchingBracket() gives up once the line being scanned is more than
// this many lines away from the line the search started on.
50 public static final int BRACKET_MATCH_LIMIT = 10000;
51
52 //{{{ getTokenAtOffset() method
53 /**
54 * Returns the token that contains the specified offset.
55 * @param tokens The token list
56 * @param offset The offset
57 * @since jEdit 4.0pre3
58 */
59 public static Token getTokenAtOffset(Token tokens, int offset)
60 {
61 if(offset == 0 && tokens.id == Token.END)
62 return tokens;
63
64 for(;;)
65 {
66 if(tokens.id == Token.END)
67 throw new ArrayIndexOutOfBoundsException("offset > line length");
68
69 if(tokens.offset + tokens.length > offset)
70 return tokens;
71 else
72 tokens = tokens.next;
73 }
74 } //}}}
75
76 //{{{ getComplementaryBracket() method
77 /**
78 * Given an opening bracket, return the corresponding closing bracket
79 * and store true in <code>direction[0]</code>. Given a closing bracket,
80 * return the corresponding opening bracket and store false in
81 * <code>direction[0]</code>. Otherwise, return <code>\0</code>.
82 * @since jEdit 4.3pre2
83 */
84 public static char getComplementaryBracket(char ch, boolean[] direction)
85 {
86 switch(ch)
87 {
88 case '(': direction[0] = true; return ')';
89 case ')': direction[0] = false; return '(';
90 case '[': direction[0] = true; return ']';
91 case ']': direction[0] = false; return '[';
92 case '{': direction[0] = true; return '}';
93 case '}': direction[0] = false; return '{';
94 default: return '\0';
95 }
96 } //}}}
97
98 //{{{ findMatchingBracket() method
99 /**
100 * Returns the offset of the bracket matching the one at the
101 * specified offset of the buffer, or -1 if the bracket is
102 * unmatched (or if the character is not a bracket).
103 * @param buffer The buffer
104 * @param line The line
105 * @param offset The offset within that line
106 * @since jEdit 2.6pre1
107 */
108 public static int findMatchingBracket(JEditBuffer buffer, int line, int offset)
109 {
110 if(offset < 0 || offset >= buffer.getLineLength(line))
111 {
112 throw new ArrayIndexOutOfBoundsException(offset + ":"
113 + buffer.getLineLength(line));
114 }
115
116 Segment lineText = new Segment();
117 buffer.getLineText(line,lineText);
118
119 char c = lineText.array[lineText.offset + offset];
120 // false - backwards, true - forwards
121 boolean[] direction = new boolean[1];
122
123 // corresponding character
124 char cprime = getComplementaryBracket(c,direction);
125
126 // 1 because we've already 'seen' the first bracket
127 int count = 1;
128
129 DefaultTokenHandler tokenHandler = new DefaultTokenHandler();
130 buffer.markTokens(line,tokenHandler);
131
132 // Get the syntax token at 'offset'
133 // only tokens with the same type will be checked for
134 // the corresponding bracket
135 byte idOfBracket = getTokenAtOffset(tokenHandler.getTokens(),offset).id;
136
137 boolean haveTokens = true;
138
139 int startLine = line;
140
141 //{{{ Forward search
142 if(direction[0])
143 {
144 offset++;
145
146 for(;;)
147 {
148 for(int i = offset; i < lineText.count; i++)
149 {
150 char ch = lineText.array[lineText.offset + i];
151 if(ch == c)
152 {
153 if(!haveTokens)
154 {
155 tokenHandler.init();
156 buffer.markTokens(line,tokenHandler);
157 haveTokens = true;
158 }
159 if(getTokenAtOffset(tokenHandler.getTokens(),i).id == idOfBracket)
160 count++;
161 }
162 else if(ch == cprime)
163 {
164 if(!haveTokens)
165 {
166 tokenHandler.init();
167 buffer.markTokens(line,tokenHandler);
168 haveTokens = true;
169 }
170 if(getTokenAtOffset(tokenHandler.getTokens(),i).id == idOfBracket)
171 {
172 count--;
173 if(count == 0)
174 return buffer.getLineStartOffset(line) + i;
175 }
176 }
177 }
178
179 //{{{ Go on to next line
180 line++;
181 if(line >= buffer.getLineCount() || (line - startLine) > BRACKET_MATCH_LIMIT)
182 break;
183 buffer.getLineText(line,lineText);
184 offset = 0;
185 haveTokens = false;
186 //}}}
187 }
188 } //}}}
189 //{{{ Backward search
190 else
191 {
192 offset--;
193
194 for(;;)
195 {
196 for(int i = offset; i >= 0; i--)
197 {
198 char ch = lineText.array[lineText.offset + i];
199 if(ch == c)
200 {
201 if(!haveTokens)
202 {
203 tokenHandler.init();
204 buffer.markTokens(line,tokenHandler);
205 haveTokens = true;
206 }
207 if(getTokenAtOffset(tokenHandler.getTokens(),i).id == idOfBracket)
208 count++;
209 }
210 else if(ch == cprime)
211 {
212 if(!haveTokens)
213 {
214 tokenHandler.init();
215 buffer.markTokens(line,tokenHandler);
216 haveTokens = true;
217 }
218 if(getTokenAtOffset(tokenHandler.getTokens(),i).id == idOfBracket)
219 {
220 count--;
221 if(count == 0)
222 return buffer.getLineStartOffset(line) + i;
223 }
224 }
225 }
226
227 //{{{ Go on to previous line
228 line--;
229 if(line < 0 || (startLine - line) > BRACKET_MATCH_LIMIT)
230 break;
231 buffer.getLineText(line,lineText);
232 offset = lineText.count - 1;
233 haveTokens = false;
234 //}}}
235 }
236 } //}}}
237
238 // Nothing found
239 return -1;
240 } //}}}
241
242 //{{{ findWordStart() method
243 /**
244 * Locates the start of the word at the specified position.
245 * @param line The text
246 * @param pos The position
247 * @param noWordSep Characters that are non-alphanumeric, but
248 * should be treated as word characters anyway
249 */
250 public static int findWordStart(String line, int pos, String noWordSep)
251 {
252 return findWordStart(line, pos, noWordSep, true, false);
253 } //}}}
254
255
256 /** Similar to perl's join() method on lists,
257 * but works with all collections.
258 *
259 * @param c An iterable collection of Objects
260 * @param delim a string to put between each object
261 * @return a joined toString() representation of the collection
262 *
263 * @since jedit 4.3pre3
264 */
265 public static String join(Collection c, String delim) {
266 StringBuffer retval = new StringBuffer();
267 Iterator itr = c.iterator();
268 if (itr.hasNext()) {
269 retval.append( itr.next().toString() );
270 }
271 else return "";
272 while (itr.hasNext()) {
273 retval.append(delim);
274 retval.append(itr.next().toString());
275 }
276 return retval.toString();
277 }
278
279 //{{{ findWordStart() method
280 /**
281 * Locates the start of the word at the specified position.
282 * @param line The text
283 * @param pos The position
284 * @param noWordSep Characters that are non-alphanumeric, but
285 * should be treated as word characters anyway
286 * @param joinNonWordChars Treat consecutive non-alphanumeric
287 * characters as one word
288 * @since jEdit 4.2pre5
289 */
290 public static int findWordStart(String line, int pos, String noWordSep,
291 boolean joinNonWordChars)
292 {
293 return findWordStart(line,pos,noWordSep,joinNonWordChars,false);
294 } //}}}
295
296 //{{{ findWordStart() method
297 /**
298 * Locates the start of the word at the specified position.
299 * @param line The text
300 * @param pos The position
301 * @param noWordSep Characters that are non-alphanumeric, but
302 * should be treated as word characters anyway
303 * @param joinNonWordChars Treat consecutive non-alphanumeric
304 * characters as one word
305 * @param eatWhitespace Include whitespace at start of word
306 * @since jEdit 4.1pre2
307 */
308 public static int findWordStart(String line, int pos, String noWordSep,
309 boolean joinNonWordChars, boolean eatWhitespace)
310 {
311 char ch = line.charAt(pos);
312
313 if(noWordSep == null)
314 noWordSep = "";
315
316 //{{{ the character under the cursor changes how we behave.
317 int type;
318 if(Character.isWhitespace(ch))
319 type = WHITESPACE;
320 else if(Character.isLetterOrDigit(ch)
321 || noWordSep.indexOf(ch) != -1)
322 type = WORD_CHAR;
323 else
324 type = SYMBOL;
325 //}}}
326
327loop: for(int i = pos; i >= 0; i--)
328 {
329 ch = line.charAt(i);
330 switch(type)
331 {
332 //{{{ Whitespace...
333 case WHITESPACE:
334 // only select other whitespace in this case
335 if(Character.isWhitespace(ch))
336 break;
337 // word char or symbol; stop
338 else
339 return i + 1; //}}}
340 //{{{ Word character...
341 case WORD_CHAR:
342 // word char; keep going
343 if(Character.isLetterOrDigit(ch) ||
344 noWordSep.indexOf(ch) != -1)
345 {
346 break;
347 }
348 // whitespace; include in word if eating
349 else if(Character.isWhitespace(ch)
350 && eatWhitespace)
351 {
352 type = WHITESPACE;
353 break;
354 }
355 else
356 return i + 1; //}}}
357 //{{{ Symbol...
358 case SYMBOL:
359 if(!joinNonWordChars && pos != i)
360 return i + 1;
361
362 // whitespace; include in word if eating
363 if(Character.isWhitespace(ch))
364 {
365 if(eatWhitespace)
366 {
367 type = WHITESPACE;
368 break;
369 }
370 else
371 return i + 1;
372 }
373 else if(Character.isLetterOrDigit(ch) ||
374 noWordSep.indexOf(ch) != -1)
375 {
376 return i + 1;
377 }
378 else
379 {
380 break;
381 } //}}}
382 }
383 }
384
385 return 0;
386 } //}}}
387
388 //{{{ findWordEnd() method
389 /**
390 * Locates the end of the word at the specified position.
391 * @param line The text
392 * @param pos The position
393 * @param noWordSep Characters that are non-alphanumeric, but
394 * should be treated as word characters anyway
395 */
396 public static int findWordEnd(String line, int pos, String noWordSep)
397 {
398 return findWordEnd(line, pos, noWordSep, true);
399 } //}}}
400
401 //{{{ findWordEnd() method
402 /**
403 * Locates the end of the word at the specified position.
404 * @param line The text
405 * @param pos The position
406 * @param noWordSep Characters that are non-alphanumeric, but
407 * should be treated as word characters anyway
408 * @param joinNonWordChars Treat consecutive non-alphanumeric
409 * characters as one word
410 * @since jEdit 4.1pre2
411 */
412 public static int findWordEnd(String line, int pos, String noWordSep,
413 boolean joinNonWordChars)
414 {
415 return findWordEnd(line,pos,noWordSep,joinNonWordChars,false);
416 } //}}}
417
418 //{{{ findWordEnd() method
419 /**
420 * Locates the end of the word at the specified position.
421 * @param line The text
422 * @param pos The position
423 * @param noWordSep Characters that are non-alphanumeric, but
424 * should be treated as word characters anyway
425 * @param joinNonWordChars Treat consecutive non-alphanumeric
426 * characters as one word
427 * @param eatWhitespace Include whitespace at end of word
428 * @since jEdit 4.2pre5
429 */
430 public static int findWordEnd(String line, int pos, String noWordSep,
431 boolean joinNonWordChars, boolean eatWhitespace)
432 {
433 if(pos != 0)
434 pos--;
435
436 char ch = line.charAt(pos);
437
438 if(noWordSep == null)
439 noWordSep = "";
440
441 //{{{ the character under the cursor changes how we behave.
442 int type;
443 if(Character.isWhitespace(ch))
444 type = WHITESPACE;
445 else if(Character.isLetterOrDigit(ch)
446 || noWordSep.indexOf(ch) != -1)
447 type = WORD_CHAR;
448 else
449 type = SYMBOL;
450 //}}}
451
452loop: for(int i = pos; i < line.length(); i++)
453 {
454 ch = line.charAt(i);
455 switch(type)
456 {
457 //{{{ Whitespace...
458 case WHITESPACE:
459 // only select other whitespace in this case
460 if(Character.isWhitespace(ch))
461 break;
462 else
463 return i; //}}}
464 //{{{ Word character...
465 case WORD_CHAR:
466 if(Character.isLetterOrDigit(ch) ||
467 noWordSep.indexOf(ch) != -1)
468 {
469 break;
470 }
471 // whitespace; include in word if eating
472 else if(Character.isWhitespace(ch)
473 && eatWhitespace)
474 {
475 type = WHITESPACE;
476 break;
477 }
478 else
479 return i; //}}}
480 //{{{ Symbol...
481 case SYMBOL:
482 if(!joinNonWordChars && i != pos)
483 return i;
484
485 // if we see whitespace, set flag.
486 if(Character.isWhitespace(ch))
487 {
488 if(eatWhitespace)
489 {
490 type = WHITESPACE;
491 break;
492 }
493 else
494 return i;
495 }
496 else if(Character.isLetterOrDigit(ch) ||
497 noWordSep.indexOf(ch) != -1)
498 {
499 return i;
500 }
501 else
502 {
503 break;
504 } //}}}
505 }
506 }
507
508 return line.length();
509 } //}}}
510
511 //{{{ spacesToTabs() method
512 /**
513 * Converts consecutive spaces to tabs in the specified string.
514 * @param in The string
515 * @param tabSize The tab size
516 */
517 public static String spacesToTabs(String in, int tabSize)
518 {
519 StringBuffer buf = new StringBuffer();
520 int width = 0;
521 int whitespace = 0;
522 for(int i = 0; i < in.length(); i++)
523 {
524 switch(in.charAt(i))
525 {
526 case ' ':
527 whitespace++;
528 width++;
529 break;
530 case '\t':
531 int tab = tabSize - (width % tabSize);
532 width += tab;
533 whitespace += tab;
534 break;
535 case '\n':
536 if(whitespace != 0)
537 {
538 buf.append(StandardUtilities
539 .createWhiteSpace(whitespace,tabSize,
540 width - whitespace));
541 }
542 whitespace = 0;
543 width = 0;
544 buf.append('\n');
545 break;
546 default:
547 if(whitespace != 0)
548 {
549 buf.append(StandardUtilities
550 .createWhiteSpace(whitespace,tabSize,
551 width - whitespace));
552 whitespace = 0;
553 }
554 buf.append(in.charAt(i));
555 width++;
556 break;
557 }
558 }
559
560 if(whitespace != 0)
561 {
562 buf.append(StandardUtilities.createWhiteSpace(whitespace,tabSize,
563 width - whitespace));
564 }
565
566 return buf.toString();
567 } //}}}
568
569 //{{{ tabsToSpaces() method
570 /**
571 * Converts tabs to consecutive spaces in the specified string.
572 * @param in The string
573 * @param tabSize The tab size
574 */
575 public static String tabsToSpaces(String in, int tabSize)
576 {
577 StringBuffer buf = new StringBuffer();
578 int width = 0;
579 for(int i = 0; i < in.length(); i++)
580 {
581 switch(in.charAt(i))
582 {
583 case '\t':
584 int count = tabSize - (width % tabSize);
585 width += count;
586 while(--count >= 0)
587 buf.append(' ');
588 break;
589 case '\n':
590 width = 0;
591 buf.append(in.charAt(i));
592 break;
593 default:
594 width++;
595 buf.append(in.charAt(i));
596 break;
597 }
598 }
599 return buf.toString();
600 } //}}}
601
602 //{{{ format() method
603 /**
604 * Formats the specified text by merging and breaking lines to the
605 * specified width.
606 * @param text The text
607 * @param maxLineLength The maximum line length
608 * @param tabSize The tab size
609 */
610 public static String format(String text, int maxLineLength, int tabSize)
611 {
612 StringBuffer buf = new StringBuffer();
613
614 int index = 0;
615
616 for(;;)
617 {
618 int newIndex = text.indexOf("\n\n",index);
619 if(newIndex == -1)
620 break;
621
622 formatParagraph(text.substring(index,newIndex),
623 maxLineLength,tabSize,buf);
624 buf.append("\n\n");
625 index = newIndex + 2;
626 }
627
628 if(index != text.length())
629 {
630 formatParagraph(text.substring(index),
631 maxLineLength,tabSize,buf);
632 }
633
634 return buf.toString();
635 } //}}}
636
637 //{{{ indexIgnoringWhitespace() method
638 /**
639 * Inverse of <code>ignoringWhitespaceIndex()</code>.
640 * @param str A string
641 * @param index The index
642 * @return The number of non-whitespace characters that precede the index.
643 * @since jEdit 4.3pre2
644 */
645 public static int indexIgnoringWhitespace(String str, int index)
646 {
647 int j = 0;
648 for(int i = 0; i < index; i++)
649 if(!Character.isWhitespace(str.charAt(i))) j++;
650 return j;
651 } //}}}
652
653 //{{{ ignoringWhitespaceIndex() method
654 /**
655 * Inverse of <code>indexIgnoringWhitespace()</code>.
656 * @param str A string
657 * @param index The index
658 * @return The index into the string where the number of non-whitespace
659 * characters that precede the index is count.
660 * @since jEdit 4.3pre2
661 */
662 public static int ignoringWhitespaceIndex(String str, int index)
663 {
664 int j = 0;
665 for(int i = 0;;i++)
666 {
667 if(!Character.isWhitespace(str.charAt(i))) j++;
668
669 if(j > index)
670 return i;
671 if(i == str.length() - 1)
672 return i + 1;
673 }
674 } //}}}
675
676 //{{{ getStringCase() method
677 public static final int MIXED = 0;
678 public static final int LOWER_CASE = 1;
679 public static final int UPPER_CASE = 2;
680 public static final int TITLE_CASE = 3;
681
682 /**
683 * Returns if the specified string is all upper case, all lower case,
684 * or title case (first letter upper case, rest lower case).
685 * @param str The string
686 * @since jEdit 4.0pre1
687 */
688 public static int getStringCase(String str)
689 {
690 if(str.length() == 0)
691 return MIXED;
692
693 int state = -1;
694
695 char ch = str.charAt(0);
696 if(Character.isLetter(ch))
697 {
698 if(Character.isUpperCase(ch))
699 state = UPPER_CASE;
700 else
701 state = LOWER_CASE;
702 }
703
704 for(int i = 1; i < str.length(); i++)
705 {
706 ch = str.charAt(i);
707 if(!Character.isLetter(ch))
708 continue;
709
710 switch(state)
711 {
712 case UPPER_CASE:
713 if(Character.isLowerCase(ch))
714 {
715 if(i == 1)
716 state = TITLE_CASE;
717 else
718 return MIXED;
719 }
720 break;
721 case LOWER_CASE:
722 case TITLE_CASE:
723 if(Character.isUpperCase(ch))
724 return MIXED;
725 break;
726 }
727 }
728
729 return state;
730 } //}}}
731
732 //{{{ toTitleCase() method
733 /**
734 * Converts the specified string to title case, by capitalizing the
735 * first letter.
736 * @param str The string
737 * @since jEdit 4.0pre1
738 */
739 public static String toTitleCase(String str)
740 {
741 if(str.length() == 0)
742 return str;
743 else
744 {
745 return Character.toUpperCase(str.charAt(0))
746 + str.substring(1).toLowerCase();
747 }
748 } //}}}
749
750 //{{{ Private members
// Character classes used by findWordStart() and findWordEnd() to record
// what kind of run is being scanned.
// A run of whitespace characters.
751 private static final int WHITESPACE = 0;
// A run of letters, digits, or characters listed in noWordSep.
752 private static final int WORD_CHAR = 1;
// A run of characters that are neither whitespace nor word characters.
753 private static final int SYMBOL = 2;
754
755 //{{{ formatParagraph() method
756 private static void formatParagraph(String text, int maxLineLength,
757 int tabSize, StringBuffer buf)
758 {
759 // align everything to paragraph's leading indent
760 int leadingWhitespaceCount = StandardUtilities.getLeadingWhiteSpace(text);
761 String leadingWhitespace = text.substring(0,leadingWhitespaceCount);
762 int leadingWhitespaceWidth = StandardUtilities.getLeadingWhiteSpaceWidth(text,tabSize);
763
764 buf.append(leadingWhitespace);
765
766 int lineLength = leadingWhitespaceWidth;
767 StringTokenizer st = new StringTokenizer(text);
768 while(st.hasMoreTokens())
769 {
770 String word = st.nextToken();
771 if(lineLength == leadingWhitespaceWidth)
772 {
773 // do nothing
774 }
775 else if(lineLength + word.length() + 1 > maxLineLength)
776 {
777 buf.append('\n');
778 buf.append(leadingWhitespace);
779 lineLength = leadingWhitespaceWidth;
780 }
781 else
782 {
783 buf.append(' ');
784 lineLength++;
785 }
786 buf.append(word);
787 lineLength += word.length();
788 }
789 } //}}}
790
791 //{{{ indexIgnoringWhitespace() method
792 public static void indexIgnoringWhitespace(String text, int maxLineLength,
793 int tabSize, StringBuffer buf)
794 {
795 // align everything to paragraph's leading indent
796 int leadingWhitespaceCount = StandardUtilities.getLeadingWhiteSpace(text);
797 String leadingWhitespace = text.substring(0,leadingWhitespaceCount);
798 int leadingWhitespaceWidth = StandardUtilities.getLeadingWhiteSpaceWidth(text,tabSize);
799
800 buf.append(leadingWhitespace);
801
802 int lineLength = leadingWhitespaceWidth;
803 StringTokenizer st = new StringTokenizer(text);
804 while(st.hasMoreTokens())
805 {
806 String word = st.nextToken();
807 if(lineLength == leadingWhitespaceWidth)
808 {
809 // do nothing
810 }
811 else if(lineLength + word.length() + 1 > maxLineLength)
812 {
813 buf.append('\n');
814 buf.append(leadingWhitespace);
815 lineLength = leadingWhitespaceWidth;
816 }
817 else
818 {
819 buf.append(' ');
820 lineLength++;
821 }
822 buf.append(word);
823 lineLength += word.length();
824 }
825 } //}}}
826
827 //}}}
828}