PageRenderTime 27ms CodeModel.GetById 3ms app.highlight 19ms RepoModel.GetById 1ms app.codeStats 0ms

/jEdit/tags/jedit-4-2-pre14/org/gjt/sp/jedit/TextUtilities.java

#
Java | 713 lines | 467 code | 61 blank | 185 comment | 120 complexity | f064168abfc395aca67ac49a33f1e712 MD5 | raw file
  1/*
  2 * TextUtilities.java - Various text functions
  3 * Copyright (C) 1998, 2003 Slava Pestov
  4 * :tabSize=8:indentSize=8:noTabs=false:
  5 * :folding=explicit:collapseFolds=1:
  6 *
  7 * This program is free software; you can redistribute it and/or
  8 * modify it under the terms of the GNU General Public License
  9 * as published by the Free Software Foundation; either version 2
 10 * of the License, or any later version.
 11 *
 12 * This program is distributed in the hope that it will be useful,
 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 15 * GNU General Public License for more details.
 16 *
 17 * You should have received a copy of the GNU General Public License
 18 * along with this program; if not, write to the Free Software
 19 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 20 */
 21
 22package org.gjt.sp.jedit;
 23
 24//{{{ Imports
 25import java.util.*;
 26import javax.swing.text.Segment;
 27import org.gjt.sp.jedit.syntax.*;
 28//}}}
 29
 30/**
 31 * Contains several text manipulation methods.
 32 *
 33 * <ul>
 34 * <li>Bracket matching
 35 * <li>Word start and end offset calculation
 36 * <li>String comparison
 37 * <li>Converting tabs to spaces and vice versa
 38 * <li>Wrapping text
 39 * <li>String case conversion
 40 * </ul>
 41 *
 42 * @author Slava Pestov
 43 * @version $Id: TextUtilities.java 5004 2004-03-28 00:07:27Z spestov $
 44 */
 45public class TextUtilities
 46{
 47	// To avoid slowdown with large files, findMatchingBracket() stops once it has moved more than this many lines away from the starting line, in either direction.
 48	public static final int BRACKET_MATCH_LIMIT = 10000;
 49
 50	//{{{ getTokenAtOffset() method
 51	/**
 52	 * Returns the token that contains the specified offset.
 53	 * @param tokens The token list
 54	 * @param offset The offset
 55	 * @since jEdit 4.0pre3
 56	 */
 57	public static Token getTokenAtOffset(Token tokens, int offset)
 58	{
 59		if(offset == 0 && tokens.id == Token.END)
 60			return tokens;
 61
 62		for(;;)
 63		{
 64			if(tokens.id == Token.END)
 65				throw new ArrayIndexOutOfBoundsException("offset > line length");
 66
 67			if(tokens.offset + tokens.length > offset)
 68				return tokens;
 69			else
 70				tokens = tokens.next;
 71		}
 72	} //}}}
 73
 74	//{{{ findMatchingBracket() method
 75	/**
 76	 * Returns the offset of the bracket matching the one at the
 77	 * specified offset of the buffer, or -1 if the bracket is
 78	 * unmatched (or if the character is not a bracket).
 79	 * @param buffer The buffer
 80	 * @param line The line
 81	 * @param offset The offset within that line
 82	 * @since jEdit 2.6pre1
 83	 */
 84	public static int findMatchingBracket(Buffer buffer, int line, int offset)
 85	{
 86		if(offset < 0 || offset >= buffer.getLineLength(line))
 87		{
 88			throw new ArrayIndexOutOfBoundsException(offset + ":"
 89				+ buffer.getLineLength(line));
 90		}
 91
 92		Segment lineText = new Segment();
 93		buffer.getLineText(line,lineText);
 94
 95		char c = lineText.array[lineText.offset + offset];
 96		char cprime; // corresponding character
 97		boolean direction; // false - backwards, true - forwards
 98
 99		switch(c)
100		{
101		case '(': cprime = ')'; direction = true;  break;
102		case ')': cprime = '('; direction = false; break;
103		case '[': cprime = ']'; direction = true;  break;
104		case ']': cprime = '['; direction = false; break;
105		case '{': cprime = '}'; direction = true;  break;
106		case '}': cprime = '{'; direction = false; break;
107		default: return -1;
108		}
109
110		// 1 because we've already 'seen' the first bracket
111		int count = 1;
112
113		DefaultTokenHandler tokenHandler = new DefaultTokenHandler();
114		buffer.markTokens(line,tokenHandler);
115
116		// Get the syntax token at 'offset'
117		// only tokens with the same type will be checked for
118		// the corresponding bracket
119		byte idOfBracket = getTokenAtOffset(tokenHandler.getTokens(),offset).id;
120
121		boolean haveTokens = true;
122
123		int startLine = line;
124
125		//{{{ Forward search
126		if(direction)
127		{
128			offset++;
129
130			for(;;)
131			{
132				for(int i = offset; i < lineText.count; i++)
133				{
134					char ch = lineText.array[lineText.offset + i];
135					if(ch == c)
136					{
137						if(!haveTokens)
138						{
139							tokenHandler.init();
140							buffer.markTokens(line,tokenHandler);
141							haveTokens = true;
142						}
143						if(getTokenAtOffset(tokenHandler.getTokens(),i).id == idOfBracket)
144							count++;
145					}
146					else if(ch == cprime)
147					{
148						if(!haveTokens)
149						{
150							tokenHandler.init();
151							buffer.markTokens(line,tokenHandler);
152							haveTokens = true;
153						}
154						if(getTokenAtOffset(tokenHandler.getTokens(),i).id == idOfBracket)
155						{
156							count--;
157							if(count == 0)
158								return buffer.getLineStartOffset(line) + i;
159						}
160					}
161				}
162
163				//{{{ Go on to next line
164				line++;
165				if(line >= buffer.getLineCount() || (line - startLine) > BRACKET_MATCH_LIMIT)
166					break;
167				buffer.getLineText(line,lineText);
168				offset = 0;
169				haveTokens = false;
170				//}}}
171			}
172		} //}}}
173		//{{{ Backward search
174		else
175		{
176			offset--;
177
178			for(;;)
179			{
180				for(int i = offset; i >= 0; i--)
181				{
182					char ch = lineText.array[lineText.offset + i];
183					if(ch == c)
184					{
185						if(!haveTokens)
186						{
187							tokenHandler.init();
188							buffer.markTokens(line,tokenHandler);
189							haveTokens = true;
190						}
191						if(getTokenAtOffset(tokenHandler.getTokens(),i).id == idOfBracket)
192							count++;
193					}
194					else if(ch == cprime)
195					{
196						if(!haveTokens)
197						{
198							tokenHandler.init();
199							buffer.markTokens(line,tokenHandler);
200							haveTokens = true;
201						}
202						if(getTokenAtOffset(tokenHandler.getTokens(),i).id == idOfBracket)
203						{
204							count--;
205							if(count == 0)
206								return buffer.getLineStartOffset(line) + i;
207						}
208					}
209				}
210
211				//{{{ Go on to previous line
212				line--;
213				if(line < 0 || (startLine - line) > BRACKET_MATCH_LIMIT)
214					break;
215				buffer.getLineText(line,lineText);
216				offset = lineText.count - 1;
217				haveTokens = false;
218				//}}}
219			}
220		} //}}}
221
222		// Nothing found
223		return -1;
224	} //}}}
225
226	//{{{ findWordStart() method
227	/**
228	 * Locates the start of the word at the specified position.
229	 * @param line The text
230	 * @param pos The position
231	 * @param noWordSep Characters that are non-alphanumeric, but
232	 * should be treated as word characters anyway
233	 */
234	public static int findWordStart(String line, int pos, String noWordSep)
235	{
236		return findWordStart(line, pos, noWordSep, true, false);
237	} //}}}
238
239	//{{{ findWordStart() method
240	/**
241	 * Locates the start of the word at the specified position.
242	 * @param line The text
243	 * @param pos The position
244	 * @param noWordSep Characters that are non-alphanumeric, but
245	 * should be treated as word characters anyway
246	 * @param joinNonWordChars Treat consecutive non-alphanumeric
247	 * characters as one word
248	 * @since jEdit 4.2pre5
249	 */
250	public static int findWordStart(String line, int pos, String noWordSep,
251		boolean joinNonWordChars)
252	{
253		return findWordStart(line,pos,noWordSep,joinNonWordChars,false);
254	} //}}}
255
256	//{{{ findWordStart() method
257	/**
258	 * Locates the start of the word at the specified position.
259	 * @param line The text
260	 * @param pos The position
261	 * @param noWordSep Characters that are non-alphanumeric, but
262	 * should be treated as word characters anyway
263	 * @param joinNonWordChars Treat consecutive non-alphanumeric
264	 * characters as one word
265	 * @param eatWhitespace Include whitespace at start of word
266	 * @since jEdit 4.1pre2
267	 */
268	public static int findWordStart(String line, int pos, String noWordSep,
269		boolean joinNonWordChars, boolean eatWhitespace)
270	{
271		char ch = line.charAt(pos);
272
273		if(noWordSep == null)
274			noWordSep = "";
275
276		//{{{ the character under the cursor changes how we behave.
277		int type;
278		if(Character.isWhitespace(ch))
279			type = WHITESPACE;
280		else if(Character.isLetterOrDigit(ch)
281			|| noWordSep.indexOf(ch) != -1)
282			type = WORD_CHAR;
283		else
284			type = SYMBOL;
285		//}}}
286
287loop:		for(int i = pos; i >= 0; i--)
288		{
289			ch = line.charAt(i);
290			switch(type)
291			{
292			//{{{ Whitespace...
293			case WHITESPACE:
294				// only select other whitespace in this case
295				if(Character.isWhitespace(ch))
296					break;
297				// word char or symbol; stop
298				else
299					return i + 1; //}}}
300			//{{{ Word character...
301			case WORD_CHAR:
302				// word char; keep going
303				if(Character.isLetterOrDigit(ch) ||
304					noWordSep.indexOf(ch) != -1)
305				{
306					break;
307				}
308				// whitespace; include in word if eating
309				else if(Character.isWhitespace(ch)
310					&& eatWhitespace)
311				{
312					type = WHITESPACE;
313					break;
314				}
315				else
316					return i + 1; //}}}
317			//{{{ Symbol...
318			case SYMBOL:
319				if(!joinNonWordChars && pos != i)
320					return i + 1;
321
322				// whitespace; include in word if eating
323				if(Character.isWhitespace(ch))
324				{
325					if(eatWhitespace)
326					{
327						type = WHITESPACE;
328						break;
329					}
330					else
331						return i + 1;
332				}
333				else if(Character.isLetterOrDigit(ch) ||
334					noWordSep.indexOf(ch) != -1)
335				{
336					return i + 1;
337				}
338				else
339				{
340					break;
341				} //}}}
342			}
343		}
344
345		return 0;
346	} //}}}
347
348	//{{{ findWordEnd() method
349	/**
350	 * Locates the end of the word at the specified position.
351	 * @param line The text
352	 * @param pos The position
353	 * @param noWordSep Characters that are non-alphanumeric, but
354	 * should be treated as word characters anyway
355	 */
356	public static int findWordEnd(String line, int pos, String noWordSep)
357	{
358		return findWordEnd(line, pos, noWordSep, true);
359	} //}}}
360
361	//{{{ findWordEnd() method
362	/**
363	 * Locates the end of the word at the specified position.
364	 * @param line The text
365	 * @param pos The position
366	 * @param noWordSep Characters that are non-alphanumeric, but
367	 * should be treated as word characters anyway
368	 * @param joinNonWordChars Treat consecutive non-alphanumeric
369	 * characters as one word
370	 * @since jEdit 4.1pre2
371	 */
372	public static int findWordEnd(String line, int pos, String noWordSep,
373		boolean joinNonWordChars)
374	{
375		return findWordEnd(line,pos,noWordSep,joinNonWordChars,false);
376	} //}}}
377
378	//{{{ findWordEnd() method
379	/**
380	 * Locates the end of the word at the specified position.
381	 * @param line The text
382	 * @param pos The position
383	 * @param noWordSep Characters that are non-alphanumeric, but
384	 * should be treated as word characters anyway
385	 * @param joinNonWordChars Treat consecutive non-alphanumeric
386	 * characters as one word
387	 * @param eatWhitespace Include whitespace at end of word
388	 * @since jEdit 4.2pre5
389	 */
390	public static int findWordEnd(String line, int pos, String noWordSep,
391		boolean joinNonWordChars, boolean eatWhitespace)
392	{
393		if(pos != 0)
394			pos--;
395
396		char ch = line.charAt(pos);
397
398		if(noWordSep == null)
399			noWordSep = "";
400
401		//{{{ the character under the cursor changes how we behave.
402		int type;
403		if(Character.isWhitespace(ch))
404			type = WHITESPACE;
405		else if(Character.isLetterOrDigit(ch)
406			|| noWordSep.indexOf(ch) != -1)
407			type = WORD_CHAR;
408		else
409			type = SYMBOL;
410		//}}}
411
412loop:		for(int i = pos; i < line.length(); i++)
413		{
414			ch = line.charAt(i);
415			switch(type)
416			{
417			//{{{ Whitespace...
418			case WHITESPACE:
419				// only select other whitespace in this case
420				if(Character.isWhitespace(ch))
421					break;
422				else
423					return i; //}}}
424			//{{{ Word character...
425			case WORD_CHAR:
426				if(Character.isLetterOrDigit(ch) ||
427					noWordSep.indexOf(ch) != -1)
428				{
429					break;
430				}
431				// whitespace; include in word if eating
432				else if(Character.isWhitespace(ch)
433					&& eatWhitespace)
434				{
435					type = WHITESPACE;
436					break;
437				}
438				else
439					return i; //}}}
440			//{{{ Symbol...
441			case SYMBOL:
442				if(!joinNonWordChars && i != pos)
443					return i;
444
445				// if we see whitespace, set flag.
446				if(Character.isWhitespace(ch))
447				{
448					if(eatWhitespace)
449					{
450						type = WHITESPACE;
451						break;
452					}
453					else
454						return i;
455				}
456				else if(Character.isLetterOrDigit(ch) ||
457					noWordSep.indexOf(ch) != -1)
458				{
459					return i;
460				}
461				else
462				{
463					break;
464				} //}}}
465			}
466		}
467
468		return line.length();
469	} //}}}
470
471	//{{{ spacesToTabs() method
472	/**
473	 * Converts consecutive spaces to tabs in the specified string.
474	 * @param in The string
475	 * @param tabSize The tab size
476	 */
477	public static String spacesToTabs(String in, int tabSize)
478	{
479		StringBuffer buf = new StringBuffer();
480		int width = 0;
481		int whitespace = 0;
482		for(int i = 0; i < in.length(); i++)
483		{
484			switch(in.charAt(i))
485			{
486			case ' ':
487				whitespace++;
488				width++;
489				break;
490			case '\t':
491				int tab = tabSize - (width % tabSize);
492				width += tab;
493				whitespace += tab;
494				break;
495			case '\n':
496				if(whitespace != 0)
497				{
498					buf.append(MiscUtilities
499						.createWhiteSpace(whitespace,tabSize,
500						width - whitespace));
501				}
502				whitespace = 0;
503				width = 0;
504				buf.append('\n');
505				break;
506			default:
507				if(whitespace != 0)
508				{
509					buf.append(MiscUtilities
510						.createWhiteSpace(whitespace,tabSize,
511						width - whitespace));
512					whitespace = 0;
513				}
514				buf.append(in.charAt(i));
515				width++;
516				break;
517			}
518		}
519
520		if(whitespace != 0)
521		{
522			buf.append(MiscUtilities.createWhiteSpace(whitespace,tabSize,
523				width - whitespace));
524		}
525
526                return buf.toString();
527	} //}}}
528
529	//{{{ tabsToSpaces() method
530	/**
531	 * Converts tabs to consecutive spaces in the specified string.
532	 * @param in The string
533	 * @param tabSize The tab size
534	 */
535	public static String tabsToSpaces(String in, int tabSize)
536	{
537		StringBuffer buf = new StringBuffer();
538		int width = 0;
539		for(int i = 0; i < in.length(); i++)
540		{
541			switch(in.charAt(i))
542			{
543			case '\t':
544				int count = tabSize - (width % tabSize);
545				width += count;
546				while(--count >= 0)
547					buf.append(' ');
548				break;
549			case '\n':
550				width = 0;
551				buf.append(in.charAt(i));
552				break;
553			default:
554				width++;
555				buf.append(in.charAt(i));
556				break;
557                        }
558                }
559                return buf.toString();
560	} //}}}
561
562	//{{{ format() method
563	/**
564	 * Formats the specified text by merging and breaking lines to the
565	 * specified width.
566	 * @param text The text
567	 * @param maxLineLength The maximum line length
568	 * @param tabSize The tab size
569	 */
570	public static String format(String text, int maxLineLength, int tabSize)
571	{
572		StringBuffer buf = new StringBuffer();
573
574		int index = 0;
575
576		for(;;)
577		{
578			int newIndex = text.indexOf("\n\n",index);
579			if(newIndex == -1)
580				break;
581
582			formatParagraph(text.substring(index,newIndex),
583				maxLineLength,tabSize,buf);
584			buf.append("\n\n");
585			index = newIndex + 2;
586		}
587
588		if(index != text.length())
589		{
590			formatParagraph(text.substring(index),
591				maxLineLength,tabSize,buf);
592		}
593
594		return buf.toString();
595	} //}}}
596
597	//{{{ getStringCase() method
598	public static final int MIXED = 0;
599	public static final int LOWER_CASE = 1;
600	public static final int UPPER_CASE = 2;
601	public static final int TITLE_CASE = 3;
602
603	/**
604	 * Returns if the specified string is all upper case, all lower case,
605	 * or title case (first letter upper case, rest lower case).
606	 * @param str The string
607	 * @since jEdit 4.0pre1
608	 */
609	public static int getStringCase(String str)
610	{
611		if(str.length() == 0)
612			return MIXED;
613
614		int state = -1;
615
616		char ch = str.charAt(0);
617		if(Character.isLetter(ch))
618		{
619			if(Character.isUpperCase(ch))
620				state = UPPER_CASE;
621			else
622				state = LOWER_CASE;
623		}
624
625		for(int i = 1; i < str.length(); i++)
626		{
627			ch = str.charAt(i);
628			if(!Character.isLetter(ch))
629				continue;
630
631			switch(state)
632			{
633			case UPPER_CASE:
634				if(Character.isLowerCase(ch))
635				{
636					if(i == 1)
637						state = TITLE_CASE;
638					else
639						return MIXED;
640				}
641				break;
642			case LOWER_CASE:
643			case TITLE_CASE:
644				if(Character.isUpperCase(ch))
645					return MIXED;
646				break;
647			}
648		}
649
650		return state;
651	} //}}}
652
653	//{{{ toTitleCase() method
654	/**
655	 * Converts the specified string to title case, by capitalizing the
656	 * first letter.
657	 * @param str The string
658	 * @since jEdit 4.0pre1
659	 */
660	public static String toTitleCase(String str)
661	{
662		if(str.length() == 0)
663			return str;
664		else
665		{
666			return Character.toUpperCase(str.charAt(0))
667				+ str.substring(1).toLowerCase();
668		}
669	} //}}}
670
671	//{{{ Private members
 672	private static final int WHITESPACE = 0; // scan state in findWordStart()/findWordEnd(): inside a run of whitespace
 673	private static final int WORD_CHAR = 1; // scan state: inside a run of letters, digits, or noWordSep characters
 674	private static final int SYMBOL = 2; // scan state: inside a run of any other characters
675
676	//{{{ formatParagraph() method
677	private static void formatParagraph(String text, int maxLineLength,
678		int tabSize, StringBuffer buf)
679	{
680		// align everything to paragraph's leading indent
681		int leadingWhitespaceCount = MiscUtilities.getLeadingWhiteSpace(text);
682		String leadingWhitespace = text.substring(0,leadingWhitespaceCount);
683		int leadingWhitespaceWidth = MiscUtilities.getLeadingWhiteSpaceWidth(text,tabSize);
684
685		buf.append(leadingWhitespace);
686
687		int lineLength = leadingWhitespaceWidth;
688		StringTokenizer st = new StringTokenizer(text);
689		while(st.hasMoreTokens())
690		{
691			String word = st.nextToken();
692			if(lineLength == leadingWhitespaceWidth)
693			{
694				// do nothing
695			}
696			else if(lineLength + word.length() + 1 > maxLineLength)
697			{
698				buf.append('\n');
699				buf.append(leadingWhitespace);
700				lineLength = leadingWhitespaceWidth;
701			}
702			else
703			{
704				buf.append(' ');
705				lineLength++;
706			}
707			buf.append(word);
708			lineLength += word.length();
709		}
710	} //}}}
711
712	//}}}
713}