PageRenderTime 78ms CodeModel.GetById 31ms app.highlight 42ms RepoModel.GetById 1ms app.codeStats 1ms

/jEdit/tags/jedit-4-0-pre3/org/gjt/sp/jedit/TextUtilities.java

#
Java | 732 lines | 509 code | 49 blank | 174 comment | 144 complexity | 1ee0ebd0f5e76ac8e395e46723ad9507 MD5 | raw file
  1/*
  2 * TextUtilities.java - Various text functions
  3 * Copyright (C) 1998, 1999, 2000, 2001 Slava Pestov
  4 * :tabSize=8:indentSize=8:noTabs=false:
  5 * :folding=explicit:collapseFolds=1:
  6 *
  7 * This program is free software; you can redistribute it and/or
  8 * modify it under the terms of the GNU General Public License
  9 * as published by the Free Software Foundation; either version 2
 10 * of the License, or any later version.
 11 *
 12 * This program is distributed in the hope that it will be useful,
 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 15 * GNU General Public License for more details.
 16 *
 17 * You should have received a copy of the GNU General Public License
 18 * along with this program; if not, write to the Free Software
 19 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 20 */
 21
 22package org.gjt.sp.jedit;
 23
 24import javax.swing.text.Segment;
 25import org.gjt.sp.jedit.syntax.*;
 26
 27/**
 28 * Class with several text utility functions.
 29 * @author Slava Pestov
 30 * @version $Id: TextUtilities.java 3930 2001-12-02 07:34:52Z spestov $
 31 */
 32public class TextUtilities
 33{
 34	//{{{ getTokenAtOffset() method
 35	/**
 36	 * Returns the token that contains the specified offset.
 37	 * @param tokenList The token list
 38	 * @param offset The offset
 39	 * @since jEdit 4.0pre3
 40	 */
 41	public static Token getTokenAtOffset(Buffer.TokenList tokenList, int offset)
 42	{
 43		Token lineTokens = tokenList.getFirstToken();
 44
 45		if(offset == 0 && lineTokens.id == Token.END)
 46			return lineTokens;
 47
 48		int tokenListOffset = 0;
 49		for(;;)
 50		{
 51			if(lineTokens.id == Token.END)
 52				throw new ArrayIndexOutOfBoundsException("offset > line length");
 53
 54			if(tokenListOffset + lineTokens.length > offset)
 55				return lineTokens;
 56			else
 57			{
 58				tokenListOffset += lineTokens.length;
 59				lineTokens = lineTokens.next;
 60			}
 61		}
 62	} //}}}
 63
 64	//{{{ findMatchingBracket() method
 65	/**
 66	 * Returns the offset of the bracket matching the one at the
 67	 * specified offset of the buffer, or -1 if the bracket is
 68	 * unmatched (or if the character is not a bracket).
 69	 * @param buffer The buffer
 70	 * @param line The line
 71	 * @param offset The offset within that line
 72	 * @since jEdit 2.6pre1
 73	 */
 74	public static int findMatchingBracket(Buffer buffer, int line, int offset)
 75	{
 76		return findMatchingBracket(buffer,line,offset,0,
 77			buffer.getLineCount() - 1);
 78	} //}}}
 79
 80	//{{{ findMatchingBracket() method
 81	/**
 82	 * Returns the offset of the bracket matching the one at the
 83	 * specified offset of the buffer, or -1 if the bracket is
 84	 * unmatched (or if the character is not a bracket).
 85	 * @param buffer The buffer
 86	 * @param line The line
 87	 * @param offset The offset within that line
 88	 * @param startLine The first line to scan. This is used to speed up
 89	 * on-screen bracket matching because only visible lines need to be
 90	 * scanned
 91	 * @param endLine The last line to scan. This is used to speed up
 92	 * on-screen bracket matching because only visible lines need to be
 93	 * scanned
 94	 * @since jEdit 2.7pre3
 95	 */
 96	public static int findMatchingBracket(Buffer buffer, int line, int offset,
 97		int startLine, int endLine)
 98	{
 99		if(buffer.getLength() == 0)
100			return -1;
101
102		Segment lineText = new Segment();
103		buffer.getLineText(line,lineText);
104
105		char c = lineText.array[lineText.offset + offset];
106		char cprime; // corresponding character
107		boolean direction; // false - backwards, true - forwards
108
109		switch(c)
110		{
111		case '(': cprime = ')'; direction = true;  break;
112		case ')': cprime = '('; direction = false; break;
113		case '[': cprime = ']'; direction = true;  break;
114		case ']': cprime = '['; direction = false; break;
115		case '{': cprime = '}'; direction = true;  break;
116		case '}': cprime = '{'; direction = false; break;
117		default: return -1;
118		}
119
120		// 1 because we've already 'seen' the first bracket
121		int count = 1;
122
123		Buffer.TokenList tokenList = buffer.markTokens(line);
124
125		// Get the syntax token at 'offset'
126		// only tokens with the same type will be checked for
127		// the corresponding bracket
128		byte idOfBracket = getTokenAtOffset(tokenList,offset).id;
129
130		boolean haveTokens = true;
131
132		//{{{ Forward search
133		if(direction)
134		{
135			offset++;
136
137			for(;;)
138			{
139				for(int i = offset; i < lineText.count; i++)
140				{
141					char ch = lineText.array[lineText.offset + i];
142					if(ch == c)
143					{
144						if(!haveTokens)
145						{
146							tokenList = buffer.markTokens(line);
147							haveTokens = true;
148						}
149						if(getTokenAtOffset(tokenList,i).id == idOfBracket)
150							count++;
151					}
152					else if(ch == cprime)
153					{
154						if(!haveTokens)
155						{
156							tokenList = buffer.markTokens(line);
157							haveTokens = true;
158						}
159						if(getTokenAtOffset(tokenList,i).id == idOfBracket)
160						{
161							count--;
162							if(count == 0)
163								return buffer.getLineStartOffset(line) + i;
164						}
165					}
166				}
167
168				//{{{ Go on to next line
169				line++;
170				if(line > endLine)
171					break;
172				buffer.getLineText(line,lineText);
173				offset = 0;
174				haveTokens = false;
175				//}}}
176			}
177		} //}}}
178		//{{{ Backward search
179		else
180		{
181			offset--;
182
183			for(;;)
184			{
185				for(int i = offset; i >= 0; i--)
186				{
187					char ch = lineText.array[lineText.offset + i];
188					if(ch == c)
189					{
190						if(!haveTokens)
191						{
192							tokenList = buffer.markTokens(line);
193							haveTokens = true;
194						}
195						if(getTokenAtOffset(tokenList,i).id == idOfBracket)
196							count++;
197					}
198					else if(ch == cprime)
199					{
200						if(!haveTokens)
201						{
202							tokenList = buffer.markTokens(line);
203							haveTokens = true;
204						}
205						if(getTokenAtOffset(tokenList,i).id == idOfBracket)
206						{
207							count--;
208							if(count == 0)
209								return buffer.getLineStartOffset(line) + i;
210						}
211					}
212				}
213
214				//{{{ Go on to next line
215				line--;
216				if(line < startLine)
217					break;
218				buffer.getLineText(line,lineText);
219				offset = lineText.count - 1;
220				haveTokens = false;
221				//}}}
222			}
223		} //}}}
224
225		// Nothing found
226		return -1;
227	} //}}}
228
229	//{{{ findWordStart() method
230	/**
231	 * Locates the start of the word at the specified position.
232	 * @param line The text
233	 * @param pos The position
234	 * @param noWordSep Characters that are non-alphanumeric, but
235	 * should be treated as word characters anyway
236	 */
237	public static int findWordStart(String line, int pos, String noWordSep)
238	{
239		return findWordStart(line,pos,noWordSep,false);
240	} //}}}
241
242	//{{{ findWordStart() method
243	/**
244	 * Locates the start of the word at the specified position.
245	 * @param line The text
246	 * @param pos The position
247	 * @param noWordSep Characters that are non-alphanumeric, but
248	 * should be treated as word characters anyway
249	 * @param whiteSpace If true, any whitespace at the end of the
250	 * word is also included
251	 * @since jEdit 4.0pre3
252	 */
253	public static int findWordStart(String line, int pos, String noWordSep,
254		boolean whiteSpace)
255	{
256		char ch = line.charAt(pos);
257
258		if(noWordSep == null)
259			noWordSep = "";
260
261		//{{{ the character under the cursor changes how we behave.
262		int type;
263		if(Character.isWhitespace(ch))
264			type = WHITESPACE;
265		else if(Character.isLetterOrDigit(ch)
266			|| noWordSep.indexOf(ch) != -1)
267			type = WORD_CHAR;
268		else
269			type = SYMBOL;
270		//}}}
271
272		boolean seenWhiteSpace = false;
273		int whiteSpaceEnd = 0;
274loop:		for(int i = pos; i >= 0; i--)
275		{
276			ch = line.charAt(i);
277			switch(type)
278			{
279			//{{{ Whitespace...
280			case WHITESPACE:
281				// only select other whitespace in this case
282				if(Character.isWhitespace(ch))
283					break;
284				else
285					return i + 1; //}}}
286			//{{{ Word character...
287			case WORD_CHAR:
288				// if we see whitespace, set flag.
289				if(Character.isWhitespace(ch) && whiteSpace)
290				{
291					if(!seenWhiteSpace)
292						whiteSpaceEnd = i + 1;
293					seenWhiteSpace = true;
294					break;
295				}
296				else if(Character.isLetterOrDigit(ch) ||
297					noWordSep.indexOf(ch) != -1)
298				{
299					// next word?
300					if(seenWhiteSpace)
301						return i + 1;
302					else
303						break;
304				}
305				else
306					return i + 1; //}}}
307			//{{{ Symbol...
308			case SYMBOL:
309				// if we see whitespace, set flag.
310				if(Character.isWhitespace(ch))
311				{
312					if(whiteSpace)
313					{
314						if(!seenWhiteSpace)
315							whiteSpaceEnd = i + 1;
316						seenWhiteSpace = true;
317						break;
318					}
319					else
320						return i + 1;
321				}
322				else if(Character.isLetterOrDigit(ch) ||
323					noWordSep.indexOf(ch) != -1)
324					return i + 1;
325				else
326				{
327					// next word?
328					if(seenWhiteSpace)
329						return i + 1;
330					else
331						break;
332				} //}}}
333			}
334		}
335
336		return whiteSpaceEnd;
337	} //}}}
338
339	//{{{ findWordEnd() method
340	/**
341	 * Locates the end of the word at the specified position.
342	 * @param line The text
343	 * @param pos The position
344	 * @param noWordSep Characters that are non-alphanumeric, but
345	 * should be treated as word characters anyway
346	 */
347	public static int findWordEnd(String line, int pos, String noWordSep)
348	{
349		return findWordEnd(line,pos,noWordSep,false);
350	} //}}}
351
352	//{{{ findWordEnd() method
353	/**
354	 * Locates the end of the word at the specified position.
355	 * @param line The text
356	 * @param pos The position
357	 * @param noWordSep Characters that are non-alphanumeric, but
358	 * should be treated as word characters anyway
359	 * @param whiteSpace If true, any whitespace at the start of the
360	 * word is also included
361	 * @since jEdit 4.0pre3
362	 */
363	public static int findWordEnd(String line, int pos, String noWordSep,
364		boolean whiteSpace)
365	{
366		if(pos != 0)
367			pos--;
368
369		char ch = line.charAt(pos);
370
371		if(noWordSep == null)
372			noWordSep = "";
373
374		//{{{ the character under the cursor changes how we behave.
375		int type;
376		if(Character.isWhitespace(ch))
377			type = WHITESPACE;
378		else if(Character.isLetterOrDigit(ch)
379			|| noWordSep.indexOf(ch) != -1)
380			type = WORD_CHAR;
381		else
382			type = SYMBOL;
383		//}}}
384
385		boolean seenWhiteSpace = false;
386loop:		for(int i = pos; i < line.length(); i++)
387		{
388			ch = line.charAt(i);
389			switch(type)
390			{
391			//{{{ Whitespace...
392			case WHITESPACE:
393				// only select other whitespace in this case
394				if(Character.isWhitespace(ch))
395					break;
396				else
397					return i; //}}}
398			//{{{ Word character...
399			case WORD_CHAR:
400				// if we see whitespace, set flag.
401				if(Character.isWhitespace(ch) && whiteSpace)
402				{
403					seenWhiteSpace = true;
404					break;
405				}
406				else if(Character.isLetterOrDigit(ch) ||
407					noWordSep.indexOf(ch) != -1)
408				{
409					// next word?
410					if(seenWhiteSpace)
411						return i;
412					else
413						break;
414				}
415				else
416					return i; //}}}
417			//{{{ Symbol...
418			case SYMBOL:
419				// if we see whitespace, set flag.
420				if(Character.isWhitespace(ch))
421				{
422					if(whiteSpace)
423					{
424						seenWhiteSpace = true;
425						break;
426					}
427					else
428						return i;
429				}
430				else if(Character.isLetterOrDigit(ch) ||
431					noWordSep.indexOf(ch) != -1)
432					return i;
433				else
434				{
435					// next word?
436					if(seenWhiteSpace)
437						return i;
438					else
439						break;
440				} //}}}
441			}
442		}
443
444		return line.length();
445	} //}}}
446
447	//{{{ regionMatches() method
448	/**
449	 * Checks if a subregion of a <code>Segment</code> is equal to a
450	 * character array.
451	 * @param ignoreCase True if case should be ignored, false otherwise
452	 * @param text The segment
453	 * @param offset The offset into the segment
454	 * @param match The character array to match
455	 * @since jEdit 2.7pre1
456	 */
457	public static boolean regionMatches(boolean ignoreCase, Segment text,
458					    int offset, char[] match)
459	{
460		int length = offset + match.length;
461		char[] textArray = text.array;
462		if(length > text.offset + text.count)
463			return false;
464		for(int i = offset, j = 0; i < length; i++, j++)
465		{
466			char c1 = textArray[i];
467			char c2 = match[j];
468			if(ignoreCase)
469			{
470				c1 = Character.toUpperCase(c1);
471				c2 = Character.toUpperCase(c2);
472			}
473			if(c1 != c2)
474				return false;
475		}
476		return true;
477	} //}}}
478
479	//{{{ spacesToTabs() method
480	/**
481	 * Converts consecutive spaces to tabs in the specified string.
482	 * @param in The string
483	 * @param tabSize The tab size
484	 */
485	public static String spacesToTabs(String in, int tabSize)
486	{
487		StringBuffer buf = new StringBuffer();
488		int width = 0;
489		int whitespace = 0;
490		for(int i = 0; i < in.length(); i++)
491		{
492			switch(in.charAt(i))
493			{
494			case ' ':
495				whitespace++;
496				width++;
497				break;
498			case '\t':
499				int tab = tabSize - (width % tabSize);
500				width += tab;
501				whitespace += tab;
502				break;
503			case '\n':
504				if(whitespace != 0)
505				{
506					buf.append(MiscUtilities
507						.createWhiteSpace(whitespace,tabSize));
508				}
509				whitespace = 0;
510				width = 0;
511				buf.append('\n');
512				break;
513			default:
514				if(whitespace != 0)
515				{
516					buf.append(MiscUtilities
517						.createWhiteSpace(whitespace,tabSize));
518					whitespace = 0;
519				}
520				buf.append(in.charAt(i));
521				width++;
522				break;
523			}
524		}
525
526		if(whitespace != 0)
527		{
528			buf.append(MiscUtilities.createWhiteSpace(whitespace,tabSize));
529		}
530
531                return buf.toString();
532	} //}}}
533
534	//{{{ tabsToSpaces() method
535	/**
536	 * Converts tabs to consecutive spaces in the specified string.
537	 * @param in The string
538	 * @param tabSize The tab size
539	 */
540	public static String tabsToSpaces(String in, int tabSize)
541	{
542		StringBuffer buf = new StringBuffer();
543		int width = 0;
544		for(int i = 0; i < in.length(); i++)
545		{
546			switch(in.charAt(i))
547			{
548			case '\t':
549				int count = tabSize - (width % tabSize);
550				width += count;
551				while(--count >= 0)
552					buf.append(' ');
553				break;
554			case '\n':
555				width = 0;
556				buf.append(in.charAt(i));
557				break;
558			default:
559				width++;
560				buf.append(in.charAt(i));
561				break;
562                        }
563                }
564                return buf.toString();
565	} //}}}
566
567	//{{{ format() method
568	/**
569	 * Formats the specified text by merging and breaking lines to the
570	 * specified width.
571	 * @param text The text
572	 * @param maxLineLen The maximum line length
573	 */
574	public static String format(String text, int maxLineLength)
575	{
576		StringBuffer buf = new StringBuffer();
577		StringBuffer word = new StringBuffer();
578		int lineLength = 0;
579		boolean newline = true;
580		boolean space = false;
581		char[] chars = text.toCharArray();
582		for(int i = 0; i < chars.length; i++)
583		{
584			char c = chars[i];
585			switch(c)
586			{
587			case '\n':
588				if(i == 0 || chars.length - i <= 2)
589				{
590					if(lineLength + word.length() >= maxLineLength)
591						buf.append('\n');
592					else if(space && word.length() != 0)
593						buf.append(' ');
594					buf.append(word);
595					word.setLength(0);
596					buf.append('\n');
597					newline = true;
598					space = false;
599					break;
600				}
601				else if(newline)
602				{
603					if(lineLength + word.length() >= maxLineLength)
604						buf.append('\n');
605					else if(space && word.length() != 0)
606						buf.append(' ');
607					buf.append(word);
608					word.setLength(0);
609					buf.append("\n\n");
610					newline = space = false;
611					lineLength = 0;
612					break;
613				}
614				else
615					newline = true;
616			case ' ':
617				if(lineLength + word.length() >= maxLineLength)
618				{
619					buf.append('\n');
620					lineLength = 0;
621					newline = true;
622				}
623				else if(space && lineLength != 0 && word.length() != 0)
624				{
625					buf.append(' ');
626					lineLength++;
627					space = false;
628				}
629				else
630					space = true;
631				buf.append(word);
632				lineLength += word.length();
633				word.setLength(0);
634				break;
635			default:
636				newline = false;
637				// without this test, we would have spaces
638				// at the start of lines
639				if(lineLength != 0)
640					space = true;
641				word.append(c);
642				break;
643			}
644		}
645		if(lineLength + word.length() >= maxLineLength)
646			buf.append('\n');
647		else if(space && word.length() != 0)
648			buf.append(' ');
649		buf.append(word);
650		return buf.toString();
651	} //}}}
652
653	//{{{ getStringCase() method
654	public static final int MIXED = 0;
655	public static final int LOWER_CASE = 1;
656	public static final int UPPER_CASE = 2;
657	public static final int TITLE_CASE = 3;
658
659	/**
660	 * Returns if the specified string is all upper case, all lower case,
661	 * or title case (first letter upper case, rest lower case).
662	 * @param str The string
663	 * @since jEdit 4.0pre1
664	 */
665	public static int getStringCase(String str)
666	{
667		if(str.length() == 0)
668			return MIXED;
669
670		int state = -1;
671
672		char ch = str.charAt(0);
673		if(Character.isLetter(ch))
674		{
675			if(Character.isUpperCase(ch))
676				state = UPPER_CASE;
677			else
678				state = LOWER_CASE;
679		}
680
681		for(int i = 1; i < str.length(); i++)
682		{
683			ch = str.charAt(i);
684			if(!Character.isLetter(ch))
685				continue;
686
687			switch(state)
688			{
689			case UPPER_CASE:
690				if(Character.isLowerCase(ch))
691				{
692					if(i == 1)
693						state = TITLE_CASE;
694					else
695						return MIXED;
696				}
697				break;
698			case LOWER_CASE:
699			case TITLE_CASE:
700				if(Character.isUpperCase(ch))
701					return MIXED;
702				break;
703			}
704		}
705
706		return state;
707	} //}}}
708
709	//{{{ toTitleCase() method
710	/**
711	 * Converts the specified string to title case, by capitalizing the
712	 * first letter.
713	 * @param str The string
714	 * @since jEdit 4.0pre1
715	 */
716	public static String toTitleCase(String str)
717	{
718		if(str.length() == 0)
719			return str;
720		else
721		{
722			return Character.toUpperCase(str.charAt(0))
723				+ str.substring(1).toLowerCase();
724		}
725	} //}}}
726
727	//{{{ Private members
728	private static final int WHITESPACE = 0;
729	private static final int WORD_CHAR = 1;
730	private static final int SYMBOL = 2;
731	//}}}
732}