PageRenderTime 61ms CodeModel.GetById 25ms app.highlight 28ms RepoModel.GetById 1ms app.codeStats 0ms

/jEdit/tags/jedit-4-3-pre5/org/gjt/sp/jedit/TextUtilities.java

#
Java | 828 lines | 534 code | 70 blank | 224 comment | 134 complexity | cfbd9d3f417729d5dd8478532373ccfb MD5 | raw file
Possible License(s): BSD-3-Clause, AGPL-1.0, Apache-2.0, LGPL-2.0, LGPL-3.0, GPL-2.0, CC-BY-SA-3.0, LGPL-2.1, GPL-3.0, MPL-2.0-no-copyleft-exception, IPL-1.0
  1/*
  2 * TextUtilities.java - Various text functions
  3 * Copyright (C) 1998, 2005 Slava Pestov
  4 * :tabSize=8:indentSize=8:noTabs=false:
  5 * :folding=explicit:collapseFolds=1:
  6 *
  7 * This program is free software; you can redistribute it and/or
  8 * modify it under the terms of the GNU General Public License
  9 * as published by the Free Software Foundation; either version 2
 10 * of the License, or any later version.
 11 *
 12 * This program is distributed in the hope that it will be useful,
 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 15 * GNU General Public License for more details.
 16 *
 17 * You should have received a copy of the GNU General Public License
 18 * along with this program; if not, write to the Free Software
 19 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 20 */
 21
 22package org.gjt.sp.jedit;
 23
 24//{{{ Imports
 25import java.util.*;
 26import javax.swing.text.Segment;
 27import org.gjt.sp.jedit.buffer.JEditBuffer;
 28import org.gjt.sp.jedit.syntax.*;
 29import org.gjt.sp.util.StandardUtilities;
 30//}}}
 31
 32/**
 33 * Contains several text manipulation methods.
 34 *
 35 * <ul>
 36 * <li>Bracket matching
 37 * <li>Word start and end offset calculation
 38 * <li>String comparison
 39 * <li>Converting tabs to spaces and vice versa
 40 * <li>Wrapping text
 41 * <li>String case conversion
 42 * </ul>
 43 *
 44 * @author Slava Pestov
 45 * @version $Id: TextUtilities.java 5485 2006-06-23 22:04:58Z kpouer $
 46 */
 47public class TextUtilities
 48{
 49	// to avoid slowdown with large files; only scan 10000 lines either way
 50	public static final int BRACKET_MATCH_LIMIT = 10000;
 51
 52	//{{{ getTokenAtOffset() method
 53	/**
 54	 * Returns the token that contains the specified offset.
 55	 * @param tokens The token list
 56	 * @param offset The offset
 57	 * @since jEdit 4.0pre3
 58	 */
 59	public static Token getTokenAtOffset(Token tokens, int offset)
 60	{
 61		if(offset == 0 && tokens.id == Token.END)
 62			return tokens;
 63
 64		for(;;)
 65		{
 66			if(tokens.id == Token.END)
 67				throw new ArrayIndexOutOfBoundsException("offset > line length");
 68
 69			if(tokens.offset + tokens.length > offset)
 70				return tokens;
 71			else
 72				tokens = tokens.next;
 73		}
 74	} //}}}
 75
 76	//{{{ getComplementaryBracket() method
 77	/**
 78	 * Given an opening bracket, return the corresponding closing bracket
 79	 * and store true in <code>direction[0]</code>. Given a closing bracket,
 80	 * return the corresponding opening bracket and store false in
 81	 * <code>direction[0]</code>. Otherwise, return <code>\0</code>.
 82	 * @since jEdit 4.3pre2
 83	 */
 84	public static char getComplementaryBracket(char ch, boolean[] direction)
 85	{
 86		switch(ch)
 87		{
 88		case '(': direction[0] = true;  return ')';
 89		case ')': direction[0] = false; return '(';
 90		case '[': direction[0] = true;  return ']';
 91		case ']': direction[0] = false; return '[';
 92		case '{': direction[0] = true;  return '}';
 93		case '}': direction[0] = false; return '{';
 94		default:  return '\0';
 95		}
 96	} //}}}
 97
 98	//{{{ findMatchingBracket() method
 99	/**
100	 * Returns the offset of the bracket matching the one at the
101	 * specified offset of the buffer, or -1 if the bracket is
102	 * unmatched (or if the character is not a bracket).
103	 * @param buffer The buffer
104	 * @param line The line
105	 * @param offset The offset within that line
106	 * @since jEdit 2.6pre1
107	 */
108	public static int findMatchingBracket(JEditBuffer buffer, int line, int offset)
109	{
110		if(offset < 0 || offset >= buffer.getLineLength(line))
111		{
112			throw new ArrayIndexOutOfBoundsException(offset + ":"
113				+ buffer.getLineLength(line));
114		}
115
116		Segment lineText = new Segment();
117		buffer.getLineText(line,lineText);
118
119		char c = lineText.array[lineText.offset + offset];
120		// false - backwards, true - forwards
121		boolean[] direction = new boolean[1];
122
123		// corresponding character
124		char cprime = getComplementaryBracket(c,direction);
125
126		// 1 because we've already 'seen' the first bracket
127		int count = 1;
128
129		DefaultTokenHandler tokenHandler = new DefaultTokenHandler();
130		buffer.markTokens(line,tokenHandler);
131
132		// Get the syntax token at 'offset'
133		// only tokens with the same type will be checked for
134		// the corresponding bracket
135		byte idOfBracket = getTokenAtOffset(tokenHandler.getTokens(),offset).id;
136
137		boolean haveTokens = true;
138
139		int startLine = line;
140
141		//{{{ Forward search
142		if(direction[0])
143		{
144			offset++;
145
146			for(;;)
147			{
148				for(int i = offset; i < lineText.count; i++)
149				{
150					char ch = lineText.array[lineText.offset + i];
151					if(ch == c)
152					{
153						if(!haveTokens)
154						{
155							tokenHandler.init();
156							buffer.markTokens(line,tokenHandler);
157							haveTokens = true;
158						}
159						if(getTokenAtOffset(tokenHandler.getTokens(),i).id == idOfBracket)
160							count++;
161					}
162					else if(ch == cprime)
163					{
164						if(!haveTokens)
165						{
166							tokenHandler.init();
167							buffer.markTokens(line,tokenHandler);
168							haveTokens = true;
169						}
170						if(getTokenAtOffset(tokenHandler.getTokens(),i).id == idOfBracket)
171						{
172							count--;
173							if(count == 0)
174								return buffer.getLineStartOffset(line) + i;
175						}
176					}
177				}
178
179				//{{{ Go on to next line
180				line++;
181				if(line >= buffer.getLineCount() || (line - startLine) > BRACKET_MATCH_LIMIT)
182					break;
183				buffer.getLineText(line,lineText);
184				offset = 0;
185				haveTokens = false;
186				//}}}
187			}
188		} //}}}
189		//{{{ Backward search
190		else
191		{
192			offset--;
193
194			for(;;)
195			{
196				for(int i = offset; i >= 0; i--)
197				{
198					char ch = lineText.array[lineText.offset + i];
199					if(ch == c)
200					{
201						if(!haveTokens)
202						{
203							tokenHandler.init();
204							buffer.markTokens(line,tokenHandler);
205							haveTokens = true;
206						}
207						if(getTokenAtOffset(tokenHandler.getTokens(),i).id == idOfBracket)
208							count++;
209					}
210					else if(ch == cprime)
211					{
212						if(!haveTokens)
213						{
214							tokenHandler.init();
215							buffer.markTokens(line,tokenHandler);
216							haveTokens = true;
217						}
218						if(getTokenAtOffset(tokenHandler.getTokens(),i).id == idOfBracket)
219						{
220							count--;
221							if(count == 0)
222								return buffer.getLineStartOffset(line) + i;
223						}
224					}
225				}
226
227				//{{{ Go on to previous line
228				line--;
229				if(line < 0 || (startLine - line) > BRACKET_MATCH_LIMIT)
230					break;
231				buffer.getLineText(line,lineText);
232				offset = lineText.count - 1;
233				haveTokens = false;
234				//}}}
235			}
236		} //}}}
237
238		// Nothing found
239		return -1;
240	} //}}}
241
242	//{{{ findWordStart() method
243	/**
244	 * Locates the start of the word at the specified position.
245	 * @param line The text
246	 * @param pos The position
247	 * @param noWordSep Characters that are non-alphanumeric, but
248	 * should be treated as word characters anyway
249	 */
250	public static int findWordStart(String line, int pos, String noWordSep)
251	{
252		return findWordStart(line, pos, noWordSep, true, false);
253	} //}}}
254
255	
256	/** Similar to perl's join() method on lists,
257	 *    but works with all collections.
258	 * 
259	 * @param c An iterable collection of Objects
260	 * @param delim a string to put between each object
261	 * @return a joined toString() representation of the collection
262	 * 
263	 * @since jedit 4.3pre3
264	 */
265	public static String join(Collection c, String delim) {
266		StringBuffer retval = new StringBuffer();
267		Iterator itr = c.iterator();
268		if (itr.hasNext()) {
269			retval.append( itr.next().toString() );
270		}
271		else return "";
272		while (itr.hasNext()) {
273			retval.append(delim);
274			retval.append(itr.next().toString());
275		}
276		return retval.toString();
277	}
278	
279	//{{{ findWordStart() method
280	/**
281	 * Locates the start of the word at the specified position.
282	 * @param line The text
283	 * @param pos The position
284	 * @param noWordSep Characters that are non-alphanumeric, but
285	 * should be treated as word characters anyway
286	 * @param joinNonWordChars Treat consecutive non-alphanumeric
287	 * characters as one word
288	 * @since jEdit 4.2pre5
289	 */
290	public static int findWordStart(String line, int pos, String noWordSep,
291		boolean joinNonWordChars)
292	{
293		return findWordStart(line,pos,noWordSep,joinNonWordChars,false);
294	} //}}}
295
296	//{{{ findWordStart() method
297	/**
298	 * Locates the start of the word at the specified position.
299	 * @param line The text
300	 * @param pos The position
301	 * @param noWordSep Characters that are non-alphanumeric, but
302	 * should be treated as word characters anyway
303	 * @param joinNonWordChars Treat consecutive non-alphanumeric
304	 * characters as one word
305	 * @param eatWhitespace Include whitespace at start of word
306	 * @since jEdit 4.1pre2
307	 */
308	public static int findWordStart(String line, int pos, String noWordSep,
309		boolean joinNonWordChars, boolean eatWhitespace)
310	{
311		char ch = line.charAt(pos);
312
313		if(noWordSep == null)
314			noWordSep = "";
315
316		//{{{ the character under the cursor changes how we behave.
317		int type;
318		if(Character.isWhitespace(ch))
319			type = WHITESPACE;
320		else if(Character.isLetterOrDigit(ch)
321			|| noWordSep.indexOf(ch) != -1)
322			type = WORD_CHAR;
323		else
324			type = SYMBOL;
325		//}}}
326
327loop:		for(int i = pos; i >= 0; i--)
328		{
329			ch = line.charAt(i);
330			switch(type)
331			{
332			//{{{ Whitespace...
333			case WHITESPACE:
334				// only select other whitespace in this case
335				if(Character.isWhitespace(ch))
336					break;
337				// word char or symbol; stop
338				else
339					return i + 1; //}}}
340			//{{{ Word character...
341			case WORD_CHAR:
342				// word char; keep going
343				if(Character.isLetterOrDigit(ch) ||
344					noWordSep.indexOf(ch) != -1)
345				{
346					break;
347				}
348				// whitespace; include in word if eating
349				else if(Character.isWhitespace(ch)
350					&& eatWhitespace)
351				{
352					type = WHITESPACE;
353					break;
354				}
355				else
356					return i + 1; //}}}
357			//{{{ Symbol...
358			case SYMBOL:
359				if(!joinNonWordChars && pos != i)
360					return i + 1;
361
362				// whitespace; include in word if eating
363				if(Character.isWhitespace(ch))
364				{
365					if(eatWhitespace)
366					{
367						type = WHITESPACE;
368						break;
369					}
370					else
371						return i + 1;
372				}
373				else if(Character.isLetterOrDigit(ch) ||
374					noWordSep.indexOf(ch) != -1)
375				{
376					return i + 1;
377				}
378				else
379				{
380					break;
381				} //}}}
382			}
383		}
384
385		return 0;
386	} //}}}
387
388	//{{{ findWordEnd() method
389	/**
390	 * Locates the end of the word at the specified position.
391	 * @param line The text
392	 * @param pos The position
393	 * @param noWordSep Characters that are non-alphanumeric, but
394	 * should be treated as word characters anyway
395	 */
396	public static int findWordEnd(String line, int pos, String noWordSep)
397	{
398		return findWordEnd(line, pos, noWordSep, true);
399	} //}}}
400
401	//{{{ findWordEnd() method
402	/**
403	 * Locates the end of the word at the specified position.
404	 * @param line The text
405	 * @param pos The position
406	 * @param noWordSep Characters that are non-alphanumeric, but
407	 * should be treated as word characters anyway
408	 * @param joinNonWordChars Treat consecutive non-alphanumeric
409	 * characters as one word
410	 * @since jEdit 4.1pre2
411	 */
412	public static int findWordEnd(String line, int pos, String noWordSep,
413		boolean joinNonWordChars)
414	{
415		return findWordEnd(line,pos,noWordSep,joinNonWordChars,false);
416	} //}}}
417
418	//{{{ findWordEnd() method
419	/**
420	 * Locates the end of the word at the specified position.
421	 * @param line The text
422	 * @param pos The position
423	 * @param noWordSep Characters that are non-alphanumeric, but
424	 * should be treated as word characters anyway
425	 * @param joinNonWordChars Treat consecutive non-alphanumeric
426	 * characters as one word
427	 * @param eatWhitespace Include whitespace at end of word
428	 * @since jEdit 4.2pre5
429	 */
430	public static int findWordEnd(String line, int pos, String noWordSep,
431		boolean joinNonWordChars, boolean eatWhitespace)
432	{
433		if(pos != 0)
434			pos--;
435
436		char ch = line.charAt(pos);
437
438		if(noWordSep == null)
439			noWordSep = "";
440
441		//{{{ the character under the cursor changes how we behave.
442		int type;
443		if(Character.isWhitespace(ch))
444			type = WHITESPACE;
445		else if(Character.isLetterOrDigit(ch)
446			|| noWordSep.indexOf(ch) != -1)
447			type = WORD_CHAR;
448		else
449			type = SYMBOL;
450		//}}}
451
452loop:		for(int i = pos; i < line.length(); i++)
453		{
454			ch = line.charAt(i);
455			switch(type)
456			{
457			//{{{ Whitespace...
458			case WHITESPACE:
459				// only select other whitespace in this case
460				if(Character.isWhitespace(ch))
461					break;
462				else
463					return i; //}}}
464			//{{{ Word character...
465			case WORD_CHAR:
466				if(Character.isLetterOrDigit(ch) ||
467					noWordSep.indexOf(ch) != -1)
468				{
469					break;
470				}
471				// whitespace; include in word if eating
472				else if(Character.isWhitespace(ch)
473					&& eatWhitespace)
474				{
475					type = WHITESPACE;
476					break;
477				}
478				else
479					return i; //}}}
480			//{{{ Symbol...
481			case SYMBOL:
482				if(!joinNonWordChars && i != pos)
483					return i;
484
485				// if we see whitespace, set flag.
486				if(Character.isWhitespace(ch))
487				{
488					if(eatWhitespace)
489					{
490						type = WHITESPACE;
491						break;
492					}
493					else
494						return i;
495				}
496				else if(Character.isLetterOrDigit(ch) ||
497					noWordSep.indexOf(ch) != -1)
498				{
499					return i;
500				}
501				else
502				{
503					break;
504				} //}}}
505			}
506		}
507
508		return line.length();
509	} //}}}
510
511	//{{{ spacesToTabs() method
512	/**
513	 * Converts consecutive spaces to tabs in the specified string.
514	 * @param in The string
515	 * @param tabSize The tab size
516	 */
517	public static String spacesToTabs(String in, int tabSize)
518	{
519		StringBuffer buf = new StringBuffer();
520		int width = 0;
521		int whitespace = 0;
522		for(int i = 0; i < in.length(); i++)
523		{
524			switch(in.charAt(i))
525			{
526			case ' ':
527				whitespace++;
528				width++;
529				break;
530			case '\t':
531				int tab = tabSize - (width % tabSize);
532				width += tab;
533				whitespace += tab;
534				break;
535			case '\n':
536				if(whitespace != 0)
537				{
538					buf.append(StandardUtilities
539						.createWhiteSpace(whitespace,tabSize,
540						width - whitespace));
541				}
542				whitespace = 0;
543				width = 0;
544				buf.append('\n');
545				break;
546			default:
547				if(whitespace != 0)
548				{
549					buf.append(StandardUtilities
550						.createWhiteSpace(whitespace,tabSize,
551						width - whitespace));
552					whitespace = 0;
553				}
554				buf.append(in.charAt(i));
555				width++;
556				break;
557			}
558		}
559
560		if(whitespace != 0)
561		{
562			buf.append(StandardUtilities.createWhiteSpace(whitespace,tabSize,
563				width - whitespace));
564		}
565
566                return buf.toString();
567	} //}}}
568
569	//{{{ tabsToSpaces() method
570	/**
571	 * Converts tabs to consecutive spaces in the specified string.
572	 * @param in The string
573	 * @param tabSize The tab size
574	 */
575	public static String tabsToSpaces(String in, int tabSize)
576	{
577		StringBuffer buf = new StringBuffer();
578		int width = 0;
579		for(int i = 0; i < in.length(); i++)
580		{
581			switch(in.charAt(i))
582			{
583			case '\t':
584				int count = tabSize - (width % tabSize);
585				width += count;
586				while(--count >= 0)
587					buf.append(' ');
588				break;
589			case '\n':
590				width = 0;
591				buf.append(in.charAt(i));
592				break;
593			default:
594				width++;
595				buf.append(in.charAt(i));
596				break;
597                        }
598                }
599                return buf.toString();
600	} //}}}
601
602	//{{{ format() method
603	/**
604	 * Formats the specified text by merging and breaking lines to the
605	 * specified width.
606	 * @param text The text
607	 * @param maxLineLength The maximum line length
608	 * @param tabSize The tab size
609	 */
610	public static String format(String text, int maxLineLength, int tabSize)
611	{
612		StringBuffer buf = new StringBuffer();
613
614		int index = 0;
615
616		for(;;)
617		{
618			int newIndex = text.indexOf("\n\n",index);
619			if(newIndex == -1)
620				break;
621
622			formatParagraph(text.substring(index,newIndex),
623				maxLineLength,tabSize,buf);
624			buf.append("\n\n");
625			index = newIndex + 2;
626		}
627
628		if(index != text.length())
629		{
630			formatParagraph(text.substring(index),
631				maxLineLength,tabSize,buf);
632		}
633
634		return buf.toString();
635	} //}}}
636
637	//{{{ indexIgnoringWhitespace() method
638	/**
639	 * Inverse of <code>ignoringWhitespaceIndex()</code>.
640	 * @param str A string
641	 * @param index The index
642	 * @return The number of non-whitespace characters that precede the index.
643	 * @since jEdit 4.3pre2
644	 */
645	public static int indexIgnoringWhitespace(String str, int index)
646	{
647		int j = 0;
648		for(int i = 0; i < index; i++)
649			if(!Character.isWhitespace(str.charAt(i))) j++;
650		return j;
651	} //}}}
652
653	//{{{ ignoringWhitespaceIndex() method
654	/**
655	 * Inverse of <code>indexIgnoringWhitespace()</code>.
656	 * @param str A string
657	 * @param index The index
658	 * @return The index into the string where the number of non-whitespace
659	 * characters that precede the index is count.
660	 * @since jEdit 4.3pre2
661	 */
662	public static int ignoringWhitespaceIndex(String str, int index)
663	{
664		int j = 0;
665		for(int i = 0;;i++)
666		{
667			if(!Character.isWhitespace(str.charAt(i))) j++;
668
669			if(j > index)
670				return i;
671			if(i == str.length() - 1)
672				return i + 1;
673		}
674	} //}}}
675
676	//{{{ getStringCase() method
677	public static final int MIXED = 0;
678	public static final int LOWER_CASE = 1;
679	public static final int UPPER_CASE = 2;
680	public static final int TITLE_CASE = 3;
681
682	/**
683	 * Returns if the specified string is all upper case, all lower case,
684	 * or title case (first letter upper case, rest lower case).
685	 * @param str The string
686	 * @since jEdit 4.0pre1
687	 */
688	public static int getStringCase(String str)
689	{
690		if(str.length() == 0)
691			return MIXED;
692
693		int state = -1;
694
695		char ch = str.charAt(0);
696		if(Character.isLetter(ch))
697		{
698			if(Character.isUpperCase(ch))
699				state = UPPER_CASE;
700			else
701				state = LOWER_CASE;
702		}
703
704		for(int i = 1; i < str.length(); i++)
705		{
706			ch = str.charAt(i);
707			if(!Character.isLetter(ch))
708				continue;
709
710			switch(state)
711			{
712			case UPPER_CASE:
713				if(Character.isLowerCase(ch))
714				{
715					if(i == 1)
716						state = TITLE_CASE;
717					else
718						return MIXED;
719				}
720				break;
721			case LOWER_CASE:
722			case TITLE_CASE:
723				if(Character.isUpperCase(ch))
724					return MIXED;
725				break;
726			}
727		}
728
729		return state;
730	} //}}}
731
732	//{{{ toTitleCase() method
733	/**
734	 * Converts the specified string to title case, by capitalizing the
735	 * first letter.
736	 * @param str The string
737	 * @since jEdit 4.0pre1
738	 */
739	public static String toTitleCase(String str)
740	{
741		if(str.length() == 0)
742			return str;
743		else
744		{
745			return Character.toUpperCase(str.charAt(0))
746				+ str.substring(1).toLowerCase();
747		}
748	} //}}}
749
750	//{{{ Private members
751	private static final int WHITESPACE = 0;
752	private static final int WORD_CHAR = 1;
753	private static final int SYMBOL = 2;
754
755	//{{{ formatParagraph() method
756	private static void formatParagraph(String text, int maxLineLength,
757		int tabSize, StringBuffer buf)
758	{
759		// align everything to paragraph's leading indent
760		int leadingWhitespaceCount = StandardUtilities.getLeadingWhiteSpace(text);
761		String leadingWhitespace = text.substring(0,leadingWhitespaceCount);
762		int leadingWhitespaceWidth = StandardUtilities.getLeadingWhiteSpaceWidth(text,tabSize);
763
764		buf.append(leadingWhitespace);
765
766		int lineLength = leadingWhitespaceWidth;
767		StringTokenizer st = new StringTokenizer(text);
768		while(st.hasMoreTokens())
769		{
770			String word = st.nextToken();
771			if(lineLength == leadingWhitespaceWidth)
772			{
773				// do nothing
774			}
775			else if(lineLength + word.length() + 1 > maxLineLength)
776			{
777				buf.append('\n');
778				buf.append(leadingWhitespace);
779				lineLength = leadingWhitespaceWidth;
780			}
781			else
782			{
783				buf.append(' ');
784				lineLength++;
785			}
786			buf.append(word);
787			lineLength += word.length();
788		}
789	} //}}}
790	
791	//{{{ indexIgnoringWhitespace() method
792	public static void indexIgnoringWhitespace(String text, int maxLineLength,
793		int tabSize, StringBuffer buf)
794	{
795		// align everything to paragraph's leading indent
796		int leadingWhitespaceCount = StandardUtilities.getLeadingWhiteSpace(text);
797		String leadingWhitespace = text.substring(0,leadingWhitespaceCount);
798		int leadingWhitespaceWidth = StandardUtilities.getLeadingWhiteSpaceWidth(text,tabSize);
799
800		buf.append(leadingWhitespace);
801
802		int lineLength = leadingWhitespaceWidth;
803		StringTokenizer st = new StringTokenizer(text);
804		while(st.hasMoreTokens())
805		{
806			String word = st.nextToken();
807			if(lineLength == leadingWhitespaceWidth)
808			{
809				// do nothing
810			}
811			else if(lineLength + word.length() + 1 > maxLineLength)
812			{
813				buf.append('\n');
814				buf.append(leadingWhitespace);
815				lineLength = leadingWhitespaceWidth;
816			}
817			else
818			{
819				buf.append(' ');
820				lineLength++;
821			}
822			buf.append(word);
823			lineLength += word.length();
824		}
825	} //}}}
826
827	//}}}
828}