/jEdit/tags/jedit-4-0-final/org/gjt/sp/jedit/TextUtilities.java

# · Java · 670 lines · 466 code · 46 blank · 158 comment · 130 complexity · cde96941bf3fdb021f0b1b579b993247 MD5 · raw file

  1. /*
  2. * TextUtilities.java - Various text functions
  3. * Copyright (C) 1998, 1999, 2000, 2001 Slava Pestov
  4. * :tabSize=8:indentSize=8:noTabs=false:
  5. * :folding=explicit:collapseFolds=1:
  6. *
  7. * This program is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU General Public License
  9. * as published by the Free Software Foundation; either version 2
  10. * of the License, or any later version.
  11. *
  12. * This program is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. * GNU General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU General Public License
  18. * along with this program; if not, write to the Free Software
  19. * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  20. */
  21. package org.gjt.sp.jedit;
  22. //{{{ Imports
  23. import java.awt.*;
  24. import java.util.*;
  25. import javax.swing.text.Segment;
  26. import org.gjt.sp.jedit.syntax.*;
  27. //}}}
  28. /**
  29. * Contains several text manipulation methods.
  30. *
  31. * <ul>
  32. * <li>Bracket matching
  33. * <li>Word start and end offset calculation
  34. * <li>String comparison
  35. * <li>Converting tabs to spaces and vice versa
  36. * <li>Wrapping text
  37. * <li>String case conversion
  38. * </ul>
  39. *
  40. * @author Slava Pestov
  41. * @version $Id: TextUtilities.java 4137 2002-04-12 03:49:45Z spestov $
  42. */
  43. public class TextUtilities
  44. {
  45. //{{{ getTokenAtOffset() method
  46. /**
  47. * Returns the token that contains the specified offset.
  48. * @param tokens The token list
  49. * @param offset The offset
  50. * @since jEdit 4.0pre3
  51. */
  52. public static Token getTokenAtOffset(Token tokens, int offset)
  53. {
  54. if(offset == 0 && tokens.id == Token.END)
  55. return tokens;
  56. int tokenListOffset = 0;
  57. for(;;)
  58. {
  59. if(tokens.id == Token.END)
  60. throw new ArrayIndexOutOfBoundsException("offset > line length");
  61. if(tokenListOffset + tokens.length > offset)
  62. return tokens;
  63. else
  64. {
  65. tokenListOffset += tokens.length;
  66. tokens = tokens.next;
  67. }
  68. }
  69. } //}}}
  70. //{{{ findMatchingBracket() method
  71. /**
  72. * Returns the offset of the bracket matching the one at the
  73. * specified offset of the buffer, or -1 if the bracket is
  74. * unmatched (or if the character is not a bracket).
  75. * @param buffer The buffer
  76. * @param line The line
  77. * @param offset The offset within that line
  78. * @since jEdit 2.6pre1
  79. */
  80. public static int findMatchingBracket(Buffer buffer, int line, int offset)
  81. {
  82. return findMatchingBracket(buffer,line,offset,0,
  83. buffer.getLineCount() - 1);
  84. } //}}}
  85. //{{{ findMatchingBracket() method
  86. /**
  87. * Returns the offset of the bracket matching the one at the
  88. * specified offset of the buffer, or -1 if the bracket is
  89. * unmatched (or if the character is not a bracket).
  90. * @param buffer The buffer
  91. * @param line The line
  92. * @param offset The offset within that line
  93. * @param startLine The first line to scan. This is used to speed up
  94. * on-screen bracket matching because only visible lines need to be
  95. * scanned
  96. * @param endLine The last line to scan. This is used to speed up
  97. * on-screen bracket matching because only visible lines need to be
  98. * scanned
  99. * @since jEdit 2.7pre3
  100. */
  101. public static int findMatchingBracket(Buffer buffer, int line, int offset,
  102. int startLine, int endLine)
  103. {
  104. if(offset < 0 || offset >= buffer.getLineLength(line))
  105. {
  106. throw new ArrayIndexOutOfBoundsException(offset + ":"
  107. + buffer.getLineLength(line));
  108. }
  109. Segment lineText = new Segment();
  110. buffer.getLineText(line,lineText);
  111. char c = lineText.array[lineText.offset + offset];
  112. char cprime; // corresponding character
  113. boolean direction; // false - backwards, true - forwards
  114. switch(c)
  115. {
  116. case '(': cprime = ')'; direction = true; break;
  117. case ')': cprime = '('; direction = false; break;
  118. case '[': cprime = ']'; direction = true; break;
  119. case ']': cprime = '['; direction = false; break;
  120. case '{': cprime = '}'; direction = true; break;
  121. case '}': cprime = '{'; direction = false; break;
  122. default: return -1;
  123. }
  124. // 1 because we've already 'seen' the first bracket
  125. int count = 1;
  126. Buffer.TokenList tokenList = buffer.markTokens(line);
  127. // Get the syntax token at 'offset'
  128. // only tokens with the same type will be checked for
  129. // the corresponding bracket
  130. byte idOfBracket = getTokenAtOffset(tokenList.getFirstToken(),offset).id;
  131. boolean haveTokens = true;
  132. //{{{ Forward search
  133. if(direction)
  134. {
  135. offset++;
  136. for(;;)
  137. {
  138. for(int i = offset; i < lineText.count; i++)
  139. {
  140. char ch = lineText.array[lineText.offset + i];
  141. if(ch == c)
  142. {
  143. if(!haveTokens)
  144. {
  145. tokenList = buffer.markTokens(line);
  146. haveTokens = true;
  147. }
  148. if(getTokenAtOffset(tokenList.getFirstToken(),i).id == idOfBracket)
  149. count++;
  150. }
  151. else if(ch == cprime)
  152. {
  153. if(!haveTokens)
  154. {
  155. tokenList = buffer.markTokens(line);
  156. haveTokens = true;
  157. }
  158. if(getTokenAtOffset(tokenList.getFirstToken(),i).id == idOfBracket)
  159. {
  160. count--;
  161. if(count == 0)
  162. return buffer.getLineStartOffset(line) + i;
  163. }
  164. }
  165. }
  166. //{{{ Go on to next line
  167. line++;
  168. if(line > endLine)
  169. break;
  170. buffer.getLineText(line,lineText);
  171. offset = 0;
  172. haveTokens = false;
  173. //}}}
  174. }
  175. } //}}}
  176. //{{{ Backward search
  177. else
  178. {
  179. offset--;
  180. for(;;)
  181. {
  182. for(int i = offset; i >= 0; i--)
  183. {
  184. char ch = lineText.array[lineText.offset + i];
  185. if(ch == c)
  186. {
  187. if(!haveTokens)
  188. {
  189. tokenList = buffer.markTokens(line);
  190. haveTokens = true;
  191. }
  192. if(getTokenAtOffset(tokenList.getFirstToken(),i).id == idOfBracket)
  193. count++;
  194. }
  195. else if(ch == cprime)
  196. {
  197. if(!haveTokens)
  198. {
  199. tokenList = buffer.markTokens(line);
  200. haveTokens = true;
  201. }
  202. if(getTokenAtOffset(tokenList.getFirstToken(),i).id == idOfBracket)
  203. {
  204. count--;
  205. if(count == 0)
  206. return buffer.getLineStartOffset(line) + i;
  207. }
  208. }
  209. }
  210. //{{{ Go on to next line
  211. line--;
  212. if(line < startLine)
  213. break;
  214. buffer.getLineText(line,lineText);
  215. offset = lineText.count - 1;
  216. haveTokens = false;
  217. //}}}
  218. }
  219. } //}}}
  220. // Nothing found
  221. return -1;
  222. } //}}}
  223. //{{{ findWordStart() method
  224. /**
  225. * Locates the start of the word at the specified position.
  226. * @param line The text
  227. * @param pos The position
  228. * @param noWordSep Characters that are non-alphanumeric, but
  229. * should be treated as word characters anyway
  230. */
  231. public static int findWordStart(String line, int pos, String noWordSep)
  232. {
  233. char ch = line.charAt(pos);
  234. if(noWordSep == null)
  235. noWordSep = "";
  236. //{{{ the character under the cursor changes how we behave.
  237. int type;
  238. if(Character.isWhitespace(ch))
  239. type = WHITESPACE;
  240. else if(Character.isLetterOrDigit(ch)
  241. || noWordSep.indexOf(ch) != -1)
  242. type = WORD_CHAR;
  243. else
  244. type = SYMBOL;
  245. //}}}
  246. int whiteSpaceEnd = 0;
  247. loop: for(int i = pos; i >= 0; i--)
  248. {
  249. ch = line.charAt(i);
  250. switch(type)
  251. {
  252. //{{{ Whitespace...
  253. case WHITESPACE:
  254. // only select other whitespace in this case
  255. if(Character.isWhitespace(ch))
  256. break;
  257. else
  258. return i + 1; //}}}
  259. //{{{ Word character...
  260. case WORD_CHAR:
  261. if(Character.isLetterOrDigit(ch) ||
  262. noWordSep.indexOf(ch) != -1)
  263. {
  264. break;
  265. }
  266. else
  267. return i + 1; //}}}
  268. //{{{ Symbol...
  269. case SYMBOL:
  270. // if we see whitespace, set flag.
  271. if(Character.isWhitespace(ch))
  272. {
  273. return i + 1;
  274. }
  275. else if(Character.isLetterOrDigit(ch) ||
  276. noWordSep.indexOf(ch) != -1)
  277. {
  278. return i + 1;
  279. }
  280. else
  281. {
  282. break;
  283. } //}}}
  284. }
  285. }
  286. return whiteSpaceEnd;
  287. } //}}}
  288. //{{{ findWordEnd() method
  289. /**
  290. * Locates the end of the word at the specified position.
  291. * @param line The text
  292. * @param pos The position
  293. * @param noWordSep Characters that are non-alphanumeric, but
  294. * should be treated as word characters anyway
  295. */
  296. public static int findWordEnd(String line, int pos, String noWordSep)
  297. {
  298. if(pos != 0)
  299. pos--;
  300. char ch = line.charAt(pos);
  301. if(noWordSep == null)
  302. noWordSep = "";
  303. //{{{ the character under the cursor changes how we behave.
  304. int type;
  305. if(Character.isWhitespace(ch))
  306. type = WHITESPACE;
  307. else if(Character.isLetterOrDigit(ch)
  308. || noWordSep.indexOf(ch) != -1)
  309. type = WORD_CHAR;
  310. else
  311. type = SYMBOL;
  312. //}}}
  313. boolean seenWhiteSpace = false;
  314. loop: for(int i = pos; i < line.length(); i++)
  315. {
  316. ch = line.charAt(i);
  317. switch(type)
  318. {
  319. //{{{ Whitespace...
  320. case WHITESPACE:
  321. // only select other whitespace in this case
  322. if(Character.isWhitespace(ch))
  323. break;
  324. else
  325. return i; //}}}
  326. //{{{ Word character...
  327. case WORD_CHAR:
  328. if(Character.isLetterOrDigit(ch) ||
  329. noWordSep.indexOf(ch) != -1)
  330. {
  331. break;
  332. }
  333. else
  334. return i; //}}}
  335. //{{{ Symbol...
  336. case SYMBOL:
  337. // if we see whitespace, set flag.
  338. if(Character.isWhitespace(ch))
  339. {
  340. return i;
  341. }
  342. else if(Character.isLetterOrDigit(ch) ||
  343. noWordSep.indexOf(ch) != -1)
  344. return i;
  345. else
  346. {
  347. break;
  348. } //}}}
  349. }
  350. }
  351. return line.length();
  352. } //}}}
  353. //{{{ regionMatches() method
  354. /**
  355. * Checks if a subregion of a <code>Segment</code> is equal to a
  356. * character array.
  357. * @param ignoreCase True if case should be ignored, false otherwise
  358. * @param text The segment
  359. * @param offset The offset into the segment
  360. * @param match The character array to match
  361. * @since jEdit 2.7pre1
  362. */
  363. public static boolean regionMatches(boolean ignoreCase, Segment text,
  364. int offset, char[] match)
  365. {
  366. int length = offset + match.length;
  367. char[] textArray = text.array;
  368. if(length > text.offset + text.count)
  369. return false;
  370. for(int i = offset, j = 0; i < length; i++, j++)
  371. {
  372. char c1 = textArray[i];
  373. char c2 = match[j];
  374. if(ignoreCase)
  375. {
  376. c1 = Character.toUpperCase(c1);
  377. c2 = Character.toUpperCase(c2);
  378. }
  379. if(c1 != c2)
  380. return false;
  381. }
  382. return true;
  383. } //}}}
  384. //{{{ spacesToTabs() method
  385. /**
  386. * Converts consecutive spaces to tabs in the specified string.
  387. * @param in The string
  388. * @param tabSize The tab size
  389. */
  390. public static String spacesToTabs(String in, int tabSize)
  391. {
  392. StringBuffer buf = new StringBuffer();
  393. int width = 0;
  394. int whitespace = 0;
  395. for(int i = 0; i < in.length(); i++)
  396. {
  397. switch(in.charAt(i))
  398. {
  399. case ' ':
  400. whitespace++;
  401. width++;
  402. break;
  403. case '\t':
  404. int tab = tabSize - (width % tabSize);
  405. width += tab;
  406. whitespace += tab;
  407. break;
  408. case '\n':
  409. if(whitespace != 0)
  410. {
  411. buf.append(MiscUtilities
  412. .createWhiteSpace(whitespace,tabSize));
  413. }
  414. whitespace = 0;
  415. width = 0;
  416. buf.append('\n');
  417. break;
  418. default:
  419. if(whitespace != 0)
  420. {
  421. buf.append(MiscUtilities
  422. .createWhiteSpace(whitespace,tabSize));
  423. whitespace = 0;
  424. }
  425. buf.append(in.charAt(i));
  426. width++;
  427. break;
  428. }
  429. }
  430. if(whitespace != 0)
  431. {
  432. buf.append(MiscUtilities.createWhiteSpace(whitespace,tabSize));
  433. }
  434. return buf.toString();
  435. } //}}}
  436. //{{{ tabsToSpaces() method
  437. /**
  438. * Converts tabs to consecutive spaces in the specified string.
  439. * @param in The string
  440. * @param tabSize The tab size
  441. */
  442. public static String tabsToSpaces(String in, int tabSize)
  443. {
  444. StringBuffer buf = new StringBuffer();
  445. int width = 0;
  446. for(int i = 0; i < in.length(); i++)
  447. {
  448. switch(in.charAt(i))
  449. {
  450. case '\t':
  451. int count = tabSize - (width % tabSize);
  452. width += count;
  453. while(--count >= 0)
  454. buf.append(' ');
  455. break;
  456. case '\n':
  457. width = 0;
  458. buf.append(in.charAt(i));
  459. break;
  460. default:
  461. width++;
  462. buf.append(in.charAt(i));
  463. break;
  464. }
  465. }
  466. return buf.toString();
  467. } //}}}
  468. //{{{ format() method
  469. /**
  470. * Formats the specified text by merging and breaking lines to the
  471. * specified width.
  472. * @param text The text
  473. * @param maxLineLen The maximum line length
  474. */
  475. public static String format(String text, int maxLineLength)
  476. {
  477. StringBuffer buf = new StringBuffer();
  478. StringBuffer word = new StringBuffer();
  479. int lineLength = 0;
  480. boolean newline = true;
  481. boolean space = false;
  482. char[] chars = text.toCharArray();
  483. for(int i = 0; i < chars.length; i++)
  484. {
  485. char c = chars[i];
  486. switch(c)
  487. {
  488. case '\n':
  489. if(i == 0 || chars.length - i <= 2)
  490. {
  491. if(lineLength + word.length() >= maxLineLength)
  492. buf.append('\n');
  493. else if(space && word.length() != 0)
  494. buf.append(' ');
  495. buf.append(word);
  496. word.setLength(0);
  497. buf.append('\n');
  498. newline = true;
  499. space = false;
  500. break;
  501. }
  502. else if(newline)
  503. {
  504. if(lineLength + word.length() >= maxLineLength)
  505. buf.append('\n');
  506. else if(space && word.length() != 0)
  507. buf.append(' ');
  508. buf.append(word);
  509. word.setLength(0);
  510. buf.append("\n\n");
  511. newline = space = false;
  512. lineLength = 0;
  513. break;
  514. }
  515. else
  516. newline = true;
  517. case ' ':
  518. if(lineLength + word.length() >= maxLineLength)
  519. {
  520. buf.append('\n');
  521. lineLength = 0;
  522. newline = true;
  523. }
  524. else if(space && lineLength != 0 && word.length() != 0)
  525. {
  526. buf.append(' ');
  527. lineLength++;
  528. space = false;
  529. }
  530. else
  531. space = true;
  532. buf.append(word);
  533. lineLength += word.length();
  534. word.setLength(0);
  535. break;
  536. default:
  537. newline = false;
  538. // without this test, we would have spaces
  539. // at the start of lines
  540. if(lineLength != 0)
  541. space = true;
  542. word.append(c);
  543. break;
  544. }
  545. }
  546. if(lineLength + word.length() >= maxLineLength)
  547. buf.append('\n');
  548. else if(space && word.length() != 0)
  549. buf.append(' ');
  550. buf.append(word);
  551. return buf.toString();
  552. } //}}}
  553. //{{{ getStringCase() method
  554. public static final int MIXED = 0;
  555. public static final int LOWER_CASE = 1;
  556. public static final int UPPER_CASE = 2;
  557. public static final int TITLE_CASE = 3;
  558. /**
  559. * Returns if the specified string is all upper case, all lower case,
  560. * or title case (first letter upper case, rest lower case).
  561. * @param str The string
  562. * @since jEdit 4.0pre1
  563. */
  564. public static int getStringCase(String str)
  565. {
  566. if(str.length() == 0)
  567. return MIXED;
  568. int state = -1;
  569. char ch = str.charAt(0);
  570. if(Character.isLetter(ch))
  571. {
  572. if(Character.isUpperCase(ch))
  573. state = UPPER_CASE;
  574. else
  575. state = LOWER_CASE;
  576. }
  577. for(int i = 1; i < str.length(); i++)
  578. {
  579. ch = str.charAt(i);
  580. if(!Character.isLetter(ch))
  581. continue;
  582. switch(state)
  583. {
  584. case UPPER_CASE:
  585. if(Character.isLowerCase(ch))
  586. {
  587. if(i == 1)
  588. state = TITLE_CASE;
  589. else
  590. return MIXED;
  591. }
  592. break;
  593. case LOWER_CASE:
  594. case TITLE_CASE:
  595. if(Character.isUpperCase(ch))
  596. return MIXED;
  597. break;
  598. }
  599. }
  600. return state;
  601. } //}}}
  602. //{{{ toTitleCase() method
  603. /**
  604. * Converts the specified string to title case, by capitalizing the
  605. * first letter.
  606. * @param str The string
  607. * @since jEdit 4.0pre1
  608. */
  609. public static String toTitleCase(String str)
  610. {
  611. if(str.length() == 0)
  612. return str;
  613. else
  614. {
  615. return Character.toUpperCase(str.charAt(0))
  616. + str.substring(1).toLowerCase();
  617. }
  618. } //}}}
  //{{{ Private members
  // Character classes: whitespace, word character (a letter, a digit,
  // or a character listed in a noWordSep string), and symbol (anything
  // else).
  private static final int WHITESPACE = 0;
  private static final int WORD_CHAR = 1;
  private static final int SYMBOL = 2;
  //}}}
  624. }