PageRenderTime 29ms CodeModel.GetById 1ms RepoModel.GetById 0ms app.codeStats 0ms

/jEdit/tags/jedit-4-0-pre3/org/gjt/sp/jedit/TextUtilities.java

#
Java | 732 lines | 509 code | 49 blank | 174 comment | 144 complexity | 1ee0ebd0f5e76ac8e395e46723ad9507 MD5 | raw file
Possible License(s): BSD-3-Clause, AGPL-1.0, Apache-2.0, LGPL-2.0, LGPL-3.0, GPL-2.0, CC-BY-SA-3.0, LGPL-2.1, GPL-3.0, MPL-2.0-no-copyleft-exception, IPL-1.0
  1. /*
  2. * TextUtilities.java - Various text functions
  3. * Copyright (C) 1998, 1999, 2000, 2001 Slava Pestov
  4. * :tabSize=8:indentSize=8:noTabs=false:
  5. * :folding=explicit:collapseFolds=1:
  6. *
  7. * This program is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU General Public License
  9. * as published by the Free Software Foundation; either version 2
  10. * of the License, or any later version.
  11. *
  12. * This program is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. * GNU General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU General Public License
  18. * along with this program; if not, write to the Free Software
  19. * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  20. */
  21. package org.gjt.sp.jedit;
  22. import javax.swing.text.Segment;
  23. import org.gjt.sp.jedit.syntax.*;
  24. /**
  25. * Class with several text utility functions.
  26. * @author Slava Pestov
  27. * @version $Id: TextUtilities.java 3930 2001-12-02 07:34:52Z spestov $
  28. */
  29. public class TextUtilities
  30. {
  31. //{{{ getTokenAtOffset() method
  32. /**
  33. * Returns the token that contains the specified offset.
  34. * @param tokenList The token list
  35. * @param offset The offset
  36. * @since jEdit 4.0pre3
  37. */
  38. public static Token getTokenAtOffset(Buffer.TokenList tokenList, int offset)
  39. {
  40. Token lineTokens = tokenList.getFirstToken();
  41. if(offset == 0 && lineTokens.id == Token.END)
  42. return lineTokens;
  43. int tokenListOffset = 0;
  44. for(;;)
  45. {
  46. if(lineTokens.id == Token.END)
  47. throw new ArrayIndexOutOfBoundsException("offset > line length");
  48. if(tokenListOffset + lineTokens.length > offset)
  49. return lineTokens;
  50. else
  51. {
  52. tokenListOffset += lineTokens.length;
  53. lineTokens = lineTokens.next;
  54. }
  55. }
  56. } //}}}
  57. //{{{ findMatchingBracket() method
  58. /**
  59. * Returns the offset of the bracket matching the one at the
  60. * specified offset of the buffer, or -1 if the bracket is
  61. * unmatched (or if the character is not a bracket).
  62. * @param buffer The buffer
  63. * @param line The line
  64. * @param offset The offset within that line
  65. * @since jEdit 2.6pre1
  66. */
  67. public static int findMatchingBracket(Buffer buffer, int line, int offset)
  68. {
  69. return findMatchingBracket(buffer,line,offset,0,
  70. buffer.getLineCount() - 1);
  71. } //}}}
  72. //{{{ findMatchingBracket() method
  73. /**
  74. * Returns the offset of the bracket matching the one at the
  75. * specified offset of the buffer, or -1 if the bracket is
  76. * unmatched (or if the character is not a bracket).
  77. * @param buffer The buffer
  78. * @param line The line
  79. * @param offset The offset within that line
  80. * @param startLine The first line to scan. This is used to speed up
  81. * on-screen bracket matching because only visible lines need to be
  82. * scanned
  83. * @param endLine The last line to scan. This is used to speed up
  84. * on-screen bracket matching because only visible lines need to be
  85. * scanned
  86. * @since jEdit 2.7pre3
  87. */
  88. public static int findMatchingBracket(Buffer buffer, int line, int offset,
  89. int startLine, int endLine)
  90. {
  91. if(buffer.getLength() == 0)
  92. return -1;
  93. Segment lineText = new Segment();
  94. buffer.getLineText(line,lineText);
  95. char c = lineText.array[lineText.offset + offset];
  96. char cprime; // corresponding character
  97. boolean direction; // false - backwards, true - forwards
  98. switch(c)
  99. {
  100. case '(': cprime = ')'; direction = true; break;
  101. case ')': cprime = '('; direction = false; break;
  102. case '[': cprime = ']'; direction = true; break;
  103. case ']': cprime = '['; direction = false; break;
  104. case '{': cprime = '}'; direction = true; break;
  105. case '}': cprime = '{'; direction = false; break;
  106. default: return -1;
  107. }
  108. // 1 because we've already 'seen' the first bracket
  109. int count = 1;
  110. Buffer.TokenList tokenList = buffer.markTokens(line);
  111. // Get the syntax token at 'offset'
  112. // only tokens with the same type will be checked for
  113. // the corresponding bracket
  114. byte idOfBracket = getTokenAtOffset(tokenList,offset).id;
  115. boolean haveTokens = true;
  116. //{{{ Forward search
  117. if(direction)
  118. {
  119. offset++;
  120. for(;;)
  121. {
  122. for(int i = offset; i < lineText.count; i++)
  123. {
  124. char ch = lineText.array[lineText.offset + i];
  125. if(ch == c)
  126. {
  127. if(!haveTokens)
  128. {
  129. tokenList = buffer.markTokens(line);
  130. haveTokens = true;
  131. }
  132. if(getTokenAtOffset(tokenList,i).id == idOfBracket)
  133. count++;
  134. }
  135. else if(ch == cprime)
  136. {
  137. if(!haveTokens)
  138. {
  139. tokenList = buffer.markTokens(line);
  140. haveTokens = true;
  141. }
  142. if(getTokenAtOffset(tokenList,i).id == idOfBracket)
  143. {
  144. count--;
  145. if(count == 0)
  146. return buffer.getLineStartOffset(line) + i;
  147. }
  148. }
  149. }
  150. //{{{ Go on to next line
  151. line++;
  152. if(line > endLine)
  153. break;
  154. buffer.getLineText(line,lineText);
  155. offset = 0;
  156. haveTokens = false;
  157. //}}}
  158. }
  159. } //}}}
  160. //{{{ Backward search
  161. else
  162. {
  163. offset--;
  164. for(;;)
  165. {
  166. for(int i = offset; i >= 0; i--)
  167. {
  168. char ch = lineText.array[lineText.offset + i];
  169. if(ch == c)
  170. {
  171. if(!haveTokens)
  172. {
  173. tokenList = buffer.markTokens(line);
  174. haveTokens = true;
  175. }
  176. if(getTokenAtOffset(tokenList,i).id == idOfBracket)
  177. count++;
  178. }
  179. else if(ch == cprime)
  180. {
  181. if(!haveTokens)
  182. {
  183. tokenList = buffer.markTokens(line);
  184. haveTokens = true;
  185. }
  186. if(getTokenAtOffset(tokenList,i).id == idOfBracket)
  187. {
  188. count--;
  189. if(count == 0)
  190. return buffer.getLineStartOffset(line) + i;
  191. }
  192. }
  193. }
  194. //{{{ Go on to next line
  195. line--;
  196. if(line < startLine)
  197. break;
  198. buffer.getLineText(line,lineText);
  199. offset = lineText.count - 1;
  200. haveTokens = false;
  201. //}}}
  202. }
  203. } //}}}
  204. // Nothing found
  205. return -1;
  206. } //}}}
  207. //{{{ findWordStart() method
  208. /**
  209. * Locates the start of the word at the specified position.
  210. * @param line The text
  211. * @param pos The position
  212. * @param noWordSep Characters that are non-alphanumeric, but
  213. * should be treated as word characters anyway
  214. */
  215. public static int findWordStart(String line, int pos, String noWordSep)
  216. {
  217. return findWordStart(line,pos,noWordSep,false);
  218. } //}}}
  219. //{{{ findWordStart() method
  220. /**
  221. * Locates the start of the word at the specified position.
  222. * @param line The text
  223. * @param pos The position
  224. * @param noWordSep Characters that are non-alphanumeric, but
  225. * should be treated as word characters anyway
  226. * @param whiteSpace If true, any whitespace at the end of the
  227. * word is also included
  228. * @since jEdit 4.0pre3
  229. */
  230. public static int findWordStart(String line, int pos, String noWordSep,
  231. boolean whiteSpace)
  232. {
  233. char ch = line.charAt(pos);
  234. if(noWordSep == null)
  235. noWordSep = "";
  236. //{{{ the character under the cursor changes how we behave.
  237. int type;
  238. if(Character.isWhitespace(ch))
  239. type = WHITESPACE;
  240. else if(Character.isLetterOrDigit(ch)
  241. || noWordSep.indexOf(ch) != -1)
  242. type = WORD_CHAR;
  243. else
  244. type = SYMBOL;
  245. //}}}
  246. boolean seenWhiteSpace = false;
  247. int whiteSpaceEnd = 0;
  248. loop: for(int i = pos; i >= 0; i--)
  249. {
  250. ch = line.charAt(i);
  251. switch(type)
  252. {
  253. //{{{ Whitespace...
  254. case WHITESPACE:
  255. // only select other whitespace in this case
  256. if(Character.isWhitespace(ch))
  257. break;
  258. else
  259. return i + 1; //}}}
  260. //{{{ Word character...
  261. case WORD_CHAR:
  262. // if we see whitespace, set flag.
  263. if(Character.isWhitespace(ch) && whiteSpace)
  264. {
  265. if(!seenWhiteSpace)
  266. whiteSpaceEnd = i + 1;
  267. seenWhiteSpace = true;
  268. break;
  269. }
  270. else if(Character.isLetterOrDigit(ch) ||
  271. noWordSep.indexOf(ch) != -1)
  272. {
  273. // next word?
  274. if(seenWhiteSpace)
  275. return i + 1;
  276. else
  277. break;
  278. }
  279. else
  280. return i + 1; //}}}
  281. //{{{ Symbol...
  282. case SYMBOL:
  283. // if we see whitespace, set flag.
  284. if(Character.isWhitespace(ch))
  285. {
  286. if(whiteSpace)
  287. {
  288. if(!seenWhiteSpace)
  289. whiteSpaceEnd = i + 1;
  290. seenWhiteSpace = true;
  291. break;
  292. }
  293. else
  294. return i + 1;
  295. }
  296. else if(Character.isLetterOrDigit(ch) ||
  297. noWordSep.indexOf(ch) != -1)
  298. return i + 1;
  299. else
  300. {
  301. // next word?
  302. if(seenWhiteSpace)
  303. return i + 1;
  304. else
  305. break;
  306. } //}}}
  307. }
  308. }
  309. return whiteSpaceEnd;
  310. } //}}}
  311. //{{{ findWordEnd() method
  312. /**
  313. * Locates the end of the word at the specified position.
  314. * @param line The text
  315. * @param pos The position
  316. * @param noWordSep Characters that are non-alphanumeric, but
  317. * should be treated as word characters anyway
  318. */
  319. public static int findWordEnd(String line, int pos, String noWordSep)
  320. {
  321. return findWordEnd(line,pos,noWordSep,false);
  322. } //}}}
  323. //{{{ findWordEnd() method
  324. /**
  325. * Locates the end of the word at the specified position.
  326. * @param line The text
  327. * @param pos The position
  328. * @param noWordSep Characters that are non-alphanumeric, but
  329. * should be treated as word characters anyway
  330. * @param whiteSpace If true, any whitespace at the start of the
  331. * word is also included
  332. * @since jEdit 4.0pre3
  333. */
  334. public static int findWordEnd(String line, int pos, String noWordSep,
  335. boolean whiteSpace)
  336. {
  337. if(pos != 0)
  338. pos--;
  339. char ch = line.charAt(pos);
  340. if(noWordSep == null)
  341. noWordSep = "";
  342. //{{{ the character under the cursor changes how we behave.
  343. int type;
  344. if(Character.isWhitespace(ch))
  345. type = WHITESPACE;
  346. else if(Character.isLetterOrDigit(ch)
  347. || noWordSep.indexOf(ch) != -1)
  348. type = WORD_CHAR;
  349. else
  350. type = SYMBOL;
  351. //}}}
  352. boolean seenWhiteSpace = false;
  353. loop: for(int i = pos; i < line.length(); i++)
  354. {
  355. ch = line.charAt(i);
  356. switch(type)
  357. {
  358. //{{{ Whitespace...
  359. case WHITESPACE:
  360. // only select other whitespace in this case
  361. if(Character.isWhitespace(ch))
  362. break;
  363. else
  364. return i; //}}}
  365. //{{{ Word character...
  366. case WORD_CHAR:
  367. // if we see whitespace, set flag.
  368. if(Character.isWhitespace(ch) && whiteSpace)
  369. {
  370. seenWhiteSpace = true;
  371. break;
  372. }
  373. else if(Character.isLetterOrDigit(ch) ||
  374. noWordSep.indexOf(ch) != -1)
  375. {
  376. // next word?
  377. if(seenWhiteSpace)
  378. return i;
  379. else
  380. break;
  381. }
  382. else
  383. return i; //}}}
  384. //{{{ Symbol...
  385. case SYMBOL:
  386. // if we see whitespace, set flag.
  387. if(Character.isWhitespace(ch))
  388. {
  389. if(whiteSpace)
  390. {
  391. seenWhiteSpace = true;
  392. break;
  393. }
  394. else
  395. return i;
  396. }
  397. else if(Character.isLetterOrDigit(ch) ||
  398. noWordSep.indexOf(ch) != -1)
  399. return i;
  400. else
  401. {
  402. // next word?
  403. if(seenWhiteSpace)
  404. return i;
  405. else
  406. break;
  407. } //}}}
  408. }
  409. }
  410. return line.length();
  411. } //}}}
  412. //{{{ regionMatches() method
  413. /**
  414. * Checks if a subregion of a <code>Segment</code> is equal to a
  415. * character array.
  416. * @param ignoreCase True if case should be ignored, false otherwise
  417. * @param text The segment
  418. * @param offset The offset into the segment
  419. * @param match The character array to match
  420. * @since jEdit 2.7pre1
  421. */
  422. public static boolean regionMatches(boolean ignoreCase, Segment text,
  423. int offset, char[] match)
  424. {
  425. int length = offset + match.length;
  426. char[] textArray = text.array;
  427. if(length > text.offset + text.count)
  428. return false;
  429. for(int i = offset, j = 0; i < length; i++, j++)
  430. {
  431. char c1 = textArray[i];
  432. char c2 = match[j];
  433. if(ignoreCase)
  434. {
  435. c1 = Character.toUpperCase(c1);
  436. c2 = Character.toUpperCase(c2);
  437. }
  438. if(c1 != c2)
  439. return false;
  440. }
  441. return true;
  442. } //}}}
  443. //{{{ spacesToTabs() method
  444. /**
  445. * Converts consecutive spaces to tabs in the specified string.
  446. * @param in The string
  447. * @param tabSize The tab size
  448. */
  449. public static String spacesToTabs(String in, int tabSize)
  450. {
  451. StringBuffer buf = new StringBuffer();
  452. int width = 0;
  453. int whitespace = 0;
  454. for(int i = 0; i < in.length(); i++)
  455. {
  456. switch(in.charAt(i))
  457. {
  458. case ' ':
  459. whitespace++;
  460. width++;
  461. break;
  462. case '\t':
  463. int tab = tabSize - (width % tabSize);
  464. width += tab;
  465. whitespace += tab;
  466. break;
  467. case '\n':
  468. if(whitespace != 0)
  469. {
  470. buf.append(MiscUtilities
  471. .createWhiteSpace(whitespace,tabSize));
  472. }
  473. whitespace = 0;
  474. width = 0;
  475. buf.append('\n');
  476. break;
  477. default:
  478. if(whitespace != 0)
  479. {
  480. buf.append(MiscUtilities
  481. .createWhiteSpace(whitespace,tabSize));
  482. whitespace = 0;
  483. }
  484. buf.append(in.charAt(i));
  485. width++;
  486. break;
  487. }
  488. }
  489. if(whitespace != 0)
  490. {
  491. buf.append(MiscUtilities.createWhiteSpace(whitespace,tabSize));
  492. }
  493. return buf.toString();
  494. } //}}}
  495. //{{{ tabsToSpaces() method
  496. /**
  497. * Converts tabs to consecutive spaces in the specified string.
  498. * @param in The string
  499. * @param tabSize The tab size
  500. */
  501. public static String tabsToSpaces(String in, int tabSize)
  502. {
  503. StringBuffer buf = new StringBuffer();
  504. int width = 0;
  505. for(int i = 0; i < in.length(); i++)
  506. {
  507. switch(in.charAt(i))
  508. {
  509. case '\t':
  510. int count = tabSize - (width % tabSize);
  511. width += count;
  512. while(--count >= 0)
  513. buf.append(' ');
  514. break;
  515. case '\n':
  516. width = 0;
  517. buf.append(in.charAt(i));
  518. break;
  519. default:
  520. width++;
  521. buf.append(in.charAt(i));
  522. break;
  523. }
  524. }
  525. return buf.toString();
  526. } //}}}
  527. //{{{ format() method
  528. /**
  529. * Formats the specified text by merging and breaking lines to the
  530. * specified width.
  531. * @param text The text
  532. * @param maxLineLen The maximum line length
  533. */
  534. public static String format(String text, int maxLineLength)
  535. {
  536. StringBuffer buf = new StringBuffer();
  537. StringBuffer word = new StringBuffer();
  538. int lineLength = 0;
  539. boolean newline = true;
  540. boolean space = false;
  541. char[] chars = text.toCharArray();
  542. for(int i = 0; i < chars.length; i++)
  543. {
  544. char c = chars[i];
  545. switch(c)
  546. {
  547. case '\n':
  548. if(i == 0 || chars.length - i <= 2)
  549. {
  550. if(lineLength + word.length() >= maxLineLength)
  551. buf.append('\n');
  552. else if(space && word.length() != 0)
  553. buf.append(' ');
  554. buf.append(word);
  555. word.setLength(0);
  556. buf.append('\n');
  557. newline = true;
  558. space = false;
  559. break;
  560. }
  561. else if(newline)
  562. {
  563. if(lineLength + word.length() >= maxLineLength)
  564. buf.append('\n');
  565. else if(space && word.length() != 0)
  566. buf.append(' ');
  567. buf.append(word);
  568. word.setLength(0);
  569. buf.append("\n\n");
  570. newline = space = false;
  571. lineLength = 0;
  572. break;
  573. }
  574. else
  575. newline = true;
  576. case ' ':
  577. if(lineLength + word.length() >= maxLineLength)
  578. {
  579. buf.append('\n');
  580. lineLength = 0;
  581. newline = true;
  582. }
  583. else if(space && lineLength != 0 && word.length() != 0)
  584. {
  585. buf.append(' ');
  586. lineLength++;
  587. space = false;
  588. }
  589. else
  590. space = true;
  591. buf.append(word);
  592. lineLength += word.length();
  593. word.setLength(0);
  594. break;
  595. default:
  596. newline = false;
  597. // without this test, we would have spaces
  598. // at the start of lines
  599. if(lineLength != 0)
  600. space = true;
  601. word.append(c);
  602. break;
  603. }
  604. }
  605. if(lineLength + word.length() >= maxLineLength)
  606. buf.append('\n');
  607. else if(space && word.length() != 0)
  608. buf.append(' ');
  609. buf.append(word);
  610. return buf.toString();
  611. } //}}}
  612. //{{{ getStringCase() method
  613. public static final int MIXED = 0;
  614. public static final int LOWER_CASE = 1;
  615. public static final int UPPER_CASE = 2;
  616. public static final int TITLE_CASE = 3;
  617. /**
  618. * Returns if the specified string is all upper case, all lower case,
  619. * or title case (first letter upper case, rest lower case).
  620. * @param str The string
  621. * @since jEdit 4.0pre1
  622. */
  623. public static int getStringCase(String str)
  624. {
  625. if(str.length() == 0)
  626. return MIXED;
  627. int state = -1;
  628. char ch = str.charAt(0);
  629. if(Character.isLetter(ch))
  630. {
  631. if(Character.isUpperCase(ch))
  632. state = UPPER_CASE;
  633. else
  634. state = LOWER_CASE;
  635. }
  636. for(int i = 1; i < str.length(); i++)
  637. {
  638. ch = str.charAt(i);
  639. if(!Character.isLetter(ch))
  640. continue;
  641. switch(state)
  642. {
  643. case UPPER_CASE:
  644. if(Character.isLowerCase(ch))
  645. {
  646. if(i == 1)
  647. state = TITLE_CASE;
  648. else
  649. return MIXED;
  650. }
  651. break;
  652. case LOWER_CASE:
  653. case TITLE_CASE:
  654. if(Character.isUpperCase(ch))
  655. return MIXED;
  656. break;
  657. }
  658. }
  659. return state;
  660. } //}}}
  661. //{{{ toTitleCase() method
  662. /**
  663. * Converts the specified string to title case, by capitalizing the
  664. * first letter.
  665. * @param str The string
  666. * @since jEdit 4.0pre1
  667. */
  668. public static String toTitleCase(String str)
  669. {
  670. if(str.length() == 0)
  671. return str;
  672. else
  673. {
  674. return Character.toUpperCase(str.charAt(0))
  675. + str.substring(1).toLowerCase();
  676. }
  677. } //}}}
  678. //{{{ Private members
// Character classes used by findWordStart() and findWordEnd() to
// remember what kind of character the scan started on.

// the scan started on a whitespace character
private static final int WHITESPACE = 0;
// the scan started on a letter, a digit or a 'noWordSep' character
private static final int WORD_CHAR = 1;
// the scan started on any other character
private static final int SYMBOL = 2;
  682. //}}}
  683. }