PageRenderTime 46ms CodeModel.GetById 16ms RepoModel.GetById 0ms app.codeStats 0ms

/jEdit/tags/jedit-4-2-pre14/org/gjt/sp/jedit/TextUtilities.java

#
Java | 713 lines | 467 code | 61 blank | 185 comment | 120 complexity | f064168abfc395aca67ac49a33f1e712 MD5 | raw file
Possible License(s): BSD-3-Clause, AGPL-1.0, Apache-2.0, LGPL-2.0, LGPL-3.0, GPL-2.0, CC-BY-SA-3.0, LGPL-2.1, GPL-3.0, MPL-2.0-no-copyleft-exception, IPL-1.0
  1. /*
  2. * TextUtilities.java - Various text functions
  3. * Copyright (C) 1998, 2003 Slava Pestov
  4. * :tabSize=8:indentSize=8:noTabs=false:
  5. * :folding=explicit:collapseFolds=1:
  6. *
  7. * This program is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU General Public License
  9. * as published by the Free Software Foundation; either version 2
  10. * of the License, or any later version.
  11. *
  12. * This program is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  15. * GNU General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU General Public License
  18. * along with this program; if not, write to the Free Software
  19. * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  20. */
  21. package org.gjt.sp.jedit;
  22. //{{{ Imports
  23. import java.util.*;
  24. import javax.swing.text.Segment;
  25. import org.gjt.sp.jedit.syntax.*;
  26. //}}}
  27. /**
  28. * Contains several text manipulation methods.
  29. *
  30. * <ul>
  31. * <li>Bracket matching
  32. * <li>Word start and end offset calculation
  33. * <li>String comparison
  34. * <li>Converting tabs to spaces and vice versa
  35. * <li>Wrapping text
  36. * <li>String case conversion
  37. * </ul>
  38. *
  39. * @author Slava Pestov
  40. * @version $Id: TextUtilities.java 5004 2004-03-28 00:07:27Z spestov $
  41. */
  42. public class TextUtilities
  43. {
  44. // to avoid slowdown with large files; only scan 10000 lines either way
  45. public static final int BRACKET_MATCH_LIMIT = 10000;
  46. //{{{ getTokenAtOffset() method
  47. /**
  48. * Returns the token that contains the specified offset.
  49. * @param tokens The token list
  50. * @param offset The offset
  51. * @since jEdit 4.0pre3
  52. */
  53. public static Token getTokenAtOffset(Token tokens, int offset)
  54. {
  55. if(offset == 0 && tokens.id == Token.END)
  56. return tokens;
  57. for(;;)
  58. {
  59. if(tokens.id == Token.END)
  60. throw new ArrayIndexOutOfBoundsException("offset > line length");
  61. if(tokens.offset + tokens.length > offset)
  62. return tokens;
  63. else
  64. tokens = tokens.next;
  65. }
  66. } //}}}
  67. //{{{ findMatchingBracket() method
  68. /**
  69. * Returns the offset of the bracket matching the one at the
  70. * specified offset of the buffer, or -1 if the bracket is
  71. * unmatched (or if the character is not a bracket).
  72. * @param buffer The buffer
  73. * @param line The line
  74. * @param offset The offset within that line
  75. * @since jEdit 2.6pre1
  76. */
  77. public static int findMatchingBracket(Buffer buffer, int line, int offset)
  78. {
  79. if(offset < 0 || offset >= buffer.getLineLength(line))
  80. {
  81. throw new ArrayIndexOutOfBoundsException(offset + ":"
  82. + buffer.getLineLength(line));
  83. }
  84. Segment lineText = new Segment();
  85. buffer.getLineText(line,lineText);
  86. char c = lineText.array[lineText.offset + offset];
  87. char cprime; // corresponding character
  88. boolean direction; // false - backwards, true - forwards
  89. switch(c)
  90. {
  91. case '(': cprime = ')'; direction = true; break;
  92. case ')': cprime = '('; direction = false; break;
  93. case '[': cprime = ']'; direction = true; break;
  94. case ']': cprime = '['; direction = false; break;
  95. case '{': cprime = '}'; direction = true; break;
  96. case '}': cprime = '{'; direction = false; break;
  97. default: return -1;
  98. }
  99. // 1 because we've already 'seen' the first bracket
  100. int count = 1;
  101. DefaultTokenHandler tokenHandler = new DefaultTokenHandler();
  102. buffer.markTokens(line,tokenHandler);
  103. // Get the syntax token at 'offset'
  104. // only tokens with the same type will be checked for
  105. // the corresponding bracket
  106. byte idOfBracket = getTokenAtOffset(tokenHandler.getTokens(),offset).id;
  107. boolean haveTokens = true;
  108. int startLine = line;
  109. //{{{ Forward search
  110. if(direction)
  111. {
  112. offset++;
  113. for(;;)
  114. {
  115. for(int i = offset; i < lineText.count; i++)
  116. {
  117. char ch = lineText.array[lineText.offset + i];
  118. if(ch == c)
  119. {
  120. if(!haveTokens)
  121. {
  122. tokenHandler.init();
  123. buffer.markTokens(line,tokenHandler);
  124. haveTokens = true;
  125. }
  126. if(getTokenAtOffset(tokenHandler.getTokens(),i).id == idOfBracket)
  127. count++;
  128. }
  129. else if(ch == cprime)
  130. {
  131. if(!haveTokens)
  132. {
  133. tokenHandler.init();
  134. buffer.markTokens(line,tokenHandler);
  135. haveTokens = true;
  136. }
  137. if(getTokenAtOffset(tokenHandler.getTokens(),i).id == idOfBracket)
  138. {
  139. count--;
  140. if(count == 0)
  141. return buffer.getLineStartOffset(line) + i;
  142. }
  143. }
  144. }
  145. //{{{ Go on to next line
  146. line++;
  147. if(line >= buffer.getLineCount() || (line - startLine) > BRACKET_MATCH_LIMIT)
  148. break;
  149. buffer.getLineText(line,lineText);
  150. offset = 0;
  151. haveTokens = false;
  152. //}}}
  153. }
  154. } //}}}
  155. //{{{ Backward search
  156. else
  157. {
  158. offset--;
  159. for(;;)
  160. {
  161. for(int i = offset; i >= 0; i--)
  162. {
  163. char ch = lineText.array[lineText.offset + i];
  164. if(ch == c)
  165. {
  166. if(!haveTokens)
  167. {
  168. tokenHandler.init();
  169. buffer.markTokens(line,tokenHandler);
  170. haveTokens = true;
  171. }
  172. if(getTokenAtOffset(tokenHandler.getTokens(),i).id == idOfBracket)
  173. count++;
  174. }
  175. else if(ch == cprime)
  176. {
  177. if(!haveTokens)
  178. {
  179. tokenHandler.init();
  180. buffer.markTokens(line,tokenHandler);
  181. haveTokens = true;
  182. }
  183. if(getTokenAtOffset(tokenHandler.getTokens(),i).id == idOfBracket)
  184. {
  185. count--;
  186. if(count == 0)
  187. return buffer.getLineStartOffset(line) + i;
  188. }
  189. }
  190. }
  191. //{{{ Go on to previous line
  192. line--;
  193. if(line < 0 || (startLine - line) > BRACKET_MATCH_LIMIT)
  194. break;
  195. buffer.getLineText(line,lineText);
  196. offset = lineText.count - 1;
  197. haveTokens = false;
  198. //}}}
  199. }
  200. } //}}}
  201. // Nothing found
  202. return -1;
  203. } //}}}
  204. //{{{ findWordStart() method
  205. /**
  206. * Locates the start of the word at the specified position.
  207. * @param line The text
  208. * @param pos The position
  209. * @param noWordSep Characters that are non-alphanumeric, but
  210. * should be treated as word characters anyway
  211. */
  212. public static int findWordStart(String line, int pos, String noWordSep)
  213. {
  214. return findWordStart(line, pos, noWordSep, true, false);
  215. } //}}}
  216. //{{{ findWordStart() method
  217. /**
  218. * Locates the start of the word at the specified position.
  219. * @param line The text
  220. * @param pos The position
  221. * @param noWordSep Characters that are non-alphanumeric, but
  222. * should be treated as word characters anyway
  223. * @param joinNonWordChars Treat consecutive non-alphanumeric
  224. * characters as one word
  225. * @since jEdit 4.2pre5
  226. */
  227. public static int findWordStart(String line, int pos, String noWordSep,
  228. boolean joinNonWordChars)
  229. {
  230. return findWordStart(line,pos,noWordSep,joinNonWordChars,false);
  231. } //}}}
  232. //{{{ findWordStart() method
  233. /**
  234. * Locates the start of the word at the specified position.
  235. * @param line The text
  236. * @param pos The position
  237. * @param noWordSep Characters that are non-alphanumeric, but
  238. * should be treated as word characters anyway
  239. * @param joinNonWordChars Treat consecutive non-alphanumeric
  240. * characters as one word
  241. * @param eatWhitespace Include whitespace at start of word
  242. * @since jEdit 4.1pre2
  243. */
  244. public static int findWordStart(String line, int pos, String noWordSep,
  245. boolean joinNonWordChars, boolean eatWhitespace)
  246. {
  247. char ch = line.charAt(pos);
  248. if(noWordSep == null)
  249. noWordSep = "";
  250. //{{{ the character under the cursor changes how we behave.
  251. int type;
  252. if(Character.isWhitespace(ch))
  253. type = WHITESPACE;
  254. else if(Character.isLetterOrDigit(ch)
  255. || noWordSep.indexOf(ch) != -1)
  256. type = WORD_CHAR;
  257. else
  258. type = SYMBOL;
  259. //}}}
  260. loop: for(int i = pos; i >= 0; i--)
  261. {
  262. ch = line.charAt(i);
  263. switch(type)
  264. {
  265. //{{{ Whitespace...
  266. case WHITESPACE:
  267. // only select other whitespace in this case
  268. if(Character.isWhitespace(ch))
  269. break;
  270. // word char or symbol; stop
  271. else
  272. return i + 1; //}}}
  273. //{{{ Word character...
  274. case WORD_CHAR:
  275. // word char; keep going
  276. if(Character.isLetterOrDigit(ch) ||
  277. noWordSep.indexOf(ch) != -1)
  278. {
  279. break;
  280. }
  281. // whitespace; include in word if eating
  282. else if(Character.isWhitespace(ch)
  283. && eatWhitespace)
  284. {
  285. type = WHITESPACE;
  286. break;
  287. }
  288. else
  289. return i + 1; //}}}
  290. //{{{ Symbol...
  291. case SYMBOL:
  292. if(!joinNonWordChars && pos != i)
  293. return i + 1;
  294. // whitespace; include in word if eating
  295. if(Character.isWhitespace(ch))
  296. {
  297. if(eatWhitespace)
  298. {
  299. type = WHITESPACE;
  300. break;
  301. }
  302. else
  303. return i + 1;
  304. }
  305. else if(Character.isLetterOrDigit(ch) ||
  306. noWordSep.indexOf(ch) != -1)
  307. {
  308. return i + 1;
  309. }
  310. else
  311. {
  312. break;
  313. } //}}}
  314. }
  315. }
  316. return 0;
  317. } //}}}
  318. //{{{ findWordEnd() method
  319. /**
  320. * Locates the end of the word at the specified position.
  321. * @param line The text
  322. * @param pos The position
  323. * @param noWordSep Characters that are non-alphanumeric, but
  324. * should be treated as word characters anyway
  325. */
  326. public static int findWordEnd(String line, int pos, String noWordSep)
  327. {
  328. return findWordEnd(line, pos, noWordSep, true);
  329. } //}}}
  330. //{{{ findWordEnd() method
  331. /**
  332. * Locates the end of the word at the specified position.
  333. * @param line The text
  334. * @param pos The position
  335. * @param noWordSep Characters that are non-alphanumeric, but
  336. * should be treated as word characters anyway
  337. * @param joinNonWordChars Treat consecutive non-alphanumeric
  338. * characters as one word
  339. * @since jEdit 4.1pre2
  340. */
  341. public static int findWordEnd(String line, int pos, String noWordSep,
  342. boolean joinNonWordChars)
  343. {
  344. return findWordEnd(line,pos,noWordSep,joinNonWordChars,false);
  345. } //}}}
  346. //{{{ findWordEnd() method
  347. /**
  348. * Locates the end of the word at the specified position.
  349. * @param line The text
  350. * @param pos The position
  351. * @param noWordSep Characters that are non-alphanumeric, but
  352. * should be treated as word characters anyway
  353. * @param joinNonWordChars Treat consecutive non-alphanumeric
  354. * characters as one word
  355. * @param eatWhitespace Include whitespace at end of word
  356. * @since jEdit 4.2pre5
  357. */
  358. public static int findWordEnd(String line, int pos, String noWordSep,
  359. boolean joinNonWordChars, boolean eatWhitespace)
  360. {
  361. if(pos != 0)
  362. pos--;
  363. char ch = line.charAt(pos);
  364. if(noWordSep == null)
  365. noWordSep = "";
  366. //{{{ the character under the cursor changes how we behave.
  367. int type;
  368. if(Character.isWhitespace(ch))
  369. type = WHITESPACE;
  370. else if(Character.isLetterOrDigit(ch)
  371. || noWordSep.indexOf(ch) != -1)
  372. type = WORD_CHAR;
  373. else
  374. type = SYMBOL;
  375. //}}}
  376. loop: for(int i = pos; i < line.length(); i++)
  377. {
  378. ch = line.charAt(i);
  379. switch(type)
  380. {
  381. //{{{ Whitespace...
  382. case WHITESPACE:
  383. // only select other whitespace in this case
  384. if(Character.isWhitespace(ch))
  385. break;
  386. else
  387. return i; //}}}
  388. //{{{ Word character...
  389. case WORD_CHAR:
  390. if(Character.isLetterOrDigit(ch) ||
  391. noWordSep.indexOf(ch) != -1)
  392. {
  393. break;
  394. }
  395. // whitespace; include in word if eating
  396. else if(Character.isWhitespace(ch)
  397. && eatWhitespace)
  398. {
  399. type = WHITESPACE;
  400. break;
  401. }
  402. else
  403. return i; //}}}
  404. //{{{ Symbol...
  405. case SYMBOL:
  406. if(!joinNonWordChars && i != pos)
  407. return i;
  408. // if we see whitespace, set flag.
  409. if(Character.isWhitespace(ch))
  410. {
  411. if(eatWhitespace)
  412. {
  413. type = WHITESPACE;
  414. break;
  415. }
  416. else
  417. return i;
  418. }
  419. else if(Character.isLetterOrDigit(ch) ||
  420. noWordSep.indexOf(ch) != -1)
  421. {
  422. return i;
  423. }
  424. else
  425. {
  426. break;
  427. } //}}}
  428. }
  429. }
  430. return line.length();
  431. } //}}}
  432. //{{{ spacesToTabs() method
  433. /**
  434. * Converts consecutive spaces to tabs in the specified string.
  435. * @param in The string
  436. * @param tabSize The tab size
  437. */
  438. public static String spacesToTabs(String in, int tabSize)
  439. {
  440. StringBuffer buf = new StringBuffer();
  441. int width = 0;
  442. int whitespace = 0;
  443. for(int i = 0; i < in.length(); i++)
  444. {
  445. switch(in.charAt(i))
  446. {
  447. case ' ':
  448. whitespace++;
  449. width++;
  450. break;
  451. case '\t':
  452. int tab = tabSize - (width % tabSize);
  453. width += tab;
  454. whitespace += tab;
  455. break;
  456. case '\n':
  457. if(whitespace != 0)
  458. {
  459. buf.append(MiscUtilities
  460. .createWhiteSpace(whitespace,tabSize,
  461. width - whitespace));
  462. }
  463. whitespace = 0;
  464. width = 0;
  465. buf.append('\n');
  466. break;
  467. default:
  468. if(whitespace != 0)
  469. {
  470. buf.append(MiscUtilities
  471. .createWhiteSpace(whitespace,tabSize,
  472. width - whitespace));
  473. whitespace = 0;
  474. }
  475. buf.append(in.charAt(i));
  476. width++;
  477. break;
  478. }
  479. }
  480. if(whitespace != 0)
  481. {
  482. buf.append(MiscUtilities.createWhiteSpace(whitespace,tabSize,
  483. width - whitespace));
  484. }
  485. return buf.toString();
  486. } //}}}
  487. //{{{ tabsToSpaces() method
  488. /**
  489. * Converts tabs to consecutive spaces in the specified string.
  490. * @param in The string
  491. * @param tabSize The tab size
  492. */
  493. public static String tabsToSpaces(String in, int tabSize)
  494. {
  495. StringBuffer buf = new StringBuffer();
  496. int width = 0;
  497. for(int i = 0; i < in.length(); i++)
  498. {
  499. switch(in.charAt(i))
  500. {
  501. case '\t':
  502. int count = tabSize - (width % tabSize);
  503. width += count;
  504. while(--count >= 0)
  505. buf.append(' ');
  506. break;
  507. case '\n':
  508. width = 0;
  509. buf.append(in.charAt(i));
  510. break;
  511. default:
  512. width++;
  513. buf.append(in.charAt(i));
  514. break;
  515. }
  516. }
  517. return buf.toString();
  518. } //}}}
  519. //{{{ format() method
  520. /**
  521. * Formats the specified text by merging and breaking lines to the
  522. * specified width.
  523. * @param text The text
  524. * @param maxLineLength The maximum line length
  525. * @param tabSize The tab size
  526. */
  527. public static String format(String text, int maxLineLength, int tabSize)
  528. {
  529. StringBuffer buf = new StringBuffer();
  530. int index = 0;
  531. for(;;)
  532. {
  533. int newIndex = text.indexOf("\n\n",index);
  534. if(newIndex == -1)
  535. break;
  536. formatParagraph(text.substring(index,newIndex),
  537. maxLineLength,tabSize,buf);
  538. buf.append("\n\n");
  539. index = newIndex + 2;
  540. }
  541. if(index != text.length())
  542. {
  543. formatParagraph(text.substring(index),
  544. maxLineLength,tabSize,buf);
  545. }
  546. return buf.toString();
  547. } //}}}
  548. //{{{ getStringCase() method
  549. public static final int MIXED = 0;
  550. public static final int LOWER_CASE = 1;
  551. public static final int UPPER_CASE = 2;
  552. public static final int TITLE_CASE = 3;
  553. /**
  554. * Returns if the specified string is all upper case, all lower case,
  555. * or title case (first letter upper case, rest lower case).
  556. * @param str The string
  557. * @since jEdit 4.0pre1
  558. */
  559. public static int getStringCase(String str)
  560. {
  561. if(str.length() == 0)
  562. return MIXED;
  563. int state = -1;
  564. char ch = str.charAt(0);
  565. if(Character.isLetter(ch))
  566. {
  567. if(Character.isUpperCase(ch))
  568. state = UPPER_CASE;
  569. else
  570. state = LOWER_CASE;
  571. }
  572. for(int i = 1; i < str.length(); i++)
  573. {
  574. ch = str.charAt(i);
  575. if(!Character.isLetter(ch))
  576. continue;
  577. switch(state)
  578. {
  579. case UPPER_CASE:
  580. if(Character.isLowerCase(ch))
  581. {
  582. if(i == 1)
  583. state = TITLE_CASE;
  584. else
  585. return MIXED;
  586. }
  587. break;
  588. case LOWER_CASE:
  589. case TITLE_CASE:
  590. if(Character.isUpperCase(ch))
  591. return MIXED;
  592. break;
  593. }
  594. }
  595. return state;
  596. } //}}}
  597. //{{{ toTitleCase() method
  598. /**
  599. * Converts the specified string to title case, by capitalizing the
  600. * first letter.
  601. * @param str The string
  602. * @since jEdit 4.0pre1
  603. */
  604. public static String toTitleCase(String str)
  605. {
  606. if(str.length() == 0)
  607. return str;
  608. else
  609. {
  610. return Character.toUpperCase(str.charAt(0))
  611. + str.substring(1).toLowerCase();
  612. }
  613. } //}}}
  614. //{{{ Private members
  615. private static final int WHITESPACE = 0;
  616. private static final int WORD_CHAR = 1;
  617. private static final int SYMBOL = 2;
  618. //{{{ formatParagraph() method
  619. private static void formatParagraph(String text, int maxLineLength,
  620. int tabSize, StringBuffer buf)
  621. {
  622. // align everything to paragraph's leading indent
  623. int leadingWhitespaceCount = MiscUtilities.getLeadingWhiteSpace(text);
  624. String leadingWhitespace = text.substring(0,leadingWhitespaceCount);
  625. int leadingWhitespaceWidth = MiscUtilities.getLeadingWhiteSpaceWidth(text,tabSize);
  626. buf.append(leadingWhitespace);
  627. int lineLength = leadingWhitespaceWidth;
  628. StringTokenizer st = new StringTokenizer(text);
  629. while(st.hasMoreTokens())
  630. {
  631. String word = st.nextToken();
  632. if(lineLength == leadingWhitespaceWidth)
  633. {
  634. // do nothing
  635. }
  636. else if(lineLength + word.length() + 1 > maxLineLength)
  637. {
  638. buf.append('\n');
  639. buf.append(leadingWhitespace);
  640. lineLength = leadingWhitespaceWidth;
  641. }
  642. else
  643. {
  644. buf.append(' ');
  645. lineLength++;
  646. }
  647. buf.append(word);
  648. lineLength += word.length();
  649. }
  650. } //}}}
  651. //}}}
  652. }