/interpreter/tags/at2dist041108/src/edu/vub/util/Regexp.java

http://ambienttalk.googlecode.com/ · Java · 98 lines · 39 code · 10 blank · 49 comment · 2 complexity · 3338dfb54c3909367d0ed64e6ad44358 MD5 · raw file

  1. /**
  2. * AmbientTalk/2 Project
  3. * Regexp.java created on 09-apr-2008 at 09:50:03
  4. * (c) Programming Technology Lab, 2006 - 2007
  5. * Authors: Tom Van Cutsem & Stijn Mostinckx
  6. *
  7. * Permission is hereby granted, free of charge, to any person
  8. * obtaining a copy of this software and associated documentation
  9. * files (the "Software"), to deal in the Software without
  10. * restriction, including without limitation the rights to use,
  11. * copy, modify, merge, publish, distribute, sublicense, and/or
  12. * sell copies of the Software, and to permit persons to whom the
  13. * Software is furnished to do so, subject to the following
  14. * conditions:
  15. *
  16. * The above copyright notice and this permission notice shall be
  17. * included in all copies or substantial portions of the Software.
  18. *
  19. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  20. * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
  21. * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
  22. * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
  23. * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
  24. * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  25. * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
  26. * OTHER DEALINGS IN THE SOFTWARE.
  27. */
  28. package edu.vub.util;
  29. import edu.vub.at.exceptions.InterpreterException;
  30. import org.apache.regexp.RE;
  31. import org.apache.regexp.RECompiler;
  32. import org.apache.regexp.REProgram;
  33. /**
  34. * A thin wrapper around the Apache Regexp library. Primarily provides support for
  35. * finding or replacing all occurences of a regular expression in a string.
  36. *
  37. * This is the only class that should explicitly manage the compilation of regexp patterns.
  38. * Instances of the class {@link RE} should never be shared between threads. However,
  39. * {@link REProgram} instances can be shared among threads. Compilation of an REProgram
  40. * into an RE however, must be synchronised between threads.
  41. *
  42. * @author tvcutsem
  43. */
  44. public class Regexp {
  45. private static final RECompiler _COMPILER_ = new RECompiler();
  46. // serve as "closures" passed to the findAll and replaceAll methods
  47. public interface StringCallable {
  48. public String call(String input) throws InterpreterException;
  49. }
  50. public interface StringRunnable {
  51. public void run(String input) throws InterpreterException;
  52. }
  53. public static REProgram compile(String regex) {
  54. // synchronize access to the compiler because it is not multiple-thread safe
  55. synchronized (_COMPILER_) {
  56. return _COMPILER_.compile(regex);
  57. }
  58. }
  59. public static void findAll(RE regex, String input, StringRunnable consumer) throws InterpreterException {
  60. while (regex.match(input)) {
  61. consumer.run(regex.getParen(0));
  62. input = input.substring(regex.getParenEnd(0), input.length());
  63. }
  64. }
  65. public static String replaceAll(RE regex, String input, StringCallable replacer) throws InterpreterException {
  66. StringBuffer out = new StringBuffer(input);
  67. int ofs = 0;
  68. while (regex.match(input)) {
  69. int start = regex.getParenStart(0);
  70. String matched = regex.getParen(0);
  71. int end = regex.getParenEnd(0);
  72. String substitute = replacer.call(matched);
  73. // advance input string to the end of the current match
  74. // e.g. if input = "xxabyab" is matched against "(a*)b", then
  75. // input is set to "yab" after the first match
  76. input = input.substring(end, input.length());
  77. // in the output string, replace the matching part (start - end) by the substitute
  78. // i.e. if out = "xxabyab" and every matching string is replaced by * then out
  79. // is changed to "xx*yab"
  80. out.replace(ofs + start, ofs + end, substitute);
  81. // move offset in which to write to out to the end of the newly substituted part
  82. // in the example above, ofs is set to 0 + 2 + 1 = 3 such that the next replace
  83. // will occur relative to the string "yab"
  84. ofs = ofs + start + substitute.length();
  85. }
  86. return out.toString();
  87. }
  88. }