/interpreter/tags/at2dist220411/src/edu/vub/util/Regexp.java
Java | 98 lines | 39 code | 10 blank | 49 comment | 2 complexity | 3338dfb54c3909367d0ed64e6ad44358 MD5 | raw file
1/** 2 * AmbientTalk/2 Project 3 * Regexp.java created on 09-apr-2008 at 09:50:03 4 * (c) Programming Technology Lab, 2006 - 2007 5 * Authors: Tom Van Cutsem & Stijn Mostinckx 6 * 7 * Permission is hereby granted, free of charge, to any person 8 * obtaining a copy of this software and associated documentation 9 * files (the "Software"), to deal in the Software without 10 * restriction, including without limitation the rights to use, 11 * copy, modify, merge, publish, distribute, sublicense, and/or 12 * sell copies of the Software, and to permit persons to whom the 13 * Software is furnished to do so, subject to the following 14 * conditions: 15 * 16 * The above copyright notice and this permission notice shall be 17 * included in all copies or substantial portions of the Software. 18 * 19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 20 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES 21 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 22 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 23 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 24 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 25 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 26 * OTHER DEALINGS IN THE SOFTWARE. 27 */ 28package edu.vub.util; 29 30import edu.vub.at.exceptions.InterpreterException; 31 32import org.apache.regexp.RE; 33import org.apache.regexp.RECompiler; 34import org.apache.regexp.REProgram; 35 36/** 37 * A thin wrapper around the Apache Regexp library. Primarily provides support for 38 * finding or replacing all occurences of a regular expression in a string. 39 * 40 * This is the only class that should explicitly manage the compilation of regexp patterns. 41 * Instances of the class {@link RE} should never be shared between threads. However, 42 * {@link REProgram} instances can be shared among threads. Compilation of an REProgram 43 * into an RE however, must be synchronised between threads. 44 * 45 * @author tvcutsem 46 */ 47public class Regexp { 48 49 private static final RECompiler _COMPILER_ = new RECompiler(); 50 51 // serve as "closures" passed to the findAll and replaceAll methods 52 53 public interface StringCallable { 54 public String call(String input) throws InterpreterException; 55 } 56 public interface StringRunnable { 57 public void run(String input) throws InterpreterException; 58 } 59 60 public static REProgram compile(String regex) { 61 // synchronize access to the compiler because it is not multiple-thread safe 62 synchronized (_COMPILER_) { 63 return _COMPILER_.compile(regex); 64 } 65 } 66 67 public static void findAll(RE regex, String input, StringRunnable consumer) throws InterpreterException { 68 while (regex.match(input)) { 69 consumer.run(regex.getParen(0)); 70 input = input.substring(regex.getParenEnd(0), input.length()); 71 } 72 } 73 74 public static String replaceAll(RE regex, String input, StringCallable replacer) throws InterpreterException { 75 StringBuffer out = new StringBuffer(input); 76 int ofs = 0; 77 while (regex.match(input)) { 78 int start = regex.getParenStart(0); 79 String matched = regex.getParen(0); 80 int end = regex.getParenEnd(0); 81 String substitute = replacer.call(matched); 82 // advance input string to the end of the current match 83 // e.g. if input = "xxabyab" is matched against "(a*)b", then 84 // input is set to "yab" after the first match 85 input = input.substring(end, input.length()); 86 // in the output string, replace the matching part (start - end) by the substitute 87 // i.e. if out = "xxabyab" and every matching string is replaced by * then out 88 // is changed to "xx*yab" 89 out.replace(ofs + start, ofs + end, substitute); 90 // move offset in which to write to out to the end of the newly substituted part 91 // in the example above, ofs is set to 0 + 2 + 1 = 3 such that the next replace 92 // will occur relative to the string "yab" 93 ofs = ofs + start + substitute.length(); 94 } 95 return out.toString(); 96 } 97 98}