PageRenderTime 34ms CodeModel.GetById 2ms app.highlight 23ms RepoModel.GetById 1ms app.codeStats 0ms

/core/externals/update-engine/externals/google-toolbox-for-mac/Foundation/GTMRegex.h

http://macfuse.googlecode.com/
C++ Header | 379 lines | 72 code | 42 blank | 265 comment | 0 complexity | d78425c2802d8dcbbba0209c13579a63 MD5 | raw file
  1//
  2//  GTMRegex.h
  3//
  4//  Copyright 2007-2008 Google Inc.
  5//
  6//  Licensed under the Apache License, Version 2.0 (the "License"); you may not
  7//  use this file except in compliance with the License.  You may obtain a copy
  8//  of the License at
  9// 
 10//  http://www.apache.org/licenses/LICENSE-2.0
 11// 
 12//  Unless required by applicable law or agreed to in writing, software
 13//  distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 14//  WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the
 15//  License for the specific language governing permissions and limitations under
 16//  the License.
 17//
 18
 19#import <Foundation/Foundation.h>
 20#import <regex.h>
 21#import "GTMDefines.h"
 22
 23/// Options (bit flags, see the values below) for controlling the behavior of the matches
 24enum {
 25
 26  kGTMRegexOptionIgnoreCase            = 0x01,
 27    // Ignore case in matching, i.e. 'a' matches 'a' or 'A'
 28
 29  kGTMRegexOptionSupressNewlineSupport = 0x02,
 30    // By default (without this option), regular expressions are implicitly
 31    // processed on a line by line basis, where "lines" are delimited by newline
 32    // characters. In this mode '.' (dot) does NOT match newline characters, and
 33    // '^' and '$' match at the beginning and end of the string as well as
 34    // around newline characters. This behavior matches the default behavior for
 35    // regular expressions in other languages including Perl and Python. For
 36    // example,
 37    //     foo.*bar
 38    // would match
 39    //     fooAAAbar
 40    // but would NOT match
 41    //     fooAAA\nbar
 42    // With the kGTMRegexOptionSupressNewlineSupport option, newlines are treated
 43    // just like any other character which means that '.' will match them. In
 44    // this mode, ^ and $ only match the beginning and end of the input string
 45    // and do NOT match around the newline characters. For example,
 46    //     foo.*bar
 47    // would match
 48    //     fooAAAbar
 49    // and would also match
 50    //     fooAAA\nbar
 51
 52};
 53typedef NSUInteger GTMRegexOptions;
 54
 55/// Global constants needed for errors from consuming patterns
 56// If GTMREGEX_DEFINE_GLOBALS is set, the globals below are defined and initialized here; otherwise they are only declared via GTM_EXTERN.
 57#undef _EXTERN
 58#undef _INITIALIZE_AS
 59#if GTMREGEX_DEFINE_GLOBALS
 60#define _EXTERN 
 61#define _INITIALIZE_AS(x) =x
 62#else
 63#define _EXTERN GTM_EXTERN
 64#define _INITIALIZE_AS(x)
 65#endif
 66
 67_EXTERN NSString* kGTMRegexErrorDomain _INITIALIZE_AS(@"com.google.mactoolbox.RegexDomain");
 68
 69enum {
 70  kGTMRegexPatternParseFailedError = -100
 71};
 72
 73// Keys for the userInfo of a kGTMRegexErrorDomain/kGTMRegexPatternParseFailedError error
 74_EXTERN NSString* kGTMRegexPatternErrorPattern _INITIALIZE_AS(@"pattern");
 75_EXTERN NSString* kGTMRegexPatternErrorErrorString _INITIALIZE_AS(@"patternError");
 76
 77/// Class for doing Extended Regex operations w/ libregex (see re_format(7)).
 78//
 79// NOTE: the docs for regcomp/regexec make *no* claims about i18n.  All work
 80// within this class is done w/ UTF-8 so Unicode should move through it safely,
 81// however, the character classes described in re_format(7) might not really
 82// be unicode "savvy", so use them and this class w/ that in mind.
 83//
 84// Example usage:
 85//
 86//   NSArray *inputArrayOfStrings = ...
 87//   NSMutableArray *matches = [NSMutableArray array];
 88//
 89//   GTMRegex *regex = [GTMRegex regexWithPattern:@"foo.*bar"];
 90//   for (NSString *curStr in inputArrayOfStrings) {
 91//     if ([regex matchesString:curStr])
 92//       [matches addObject:curStr];
 93//   }
 94//   ....
 95//
 96// -------------
 97//
 98//  If you need to include something dynamic in a pattern:
 99//
100//   NSString *pattern =
101//     [NSString stringWithFormat:@"^foo:%@bar",
102//       [GTMRegex escapedPatternForString:inputStr]];
103//   GTMRegex *regex = [GTMRegex regexWithPattern:pattern];
104//   ....
105//
106// -------------
107//
108//   GTMRegex *regex = [GTMRegex regexWithPattern:@"(foo+)(bar)"];
109//   NSString *highlighted =
110//     [regex stringByReplacingMatchesInString:inputString
111//                             withReplacement:@"<i>\\1</i><b>\\2</b>"];
112//   ....
113//
114@interface GTMRegex : NSObject {
115 @private
116  NSString *pattern_;
117  GTMRegexOptions options_;
118  regex_t regexData_;
119}
120
121/// Create a new, autoreleased object w/ the given regex pattern with the default options
122+ (id)regexWithPattern:(NSString *)pattern;
123
124/// Create a new, autoreleased object w/ the given regex pattern and specify the matching options
125+ (id)regexWithPattern:(NSString *)pattern options:(GTMRegexOptions)options;
126
127/// Create a new, autoreleased object w/ the given regex pattern, specify the matching options, and receive any error from consuming the pattern.
128+ (id)regexWithPattern:(NSString *)pattern
129               options:(GTMRegexOptions)options
130             withError:(NSError **)outErrorOrNULL;
131
132/// Returns a new, autoreleased copy of |str| w/ any pattern chars in it escaped so they have no meaning when used w/in a pattern.
133+ (NSString *)escapedPatternForString:(NSString *)str;
134
135/// Initialize a new object w/ the given regex pattern with the default options
136- (id)initWithPattern:(NSString *)pattern;
137
138/// Initialize a new object w/ the given regex pattern and specify the matching options
139- (id)initWithPattern:(NSString *)pattern options:(GTMRegexOptions)options;
140
141/// Initialize a new object w/ the given regex pattern, specify the matching options, and receive any error from consuming the pattern.
142- (id)initWithPattern:(NSString *)pattern
143              options:(GTMRegexOptions)options
144            withError:(NSError **)outErrorOrNULL;
145
146/// Returns the number of sub patterns in the pattern
147//
148// Sub patterns are basically the parenthesized blocks w/in the pattern.
149//   ie: The pattern "foo((bar)|(baz))" has 3 sub patterns.
150//
151- (NSUInteger)subPatternCount;
152
153/// Returns YES if the whole string |str| matches the pattern.
154- (BOOL)matchesString:(NSString *)str;
155
156/// Returns a new, autoreleased array of strings that contain the subpattern matches for the string.
157//
158// If the whole string does not match the pattern, nil is returned.
159//
160// The api follows the conventions of most regex engines, and index 0 (zero) is
161// the full match, then the subpatterns are index 1, 2, ... going left to right.
162// If the pattern has optional subpatterns, then anything that didn't match
163// will have NSNull at that index.
164//   ie: The pattern "(fo(o+))((bar)|(baz))" has five subpatterns, and when
165//       applied to the string "foooooobaz" you'd get an array of:
166//              0: "foooooobaz"
167//              1: "foooooo"
168//              2: "ooooo"
169//              3: "baz"
170//              4: NSNull
171//              5: "baz"
172//
173- (NSArray *)subPatternsOfString:(NSString *)str;
174
175/// Returns the first match for this pattern in |str|.
176- (NSString *)firstSubStringMatchedInString:(NSString *)str;
177
178/// Returns YES if this pattern matches some substring of |str|.
179- (BOOL)matchesSubStringInString:(NSString *)str;
180
181/// Returns a new, autoreleased enumerator that will walk segments (GTMRegexStringSegment) of |str| based on the pattern.
182//
183// This will split the string into "segments" using the given pattern.  You get
184// both the matches and parts that are in between matches.  ie-the entire string
185// will eventually be returned.
186//
187// See GTMRegexStringSegment for more information and examples.
188//
189- (NSEnumerator *)segmentEnumeratorForString:(NSString *)str;
190
191/// Returns a new, autoreleased enumerator that will walk only the matching segments (GTMRegexStringSegment) of |str| based on the pattern.
192//
193// This extracts the "segments" of the string that matched the pattern.  So it can
194// be used to collect all of the matching substrings from within a string.
195//
196// See GTMRegexStringSegment for more information and examples.
197//
198- (NSEnumerator *)matchSegmentEnumeratorForString:(NSString *)str;
199
200/// Returns a new, autoreleased string with all matches of the pattern in |str| replaced with |replacementPattern|.
201//
202// Replacement uses a sed-like substitution syntax w/in |replacementPattern|
203// to allow the use of matches in the replacement.  The replacement pattern can
204// make use of any number of match references by using a backslash followed by
205// the match subexpression number (ie-"\2", "\0", ...), see subPatternsOfString:
206// for details on the subexpression indexing.
207//
208// REMINDER: you need to double the backslash since a single backslash has
209// meaning to the compiler/preprocessor.  ie: "\\0"
210//
211- (NSString *)stringByReplacingMatchesInString:(NSString *)str
212                               withReplacement:(NSString *)replacementPattern;
213
214@end
215
216/// Class returned by the nextObject for the enumerators from GTMRegex
217//
218// The two enumerators from GTMRegex return objects of this type.  This object
219// represents a "piece" of the string the enumerator is walking.  The apis
220// on this object allow you to figure out why each segment was returned and to
221// act on it.
222//
223// The easiest way to understand how the enumerators and this class work
224// is through an example:
225//    Pattern: "foo+"
226//     String: "fo bar foobar foofooo baz"
227// If you walk this w/ -segmentEnumeratorForString you'll get:
228//   # nextObject Calls    -isMatch       -string
229//          1                 NO         "fo bar "
230//          2                 YES        "foo"
231//          3                 NO         "bar "
232//          4                 YES        "foo"
233//          5                 YES        "fooo"
234//          6                 NO         " baz"
235// And if you walk this w/ -matchSegmentEnumeratorForString you'll get:
236//   # nextObject Calls    -isMatch       -string
237//          1                 YES        "foo"
238//          2                 YES        "foo"
239//          3                 YES        "fooo"
240// (see the comments on subPatternString: for how it works)
241//
242// Example usage:
243//
244//   NSMutableString *processedStr = [NSMutableString string];
245//   NSEnumerator *enumerator =
246//     [inputStr gtm_segmentEnumeratorForPattern:@"foo+((ba+r)|(ba+z))"];
247//   GTMRegexStringSegment *segment = nil;
248//   while ((segment = [enumerator nextObject]) != nil) {
249//     if ([segment isMatch]) {
250//       if ([segment subPatternString:2] != nil) {
251//         // matched: "(ba+r)"
252//         [processedStr appendFormat:@"<b>%@</b>", [segment string]];
253//       } else {
254//         // matched: "(ba+z)"
255//         [processedStr appendFormat:@"<i>%@</i>", [segment string]];
256//       }
257//     } else {
258//       [processedStr appendString:[segment string]];
259//     }
260//   }
261//   // processedStr now has all the versions of foobar wrapped in bold tags,
262//   // and all the versions of foobaz in italics tags.
263//   //   ie: " fooobar foobaaz " ==> " <b>fooobar</b> <i>foobaaz</i> "
264//
265@interface GTMRegexStringSegment : NSObject {
266 @private
267  NSData *utf8StrBuf_;
268  regmatch_t *regMatches_;  // STRONG: ie-we call free
269  NSUInteger numRegMatches_;
270  BOOL isMatch_;
271}
272
273/// Returns YES if this segment came from a match of the regex, NO if it was a segment between matches.
274//
275// Use -isMatch to see if the segment came from a match of the pattern or if the
276// segment is some text between matches.  (NOTE: isMatch is always YES for
277// matchSegmentEnumeratorForString)
278//
279- (BOOL)isMatch;
280
281/// Returns a new, autoreleased string w/ the full text segment from the original string.
282- (NSString *)string;
283
284/// Returns a new, autoreleased string w/ the |index| sub pattern from this segment of the original string.
285//
286// This api follows the conventions of most regex engines, and index 0 (zero) is
287// the full match, then the subpatterns are index 1, 2, ... going left to right.
288// If the pattern has optional subpatterns, then anything that didn't match
289// will return nil.
290//   ie: When using the pattern "(fo(o+))((bar)|(baz))" the following indexes
291//       fetch these values for a segment where -string is @"foooooobaz":
292//              0: "foooooobaz"
293//              1: "foooooo"
294//              2: "ooooo"
295//              3: "baz"
296//              4: nil
297//              5: "baz"
298//
299- (NSString *)subPatternString:(NSUInteger)index;
300
301@end
302
303/// Some helpers to streamline usage of GTMRegex
304//
305// Example usage:
306//
307//   if ([inputStr gtm_matchesPattern:@"foo.*bar"]) {
308//     // act on match
309//     ....
310//   }
311//
312// -------------
313//
314//   NSString *subStr = [inputStr gtm_firstSubStringMatchedByPattern:@"^foo:.*$"];
315//   if (subStr != nil) {
316//     // act on subStr
317//     ....
318//   }
319//
320// -------------
321//
322//   NSArray *headingList =
323//     [inputStr gtm_allSubstringsMatchedByPattern:@"^Heading:.*$"];
324//   // act on the list of headings
325//   ....
326//
327// -------------
328//
329//   NSString *highlightedString =
330//     [inputString gtm_stringByReplacingMatchesOfPattern:@"(foo+)(bar)"
331//                                        withReplacement:@"<i>\\1</i><b>\\2</b>"];
332//   ....
333//
334@interface NSString (GTMRegexAdditions)
335
336/// Returns YES if the full string matches regex |pattern| using the default match options
337- (BOOL)gtm_matchesPattern:(NSString *)pattern;
338
339/// Returns a new, autoreleased array of strings that contain the subpattern matches of |pattern| using the default match options
340//
341// See [GTMRegex subPatternsOfString:] for information about the returned array.
342//
343- (NSArray *)gtm_subPatternsOfPattern:(NSString *)pattern;
344
345/// Returns a new, autoreleased string w/ the first substring that matched the regex |pattern| using the default match options
346- (NSString *)gtm_firstSubStringMatchedByPattern:(NSString *)pattern;
347
348/// Returns YES if a substring of the string matches regex |pattern| using the default match options
349- (BOOL)gtm_subStringMatchesPattern:(NSString *)pattern;
350
351/// Returns a new, autoreleased array of substrings in the string that match the regex |pattern| using the default match options
352//
353// Note: if the string has no matches, you get an empty array.
354- (NSArray *)gtm_allSubstringsMatchedByPattern:(NSString *)pattern;
355
356/// Returns a new, autoreleased segment enumerator that will break the string using pattern w/ the default match options
357//
358// The enumerator returns GTMRegexStringSegment objects, see that class for more
359// details and examples.
360//
361- (NSEnumerator *)gtm_segmentEnumeratorForPattern:(NSString *)pattern;
362
363/// Returns a new, autoreleased segment enumerator that will only return matching segments from the string using pattern w/ the default match options
364//
365// The enumerator returns GTMRegexStringSegment objects, see that class for more
366// details and examples.
367//
368- (NSEnumerator *)gtm_matchSegmentEnumeratorForPattern:(NSString *)pattern;
369
370/// Returns a new, autoreleased string with all matches of regex |pattern| replaced w/ |replacementPattern|.  Uses the default match options.
371//
372// |replacementPattern| has support for using any subExpression that matched,
373// see [GTMRegex stringByReplacingMatchesInString:withReplacement:] above
374// for details.
375//
376- (NSString *)gtm_stringByReplacingMatchesOfPattern:(NSString *)pattern
377                                    withReplacement:(NSString *)replacementPattern;
378
379@end