/core/externals/update-engine/externals/google-toolbox-for-mac/Foundation/GTMRegex.h
http://macfuse.googlecode.com/ · C++ Header · 379 lines · 72 code · 42 blank · 265 comment · 0 complexity · d78425c2802d8dcbbba0209c13579a63 MD5 · raw file
- //
- // GTMRegex.h
- //
- // Copyright 2007-2008 Google Inc.
- //
- // Licensed under the Apache License, Version 2.0 (the "License"); you may not
- // use this file except in compliance with the License. You may obtain a copy
- // of the License at
- //
- // http://www.apache.org/licenses/LICENSE-2.0
- //
- // Unless required by applicable law or agreed to in writing, software
- // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
- // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
- // License for the specific language governing permissions and limitations under
- // the License.
- //
- #import <Foundation/Foundation.h>
- #import <regex.h>
- #import "GTMDefines.h"
- /// Options for controlling the behavior of the matches
- enum {
- kGTMRegexOptionIgnoreCase = 0x01,
- // Ignore case in matching, ie: 'a' matches 'a' or 'A'
- kGTMRegexOptionSupressNewlineSupport = 0x02,
- // By default (without this option), regular expressions are implicitly
- // processed on a line by line basis, where "lines" are delimited by newline
- // characters. In this mode '.' (dot) does NOT match newline characters, and
- // '^' and '$' match at the beginning and end of the string as well as
- // around newline characters. This behavior matches the default behavior for
- // regular expressions in other languages including Perl and Python. For
- // example,
- // foo.*bar
- // would match
- // fooAAAbar
- // but would NOT match
- // fooAAA\nbar
- // With the kGTMRegexOptionSupressNewlineSupport option, newlines are treated
- // just like any other character which means that '.' will match them. In
- // this mode, ^ and $ only match the beginning and end of the input string
- // and do NOT match around the newline characters. For example,
- // foo.*bar
- // would match
- // fooAAAbar
- // and would also match
- // fooAAA\nbar
- };
- typedef NSUInteger GTMRegexOptions;
- /// Global contants needed for errors from consuming patterns
- #undef _EXTERN
- #undef _INITIALIZE_AS
- #if GTMREGEX_DEFINE_GLOBALS
- #define _EXTERN
- #define _INITIALIZE_AS(x) =x
- #else
- #define _EXTERN GTM_EXTERN
- #define _INITIALIZE_AS(x)
- #endif
- _EXTERN NSString* kGTMRegexErrorDomain _INITIALIZE_AS(@"com.google.mactoolbox.RegexDomain");
- enum {
- kGTMRegexPatternParseFailedError = -100
- };
- // Keys for the userInfo from a kGTMRegexErrorDomain/kGTMRegexPatternParseFailedError error
- _EXTERN NSString* kGTMRegexPatternErrorPattern _INITIALIZE_AS(@"pattern");
- _EXTERN NSString* kGTMRegexPatternErrorErrorString _INITIALIZE_AS(@"patternError");
- /// Class for doing Extended Regex operations w/ libregex (see re_format(7)).
- //
- // NOTE: the docs for recomp/regexec make *no* claims about i18n. All work
- // within this class is done w/ UTF-8 so Unicode should move through it safely,
- // however, the character classes described in re_format(7) might not really
- // be unicode "savvy", so use them and this class w/ that in mind.
- //
- // Example usage:
- //
- // NSArray *inputArrayOfStrings = ...
- // NSArray *matches = [NSMutableArray array];
- //
- // GTMRegex *regex = [GTMRegex regexWithPattern:@"foo.*bar"];
- // for (NSString *curStr in inputArrayOfStrings) {
- // if ([regex matchesString:curStr])
- // [matches addObject:curStr];
- // }
- // ....
- //
- // -------------
- //
- // If you need to include something dynamic in a pattern:
- //
- // NSString *pattern =
- // [NSString stringWithFormat:@"^foo:%@bar",
- // [GTMRegex escapedPatternForString:inputStr]];
- // GTMRegex *regex = [GTMRegex regexWithPattern:pattern];
- // ....
- //
- // -------------
- //
- // GTMRegex *regex = [GTMRegex regexWithPattern:@"(foo+)(bar)"];
- // NSString *highlighted =
- // [regex stringByReplacingMatchesInString:inputString
- // withReplacement:@"<i>\\1</i><b>\\2</b>"];
- // ....
- //
- @interface GTMRegex : NSObject {
- @private
- NSString *pattern_;
- GTMRegexOptions options_;
- regex_t regexData_;
- }
- /// Create a new, autoreleased object w/ the given regex pattern with the default options
- + (id)regexWithPattern:(NSString *)pattern;
- /// Create a new, autoreleased object w/ the given regex pattern and specify the matching options
- + (id)regexWithPattern:(NSString *)pattern options:(GTMRegexOptions)options;
- /// Create a new, autoreleased object w/ the given regex pattern, specify the matching options and receive any error consuming the pattern.
- + (id)regexWithPattern:(NSString *)pattern
- options:(GTMRegexOptions)options
- withError:(NSError **)outErrorOrNULL;
- /// Returns a new, autoreleased copy of |str| w/ any pattern chars in it escaped so they have no meaning when used w/in a pattern.
- + (NSString *)escapedPatternForString:(NSString *)str;
- /// Initialize a new object w/ the given regex pattern with the default options
- - (id)initWithPattern:(NSString *)pattern;
- /// Initialize a new object w/ the given regex pattern and specify the matching options
- - (id)initWithPattern:(NSString *)pattern options:(GTMRegexOptions)options;
- /// Initialize a new object w/ the given regex pattern, specify the matching options, and receive any error consuming the pattern.
- - (id)initWithPattern:(NSString *)pattern
- options:(GTMRegexOptions)options
- withError:(NSError **)outErrorOrNULL;
- /// Returns the number of sub patterns in the pattern
- //
- // Sub Patterns are basically the number of parenthesis blocks w/in the pattern.
- // ie: The pattern "foo((bar)|(baz))" has 3 sub patterns.
- //
- - (NSUInteger)subPatternCount;
- /// Returns YES if the whole string |str| matches the pattern.
- - (BOOL)matchesString:(NSString *)str;
- /// Returns a new, autoreleased array of string that contain the subpattern matches for the string.
- //
- // If the whole string does not match the pattern, nil is returned.
- //
- // The api follows the conventions of most regex engines, and index 0 (zero) is
- // the full match, then the subpatterns are index 1, 2, ... going left to right.
- // If the pattern has optional subpatterns, then anything that didn't match
- // will have NSNull at that index.
- // ie: The pattern "(fo(o+))((bar)|(baz))" has five subpatterns, and when
- // applied to the string "foooooobaz" you'd get an array of:
- // 0: "foooooobaz"
- // 1: "foooooo"
- // 2: "ooooo"
- // 3: "baz"
- // 4: NSNull
- // 5: "baz"
- //
- - (NSArray *)subPatternsOfString:(NSString *)str;
- /// Returns the first match for this pattern in |str|.
- - (NSString *)firstSubStringMatchedInString:(NSString *)str;
- /// Returns YES if this pattern some substring of |str|.
- - (BOOL)matchesSubStringInString:(NSString *)str;
- /// Returns a new, autoreleased enumerator that will walk segments (GTMRegexStringSegment) of |str| based on the pattern.
- //
- // This will split the string into "segments" using the given pattern. You get
- // both the matches and parts that are inbetween matches. ie-the entire string
- // will eventually be returned.
- //
- // See GTMRegexStringSegment for more infomation and examples.
- //
- - (NSEnumerator *)segmentEnumeratorForString:(NSString *)str;
- /// Returns a new, autoreleased enumerator that will walk only the matching segments (GTMRegexStringSegment) of |str| based on the pattern.
- //
- // This extracts the "segments" of the string that used the pattern. So it can
- // be used to collect all of the matching substrings from within a string.
- //
- // See GTMRegexStringSegment for more infomation and examples.
- //
- - (NSEnumerator *)matchSegmentEnumeratorForString:(NSString *)str;
- /// Returns a new, autoreleased string with all matches of the pattern in |str| replaced with |replacementPattern|.
- //
- // Replacement uses the SED substitution like syntax w/in |replacementPattern|
- // to allow the use of matches in the replacment. The replacement pattern can
- // make use of any number of match references by using a backslash followed by
- // the match subexpression number (ie-"\2", "\0", ...), see subPatternsOfString:
- // for details on the subexpression indexing.
- //
- // REMINDER: you need to double-slash since the slash has meaning to the
- // compiler/preprocessor. ie: "\\0"
- //
- - (NSString *)stringByReplacingMatchesInString:(NSString *)str
- withReplacement:(NSString *)replacementPattern;
- @end
- /// Class returned by the nextObject for the enumerators from GTMRegex
- //
- // The two enumerators on from GTMRegex return objects of this type. This object
- // represents a "piece" of the string the enumerator is walking. It's the apis
- // on this object allow you to figure out why each segment was returned and to
- // act on it.
- //
- // The easiest way to under stand this how the enumerators and this class works
- // is through and examples ::
- // Pattern: "foo+"
- // String: "fo bar foobar foofooo baz"
- // If you walk this w/ -segmentEnumeratorForString you'll get:
- // # nextObjects Calls -isMatch -string
- // 1 NO "fo bar "
- // 2 YES "foo"
- // 3 NO "bar "
- // 4 YES "foo"
- // 5 YES "fooo"
- // 6 NO " baz"
- // And if you walk this w/ -matchSegmentEnumeratorForString you'll get:
- // # nextObjects Calls -isMatch -string
- // 1 YES "foo"
- // 2 YES "foo"
- // 3 YES "fooo"
- // (see the comments on subPatternString for how it works)
- //
- // Example usage:
- //
- // NSMutableString processedStr = [NSMutableString string];
- // NSEnumerator *enumerator =
- // [inputStr segmentEnumeratorForPattern:@"foo+((ba+r)|(ba+z))"];
- // GTMRegexStringSegment *segment = nil;
- // while ((segment = [enumerator nextObject]) != nil) {
- // if ([segment isMatch]) {
- // if ([segment subPatterString:2] != nil) {
- // // matched: "(ba+r)"
- // [processStr appendFormat:@"<b>%@</b>", [segment string]];
- // } else {
- // // matched: "(ba+z)"
- // [processStr appendFormat:@"<i>%@</i>", [segment string]];
- // }
- // } else {
- // [processStr appendString:[segment string]];
- // }
- // }
- // // proccessedStr now has all the versions of foobar wrapped in bold tags,
- // // and all the versons of foobaz in italics tags.
- // // ie: " fooobar foobaaz " ==> " <b>fooobar</b> <i>foobaaz</i> "
- //
- @interface GTMRegexStringSegment : NSObject {
- @private
- NSData *utf8StrBuf_;
- regmatch_t *regMatches_; // STRONG: ie-we call free
- NSUInteger numRegMatches_;
- BOOL isMatch_;
- }
- /// Returns YES if this segment from from a match of the regex, false if it was a segment between matches.
- //
- // Use -isMatch to see if the segment from from a match of the pattern or if the
- // segment is some text between matches. (NOTE: isMatch is always YES for
- // matchSegmentEnumeratorForString)
- //
- - (BOOL)isMatch;
- /// Returns a new, autoreleased string w/ the full text segment from the original string.
- - (NSString *)string;
- /// Returns a new, autoreleased string w/ the |index| sub pattern from this segment of the original string.
- //
- // This api follows the conventions of most regex engines, and index 0 (zero) is
- // the full match, then the subpatterns are index 1, 2, ... going left to right.
- // If the pattern has optional subpatterns, then anything that didn't match
- // will return nil.
- // ie: When using the pattern "(fo(o+))((bar)|(baz))" the following indexes
- // fetch these values for a segment where -string is @"foooooobaz":
- // 0: "foooooobaz"
- // 1: "foooooo"
- // 2: "ooooo"
- // 3: "baz"
- // 4: nil
- // 5: "baz"
- //
- - (NSString *)subPatternString:(NSUInteger)index;
- @end
- /// Some helpers to streamline usage of GTMRegex
- //
- // Example usage:
- //
- // if ([inputStr matchesPattern:@"foo.*bar"]) {
- // // act on match
- // ....
- // }
- //
- // -------------
- //
- // NSString *subStr = [inputStr firstSubStringMatchedByPattern:@"^foo:.*$"];
- // if (subStr != nil) {
- // // act on subStr
- // ....
- // }
- //
- // -------------
- //
- // NSArray *headingList =
- // [inputStr allSubstringsMatchedByPattern:@"^Heading:.*$"];
- // // act on the list of headings
- // ....
- //
- // -------------
- //
- // NSString *highlightedString =
- // [inputString stringByReplacingMatchesOfPattern:@"(foo+)(bar)"
- // withReplacement:@"<i>\\1</i><b>\\2</b>"];
- // ....
- //
- @interface NSString (GTMRegexAdditions)
- /// Returns YES if the full string matches regex |pattern| using the default match options
- - (BOOL)gtm_matchesPattern:(NSString *)pattern;
- /// Returns a new, autoreleased array of strings that contain the subpattern matches of |pattern| using the default match options
- //
- // See [GTMRegex subPatternsOfString:] for information about the returned array.
- //
- - (NSArray *)gtm_subPatternsOfPattern:(NSString *)pattern;
- /// Returns a new, autoreleased string w/ the first substring that matched the regex |pattern| using the default match options
- - (NSString *)gtm_firstSubStringMatchedByPattern:(NSString *)pattern;
- /// Returns YES if a substring string matches regex |pattern| using the default match options
- - (BOOL)gtm_subStringMatchesPattern:(NSString *)pattern;
- /// Returns a new, autoreleased array of substrings in the string that match the regex |pattern| using the default match options
- //
- // Note: if the string has no matches, you get an empty array.
- - (NSArray *)gtm_allSubstringsMatchedByPattern:(NSString *)pattern;
- /// Returns a new, autoreleased segment enumerator that will break the string using pattern w/ the default match options
- //
- // The enumerator returns GTMRegexStringSegment options, see that class for more
- // details and examples.
- //
- - (NSEnumerator *)gtm_segmentEnumeratorForPattern:(NSString *)pattern;
- /// Returns a new, autoreleased segment enumerator that will only return matching segments from the string using pattern w/ the default match options
- //
- // The enumerator returns GTMRegexStringSegment options, see that class for more
- // details and examples.
- //
- - (NSEnumerator *)gtm_matchSegmentEnumeratorForPattern:(NSString *)pattern;
- /// Returns a new, autoreleased string with all matches for pattern |pattern| are replaced w/ |replacementPattern|. Uses the default match options.
- //
- // |replacemetPattern| has support for using any subExpression that matched,
- // see [GTMRegex stringByReplacingMatchesInString:withReplacement:] above
- // for details.
- //
- - (NSString *)gtm_stringByReplacingMatchesOfPattern:(NSString *)pattern
- withReplacement:(NSString *)replacementPattern;
- @end