/core/externals/update-engine/externals/google-toolbox-for-mac/Foundation/GTMRegex.h

http://macfuse.googlecode.com/ · C++ Header · 379 lines · 72 code · 42 blank · 265 comment · 0 complexity · d78425c2802d8dcbbba0209c13579a63 MD5 · raw file

  1. //
  2. // GTMRegex.h
  3. //
  4. // Copyright 2007-2008 Google Inc.
  5. //
  6. // Licensed under the Apache License, Version 2.0 (the "License"); you may not
  7. // use this file except in compliance with the License. You may obtain a copy
  8. // of the License at
  9. //
  10. // http://www.apache.org/licenses/LICENSE-2.0
  11. //
  12. // Unless required by applicable law or agreed to in writing, software
  13. // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
  14. // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
  15. // License for the specific language governing permissions and limitations under
  16. // the License.
  17. //
  18. #import <Foundation/Foundation.h>
  19. #import <regex.h>
  20. #import "GTMDefines.h"
  21. /// Options for controlling the behavior of the matches. Each option is a distinct bit, and the comment that follows each constant describes it.
  22. enum {
  23. kGTMRegexOptionIgnoreCase = 0x01,
  24. // Ignore case in matching, i.e.: 'a' matches 'a' or 'A'
  25. kGTMRegexOptionSupressNewlineSupport = 0x02,
  26. // By default (without this option), regular expressions are implicitly
  27. // processed on a line by line basis, where "lines" are delimited by newline
  28. // characters. In this mode '.' (dot) does NOT match newline characters, and
  29. // '^' and '$' match at the beginning and end of the string as well as
  30. // around newline characters. This behavior matches the default behavior for
  31. // regular expressions in other languages including Perl and Python. For
  32. // example,
  33. // foo.*bar
  34. // would match
  35. // fooAAAbar
  36. // but would NOT match
  37. // fooAAA\nbar
  38. // With the kGTMRegexOptionSupressNewlineSupport option, newlines are treated
  39. // just like any other character which means that '.' will match them. In
  40. // this mode, ^ and $ only match the beginning and end of the input string
  41. // and do NOT match around the newline characters. For example,
  42. // foo.*bar
  43. // would match
  44. // fooAAAbar
  45. // and would also match
  46. // fooAAA\nbar
  47. };
  48. typedef NSUInteger GTMRegexOptions;
  49. /// Global constants needed for errors from consuming patterns. They are defined and initialized here only when GTMREGEX_DEFINE_GLOBALS is true; otherwise they are just declared with GTM_EXTERN.
  50. #undef _EXTERN
  51. #undef _INITIALIZE_AS
  52. #if GTMREGEX_DEFINE_GLOBALS
  53. #define _EXTERN
  54. #define _INITIALIZE_AS(x) =x
  55. #else
  56. #define _EXTERN GTM_EXTERN
  57. #define _INITIALIZE_AS(x)
  58. #endif
  59. _EXTERN NSString* kGTMRegexErrorDomain _INITIALIZE_AS(@"com.google.mactoolbox.RegexDomain");
  60. enum {
  61. kGTMRegexPatternParseFailedError = -100
  62. };
  63. // Keys for the userInfo from a kGTMRegexErrorDomain/kGTMRegexPatternParseFailedError error
  64. _EXTERN NSString* kGTMRegexPatternErrorPattern _INITIALIZE_AS(@"pattern");
  65. _EXTERN NSString* kGTMRegexPatternErrorErrorString _INITIALIZE_AS(@"patternError");
  66. /// Class for doing Extended Regex operations w/ libregex (see re_format(7)).
  67. //
  68. // NOTE: the docs for regcomp/regexec make *no* claims about i18n. All work
  69. // within this class is done w/ UTF-8 so Unicode should move through it safely,
  70. // however, the character classes described in re_format(7) might not really
  71. // be unicode "savvy", so use them and this class w/ that in mind.
  72. //
  73. // Example usage:
  74. //
  75. // NSArray *inputArrayOfStrings = ...
  76. // NSMutableArray *matches = [NSMutableArray array];
  77. //
  78. // GTMRegex *regex = [GTMRegex regexWithPattern:@"foo.*bar"];
  79. // for (NSString *curStr in inputArrayOfStrings) {
  80. // if ([regex matchesString:curStr])
  81. // [matches addObject:curStr];
  82. // }
  83. // ....
  84. //
  85. // -------------
  86. //
  87. // If you need to include something dynamic in a pattern:
  88. //
  89. // NSString *pattern =
  90. // [NSString stringWithFormat:@"^foo:%@bar",
  91. // [GTMRegex escapedPatternForString:inputStr]];
  92. // GTMRegex *regex = [GTMRegex regexWithPattern:pattern];
  93. // ....
  94. //
  95. // -------------
  96. //
  97. // GTMRegex *regex = [GTMRegex regexWithPattern:@"(foo+)(bar)"];
  98. // NSString *highlighted =
  99. // [regex stringByReplacingMatchesInString:inputString
  100. // withReplacement:@"<i>\\1</i><b>\\2</b>"];
  101. // ....
  102. //
  103. @interface GTMRegex : NSObject {
  104. @private
  105. NSString *pattern_;
  106. GTMRegexOptions options_;
  107. regex_t regexData_;
  108. }
  109. /// Create a new, autoreleased object w/ the given regex pattern with the default options
  110. + (id)regexWithPattern:(NSString *)pattern;
  111. /// Create a new, autoreleased object w/ the given regex pattern and specify the matching options
  112. + (id)regexWithPattern:(NSString *)pattern options:(GTMRegexOptions)options;
  113. /// Create a new, autoreleased object w/ the given regex pattern, specify the matching options and receive any error consuming the pattern.
  114. + (id)regexWithPattern:(NSString *)pattern
  115. options:(GTMRegexOptions)options
  116. withError:(NSError **)outErrorOrNULL;
  117. /// Returns a new, autoreleased copy of |str| w/ any pattern chars in it escaped so they have no meaning when used w/in a pattern.
  118. + (NSString *)escapedPatternForString:(NSString *)str;
  119. /// Initialize a new object w/ the given regex pattern with the default options
  120. - (id)initWithPattern:(NSString *)pattern;
  121. /// Initialize a new object w/ the given regex pattern and specify the matching options
  122. - (id)initWithPattern:(NSString *)pattern options:(GTMRegexOptions)options;
  123. /// Initialize a new object w/ the given regex pattern, specify the matching options, and receive any error consuming the pattern.
  124. - (id)initWithPattern:(NSString *)pattern
  125. options:(GTMRegexOptions)options
  126. withError:(NSError **)outErrorOrNULL;
  127. /// Returns the number of sub patterns in the pattern
  128. //
  129. // Sub Patterns are basically the number of parenthesis blocks w/in the pattern.
  130. // ie: The pattern "foo((bar)|(baz))" has 3 sub patterns.
  131. //
  132. - (NSUInteger)subPatternCount;
  133. /// Returns YES if the whole string |str| matches the pattern.
  134. - (BOOL)matchesString:(NSString *)str;
  135. /// Returns a new, autoreleased array of strings that contain the subpattern matches for the string.
  136. //
  137. // If the whole string does not match the pattern, nil is returned.
  138. //
  139. // The api follows the conventions of most regex engines, and index 0 (zero) is
  140. // the full match, then the subpatterns are index 1, 2, ... going left to right.
  141. // If the pattern has optional subpatterns, then anything that didn't match
  142. // will have NSNull at that index.
  143. // ie: The pattern "(fo(o+))((bar)|(baz))" has five subpatterns, and when
  144. // applied to the string "foooooobaz" you'd get an array of:
  145. // 0: "foooooobaz"
  146. // 1: "foooooo"
  147. // 2: "ooooo"
  148. // 3: "baz"
  149. // 4: NSNull
  150. // 5: "baz"
  151. //
  152. - (NSArray *)subPatternsOfString:(NSString *)str;
  153. /// Returns the first match for this pattern in |str|.
  154. - (NSString *)firstSubStringMatchedInString:(NSString *)str;
  155. /// Returns YES if this pattern matches some substring of |str|.
  156. - (BOOL)matchesSubStringInString:(NSString *)str;
  157. /// Returns a new, autoreleased enumerator that will walk segments (GTMRegexStringSegment) of |str| based on the pattern.
  158. //
  159. // This will split the string into "segments" using the given pattern. You get
  160. // both the matches and parts that are in between matches. ie-the entire string
  161. // will eventually be returned.
  162. //
  163. // See GTMRegexStringSegment for more information and examples.
  164. //
  165. - (NSEnumerator *)segmentEnumeratorForString:(NSString *)str;
  166. /// Returns a new, autoreleased enumerator that will walk only the matching segments (GTMRegexStringSegment) of |str| based on the pattern.
  167. //
  168. // This extracts the "segments" of the string that used the pattern. So it can
  169. // be used to collect all of the matching substrings from within a string.
  170. //
  171. // See GTMRegexStringSegment for more information and examples.
  172. //
  173. - (NSEnumerator *)matchSegmentEnumeratorForString:(NSString *)str;
  174. /// Returns a new, autoreleased string with all matches of the pattern in |str| replaced with |replacementPattern|.
  175. //
  176. // Replacement uses the SED substitution like syntax w/in |replacementPattern|
  177. // to allow the use of matches in the replacement. The replacement pattern can
  178. // make use of any number of match references by using a backslash followed by
  179. // the match subexpression number (ie-"\2", "\0", ...), see subPatternsOfString:
  180. // for details on the subexpression indexing.
  181. //
  182. // REMINDER: you need to double-slash since the slash has meaning to the
  183. // compiler/preprocessor. ie: "\\0"
  184. //
  185. - (NSString *)stringByReplacingMatchesInString:(NSString *)str
  186. withReplacement:(NSString *)replacementPattern;
  187. @end
  188. /// Class returned by the nextObject for the enumerators from GTMRegex
  189. //
  190. // The two enumerators from GTMRegex return objects of this type. This object
  191. // represents a "piece" of the string the enumerator is walking. The apis
  192. // on this object allow you to figure out why each segment was returned and to
  193. // act on it.
  194. //
  195. // The easiest way to understand how the enumerators and this class work
  196. // is through an example:
  197. // Pattern: "foo+"
  198. // String: "fo bar foobar foofooo baz"
  199. // If you walk this w/ -segmentEnumeratorForString you'll get:
  200. // # nextObjects Calls -isMatch -string
  201. // 1 NO "fo bar "
  202. // 2 YES "foo"
  203. // 3 NO "bar "
  204. // 4 YES "foo"
  205. // 5 YES "fooo"
  206. // 6 NO " baz"
  207. // And if you walk this w/ -matchSegmentEnumeratorForString you'll get:
  208. // # nextObjects Calls -isMatch -string
  209. // 1 YES "foo"
  210. // 2 YES "foo"
  211. // 3 YES "fooo"
  212. // (see the comments on subPatternString for how it works)
  213. //
  214. // Example usage:
  215. //
  216. // NSMutableString *processedStr = [NSMutableString string];
  217. // NSEnumerator *enumerator =
  218. // [inputStr gtm_segmentEnumeratorForPattern:@"foo+((ba+r)|(ba+z))"];
  219. // GTMRegexStringSegment *segment = nil;
  220. // while ((segment = [enumerator nextObject]) != nil) {
  221. // if ([segment isMatch]) {
  222. // if ([segment subPatternString:2] != nil) {
  223. // // matched: "(ba+r)"
  224. // [processedStr appendFormat:@"<b>%@</b>", [segment string]];
  225. // } else {
  226. // // matched: "(ba+z)"
  227. // [processedStr appendFormat:@"<i>%@</i>", [segment string]];
  228. // }
  229. // } else {
  230. // [processedStr appendString:[segment string]];
  231. // }
  232. // }
  233. // // processedStr now has all the versions of foobar wrapped in bold tags,
  234. // // and all the versions of foobaz in italics tags.
  235. // // ie: " fooobar foobaaz " ==> " <b>fooobar</b> <i>foobaaz</i> "
  236. //
  237. @interface GTMRegexStringSegment : NSObject {
  238. @private
  239. NSData *utf8StrBuf_;
  240. regmatch_t *regMatches_; // STRONG: ie-we call free
  241. NSUInteger numRegMatches_;
  242. BOOL isMatch_;
  243. }
  244. /// Returns YES if this segment is from a match of the regex, NO if it was a segment between matches.
  245. //
  246. // Use -isMatch to see if the segment is from a match of the pattern or if the
  247. // segment is some text between matches. (NOTE: isMatch is always YES for
  248. // matchSegmentEnumeratorForString)
  249. //
  250. - (BOOL)isMatch;
  251. /// Returns a new, autoreleased string w/ the full text segment from the original string.
  252. - (NSString *)string;
  253. /// Returns a new, autoreleased string w/ the |index| sub pattern from this segment of the original string.
  254. //
  255. // This api follows the conventions of most regex engines, and index 0 (zero) is
  256. // the full match, then the subpatterns are index 1, 2, ... going left to right.
  257. // If the pattern has optional subpatterns, then anything that didn't match
  258. // will return nil.
  259. // ie: When using the pattern "(fo(o+))((bar)|(baz))" the following indexes
  260. // fetch these values for a segment where -string is @"foooooobaz":
  261. // 0: "foooooobaz"
  262. // 1: "foooooo"
  263. // 2: "ooooo"
  264. // 3: "baz"
  265. // 4: nil
  266. // 5: "baz"
  267. //
  268. - (NSString *)subPatternString:(NSUInteger)index;
  269. @end
  270. /// Some helpers to streamline usage of GTMRegex
  271. //
  272. // Example usage:
  273. //
  274. // if ([inputStr gtm_matchesPattern:@"foo.*bar"]) {
  275. // // act on match
  276. // ....
  277. // }
  278. //
  279. // -------------
  280. //
  281. // NSString *subStr = [inputStr gtm_firstSubStringMatchedByPattern:@"^foo:.*$"];
  282. // if (subStr != nil) {
  283. // // act on subStr
  284. // ....
  285. // }
  286. //
  287. // -------------
  288. //
  289. // NSArray *headingList =
  290. // [inputStr gtm_allSubstringsMatchedByPattern:@"^Heading:.*$"];
  291. // // act on the list of headings
  292. // ....
  293. //
  294. // -------------
  295. //
  296. // NSString *highlightedString =
  297. // [inputString gtm_stringByReplacingMatchesOfPattern:@"(foo+)(bar)"
  298. // withReplacement:@"<i>\\1</i><b>\\2</b>"];
  299. // ....
  300. //
  301. @interface NSString (GTMRegexAdditions)
  302. /// Returns YES if the full string matches regex |pattern| using the default match options
  303. - (BOOL)gtm_matchesPattern:(NSString *)pattern;
  304. /// Returns a new, autoreleased array of strings that contain the subpattern matches of |pattern| using the default match options
  305. //
  306. // See [GTMRegex subPatternsOfString:] for information about the returned array.
  307. //
  308. - (NSArray *)gtm_subPatternsOfPattern:(NSString *)pattern;
  309. /// Returns a new, autoreleased string w/ the first substring that matched the regex |pattern| using the default match options
  310. - (NSString *)gtm_firstSubStringMatchedByPattern:(NSString *)pattern;
  311. /// Returns YES if a substring matches regex |pattern| using the default match options
  312. - (BOOL)gtm_subStringMatchesPattern:(NSString *)pattern;
  313. /// Returns a new, autoreleased array of substrings in the string that match the regex |pattern| using the default match options
  314. //
  315. // Note: if the string has no matches, you get an empty array.
  316. - (NSArray *)gtm_allSubstringsMatchedByPattern:(NSString *)pattern;
  317. /// Returns a new, autoreleased segment enumerator that will break the string using pattern w/ the default match options
  318. //
  319. // The enumerator returns GTMRegexStringSegment objects, see that class for more
  320. // details and examples.
  321. //
  322. - (NSEnumerator *)gtm_segmentEnumeratorForPattern:(NSString *)pattern;
  323. /// Returns a new, autoreleased segment enumerator that will only return matching segments from the string using pattern w/ the default match options
  324. //
  325. // The enumerator returns GTMRegexStringSegment objects, see that class for more
  326. // details and examples.
  327. //
  328. - (NSEnumerator *)gtm_matchSegmentEnumeratorForPattern:(NSString *)pattern;
  329. /// Returns a new, autoreleased string with all matches for pattern |pattern| replaced w/ |replacementPattern|. Uses the default match options.
  330. //
  331. // |replacementPattern| has support for using any subExpression that matched,
  332. // see [GTMRegex stringByReplacingMatchesInString:withReplacement:] above
  333. // for details.
  334. //
  335. - (NSString *)gtm_stringByReplacingMatchesOfPattern:(NSString *)pattern
  336. withReplacement:(NSString *)replacementPattern;
  337. @end
  334. //
  335. - (NSString *)gtm_stringByReplacingMatchesOfPattern:(NSString *)pattern
  336. withReplacement:(NSString *)replacementPattern;
  337. @end