/core/externals/google-toolbox-for-mac/Foundation/GTMRegex.m

http://macfuse.googlecode.com/ · Objective C · 798 lines · 542 code · 115 blank · 141 comment · 94 complexity · 1ea4b1d233a2093e5119da45c0f04d44 MD5 · raw file

  1. //
  2. // GTMRegex.m
  3. //
  4. // Copyright 2007-2008 Google Inc.
  5. //
  6. // Licensed under the Apache License, Version 2.0 (the "License"); you may not
  7. // use this file except in compliance with the License. You may obtain a copy
  8. // of the License at
  9. //
  10. // http://www.apache.org/licenses/LICENSE-2.0
  11. //
  12. // Unless required by applicable law or agreed to in writing, software
  13. // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
  14. // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
  15. // License for the specific language governing permissions and limitations under
  16. // the License.
  17. //
  18. #define GTMREGEX_DEFINE_GLOBALS 1
  19. #import "GTMRegex.h"
  20. #import "GTMDefines.h"
  // Regex used to split replacement text into literal runs and backreferences
  // (a backslash followed by digits) when performing substitutions.
  //
  // The literal below looks over-escaped because backslashes are consumed twice:
  // the compiler removes one level, and the regex engine needs every remaining
  // backslash escaped. The text the regex engine receives is:
  //   ((^|[^\\])(\\\\)*)(\\([0-9]+))
  // Group 1 is the leading text (string start or one non-backslash character,
  // followed by any number of backslash pairs); group 5 is the digits of the
  // backreference.
  static NSString *const kReplacementPattern =
    @"((^|[^\\\\])(\\\\\\\\)*)(\\\\([0-9]+))";
  // Subpattern indexes into a kReplacementPattern match.
  static const NSUInteger kReplacementPatternLeadingTextIndex = 1;
  static const NSUInteger kReplacementPatternSubpatternNumberIndex = 5;
  // File-private helpers of GTMRegex; both are implemented further down in this
  // file.
  @interface GTMRegex (PrivateMethods)
  // Returns the regerror() text for |errCode|, or @"internal error" when the
  // message cannot be fetched.
  - (NSString *)errorMessage:(int)errCode;
  // Runs regexec() with the compiled pattern against |utf8Str|. Returns YES on
  // a match (with |pmatch| filled in), NO for a nil buffer, no match or error.
  - (BOOL)runRegexOnUTF8:(const char *)utf8Str
                  nmatch:(size_t)nmatch
                  pmatch:(regmatch_t *)pmatch
                   flags:(int)flags;
  @end
  // Private NSEnumerator subclass (an implementation detail of this file) that
  // walks a string and hands back GTMRegexStringSegment objects.
  @interface GTMRegexEnumerator : NSEnumerator {
   @private
    GTMRegex *regex_;        // retained regex used for every search
    NSData *utf8StrBuf_;     // retained UTF-8 encoding of the processed string
    BOOL allSegments_;       // YES: also return the non-matching stretches
    // YES: never pass REG_NOTBOL, so each search treats its start as the
    // beginning of the string.
    BOOL treatStartOfNewSegmentAsBeginningOfString_;
    regoff_t curParseIndex_;  // byte offset where the next search starts
    // malloc'ed match found by a previous call but not returned yet (the text
    // in front of it was returned first).
    __strong regmatch_t *savedRegMatches_;
  }
  // Returns nil when |regex| is nil or |str| cannot be encoded as UTF-8.
  - (id)initWithRegex:(GTMRegex *)regex
        processString:(NSString *)str
          allSegments:(BOOL)allSegments;
  // See the implementation for why this switch exists.
  - (void)treatStartOfNewSegmentAsBeginningOfString:(BOOL)yesNo;
  @end
  // File-private initializer used by GTMRegexEnumerator to build segments.
  @interface GTMRegexStringSegment (PrivateMethods)
  // |regMatches| must be malloc'ed; the segment stores the pointer and frees it
  // in -dealloc. |numRegMatches| is the number of subpatterns, so the array
  // holds |numRegMatches| + 1 entries (slot zero is the whole match).
  - (id)initWithUTF8StrBuf:(NSData *)utf8StrBuf
                regMatches:(regmatch_t *)regMatches
             numRegMatches:(NSUInteger)numRegMatches
                   isMatch:(BOOL)isMatch;
  @end
  59. @implementation GTMRegex
  // Convenience factory: an autoreleased regex for |pattern| with default
  // options, or nil when the pattern is empty or does not compile.
  + (id)regexWithPattern:(NSString *)pattern {
    id regex = [[self alloc] initWithPattern:pattern];
    return [regex autorelease];
  }
  // Convenience factory: an autoreleased regex for |pattern| compiled with
  // |options|, or nil when the pattern is empty or does not compile.
  + (id)regexWithPattern:(NSString *)pattern options:(GTMRegexOptions)options {
    id regex = [[self alloc] initWithPattern:pattern options:options];
    return [regex autorelease];
  }
  // Convenience factory: an autoreleased regex for |pattern| compiled with
  // |options|. On a compile failure returns nil and, when |outErrorOrNULL| is
  // not NULL, stores an NSError describing the problem.
  + (id)regexWithPattern:(NSString *)pattern
                 options:(GTMRegexOptions)options
               withError:(NSError **)outErrorOrNULL {
    id regex = [[self alloc] initWithPattern:pattern
                                     options:options
                                   withError:outErrorOrNULL];
    return [regex autorelease];
  }
  // Returns a copy of |str| in which every character that is special in an
  // extended regular expression ( ^ . [ $ ( ) | * + ? { and backslash ) is
  // preceded by a backslash, so the result matches |str| literally.
  // Returns nil for a nil |str|.
  + (NSString *)escapedPatternForString:(NSString *)str {
    if (str == nil) {
      return nil;
    }
    // NOTE: fetching the whole string into a unichar buffer and copying the
    // unescaped runs in chunks would be more efficient.
    NSCharacterSet *metaChars =
      [NSCharacterSet characterSetWithCharactersInString:@"^.[$()|*+?{\\"];
    NSUInteger charCount = [str length];
    NSMutableString *escaped = [NSMutableString stringWithCapacity:charCount];
    for (NSUInteger idx = 0; idx < charCount; ++idx) {
      unichar current = [str characterAtIndex:idx];
      if ([metaChars characterIsMember:current]) {
        [escaped appendString:@"\\"];
      }
      [escaped appendFormat:@"%C", current];
    }
    return escaped;
  }
  // Plain -init has no pattern to compile; it forwards a nil pattern, which the
  // designated initializer rejects by returning nil.
  - (id)init {
    NSString *noPattern = nil;
    return [self initWithPattern:noPattern];
  }
  // Compiles |pattern| with the default options (option mask of zero).
  - (id)initWithPattern:(NSString *)pattern {
    GTMRegexOptions defaultOptions = 0;
    return [self initWithPattern:pattern options:defaultOptions];
  }
  // Compiles |pattern| with |options|. No NSError is requested, so a compile
  // failure is only logged by the designated initializer.
  - (id)initWithPattern:(NSString *)pattern options:(GTMRegexOptions)options {
    return [self initWithPattern:pattern
                         options:options
                       withError:NULL];
  }
  // Designated initializer. Compiles |pattern| with regcomp() using flags
  // derived from |options|.
  //
  // Returns nil (after releasing self) when |pattern| is nil/empty, when the
  // pattern cannot be copied, or when regcomp() fails. On a regcomp() failure
  // the error is stored in |*outErrorOrNULL| when provided, otherwise it is
  // logged. |*outErrorOrNULL| is cleared to nil up front when provided.
  - (id)initWithPattern:(NSString *)pattern
                options:(GTMRegexOptions)options
              withError:(NSError **)outErrorOrNULL {
    if ((self = [super init]) == nil) {
      return nil;
    }
    if (outErrorOrNULL != NULL) {
      *outErrorOrNULL = nil;
    }
    // An empty (or nil) pattern is rejected outright.
    if ([pattern length] == 0) {
      [self release];
      return nil;
    }

    // Translate our options into regcomp() flags. Newline support is on unless
    // it was explicitly suppressed.
    options_ = options;
    BOOL ignoreCase = (options_ & kGTMRegexOptionIgnoreCase) != 0;
    BOOL newlineAware = (options_ & kGTMRegexOptionSupressNewlineSupport) == 0;
    int compileFlags = REG_EXTENDED;
    if (ignoreCase) {
      compileFlags |= REG_ICASE;
    }
    if (newlineAware) {
      compileFlags |= REG_NEWLINE;
    }

    // Even if regcomp() fails we need a marker that it was called so -dealloc
    // calls regfree() (the structure can get partly filled in to allow better
    // error info). pattern_ doubles as that marker.
    pattern_ = [pattern copy];
    if (pattern_ == nil) {
      // COV_NF_START - no real way to force this in a unittest
      [self release];
      return nil;
      // COV_NF_END
    }

    int compileStatus = regcomp(&regexData_, [pattern_ UTF8String], compileFlags);
    if (compileStatus == 0) {
      return self;
    }

    // Compilation failed: report it, then tear ourselves down.
    NSString *reason = [self errorMessage:compileStatus];
    if (outErrorOrNULL != NULL) {
      // The pattern and the regerror() text travel in the userInfo.
      NSDictionary *details =
        [NSDictionary dictionaryWithObjectsAndKeys:
          pattern_, kGTMRegexPatternErrorPattern,
          reason, kGTMRegexPatternErrorErrorString,
          nil];
      *outErrorOrNULL = [NSError errorWithDomain:kGTMRegexErrorDomain
                                            code:kGTMRegexPatternParseFailedError
                                        userInfo:details];
    } else {
      // The caller gave us no NSError to fill in, so log to help debugging.
      _GTMDevLog(@"Invalid pattern \"%@\", error: \"%@\"",
                 pattern_, reason);
    }
    [self release];
    return nil;
  }
  // Frees the compiled regex and the stored pattern. A non-nil pattern_ is the
  // marker that regcomp() was called on regexData_ (successfully or not), so
  // regfree() is only invoked in that case.
  - (void)dealloc {
    if (pattern_ != nil) {
      regfree(&regexData_);
      [pattern_ release];
      // Clear the marker too, since it guards regexData_.
      pattern_ = nil;
    }
    [super dealloc];
  }
  // Number of parenthesized subpatterns in the compiled pattern, as reported
  // by regcomp() in re_nsub (the whole match is not counted).
  - (NSUInteger)subPatternCount {
    NSUInteger subPatterns = regexData_.re_nsub;
    return subPatterns;
  }
  // Returns YES only when the first match found by the pattern spans all of
  // |str| (from byte 0 to the end of its UTF-8 encoding).
  - (BOOL)matchesString:(NSString *)str {
    regmatch_t wholeMatch;
    BOOL found = [self runRegexOnUTF8:[str UTF8String]
                               nmatch:1
                               pmatch:&wholeMatch
                                flags:0];
    if (!found) {
      return NO;
    }
    if (wholeMatch.rm_so != 0) {
      return NO;
    }
    // Offsets are in bytes, so compare against the UTF-8 byte length.
    regoff_t byteLength =
      (regoff_t)[str lengthOfBytesUsingEncoding:NSUTF8StringEncoding];
    return wholeMatch.rm_eo == byteLength;
  }
  // Matches the pattern against all of |str| and returns an array with the
  // whole match at index 0 followed by one entry per subpattern. Subpatterns
  // that did not take part in the match are represented by NSNull.
  // Returns nil when there is no match or the match does not cover the whole
  // string.
  - (NSArray *)subPatternsOfString:(NSString *)str {
    // One slot per subpattern plus slot zero for the full match.
    NSUInteger slotCount = regexData_.re_nsub + 1;
    regmatch_t *slots = malloc(sizeof(regmatch_t) * slotCount);
    if (slots == NULL)
      return nil; // COV_NF_LINE - no real way to force this in a unittest

    NSArray *result = nil;
    // Everything runs inside a try so the malloc'ed slots are always freed.
    @try {
      const char *utf8 = [str UTF8String];
      BOOL coversWholeString =
        [self runRegexOnUTF8:utf8 nmatch:slotCount pmatch:slots flags:0] &&
        (slots[0].rm_so == 0) &&
        (slots[0].rm_eo ==
         (regoff_t)[str lengthOfBytesUsingEncoding:NSUTF8StringEncoding]);
      if (coversWholeString) {
        NSMutableArray *collected = [NSMutableArray arrayWithCapacity:slotCount];
        for (NSUInteger slot = 0; slot < slotCount; ++slot) {
          regoff_t startOffset = slots[slot].rm_so;
          regoff_t endOffset = slots[slot].rm_eo;
          if ((startOffset == -1) && (endOffset == -1)) {
            // This subpattern was not used by the match.
            [collected addObject:[NSNull null]];
            continue;
          }
          // Copy the matched bytes out of the UTF-8 buffer.
          NSString *piece =
            [[[NSString alloc] initWithBytes:(utf8 + startOffset)
                                      length:(NSUInteger)(endOffset - startOffset)
                                    encoding:NSUTF8StringEncoding] autorelease];
          [collected addObject:piece];
        }
        result = collected;
      }
    } // COV_NF_LINE - radar 5851992 only reachable w/ an uncaught exception which isn't testable
    @finally {
      free(slots);
    }
    return result;
  }
  // Returns the text of the first match of the pattern anywhere in |str|, or
  // nil when nothing matches.
  - (NSString *)firstSubStringMatchedInString:(NSString *)str {
    const char *utf8 = [str UTF8String];
    regmatch_t firstMatch;
    BOOL found = [self runRegexOnUTF8:utf8
                               nmatch:1
                               pmatch:&firstMatch
                                flags:0];
    if (!found) {
      return nil;
    }
    // Offsets are byte offsets into the UTF-8 buffer.
    regoff_t matchLength = firstMatch.rm_eo - firstMatch.rm_so;
    NSString *matched =
      [[NSString alloc] initWithBytes:(utf8 + firstMatch.rm_so)
                               length:(NSUInteger)matchLength
                             encoding:NSUTF8StringEncoding];
    return [matched autorelease];
  }
  // Returns YES when the pattern matches anywhere inside |str|.
  - (BOOL)matchesSubStringInString:(NSString *)str {
    // The match location is not needed; only the helper's verdict is reported.
    regmatch_t ignoredMatch;
    return [self runRegexOnUTF8:[str UTF8String]
                         nmatch:1
                         pmatch:&ignoredMatch
                          flags:0];
  }
  // Returns an autoreleased enumerator over every segment of |str|: the
  // matches and the text between them.
  - (NSEnumerator *)segmentEnumeratorForString:(NSString *)str {
    GTMRegexEnumerator *walker =
      [[GTMRegexEnumerator alloc] initWithRegex:self
                                  processString:str
                                    allSegments:YES];
    return [walker autorelease];
  }
  // Returns an autoreleased enumerator over only the matching segments of
  // |str|.
  - (NSEnumerator *)matchSegmentEnumeratorForString:(NSString *)str {
    GTMRegexEnumerator *walker =
      [[GTMRegexEnumerator alloc] initWithRegex:self
                                  processString:str
                                    allSegments:NO];
    return [walker autorelease];
  }
  // Returns a copy of |str| in which every match of the pattern is replaced by
  // |replacementPattern|. Inside the replacement, a backslash followed by
  // digits inserts the corresponding subpattern of the match (0 is the whole
  // match). A nil or empty replacement simply removes the matches.
  // Returns nil when |str| is nil or the replacement text cannot be split up.
  - (NSString *)stringByReplacingMatchesInString:(NSString *)str
                                 withReplacement:(NSString *)replacementPattern {
    if (str == nil) {
      return nil;
    }

    // Crack the replacement up front so it can be reused for every match. It
    // stays nil when the replacement is empty (or nil).
    NSArray *replacementPieces = nil;
    if ([replacementPattern length] > 0) {
      // No newline support needed; '^' should only match the very start.
      GTMRegex *backrefRegex =
        [GTMRegex regexWithPattern:kReplacementPattern
                           options:kGTMRegexOptionSupressNewlineSupport];
  #ifdef DEBUG
      if (!backrefRegex) {
        _GTMDevLog(@"failed to parse out replacement regex!!!"); // COV_NF_LINE
      }
  #endif
      GTMRegexEnumerator *pieceEnumerator =
        [[[GTMRegexEnumerator alloc] initWithRegex:backrefRegex
                                     processString:replacementPattern
                                       allSegments:YES] autorelease];
      // kReplacementPattern alone cannot express what we need. A backreference
      // is a backslash followed by a number, but the replacement may also hold
      // escaped (doubled) backslashes: an even run of backslashes is all
      // escapes, an odd run ends in a backreference. re_format(7) extended
      // expressions cannot say "an odd number of backslashes after a
      // non-backslash", so the pattern looks for string start or a
      // non-backslash, an optional even run of backslashes, then the
      // backreference, and the enumerator is told to treat the start of every
      // new segment as the start of the string. (The case that fails without
      // this is a replacement like "\2\1".)
      [pieceEnumerator treatStartOfNewSegmentAsBeginningOfString:YES];
      // Collect into an array so it can be walked once per match.
      replacementPieces = [pieceEnumerator allObjects];
      if (replacementPieces == nil) {
        // COV_NF_START - no real way to force this in a unittest
        _GTMDevLog(@"failed to create the replacements for substitutions");
        return nil;
        // COV_NF_END
      }
    }

    NSMutableString *output = [NSMutableString stringWithCapacity:[str length]];
    NSEnumerator *pieces = [self segmentEnumeratorForString:str];
    GTMRegexStringSegment *piece = nil;
    while ((piece = [pieces nextObject]) != nil) {
      if (![piece isMatch]) {
        // Text between matches is copied over untouched.
        [output appendString:[piece string]];
        continue;
      }
      if (replacementPieces == nil) {
        // No replacement text: the match is simply dropped.
        continue;
      }
      // Emit the cracked replacement for this match.
      GTMRegexStringSegment *part = nil;
      GTM_FOREACH_OBJECT(part, replacementPieces) {
        if ([part isMatch]) {
          // A backreference: first whatever text led up to it...
          NSString *leadingText =
            [part subPatternString:kReplacementPatternLeadingTextIndex];
          if (leadingText != nil) {
            [output appendString:leadingText];
          }
          // ...then the subpattern of the current match it refers to.
          NSString *digits =
            [part subPatternString:kReplacementPatternSubpatternNumberIndex];
          int backrefIndex = [digits intValue];
          NSString *captured = [piece subPatternString:backrefIndex];
          // An unused subpattern comes back as nil and contributes nothing.
          if (captured != nil) {
            [output appendString:captured];
          }
        } else {
          // Raw replacement text.
          [output appendString:[part string]];
        }
      }
    }
    return output;
  }
  // Debug description listing the class, address, pattern, raw subpattern
  // count reported by regcomp() and the options the regex was created with.
  - (NSString *)description {
    // re_nsub is a size_t (unsigned), so it is printed with %zu; %zd is the
    // conversion for the signed counterpart.
    NSMutableString *result =
      [NSMutableString stringWithFormat:@"%@<%p> { pattern=\"%@\", rawNumSubPatterns=%zu, options=(",
        [self class], self, pattern_, regexData_.re_nsub];
    if (options_) {
      if (options_ & kGTMRegexOptionIgnoreCase)
        [result appendString:@" IgnoreCase"];
      if ((options_ & kGTMRegexOptionSupressNewlineSupport) == kGTMRegexOptionSupressNewlineSupport)
        [result appendString:@" NoNewlineSupport"];
    } else {
      [result appendString:@" None(Default)"];
    }
    [result appendString:@" ) }"];
    return result;
  }
  377. @end
  @implementation GTMRegex (PrivateMethods)
  // Returns the regerror() message for |errCode| against our compiled regex.
  // Falls back to @"internal error" when the buffer cannot be allocated, the
  // second regerror() call reports a different size, or the text is not valid
  // UTF-8.
  - (NSString *)errorMessage:(int)errCode {
    NSString *const fallback = @"internal error";
    // First call only sizes the buffer we need.
    size_t needed = regerror(errCode, &regexData_, NULL, 0);
    char *text = (char *)malloc(sizeof(char) * needed);
    if (text == NULL) {
      return fallback;
    }
    NSString *message = nil;
    // Second call fetches the message itself.
    if (regerror(errCode, &regexData_, text, needed) == needed) {
      message = [NSString stringWithUTF8String:text];
    }
    free(text);
    return (message != nil) ? message : fallback;
  }
  // Private helper that runs the compiled regex on a block of UTF-8 bytes.
  // Returns NO for a NULL buffer, for no match and for any regexec() error;
  // errors other than REG_NOMATCH are logged in DEBUG builds.
  - (BOOL)runRegexOnUTF8:(const char*)utf8Str
                  nmatch:(size_t)nmatch
                  pmatch:(regmatch_t *)pmatch
                   flags:(int)flags {
    if (utf8Str == NULL) {
      return NO;
    }
    int status = regexec(&regexData_, utf8Str, nmatch, pmatch, flags);
    if (status == 0) {
      return YES;
    }
  #ifdef DEBUG
    if (status != REG_NOMATCH) {
      // COV_NF_START - no real way to force this in a unittest
      NSString *reason = [self errorMessage:status];
      _GTMDevLog(@"%@: matching string \"%.20s...\", had error: \"%@\"",
                 self, utf8Str, reason);
      // COV_NF_END
    }
  #endif
    return NO;
  }
  @end
  418. @implementation GTMRegexEnumerator
  // Plain -init is not blocked: the class is not exported, so nobody outside
  // this file should be creating one (and if they do, they get whatever
  // happens).
  //
  // Retains |regex| and the UTF-8 encoding of |str|. Returns nil (releasing
  // self) when either of them ends up nil.
  - (id)initWithRegex:(GTMRegex *)regex
        processString:(NSString *)str
          allSegments:(BOOL)allSegments {
    if ((self = [super init]) == nil) {
      return nil;
    }
    allSegments_ = allSegments;
    regex_ = [regex retain];
    NSData *encoded = [str dataUsingEncoding:NSUTF8StringEncoding];
    utf8StrBuf_ = [encoded retain];
    // Both pieces are required.
    if ((regex_ == nil) || (utf8StrBuf_ == nil)) {
      [self release];
      return nil;
    }
    // The parsing state was zeroed for us by object creation.
    return self;
  }
  // Releases the retained regex and UTF-8 buffer and frees any match that was
  // saved but never handed out. (Original author's note: no -finalize is needed
  // because savedRegMatches_ is marked __strong.)
  - (void)dealloc {
    [utf8StrBuf_ release];
    [regex_ release];
    free(savedRegMatches_);
    [super dealloc];
  }
  // Controls whether searches after the first one are still treated as
  // starting at the beginning of the string.
  //
  // regexec() assumes the first character it looks at is the start of the
  // string. That is right for the first search, but this enumerator keeps
  // searching from the end of the previous match, so for later searches
  // -nextObject normally passes REG_NOTBOL to say "this is not the string
  // start". A few patterns can only express what they need if every pass is
  // considered a string start; passing YES here turns the REG_NOTBOL flag off.
  // The behavior is hard to explain without many examples, so it is kept out
  // of the public API. (Hint: see what uses it within this file.)
  - (void)treatStartOfNewSegmentAsBeginningOfString:(BOOL)yesNo {
    treatStartOfNewSegmentAsBeginningOfString_ = yesNo;
  }
  // Returns the next GTMRegexStringSegment, or nil when the buffer has been
  // consumed (or an allocation fails / an exception is caught).
  //
  // When all segments are requested and text precedes a match, that text is
  // returned first and the match is parked in savedRegMatches_ for the
  // following call.
  //
  // NOTE(review): a match that is empty and starts exactly at curParseIndex_
  // leaves curParseIndex_ unchanged, so the next call searches from the same
  // offset again - confirm callers never use patterns that can match the
  // empty string.
  - (id)nextObject {
    GTMRegexStringSegment *segment = nil;
    regmatch_t *pending = NULL;
    BOOL pendingIsMatch = NO;
    // Everything happens inside a try so that, if something throws, the memory
    // we malloc'ed still gets cleaned up.
    @try {
      if (savedRegMatches_ != NULL) {
        // A match parked by the previous call; anything parked was a match.
        pending = savedRegMatches_;
        savedRegMatches_ = NULL;
        pendingIsMatch = YES;
      } else if (curParseIndex_ < (regoff_t)[utf8StrBuf_ length]) {
        // Not at the end yet: run a search.
        regoff_t endIndex = (regoff_t)[utf8StrBuf_ length];
        // One slot per subpattern plus slot zero for the full match.
        NSUInteger slotCount = [regex_ subPatternCount] + 1;
        size_t slotBytes = slotCount * sizeof(regmatch_t);
        pending = malloc(slotBytes);
        if (pending == NULL)
          return nil; // COV_NF_LINE - no real way to force this in a unittest
        // With REG_STARTEND, slot zero tells regexec() which range to search.
        pending[0].rm_so = curParseIndex_;
        pending[0].rm_eo = endIndex;
        int execFlags = REG_STARTEND;
        if ((curParseIndex_ != 0) &&
            !treatStartOfNewSegmentAsBeginningOfString_) {
          // See -treatStartOfNewSegmentAsBeginningOfString: for the reasoning.
          execFlags |= REG_NOTBOL;
        }
        BOOL found = [regex_ runRegexOnUTF8:[utf8StrBuf_ bytes]
                                     nmatch:slotCount
                                     pmatch:pending
                                      flags:execFlags];
        if (found && allSegments_ && (pending[0].rm_so != curParseIndex_)) {
          // All segments are wanted and there is text in front of this match:
          // park the match and build a non-match range for the gap.
          savedRegMatches_ = pending;
          pending = malloc(slotBytes);
          if (pending == NULL)
            return nil; // COV_NF_LINE - no real way to force this in a unittest
          // Every slot except zero is marked as unused.
          for (NSUInteger slot = 1; slot < slotCount; ++slot) {
            pending[slot].rm_so = pending[slot].rm_eo = -1;
          }
          pending[0].rm_so = curParseIndex_;
          pending[0].rm_eo = savedRegMatches_[0].rm_so;
          // The next search starts after the parked match.
          curParseIndex_ = savedRegMatches_[0].rm_eo;
        } else if (found) {
          // Either only matches are wanted or the match starts right at the
          // cursor: return it as is and move past it.
          pendingIsMatch = YES;
          curParseIndex_ = pending[0].rm_eo;
        } else {
          // No further match.
          if (allSegments_) {
            // Return the trailing text as a final non-match segment.
            for (NSUInteger slot = 1; slot < slotCount; ++slot) {
              pending[slot].rm_so = pending[slot].rm_eo = -1;
            }
            pending[0].rm_so = curParseIndex_;
            pending[0].rm_eo = endIndex;
          } else {
            // Nothing to return; drop the match buffer.
            free(pending);
            pending = NULL;
          }
          // Either way the walk is finished.
          curParseIndex_ = endIndex;
        }
      }
      // Otherwise we are at the end: nothing to do, nil is returned.

      // Wrap whatever we ended up with in a segment, which takes over the
      // malloc'ed buffer.
      if (pending != NULL) {
        segment =
          [[[GTMRegexStringSegment alloc] initWithUTF8StrBuf:utf8StrBuf_
                                                  regMatches:pending
                                               numRegMatches:[regex_ subPatternCount]
                                                     isMatch:pendingIsMatch] autorelease];
        pending = NULL;
      }
    } @catch (id e) { // COV_NF_START - no real way to force this in a test
      _GTMDevLog(@"Exceptions while trying to advance enumeration (%@)", e);
      // Free whatever is still held in our temporary.
      free(pending);
    } // COV_NF_END
    return segment;
  }
  // Debug description showing the class, address, regex, the allSegments flag
  // and up to the first 20 bytes of the string being processed.
  - (NSString *)description {
    // utf8StrBuf_ holds the raw UTF-8 bytes without a terminating NUL, so the
    // preview must be bounded by the buffer's real length; a fixed "%.20s"
    // could read past the end of a buffer shorter than 20 bytes.
    NSUInteger available = [utf8StrBuf_ length];
    int previewLength = (int)((available < 20) ? available : 20);
    const char *preview =
      (available > 0) ? (const char *)[utf8StrBuf_ bytes] : "";
    return [NSString stringWithFormat:@"%@<%p> { regex=\"%@\", allSegments=%s, string=\"%.*s...\" }",
      [self class], self,
      regex_,
      (allSegments_ ? "YES" : "NO"),
      previewLength, preview];
  }
  569. @end
  @implementation GTMRegexStringSegment
  // Plain -init must never be used. The class is visible in the header, so
  // someone could try to create one by mistake; this always raises via
  // -doesNotRecognizeSelector:.
  - (id)init {
    // Run super's init and hand the object to the autorelease pool so the
    // allocation is not leaked.
    [[super init] autorelease];
    [self doesNotRecognizeSelector:_cmd];
    return nil; // COV_NF_LINE - return is just here to keep gcc happy
  }
  // Frees the malloc'ed match array this segment owns and releases the shared
  // UTF-8 buffer.
  - (void)dealloc {
    [utf8StrBuf_ release];
    free(regMatches_);
    [super dealloc];
  }
  // YES when this segment is a pattern match, NO when it is the text between
  // matches.
  - (BOOL)isMatch {
    return isMatch_;
  }
  // The full text of the segment (match slot zero).
  - (NSString *)string {
    return [self subPatternString:0];
  }
  // Returns the text of subpattern |patternIndex| (0 is the whole segment), or
  // nil when the index is out of range or the subpattern was not used.
  - (NSString *)subPatternString:(NSUInteger)patternIndex {
    if (patternIndex > numRegMatches_) {
      return nil;
    }
    regoff_t startOffset = regMatches_[patternIndex].rm_so;
    regoff_t endOffset = regMatches_[patternIndex].rm_eo;
    // Both offsets at -1 mark a subpattern that was not found.
    if ((startOffset == -1) && (endOffset == -1)) {
      return nil;
    }
    // Copy the bytes out of the shared UTF-8 buffer.
    const char *firstByte = (const char*)[utf8StrBuf_ bytes] + startOffset;
    NSString *text =
      [[NSString alloc] initWithBytes:firstByte
                               length:(NSUInteger)(endOffset - startOffset)
                             encoding:NSUTF8StringEncoding];
    return [text autorelease];
  }
  // Debug description listing the match flag and the raw bytes of every slot.
  - (NSString *)description {
    NSMutableString *text =
      [NSMutableString stringWithFormat:@"%@<%p> { isMatch=\"%s\", subPatterns=(",
        [self class], self, (isMatch_ ? "YES" : "NO")];
    const char *bufferStart = (const char*)[utf8StrBuf_ bytes];
    for (NSUInteger slot = 0; slot <= numRegMatches_; ++slot) {
      int byteCount = (int)(regMatches_[slot].rm_eo - regMatches_[slot].rm_so);
      const char *slotStart = bufferStart + regMatches_[slot].rm_so;
      // The first entry is introduced by a space, later ones by a comma.
      NSString *separator = (slot == 0) ? @" " : @", ";
      [text appendFormat:@"%@\"%.*s\"", separator, byteCount, slotStart];
    }
    [text appendString:@" ) }"];
    return text;
  }
  @end
  @implementation GTMRegexStringSegment (PrivateMethods)
  // Builds a segment over |utf8StrBuf| (retained) described by |regMatches|,
  // a malloc'ed array that this object stores and later frees in -dealloc.
  // Returns nil (releasing self) when either of them is nil.
  - (id)initWithUTF8StrBuf:(NSData *)utf8StrBuf
                regMatches:(regmatch_t *)regMatches
             numRegMatches:(NSUInteger)numRegMatches
                   isMatch:(BOOL)isMatch {
    if ((self = [super init]) == nil) {
      return nil;
    }
    isMatch_ = isMatch;
    numRegMatches_ = numRegMatches;
    regMatches_ = regMatches;
    utf8StrBuf_ = [utf8StrBuf retain];
    // Sanity check the arguments.
    if ((utf8StrBuf_ == nil) || (regMatches_ == NULL)) {
      // COV_NF_START
      // Only possible if something messed with our internal state.
      [self release];
      return nil;
      // COV_NF_END
    }
    return self;
  }
  @end
  // Convenience wrappers on NSString. Each one builds a throwaway GTMRegex from
  // |pattern| with default options and forwards to the matching GTMRegex
  // method with the receiver as the string. When the pattern fails to compile
  // the regex is nil, so the forwarded message yields NO/nil.
  @implementation NSString (GTMRegexAdditions)
  // YES when |pattern| matches the whole receiver.
  - (BOOL)gtm_matchesPattern:(NSString *)pattern {
    return [[GTMRegex regexWithPattern:pattern] matchesString:self];
  }
  // Whole match plus subpatterns when |pattern| matches the whole receiver.
  - (NSArray *)gtm_subPatternsOfPattern:(NSString *)pattern {
    return [[GTMRegex regexWithPattern:pattern] subPatternsOfString:self];
  }
  // Text of the first match of |pattern| within the receiver.
  - (NSString *)gtm_firstSubStringMatchedByPattern:(NSString *)pattern {
    return [[GTMRegex regexWithPattern:pattern]
             firstSubStringMatchedInString:self];
  }
  // YES when |pattern| matches anywhere within the receiver.
  - (BOOL)gtm_subStringMatchesPattern:(NSString *)pattern {
    return [[GTMRegex regexWithPattern:pattern] matchesSubStringInString:self];
  }
  // The "string" value of every matching segment of the receiver.
  - (NSArray *)gtm_allSubstringsMatchedByPattern:(NSString *)pattern {
    NSEnumerator *matches = [self gtm_matchSegmentEnumeratorForPattern:pattern];
    return [[matches allObjects] valueForKey:@"string"];
  }
  // Enumerator over all segments (matches and the text between them).
  - (NSEnumerator *)gtm_segmentEnumeratorForPattern:(NSString *)pattern {
    return [[GTMRegex regexWithPattern:pattern] segmentEnumeratorForString:self];
  }
  // Enumerator over only the matching segments.
  - (NSEnumerator *)gtm_matchSegmentEnumeratorForPattern:(NSString *)pattern {
    return [[GTMRegex regexWithPattern:pattern]
             matchSegmentEnumeratorForString:self];
  }
  // Copy of the receiver with every match of |pattern| replaced.
  - (NSString *)gtm_stringByReplacingMatchesOfPattern:(NSString *)pattern
                                      withReplacement:(NSString *)replacementPattern {
    return [[GTMRegex regexWithPattern:pattern]
             stringByReplacingMatchesInString:self
                              withReplacement:replacementPattern];
  }
  @end