PageRenderTime 46ms CodeModel.GetById 15ms app.highlight 28ms RepoModel.GetById 1ms app.codeStats 0ms

/core/externals/google-toolbox-for-mac/Foundation/GTMNSString+XML.m

http://macfuse.googlecode.com/
Objective C | 181 lines | 103 code | 27 blank | 51 comment | 25 complexity | d0a16d2eca814a46228a342cafb843d4 MD5 | raw file
  1//
  2//  GTMNSString+XML.m
  3//
  4//  Copyright 2007-2008 Google Inc.
  5//
  6//  Licensed under the Apache License, Version 2.0 (the "License"); you may not
  7//  use this file except in compliance with the License.  You may obtain a copy
  8//  of the License at
  9// 
 10//  http://www.apache.org/licenses/LICENSE-2.0
 11// 
 12//  Unless required by applicable law or agreed to in writing, software
 13//  distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 14//  WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the
 15//  License for the specific language governing permissions and limitations under
 16//  the License.
 17//
 18
 19#import "GTMDefines.h"
 20#import "GTMNSString+XML.h"
 21
 22enum {
 23  kGTMXMLCharModeEncodeQUOT  = 0,
 24  kGTMXMLCharModeEncodeAMP   = 1,
 25  kGTMXMLCharModeEncodeAPOS  = 2,
 26  kGTMXMLCharModeEncodeLT    = 3,
 27  kGTMXMLCharModeEncodeGT    = 4,
 28  kGTMXMLCharModeValid       = 99,
 29  kGTMXMLCharModeInvalid     = 100,
 30};
 31typedef NSUInteger GTMXMLCharMode;
 32
 33static NSString *gXMLEntityList[] = {
 34  // this must match the above order
 35  @""",
 36  @"&",
 37  @"'",
 38  @"<",
 39  @">",
 40};
 41
 42GTM_INLINE GTMXMLCharMode XMLModeForUnichar(UniChar c) {
 43
 44  // Per XML spec Section 2.2 Characters
 45  //   ( http://www.w3.org/TR/REC-xml/#charsets )
 46  //
 47  //   Char    ::=       #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] |
 48  //                      [#x10000-#x10FFFF]
 49
 50  if (c <= 0xd7ff)  {
 51    if (c >= 0x20) {
 52      switch (c) {
 53        case 34:
 54          return kGTMXMLCharModeEncodeQUOT;
 55        case 38:
 56          return kGTMXMLCharModeEncodeAMP;
 57        case 39:
 58          return kGTMXMLCharModeEncodeAPOS;
 59        case 60:
 60          return kGTMXMLCharModeEncodeLT;
 61        case 62:
 62          return kGTMXMLCharModeEncodeGT;
 63        default:
 64          return kGTMXMLCharModeValid;
 65      }
 66    } else {
 67      if (c == '\n')
 68        return kGTMXMLCharModeValid;
 69      if (c == '\r')
 70        return kGTMXMLCharModeValid;
 71      if (c == '\t')
 72        return kGTMXMLCharModeValid;
 73      return kGTMXMLCharModeInvalid;
 74    }
 75  }
 76
 77  if (c < 0xE000)
 78    return kGTMXMLCharModeInvalid;
 79
 80  if (c <= 0xFFFD)
 81    return kGTMXMLCharModeValid;
 82
 83  // UniChar can't have the following values
 84  // if (c < 0x10000)
 85  //   return kGTMXMLCharModeInvalid;
 86  // if (c <= 0x10FFFF)
 87  //   return kGTMXMLCharModeValid;
 88
 89  return kGTMXMLCharModeInvalid;
 90} // XMLModeForUnichar
 91
 92
 93static NSString *AutoreleasedCloneForXML(NSString *src, BOOL escaping) {
 94  //
 95  // NOTE:
 96  // We don't use CFXMLCreateStringByEscapingEntities because it's busted in
 97  // 10.3 (http://lists.apple.com/archives/Cocoa-dev/2004/Nov/msg00059.html) and
 98  // it doesn't do anything about the chars that are actually invalid per the
 99  // xml spec.
100  //
101  
102  // we can't use the CF call here because it leaves the invalid chars
103  // in the string.
104  NSUInteger length = [src length];
105  if (!length) {
106    return src;
107  }
108  
109  NSMutableString *finalString = [NSMutableString string];
110
111  // this block is common between GTMNSString+HTML and GTMNSString+XML but
112  // it's so short that it isn't really worth trying to share.
113  const UniChar *buffer = CFStringGetCharactersPtr((CFStringRef)src);
114  if (!buffer) {
115    // We want this buffer to be autoreleased.
116    NSMutableData *data = [NSMutableData dataWithLength:length * sizeof(UniChar)];
117    if (!data) {
118      // COV_NF_START  - Memory fail case
119      _GTMDevLog(@"couldn't alloc buffer");
120      return nil;
121      // COV_NF_END
122    }
123    [src getCharacters:[data mutableBytes]];
124    buffer = [data bytes];
125  }
126  
127  const UniChar *goodRun = buffer;
128  NSUInteger goodRunLength = 0;
129  
130  for (NSUInteger i = 0; i < length; ++i) {
131    
132    GTMXMLCharMode cMode = XMLModeForUnichar(buffer[i]);
133    
134    // valid chars go as is, and if we aren't doing entities, then
135    // everything goes as is.
136    if ((cMode == kGTMXMLCharModeValid) ||
137        (!escaping && (cMode != kGTMXMLCharModeInvalid))) {
138      // goes as is
139      goodRunLength += 1;
140    } else {
141      // it's something we have to encode or something invalid
142      
143      // start by adding what we already collected (if anything)
144      if (goodRunLength) {
145        CFStringAppendCharacters((CFMutableStringRef)finalString, 
146                                 goodRun, 
147                                 goodRunLength);
148        goodRunLength = 0;
149      }
150      
151      // if it wasn't invalid, add the encoded version
152      if (cMode != kGTMXMLCharModeInvalid) {
153        // add this encoded
154        [finalString appendString:gXMLEntityList[cMode]];
155      }
156      
157      // update goodRun to point to the next UniChar
158      goodRun = buffer + i + 1;
159    }
160  }
161  
162  // anything left to add?
163  if (goodRunLength) {
164    CFStringAppendCharacters((CFMutableStringRef)finalString, 
165                             goodRun, 
166                             goodRunLength);
167  }
168  return finalString;
169} // AutoreleasedCloneForXML
170
@implementation NSString (GTMNSStringXMLAdditions)

// Returns a copy of the receiver produced by AutoreleasedCloneForXML with
// escaping turned on: characters that are invalid in XML are removed and the
// characters that have entities are replaced by them.
- (NSString *)gtm_stringBySanitizingAndEscapingForXML {
  const BOOL escapeEntities = YES;
  NSString *cleaned = AutoreleasedCloneForXML(self, escapeEntities);
  return cleaned;
}

// Returns a copy of the receiver produced by AutoreleasedCloneForXML with
// escaping turned off: characters that are invalid in XML are removed and
// everything else is left exactly as it was.
- (NSString *)gtm_stringBySanitizingToXMLSpec {
  const BOOL escapeEntities = NO;
  NSString *cleaned = AutoreleasedCloneForXML(self, escapeEntities);
  return cleaned;
}

@end