/core/externals/google-toolbox-for-mac/Foundation/GTMNSString+XML.m

http://macfuse.googlecode.com/ · Objective C · 181 lines · 103 code · 27 blank · 51 comment · 25 complexity · d0a16d2eca814a46228a342cafb843d4 MD5 · raw file

  1. //
  2. // GTMNSString+XML.m
  3. //
  4. // Copyright 2007-2008 Google Inc.
  5. //
  6. // Licensed under the Apache License, Version 2.0 (the "License"); you may not
  7. // use this file except in compliance with the License. You may obtain a copy
  8. // of the License at
  9. //
  10. // http://www.apache.org/licenses/LICENSE-2.0
  11. //
  12. // Unless required by applicable law or agreed to in writing, software
  13. // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
  14. // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
  15. // License for the specific language governing permissions and limitations under
  16. // the License.
  17. //
  18. #import "GTMDefines.h"
  19. #import "GTMNSString+XML.h"
  // Classification of a single UTF-16 code unit for XML output.
  //
  // The five kGTMXMLCharModeEncode* values double as indexes into
  // gXMLEntityList below, so they must stay small, dense and zero-based.
  // kGTMXMLCharModeValid means "copy through unchanged";
  // kGTMXMLCharModeInvalid means "not allowed by the XML spec, drop it".
  enum {
    kGTMXMLCharModeEncodeQUOT = 0,
    kGTMXMLCharModeEncodeAMP = 1,
    kGTMXMLCharModeEncodeAPOS = 2,
    kGTMXMLCharModeEncodeLT = 3,
    kGTMXMLCharModeEncodeGT = 4,
    kGTMXMLCharModeValid = 99,
    kGTMXMLCharModeInvalid = 100,
  };
  typedef NSUInteger GTMXMLCharMode;

  // Replacement text for each kGTMXMLCharModeEncode* value: the five entities
  // predefined by XML. Each entry is tied to its enum value with a designated
  // initializer so the table cannot silently drift out of order.
  static NSString *gXMLEntityList[] = {
    [kGTMXMLCharModeEncodeQUOT] = @"&quot;",
    [kGTMXMLCharModeEncodeAMP] = @"&amp;",
    [kGTMXMLCharModeEncodeAPOS] = @"&apos;",
    [kGTMXMLCharModeEncodeLT] = @"&lt;",
    [kGTMXMLCharModeEncodeGT] = @"&gt;",
  };
  // Classifies one UTF-16 code unit against the XML "Char" production.
  //
  // Per XML spec Section 2.2 Characters
  //   ( http://www.w3.org/TR/REC-xml/#charsets )
  //
  //   Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] |
  //            [#x10000-#x10FFFF]
  //
  // Returns one of the kGTMXMLCharModeEncode* values for the five characters
  // that have a predefined entity, kGTMXMLCharModeValid for any other allowed
  // code unit, and kGTMXMLCharModeInvalid for everything else.
  //
  // Only a single UniChar is examined, so the [#x10000-#x10FFFF] range can
  // never match here, and code units in 0xD800-0xDFFF are reported as invalid.
  GTM_INLINE GTMXMLCharMode XMLModeForUnichar(UniChar c) {
    // Characters with a dedicated answer, regardless of range.
    switch (c) {
      case '\t':
      case '\n':
      case '\r':
        // The only control characters XML permits.
        return kGTMXMLCharModeValid;
      case '"':
        return kGTMXMLCharModeEncodeQUOT;
      case '&':
        return kGTMXMLCharModeEncodeAMP;
      case '\'':
        return kGTMXMLCharModeEncodeAPOS;
      case '<':
        return kGTMXMLCharModeEncodeLT;
      case '>':
        return kGTMXMLCharModeEncodeGT;
      default:
        break;
    }

    // Remaining C0 control characters are not allowed.
    if (c < 0x20) {
      return kGTMXMLCharModeInvalid;
    }
    // [#x20-#xD7FF]
    if (c <= 0xD7FF) {
      return kGTMXMLCharModeValid;
    }
    // 0xD800-0xDFFF falls between the two allowed ranges.
    if (c < 0xE000) {
      return kGTMXMLCharModeInvalid;
    }
    // [#xE000-#xFFFD] is allowed; 0xFFFE and 0xFFFF are not.
    return (c <= 0xFFFD) ? kGTMXMLCharModeValid : kGTMXMLCharModeInvalid;
  }  // XMLModeForUnichar
  // Returns YES when buffer[index] is a UTF-16 high surrogate that is
  // immediately followed, within |length|, by a low surrogate. Such a pair
  // encodes one code point in [#x10000-#x10FFFF], which the XML spec allows.
  static BOOL GTMXMLIsSurrogatePairAt(const UniChar *buffer,
                                      NSUInteger index,
                                      NSUInteger length) {
    if (index + 1 >= length) {
      return NO;
    }
    UniChar high = buffer[index];
    UniChar low = buffer[index + 1];
    return (high >= 0xD800 && high <= 0xDBFF) &&
           (low >= 0xDC00 && low <= 0xDFFF);
  }

  // Builds a sanitized copy of |src| for use in XML.
  //
  // Characters that the XML spec does not allow are dropped. When |escaping|
  // is YES, the characters that XMLModeForUnichar reports as needing an entity
  // are replaced with the matching gXMLEntityList entry; when NO they are
  // copied through unchanged.
  //
  // Returns |src| itself when it is empty (or nil), nil if the scratch buffer
  // could not be allocated, and otherwise a new autoreleased string.
  static NSString *AutoreleasedCloneForXML(NSString *src, BOOL escaping) {
    //
    // NOTE:
    // We don't use CFXMLCreateStringByEscapingEntities because it's busted in
    // 10.3 (http://lists.apple.com/archives/Cocoa-dev/2004/Nov/msg00059.html)
    // and it doesn't do anything about the chars that are actually invalid per
    // the xml spec.
    //
    // we can't use the CF call here because it leaves the invalid chars
    // in the string.
    NSUInteger length = [src length];
    if (!length) {
      return src;
    }

    NSMutableString *finalString = [NSMutableString string];

    // this block is common between GTMNSString+HTML and GTMNSString+XML but
    // it's so short that it isn't really worth trying to share.
    const UniChar *buffer = CFStringGetCharactersPtr((CFStringRef)src);
    if (!buffer) {
      // No direct access to the string's storage, so copy the characters out.
      // We want this buffer to be autoreleased.
      NSMutableData *data =
          [NSMutableData dataWithLength:length * sizeof(UniChar)];
      if (!data) {
        // COV_NF_START  - Memory fail case
        _GTMDevLog(@"couldn't alloc buffer");
        return nil;
        // COV_NF_END
      }
      // Use the bounded variant so the copy can never exceed the buffer.
      [src getCharacters:[data mutableBytes] range:NSMakeRange(0, length)];
      buffer = [data bytes];
    }

    // |goodRun| / |goodRunLength| track the current stretch of characters that
    // can be copied verbatim, so they are appended in one call rather than
    // one character at a time.
    const UniChar *goodRun = buffer;
    NSUInteger goodRunLength = 0;

    for (NSUInteger i = 0; i < length; ++i) {
      // A well-formed surrogate pair is a single valid XML character; keep
      // both halves together. Lone surrogates fall through and are dropped.
      if (GTMXMLIsSurrogatePairAt(buffer, i, length)) {
        goodRunLength += 2;
        ++i;  // skip the low surrogate we just consumed
        continue;
      }

      GTMXMLCharMode cMode = XMLModeForUnichar(buffer[i]);

      // valid chars go as is, and if we aren't doing entities, then
      // everything goes as is.
      if ((cMode == kGTMXMLCharModeValid) ||
          (!escaping && (cMode != kGTMXMLCharModeInvalid))) {
        // goes as is
        goodRunLength += 1;
      } else {
        // it's something we have to encode or something invalid

        // start by adding what we already collected (if anything)
        if (goodRunLength) {
          CFStringAppendCharacters((CFMutableStringRef)finalString,
                                   goodRun,
                                   goodRunLength);
          goodRunLength = 0;
        }

        // if it wasn't invalid, add the encoded version
        if (cMode != kGTMXMLCharModeInvalid) {
          // add this encoded
          [finalString appendString:gXMLEntityList[cMode]];
        }

        // update goodRun to point to the next UniChar
        goodRun = buffer + i + 1;
      }
    }

    // anything left to add?
    if (goodRunLength) {
      CFStringAppendCharacters((CFMutableStringRef)finalString,
                               goodRun,
                               goodRunLength);
    }
    return finalString;
  }  // AutoreleasedCloneForXML
  @implementation NSString (GTMNSStringXMLAdditions)

  // Returns the receiver with characters that are invalid in XML removed and
  // with the characters that need escaping replaced by their entries from
  // gXMLEntityList. An empty receiver is returned as is.
  - (NSString *)gtm_stringBySanitizingAndEscapingForXML {
    NSString *sanitizedAndEscaped = AutoreleasedCloneForXML(self, YES);
    return sanitizedAndEscaped;
  }  // gtm_stringBySanitizingAndEscapingForXML

  // Returns the receiver with characters that are invalid in XML removed;
  // no entity escaping is performed. An empty receiver is returned as is.
  - (NSString *)gtm_stringBySanitizingToXMLSpec {
    NSString *sanitized = AutoreleasedCloneForXML(self, NO);
    return sanitized;
  }  // gtm_stringBySanitizingToXMLSpec

  @end