/thirdparty/breakpad/third_party/protobuf/protobuf/java/src/main/java/com/google/protobuf/Internal.java

http://github.com/tomahawk-player/tomahawk · Java · 206 lines · 74 code · 12 blank · 120 comment · 27 complexity · d99098df21ab1335010f07c6ccad7803 MD5 · raw file

  1. // Protocol Buffers - Google's data interchange format
  2. // Copyright 2008 Google Inc. All rights reserved.
  3. // http://code.google.com/p/protobuf/
  4. //
  5. // Redistribution and use in source and binary forms, with or without
  6. // modification, are permitted provided that the following conditions are
  7. // met:
  8. //
  9. // * Redistributions of source code must retain the above copyright
  10. // notice, this list of conditions and the following disclaimer.
  11. // * Redistributions in binary form must reproduce the above
  12. // copyright notice, this list of conditions and the following disclaimer
  13. // in the documentation and/or other materials provided with the
  14. // distribution.
  15. // * Neither the name of Google Inc. nor the names of its
  16. // contributors may be used to endorse or promote products derived from
  17. // this software without specific prior written permission.
  18. //
  19. // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  20. // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  21. // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  22. // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  23. // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  24. // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  25. // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  26. // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  27. // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  28. // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  29. // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  30. package com.google.protobuf;
  31. import java.io.UnsupportedEncodingException;
  32. /**
  33. * The classes contained within are used internally by the Protocol Buffer
  34. * library and generated message implementations. They are public only because
  35. * those generated messages do not reside in the {@code protobuf} package.
  36. * Others should not use this class directly.
  37. *
  38. * @author kenton@google.com (Kenton Varda)
  39. */
  40. public class Internal {
  41. /**
  42. * Helper called by generated code to construct default values for string
  43. * fields.
  44. * <p>
  45. * The protocol compiler does not actually contain a UTF-8 decoder -- it
  46. * just pushes UTF-8-encoded text around without touching it. The one place
  47. * where this presents a problem is when generating Java string literals.
  48. * Unicode characters in the string literal would normally need to be encoded
  49. * using a Unicode escape sequence, which would require decoding them.
  50. * To get around this, protoc instead embeds the UTF-8 bytes into the
  51. * generated code and leaves it to the runtime library to decode them.
  52. * <p>
  53. * It gets worse, though. If protoc just generated a byte array, like:
  54. * new byte[] {0x12, 0x34, 0x56, 0x78}
  55. * Java actually generates *code* which allocates an array and then fills
  56. * in each value. This is much less efficient than just embedding the bytes
  57. * directly into the bytecode. To get around this, we need another
  58. * work-around. String literals are embedded directly, so protoc actually
  59. * generates a string literal corresponding to the bytes. The easiest way
  60. * to do this is to use the ISO-8859-1 character set, which corresponds to
  61. * the first 256 characters of the Unicode range. Protoc can then use
  62. * good old CEscape to generate the string.
  63. * <p>
  64. * So we have a string literal which represents a set of bytes which
  65. * represents another string. This function -- stringDefaultValue --
  66. * converts from the generated string to the string we actually want. The
  67. * generated code calls this automatically.
  68. */
  69. public static String stringDefaultValue(String bytes) {
  70. try {
  71. return new String(bytes.getBytes("ISO-8859-1"), "UTF-8");
  72. } catch (UnsupportedEncodingException e) {
  73. // This should never happen since all JVMs are required to implement
  74. // both of the above character sets.
  75. throw new IllegalStateException(
  76. "Java VM does not support a standard character set.", e);
  77. }
  78. }
  79. /**
  80. * Helper called by generated code to construct default values for bytes
  81. * fields.
  82. * <p>
  83. * This is a lot like {@link #stringDefaultValue}, but for bytes fields.
  84. * In this case we only need the second of the two hacks -- allowing us to
  85. * embed raw bytes as a string literal with ISO-8859-1 encoding.
  86. */
  87. public static ByteString bytesDefaultValue(String bytes) {
  88. try {
  89. return ByteString.copyFrom(bytes.getBytes("ISO-8859-1"));
  90. } catch (UnsupportedEncodingException e) {
  91. // This should never happen since all JVMs are required to implement
  92. // ISO-8859-1.
  93. throw new IllegalStateException(
  94. "Java VM does not support a standard character set.", e);
  95. }
  96. }
  97. /**
  98. * Helper called by generated code to determine if a byte array is a valid
  99. * UTF-8 encoded string such that the original bytes can be converted to
  100. * a String object and then back to a byte array round tripping the bytes
  101. * without loss.
  102. * <p>
  103. * This is inspired by UTF_8.java in sun.nio.cs.
  104. *
  105. * @param byteString the string to check
  106. * @return whether the byte array is round trippable
  107. */
  108. public static boolean isValidUtf8(ByteString byteString) {
  109. int index = 0;
  110. int size = byteString.size();
  111. // To avoid the masking, we could change this to use bytes;
  112. // Then X > 0xC2 gets turned into X < -0xC2; X < 0x80
  113. // gets turned into X >= 0, etc.
  114. while (index < size) {
  115. int byte1 = byteString.byteAt(index++) & 0xFF;
  116. if (byte1 < 0x80) {
  117. // fast loop for single bytes
  118. continue;
  119. // we know from this point on that we have 2-4 byte forms
  120. } else if (byte1 < 0xC2 || byte1 > 0xF4) {
  121. // catch illegal first bytes: < C2 or > F4
  122. return false;
  123. }
  124. if (index >= size) {
  125. // fail if we run out of bytes
  126. return false;
  127. }
  128. int byte2 = byteString.byteAt(index++) & 0xFF;
  129. if (byte2 < 0x80 || byte2 > 0xBF) {
  130. // general trail-byte test
  131. return false;
  132. }
  133. if (byte1 <= 0xDF) {
  134. // two-byte form; general trail-byte test is sufficient
  135. continue;
  136. }
  137. // we know from this point on that we have 3 or 4 byte forms
  138. if (index >= size) {
  139. // fail if we run out of bytes
  140. return false;
  141. }
  142. int byte3 = byteString.byteAt(index++) & 0xFF;
  143. if (byte3 < 0x80 || byte3 > 0xBF) {
  144. // general trail-byte test
  145. return false;
  146. }
  147. if (byte1 <= 0xEF) {
  148. // three-byte form. Vastly more frequent than four-byte forms
  149. // The following has an extra test, but not worth restructuring
  150. if (byte1 == 0xE0 && byte2 < 0xA0 ||
  151. byte1 == 0xED && byte2 > 0x9F) {
  152. // check special cases of byte2
  153. return false;
  154. }
  155. } else {
  156. // four-byte form
  157. if (index >= size) {
  158. // fail if we run out of bytes
  159. return false;
  160. }
  161. int byte4 = byteString.byteAt(index++) & 0xFF;
  162. if (byte4 < 0x80 || byte4 > 0xBF) {
  163. // general trail-byte test
  164. return false;
  165. }
  166. // The following has an extra test, but not worth restructuring
  167. if (byte1 == 0xF0 && byte2 < 0x90 ||
  168. byte1 == 0xF4 && byte2 > 0x8F) {
  169. // check special cases of byte2
  170. return false;
  171. }
  172. }
  173. }
  174. return true;
  175. }
  176. /**
  177. * Interface for an enum value or value descriptor, to be used in FieldSet.
  178. * The lite library stores enum values directly in FieldSets but the full
  179. * library stores EnumValueDescriptors in order to better support reflection.
  180. */
  181. public interface EnumLite {
  182. int getNumber();
  183. }
  184. /**
  185. * Interface for an object which maps integers to {@link EnumLite}s.
  186. * {@link Descriptors.EnumDescriptor} implements this interface by mapping
  187. * numbers to {@link Descriptors.EnumValueDescriptor}s. Additionally,
  188. * every generated enum type has a static method internalGetValueMap() which
  189. * returns an implementation of this type that maps numbers to enum values.
  190. */
  191. public interface EnumLiteMap<T extends EnumLite> {
  192. T findValueByNumber(int number);
  193. }
  194. }