PageRenderTime 36ms CodeModel.GetById 20ms app.highlight 11ms RepoModel.GetById 1ms app.codeStats 0ms

/thirdparty/breakpad/third_party/protobuf/protobuf/java/src/main/java/com/google/protobuf/Internal.java

http://github.com/tomahawk-player/tomahawk
Java | 206 lines | 74 code | 12 blank | 120 comment | 27 complexity | d99098df21ab1335010f07c6ccad7803 MD5 | raw file
  1// Protocol Buffers - Google's data interchange format
  2// Copyright 2008 Google Inc.  All rights reserved.
  3// http://code.google.com/p/protobuf/
  4//
  5// Redistribution and use in source and binary forms, with or without
  6// modification, are permitted provided that the following conditions are
  7// met:
  8//
  9//     * Redistributions of source code must retain the above copyright
 10// notice, this list of conditions and the following disclaimer.
 11//     * Redistributions in binary form must reproduce the above
 12// copyright notice, this list of conditions and the following disclaimer
 13// in the documentation and/or other materials provided with the
 14// distribution.
 15//     * Neither the name of Google Inc. nor the names of its
 16// contributors may be used to endorse or promote products derived from
 17// this software without specific prior written permission.
 18//
 19// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 20// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 21// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 22// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 23// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 24// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 25// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 26// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 27// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 28// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 29// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 30
 31package com.google.protobuf;
 32
 33import java.io.UnsupportedEncodingException;
 34
 35/**
 36 * The classes contained within are used internally by the Protocol Buffer
 37 * library and generated message implementations. They are public only because
 38 * those generated messages do not reside in the {@code protobuf} package.
 39 * Others should not use this class directly.
 40 *
 41 * @author kenton@google.com (Kenton Varda)
 42 */
 43public class Internal {
 44  /**
 45   * Helper called by generated code to construct default values for string
 46   * fields.
 47   * <p>
 48   * The protocol compiler does not actually contain a UTF-8 decoder -- it
 49   * just pushes UTF-8-encoded text around without touching it.  The one place
 50   * where this presents a problem is when generating Java string literals.
 51   * Unicode characters in the string literal would normally need to be encoded
 52   * using a Unicode escape sequence, which would require decoding them.
 53   * To get around this, protoc instead embeds the UTF-8 bytes into the
 54   * generated code and leaves it to the runtime library to decode them.
 55   * <p>
 56   * It gets worse, though.  If protoc just generated a byte array, like:
 57   *   new byte[] {0x12, 0x34, 0x56, 0x78}
 58   * Java actually generates *code* which allocates an array and then fills
 59   * in each value.  This is much less efficient than just embedding the bytes
 60   * directly into the bytecode.  To get around this, we need another
 61   * work-around.  String literals are embedded directly, so protoc actually
 62   * generates a string literal corresponding to the bytes.  The easiest way
 63   * to do this is to use the ISO-8859-1 character set, which corresponds to
 64   * the first 256 characters of the Unicode range.  Protoc can then use
 65   * good old CEscape to generate the string.
 66   * <p>
 67   * So we have a string literal which represents a set of bytes which
 68   * represents another string.  This function -- stringDefaultValue --
 69   * converts from the generated string to the string we actually want.  The
 70   * generated code calls this automatically.
 71   */
 72  public static String stringDefaultValue(String bytes) {
 73    try {
 74      return new String(bytes.getBytes("ISO-8859-1"), "UTF-8");
 75    } catch (UnsupportedEncodingException e) {
 76      // This should never happen since all JVMs are required to implement
 77      // both of the above character sets.
 78      throw new IllegalStateException(
 79          "Java VM does not support a standard character set.", e);
 80    }
 81  }
 82
 83  /**
 84   * Helper called by generated code to construct default values for bytes
 85   * fields.
 86   * <p>
 87   * This is a lot like {@link #stringDefaultValue}, but for bytes fields.
 88   * In this case we only need the second of the two hacks -- allowing us to
 89   * embed raw bytes as a string literal with ISO-8859-1 encoding.
 90   */
 91  public static ByteString bytesDefaultValue(String bytes) {
 92    try {
 93      return ByteString.copyFrom(bytes.getBytes("ISO-8859-1"));
 94    } catch (UnsupportedEncodingException e) {
 95      // This should never happen since all JVMs are required to implement
 96      // ISO-8859-1.
 97      throw new IllegalStateException(
 98          "Java VM does not support a standard character set.", e);
 99    }
100  }
101
102  /**
103   * Helper called by generated code to determine if a byte array is a valid
104   * UTF-8 encoded string such that the original bytes can be converted to
105   * a String object and then back to a byte array round tripping the bytes
106   * without loss.
107   * <p>
108   * This is inspired by UTF_8.java in sun.nio.cs.
109   *
110   * @param byteString the string to check
111   * @return whether the byte array is round trippable
112   */
113  public static boolean isValidUtf8(ByteString byteString) {
114    int index = 0;
115    int size = byteString.size();
116    // To avoid the masking, we could change this to use bytes;
117    // Then X > 0xC2 gets turned into X < -0xC2; X < 0x80
118    // gets turned into X >= 0, etc.
119
120    while (index < size) {
121      int byte1 = byteString.byteAt(index++) & 0xFF;
122      if (byte1 < 0x80) {
123        // fast loop for single bytes
124        continue;
125
126        // we know from this point on that we have 2-4 byte forms
127      } else if (byte1 < 0xC2 || byte1 > 0xF4) {
128        // catch illegal first bytes: < C2 or > F4
129        return false;
130      }
131      if (index >= size) {
132        // fail if we run out of bytes
133        return false;
134      }
135      int byte2 = byteString.byteAt(index++) & 0xFF;
136      if (byte2 < 0x80 || byte2 > 0xBF) {
137        // general trail-byte test
138        return false;
139      }
140      if (byte1 <= 0xDF) {
141        // two-byte form; general trail-byte test is sufficient
142        continue;
143      }
144
145      // we know from this point on that we have 3 or 4 byte forms
146      if (index >= size) {
147        // fail if we run out of bytes
148        return false;
149      }
150      int byte3 = byteString.byteAt(index++) & 0xFF;
151      if (byte3 < 0x80 || byte3 > 0xBF) {
152        // general trail-byte test
153        return false;
154      }
155      if (byte1 <= 0xEF) {
156        // three-byte form. Vastly more frequent than four-byte forms
157        // The following has an extra test, but not worth restructuring
158        if (byte1 == 0xE0 && byte2 < 0xA0 ||
159            byte1 == 0xED && byte2 > 0x9F) {
160          // check special cases of byte2
161          return false;
162        }
163
164      } else {
165        // four-byte form
166
167        if (index >= size) {
168          // fail if we run out of bytes
169          return false;
170        }
171        int byte4 = byteString.byteAt(index++) & 0xFF;
172        if (byte4 < 0x80 || byte4 > 0xBF) {
173          // general trail-byte test
174          return false;
175        }
176        // The following has an extra test, but not worth restructuring
177        if (byte1 == 0xF0 && byte2 < 0x90 ||
178            byte1 == 0xF4 && byte2 > 0x8F) {
179          // check special cases of byte2
180          return false;
181        }
182      }
183    }
184    return true;
185  }
186
187  /**
188   * Interface for an enum value or value descriptor, to be used in FieldSet.
189   * The lite library stores enum values directly in FieldSets but the full
190   * library stores EnumValueDescriptors in order to better support reflection.
191   */
192  public interface EnumLite {
193    int getNumber();
194  }
195
196  /**
197   * Interface for an object which maps integers to {@link EnumLite}s.
198   * {@link Descriptors.EnumDescriptor} implements this interface by mapping
199   * numbers to {@link Descriptors.EnumValueDescriptor}s.  Additionally,
200   * every generated enum type has a static method internalGetValueMap() which
201   * returns an implementation of this type that maps numbers to enum values.
202   */
203  public interface EnumLiteMap<T extends EnumLite> {
204    T findValueByNumber(int number);
205  }
206}