/thirdparty/breakpad/third_party/protobuf/protobuf/java/src/main/java/com/google/protobuf/Internal.java
http://github.com/tomahawk-player/tomahawk · Java · 206 lines · 74 code · 12 blank · 120 comment · 27 complexity · d99098df21ab1335010f07c6ccad7803 MD5 · raw file
- // Protocol Buffers - Google's data interchange format
- // Copyright 2008 Google Inc. All rights reserved.
- // http://code.google.com/p/protobuf/
- //
- // Redistribution and use in source and binary forms, with or without
- // modification, are permitted provided that the following conditions are
- // met:
- //
- // * Redistributions of source code must retain the above copyright
- // notice, this list of conditions and the following disclaimer.
- // * Redistributions in binary form must reproduce the above
- // copyright notice, this list of conditions and the following disclaimer
- // in the documentation and/or other materials provided with the
- // distribution.
- // * Neither the name of Google Inc. nor the names of its
- // contributors may be used to endorse or promote products derived from
- // this software without specific prior written permission.
- //
- // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- package com.google.protobuf;
- import java.io.UnsupportedEncodingException;
- /**
- * The classes contained within are used internally by the Protocol Buffer
- * library and generated message implementations. They are public only because
- * those generated messages do not reside in the {@code protobuf} package.
- * Others should not use this class directly.
- *
- * @author kenton@google.com (Kenton Varda)
- */
- public class Internal {
- /**
- * Helper called by generated code to construct default values for string
- * fields.
- * <p>
- * The protocol compiler does not actually contain a UTF-8 decoder -- it
- * just pushes UTF-8-encoded text around without touching it. The one place
- * where this presents a problem is when generating Java string literals.
- * Unicode characters in the string literal would normally need to be encoded
- * using a Unicode escape sequence, which would require decoding them.
- * To get around this, protoc instead embeds the UTF-8 bytes into the
- * generated code and leaves it to the runtime library to decode them.
- * <p>
- * It gets worse, though. If protoc just generated a byte array, like:
- * new byte[] {0x12, 0x34, 0x56, 0x78}
- * Java actually generates *code* which allocates an array and then fills
- * in each value. This is much less efficient than just embedding the bytes
- * directly into the bytecode. To get around this, we need another
- * work-around. String literals are embedded directly, so protoc actually
- * generates a string literal corresponding to the bytes. The easiest way
- * to do this is to use the ISO-8859-1 character set, which corresponds to
- * the first 256 characters of the Unicode range. Protoc can then use
- * good old CEscape to generate the string.
- * <p>
- * So we have a string literal which represents a set of bytes which
- * represents another string. This function -- stringDefaultValue --
- * converts from the generated string to the string we actually want. The
- * generated code calls this automatically.
- */
- public static String stringDefaultValue(String bytes) {
- try {
- return new String(bytes.getBytes("ISO-8859-1"), "UTF-8");
- } catch (UnsupportedEncodingException e) {
- // This should never happen since all JVMs are required to implement
- // both of the above character sets.
- throw new IllegalStateException(
- "Java VM does not support a standard character set.", e);
- }
- }
- /**
- * Helper called by generated code to construct default values for bytes
- * fields.
- * <p>
- * This is a lot like {@link #stringDefaultValue}, but for bytes fields.
- * In this case we only need the second of the two hacks -- allowing us to
- * embed raw bytes as a string literal with ISO-8859-1 encoding.
- */
- public static ByteString bytesDefaultValue(String bytes) {
- try {
- return ByteString.copyFrom(bytes.getBytes("ISO-8859-1"));
- } catch (UnsupportedEncodingException e) {
- // This should never happen since all JVMs are required to implement
- // ISO-8859-1.
- throw new IllegalStateException(
- "Java VM does not support a standard character set.", e);
- }
- }
- /**
- * Helper called by generated code to determine if a byte array is a valid
- * UTF-8 encoded string such that the original bytes can be converted to
- * a String object and then back to a byte array round tripping the bytes
- * without loss.
- * <p>
- * This is inspired by UTF_8.java in sun.nio.cs.
- *
- * @param byteString the string to check
- * @return whether the byte array is round trippable
- */
- public static boolean isValidUtf8(ByteString byteString) {
- int index = 0;
- int size = byteString.size();
- // To avoid the masking, we could change this to use bytes;
- // Then X > 0xC2 gets turned into X < -0xC2; X < 0x80
- // gets turned into X >= 0, etc.
- while (index < size) {
- int byte1 = byteString.byteAt(index++) & 0xFF;
- if (byte1 < 0x80) {
- // fast loop for single bytes
- continue;
- // we know from this point on that we have 2-4 byte forms
- } else if (byte1 < 0xC2 || byte1 > 0xF4) {
- // catch illegal first bytes: < C2 or > F4
- return false;
- }
- if (index >= size) {
- // fail if we run out of bytes
- return false;
- }
- int byte2 = byteString.byteAt(index++) & 0xFF;
- if (byte2 < 0x80 || byte2 > 0xBF) {
- // general trail-byte test
- return false;
- }
- if (byte1 <= 0xDF) {
- // two-byte form; general trail-byte test is sufficient
- continue;
- }
- // we know from this point on that we have 3 or 4 byte forms
- if (index >= size) {
- // fail if we run out of bytes
- return false;
- }
- int byte3 = byteString.byteAt(index++) & 0xFF;
- if (byte3 < 0x80 || byte3 > 0xBF) {
- // general trail-byte test
- return false;
- }
- if (byte1 <= 0xEF) {
- // three-byte form. Vastly more frequent than four-byte forms
- // The following has an extra test, but not worth restructuring
- if (byte1 == 0xE0 && byte2 < 0xA0 ||
- byte1 == 0xED && byte2 > 0x9F) {
- // check special cases of byte2
- return false;
- }
- } else {
- // four-byte form
- if (index >= size) {
- // fail if we run out of bytes
- return false;
- }
- int byte4 = byteString.byteAt(index++) & 0xFF;
- if (byte4 < 0x80 || byte4 > 0xBF) {
- // general trail-byte test
- return false;
- }
- // The following has an extra test, but not worth restructuring
- if (byte1 == 0xF0 && byte2 < 0x90 ||
- byte1 == 0xF4 && byte2 > 0x8F) {
- // check special cases of byte2
- return false;
- }
- }
- }
- return true;
- }
- /**
- * Interface for an enum value or value descriptor, to be used in FieldSet.
- * The lite library stores enum values directly in FieldSets but the full
- * library stores EnumValueDescriptors in order to better support reflection.
- */
- public interface EnumLite {
- int getNumber();
- }
- /**
- * Interface for an object which maps integers to {@link EnumLite}s.
- * {@link Descriptors.EnumDescriptor} implements this interface by mapping
- * numbers to {@link Descriptors.EnumValueDescriptor}s. Additionally,
- * every generated enum type has a static method internalGetValueMap() which
- * returns an implementation of this type that maps numbers to enum values.
- */
- public interface EnumLiteMap<T extends EnumLite> {
- T findValueByNumber(int number);
- }
- }