PageRenderTime 99ms CodeModel.GetById 33ms app.highlight 55ms RepoModel.GetById 1ms app.codeStats 1ms

/thirdparty/breakpad/third_party/protobuf/protobuf/java/src/main/java/com/google/protobuf/TextFormat.java

http://github.com/tomahawk-player/tomahawk
Java | 1476 lines | 976 code | 146 blank | 354 comment | 190 complexity | 64953b6cec696d93e41e2064d6384a85 MD5 | raw file

Large files are truncated, but you can click here to view the full file

   1// Protocol Buffers - Google's data interchange format
   2// Copyright 2008 Google Inc.  All rights reserved.
   3// http://code.google.com/p/protobuf/
   4//
   5// Redistribution and use in source and binary forms, with or without
   6// modification, are permitted provided that the following conditions are
   7// met:
   8//
   9//     * Redistributions of source code must retain the above copyright
  10// notice, this list of conditions and the following disclaimer.
  11//     * Redistributions in binary form must reproduce the above
  12// copyright notice, this list of conditions and the following disclaimer
  13// in the documentation and/or other materials provided with the
  14// distribution.
  15//     * Neither the name of Google Inc. nor the names of its
  16// contributors may be used to endorse or promote products derived from
  17// this software without specific prior written permission.
  18//
  19// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  20// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  21// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  22// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  23// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  24// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  25// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  26// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  27// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  28// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  29// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  30
  31package com.google.protobuf;
  32
  33import com.google.protobuf.Descriptors.Descriptor;
  34import com.google.protobuf.Descriptors.FieldDescriptor;
  35import com.google.protobuf.Descriptors.EnumDescriptor;
  36import com.google.protobuf.Descriptors.EnumValueDescriptor;
  37
  38import java.io.IOException;
  39import java.nio.CharBuffer;
  40import java.math.BigInteger;
  41import java.util.ArrayList;
  42import java.util.List;
  43import java.util.Locale;
  44import java.util.Map;
  45import java.util.regex.Matcher;
  46import java.util.regex.Pattern;
  47
  48/**
  49 * Provide text parsing and formatting support for proto2 instances.
  50 * The implementation largely follows google/protobuf/text_format.cc.
  51 *
  52 * @author wenboz@google.com Wenbo Zhu
  53 * @author kenton@google.com Kenton Varda
  54 */
  55public final class TextFormat {
  /** Printer used for the regular output, which may contain newlines. */
  private static final Printer DEFAULT_PRINTER = new Printer(false);

  /** Printer used for the compact output, which omits newlines. */
  private static final Printer SINGLE_LINE_PRINTER = new Printer(true);

  /** Static utility class; not meant to be instantiated. */
  private TextFormat() {
  }
  60
  61  /**
  62   * Outputs a textual representation of the Protocol Message supplied into
  63   * the parameter output. (This representation is the new version of the
  64   * classic "ProtocolPrinter" output from the original Protocol Buffer system)
  65   */
  66  public static void print(final Message message, final Appendable output)
  67                           throws IOException {
  68    DEFAULT_PRINTER.print(message, new TextGenerator(output));
  69  }
  70
  71  /** Outputs a textual representation of {@code fields} to {@code output}. */
  72  public static void print(final UnknownFieldSet fields,
  73                           final Appendable output)
  74                           throws IOException {
  75    DEFAULT_PRINTER.printUnknownFields(fields, new TextGenerator(output));
  76  }
  77
  78  /**
  79   * Generates a human readable form of this message, useful for debugging and
  80   * other purposes, with no newline characters.
  81   */
  82  public static String shortDebugString(final Message message) {
  83    try {
  84      final StringBuilder sb = new StringBuilder();
  85      SINGLE_LINE_PRINTER.print(message, new TextGenerator(sb));
  86      // Single line mode currently might have an extra space at the end.
  87      return sb.toString().trim();
  88    } catch (IOException e) {
  89      throw new IllegalStateException(e);
  90    }
  91  }
  92
  93  /**
  94   * Generates a human readable form of the unknown fields, useful for debugging
  95   * and other purposes, with no newline characters.
  96   */
  97  public static String shortDebugString(final UnknownFieldSet fields) {
  98    try {
  99      final StringBuilder sb = new StringBuilder();
 100      SINGLE_LINE_PRINTER.printUnknownFields(fields, new TextGenerator(sb));
 101      // Single line mode currently might have an extra space at the end.
 102      return sb.toString().trim();
 103    } catch (IOException e) {
 104      throw new IllegalStateException(e);
 105    }
 106  }
 107
 108  /**
 109   * Like {@code print()}, but writes directly to a {@code String} and
 110   * returns it.
 111   */
 112  public static String printToString(final Message message) {
 113    try {
 114      final StringBuilder text = new StringBuilder();
 115      print(message, text);
 116      return text.toString();
 117    } catch (IOException e) {
 118      throw new IllegalStateException(e);
 119    }
 120  }
 121
 122  /**
 123   * Like {@code print()}, but writes directly to a {@code String} and
 124   * returns it.
 125   */
 126  public static String printToString(final UnknownFieldSet fields) {
 127    try {
 128      final StringBuilder text = new StringBuilder();
 129      print(fields, text);
 130      return text.toString();
 131    } catch (IOException e) {
 132      throw new IllegalStateException(e);
 133    }
 134  }
 135
 136  public static void printField(final FieldDescriptor field,
 137                                final Object value,
 138                                final Appendable output)
 139                                throws IOException {
 140    DEFAULT_PRINTER.printField(field, value, new TextGenerator(output));
 141  }
 142
 143  public static String printFieldToString(final FieldDescriptor field,
 144                                          final Object value) {
 145    try {
 146      final StringBuilder text = new StringBuilder();
 147      printField(field, value, text);
 148      return text.toString();
 149    } catch (IOException e) {
 150      throw new IllegalStateException(e);
 151    }
 152  }
 153
 154  /**
 155   * Outputs a textual representation of the value of given field value.
 156   *
 157   * @param field the descriptor of the field
 158   * @param value the value of the field
 159   * @param output the output to which to append the formatted value
 160   * @throws ClassCastException if the value is not appropriate for the
 161   *     given field descriptor
 162   * @throws IOException if there is an exception writing to the output
 163   */
 164  public static void printFieldValue(final FieldDescriptor field,
 165                                     final Object value,
 166                                     final Appendable output)
 167                                     throws IOException {
 168    DEFAULT_PRINTER.printFieldValue(field, value, new TextGenerator(output));
 169  }
 170
 171  /**
 172   * Outputs a textual representation of the value of an unknown field.
 173   *
 174   * @param tag the field's tag number
 175   * @param value the value of the field
 176   * @param output the output to which to append the formatted value
 177   * @throws ClassCastException if the value is not appropriate for the
 178   *     given field descriptor
 179   * @throws IOException if there is an exception writing to the output
 180   */
 181  public static void printUnknownFieldValue(final int tag,
 182                                            final Object value,
 183                                            final Appendable output)
 184                                            throws IOException {
 185    printUnknownFieldValue(tag, value, new TextGenerator(output));
 186  }
 187
 188  private static void printUnknownFieldValue(final int tag,
 189                                             final Object value,
 190                                             final TextGenerator generator)
 191                                             throws IOException {
 192    switch (WireFormat.getTagWireType(tag)) {
 193      case WireFormat.WIRETYPE_VARINT:
 194        generator.print(unsignedToString((Long) value));
 195        break;
 196      case WireFormat.WIRETYPE_FIXED32:
 197        generator.print(
 198            String.format((Locale) null, "0x%08x", (Integer) value));
 199        break;
 200      case WireFormat.WIRETYPE_FIXED64:
 201        generator.print(String.format((Locale) null, "0x%016x", (Long) value));
 202        break;
 203      case WireFormat.WIRETYPE_LENGTH_DELIMITED:
 204        generator.print("\"");
 205        generator.print(escapeBytes((ByteString) value));
 206        generator.print("\"");
 207        break;
 208      case WireFormat.WIRETYPE_START_GROUP:
 209        DEFAULT_PRINTER.printUnknownFields((UnknownFieldSet) value, generator);
 210        break;
 211      default:
 212        throw new IllegalArgumentException("Bad tag: " + tag);
 213    }
 214  }
 215
 216  /** Helper class for converting protobufs to text. */
 217  private static final class Printer {
 218    /** Whether to omit newlines from the output. */
 219    final boolean singleLineMode;
 220
 221    private Printer(final boolean singleLineMode) {
 222      this.singleLineMode = singleLineMode;
 223    }
 224
 225    private void print(final Message message, final TextGenerator generator)
 226        throws IOException {
 227      for (Map.Entry<FieldDescriptor, Object> field
 228          : message.getAllFields().entrySet()) {
 229        printField(field.getKey(), field.getValue(), generator);
 230      }
 231      printUnknownFields(message.getUnknownFields(), generator);
 232    }
 233
 234    private void printField(final FieldDescriptor field, final Object value,
 235        final TextGenerator generator) throws IOException {
 236      if (field.isRepeated()) {
 237        // Repeated field.  Print each element.
 238        for (Object element : (List<?>) value) {
 239          printSingleField(field, element, generator);
 240        }
 241      } else {
 242        printSingleField(field, value, generator);
 243      }
 244    }
 245
 246    private void printSingleField(final FieldDescriptor field,
 247                                  final Object value,
 248                                  final TextGenerator generator)
 249                                  throws IOException {
 250      if (field.isExtension()) {
 251        generator.print("[");
 252        // We special-case MessageSet elements for compatibility with proto1.
 253        if (field.getContainingType().getOptions().getMessageSetWireFormat()
 254            && (field.getType() == FieldDescriptor.Type.MESSAGE)
 255            && (field.isOptional())
 256            // object equality
 257            && (field.getExtensionScope() == field.getMessageType())) {
 258          generator.print(field.getMessageType().getFullName());
 259        } else {
 260          generator.print(field.getFullName());
 261        }
 262        generator.print("]");
 263      } else {
 264        if (field.getType() == FieldDescriptor.Type.GROUP) {
 265          // Groups must be serialized with their original capitalization.
 266          generator.print(field.getMessageType().getName());
 267        } else {
 268          generator.print(field.getName());
 269        }
 270      }
 271
 272      if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) {
 273        if (singleLineMode) {
 274          generator.print(" { ");
 275        } else {
 276          generator.print(" {\n");
 277          generator.indent();
 278        }
 279      } else {
 280        generator.print(": ");
 281      }
 282
 283      printFieldValue(field, value, generator);
 284
 285      if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) {
 286        if (singleLineMode) {
 287          generator.print("} ");
 288        } else {
 289          generator.outdent();
 290          generator.print("}\n");
 291        }
 292      } else {
 293        if (singleLineMode) {
 294          generator.print(" ");
 295        } else {
 296          generator.print("\n");
 297        }
 298      }
 299    }
 300
 301    private void printFieldValue(final FieldDescriptor field,
 302                                 final Object value,
 303                                 final TextGenerator generator)
 304                                 throws IOException {
 305      switch (field.getType()) {
 306        case INT32:
 307        case SINT32:
 308        case SFIXED32:
 309          generator.print(((Integer) value).toString());
 310          break;
 311
 312        case INT64:
 313        case SINT64:
 314        case SFIXED64:
 315          generator.print(((Long) value).toString());
 316          break;
 317
 318        case BOOL:
 319          generator.print(((Boolean) value).toString());
 320          break;
 321
 322        case FLOAT:
 323          generator.print(((Float) value).toString());
 324          break;
 325
 326        case DOUBLE:
 327          generator.print(((Double) value).toString());
 328          break;
 329
 330        case UINT32:
 331        case FIXED32:
 332          generator.print(unsignedToString((Integer) value));
 333          break;
 334
 335        case UINT64:
 336        case FIXED64:
 337          generator.print(unsignedToString((Long) value));
 338          break;
 339
 340        case STRING:
 341          generator.print("\"");
 342          generator.print(escapeText((String) value));
 343          generator.print("\"");
 344          break;
 345
 346        case BYTES:
 347          generator.print("\"");
 348          generator.print(escapeBytes((ByteString) value));
 349          generator.print("\"");
 350          break;
 351
 352        case ENUM:
 353          generator.print(((EnumValueDescriptor) value).getName());
 354          break;
 355
 356        case MESSAGE:
 357        case GROUP:
 358          print((Message) value, generator);
 359          break;
 360      }
 361    }
 362
 363    private void printUnknownFields(final UnknownFieldSet unknownFields,
 364                                    final TextGenerator generator)
 365                                    throws IOException {
 366      for (Map.Entry<Integer, UnknownFieldSet.Field> entry :
 367               unknownFields.asMap().entrySet()) {
 368        final int number = entry.getKey();
 369        final UnknownFieldSet.Field field = entry.getValue();
 370        printUnknownField(number, WireFormat.WIRETYPE_VARINT,
 371            field.getVarintList(), generator);
 372        printUnknownField(number, WireFormat.WIRETYPE_FIXED32,
 373            field.getFixed32List(), generator);
 374        printUnknownField(number, WireFormat.WIRETYPE_FIXED64,
 375            field.getFixed64List(), generator);
 376        printUnknownField(number, WireFormat.WIRETYPE_LENGTH_DELIMITED,
 377            field.getLengthDelimitedList(), generator);
 378        for (final UnknownFieldSet value : field.getGroupList()) {
 379          generator.print(entry.getKey().toString());
 380          if (singleLineMode) {
 381            generator.print(" { ");
 382          } else {
 383            generator.print(" {\n");
 384            generator.indent();
 385          }
 386          printUnknownFields(value, generator);
 387          if (singleLineMode) {
 388            generator.print("} ");
 389          } else {
 390            generator.outdent();
 391            generator.print("}\n");
 392          }
 393        }
 394      }
 395    }
 396
 397    private void printUnknownField(final int number,
 398                                   final int wireType,
 399                                   final List<?> values,
 400                                   final TextGenerator generator)
 401                                   throws IOException {
 402      for (final Object value : values) {
 403        generator.print(String.valueOf(number));
 404        generator.print(": ");
 405        printUnknownFieldValue(wireType, value, generator);
 406        generator.print(singleLineMode ? " " : "\n");
 407      }
 408    }
 409  }
 410
 411  /** Convert an unsigned 32-bit integer to a string. */
 412  private static String unsignedToString(final int value) {
 413    if (value >= 0) {
 414      return Integer.toString(value);
 415    } else {
 416      return Long.toString(((long) value) & 0x00000000FFFFFFFFL);
 417    }
 418  }
 419
 420  /** Convert an unsigned 64-bit integer to a string. */
 421  private static String unsignedToString(final long value) {
 422    if (value >= 0) {
 423      return Long.toString(value);
 424    } else {
 425      // Pull off the most-significant bit so that BigInteger doesn't think
 426      // the number is negative, then set it again using setBit().
 427      return BigInteger.valueOf(value & 0x7FFFFFFFFFFFFFFFL)
 428                       .setBit(63).toString();
 429    }
 430  }
 431
 432  /**
 433   * An inner class for writing text to the output stream.
 434   */
 435  private static final class TextGenerator {
 436    private final Appendable output;
 437    private final StringBuilder indent = new StringBuilder();
 438    private boolean atStartOfLine = true;
 439
 440    private TextGenerator(final Appendable output) {
 441      this.output = output;
 442    }
 443
 444    /**
 445     * Indent text by two spaces.  After calling Indent(), two spaces will be
 446     * inserted at the beginning of each line of text.  Indent() may be called
 447     * multiple times to produce deeper indents.
 448     */
 449    public void indent() {
 450      indent.append("  ");
 451    }
 452
 453    /**
 454     * Reduces the current indent level by two spaces, or crashes if the indent
 455     * level is zero.
 456     */
 457    public void outdent() {
 458      final int length = indent.length();
 459      if (length == 0) {
 460        throw new IllegalArgumentException(
 461            " Outdent() without matching Indent().");
 462      }
 463      indent.delete(length - 2, length);
 464    }
 465
 466    /**
 467     * Print text to the output stream.
 468     */
 469    public void print(final CharSequence text) throws IOException {
 470      final int size = text.length();
 471      int pos = 0;
 472
 473      for (int i = 0; i < size; i++) {
 474        if (text.charAt(i) == '\n') {
 475          write(text.subSequence(pos, size), i - pos + 1);
 476          pos = i + 1;
 477          atStartOfLine = true;
 478        }
 479      }
 480      write(text.subSequence(pos, size), size - pos);
 481    }
 482
 483    private void write(final CharSequence data, final int size)
 484                       throws IOException {
 485      if (size == 0) {
 486        return;
 487      }
 488      if (atStartOfLine) {
 489        atStartOfLine = false;
 490        output.append(indent);
 491      }
 492      output.append(data);
 493    }
 494  }
 495
 496  // =================================================================
 497  // Parsing
 498
 499  /**
 500   * Represents a stream of tokens parsed from a {@code String}.
 501   *
 502   * <p>The Java standard library provides many classes that you might think
 503   * would be useful for implementing this, but aren't.  For example:
 504   *
 505   * <ul>
 506   * <li>{@code java.io.StreamTokenizer}:  This almost does what we want -- or,
 507   *   at least, something that would get us close to what we want -- except
 508   *   for one fatal flaw:  It automatically un-escapes strings using Java
 509   *   escape sequences, which do not include all the escape sequences we
 510   *   need to support (e.g. '\x').
 511   * <li>{@code java.util.Scanner}:  This seems like a great way at least to
 512   *   parse regular expressions out of a stream (so we wouldn't have to load
 513   *   the entire input into a single string before parsing).  Sadly,
 514   *   {@code Scanner} requires that tokens be delimited with some delimiter.
 515   *   Thus, although the text "foo:" should parse to two tokens ("foo" and
 516   *   ":"), {@code Scanner} would recognize it only as a single token.
 517   *   Furthermore, {@code Scanner} provides no way to inspect the contents
 518   *   of delimiters, making it impossible to keep track of line and column
 519   *   numbers.
 520   * </ul>
 521   *
 522   * <p>Luckily, Java's regular expression support does manage to be useful to
 523   * us.  (Barely:  We need {@code Matcher.usePattern()}, which is new in
 524   * Java 1.5.)  So, we can use that, at least.  Unfortunately, this implies
 525   * that we need to have the entire input in one contiguous string.
 526   */
 527  private static final class Tokenizer {
 528    private final CharSequence text;
 529    private final Matcher matcher;
 530    private String currentToken;
 531
 532    // The character index within this.text at which the current token begins.
 533    private int pos = 0;
 534
 535    // The line and column numbers of the current token.
 536    private int line = 0;
 537    private int column = 0;
 538
 539    // The line and column numbers of the previous token (allows throwing
 540    // errors *after* consuming).
 541    private int previousLine = 0;
 542    private int previousColumn = 0;
 543
 544    // We use possesive quantifiers (*+ and ++) because otherwise the Java
 545    // regex matcher has stack overflows on large inputs.
 546    private static final Pattern WHITESPACE =
 547      Pattern.compile("(\\s|(#.*$))++", Pattern.MULTILINE);
 548    private static final Pattern TOKEN = Pattern.compile(
 549      "[a-zA-Z_][0-9a-zA-Z_+-]*+|" +                // an identifier
 550      "[.]?[0-9+-][0-9a-zA-Z_.+-]*+|" +             // a number
 551      "\"([^\"\n\\\\]|\\\\.)*+(\"|\\\\?$)|" +       // a double-quoted string
 552      "\'([^\'\n\\\\]|\\\\.)*+(\'|\\\\?$)",         // a single-quoted string
 553      Pattern.MULTILINE);
 554
 555    private static final Pattern DOUBLE_INFINITY = Pattern.compile(
 556      "-?inf(inity)?",
 557      Pattern.CASE_INSENSITIVE);
 558    private static final Pattern FLOAT_INFINITY = Pattern.compile(
 559      "-?inf(inity)?f?",
 560      Pattern.CASE_INSENSITIVE);
 561    private static final Pattern FLOAT_NAN = Pattern.compile(
 562      "nanf?",
 563      Pattern.CASE_INSENSITIVE);
 564
 565    /** Construct a tokenizer that parses tokens from the given text. */
 566    private Tokenizer(final CharSequence text) {
 567      this.text = text;
 568      this.matcher = WHITESPACE.matcher(text);
 569      skipWhitespace();
 570      nextToken();
 571    }
 572
 573    /** Are we at the end of the input? */
 574    public boolean atEnd() {
 575      return currentToken.length() == 0;
 576    }
 577
 578    /** Advance to the next token. */
 579    public void nextToken() {
 580      previousLine = line;
 581      previousColumn = column;
 582
 583      // Advance the line counter to the current position.
 584      while (pos < matcher.regionStart()) {
 585        if (text.charAt(pos) == '\n') {
 586          ++line;
 587          column = 0;
 588        } else {
 589          ++column;
 590        }
 591        ++pos;
 592      }
 593
 594      // Match the next token.
 595      if (matcher.regionStart() == matcher.regionEnd()) {
 596        // EOF
 597        currentToken = "";
 598      } else {
 599        matcher.usePattern(TOKEN);
 600        if (matcher.lookingAt()) {
 601          currentToken = matcher.group();
 602          matcher.region(matcher.end(), matcher.regionEnd());
 603        } else {
 604          // Take one character.
 605          currentToken = String.valueOf(text.charAt(pos));
 606          matcher.region(pos + 1, matcher.regionEnd());
 607        }
 608
 609        skipWhitespace();
 610      }
 611    }
 612
 613    /**
 614     * Skip over any whitespace so that the matcher region starts at the next
 615     * token.
 616     */
 617    private void skipWhitespace() {
 618      matcher.usePattern(WHITESPACE);
 619      if (matcher.lookingAt()) {
 620        matcher.region(matcher.end(), matcher.regionEnd());
 621      }
 622    }
 623
 624    /**
 625     * If the next token exactly matches {@code token}, consume it and return
 626     * {@code true}.  Otherwise, return {@code false} without doing anything.
 627     */
 628    public boolean tryConsume(final String token) {
 629      if (currentToken.equals(token)) {
 630        nextToken();
 631        return true;
 632      } else {
 633        return false;
 634      }
 635    }
 636
 637    /**
 638     * If the next token exactly matches {@code token}, consume it.  Otherwise,
 639     * throw a {@link ParseException}.
 640     */
 641    public void consume(final String token) throws ParseException {
 642      if (!tryConsume(token)) {
 643        throw parseException("Expected \"" + token + "\".");
 644      }
 645    }
 646
 647    /**
 648     * Returns {@code true} if the next token is an integer, but does
 649     * not consume it.
 650     */
 651    public boolean lookingAtInteger() {
 652      if (currentToken.length() == 0) {
 653        return false;
 654      }
 655
 656      final char c = currentToken.charAt(0);
 657      return ('0' <= c && c <= '9') ||
 658             c == '-' || c == '+';
 659    }
 660
 661    /**
 662     * If the next token is an identifier, consume it and return its value.
 663     * Otherwise, throw a {@link ParseException}.
 664     */
 665    public String consumeIdentifier() throws ParseException {
 666      for (int i = 0; i < currentToken.length(); i++) {
 667        final char c = currentToken.charAt(i);
 668        if (('a' <= c && c <= 'z') ||
 669            ('A' <= c && c <= 'Z') ||
 670            ('0' <= c && c <= '9') ||
 671            (c == '_') || (c == '.')) {
 672          // OK
 673        } else {
 674          throw parseException("Expected identifier.");
 675        }
 676      }
 677
 678      final String result = currentToken;
 679      nextToken();
 680      return result;
 681    }
 682
 683    /**
 684     * If the next token is a 32-bit signed integer, consume it and return its
 685     * value.  Otherwise, throw a {@link ParseException}.
 686     */
 687    public int consumeInt32() throws ParseException {
 688      try {
 689        final int result = parseInt32(currentToken);
 690        nextToken();
 691        return result;
 692      } catch (NumberFormatException e) {
 693        throw integerParseException(e);
 694      }
 695    }
 696
 697    /**
 698     * If the next token is a 32-bit unsigned integer, consume it and return its
 699     * value.  Otherwise, throw a {@link ParseException}.
 700     */
 701    public int consumeUInt32() throws ParseException {
 702      try {
 703        final int result = parseUInt32(currentToken);
 704        nextToken();
 705        return result;
 706      } catch (NumberFormatException e) {
 707        throw integerParseException(e);
 708      }
 709    }
 710
 711    /**
 712     * If the next token is a 64-bit signed integer, consume it and return its
 713     * value.  Otherwise, throw a {@link ParseException}.
 714     */
 715    public long consumeInt64() throws ParseException {
 716      try {
 717        final long result = parseInt64(currentToken);
 718        nextToken();
 719        return result;
 720      } catch (NumberFormatException e) {
 721        throw integerParseException(e);
 722      }
 723    }
 724
 725    /**
 726     * If the next token is a 64-bit unsigned integer, consume it and return its
 727     * value.  Otherwise, throw a {@link ParseException}.
 728     */
 729    public long consumeUInt64() throws ParseException {
 730      try {
 731        final long result = parseUInt64(currentToken);
 732        nextToken();
 733        return result;
 734      } catch (NumberFormatException e) {
 735        throw integerParseException(e);
 736      }
 737    }
 738
 739    /**
 740     * If the next token is a double, consume it and return its value.
 741     * Otherwise, throw a {@link ParseException}.
 742     */
 743    public double consumeDouble() throws ParseException {
 744      // We need to parse infinity and nan separately because
 745      // Double.parseDouble() does not accept "inf", "infinity", or "nan".
 746      if (DOUBLE_INFINITY.matcher(currentToken).matches()) {
 747        final boolean negative = currentToken.startsWith("-");
 748        nextToken();
 749        return negative ? Double.NEGATIVE_INFINITY : Double.POSITIVE_INFINITY;
 750      }
 751      if (currentToken.equalsIgnoreCase("nan")) {
 752        nextToken();
 753        return Double.NaN;
 754      }
 755      try {
 756        final double result = Double.parseDouble(currentToken);
 757        nextToken();
 758        return result;
 759      } catch (NumberFormatException e) {
 760        throw floatParseException(e);
 761      }
 762    }
 763
 764    /**
 765     * If the next token is a float, consume it and return its value.
 766     * Otherwise, throw a {@link ParseException}.
 767     */
 768    public float consumeFloat() throws ParseException {
 769      // We need to parse infinity and nan separately because
 770      // Float.parseFloat() does not accept "inf", "infinity", or "nan".
 771      if (FLOAT_INFINITY.matcher(currentToken).matches()) {
 772        final boolean negative = currentToken.startsWith("-");
 773        nextToken();
 774        return negative ? Float.NEGATIVE_INFINITY : Float.POSITIVE_INFINITY;
 775      }
 776      if (FLOAT_NAN.matcher(currentToken).matches()) {
 777        nextToken();
 778        return Float.NaN;
 779      }
 780      try {
 781        final float result = Float.parseFloat(currentToken);
 782        nextToken();
 783        return result;
 784      } catch (NumberFormatException e) {
 785        throw floatParseException(e);
 786      }
 787    }
 788
 789    /**
 790     * If the next token is a boolean, consume it and return its value.
 791     * Otherwise, throw a {@link ParseException}.
 792     */
 793    public boolean consumeBoolean() throws ParseException {
 794      if (currentToken.equals("true") ||
 795          currentToken.equals("t") ||
 796          currentToken.equals("1")) {
 797        nextToken();
 798        return true;
 799      } else if (currentToken.equals("false") ||
 800                 currentToken.equals("f") ||
 801                 currentToken.equals("0")) {
 802        nextToken();
 803        return false;
 804      } else {
 805        throw parseException("Expected \"true\" or \"false\".");
 806      }
 807    }
 808
 809    /**
 810     * If the next token is a string, consume it and return its (unescaped)
 811     * value.  Otherwise, throw a {@link ParseException}.
 812     */
 813    public String consumeString() throws ParseException {
 814      return consumeByteString().toStringUtf8();
 815    }
 816
 817    /**
 818     * If the next token is a string, consume it, unescape it as a
 819     * {@link ByteString}, and return it.  Otherwise, throw a
 820     * {@link ParseException}.
 821     */
 822    public ByteString consumeByteString() throws ParseException {
 823      List<ByteString> list = new ArrayList<ByteString>();
 824      consumeByteString(list);
 825      while (currentToken.startsWith("'") || currentToken.startsWith("\"")) {
 826        consumeByteString(list);
 827      }
 828      return ByteString.copyFrom(list);
 829    }
 830
 831    /**
 832     * Like {@link #consumeByteString()} but adds each token of the string to
 833     * the given list.  String literals (whether bytes or text) may come in
 834     * multiple adjacent tokens which are automatically concatenated, like in
 835     * C or Python.
 836     */
 837    private void consumeByteString(List<ByteString> list) throws ParseException {
 838      final char quote = currentToken.length() > 0 ? currentToken.charAt(0)
 839                                                   : '\0';
 840      if (quote != '\"' && quote != '\'') {
 841        throw parseException("Expected string.");
 842      }
 843
 844      if (currentToken.length() < 2 ||
 845          currentToken.charAt(currentToken.length() - 1) != quote) {
 846        throw parseException("String missing ending quote.");
 847      }
 848
 849      try {
 850        final String escaped =
 851            currentToken.substring(1, currentToken.length() - 1);
 852        final ByteString result = unescapeBytes(escaped);
 853        nextToken();
 854        list.add(result);
 855      } catch (InvalidEscapeSequenceException e) {
 856        throw parseException(e.getMessage());
 857      }
 858    }
 859
 860    /**
 861     * Returns a {@link ParseException} with the current line and column
 862     * numbers in the description, suitable for throwing.
 863     */
 864    public ParseException parseException(final String description) {
 865      // Note:  People generally prefer one-based line and column numbers.
 866      return new ParseException(
 867        (line + 1) + ":" + (column + 1) + ": " + description);
 868    }
 869
 870    /**
 871     * Returns a {@link ParseException} with the line and column numbers of
 872     * the previous token in the description, suitable for throwing.
 873     */
 874    public ParseException parseExceptionPreviousToken(
 875        final String description) {
 876      // Note:  People generally prefer one-based line and column numbers.
 877      return new ParseException(
 878        (previousLine + 1) + ":" + (previousColumn + 1) + ": " + description);
 879    }
 880
 881    /**
 882     * Constructs an appropriate {@link ParseException} for the given
 883     * {@code NumberFormatException} when trying to parse an integer.
 884     */
 885    private ParseException integerParseException(
 886        final NumberFormatException e) {
 887      return parseException("Couldn't parse integer: " + e.getMessage());
 888    }
 889
 890    /**
 891     * Constructs an appropriate {@link ParseException} for the given
 892     * {@code NumberFormatException} when trying to parse a float or double.
 893     */
 894    private ParseException floatParseException(final NumberFormatException e) {
 895      return parseException("Couldn't parse number: " + e.getMessage());
 896    }
 897  }
 898
 899  /** Thrown when parsing an invalid text format message. */
 900  public static class ParseException extends IOException {
 901    private static final long serialVersionUID = 3196188060225107702L;
 902
 903    public ParseException(final String message) {
 904      super(message);
 905    }
 906  }
 907
 908  /**
 909   * Parse a text-format message from {@code input} and merge the contents
 910   * into {@code builder}.
 911   */
 912  public static void merge(final Readable input,
 913                           final Message.Builder builder)
 914                           throws IOException {
 915    merge(input, ExtensionRegistry.getEmptyRegistry(), builder);
 916  }
 917
 918  /**
 919   * Parse a text-format message from {@code input} and merge the contents
 920   * into {@code builder}.
 921   */
 922  public static void merge(final CharSequence input,
 923                           final Message.Builder builder)
 924                           throws ParseException {
 925    merge(input, ExtensionRegistry.getEmptyRegistry(), builder);
 926  }
 927
 928  /**
 929   * Parse a text-format message from {@code input} and merge the contents
 930   * into {@code builder}.  Extensions will be recognized if they are
 931   * registered in {@code extensionRegistry}.
 932   */
 933  public static void merge(final Readable input,
 934                           final ExtensionRegistry extensionRegistry,
 935                           final Message.Builder builder)
 936                           throws IOException {
 937    // Read the entire input to a String then parse that.
 938
 939    // If StreamTokenizer were not quite so crippled, or if there were a kind
 940    // of Reader that could read in chunks that match some particular regex,
 941    // or if we wanted to write a custom Reader to tokenize our stream, then
 942    // we would not have to read to one big String.  Alas, none of these is
 943    // the case.  Oh well.
 944
 945    merge(toStringBuilder(input), extensionRegistry, builder);
 946  }
 947
 948  private static final int BUFFER_SIZE = 4096;
 949
 950  // TODO(chrisn): See if working around java.io.Reader#read(CharBuffer)
 951  // overhead is worthwhile
 952  private static StringBuilder toStringBuilder(final Readable input)
 953      throws IOException {
 954    final StringBuilder text = new StringBuilder();
 955    final CharBuffer buffer = CharBuffer.allocate(BUFFER_SIZE);
 956    while (true) {
 957      final int n = input.read(buffer);
 958      if (n == -1) {
 959        break;
 960      }
 961      buffer.flip();
 962      text.append(buffer, 0, n);
 963    }
 964    return text;
 965  }
 966
 967  /**
 968   * Parse a text-format message from {@code input} and merge the contents
 969   * into {@code builder}.  Extensions will be recognized if they are
 970   * registered in {@code extensionRegistry}.
 971   */
 972  public static void merge(final CharSequence input,
 973                           final ExtensionRegistry extensionRegistry,
 974                           final Message.Builder builder)
 975                           throws ParseException {
 976    final Tokenizer tokenizer = new Tokenizer(input);
 977
 978    while (!tokenizer.atEnd()) {
 979      mergeField(tokenizer, extensionRegistry, builder);
 980    }
 981  }
 982
 983  /**
 984   * Parse a single field from {@code tokenizer} and merge it into
 985   * {@code builder}.
 986   */
 987  private static void mergeField(final Tokenizer tokenizer,
 988                                 final ExtensionRegistry extensionRegistry,
 989                                 final Message.Builder builder)
 990                                 throws ParseException {
 991    FieldDescriptor field;
 992    final Descriptor type = builder.getDescriptorForType();
 993    ExtensionRegistry.ExtensionInfo extension = null;
 994
 995    if (tokenizer.tryConsume("[")) {
 996      // An extension.
 997      final StringBuilder name =
 998          new StringBuilder(tokenizer.consumeIdentifier());
 999      while (tokenizer.tryConsume(".")) {
1000        name.append('.');
1001        name.append(tokenizer.consumeIdentifier());
1002      }
1003
1004      extension = extensionRegistry.findExtensionByName(name.toString());
1005
1006      if (extension == null) {
1007        throw tokenizer.parseExceptionPreviousToken(
1008          "Extension \"" + name + "\" not found in the ExtensionRegistry.");
1009      } else if (extension.descriptor.getContainingType() != type) {
1010        throw tokenizer.parseExceptionPreviousToken(
1011          "Extension \"" + name + "\" does not extend message type \"" +
1012          type.getFullName() + "\".");
1013      }
1014
1015      tokenizer.consume("]");
1016
1017      field = extension.descriptor;
1018    } else {
1019      final String name = tokenizer.consumeIdentifier();
1020      field = type.findFieldByName(name);
1021
1022      // Group names are expected to be capitalized as they appear in the
1023      // .proto file, which actually matches their type names, not their field
1024      // names.
1025      if (field == null) {
1026        // Explicitly specify US locale so that this code does not break when
1027        // executing in Turkey.
1028        final String lowerName = name.toLowerCase(Locale.US);
1029        field = type.findFieldByName(lowerName);
1030        // If the case-insensitive match worked but the field is NOT a group,
1031        if (field != null && field.getType() != FieldDescriptor.Type.GROUP) {
1032          field = null;
1033        }
1034      }
1035      // Again, special-case group names as described above.
1036      if (field != null && field.getType() == FieldDescriptor.Type.GROUP &&
1037          !field.getMessageType().getName().equals(name)) {
1038        field = null;
1039      }
1040
1041      if (field == null) {
1042        throw tokenizer.parseExceptionPreviousToken(
1043          "Message type \"" + type.getFullName() +
1044          "\" has no field named \"" + name + "\".");
1045      }
1046    }
1047
1048    Object value = null;
1049
1050    if (field.getJavaType() == FieldDescriptor.JavaType.MESSAGE) {
1051      tokenizer.tryConsume(":");  // optional
1052
1053      final String endToken;
1054      if (tokenizer.tryConsume("<")) {
1055        endToken = ">";
1056      } else {
1057        tokenizer.consume("{");
1058        endToken = "}";
1059      }
1060
1061      final Message.Builder subBuilder;
1062      if (extension == null) {
1063        subBuilder = builder.newBuilderForField(field);
1064      } else {
1065        subBuilder = extension.defaultInstance.newBuilderForType();
1066      }
1067
1068      while (!tokenizer.tryConsume(endToken)) {
1069        if (tokenizer.atEnd()) {
1070          throw tokenizer.parseException(
1071            "Expected \"" + endToken + "\".");
1072        }
1073        mergeField(tokenizer, extensionRegistry, subBuilder);
1074      }
1075
1076      value = subBuilder.build();
1077
1078    } else {
1079      tokenizer.consume(":");
1080
1081      switch (field.getType()) {
1082        case INT32:
1083        case SINT32:
1084        case SFIXED32:
1085          value = tokenizer.consumeInt32();
1086          break;
1087
1088        case INT64:
1089        case SINT64:
1090        case SFIXED64:
1091          value = tokenizer.consumeInt64();
1092          break;
1093
1094        case UINT32:
1095        case FIXED32:
1096          value = tokenizer.consumeUInt32();
1097          break;
1098
1099        case UINT64:
1100        case FIXED64:
1101          value = tokenizer.consumeUInt64();
1102          break;
1103
1104        case FLOAT:
1105          value = tokenizer.consumeFloat();
1106          break;
1107
1108        case DOUBLE:
1109          value = tokenizer.consumeDouble();
1110          break;
1111
1112        case BOOL:
1113          value = tokenizer.consumeBoolean();
1114          break;
1115
1116        case STRING:
1117          value = tokenizer.consumeString();
1118          break;
1119
1120        case BYTES:
1121          value = tokenizer.consumeByteString();
1122          break;
1123
1124        case ENUM:
1125          final EnumDescriptor enumType = field.getEnumType();
1126
1127          if (tokenizer.lookingAtInteger()) {
1128            final int number = tokenizer.consumeInt32();
1129            value = enumType.findValueByNumber(number);
1130            if (value == null) {
1131              throw tokenizer.parseExceptionPreviousToken(
1132                "Enum type \"" + enumType.getFullName() +
1133                "\" has no value with number " + number + '.');
1134            }
1135          } else {
1136            final String id = tokenizer.consumeIdentifier();
1137            value = enumType.findValueByName(id);
1138            if (value == null) {
1139              throw tokenizer.parseExceptionPreviousToken(
1140                "Enum type \"" + enumType.getFullName() +
1141                "\" has no value named \"" + id + "\".");
1142            }
1143          }
1144
1145          break;
1146
1147        case MESSAGE:
1148        case GROUP:
1149          throw new RuntimeException("Can't get here.");
1150      }
1151    }
1152
1153    if (field.isRepeated()) {
1154      builder.addRepeatedField(field, value);
1155    } else {
1156      builder.setField(field, value);
1157    }
1158  }
1159
1160  // =================================================================
1161  // Utility functions
1162  //
1163  // Some of these methods are package-private because Descriptors.java uses
1164  // them.
1165
1166  /**
1167   * Escapes bytes in the format used in protocol buffer text format, which
1168   * is the same as the format used for C string literals.  All bytes
1169   * that are not printable 7-bit ASCII characters are escaped, as well as
1170   * backslash, single-quote, and double-quote characters.  Characters for
1171   * which no defined short-hand escape sequence is defined will be escaped
1172   * using 3-digit octal sequences.
1173   */
1174  static String escapeBytes(final ByteString input) {
1175    final StringBuilder builder = new StringBuilder(input.size());
1176    for (int i = 0; i < input.size(); i++) {
1177      final byte b = input.byteAt(i);
1178      switch (b) {
1179        // Java does not recognize \a or \v, apparently.
1180        case 0x07: builder.append("\\a" ); break;
1181        case '\b': builder.append("\\b" ); break;
1182        case '\f': builder.append("\\f" ); break;
1183        case '\n': builder.append("\\n" ); break;
1184        case '\r': builder.append("\\r" ); break;
1185        case '\t': builder.append("\\t" ); break;
1186        case 0x0b: builder.append("\\v" ); break;
1187        case '\\': builder.append("\\\\"); break;
1188        case '\'': builder.append("\\\'"); break;
1189        case '"' : builder.append("\\\""); break;
1190        default:
1191          // Note:  Bytes with the high-order bit set should be escaped.  Since
1192          //   bytes are signed, such bytes will compare less than 0x20, hence
1193          //   the following line is correct.
1194          if (b >= 0x20) {
1195            builder.append((char) b);
1196          } else {
1197            builder.append('\\');
1198            builder.append((char) ('0' + ((b >>> 6) & 3)));
1199            builder.append((char) ('0' + ((b >>> 3) & 7)));
1200            builder.append((char) ('0' + (b & 7)));
1201          }
1202          break;
1203      }
1204    }
1205    return builder.toString();
1206  }
1207
1208  /**
1209   * Un-escape a byte sequence as escaped using
1210   * {@link #escapeBytes(ByteString)}.  Two-digit hex escapes (starting with
1211   * "\x") are also recognized.
1212   */
1213  static ByteString unescapeBytes(final CharSequence charString)
1214      throws InvalidEscapeSequenceException {
1215    // First convert the Java characater sequence to UTF-8 bytes.
1216    ByteString input = ByteString.copyFromUtf8(charString.toString());
1217    // Then unescape certain byte sequences introduced by ASCII '\\'.  The valid
1218    // escapes can all be expressed with ASCII characters, so it is safe to
1219    // operate on bytes here.
1220    //
1221    // Unescaping the input byte array will result in a byte sequence that's no
1222    // longer than the input.  That's because each escape sequence is between
1223    // two and four bytes long and stands for a single byte.
1224    final byte[] result = new byte[input.size()];
1225    int pos = 0;
1226    for (int i = 0; i < input.size(); i++) {
1227      byte c = input.byteAt(i);
1228      if (c == '\\') {
1229        if (i + 1 < input.size()) {
1230          ++i;
1231          c = input.byteAt(i);
1232          if (isOctal(c)) {
1233            // Octal escape.
1234            int code = digitValue(c);
1235            if (i + 1 < input.size() && isOctal(input.byteAt(i + 1))) {
1236              ++i;
1237              code = code * 8 + digitValue(input.byteAt(i));
1238            }
1239            if (i + 1 < input.size() && isOctal(input.byteAt(i + 1))) {
1240              ++i;
1241              code = code * 8 + digitValue(input.byteAt(i));
1242            }
1243            // TODO: Check that 0 <= code && code <= 0xFF.
1244            result[pos++] = (byte)code;
1245          } else {
1246            switch (c) {
1247              case 'a' : result[pos++] = 0x07; break;
1248              case 'b' : result[pos++] = '\b'; break;
1249              case 'f' : result[pos++] = '\f'; break;
1250              case 'n' : result[pos++] = '\n'; break;
1251              case 'r' : result[pos++] = '\r'; break;
1252              case 't' : result[pos++] = '\t'; break;
1253              case 'v' : result[pos++] = 0x0b; break;
1254              case '\\': result[pos++] = '\\'; break;
1255              case '\'': result[pos++] = '\''; break;
1256              case '"' : result[pos++] = '\"'; break;
1257
1258              case 'x':
1259                // hex escape
1260                int code = 0;
1261                if (i + 1 < input.size() && isHex(input.byteAt(i + 1))) {
1262                  ++i;
1263                  code = digitValue(input.byteAt(i));
1264                } else {
1265                  throw new InvalidEscapeSequenceException(
1266                      "Invalid escape sequence: '\\x' with no digits");
1267                }
1268                if (i + 1 < input.size() && isHex(input.byteAt(i + 1))) {
1269                  ++i;
1270                  code = code * 16 + digitValue(input.byteAt(i));
1271                }
1272                result[pos++] = (byte)code;
1273                break;
1274
1275              default:
1276                throw new InvalidEscapeSequenceException(
1277                    "Invalid escape sequence: '\\" + (char)c + '\'');
1278            }
1279          }
1280        } else {
1281          throw new InvalidEscapeSequenceException(
1282              "Invalid escape sequence: '\\' at end of string.");
1283        }
1284      } else {
1285        result[pos++] = c;
1286      }
1287    }
1288
1289    return ByteString.copyFrom(result, 0, pos);
1290  }
1291
1292  /**
1293   * Thrown by {@link TextFormat#unescapeBytes} and
1294   * {@link TextFormat#unescapeText} when an invalid escape sequence is seen.
1295   */
1296  static class InvalidEscapeSequenceException extends IOException {
1297    private static final long serialVersionUID = -8164033650142593304L;
1298
1299    InvalidEscapeSequenceException(final String description) {
1300      super(description);
1301    }
1302  }
1303
1304  /**
1305   * Like {@link #escapeBytes(ByteString)}, but escapes a text string.
1306   * Non-ASCII characters are first encoded as UTF-8, then each byte is escaped
1307   * individually as a 3-digit octal escape.  Yes, it's weird.
1308   */
1309  static String escapeText(final String input) {
1310    return escapeBytes(ByteString.copyFromUtf8(input));
1311  }
1312
1313  /**
1314   * Un-escape a text string as escaped using {@link #escapeText(String)}.
1315   * Two-digit hex escapes (starting with "\x") are also recognized.
1316   */
1317  static String unescapeText(final String input)
1318                             throws InvalidEscapeSequenceException {
1319    return unescapeBytes(input).toStringUtf8();
1320  }
1321
1322  /** Is this an octal digit? */
1323  private static boolean isOctal(final byte c) {
1324    return '0' <= c && c <= '7';
1325  }
1326
1327  /** Is this a hex digit? */
1328  private static boolean isHex(final byte c) {
1329    return ('0' <= c && c <= '9') ||
1330           ('a' <= c && c <= 'f') ||
1331           ('A' <= c && c <= 'F');
1332  }
1333
1334  /**
1335   * Interpret a character as a digit (in any base up to 36) and return the
1336   * numeric value.  This is like {@code Character.digit()} but we don't accept
1337   * non-ASCII digits.
1338   */
1339  private static int digitValue(final byte c) {
1340    if ('0' <= c && c <= '9') {
1341      return c - '0';
1342    } else if ('a' <= c && c <= 'z') {
1343      return c - 'a' + 10;
1344    } else {
1345      return c - 'A' + 10;
1346    }
1347  }
1348
1349  /**
1350   * Parse a 32-bit signed integer from the text.  Unlike the Java standard
1351   * {@code Integer.parseInt()}, this function recognizes the prefixes "0x"
1352   * and "0" to signify hexidecimal and octal numbers, respectively.
1353   */
1354  static int parseInt32(final String text) throws NumberFormatException {
1355    return (int) parseInteger(text, true, false);
1356  }
1357
1358  /**
1359   * Parse a 32-bit unsigned integer from the text.  Unlike the Java standard
1360   * {@code Integer.parseInt()}, this function recognizes the prefixes "0x"
1361   * and "0" to signify hexidecimal and octal numbers, respectively.  The
1362   * result is coerced to a (signed) {@code int} when returned since Java has
1363   * no unsigned integer type.
1364   */
1365  static int parseUInt32(final String text) throws NumberFormatException {
1366    return (int) parseInteger(text, false, false);
1367  }
1368
1369  /**
1370   * Parse a 64-bit signed integer from the text.  Unlike the Java standard
1371   * {@code Integer.parseInt()}, this function recognizes the prefixes "0x"
1372   * and "0" to signify hexidecimal and octal numbers, respectively.
1373   */
1374  static long parseInt64(final String text) throws NumberFormatException {
1375    return parseInteger(text, true, true);
1376  }
1377
1378  /**
1379   * Parse a 64-bit unsigned integer from the text.  Unlike the Java standard
1380   * {@code Integer.parseInt()}, this function recognizes the prefixes "0x"
1381   * and "0" to signify hexidecimal and octal numbers, respectively.  The
1382   * result is coerced to a (signed) {@code long} when returned since Java has
1383   * no unsigned long type.
1384   */
1385  static long parseUInt64(final String text) throws NumberFormatException {
1386    return parseInteger(text, false, true);
1387  }
1388
1389  private static long parseInteger(final String text,
1390                                   final boolean isSigned,
1391                                   final boolean isLong)
1392                                   throws NumberFormatException {
1393    int pos = 0;
1394
1395    boolean negative = false;
1396    if (text.startsWith("-", pos)) {
1397      if (!isSigned) {
1398        throw new NumberFormatException("Number must be positive: " + text);
1399      }
1400      ++pos;
1401      negative = true;
1402    }
1403
1404    int radix = 10;
1405    if (text.startsWith("0x", pos)) {
1406      pos += 2;
1407      radix = 16;
1408    } else if (text.startsWith("0", pos)) {
1409      radix = 8;
1410    }
1411
1412    final String numberText = text.substring(pos);
1413
1414    long result = 0;
1415    if (numberText.length() < 16) {
1416      // Can safely assume no overflow.
1417      result = Long.parseLong(numberText, radix);
1418      if (negative) {
1419        result = -result;
1420      }
1421
1422      // Check bounds.
1423      // No need to check for 64-bit numbers since they'd have to be 16 chars
1424      // or longer to overflow.
1425      if (!isLong) {
1426        if (isSigned) {
1427          if (result > Integer.MAX_VALUE || result < Integer.MIN_VALUE) {
1428            throw new NumberFormatException(
1429              "Number out of range for 32-bit signed integer: " + text);
1430          }
1431        } else {
1432          if (result >= (1L << 32) || result < 0) {
1433            throw new NumberFormatException(
1434              "Number out of range for 32-bit unsigned integer: " + text);
1435          }
1436        }
1437      }
1438    } else {
1439      BigInteger bigValue = new BigInteger(numberText, radix);
1440      if (negative) {
1441        bigValue = bigValue.negate();
1442      }
1443
1444      // Check bounds.
1445      if (!isLong) {
1446        if (isSigned) {
1447          if (bigValue.bitLength() > 31) {
1448            throw new NumberFormatException(
1449              "Number out of range for 32-bit signed integer: " + text);
1450          }
1451   

Large files files are truncated, but you can click here to view the full file