PageRenderTime 87ms CodeModel.GetById 11ms app.highlight 70ms RepoModel.GetById 1ms app.codeStats 0ms

/thirdparty/breakpad/third_party/protobuf/protobuf/python/google/protobuf/text_format.py

http://github.com/tomahawk-player/tomahawk
Python | 691 lines | 552 code | 43 blank | 96 comment | 54 complexity | 00f8b4081552a97cca537d8ae0516e88 MD5 | raw file
  1# Protocol Buffers - Google's data interchange format
  2# Copyright 2008 Google Inc.  All rights reserved.
  3# http://code.google.com/p/protobuf/
  4#
  5# Redistribution and use in source and binary forms, with or without
  6# modification, are permitted provided that the following conditions are
  7# met:
  8#
  9#     * Redistributions of source code must retain the above copyright
 10# notice, this list of conditions and the following disclaimer.
 11#     * Redistributions in binary form must reproduce the above
 12# copyright notice, this list of conditions and the following disclaimer
 13# in the documentation and/or other materials provided with the
 14# distribution.
 15#     * Neither the name of Google Inc. nor the names of its
 16# contributors may be used to endorse or promote products derived from
 17# this software without specific prior written permission.
 18#
 19# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 20# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 21# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 22# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 23# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 24# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 25# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 26# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 27# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 28# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 29# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 30
 31"""Contains routines for printing protocol messages in text format."""
 32
 33__author__ = 'kenton@google.com (Kenton Varda)'
 34
 35import cStringIO
 36import re
 37
 38from collections import deque
 39from google.protobuf.internal import type_checkers
 40from google.protobuf import descriptor
 41
 42__all__ = [ 'MessageToString', 'PrintMessage', 'PrintField',
 43            'PrintFieldValue', 'Merge' ]
 44
 45
 46# Infinity and NaN are not explicitly supported by Python pre-2.6, and
 47# float('inf') does not work on Windows (pre-2.6).
 48_INFINITY = 1e10000    # overflows, thus will actually be infinity.
 49_NAN = _INFINITY * 0
 50
 51
class ParseError(Exception):
  """Thrown in case of ASCII parsing error.

  Raised by Merge() and the tokenizer when text-format input cannot be
  parsed; the exception message carries a 'line:column : reason' prefix.
  """
 54
 55
 56def MessageToString(message, as_utf8=False, as_one_line=False):
 57  out = cStringIO.StringIO()
 58  PrintMessage(message, out, as_utf8=as_utf8, as_one_line=as_one_line)
 59  result = out.getvalue()
 60  out.close()
 61  if as_one_line:
 62    return result.rstrip()
 63  return result
 64
 65
 66def PrintMessage(message, out, indent=0, as_utf8=False, as_one_line=False):
 67  for field, value in message.ListFields():
 68    if field.label == descriptor.FieldDescriptor.LABEL_REPEATED:
 69      for element in value:
 70        PrintField(field, element, out, indent, as_utf8, as_one_line)
 71    else:
 72      PrintField(field, value, out, indent, as_utf8, as_one_line)
 73
 74
 75def PrintField(field, value, out, indent=0, as_utf8=False, as_one_line=False):
 76  """Print a single field name/value pair.  For repeated fields, the value
 77  should be a single element."""
 78
 79  out.write(' ' * indent);
 80  if field.is_extension:
 81    out.write('[')
 82    if (field.containing_type.GetOptions().message_set_wire_format and
 83        field.type == descriptor.FieldDescriptor.TYPE_MESSAGE and
 84        field.message_type == field.extension_scope and
 85        field.label == descriptor.FieldDescriptor.LABEL_OPTIONAL):
 86      out.write(field.message_type.full_name)
 87    else:
 88      out.write(field.full_name)
 89    out.write(']')
 90  elif field.type == descriptor.FieldDescriptor.TYPE_GROUP:
 91    # For groups, use the capitalized name.
 92    out.write(field.message_type.name)
 93  else:
 94    out.write(field.name)
 95
 96  if field.cpp_type != descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
 97    # The colon is optional in this case, but our cross-language golden files
 98    # don't include it.
 99    out.write(': ')
100
101  PrintFieldValue(field, value, out, indent, as_utf8, as_one_line)
102  if as_one_line:
103    out.write(' ')
104  else:
105    out.write('\n')
106
107
def PrintFieldValue(field, value, out, indent=0,
                    as_utf8=False, as_one_line=False):
  """Print a single field value (not including name).  For repeated fields,
  the value should be a single element."""
  cpp_type = field.cpp_type
  if cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
    # Sub-messages are wrapped in braces; nested content gets two extra
    # spaces of indentation in multi-line mode.
    if as_one_line:
      out.write(' { ')
      PrintMessage(value, out, indent, as_utf8, as_one_line)
      out.write('}')
    else:
      out.write(' {\n')
      PrintMessage(value, out, indent + 2, as_utf8, as_one_line)
      out.write(' ' * indent + '}')
  elif cpp_type == descriptor.FieldDescriptor.CPPTYPE_ENUM:
    out.write(field.enum_type.values_by_number[value].name)
  elif cpp_type == descriptor.FieldDescriptor.CPPTYPE_STRING:
    # Unicode values are escaped from their UTF-8 encoding; byte strings
    # are escaped as-is.  (Exact-type check deliberate, not isinstance.)
    if type(value) is unicode:
      escaped = _CEscape(value.encode('utf-8'), as_utf8)
    else:
      escaped = _CEscape(value, as_utf8)
    out.write('"' + escaped + '"')
  elif cpp_type == descriptor.FieldDescriptor.CPPTYPE_BOOL:
    if value:
      out.write("true")
    else:
      out.write("false")
  else:
    out.write(str(value))
138
139
def Merge(text, message):
  """Merges an ASCII representation of a protocol message into a message.

  Args:
    text: Message ASCII representation.
    message: A protocol buffer message to merge into.

  Raises:
    ParseError: On ASCII parsing problems.
  """
  tok = _Tokenizer(text)
  # Each iteration merges exactly one top-level field.
  while not tok.AtEnd():
    _MergeField(tok, message)
153
154
def _MergeField(tokenizer, message):
  """Merges a single protocol message field into a message.

  Args:
    tokenizer: A tokenizer to parse the field name and values.
    message: A protocol message to record the data.

  Raises:
    ParseError: In case of ASCII parsing problems.
  """
  message_descriptor = message.DESCRIPTOR
  # An extension field is written as '[full.extension.name]'; a regular
  # field is a bare identifier.
  if tokenizer.TryConsume('['):
    # Consume the dotted full name between the brackets.
    name = [tokenizer.ConsumeIdentifier()]
    while tokenizer.TryConsume('.'):
      name.append(tokenizer.ConsumeIdentifier())
    name = '.'.join(name)

    if not message_descriptor.is_extendable:
      raise tokenizer.ParseErrorPreviousToken(
          'Message type "%s" does not have extensions.' %
          message_descriptor.full_name)
    field = message.Extensions._FindExtensionByName(name)
    if not field:
      raise tokenizer.ParseErrorPreviousToken(
          'Extension "%s" not registered.' % name)
    elif message_descriptor != field.containing_type:
      raise tokenizer.ParseErrorPreviousToken(
          'Extension "%s" does not extend message type "%s".' % (
              name, message_descriptor.full_name))
    tokenizer.Consume(']')
  else:
    name = tokenizer.ConsumeIdentifier()
    field = message_descriptor.fields_by_name.get(name, None)

    # Group names are expected to be capitalized as they appear in the
    # .proto file, which actually matches their type names, not their field
    # names.
    if not field:
      field = message_descriptor.fields_by_name.get(name.lower(), None)
      # Only groups may be looked up by their lowercased type name.
      if field and field.type != descriptor.FieldDescriptor.TYPE_GROUP:
        field = None

    # A group must be spelled exactly as its capitalized type name.
    if (field and field.type == descriptor.FieldDescriptor.TYPE_GROUP and
        field.message_type.name != name):
      field = None

    if not field:
      raise tokenizer.ParseErrorPreviousToken(
          'Message type "%s" has no field named "%s".' % (
              message_descriptor.full_name, name))

  if field.cpp_type == descriptor.FieldDescriptor.CPPTYPE_MESSAGE:
    # For sub-messages the colon is optional, and the body may be
    # delimited either by '<...>' or by '{...}'.
    tokenizer.TryConsume(':')

    if tokenizer.TryConsume('<'):
      end_token = '>'
    else:
      tokenizer.Consume('{')
      end_token = '}'

    if field.label == descriptor.FieldDescriptor.LABEL_REPEATED:
      # Each occurrence of a repeated message field appends a new element.
      if field.is_extension:
        sub_message = message.Extensions[field].add()
      else:
        sub_message = getattr(message, field.name).add()
    else:
      if field.is_extension:
        sub_message = message.Extensions[field]
      else:
        sub_message = getattr(message, field.name)
      # Mark the singular sub-message as present even if its body is empty.
      sub_message.SetInParent()

    # Recursively merge fields until the matching close delimiter.
    while not tokenizer.TryConsume(end_token):
      if tokenizer.AtEnd():
        raise tokenizer.ParseErrorPreviousToken('Expected "%s".' % (end_token))
      _MergeField(tokenizer, sub_message)
  else:
    _MergeScalarField(tokenizer, message, field)
233
234
def _MergeScalarField(tokenizer, message, field):
  """Merges a single protocol message scalar field into a message.

  Args:
    tokenizer: A tokenizer to parse the field value.
    message: A protocol message to record the data.
    field: The descriptor of the field to be merged.

  Raises:
    ParseError: In case of ASCII parsing problems.
    RuntimeError: On runtime errors.
  """
  # Unlike sub-messages, scalar fields require the colon.
  tokenizer.Consume(':')
  value = None

  # Dispatch on the declared field type; each Consume* method also
  # range-checks the parsed value.
  if field.type in (descriptor.FieldDescriptor.TYPE_INT32,
                    descriptor.FieldDescriptor.TYPE_SINT32,
                    descriptor.FieldDescriptor.TYPE_SFIXED32):
    value = tokenizer.ConsumeInt32()
  elif field.type in (descriptor.FieldDescriptor.TYPE_INT64,
                      descriptor.FieldDescriptor.TYPE_SINT64,
                      descriptor.FieldDescriptor.TYPE_SFIXED64):
    value = tokenizer.ConsumeInt64()
  elif field.type in (descriptor.FieldDescriptor.TYPE_UINT32,
                      descriptor.FieldDescriptor.TYPE_FIXED32):
    value = tokenizer.ConsumeUint32()
  elif field.type in (descriptor.FieldDescriptor.TYPE_UINT64,
                      descriptor.FieldDescriptor.TYPE_FIXED64):
    value = tokenizer.ConsumeUint64()
  elif field.type in (descriptor.FieldDescriptor.TYPE_FLOAT,
                      descriptor.FieldDescriptor.TYPE_DOUBLE):
    value = tokenizer.ConsumeFloat()
  elif field.type == descriptor.FieldDescriptor.TYPE_BOOL:
    value = tokenizer.ConsumeBool()
  elif field.type == descriptor.FieldDescriptor.TYPE_STRING:
    value = tokenizer.ConsumeString()
  elif field.type == descriptor.FieldDescriptor.TYPE_BYTES:
    value = tokenizer.ConsumeByteString()
  elif field.type == descriptor.FieldDescriptor.TYPE_ENUM:
    # Enum can be specified by a number (the enum value), or by
    # a string literal (the enum name).
    enum_descriptor = field.enum_type
    if tokenizer.LookingAtInteger():
      number = tokenizer.ConsumeInt32()
      enum_value = enum_descriptor.values_by_number.get(number, None)
      if enum_value is None:
        raise tokenizer.ParseErrorPreviousToken(
            'Enum type "%s" has no value with number %d.' % (
                enum_descriptor.full_name, number))
    else:
      identifier = tokenizer.ConsumeIdentifier()
      enum_value = enum_descriptor.values_by_name.get(identifier, None)
      if enum_value is None:
        raise tokenizer.ParseErrorPreviousToken(
            'Enum type "%s" has no value named %s.' % (
                enum_descriptor.full_name, identifier))
    # Store the numeric value regardless of how it was spelled.
    value = enum_value.number
  else:
    raise RuntimeError('Unknown field type %d' % field.type)

  # Repeated fields append; singular fields (including extensions) assign.
  if field.label == descriptor.FieldDescriptor.LABEL_REPEATED:
    if field.is_extension:
      message.Extensions[field].append(value)
    else:
      getattr(message, field.name).append(value)
  else:
    if field.is_extension:
      message.Extensions[field] = value
    else:
      setattr(message, field.name, value)
305
306
307class _Tokenizer(object):
308  """Protocol buffer ASCII representation tokenizer.
309
310  This class handles the lower level string parsing by splitting it into
311  meaningful tokens.
312
313  It was directly ported from the Java protocol buffer API.
314  """
315
316  _WHITESPACE = re.compile('(\\s|(#.*$))+', re.MULTILINE)
317  _TOKEN = re.compile(
318      '[a-zA-Z_][0-9a-zA-Z_+-]*|'           # an identifier
319      '[0-9+-][0-9a-zA-Z_.+-]*|'            # a number
320      '\"([^\"\n\\\\]|\\\\.)*(\"|\\\\?$)|'  # a double-quoted string
321      '\'([^\'\n\\\\]|\\\\.)*(\'|\\\\?$)')  # a single-quoted string
322  _IDENTIFIER = re.compile('\w+')
323  _INTEGER_CHECKERS = [type_checkers.Uint32ValueChecker(),
324                       type_checkers.Int32ValueChecker(),
325                       type_checkers.Uint64ValueChecker(),
326                       type_checkers.Int64ValueChecker()]
327  _FLOAT_INFINITY = re.compile('-?inf(inity)?f?', re.IGNORECASE)
328  _FLOAT_NAN = re.compile("nanf?", re.IGNORECASE)
329
330  def __init__(self, text_message):
331    self._text_message = text_message
332
333    self._position = 0
334    self._line = -1
335    self._column = 0
336    self._token_start = None
337    self.token = ''
338    self._lines = deque(text_message.split('\n'))
339    self._current_line = ''
340    self._previous_line = 0
341    self._previous_column = 0
342    self._SkipWhitespace()
343    self.NextToken()
344
345  def AtEnd(self):
346    """Checks the end of the text was reached.
347
348    Returns:
349      True iff the end was reached.
350    """
351    return self.token == ''
352
353  def _PopLine(self):
354    while len(self._current_line) <= self._column:
355      if not self._lines:
356        self._current_line = ''
357        return
358      self._line += 1
359      self._column = 0
360      self._current_line = self._lines.popleft()
361
362  def _SkipWhitespace(self):
363    while True:
364      self._PopLine()
365      match = self._WHITESPACE.match(self._current_line, self._column)
366      if not match:
367        break
368      length = len(match.group(0))
369      self._column += length
370
371  def TryConsume(self, token):
372    """Tries to consume a given piece of text.
373
374    Args:
375      token: Text to consume.
376
377    Returns:
378      True iff the text was consumed.
379    """
380    if self.token == token:
381      self.NextToken()
382      return True
383    return False
384
385  def Consume(self, token):
386    """Consumes a piece of text.
387
388    Args:
389      token: Text to consume.
390
391    Raises:
392      ParseError: If the text couldn't be consumed.
393    """
394    if not self.TryConsume(token):
395      raise self._ParseError('Expected "%s".' % token)
396
397  def LookingAtInteger(self):
398    """Checks if the current token is an integer.
399
400    Returns:
401      True iff the current token is an integer.
402    """
403    if not self.token:
404      return False
405    c = self.token[0]
406    return (c >= '0' and c <= '9') or c == '-' or c == '+'
407
408  def ConsumeIdentifier(self):
409    """Consumes protocol message field identifier.
410
411    Returns:
412      Identifier string.
413
414    Raises:
415      ParseError: If an identifier couldn't be consumed.
416    """
417    result = self.token
418    if not self._IDENTIFIER.match(result):
419      raise self._ParseError('Expected identifier.')
420    self.NextToken()
421    return result
422
423  def ConsumeInt32(self):
424    """Consumes a signed 32bit integer number.
425
426    Returns:
427      The integer parsed.
428
429    Raises:
430      ParseError: If a signed 32bit integer couldn't be consumed.
431    """
432    try:
433      result = self._ParseInteger(self.token, is_signed=True, is_long=False)
434    except ValueError, e:
435      raise self._IntegerParseError(e)
436    self.NextToken()
437    return result
438
439  def ConsumeUint32(self):
440    """Consumes an unsigned 32bit integer number.
441
442    Returns:
443      The integer parsed.
444
445    Raises:
446      ParseError: If an unsigned 32bit integer couldn't be consumed.
447    """
448    try:
449      result = self._ParseInteger(self.token, is_signed=False, is_long=False)
450    except ValueError, e:
451      raise self._IntegerParseError(e)
452    self.NextToken()
453    return result
454
455  def ConsumeInt64(self):
456    """Consumes a signed 64bit integer number.
457
458    Returns:
459      The integer parsed.
460
461    Raises:
462      ParseError: If a signed 64bit integer couldn't be consumed.
463    """
464    try:
465      result = self._ParseInteger(self.token, is_signed=True, is_long=True)
466    except ValueError, e:
467      raise self._IntegerParseError(e)
468    self.NextToken()
469    return result
470
471  def ConsumeUint64(self):
472    """Consumes an unsigned 64bit integer number.
473
474    Returns:
475      The integer parsed.
476
477    Raises:
478      ParseError: If an unsigned 64bit integer couldn't be consumed.
479    """
480    try:
481      result = self._ParseInteger(self.token, is_signed=False, is_long=True)
482    except ValueError, e:
483      raise self._IntegerParseError(e)
484    self.NextToken()
485    return result
486
487  def ConsumeFloat(self):
488    """Consumes an floating point number.
489
490    Returns:
491      The number parsed.
492
493    Raises:
494      ParseError: If a floating point number couldn't be consumed.
495    """
496    text = self.token
497    if self._FLOAT_INFINITY.match(text):
498      self.NextToken()
499      if text.startswith('-'):
500        return -_INFINITY
501      return _INFINITY
502
503    if self._FLOAT_NAN.match(text):
504      self.NextToken()
505      return _NAN
506
507    try:
508      result = float(text)
509    except ValueError, e:
510      raise self._FloatParseError(e)
511    self.NextToken()
512    return result
513
514  def ConsumeBool(self):
515    """Consumes a boolean value.
516
517    Returns:
518      The bool parsed.
519
520    Raises:
521      ParseError: If a boolean value couldn't be consumed.
522    """
523    if self.token in ('true', 't', '1'):
524      self.NextToken()
525      return True
526    elif self.token in ('false', 'f', '0'):
527      self.NextToken()
528      return False
529    else:
530      raise self._ParseError('Expected "true" or "false".')
531
532  def ConsumeString(self):
533    """Consumes a string value.
534
535    Returns:
536      The string parsed.
537
538    Raises:
539      ParseError: If a string value couldn't be consumed.
540    """
541    bytes = self.ConsumeByteString()
542    try:
543      return unicode(bytes, 'utf-8')
544    except UnicodeDecodeError, e:
545      raise self._StringParseError(e)
546
547  def ConsumeByteString(self):
548    """Consumes a byte array value.
549
550    Returns:
551      The array parsed (as a string).
552
553    Raises:
554      ParseError: If a byte array value couldn't be consumed.
555    """
556    list = [self._ConsumeSingleByteString()]
557    while len(self.token) > 0 and self.token[0] in ('\'', '"'):
558      list.append(self._ConsumeSingleByteString())
559    return "".join(list)
560
561  def _ConsumeSingleByteString(self):
562    """Consume one token of a string literal.
563
564    String literals (whether bytes or text) can come in multiple adjacent
565    tokens which are automatically concatenated, like in C or Python.  This
566    method only consumes one token.
567    """
568    text = self.token
569    if len(text) < 1 or text[0] not in ('\'', '"'):
570      raise self._ParseError('Exptected string.')
571
572    if len(text) < 2 or text[-1] != text[0]:
573      raise self._ParseError('String missing ending quote.')
574
575    try:
576      result = _CUnescape(text[1:-1])
577    except ValueError, e:
578      raise self._ParseError(str(e))
579    self.NextToken()
580    return result
581
582  def _ParseInteger(self, text, is_signed=False, is_long=False):
583    """Parses an integer.
584
585    Args:
586      text: The text to parse.
587      is_signed: True if a signed integer must be parsed.
588      is_long: True if a long integer must be parsed.
589
590    Returns:
591      The integer value.
592
593    Raises:
594      ValueError: Thrown Iff the text is not a valid integer.
595    """
596    pos = 0
597    if text.startswith('-'):
598      pos += 1
599
600    base = 10
601    if text.startswith('0x', pos) or text.startswith('0X', pos):
602      base = 16
603    elif text.startswith('0', pos):
604      base = 8
605
606    # Do the actual parsing. Exception handling is propagated to caller.
607    result = int(text, base)
608
609    # Check if the integer is sane. Exceptions handled by callers.
610    checker = self._INTEGER_CHECKERS[2 * int(is_long) + int(is_signed)]
611    checker.CheckValue(result)
612    return result
613
614  def ParseErrorPreviousToken(self, message):
615    """Creates and *returns* a ParseError for the previously read token.
616
617    Args:
618      message: A message to set for the exception.
619
620    Returns:
621      A ParseError instance.
622    """
623    return ParseError('%d:%d : %s' % (
624        self._previous_line + 1, self._previous_column + 1, message))
625
626  def _ParseError(self, message):
627    """Creates and *returns* a ParseError for the current token."""
628    return ParseError('%d:%d : %s' % (
629        self._line + 1, self._column - len(self.token) + 1, message))
630
631  def _IntegerParseError(self, e):
632    return self._ParseError('Couldn\'t parse integer: ' + str(e))
633
634  def _FloatParseError(self, e):
635    return self._ParseError('Couldn\'t parse number: ' + str(e))
636
637  def _StringParseError(self, e):
638    return self._ParseError('Couldn\'t parse string: ' + str(e))
639
640  def NextToken(self):
641    """Reads the next meaningful token."""
642    self._previous_line = self._line
643    self._previous_column = self._column
644
645    self._column += len(self.token)
646    self._SkipWhitespace()
647
648    if not self._lines and len(self._current_line) <= self._column:
649      self.token = ''
650      return
651
652    match = self._TOKEN.match(self._current_line, self._column)
653    if match:
654      token = match.group(0)
655      self.token = token
656    else:
657      self.token = self._current_line[self._column]
658
659
660# text.encode('string_escape') does not seem to satisfy our needs as it
661# encodes unprintable characters using two-digit hex escapes whereas our
662# C++ unescaping function allows hex escapes to be any length.  So,
663# "\0011".encode('string_escape') ends up being "\\x011", which will be
664# decoded in C++ as a single-character string with char code 0x11.
665def _CEscape(text, as_utf8):
666  def escape(c):
667    o = ord(c)
668    if o == 10: return r"\n"   # optional escape
669    if o == 13: return r"\r"   # optional escape
670    if o ==  9: return r"\t"   # optional escape
671    if o == 39: return r"\'"   # optional escape
672
673    if o == 34: return r'\"'   # necessary escape
674    if o == 92: return r"\\"   # necessary escape
675
676    # necessary escapes
677    if not as_utf8 and (o >= 127 or o < 32): return "\\%03o" % o
678    return c
679  return "".join([escape(c) for c in text])
680
681
# Matches a C-style hex escape of one or two hex digits (e.g. '\x2f', '\xf').
_CUNESCAPE_HEX = re.compile('\\\\x([0-9a-fA-F]{2}|[0-9a-fA-F])')


def _CUnescape(text):
  """Reverse _CEscape: decode C-style escape sequences in |text|."""
  def _HexToChar(match):
    return chr(int(match.group(0)[2:], 16))
  # Expand hex escapes by hand first, because the 'string_escape' codec
  # rejects single-digit ones (like '\xf').
  expanded = _CUNESCAPE_HEX.sub(_HexToChar, text)
  return expanded.decode('string_escape')