PageRenderTime 85ms CodeModel.GetById 47ms app.highlight 29ms RepoModel.GetById 2ms app.codeStats 0ms

/thirdparty/breakpad/third_party/protobuf/protobuf/src/google/protobuf/compiler/parser.h

http://github.com/tomahawk-player/tomahawk
C++ Header | 434 lines | 157 code | 66 blank | 211 comment | 0 complexity | 16be09cc3a9a807a2ef564913948d724 MD5 | raw file
  1// Protocol Buffers - Google's data interchange format
  2// Copyright 2008 Google Inc.  All rights reserved.
  3// http://code.google.com/p/protobuf/
  4//
  5// Redistribution and use in source and binary forms, with or without
  6// modification, are permitted provided that the following conditions are
  7// met:
  8//
  9//     * Redistributions of source code must retain the above copyright
 10// notice, this list of conditions and the following disclaimer.
 11//     * Redistributions in binary form must reproduce the above
 12// copyright notice, this list of conditions and the following disclaimer
 13// in the documentation and/or other materials provided with the
 14// distribution.
 15//     * Neither the name of Google Inc. nor the names of its
 16// contributors may be used to endorse or promote products derived from
 17// this software without specific prior written permission.
 18//
 19// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 20// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 21// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 22// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 23// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 24// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 25// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 26// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 27// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 28// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 29// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 30
 31// Author: kenton@google.com (Kenton Varda)
 32//  Based on original Protocol Buffers design by
 33//  Sanjay Ghemawat, Jeff Dean, and others.
 34//
 35// Implements parsing of .proto files to FileDescriptorProtos.
 36
 37#ifndef GOOGLE_PROTOBUF_COMPILER_PARSER_H__
 38#define GOOGLE_PROTOBUF_COMPILER_PARSER_H__
 39
 40#include <map>
 41#include <string>
 42#include <utility>
 43#include <google/protobuf/stubs/common.h>
 44#include <google/protobuf/descriptor.h>
 45#include <google/protobuf/descriptor.pb.h>
 46#include <google/protobuf/repeated_field.h>
 47#include <google/protobuf/io/tokenizer.h>
 48
 49namespace google {
 50namespace protobuf { class Message; }
 51
 52namespace protobuf {
 53namespace compiler {
 54
 55// Defined in this file.
 56class Parser;
 57class SourceLocationTable;
 58
 59// Implements parsing of protocol definitions (such as .proto files).
 60//
 61// Note that most users will be more interested in the Importer class.
 62// Parser is a lower-level class which simply converts a single .proto file
 63// to a FileDescriptorProto.  It does not resolve import directives or perform
 64// many other kinds of validation needed to construct a complete
 65// FileDescriptor.
 66class LIBPROTOBUF_EXPORT Parser {
 67 public:
 68  Parser();
 69  ~Parser();
 70
 71  // Parse the entire input and construct a FileDescriptorProto representing
 72  // it.  Returns true if no errors occurred, false otherwise.
 73  bool Parse(io::Tokenizer* input, FileDescriptorProto* file);
 74
 75  // Optional fetaures:
 76
 77  // DEPRECATED:  New code should use the SourceCodeInfo embedded in the
 78  //   FileDescriptorProto.
 79  //
 80  // Requests that locations of certain definitions be recorded to the given
 81  // SourceLocationTable while parsing.  This can be used to look up exact line
 82  // and column numbers for errors reported by DescriptorPool during validation.
 83  // Set to NULL (the default) to discard source location information.
 84  void RecordSourceLocationsTo(SourceLocationTable* location_table) {
 85    source_location_table_ = location_table;
 86  }
 87
 88  // Requests that errors be recorded to the given ErrorCollector while
 89  // parsing.  Set to NULL (the default) to discard error messages.
 90  void RecordErrorsTo(io::ErrorCollector* error_collector) {
 91    error_collector_ = error_collector;
 92  }
 93
 94  // Returns the identifier used in the "syntax = " declaration, if one was
 95  // seen during the last call to Parse(), or the empty string otherwise.
 96  const string& GetSyntaxIdentifier() { return syntax_identifier_; }
 97
 98  // If set true, input files will be required to begin with a syntax
 99  // identifier.  Otherwise, files may omit this.  If a syntax identifier
100  // is provided, it must be 'syntax = "proto2";' and must appear at the
101  // top of this file regardless of whether or not it was required.
102  void SetRequireSyntaxIdentifier(bool value) {
103    require_syntax_identifier_ = value;
104  }
105
106  // Call SetStopAfterSyntaxIdentifier(true) to tell the parser to stop
107  // parsing as soon as it has seen the syntax identifier, or lack thereof.
108  // This is useful for quickly identifying the syntax of the file without
109  // parsing the whole thing.  If this is enabled, no error will be recorded
110  // if the syntax identifier is something other than "proto2" (since
111  // presumably the caller intends to deal with that), but other kinds of
112  // errors (e.g. parse errors) will still be reported.  When this is enabled,
113  // you may pass a NULL FileDescriptorProto to Parse().
114  void SetStopAfterSyntaxIdentifier(bool value) {
115    stop_after_syntax_identifier_ = value;
116  }
117
118 private:
119  // =================================================================
120  // Error recovery helpers
121
122  // Consume the rest of the current statement.  This consumes tokens
123  // until it sees one of:
124  //   ';'  Consumes the token and returns.
125  //   '{'  Consumes the brace then calls SkipRestOfBlock().
126  //   '}'  Returns without consuming.
127  //   EOF  Returns (can't consume).
128  // The Parser often calls SkipStatement() after encountering a syntax
129  // error.  This allows it to go on parsing the following lines, allowing
130  // it to report more than just one error in the file.
131  void SkipStatement();
132
133  // Consume the rest of the current block, including nested blocks,
134  // ending after the closing '}' is encountered and consumed, or at EOF.
135  void SkipRestOfBlock();
136
137  // -----------------------------------------------------------------
138  // Single-token consuming helpers
139  //
140  // These make parsing code more readable.
141
142  // True if the current token is TYPE_END.
143  inline bool AtEnd();
144
145  // True if the next token matches the given text.
146  inline bool LookingAt(const char* text);
147  // True if the next token is of the given type.
148  inline bool LookingAtType(io::Tokenizer::TokenType token_type);
149
150  // If the next token exactly matches the text given, consume it and return
151  // true.  Otherwise, return false without logging an error.
152  bool TryConsume(const char* text);
153
154  // These attempt to read some kind of token from the input.  If successful,
155  // they return true.  Otherwise they return false and add the given error
156  // to the error list.
157
158  // Consume a token with the exact text given.
159  bool Consume(const char* text, const char* error);
160  // Same as above, but automatically generates the error "Expected \"text\".",
161  // where "text" is the expected token text.
162  bool Consume(const char* text);
163  // Consume a token of type IDENTIFIER and store its text in "output".
164  bool ConsumeIdentifier(string* output, const char* error);
165  // Consume an integer and store its value in "output".
166  bool ConsumeInteger(int* output, const char* error);
167  // Consume a 64-bit integer and store its value in "output".  If the value
168  // is greater than max_value, an error will be reported.
169  bool ConsumeInteger64(uint64 max_value, uint64* output, const char* error);
170  // Consume a number and store its value in "output".  This will accept
171  // tokens of either INTEGER or FLOAT type.
172  bool ConsumeNumber(double* output, const char* error);
173  // Consume a string literal and store its (unescaped) value in "output".
174  bool ConsumeString(string* output, const char* error);
175
176  // -----------------------------------------------------------------
177  // Error logging helpers
178
179  // Invokes error_collector_->AddError(), if error_collector_ is not NULL.
180  void AddError(int line, int column, const string& error);
181
182  // Invokes error_collector_->AddError() with the line and column number
183  // of the current token.
184  void AddError(const string& error);
185
186  // Records a location in the SourceCodeInfo.location table (see
187  // descriptor.proto).  We use RAII to ensure that the start and end locations
188  // are recorded -- the constructor records the start location and the
189  // destructor records the end location.  Since the parser is
190  // recursive-descent, this works out beautifully.
191  class LIBPROTOBUF_EXPORT LocationRecorder {
192   public:
193    // Construct the file's "root" location.
194    LocationRecorder(Parser* parser);
195
196    // Construct a location that represents a declaration nested within the
197    // given parent.  E.g. a field's location is nested within the location
198    // for a message type.  The parent's path will be copied, so you should
199    // call AddPath() only to add the path components leading from the parent
200    // to the child (as opposed to leading from the root to the child).
201    LocationRecorder(const LocationRecorder& parent);
202
203    // Convenience constructors that call AddPath() one or two times.
204    LocationRecorder(const LocationRecorder& parent, int path1);
205    LocationRecorder(const LocationRecorder& parent, int path1, int path2);
206
207    ~LocationRecorder();
208
209    // Add a path component.  See SourceCodeInfo.Location.path in
210    // descriptor.proto.
211    void AddPath(int path_component);
212
213    // By default the location is considered to start at the current token at
214    // the time the LocationRecorder is created.  StartAt() sets the start
215    // location to the given token instead.
216    void StartAt(const io::Tokenizer::Token& token);
217
218    // By default the location is considered to end at the previous token at
219    // the time the LocationRecorder is destroyed.  EndAt() sets the end
220    // location to the given token instead.
221    void EndAt(const io::Tokenizer::Token& token);
222
223    // Records the start point of this location to the SourceLocationTable that
224    // was passed to RecordSourceLocationsTo(), if any.  SourceLocationTable
225    // is an older way of keeping track of source locations which is still
226    // used in some places.
227    void RecordLegacyLocation(const Message* descriptor,
228        DescriptorPool::ErrorCollector::ErrorLocation location);
229
230   private:
231    Parser* parser_;
232    SourceCodeInfo::Location* location_;
233
234    void Init(const LocationRecorder& parent);
235  };
236
237  // =================================================================
238  // Parsers for various language constructs
239
240  // Parses the "syntax = \"proto2\";" line at the top of the file.  Returns
241  // false if it failed to parse or if the syntax identifier was not
242  // recognized.
243  bool ParseSyntaxIdentifier();
244
245  // These methods parse various individual bits of code.  They return
246  // false if they completely fail to parse the construct.  In this case,
247  // it is probably necessary to skip the rest of the statement to recover.
248  // However, if these methods return true, it does NOT mean that there
249  // were no errors; only that there were no *syntax* errors.  For instance,
250  // if a service method is defined using proper syntax but uses a primitive
251  // type as its input or output, ParseMethodField() still returns true
252  // and only reports the error by calling AddError().  In practice, this
253  // makes logic much simpler for the caller.
254
255  // Parse a top-level message, enum, service, etc.
256  bool ParseTopLevelStatement(FileDescriptorProto* file,
257                              const LocationRecorder& root_location);
258
259  // Parse various language high-level language construrcts.
260  bool ParseMessageDefinition(DescriptorProto* message,
261                              const LocationRecorder& message_location);
262  bool ParseEnumDefinition(EnumDescriptorProto* enum_type,
263                           const LocationRecorder& enum_location);
264  bool ParseServiceDefinition(ServiceDescriptorProto* service,
265                              const LocationRecorder& service_location);
266  bool ParsePackage(FileDescriptorProto* file,
267                    const LocationRecorder& root_location);
268  bool ParseImport(string* import_filename,
269                   const LocationRecorder& root_location,
270                   int index);
271  bool ParseOption(Message* options,
272                   const LocationRecorder& options_location);
273
274  // These methods parse the contents of a message, enum, or service type and
275  // add them to the given object.  They consume the entire block including
276  // the beginning and ending brace.
277  bool ParseMessageBlock(DescriptorProto* message,
278                         const LocationRecorder& message_location);
279  bool ParseEnumBlock(EnumDescriptorProto* enum_type,
280                      const LocationRecorder& enum_location);
281  bool ParseServiceBlock(ServiceDescriptorProto* service,
282                         const LocationRecorder& service_location);
283
284  // Parse one statement within a message, enum, or service block, inclunding
285  // final semicolon.
286  bool ParseMessageStatement(DescriptorProto* message,
287                             const LocationRecorder& message_location);
288  bool ParseEnumStatement(EnumDescriptorProto* message,
289                          const LocationRecorder& enum_location);
290  bool ParseServiceStatement(ServiceDescriptorProto* message,
291                             const LocationRecorder& service_location);
292
293  // Parse a field of a message.  If the field is a group, its type will be
294  // added to "messages".
295  //
296  // parent_location and location_field_number_for_nested_type are needed when
297  // parsing groups -- we need to generate a nested message type within the
298  // parent and record its location accordingly.  Since the parent could be
299  // either a FileDescriptorProto or a DescriptorProto, we must pass in the
300  // correct field number to use.
301  bool ParseMessageField(FieldDescriptorProto* field,
302                         RepeatedPtrField<DescriptorProto>* messages,
303                         const LocationRecorder& parent_location,
304                         int location_field_number_for_nested_type,
305                         const LocationRecorder& field_location);
306
307  // Parse an "extensions" declaration.
308  bool ParseExtensions(DescriptorProto* message,
309                       const LocationRecorder& extensions_location);
310
311  // Parse an "extend" declaration.  (See also comments for
312  // ParseMessageField().)
313  bool ParseExtend(RepeatedPtrField<FieldDescriptorProto>* extensions,
314                   RepeatedPtrField<DescriptorProto>* messages,
315                   const LocationRecorder& parent_location,
316                   int location_field_number_for_nested_type,
317                   const LocationRecorder& extend_location);
318
319  // Parse a single enum value within an enum block.
320  bool ParseEnumConstant(EnumValueDescriptorProto* enum_value,
321                         const LocationRecorder& enum_value_location);
322
323  // Parse enum constant options, i.e. the list in square brackets at the end
324  // of the enum constant value definition.
325  bool ParseEnumConstantOptions(EnumValueDescriptorProto* value,
326                                const LocationRecorder& enum_value_location);
327
328  // Parse a single method within a service definition.
329  bool ParseServiceMethod(MethodDescriptorProto* method,
330                          const LocationRecorder& method_location);
331
332  // Parse "required", "optional", or "repeated" and fill in "label"
333  // with the value.
334  bool ParseLabel(FieldDescriptorProto::Label* label);
335
336  // Parse a type name and fill in "type" (if it is a primitive) or
337  // "type_name" (if it is not) with the type parsed.
338  bool ParseType(FieldDescriptorProto::Type* type,
339                 string* type_name);
340  // Parse a user-defined type and fill in "type_name" with the name.
341  // If a primitive type is named, it is treated as an error.
342  bool ParseUserDefinedType(string* type_name);
343
344  // Parses field options, i.e. the stuff in square brackets at the end
345  // of a field definition.  Also parses default value.
346  bool ParseFieldOptions(FieldDescriptorProto* field,
347                         const LocationRecorder& field_location);
348
349  // Parse the "default" option.  This needs special handling because its
350  // type is the field's type.
351  bool ParseDefaultAssignment(FieldDescriptorProto* field,
352                              const LocationRecorder& field_location);
353
354  // Parse a single option name/value pair, e.g. "ctype = CORD".  The name
355  // identifies a field of the given Message, and the value of that field
356  // is set to the parsed value.
357  bool ParseOptionAssignment(Message* options,
358                             const LocationRecorder& options_location);
359
360  // Parses a single part of a multipart option name. A multipart name consists
361  // of names separated by dots. Each name is either an identifier or a series
362  // of identifiers separated by dots and enclosed in parentheses. E.g.,
363  // "foo.(bar.baz).qux".
364  bool ParseOptionNamePart(UninterpretedOption* uninterpreted_option,
365                           const LocationRecorder& part_location);
366
367  // Parses a string surrounded by balanced braces.  Strips off the outer
368  // braces and stores the enclosed string in *value.
369  // E.g.,
370  //     { foo }                     *value gets 'foo'
371  //     { foo { bar: box } }        *value gets 'foo { bar: box }'
372  //     {}                          *value gets ''
373  //
374  // REQUIRES: LookingAt("{")
375  // When finished successfully, we are looking at the first token past
376  // the ending brace.
377  bool ParseUninterpretedBlock(string* value);
378
379  // =================================================================
380
381  io::Tokenizer* input_;
382  io::ErrorCollector* error_collector_;
383  SourceCodeInfo* source_code_info_;
384  SourceLocationTable* source_location_table_;  // legacy
385  bool had_errors_;
386  bool require_syntax_identifier_;
387  bool stop_after_syntax_identifier_;
388  string syntax_identifier_;
389
390  GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(Parser);
391};
392
393// A table mapping (descriptor, ErrorLocation) pairs -- as reported by
394// DescriptorPool when validating descriptors -- to line and column numbers
395// within the original source code.
396//
397// This is semi-obsolete:  FileDescriptorProto.source_code_info now contains
398// far more complete information about source locations.  However, as of this
399// writing you still need to use SourceLocationTable when integrating with
400// DescriptorPool.
401class LIBPROTOBUF_EXPORT SourceLocationTable {
402 public:
403  SourceLocationTable();
404  ~SourceLocationTable();
405
406  // Finds the precise location of the given error and fills in *line and
407  // *column with the line and column numbers.  If not found, sets *line to
408  // -1 and *column to 0 (since line = -1 is used to mean "error has no exact
409  // location" in the ErrorCollector interface).  Returns true if found, false
410  // otherwise.
411  bool Find(const Message* descriptor,
412            DescriptorPool::ErrorCollector::ErrorLocation location,
413            int* line, int* column) const;
414
415  // Adds a location to the table.
416  void Add(const Message* descriptor,
417           DescriptorPool::ErrorCollector::ErrorLocation location,
418           int line, int column);
419
420  // Clears the contents of the table.
421  void Clear();
422
423 private:
424  typedef map<
425    pair<const Message*, DescriptorPool::ErrorCollector::ErrorLocation>,
426    pair<int, int> > LocationMap;
427  LocationMap location_map_;
428};
429
430}  // namespace compiler
431}  // namespace protobuf
432
433}  // namespace google
434#endif  // GOOGLE_PROTOBUF_COMPILER_PARSER_H__