/thirdparty/breakpad/third_party/protobuf/protobuf/src/google/protobuf/compiler/parser.h

http://github.com/tomahawk-player/tomahawk · C++ Header · 434 lines · 157 code · 66 blank · 211 comment · 0 complexity · 16be09cc3a9a807a2ef564913948d724 MD5 · raw file

  1. // Protocol Buffers - Google's data interchange format
  2. // Copyright 2008 Google Inc. All rights reserved.
  3. // http://code.google.com/p/protobuf/
  4. //
  5. // Redistribution and use in source and binary forms, with or without
  6. // modification, are permitted provided that the following conditions are
  7. // met:
  8. //
  9. // * Redistributions of source code must retain the above copyright
  10. // notice, this list of conditions and the following disclaimer.
  11. // * Redistributions in binary form must reproduce the above
  12. // copyright notice, this list of conditions and the following disclaimer
  13. // in the documentation and/or other materials provided with the
  14. // distribution.
  15. // * Neither the name of Google Inc. nor the names of its
  16. // contributors may be used to endorse or promote products derived from
  17. // this software without specific prior written permission.
  18. //
  19. // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  20. // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  21. // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  22. // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  23. // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  24. // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  25. // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  26. // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  27. // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  28. // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  29. // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  30. // Author: kenton@google.com (Kenton Varda)
  31. // Based on original Protocol Buffers design by
  32. // Sanjay Ghemawat, Jeff Dean, and others.
  33. //
  34. // Implements parsing of .proto files to FileDescriptorProtos.
  35. #ifndef GOOGLE_PROTOBUF_COMPILER_PARSER_H__
  36. #define GOOGLE_PROTOBUF_COMPILER_PARSER_H__
  37. #include <map>
  38. #include <string>
  39. #include <utility>
  40. #include <google/protobuf/stubs/common.h>
  41. #include <google/protobuf/descriptor.h>
  42. #include <google/protobuf/descriptor.pb.h>
  43. #include <google/protobuf/repeated_field.h>
  44. #include <google/protobuf/io/tokenizer.h>
  45. namespace google {
  46. namespace protobuf { class Message; }
  47. namespace protobuf {
  48. namespace compiler {
  49. // Forward declarations of the classes defined later in this file.
  50. class Parser;
  51. class SourceLocationTable;
  52. // Implements parsing of protocol definitions (such as .proto files).
  53. //
  54. // Note that most users will be more interested in the Importer class.
  55. // Parser is a lower-level class which simply converts a single .proto file
  56. // to a FileDescriptorProto. It does not resolve import directives or perform
  57. // many other kinds of validation needed to construct a complete
  58. // FileDescriptor.
  59. class LIBPROTOBUF_EXPORT Parser {
  60. public:
  61. Parser();
  62. ~Parser();
  63. // Parse the entire input and construct a FileDescriptorProto representing
  64. // it. Returns true if no errors occurred, false otherwise.
  65. bool Parse(io::Tokenizer* input, FileDescriptorProto* file);
  66. // Optional features:
  67. // DEPRECATED: New code should use the SourceCodeInfo embedded in the
  68. // FileDescriptorProto.
  69. //
  70. // Requests that locations of certain definitions be recorded to the given
  71. // SourceLocationTable while parsing. This can be used to look up exact line
  72. // and column numbers for errors reported by DescriptorPool during validation.
  73. // Set to NULL (the default) to discard source location information.
  74. void RecordSourceLocationsTo(SourceLocationTable* location_table) {
  75. source_location_table_ = location_table;
  76. }
  77. // Requests that errors be recorded to the given ErrorCollector while
  78. // parsing. Set to NULL (the default) to discard error messages.
  79. void RecordErrorsTo(io::ErrorCollector* error_collector) {
  80. error_collector_ = error_collector;
  81. }
  82. // Returns the identifier used in the "syntax = " declaration, if one was
  83. // seen during the last call to Parse(), or the empty string otherwise.
  84. const string& GetSyntaxIdentifier() { return syntax_identifier_; }  // NOTE(review): only reads a member; could be declared const.
  85. // If set true, input files will be required to begin with a syntax
  86. // identifier. Otherwise, files may omit this. If a syntax identifier
  87. // is provided, it must be 'syntax = "proto2";' and must appear at the
  88. // top of this file regardless of whether or not it was required.
  89. void SetRequireSyntaxIdentifier(bool value) {
  90. require_syntax_identifier_ = value;
  91. }
  92. // Call SetStopAfterSyntaxIdentifier(true) to tell the parser to stop
  93. // parsing as soon as it has seen the syntax identifier, or lack thereof.
  94. // This is useful for quickly identifying the syntax of the file without
  95. // parsing the whole thing. If this is enabled, no error will be recorded
  96. // if the syntax identifier is something other than "proto2" (since
  97. // presumably the caller intends to deal with that), but other kinds of
  98. // errors (e.g. parse errors) will still be reported. When this is enabled,
  99. // you may pass a NULL FileDescriptorProto to Parse().
  100. void SetStopAfterSyntaxIdentifier(bool value) {
  101. stop_after_syntax_identifier_ = value;
  102. }
  103. private:
  104. // =================================================================
  105. // Error recovery helpers
  106. // Consume the rest of the current statement. This consumes tokens
  107. // until it sees one of:
  108. // ';' Consumes the token and returns.
  109. // '{' Consumes the brace then calls SkipRestOfBlock().
  110. // '}' Returns without consuming.
  111. // EOF Returns (can't consume).
  112. // The Parser often calls SkipStatement() after encountering a syntax
  113. // error. This allows it to go on parsing the following lines, allowing
  114. // it to report more than just one error in the file.
  115. void SkipStatement();
  116. // Consume the rest of the current block, including nested blocks,
  117. // ending after the closing '}' is encountered and consumed, or at EOF.
  118. void SkipRestOfBlock();
  119. // -----------------------------------------------------------------
  120. // Single-token consuming helpers
  121. //
  122. // These make parsing code more readable.
  123. // True if the current token is TYPE_END.
  124. inline bool AtEnd();
  125. // True if the next token matches the given text.
  126. inline bool LookingAt(const char* text);
  127. // True if the next token is of the given type.
  128. inline bool LookingAtType(io::Tokenizer::TokenType token_type);
  129. // If the next token exactly matches the text given, consume it and return
  130. // true. Otherwise, return false without logging an error.
  131. bool TryConsume(const char* text);
  132. // These attempt to read some kind of token from the input. If successful,
  133. // they return true. Otherwise they return false and add the given error
  134. // to the error list.
  135. // Consume a token with the exact text given.
  136. bool Consume(const char* text, const char* error);
  137. // Same as above, but automatically generates the error "Expected \"text\".",
  138. // where "text" is the expected token text.
  139. bool Consume(const char* text);
  140. // Consume a token of type IDENTIFIER and store its text in "output".
  141. bool ConsumeIdentifier(string* output, const char* error);
  142. // Consume an integer and store its value in "output".
  143. bool ConsumeInteger(int* output, const char* error);
  144. // Consume a 64-bit integer and store its value in "output". If the value
  145. // is greater than max_value, an error will be reported.
  146. bool ConsumeInteger64(uint64 max_value, uint64* output, const char* error);
  147. // Consume a number and store its value in "output". This will accept
  148. // tokens of either INTEGER or FLOAT type.
  149. bool ConsumeNumber(double* output, const char* error);
  150. // Consume a string literal and store its (unescaped) value in "output".
  151. bool ConsumeString(string* output, const char* error);
  152. // -----------------------------------------------------------------
  153. // Error logging helpers
  154. // Invokes error_collector_->AddError(), if error_collector_ is not NULL.
  155. void AddError(int line, int column, const string& error);
  156. // Invokes error_collector_->AddError() with the line and column number
  157. // of the current token.
  158. void AddError(const string& error);
  159. // Records a location in the SourceCodeInfo.location table (see
  160. // descriptor.proto). We use RAII to ensure that the start and end locations
  161. // are recorded -- the constructor records the start location and the
  162. // destructor records the end location. Since the parser is
  163. // recursive-descent, this works out beautifully.
  164. class LIBPROTOBUF_EXPORT LocationRecorder {
  165. public:
  166. // Construct the file's "root" location.
  167. LocationRecorder(Parser* parser);
  168. // Construct a location that represents a declaration nested within the
  169. // given parent. E.g. a field's location is nested within the location
  170. // for a message type. The parent's path will be copied, so you should
  171. // call AddPath() only to add the path components leading from the parent
  172. // to the child (as opposed to leading from the root to the child).
  173. LocationRecorder(const LocationRecorder& parent);
  174. // Convenience constructors that call AddPath() one or two times.
  175. LocationRecorder(const LocationRecorder& parent, int path1);
  176. LocationRecorder(const LocationRecorder& parent, int path1, int path2);
  177. ~LocationRecorder();
  178. // Add a path component. See SourceCodeInfo.Location.path in
  179. // descriptor.proto.
  180. void AddPath(int path_component);
  181. // By default the location is considered to start at the current token at
  182. // the time the LocationRecorder is created. StartAt() sets the start
  183. // location to the given token instead.
  184. void StartAt(const io::Tokenizer::Token& token);
  185. // By default the location is considered to end at the previous token at
  186. // the time the LocationRecorder is destroyed. EndAt() sets the end
  187. // location to the given token instead.
  188. void EndAt(const io::Tokenizer::Token& token);
  189. // Records the start point of this location to the SourceLocationTable that
  190. // was passed to RecordSourceLocationsTo(), if any. SourceLocationTable
  191. // is an older way of keeping track of source locations which is still
  192. // used in some places.
  193. void RecordLegacyLocation(const Message* descriptor,
  194. DescriptorPool::ErrorCollector::ErrorLocation location);
  195. private:
  196. Parser* parser_;
  197. SourceCodeInfo::Location* location_;
  198. void Init(const LocationRecorder& parent);
  199. };
  200. // =================================================================
  201. // Parsers for various language constructs
  202. // Parses the "syntax = \"proto2\";" line at the top of the file. Returns
  203. // false if it failed to parse or if the syntax identifier was not
  204. // recognized.
  205. bool ParseSyntaxIdentifier();
  206. // These methods parse various individual bits of code. They return
  207. // false if they completely fail to parse the construct. In this case,
  208. // it is probably necessary to skip the rest of the statement to recover.
  209. // However, if these methods return true, it does NOT mean that there
  210. // were no errors; only that there were no *syntax* errors. For instance,
  211. // if a service method is defined using proper syntax but uses a primitive
  212. // type as its input or output, ParseServiceMethod() still returns true
  213. // and only reports the error by calling AddError(). In practice, this
  214. // makes logic much simpler for the caller.
  215. // Parse a top-level message, enum, service, etc.
  216. bool ParseTopLevelStatement(FileDescriptorProto* file,
  217. const LocationRecorder& root_location);
  218. // Parse various high-level language constructs.
  219. bool ParseMessageDefinition(DescriptorProto* message,
  220. const LocationRecorder& message_location);
  221. bool ParseEnumDefinition(EnumDescriptorProto* enum_type,
  222. const LocationRecorder& enum_location);
  223. bool ParseServiceDefinition(ServiceDescriptorProto* service,
  224. const LocationRecorder& service_location);
  225. bool ParsePackage(FileDescriptorProto* file,
  226. const LocationRecorder& root_location);
  227. bool ParseImport(string* import_filename,
  228. const LocationRecorder& root_location,
  229. int index);
  230. bool ParseOption(Message* options,
  231. const LocationRecorder& options_location);
  232. // These methods parse the contents of a message, enum, or service type and
  233. // add them to the given object. They consume the entire block including
  234. // the beginning and ending brace.
  235. bool ParseMessageBlock(DescriptorProto* message,
  236. const LocationRecorder& message_location);
  237. bool ParseEnumBlock(EnumDescriptorProto* enum_type,
  238. const LocationRecorder& enum_location);
  239. bool ParseServiceBlock(ServiceDescriptorProto* service,
  240. const LocationRecorder& service_location);
  241. // Parse one statement within a message, enum, or service block, including the
  242. // final semicolon.
  243. bool ParseMessageStatement(DescriptorProto* message,
  244. const LocationRecorder& message_location);
  245. bool ParseEnumStatement(EnumDescriptorProto* message,  // NOTE(review): named "message" but typed as an enum descriptor.
  246. const LocationRecorder& enum_location);
  247. bool ParseServiceStatement(ServiceDescriptorProto* message,  // NOTE(review): named "message" but typed as a service descriptor.
  248. const LocationRecorder& service_location);
  249. // Parse a field of a message. If the field is a group, its type will be
  250. // added to "messages".
  251. //
  252. // parent_location and location_field_number_for_nested_type are needed when
  253. // parsing groups -- we need to generate a nested message type within the
  254. // parent and record its location accordingly. Since the parent could be
  255. // either a FileDescriptorProto or a DescriptorProto, we must pass in the
  256. // correct field number to use.
  257. bool ParseMessageField(FieldDescriptorProto* field,
  258. RepeatedPtrField<DescriptorProto>* messages,
  259. const LocationRecorder& parent_location,
  260. int location_field_number_for_nested_type,
  261. const LocationRecorder& field_location);
  262. // Parse an "extensions" declaration.
  263. bool ParseExtensions(DescriptorProto* message,
  264. const LocationRecorder& extensions_location);
  265. // Parse an "extend" declaration. (See also comments for
  266. // ParseMessageField().)
  267. bool ParseExtend(RepeatedPtrField<FieldDescriptorProto>* extensions,
  268. RepeatedPtrField<DescriptorProto>* messages,
  269. const LocationRecorder& parent_location,
  270. int location_field_number_for_nested_type,
  271. const LocationRecorder& extend_location);
  272. // Parse a single enum value within an enum block.
  273. bool ParseEnumConstant(EnumValueDescriptorProto* enum_value,
  274. const LocationRecorder& enum_value_location);
  275. // Parse enum constant options, i.e. the list in square brackets at the end
  276. // of the enum constant value definition.
  277. bool ParseEnumConstantOptions(EnumValueDescriptorProto* value,
  278. const LocationRecorder& enum_value_location);
  279. // Parse a single method within a service definition.
  280. bool ParseServiceMethod(MethodDescriptorProto* method,
  281. const LocationRecorder& method_location);
  282. // Parse "required", "optional", or "repeated" and fill in "label"
  283. // with the value.
  284. bool ParseLabel(FieldDescriptorProto::Label* label);
  285. // Parse a type name and fill in "type" (if it is a primitive) or
  286. // "type_name" (if it is not) with the type parsed.
  287. bool ParseType(FieldDescriptorProto::Type* type,
  288. string* type_name);
  289. // Parse a user-defined type and fill in "type_name" with the name.
  290. // If a primitive type is named, it is treated as an error.
  291. bool ParseUserDefinedType(string* type_name);
  292. // Parses field options, i.e. the stuff in square brackets at the end
  293. // of a field definition. Also parses default value.
  294. bool ParseFieldOptions(FieldDescriptorProto* field,
  295. const LocationRecorder& field_location);
  296. // Parse the "default" option. This needs special handling because its
  297. // type is the field's type.
  298. bool ParseDefaultAssignment(FieldDescriptorProto* field,
  299. const LocationRecorder& field_location);
  300. // Parse a single option name/value pair, e.g. "ctype = CORD". The name
  301. // identifies a field of the given Message, and the value of that field
  302. // is set to the parsed value.
  303. bool ParseOptionAssignment(Message* options,
  304. const LocationRecorder& options_location);
  305. // Parses a single part of a multipart option name. A multipart name consists
  306. // of names separated by dots. Each name is either an identifier or a series
  307. // of identifiers separated by dots and enclosed in parentheses. E.g.,
  308. // "foo.(bar.baz).qux".
  309. bool ParseOptionNamePart(UninterpretedOption* uninterpreted_option,
  310. const LocationRecorder& part_location);
  311. // Parses a string surrounded by balanced braces. Strips off the outer
  312. // braces and stores the enclosed string in *value.
  313. // E.g.,
  314. // { foo } *value gets 'foo'
  315. // { foo { bar: box } } *value gets 'foo { bar: box }'
  316. // {} *value gets ''
  317. //
  318. // REQUIRES: LookingAt("{")
  319. // When finished successfully, we are looking at the first token past
  320. // the ending brace.
  321. bool ParseUninterpretedBlock(string* value);
  322. // =================================================================
  323. io::Tokenizer* input_;
  324. io::ErrorCollector* error_collector_;  // set by RecordErrorsTo(); may be NULL
  325. SourceCodeInfo* source_code_info_;
  326. SourceLocationTable* source_location_table_; // legacy; set by RecordSourceLocationsTo(), may be NULL
  327. bool had_errors_;
  328. bool require_syntax_identifier_;  // set by SetRequireSyntaxIdentifier()
  329. bool stop_after_syntax_identifier_;  // set by SetStopAfterSyntaxIdentifier()
  330. string syntax_identifier_;  // returned by GetSyntaxIdentifier()
  331. GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(Parser);
  332. };
  333. // A table mapping (descriptor, ErrorLocation) pairs -- as reported by
  334. // DescriptorPool when validating descriptors -- to line and column numbers
  335. // within the original source code.
  336. //
  337. // This is semi-obsolete: FileDescriptorProto.source_code_info now contains
  338. // far more complete information about source locations. However, as of this
  339. // writing you still need to use SourceLocationTable when integrating with
  340. // DescriptorPool.
  341. class LIBPROTOBUF_EXPORT SourceLocationTable {
  342. public:
  343. SourceLocationTable();
  344. ~SourceLocationTable();
  345. // Finds the precise location of the given error and fills in *line and
  346. // *column with the line and column numbers. If not found, sets *line to
  347. // -1 and *column to 0 (since line = -1 is used to mean "error has no exact
  348. // location" in the ErrorCollector interface). Returns true if found, false
  349. // otherwise.
  350. bool Find(const Message* descriptor,
  351. DescriptorPool::ErrorCollector::ErrorLocation location,
  352. int* line, int* column) const;
  353. // Adds a location to the table: associates the (descriptor, location) pair
  354. // with the given line and column.
  355. void Add(const Message* descriptor,
  356. DescriptorPool::ErrorCollector::ErrorLocation location,
  357. int line, int column);
  358. // Clears the contents of the table.
  359. void Clear();
  360. private:
  361. typedef map<
  362. pair<const Message*, DescriptorPool::ErrorCollector::ErrorLocation>,  // key: (descriptor, error location)
  363. pair<int, int> > LocationMap;  // value: presumably (line, column) -- confirm against Add()'s definition
  364. LocationMap location_map_;
  365. };
  365. } // namespace compiler
  366. } // namespace protobuf
  367. } // namespace google
  368. #endif // GOOGLE_PROTOBUF_COMPILER_PARSER_H__