PageRenderTime 54ms CodeModel.GetById 28ms RepoModel.GetById 1ms app.codeStats 0ms

/Util/protobuf/include/google/protobuf/compiler/parser.h

https://github.com/alon/bhuman2009fork
C Header | 325 lines | 111 code | 56 blank | 158 comment | 0 complexity | 13bdebea92b5699daa83146bb9c7c248 MD5 | raw file
Possible License(s): GPL-2.0
  1. // Protocol Buffers - Google's data interchange format
  2. // Copyright 2008 Google Inc. All rights reserved.
  3. // http://code.google.com/p/protobuf/
  4. //
  5. // Redistribution and use in source and binary forms, with or without
  6. // modification, are permitted provided that the following conditions are
  7. // met:
  8. //
  9. // * Redistributions of source code must retain the above copyright
  10. // notice, this list of conditions and the following disclaimer.
  11. // * Redistributions in binary form must reproduce the above
  12. // copyright notice, this list of conditions and the following disclaimer
  13. // in the documentation and/or other materials provided with the
  14. // distribution.
  15. // * Neither the name of Google Inc. nor the names of its
  16. // contributors may be used to endorse or promote products derived from
  17. // this software without specific prior written permission.
  18. //
  19. // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  20. // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  21. // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  22. // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  23. // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  24. // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  25. // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  26. // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  27. // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  28. // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  29. // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  30. // Author: kenton@google.com (Kenton Varda)
  31. // Based on original Protocol Buffers design by
  32. // Sanjay Ghemawat, Jeff Dean, and others.
  33. //
  34. // Implements parsing of .proto files to FileDescriptorProtos.
  35. #ifndef GOOGLE_PROTOBUF_COMPILER_PARSER_H__
  36. #define GOOGLE_PROTOBUF_COMPILER_PARSER_H__
  37. #include <map>
  38. #include <string>
  39. #include <utility>
  40. #include <google/protobuf/stubs/common.h>
  41. #include <google/protobuf/descriptor.h>
  42. #include <google/protobuf/descriptor.pb.h>
  43. #include <google/protobuf/repeated_field.h>
  44. #include <google/protobuf/io/tokenizer.h>
  45. namespace google {
  46. namespace protobuf { class Message; }  // Forward declaration only: this header uses Message solely through pointers.
  47. namespace protobuf {
  48. namespace compiler {
  49. // Forward declarations of the classes defined later in this file.
  50. class Parser;
  51. class SourceLocationTable;
  52. // Implements parsing of protocol definitions (such as .proto files).
  53. //
  54. // Note that most users will be more interested in the Importer class.
  55. // Parser is a lower-level class which simply converts a single .proto file
  56. // to a FileDescriptorProto. It does not resolve import directives or perform
  57. // many other kinds of validation needed to construct a complete
  58. // FileDescriptor.
  59. class LIBPROTOBUF_EXPORT Parser {
  60. public:
  61. Parser();
  62. ~Parser();
  63. // Parse the entire input and construct a FileDescriptorProto representing
  64. // it. Returns true if no errors occurred, false otherwise.
  65. bool Parse(io::Tokenizer* input, FileDescriptorProto* file);
  66. // Optional features:
  67. // Requests that locations of certain definitions be recorded to the given
  68. // SourceLocationTable while parsing. This can be used to look up exact line
  69. // and column numbers for errors reported by DescriptorPool during validation.
  70. // Set to NULL (the default) to discard source location information.
  71. void RecordSourceLocationsTo(SourceLocationTable* location_table) {
  72. source_location_table_ = location_table;
  73. }
  74. // Requests that errors be recorded to the given ErrorCollector while
  75. // parsing. Set to NULL (the default) to discard error messages.
  76. void RecordErrorsTo(io::ErrorCollector* error_collector) {
  77. error_collector_ = error_collector;
  78. }
  79. // Returns the identifier used in the "syntax = " declaration, if one was
  80. // seen during the last call to Parse(), or the empty string otherwise.
  81. const string& GetSyntaxIndentifier() const { return syntax_identifier_; }  // Read-only accessor; the misspelled name "Indentifier" (sic) is kept for API compatibility.
  82. // If set true, input files will be required to begin with a syntax
  83. // identifier. Otherwise, files may omit this. If a syntax identifier
  84. // is provided, it must be 'syntax = "proto2";' and must appear at the
  85. // top of this file regardless of whether or not it was required.
  86. void SetRequireSyntaxIdentifier(bool value) {
  87. require_syntax_identifier_ = value;
  88. }
  89. private:
  90. // =================================================================
  91. // Error recovery helpers
  92. // Consume the rest of the current statement. This consumes tokens
  93. // until it sees one of:
  94. // ';' Consumes the token and returns.
  95. // '{' Consumes the brace then calls SkipRestOfBlock().
  96. // '}' Returns without consuming.
  97. // EOF Returns (can't consume).
  98. // The Parser often calls SkipStatement() after encountering a syntax
  99. // error. This allows it to go on parsing the following lines, allowing
  100. // it to report more than just one error in the file.
  101. void SkipStatement();
  102. // Consume the rest of the current block, including nested blocks,
  103. // ending after the closing '}' is encountered and consumed, or at EOF.
  104. void SkipRestOfBlock();
  105. // -----------------------------------------------------------------
  106. // Single-token consuming helpers
  107. //
  108. // These make parsing code more readable.
  109. // True if the current token is TYPE_END.
  110. inline bool AtEnd();
  111. // True if the next token matches the given text.
  112. inline bool LookingAt(const char* text);
  113. // True if the next token is of the given type.
  114. inline bool LookingAtType(io::Tokenizer::TokenType token_type);
  115. // If the next token exactly matches the text given, consume it and return
  116. // true. Otherwise, return false without logging an error.
  117. bool TryConsume(const char* text);
  118. // These attempt to read some kind of token from the input. If successful,
  119. // they return true. Otherwise they return false and add the given error
  120. // to the error list.
  121. // Consume a token with the exact text given.
  122. bool Consume(const char* text, const char* error);
  123. // Same as above, but automatically generates the error "Expected \"text\".",
  124. // where "text" is the expected token text.
  125. bool Consume(const char* text);
  126. // Consume a token of type IDENTIFIER and store its text in "output".
  127. bool ConsumeIdentifier(string* output, const char* error);
  128. // Consume an integer and store its value in "output".
  129. bool ConsumeInteger(int* output, const char* error);
  130. // Consume a 64-bit integer and store its value in "output". If the value
  131. // is greater than max_value, an error will be reported.
  132. bool ConsumeInteger64(uint64 max_value, uint64* output, const char* error);
  133. // Consume a number and store its value in "output". This will accept
  134. // tokens of either INTEGER or FLOAT type.
  135. bool ConsumeNumber(double* output, const char* error);
  136. // Consume a string literal and store its (unescaped) value in "output".
  137. bool ConsumeString(string* output, const char* error);
  138. // -----------------------------------------------------------------
  139. // Error logging helpers
  140. // Invokes error_collector_->AddError(), if error_collector_ is not NULL.
  141. void AddError(int line, int column, const string& error);
  142. // Invokes error_collector_->AddError() with the line and column number
  143. // of the current token.
  144. void AddError(const string& error);
  145. // Record the given line and column and associate it with this descriptor
  146. // in the SourceLocationTable.
  147. void RecordLocation(const Message* descriptor,
  148. DescriptorPool::ErrorCollector::ErrorLocation location,
  149. int line, int column);
  150. // Record the current line and column and associate it with this descriptor
  151. // in the SourceLocationTable.
  152. void RecordLocation(const Message* descriptor,
  153. DescriptorPool::ErrorCollector::ErrorLocation location);
  154. // =================================================================
  155. // Parsers for various language constructs
  156. // Parses the "syntax = \"proto2\";" line at the top of the file. Returns
  157. // false if it failed to parse or if the syntax identifier was not
  158. // recognized.
  159. bool ParseSyntaxIdentifier();
  160. // These methods parse various individual bits of code. They return
  161. // false if they completely fail to parse the construct. In this case,
  162. // it is probably necessary to skip the rest of the statement to recover.
  163. // However, if these methods return true, it does NOT mean that there
  164. // were no errors; only that there were no *syntax* errors. For instance,
  165. // if a service method is defined using proper syntax but uses a primitive
  166. // type as its input or output, ParseServiceMethod() still returns true
  167. // and only reports the error by calling AddError(). In practice, this
  168. // makes logic much simpler for the caller.
  169. // Parse a top-level message, enum, service, etc.
  170. bool ParseTopLevelStatement(FileDescriptorProto* file);
  171. // Parse various high-level language constructs.
  172. bool ParseMessageDefinition(DescriptorProto* message);
  173. bool ParseEnumDefinition(EnumDescriptorProto* enum_type);
  174. bool ParseServiceDefinition(ServiceDescriptorProto* service);
  175. bool ParsePackage(FileDescriptorProto* file);
  176. bool ParseImport(string* import_filename);
  177. bool ParseOption(Message* options);
  178. // These methods parse the contents of a message, enum, or service type and
  179. // add them to the given object. They consume the entire block including
  180. // the beginning and ending brace.
  181. bool ParseMessageBlock(DescriptorProto* message);
  182. bool ParseEnumBlock(EnumDescriptorProto* enum_type);
  183. bool ParseServiceBlock(ServiceDescriptorProto* service);
  184. // Parse one statement within a message, enum, or service block, including
  185. // the final semicolon.
  186. bool ParseMessageStatement(DescriptorProto* message);
  187. bool ParseEnumStatement(EnumDescriptorProto* message);
  188. bool ParseServiceStatement(ServiceDescriptorProto* message);
  189. // Parse a field of a message. If the field is a group, its type will be
  190. // added to "messages".
  191. bool ParseMessageField(FieldDescriptorProto* field,
  192. RepeatedPtrField<DescriptorProto>* messages);
  193. // Parse an "extensions" declaration.
  194. bool ParseExtensions(DescriptorProto* message);
  195. // Parse an "extend" declaration.
  196. bool ParseExtend(RepeatedPtrField<FieldDescriptorProto>* extensions,
  197. RepeatedPtrField<DescriptorProto>* messages);
  198. // Parse a single enum value within an enum block.
  199. bool ParseEnumConstant(EnumValueDescriptorProto* enum_value);
  200. // Parse enum constant options, i.e. the list in square brackets at the end
  201. // of the enum constant value definition.
  202. bool ParseEnumConstantOptions(EnumValueDescriptorProto* value);
  203. // Parse a single method within a service definition.
  204. bool ParseServiceMethod(MethodDescriptorProto* method);
  205. // Parse "required", "optional", or "repeated" and fill in "label"
  206. // with the value.
  207. bool ParseLabel(FieldDescriptorProto::Label* label);
  208. // Parse a type name and fill in "type" (if it is a primitive) or
  209. // "type_name" (if it is not) with the type parsed.
  210. bool ParseType(FieldDescriptorProto::Type* type,
  211. string* type_name);
  212. // Parse a user-defined type and fill in "type_name" with the name.
  213. // If a primitive type is named, it is treated as an error.
  214. bool ParseUserDefinedType(string* type_name);
  215. // Parses field options, i.e. the stuff in square brackets at the end
  216. // of a field definition. Also parses default value.
  217. bool ParseFieldOptions(FieldDescriptorProto* field);
  218. // Parse the "default" option. This needs special handling because its
  219. // type is the field's type.
  220. bool ParseDefaultAssignment(FieldDescriptorProto* field);
  221. // Parse a single option name/value pair, e.g. "ctype = CORD". The name
  222. // identifies a field of the given Message, and the value of that field
  223. // is set to the parsed value.
  224. bool ParseOptionAssignment(Message* options);
  225. // Parses a single part of a multipart option name. A multipart name consists
  226. // of names separated by dots. Each name is either an identifier or a series
  227. // of identifiers separated by dots and enclosed in parentheses. E.g.,
  228. // "foo.(bar.baz).qux".
  229. bool ParseOptionNamePart(UninterpretedOption* uninterpreted_option);
  230. // =================================================================
  231. io::Tokenizer* input_;
  232. io::ErrorCollector* error_collector_;
  233. SourceLocationTable* source_location_table_;
  234. bool had_errors_;
  235. bool require_syntax_identifier_;
  236. string syntax_identifier_;
  237. GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(Parser);
  238. };
  239. // A table mapping (descriptor, ErrorLocation) pairs -- as reported by
  240. // DescriptorPool when validating descriptors -- to line and column numbers
  241. // within the original source code.
  242. class LIBPROTOBUF_EXPORT SourceLocationTable {
  243. public:
  244. SourceLocationTable();
  245. ~SourceLocationTable();
  246. // Finds the precise location of the given error and fills in *line and
  247. // *column with the line and column numbers. If not found, sets *line to
  248. // -1 and *column to 0 (since line = -1 is used to mean "error has no exact
  249. // location" in the ErrorCollector interface). Returns true if found, false
  250. // otherwise.
  251. bool Find(const Message* descriptor,
  252. DescriptorPool::ErrorCollector::ErrorLocation location,
  253. int* line, int* column) const;
  254. // Adds a location to the table: records the given line and column for the (descriptor, location) pair.
  255. void Add(const Message* descriptor,
  256. DescriptorPool::ErrorCollector::ErrorLocation location,
  257. int line, int column);
  258. // Clears the contents of the table.
  259. void Clear();
  260. private:
  261. typedef map<
  262. pair<const Message*, DescriptorPool::ErrorCollector::ErrorLocation>,
  263. pair<int, int> > LocationMap;  // Key: (descriptor, location). Value: presumably (line, column), matching Add()/Find() -- confirm against the implementation.
  264. LocationMap location_map_;
  265. };
  266. } // namespace compiler
  267. } // namespace protobuf
  268. } // namespace google
  269. #endif // GOOGLE_PROTOBUF_COMPILER_PARSER_H__