/thirdparty/breakpad/third_party/protobuf/protobuf/src/google/protobuf/text_format.cc

http://github.com/tomahawk-player/tomahawk · C++ · 1285 lines · 944 code · 188 blank · 153 comment · 191 complexity · a95498726f54ca15a5c996add5504582 MD5 · raw file

  1. // Protocol Buffers - Google's data interchange format
  2. // Copyright 2008 Google Inc. All rights reserved.
  3. // http://code.google.com/p/protobuf/
  4. //
  5. // Redistribution and use in source and binary forms, with or without
  6. // modification, are permitted provided that the following conditions are
  7. // met:
  8. //
  9. // * Redistributions of source code must retain the above copyright
  10. // notice, this list of conditions and the following disclaimer.
  11. // * Redistributions in binary form must reproduce the above
  12. // copyright notice, this list of conditions and the following disclaimer
  13. // in the documentation and/or other materials provided with the
  14. // distribution.
  15. // * Neither the name of Google Inc. nor the names of its
  16. // contributors may be used to endorse or promote products derived from
  17. // this software without specific prior written permission.
  18. //
  19. // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  20. // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  21. // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  22. // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  23. // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  24. // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  25. // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  26. // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  27. // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  28. // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  29. // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  30. // Author: jschorr@google.com (Joseph Schorr)
  31. // Based on original Protocol Buffers design by
  32. // Sanjay Ghemawat, Jeff Dean, and others.
  33. #include <float.h>
  34. #include <math.h>
  35. #include <stdio.h>
  36. #include <stack>
  37. #include <limits>
  38. #include <vector>
  39. #include <google/protobuf/text_format.h>
  40. #include <google/protobuf/descriptor.h>
  41. #include <google/protobuf/io/coded_stream.h>
  42. #include <google/protobuf/io/zero_copy_stream.h>
  43. #include <google/protobuf/io/zero_copy_stream_impl.h>
  44. #include <google/protobuf/unknown_field_set.h>
  45. #include <google/protobuf/descriptor.pb.h>
  46. #include <google/protobuf/io/tokenizer.h>
  47. #include <google/protobuf/stubs/strutil.h>
  48. namespace google {
  49. namespace protobuf {
  50. string Message::DebugString() const {
  51. string debug_string;
  52. TextFormat::PrintToString(*this, &debug_string);
  53. return debug_string;
  54. }
  55. string Message::ShortDebugString() const {
  56. string debug_string;
  57. TextFormat::Printer printer;
  58. printer.SetSingleLineMode(true);
  59. printer.PrintToString(*this, &debug_string);
  60. // Single line mode currently might have an extra space at the end.
  61. if (debug_string.size() > 0 &&
  62. debug_string[debug_string.size() - 1] == ' ') {
  63. debug_string.resize(debug_string.size() - 1);
  64. }
  65. return debug_string;
  66. }
  67. string Message::Utf8DebugString() const {
  68. string debug_string;
  69. TextFormat::Printer printer;
  70. printer.SetUseUtf8StringEscaping(true);
  71. printer.PrintToString(*this, &debug_string);
  72. return debug_string;
  73. }
  74. void Message::PrintDebugString() const {
  75. printf("%s", DebugString().c_str());
  76. }
  77. // ===========================================================================
  78. // Internal class for parsing an ASCII representation of a Protocol Message.
  79. // This class makes use of the Protocol Message compiler's tokenizer found
  80. // in //google/protobuf/io/tokenizer.h. Note that class's Parse
  81. // method is *not* thread-safe and should only be used in a single thread at
  82. // a time.
  // Makes code slightly more readable. The meaning of "DO(foo)" is
  // "Execute foo and fail if it fails.", where failure is indicated by
  // returning false. Borrowed from parser.cc (Thanks Kenton!).
  //
  // The expansion is a complete if/else with an empty "then" branch, so the
  // enclosing function must return bool and each use is written as a
  // statement terminated by ';'.
  #define DO(STATEMENT) if (STATEMENT) {} else return false
  87. class TextFormat::Parser::ParserImpl {
  88. public:
  // Determines if repeated values for a non-repeated field are
  // permitted, e.g., the string "foo: 1 foo: 2" for a
  // required/optional field named "foo".
  // ConsumeField() consults this policy before accepting a value for a
  // non-repeated field that is already set on the message.
  enum SingularOverwritePolicy {
    ALLOW_SINGULAR_OVERWRITES = 0,   // the last value is retained
    FORBID_SINGULAR_OVERWRITES = 1,  // an error is issued
  };
  // Sets up a parser that tokenizes input_stream.
  //
  //   root_message_type: used only to name the message type in log output
  //       when no error collector is installed.
  //   error_collector:   receives errors and warnings; may be NULL, in which
  //       case they are written with GOOGLE_LOG instead.
  //   finder:            optional extension lookup; when NULL, extensions are
  //       resolved through the message's Reflection.
  //   singular_overwrite_policy: see SingularOverwritePolicy.
  //
  // The initializer list follows the member declaration order; tokenizer_
  // is handed a pointer to tokenizer_error_collector_, which forwards to
  // this object.
  ParserImpl(const Descriptor* root_message_type,
             io::ZeroCopyInputStream* input_stream,
             io::ErrorCollector* error_collector,
             TextFormat::Finder* finder,
             SingularOverwritePolicy singular_overwrite_policy)
    : error_collector_(error_collector),
      finder_(finder),
      tokenizer_error_collector_(this),
      tokenizer_(input_stream, &tokenizer_error_collector_),
      root_message_type_(root_message_type),
      singular_overwrite_policy_(singular_overwrite_policy),
      had_errors_(false) {
    // For backwards-compatibility with proto1, we need to allow the 'f' suffix
    // for floats.
    tokenizer_.set_allow_f_after_float(true);

    // '#' starts a comment.
    tokenizer_.set_comment_style(io::Tokenizer::SH_COMMENT_STYLE);

    // Consume the starting token.
    tokenizer_.Next();
  }

  // Nothing to release: the parser does not delete any of the pointers it
  // was given.
  ~ParserImpl() { }
  117. // Parses the ASCII representation specified in input and saves the
  118. // information into the output pointer (a Message). Returns
  119. // false if an error occurs (an error will also be logged to
  120. // GOOGLE_LOG(ERROR)).
  121. bool Parse(Message* output) {
  122. // Consume fields until we cannot do so anymore.
  123. while(true) {
  124. if (LookingAtType(io::Tokenizer::TYPE_END)) {
  125. return !had_errors_;
  126. }
  127. DO(ConsumeField(output));
  128. }
  129. }
  130. bool ParseField(const FieldDescriptor* field, Message* output) {
  131. bool suc;
  132. if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
  133. suc = ConsumeFieldMessage(output, output->GetReflection(), field);
  134. } else {
  135. suc = ConsumeFieldValue(output, output->GetReflection(), field);
  136. }
  137. return suc && LookingAtType(io::Tokenizer::TYPE_END);
  138. }
  139. void ReportError(int line, int col, const string& message) {
  140. had_errors_ = true;
  141. if (error_collector_ == NULL) {
  142. if (line >= 0) {
  143. GOOGLE_LOG(ERROR) << "Error parsing text-format "
  144. << root_message_type_->full_name()
  145. << ": " << (line + 1) << ":"
  146. << (col + 1) << ": " << message;
  147. } else {
  148. GOOGLE_LOG(ERROR) << "Error parsing text-format "
  149. << root_message_type_->full_name()
  150. << ": " << message;
  151. }
  152. } else {
  153. error_collector_->AddError(line, col, message);
  154. }
  155. }
  156. void ReportWarning(int line, int col, const string& message) {
  157. if (error_collector_ == NULL) {
  158. if (line >= 0) {
  159. GOOGLE_LOG(WARNING) << "Warning parsing text-format "
  160. << root_message_type_->full_name()
  161. << ": " << (line + 1) << ":"
  162. << (col + 1) << ": " << message;
  163. } else {
  164. GOOGLE_LOG(WARNING) << "Warning parsing text-format "
  165. << root_message_type_->full_name()
  166. << ": " << message;
  167. }
  168. } else {
  169. error_collector_->AddWarning(line, col, message);
  170. }
  171. }
  172. private:
  173. GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ParserImpl);
  174. // Reports an error with the given message with information indicating
  175. // the position (as derived from the current token).
  176. void ReportError(const string& message) {
  177. ReportError(tokenizer_.current().line, tokenizer_.current().column,
  178. message);
  179. }
  180. // Reports a warning with the given message with information indicating
  181. // the position (as derived from the current token).
  182. void ReportWarning(const string& message) {
  183. ReportWarning(tokenizer_.current().line, tokenizer_.current().column,
  184. message);
  185. }
  186. // Consumes the specified message with the given starting delimeter.
  187. // This method checks to see that the end delimeter at the conclusion of
  188. // the consumption matches the starting delimeter passed in here.
  189. bool ConsumeMessage(Message* message, const string delimeter) {
  190. while (!LookingAt(">") && !LookingAt("}")) {
  191. DO(ConsumeField(message));
  192. }
  193. // Confirm that we have a valid ending delimeter.
  194. DO(Consume(delimeter));
  195. return true;
  196. }
  // Consumes the current field (as returned by the tokenizer) on the
  // passed in message.
  //
  // Steps, in order: resolve the field name (either a bracketed extension
  // name or a plain identifier), enforce the singular-overwrite policy,
  // consume the value(s), skip one optional trailing ';' or ',', and finally
  // warn if the field is marked deprecated.  Returns false on the first
  // failure; an error has been reported in that case.
  bool ConsumeField(Message* message) {
    const Reflection* reflection = message->GetReflection();
    const Descriptor* descriptor = message->GetDescriptor();

    string field_name;

    const FieldDescriptor* field = NULL;

    if (TryConsume("[")) {
      // Extension.  The name is a '.'-separated list of identifiers enclosed
      // in square brackets.
      DO(ConsumeIdentifier(&field_name));
      while (TryConsume(".")) {
        string part;
        DO(ConsumeIdentifier(&part));
        field_name += ".";
        field_name += part;
      }
      DO(Consume("]"));

      // A caller-supplied Finder takes precedence over the reflection lookup.
      field = (finder_ != NULL
               ? finder_->FindExtension(message, field_name)
               : reflection->FindKnownExtensionByName(field_name));

      if (field == NULL) {
        ReportError("Extension \"" + field_name + "\" is not defined or "
                    "is not an extension of \"" +
                    descriptor->full_name() + "\".");
        return false;
      }
    } else {
      DO(ConsumeIdentifier(&field_name));

      field = descriptor->FindFieldByName(field_name);
      // Group names are expected to be capitalized as they appear in the
      // .proto file, which actually matches their type names, not their field
      // names.
      if (field == NULL) {
        string lower_field_name = field_name;
        LowerString(&lower_field_name);
        field = descriptor->FindFieldByName(lower_field_name);
        // If the case-insensitive match worked but the field is NOT a group,
        // discard the match: only groups may be referred to this way.
        if (field != NULL && field->type() != FieldDescriptor::TYPE_GROUP) {
          field = NULL;
        }
      }
      // Again, special-case group names as described above: a group is only
      // accepted when the text spells its message type name exactly.
      if (field != NULL && field->type() == FieldDescriptor::TYPE_GROUP
          && field->message_type()->name() != field_name) {
        field = NULL;
      }

      if (field == NULL) {
        ReportError("Message type \"" + descriptor->full_name() +
                    "\" has no field named \"" + field_name + "\".");
        return false;
      }
    }

    // Fail if the field is not repeated and it has already been specified.
    if ((singular_overwrite_policy_ == FORBID_SINGULAR_OVERWRITES) &&
        !field->is_repeated() && reflection->HasField(*message, field)) {
      ReportError("Non-repeated field \"" + field_name +
                  "\" is specified multiple times.");
      return false;
    }

    // Perform special handling for embedded message types.
    if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
      // ':' is optional here.
      TryConsume(":");
      DO(ConsumeFieldMessage(message, reflection, field));
    } else {
      DO(Consume(":"));
      if (field->is_repeated() && TryConsume("[")) {
        // Short repeated format, e.g.  "foo: [1, 2, 3]"
        // At least one value is required; values are separated by ','.
        while (true) {
          DO(ConsumeFieldValue(message, reflection, field));
          if (TryConsume("]")) {
            break;
          }
          DO(Consume(","));
        }
      } else {
        DO(ConsumeFieldValue(message, reflection, field));
      }
    }

    // For historical reasons, fields may optionally be separated by commas or
    // semicolons.  At most one separator is consumed; the result is ignored.
    TryConsume(";") || TryConsume(",");

    if (field->options().deprecated()) {
      ReportWarning("text format contains deprecated field \""
                    + field_name + "\"");
    }

    return true;
  }
  285. bool ConsumeFieldMessage(Message* message,
  286. const Reflection* reflection,
  287. const FieldDescriptor* field) {
  288. string delimeter;
  289. if (TryConsume("<")) {
  290. delimeter = ">";
  291. } else {
  292. DO(Consume("{"));
  293. delimeter = "}";
  294. }
  295. if (field->is_repeated()) {
  296. DO(ConsumeMessage(reflection->AddMessage(message, field), delimeter));
  297. } else {
  298. DO(ConsumeMessage(reflection->MutableMessage(message, field),
  299. delimeter));
  300. }
  301. return true;
  302. }
  303. bool ConsumeFieldValue(Message* message,
  304. const Reflection* reflection,
  305. const FieldDescriptor* field) {
  306. // Define an easy to use macro for setting fields. This macro checks
  307. // to see if the field is repeated (in which case we need to use the Add
  308. // methods or not (in which case we need to use the Set methods).
  309. #define SET_FIELD(CPPTYPE, VALUE) \
  310. if (field->is_repeated()) { \
  311. reflection->Add##CPPTYPE(message, field, VALUE); \
  312. } else { \
  313. reflection->Set##CPPTYPE(message, field, VALUE); \
  314. } \
  315. switch(field->cpp_type()) {
  316. case FieldDescriptor::CPPTYPE_INT32: {
  317. int64 value;
  318. DO(ConsumeSignedInteger(&value, kint32max));
  319. SET_FIELD(Int32, static_cast<int32>(value));
  320. break;
  321. }
  322. case FieldDescriptor::CPPTYPE_UINT32: {
  323. uint64 value;
  324. DO(ConsumeUnsignedInteger(&value, kuint32max));
  325. SET_FIELD(UInt32, static_cast<uint32>(value));
  326. break;
  327. }
  328. case FieldDescriptor::CPPTYPE_INT64: {
  329. int64 value;
  330. DO(ConsumeSignedInteger(&value, kint64max));
  331. SET_FIELD(Int64, value);
  332. break;
  333. }
  334. case FieldDescriptor::CPPTYPE_UINT64: {
  335. uint64 value;
  336. DO(ConsumeUnsignedInteger(&value, kuint64max));
  337. SET_FIELD(UInt64, value);
  338. break;
  339. }
  340. case FieldDescriptor::CPPTYPE_FLOAT: {
  341. double value;
  342. DO(ConsumeDouble(&value));
  343. SET_FIELD(Float, static_cast<float>(value));
  344. break;
  345. }
  346. case FieldDescriptor::CPPTYPE_DOUBLE: {
  347. double value;
  348. DO(ConsumeDouble(&value));
  349. SET_FIELD(Double, value);
  350. break;
  351. }
  352. case FieldDescriptor::CPPTYPE_STRING: {
  353. string value;
  354. DO(ConsumeString(&value));
  355. SET_FIELD(String, value);
  356. break;
  357. }
  358. case FieldDescriptor::CPPTYPE_BOOL: {
  359. if (LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
  360. uint64 value;
  361. DO(ConsumeUnsignedInteger(&value, 1));
  362. SET_FIELD(Bool, value);
  363. } else {
  364. string value;
  365. DO(ConsumeIdentifier(&value));
  366. if (value == "true" || value == "t") {
  367. SET_FIELD(Bool, true);
  368. } else if (value == "false" || value == "f") {
  369. SET_FIELD(Bool, false);
  370. } else {
  371. ReportError("Invalid value for boolean field \"" + field->name()
  372. + "\". Value: \"" + value + "\".");
  373. return false;
  374. }
  375. }
  376. break;
  377. }
  378. case FieldDescriptor::CPPTYPE_ENUM: {
  379. string value;
  380. const EnumDescriptor* enum_type = field->enum_type();
  381. const EnumValueDescriptor* enum_value = NULL;
  382. if (LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) {
  383. DO(ConsumeIdentifier(&value));
  384. // Find the enumeration value.
  385. enum_value = enum_type->FindValueByName(value);
  386. } else if (LookingAt("-") ||
  387. LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
  388. int64 int_value;
  389. DO(ConsumeSignedInteger(&int_value, kint32max));
  390. value = SimpleItoa(int_value); // for error reporting
  391. enum_value = enum_type->FindValueByNumber(int_value);
  392. } else {
  393. ReportError("Expected integer or identifier.");
  394. return false;
  395. }
  396. if (enum_value == NULL) {
  397. ReportError("Unknown enumeration value of \"" + value + "\" for "
  398. "field \"" + field->name() + "\".");
  399. return false;
  400. }
  401. SET_FIELD(Enum, enum_value);
  402. break;
  403. }
  404. case FieldDescriptor::CPPTYPE_MESSAGE: {
  405. // We should never get here. Put here instead of a default
  406. // so that if new types are added, we get a nice compiler warning.
  407. GOOGLE_LOG(FATAL) << "Reached an unintended state: CPPTYPE_MESSAGE";
  408. break;
  409. }
  410. }
  411. #undef SET_FIELD
  412. return true;
  413. }
  414. // Returns true if the current token's text is equal to that specified.
  415. bool LookingAt(const string& text) {
  416. return tokenizer_.current().text == text;
  417. }
  418. // Returns true if the current token's type is equal to that specified.
  419. bool LookingAtType(io::Tokenizer::TokenType token_type) {
  420. return tokenizer_.current().type == token_type;
  421. }
  422. // Consumes an identifier and saves its value in the identifier parameter.
  423. // Returns false if the token is not of type IDENTFIER.
  424. bool ConsumeIdentifier(string* identifier) {
  425. if (!LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) {
  426. ReportError("Expected identifier.");
  427. return false;
  428. }
  429. *identifier = tokenizer_.current().text;
  430. tokenizer_.Next();
  431. return true;
  432. }
  433. // Consumes a string and saves its value in the text parameter.
  434. // Returns false if the token is not of type STRING.
  435. bool ConsumeString(string* text) {
  436. if (!LookingAtType(io::Tokenizer::TYPE_STRING)) {
  437. ReportError("Expected string.");
  438. return false;
  439. }
  440. text->clear();
  441. while (LookingAtType(io::Tokenizer::TYPE_STRING)) {
  442. io::Tokenizer::ParseStringAppend(tokenizer_.current().text, text);
  443. tokenizer_.Next();
  444. }
  445. return true;
  446. }
  447. // Consumes a uint64 and saves its value in the value parameter.
  448. // Returns false if the token is not of type INTEGER.
  449. bool ConsumeUnsignedInteger(uint64* value, uint64 max_value) {
  450. if (!LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
  451. ReportError("Expected integer.");
  452. return false;
  453. }
  454. if (!io::Tokenizer::ParseInteger(tokenizer_.current().text,
  455. max_value, value)) {
  456. ReportError("Integer out of range.");
  457. return false;
  458. }
  459. tokenizer_.Next();
  460. return true;
  461. }
  462. // Consumes an int64 and saves its value in the value parameter.
  463. // Note that since the tokenizer does not support negative numbers,
  464. // we actually may consume an additional token (for the minus sign) in this
  465. // method. Returns false if the token is not an integer
  466. // (signed or otherwise).
  467. bool ConsumeSignedInteger(int64* value, uint64 max_value) {
  468. bool negative = false;
  469. if (TryConsume("-")) {
  470. negative = true;
  471. // Two's complement always allows one more negative integer than
  472. // positive.
  473. ++max_value;
  474. }
  475. uint64 unsigned_value;
  476. DO(ConsumeUnsignedInteger(&unsigned_value, max_value));
  477. *value = static_cast<int64>(unsigned_value);
  478. if (negative) {
  479. *value = -*value;
  480. }
  481. return true;
  482. }
  483. // Consumes a double and saves its value in the value parameter.
  484. // Note that since the tokenizer does not support negative numbers,
  485. // we actually may consume an additional token (for the minus sign) in this
  486. // method. Returns false if the token is not a double
  487. // (signed or otherwise).
  488. bool ConsumeDouble(double* value) {
  489. bool negative = false;
  490. if (TryConsume("-")) {
  491. negative = true;
  492. }
  493. // A double can actually be an integer, according to the tokenizer.
  494. // Therefore, we must check both cases here.
  495. if (LookingAtType(io::Tokenizer::TYPE_INTEGER)) {
  496. // We have found an integer value for the double.
  497. uint64 integer_value;
  498. DO(ConsumeUnsignedInteger(&integer_value, kuint64max));
  499. *value = static_cast<double>(integer_value);
  500. } else if (LookingAtType(io::Tokenizer::TYPE_FLOAT)) {
  501. // We have found a float value for the double.
  502. *value = io::Tokenizer::ParseFloat(tokenizer_.current().text);
  503. // Mark the current token as consumed.
  504. tokenizer_.Next();
  505. } else if (LookingAtType(io::Tokenizer::TYPE_IDENTIFIER)) {
  506. string text = tokenizer_.current().text;
  507. LowerString(&text);
  508. if (text == "inf" || text == "infinity") {
  509. *value = std::numeric_limits<double>::infinity();
  510. tokenizer_.Next();
  511. } else if (text == "nan") {
  512. *value = std::numeric_limits<double>::quiet_NaN();
  513. tokenizer_.Next();
  514. } else {
  515. ReportError("Expected double.");
  516. return false;
  517. }
  518. } else {
  519. ReportError("Expected double.");
  520. return false;
  521. }
  522. if (negative) {
  523. *value = -*value;
  524. }
  525. return true;
  526. }
  527. // Consumes a token and confirms that it matches that specified in the
  528. // value parameter. Returns false if the token found does not match that
  529. // which was specified.
  530. bool Consume(const string& value) {
  531. const string& current_value = tokenizer_.current().text;
  532. if (current_value != value) {
  533. ReportError("Expected \"" + value + "\", found \"" + current_value
  534. + "\".");
  535. return false;
  536. }
  537. tokenizer_.Next();
  538. return true;
  539. }
  540. // Attempts to consume the supplied value. Returns false if a the
  541. // token found does not match the value specified.
  542. bool TryConsume(const string& value) {
  543. if (tokenizer_.current().text == value) {
  544. tokenizer_.Next();
  545. return true;
  546. } else {
  547. return false;
  548. }
  549. }
  550. // An internal instance of the Tokenizer's error collector, used to
  551. // collect any base-level parse errors and feed them to the ParserImpl.
  552. class ParserErrorCollector : public io::ErrorCollector {
  553. public:
  554. explicit ParserErrorCollector(TextFormat::Parser::ParserImpl* parser) :
  555. parser_(parser) { }
  556. virtual ~ParserErrorCollector() { };
  557. virtual void AddError(int line, int column, const string& message) {
  558. parser_->ReportError(line, column, message);
  559. }
  560. virtual void AddWarning(int line, int column, const string& message) {
  561. parser_->ReportWarning(line, column, message);
  562. }
  563. private:
  564. GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ParserErrorCollector);
  565. TextFormat::Parser::ParserImpl* parser_;
  566. };
  567. io::ErrorCollector* error_collector_;
  568. TextFormat::Finder* finder_;
  569. ParserErrorCollector tokenizer_error_collector_;
  570. io::Tokenizer tokenizer_;
  571. const Descriptor* root_message_type_;
  572. SingularOverwritePolicy singular_overwrite_policy_;
  573. bool had_errors_;
  574. };
  575. #undef DO
  576. // ===========================================================================
  577. // Internal class for writing text to the io::ZeroCopyOutputStream. Adapted
  578. // from the Printer found in //google/protobuf/io/printer.h
  579. class TextFormat::Printer::TextGenerator {
  580. public:
  581. explicit TextGenerator(io::ZeroCopyOutputStream* output,
  582. int initial_indent_level)
  583. : output_(output),
  584. buffer_(NULL),
  585. buffer_size_(0),
  586. at_start_of_line_(true),
  587. failed_(false),
  588. indent_(""),
  589. initial_indent_level_(initial_indent_level) {
  590. indent_.resize(initial_indent_level_ * 2, ' ');
  591. }
  592. ~TextGenerator() {
  593. // Only BackUp() if we're sure we've successfully called Next() at least
  594. // once.
  595. if (buffer_size_ > 0) {
  596. output_->BackUp(buffer_size_);
  597. }
  598. }
  599. // Indent text by two spaces. After calling Indent(), two spaces will be
  600. // inserted at the beginning of each line of text. Indent() may be called
  601. // multiple times to produce deeper indents.
  602. void Indent() {
  603. indent_ += " ";
  604. }
  605. // Reduces the current indent level by two spaces, or crashes if the indent
  606. // level is zero.
  607. void Outdent() {
  608. if (indent_.empty() ||
  609. indent_.size() < initial_indent_level_ * 2) {
  610. GOOGLE_LOG(DFATAL) << " Outdent() without matching Indent().";
  611. return;
  612. }
  613. indent_.resize(indent_.size() - 2);
  614. }
  615. // Print text to the output stream.
  616. void Print(const string& str) {
  617. Print(str.data(), str.size());
  618. }
  619. // Print text to the output stream.
  620. void Print(const char* text) {
  621. Print(text, strlen(text));
  622. }
  623. // Print text to the output stream.
  624. void Print(const char* text, int size) {
  625. int pos = 0; // The number of bytes we've written so far.
  626. for (int i = 0; i < size; i++) {
  627. if (text[i] == '\n') {
  628. // Saw newline. If there is more text, we may need to insert an indent
  629. // here. So, write what we have so far, including the '\n'.
  630. Write(text + pos, i - pos + 1);
  631. pos = i + 1;
  632. // Setting this true will cause the next Write() to insert an indent
  633. // first.
  634. at_start_of_line_ = true;
  635. }
  636. }
  637. // Write the rest.
  638. Write(text + pos, size - pos);
  639. }
  // True if any write to the underlying stream failed.  (We don't just
  // crash in this case because this is an I/O failure, not a programming
  // error.)  The flag is set by Write() when the stream's Next() returns
  // false; once set, Write() returns without writing.
  bool failed() const { return failed_; }
  644. private:
  645. GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(TextGenerator);
  646. void Write(const char* data, int size) {
  647. if (failed_) return;
  648. if (size == 0) return;
  649. if (at_start_of_line_) {
  650. // Insert an indent.
  651. at_start_of_line_ = false;
  652. Write(indent_.data(), indent_.size());
  653. if (failed_) return;
  654. }
  655. while (size > buffer_size_) {
  656. // Data exceeds space in the buffer. Copy what we can and request a
  657. // new buffer.
  658. memcpy(buffer_, data, buffer_size_);
  659. data += buffer_size_;
  660. size -= buffer_size_;
  661. void* void_buffer;
  662. failed_ = !output_->Next(&void_buffer, &buffer_size_);
  663. if (failed_) return;
  664. buffer_ = reinterpret_cast<char*>(void_buffer);
  665. }
  666. // Buffer is big enough to receive the data; copy it.
  667. memcpy(buffer_, data, size);
  668. buffer_ += size;
  669. buffer_size_ -= size;
  670. }
  671. io::ZeroCopyOutputStream* const output_;
  672. char* buffer_;
  673. int buffer_size_;
  674. bool at_start_of_line_;
  675. bool failed_;
  676. string indent_;
  677. int initial_indent_level_;
  678. };
  679. // ===========================================================================
  // Out-of-line destructor definition; the body is intentionally empty.
  TextFormat::Finder::~Finder() {
  }
  // Default configuration: no error collector (ParserImpl then reports via
  // GOOGLE_LOG), no custom extension finder, and messages with missing
  // required fields are rejected (allow_partial_ is false).
  TextFormat::Parser::Parser()
    : error_collector_(NULL),
      finder_(NULL),
      allow_partial_(false) {
  }

  // Nothing to release: the stored pointers are not deleted here.
  TextFormat::Parser::~Parser() {}
  688. bool TextFormat::Parser::Parse(io::ZeroCopyInputStream* input,
  689. Message* output) {
  690. output->Clear();
  691. ParserImpl parser(output->GetDescriptor(), input, error_collector_,
  692. finder_, ParserImpl::FORBID_SINGULAR_OVERWRITES);
  693. return MergeUsingImpl(input, output, &parser);
  694. }
  695. bool TextFormat::Parser::ParseFromString(const string& input,
  696. Message* output) {
  697. io::ArrayInputStream input_stream(input.data(), input.size());
  698. return Parse(&input_stream, output);
  699. }
  700. bool TextFormat::Parser::Merge(io::ZeroCopyInputStream* input,
  701. Message* output) {
  702. ParserImpl parser(output->GetDescriptor(), input, error_collector_,
  703. finder_, ParserImpl::ALLOW_SINGULAR_OVERWRITES);
  704. return MergeUsingImpl(input, output, &parser);
  705. }
  706. bool TextFormat::Parser::MergeFromString(const string& input,
  707. Message* output) {
  708. io::ArrayInputStream input_stream(input.data(), input.size());
  709. return Merge(&input_stream, output);
  710. }
  711. bool TextFormat::Parser::MergeUsingImpl(io::ZeroCopyInputStream* input,
  712. Message* output,
  713. ParserImpl* parser_impl) {
  714. if (!parser_impl->Parse(output)) return false;
  715. if (!allow_partial_ && !output->IsInitialized()) {
  716. vector<string> missing_fields;
  717. output->FindInitializationErrors(&missing_fields);
  718. parser_impl->ReportError(-1, 0, "Message missing required fields: " +
  719. JoinStrings(missing_fields, ", "));
  720. return false;
  721. }
  722. return true;
  723. }
  724. bool TextFormat::Parser::ParseFieldValueFromString(
  725. const string& input,
  726. const FieldDescriptor* field,
  727. Message* output) {
  728. io::ArrayInputStream input_stream(input.data(), input.size());
  729. ParserImpl parser(output->GetDescriptor(), &input_stream, error_collector_,
  730. finder_, ParserImpl::ALLOW_SINGULAR_OVERWRITES);
  731. return parser.ParseField(field, output);
  732. }
  733. /* static */ bool TextFormat::Parse(io::ZeroCopyInputStream* input,
  734. Message* output) {
  735. return Parser().Parse(input, output);
  736. }
  737. /* static */ bool TextFormat::Merge(io::ZeroCopyInputStream* input,
  738. Message* output) {
  739. return Parser().Merge(input, output);
  740. }
  741. /* static */ bool TextFormat::ParseFromString(const string& input,
  742. Message* output) {
  743. return Parser().ParseFromString(input, output);
  744. }
  745. /* static */ bool TextFormat::MergeFromString(const string& input,
  746. Message* output) {
  747. return Parser().MergeFromString(input, output);
  748. }
  749. // ===========================================================================
  // Default configuration: no initial indent, multi-line output, repeated
  // primitives printed one per entry, and UTF-8 string escaping disabled.
  TextFormat::Printer::Printer()
    : initial_indent_level_(0),
      single_line_mode_(false),
      use_short_repeated_primitives_(false),
      utf8_string_escaping_(false) {}

  // The printer holds only plain option values; nothing to release.
  TextFormat::Printer::~Printer() {}
  756. bool TextFormat::Printer::PrintToString(const Message& message,
  757. string* output) const {
  758. GOOGLE_DCHECK(output) << "output specified is NULL";
  759. output->clear();
  760. io::StringOutputStream output_stream(output);
  761. bool result = Print(message, &output_stream);
  762. return result;
  763. }
  764. bool TextFormat::Printer::PrintUnknownFieldsToString(
  765. const UnknownFieldSet& unknown_fields,
  766. string* output) const {
  767. GOOGLE_DCHECK(output) << "output specified is NULL";
  768. output->clear();
  769. io::StringOutputStream output_stream(output);
  770. return PrintUnknownFields(unknown_fields, &output_stream);
  771. }
  772. bool TextFormat::Printer::Print(const Message& message,
  773. io::ZeroCopyOutputStream* output) const {
  774. TextGenerator generator(output, initial_indent_level_);
  775. Print(message, generator);
  776. // Output false if the generator failed internally.
  777. return !generator.failed();
  778. }
  779. bool TextFormat::Printer::PrintUnknownFields(
  780. const UnknownFieldSet& unknown_fields,
  781. io::ZeroCopyOutputStream* output) const {
  782. TextGenerator generator(output, initial_indent_level_);
  783. PrintUnknownFields(unknown_fields, generator);
  784. // Output false if the generator failed internally.
  785. return !generator.failed();
  786. }
  787. void TextFormat::Printer::Print(const Message& message,
  788. TextGenerator& generator) const {
  789. const Reflection* reflection = message.GetReflection();
  790. vector<const FieldDescriptor*> fields;
  791. reflection->ListFields(message, &fields);
  792. for (int i = 0; i < fields.size(); i++) {
  793. PrintField(message, reflection, fields[i], generator);
  794. }
  795. PrintUnknownFields(reflection->GetUnknownFields(message), generator);
  796. }
  797. void TextFormat::Printer::PrintFieldValueToString(
  798. const Message& message,
  799. const FieldDescriptor* field,
  800. int index,
  801. string* output) const {
  802. GOOGLE_DCHECK(output) << "output specified is NULL";
  803. output->clear();
  804. io::StringOutputStream output_stream(output);
  805. TextGenerator generator(&output_stream, initial_indent_level_);
  806. PrintFieldValue(message, message.GetReflection(), field, index, generator);
  807. }
  808. void TextFormat::Printer::PrintField(const Message& message,
  809. const Reflection* reflection,
  810. const FieldDescriptor* field,
  811. TextGenerator& generator) const {
  812. if (use_short_repeated_primitives_ &&
  813. field->is_repeated() &&
  814. field->cpp_type() != FieldDescriptor::CPPTYPE_STRING &&
  815. field->cpp_type() != FieldDescriptor::CPPTYPE_MESSAGE) {
  816. PrintShortRepeatedField(message, reflection, field, generator);
  817. return;
  818. }
  819. int count = 0;
  820. if (field->is_repeated()) {
  821. count = reflection->FieldSize(message, field);
  822. } else if (reflection->HasField(message, field)) {
  823. count = 1;
  824. }
  825. for (int j = 0; j < count; ++j) {
  826. PrintFieldName(message, reflection, field, generator);
  827. if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
  828. if (single_line_mode_) {
  829. generator.Print(" { ");
  830. } else {
  831. generator.Print(" {\n");
  832. generator.Indent();
  833. }
  834. } else {
  835. generator.Print(": ");
  836. }
  837. // Write the field value.
  838. int field_index = j;
  839. if (!field->is_repeated()) {
  840. field_index = -1;
  841. }
  842. PrintFieldValue(message, reflection, field, field_index, generator);
  843. if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) {
  844. if (single_line_mode_) {
  845. generator.Print("} ");
  846. } else {
  847. generator.Outdent();
  848. generator.Print("}\n");
  849. }
  850. } else {
  851. if (single_line_mode_) {
  852. generator.Print(" ");
  853. } else {
  854. generator.Print("\n");
  855. }
  856. }
  857. }
  858. }
  859. void TextFormat::Printer::PrintShortRepeatedField(
  860. const Message& message,
  861. const Reflection* reflection,
  862. const FieldDescriptor* field,
  863. TextGenerator& generator) const {
  864. // Print primitive repeated field in short form.
  865. PrintFieldName(message, reflection, field, generator);
  866. int size = reflection->FieldSize(message, field);
  867. generator.Print(": [");
  868. for (int i = 0; i < size; i++) {
  869. if (i > 0) generator.Print(", ");
  870. PrintFieldValue(message, reflection, field, i, generator);
  871. }
  872. if (single_line_mode_) {
  873. generator.Print("] ");
  874. } else {
  875. generator.Print("]\n");
  876. }
  877. }
  878. void TextFormat::Printer::PrintFieldName(const Message& message,
  879. const Reflection* reflection,
  880. const FieldDescriptor* field,
  881. TextGenerator& generator) const {
  882. if (field->is_extension()) {
  883. generator.Print("[");
  884. // We special-case MessageSet elements for compatibility with proto1.
  885. if (field->containing_type()->options().message_set_wire_format()
  886. && field->type() == FieldDescriptor::TYPE_MESSAGE
  887. && field->is_optional()
  888. && field->extension_scope() == field->message_type()) {
  889. generator.Print(field->message_type()->full_name());
  890. } else {
  891. generator.Print(field->full_name());
  892. }
  893. generator.Print("]");
  894. } else {
  895. if (field->type() == FieldDescriptor::TYPE_GROUP) {
  896. // Groups must be serialized with their original capitalization.
  897. generator.Print(field->message_type()->name());
  898. } else {
  899. generator.Print(field->name());
  900. }
  901. }
  902. }
  903. void TextFormat::Printer::PrintFieldValue(
  904. const Message& message,
  905. const Reflection* reflection,
  906. const FieldDescriptor* field,
  907. int index,
  908. TextGenerator& generator) const {
  909. GOOGLE_DCHECK(field->is_repeated() || (index == -1))
  910. << "Index must be -1 for non-repeated fields";
  911. switch (field->cpp_type()) {
  912. #define OUTPUT_FIELD(CPPTYPE, METHOD, TO_STRING) \
  913. case FieldDescriptor::CPPTYPE_##CPPTYPE: \
  914. generator.Print(TO_STRING(field->is_repeated() ? \
  915. reflection->GetRepeated##METHOD(message, field, index) : \
  916. reflection->Get##METHOD(message, field))); \
  917. break; \
  918. OUTPUT_FIELD( INT32, Int32, SimpleItoa);
  919. OUTPUT_FIELD( INT64, Int64, SimpleItoa);
  920. OUTPUT_FIELD(UINT32, UInt32, SimpleItoa);
  921. OUTPUT_FIELD(UINT64, UInt64, SimpleItoa);
  922. OUTPUT_FIELD( FLOAT, Float, SimpleFtoa);
  923. OUTPUT_FIELD(DOUBLE, Double, SimpleDtoa);
  924. #undef OUTPUT_FIELD
  925. case FieldDescriptor::CPPTYPE_STRING: {
  926. string scratch;
  927. const string& value = field->is_repeated() ?
  928. reflection->GetRepeatedStringReference(
  929. message, field, index, &scratch) :
  930. reflection->GetStringReference(message, field, &scratch);
  931. generator.Print("\"");
  932. if (utf8_string_escaping_) {
  933. generator.Print(strings::Utf8SafeCEscape(value));
  934. } else {
  935. generator.Print(CEscape(value));
  936. }
  937. generator.Print("\"");
  938. break;
  939. }
  940. case FieldDescriptor::CPPTYPE_BOOL:
  941. if (field->is_repeated()) {
  942. generator.Print(reflection->GetRepeatedBool(message, field, index)
  943. ? "true" : "false");
  944. } else {
  945. generator.Print(reflection->GetBool(message, field)
  946. ? "true" : "false");
  947. }
  948. break;
  949. case FieldDescriptor::CPPTYPE_ENUM:
  950. generator.Print(field->is_repeated() ?
  951. reflection->GetRepeatedEnum(message, field, index)->name() :
  952. reflection->GetEnum(message, field)->name());
  953. break;
  954. case FieldDescriptor::CPPTYPE_MESSAGE:
  955. Print(field->is_repeated() ?
  956. reflection->GetRepeatedMessage(message, field, index) :
  957. reflection->GetMessage(message, field),
  958. generator);
  959. break;
  960. }
  961. }
  962. /* static */ bool TextFormat::Print(const Message& message,
  963. io::ZeroCopyOutputStream* output) {
  964. return Printer().Print(message, output);
  965. }
  966. /* static */ bool TextFormat::PrintUnknownFields(
  967. const UnknownFieldSet& unknown_fields,
  968. io::ZeroCopyOutputStream* output) {
  969. return Printer().PrintUnknownFields(unknown_fields, output);
  970. }
  971. /* static */ bool TextFormat::PrintToString(
  972. const Message& message, string* output) {
  973. return Printer().PrintToString(message, output);
  974. }
  975. /* static */ bool TextFormat::PrintUnknownFieldsToString(
  976. const UnknownFieldSet& unknown_fields, string* output) {
  977. return Printer().PrintUnknownFieldsToString(unknown_fields, output);
  978. }
  979. /* static */ void TextFormat::PrintFieldValueToString(
  980. const Message& message,
  981. const FieldDescriptor* field,
  982. int index,
  983. string* output) {
  984. return Printer().PrintFieldValueToString(message, field, index, output);
  985. }
  986. /* static */ bool TextFormat::ParseFieldValueFromString(
  987. const string& input,
  988. const FieldDescriptor* field,
  989. Message* message) {
  990. return Parser().ParseFieldValueFromString(input, field, message);
  991. }
  992. // Prints an integer as hex with a fixed number of digits dependent on the
  993. // integer type.
  994. template<typename IntType>
  995. static string PaddedHex(IntType value) {
  996. string result;
  997. result.reserve(sizeof(value) * 2);
  998. for (int i = sizeof(value) * 2 - 1; i >= 0; i--) {
  999. result.push_back(int_to_hex_digit(value >> (i*4) & 0x0F));
  1000. }
  1001. return result;
  1002. }
  1003. void TextFormat::Printer::PrintUnknownFields(
  1004. const UnknownFieldSet& unknown_fields, TextGenerator& generator) const {
  1005. for (int i = 0; i < unknown_fields.field_count(); i++) {
  1006. const UnknownField& field = unknown_fields.field(i);
  1007. string field_number = SimpleItoa(field.number());
  1008. switch (field.type()) {
  1009. case UnknownField::TYPE_VARINT:
  1010. generator.Print(field_number);
  1011. generator.Print(": ");
  1012. generator.Print(SimpleItoa(field.varint()));
  1013. if (single_line_mode_) {
  1014. generator.Print(" ");
  1015. } else {
  1016. generator.Print("\n");
  1017. }
  1018. break;
  1019. case UnknownField::TYPE_FIXED32: {
  1020. generator.Print(field_number);
  1021. generator.Print(": 0x");
  1022. char buffer[kFastToBufferSize];
  1023. generator.Print(FastHex32ToBuffer(field.fixed32(), buffer));
  1024. if (single_line_mode_) {
  1025. generator.Print(" ");
  1026. } else {
  1027. generator.Print("\n");
  1028. }
  1029. break;
  1030. }
  1031. case UnknownField::TYPE_FIXED64: {
  1032. generator.Print(field_number);
  1033. generator.Print(": 0x");
  1034. char buffer[kFastToBufferSize];
  1035. generator.Print(FastHex64ToBuffer(field.fixed64(), buffer));
  1036. if (single_line_mode_) {
  1037. generator.Print(" ");
  1038. } else {
  1039. generator.Print("\n");
  1040. }
  1041. break;
  1042. }
  1043. case UnknownField::TYPE_LENGTH_DELIMITED: {
  1044. generator.Print(field_number);
  1045. const string& value = field.length_delimited();
  1046. UnknownFieldSet embedded_unknown_fields;
  1047. if (!value.empty() && embedded_unknown_fields.ParseFromString(value)) {
  1048. // This field is parseable as a Message.
  1049. // So it is probably an embedded message.
  1050. if (single_line_mode_) {
  1051. generator.Print(" { ");
  1052. } else {
  1053. generator.Print(" {\n");
  1054. generator.Indent();
  1055. }
  1056. PrintUnknownFields(embedded_unknown_fields, generator);
  1057. if (single_line_mode_) {
  1058. generator.Print("} ");
  1059. } else {
  1060. generator.Outdent();
  1061. generator.Print("}\n");
  1062. }
  1063. } else {
  1064. // This field is not parseable as a Message.
  1065. // So it is probably just a plain string.
  1066. generator.Print(": \"");
  1067. generator.Print(CEscape(value));
  1068. generator.Print("\"");
  1069. if (single_line_mode_) {
  1070. generator.Print(" ");
  1071. } else {
  1072. generator.Print("\n");
  1073. }
  1074. }
  1075. break;
  1076. }
  1077. case UnknownField::TYPE_GROUP:
  1078. generator.Print(field_number);
  1079. if (single_line_mode_) {
  1080. generator.Print(" { ");
  1081. } else {
  1082. generator.Print(" {\n");
  1083. generator.Indent();
  1084. }
  1085. PrintUnknownFields(field.group(), generator);
  1086. if (single_line_mode_) {
  1087. generator.Print("} ");
  1088. } else {
  1089. generator.Outdent();
  1090. generator.Print("}\n");
  1091. }
  1092. break;
  1093. }
  1094. }
  1095. }
  1096. } // namespace protobuf
  1097. } // namespace google