PageRenderTime 44ms CodeModel.GetById 17ms RepoModel.GetById 0ms app.codeStats 0ms

/proto/text_parser.go

https://code.google.com/p/goprotobuf/
Go | 515 lines | 451 code | 16 blank | 48 comment | 54 complexity | e17c6b611d44164afd1dcfba6c6a7101 MD5 | raw file
Possible License(s): BSD-3-Clause
  1. // Go support for Protocol Buffers - Google's data interchange format
  2. //
  3. // Copyright 2010 Google Inc. All rights reserved.
  4. // http://code.google.com/p/goprotobuf/
  5. //
  6. // Redistribution and use in source and binary forms, with or without
  7. // modification, are permitted provided that the following conditions are
  8. // met:
  9. //
  10. // * Redistributions of source code must retain the above copyright
  11. // notice, this list of conditions and the following disclaimer.
  12. // * Redistributions in binary form must reproduce the above
  13. // copyright notice, this list of conditions and the following disclaimer
  14. // in the documentation and/or other materials provided with the
  15. // distribution.
  16. // * Neither the name of Google Inc. nor the names of its
  17. // contributors may be used to endorse or promote products derived from
  18. // this software without specific prior written permission.
  19. //
  20. // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  21. // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  22. // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  23. // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  24. // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  25. // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  26. // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  27. // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  28. // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  29. // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  30. // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  31. package proto
  32. // Functions for parsing the Text protocol buffer format.
  33. // TODO: message sets.
  34. import (
  35. "fmt"
  36. "reflect"
  37. "strconv"
  38. "strings"
  39. )
  40. type ParseError struct {
  41. Message string
  42. Line int // 1-based line number
  43. Offset int // 0-based byte offset from start of input
  44. }
  45. func (p *ParseError) Error() string {
  46. if p.Line == 1 {
  47. // show offset only for first line
  48. return fmt.Sprintf("line 1.%d: %v", p.Offset, p.Message)
  49. }
  50. return fmt.Sprintf("line %d: %v", p.Line, p.Message)
  51. }
  52. type token struct {
  53. value string
  54. err *ParseError
  55. line int // line number
  56. offset int // byte number from start of input, not start of line
  57. unquoted string // the unquoted version of value, if it was a quoted string
  58. }
  59. func (t *token) String() string {
  60. if t.err == nil {
  61. return fmt.Sprintf("%q (line=%d, offset=%d)", t.value, t.line, t.offset)
  62. }
  63. return fmt.Sprintf("parse error: %v", t.err)
  64. }
  65. type textParser struct {
  66. s string // remaining input
  67. done bool // whether the parsing is finished (success or error)
  68. backed bool // whether back() was called
  69. offset, line int
  70. cur token
  71. }
  72. func newTextParser(s string) *textParser {
  73. p := new(textParser)
  74. p.s = s
  75. p.line = 1
  76. p.cur.line = 1
  77. return p
  78. }
  79. func (p *textParser) errorf(format string, a ...interface{}) *ParseError {
  80. pe := &ParseError{fmt.Sprintf(format, a...), p.cur.line, p.cur.offset}
  81. p.cur.err = pe
  82. p.done = true
  83. return pe
  84. }
  85. // Numbers and identifiers are matched by [-+._A-Za-z0-9]
  86. func isIdentOrNumberChar(c byte) bool {
  87. switch {
  88. case 'A' <= c && c <= 'Z', 'a' <= c && c <= 'z':
  89. return true
  90. case '0' <= c && c <= '9':
  91. return true
  92. }
  93. switch c {
  94. case '-', '+', '.', '_':
  95. return true
  96. }
  97. return false
  98. }
  99. func isWhitespace(c byte) bool {
  100. switch c {
  101. case ' ', '\t', '\n', '\r':
  102. return true
  103. }
  104. return false
  105. }
  106. func (p *textParser) skipWhitespace() {
  107. i := 0
  108. for i < len(p.s) && (isWhitespace(p.s[i]) || p.s[i] == '#') {
  109. if p.s[i] == '#' {
  110. // comment; skip to end of line or input
  111. for i < len(p.s) && p.s[i] != '\n' {
  112. i++
  113. }
  114. if i == len(p.s) {
  115. break
  116. }
  117. }
  118. if p.s[i] == '\n' {
  119. p.line++
  120. }
  121. i++
  122. }
  123. p.offset += i
  124. p.s = p.s[i:len(p.s)]
  125. if len(p.s) == 0 {
  126. p.done = true
  127. }
  128. }
  129. func (p *textParser) advance() {
  130. // Skip whitespace
  131. p.skipWhitespace()
  132. if p.done {
  133. return
  134. }
  135. // Start of non-whitespace
  136. p.cur.err = nil
  137. p.cur.offset, p.cur.line = p.offset, p.line
  138. p.cur.unquoted = ""
  139. switch p.s[0] {
  140. case '<', '>', '{', '}', ':', '[', ']':
  141. // Single symbol
  142. p.cur.value, p.s = p.s[0:1], p.s[1:len(p.s)]
  143. case '"':
  144. // Quoted string
  145. i := 1
  146. for i < len(p.s) && p.s[i] != '"' && p.s[i] != '\n' {
  147. if p.s[i] == '\\' && i+1 < len(p.s) {
  148. // skip escaped char
  149. i++
  150. }
  151. i++
  152. }
  153. if i >= len(p.s) || p.s[i] != '"' {
  154. p.errorf("unmatched quote")
  155. return
  156. }
  157. unq, err := unquoteC(p.s[0 : i+1])
  158. if err != nil {
  159. p.errorf("invalid quoted string %v", p.s[0:i+1])
  160. return
  161. }
  162. p.cur.value, p.s = p.s[0:i+1], p.s[i+1:len(p.s)]
  163. p.cur.unquoted = unq
  164. default:
  165. i := 0
  166. for i < len(p.s) && isIdentOrNumberChar(p.s[i]) {
  167. i++
  168. }
  169. if i == 0 {
  170. p.errorf("unexpected byte %#x", p.s[0])
  171. return
  172. }
  173. p.cur.value, p.s = p.s[0:i], p.s[i:len(p.s)]
  174. }
  175. p.offset += len(p.cur.value)
  176. }
  177. func unquoteC(s string) (string, error) {
  178. // A notable divergence between quoted string literals in Go
  179. // and what is acceptable for text format protocol buffers:
  180. // the former considers \' invalid, but the latter considers it valid.
  181. s = strings.Replace(s, `\'`, "'", -1)
  182. return strconv.Unquote(s)
  183. }
  184. // Back off the parser by one token. Can only be done between calls to next().
  185. // It makes the next advance() a no-op.
  186. func (p *textParser) back() { p.backed = true }
  187. // Advances the parser and returns the new current token.
  188. func (p *textParser) next() *token {
  189. if p.backed || p.done {
  190. p.backed = false
  191. return &p.cur
  192. }
  193. p.advance()
  194. if p.done {
  195. p.cur.value = ""
  196. } else if len(p.cur.value) > 0 && p.cur.value[0] == '"' {
  197. // Look for multiple quoted strings separated by whitespace,
  198. // and concatenate them.
  199. cat := p.cur
  200. for {
  201. p.skipWhitespace()
  202. if p.done || p.s[0] != '"' {
  203. break
  204. }
  205. p.advance()
  206. if p.cur.err != nil {
  207. return &p.cur
  208. }
  209. cat.value += " " + p.cur.value
  210. cat.unquoted += p.cur.unquoted
  211. }
  212. p.done = false // parser may have seen EOF, but we want to return cat
  213. p.cur = cat
  214. }
  215. return &p.cur
  216. }
  217. // Return an error indicating which required field was not set.
  218. func (p *textParser) missingRequiredFieldError(sv reflect.Value) *ParseError {
  219. st := sv.Type()
  220. sprops := GetProperties(st)
  221. for i := 0; i < st.NumField(); i++ {
  222. if !isNil(sv.Field(i)) {
  223. continue
  224. }
  225. props := sprops.Prop[i]
  226. if props.Required {
  227. return p.errorf("message %v missing required field %q", st, props.OrigName)
  228. }
  229. }
  230. return p.errorf("message %v missing required field", st) // should not happen
  231. }
  232. // Returns the index in the struct for the named field, as well as the parsed tag properties.
  233. func structFieldByName(st reflect.Type, name string) (int, *Properties, bool) {
  234. sprops := GetProperties(st)
  235. i, ok := sprops.origNames[name]
  236. if ok {
  237. return i, sprops.Prop[i], true
  238. }
  239. return -1, nil, false
  240. }
  241. // Consume a ':' from the input stream (if the next token is a colon),
  242. // returning an error if a colon is needed but not present.
  243. func (p *textParser) checkForColon(props *Properties, typ reflect.Type) *ParseError {
  244. tok := p.next()
  245. if tok.err != nil {
  246. return tok.err
  247. }
  248. if tok.value != ":" {
  249. // Colon is optional when the field is a group or message.
  250. needColon := true
  251. switch props.Wire {
  252. case "group":
  253. needColon = false
  254. case "bytes":
  255. // A "bytes" field is either a message, a string, or a repeated field;
  256. // those three become *T, *string and []T respectively, so we can check for
  257. // this field being a pointer to a non-string.
  258. if typ.Kind() == reflect.Ptr {
  259. // *T or *string
  260. if typ.Elem().Kind() == reflect.String {
  261. break
  262. }
  263. } else if typ.Kind() == reflect.Slice {
  264. // []T or []*T
  265. if typ.Elem().Kind() != reflect.Ptr {
  266. break
  267. }
  268. }
  269. needColon = false
  270. }
  271. if needColon {
  272. return p.errorf("expected ':', found %q", tok.value)
  273. }
  274. p.back()
  275. }
  276. return nil
  277. }
  278. func (p *textParser) readStruct(sv reflect.Value, terminator string) *ParseError {
  279. st := sv.Type()
  280. reqCount := GetProperties(st).reqCount
  281. // A struct is a sequence of "name: value", terminated by one of
  282. // '>' or '}', or the end of the input. A name may also be
  283. // "[extension]".
  284. for {
  285. tok := p.next()
  286. if tok.err != nil {
  287. return tok.err
  288. }
  289. if tok.value == terminator {
  290. break
  291. }
  292. if tok.value == "[" {
  293. // Looks like an extension.
  294. //
  295. // TODO: Check whether we need to handle
  296. // namespace rooted names (e.g. ".something.Foo").
  297. tok = p.next()
  298. if tok.err != nil {
  299. return tok.err
  300. }
  301. var desc *ExtensionDesc
  302. // This could be faster, but it's functional.
  303. // TODO: Do something smarter than a linear scan.
  304. for _, d := range RegisteredExtensions(reflect.New(st).Interface()) {
  305. if d.Name == tok.value {
  306. desc = d
  307. break
  308. }
  309. }
  310. if desc == nil {
  311. return p.errorf("unrecognized extension %q", tok.value)
  312. }
  313. // Check the extension terminator.
  314. tok = p.next()
  315. if tok.err != nil {
  316. return tok.err
  317. }
  318. if tok.value != "]" {
  319. return p.errorf("unrecognized extension terminator %q", tok.value)
  320. }
  321. props := &Properties{}
  322. props.Parse(desc.Tag)
  323. typ := reflect.TypeOf(desc.ExtensionType)
  324. if err := p.checkForColon(props, typ); err != nil {
  325. return err
  326. }
  327. // Read the extension structure, and set it in
  328. // the value we're constructing.
  329. ext := reflect.New(typ).Elem()
  330. if err := p.readAny(ext, props); err != nil {
  331. return err
  332. }
  333. SetExtension(sv.Addr().Interface().(extendableProto),
  334. desc, ext.Interface())
  335. } else {
  336. // This is a normal, non-extension field.
  337. fi, props, ok := structFieldByName(st, tok.value)
  338. if !ok {
  339. return p.errorf("unknown field name %q in %v", tok.value, st)
  340. }
  341. // Check that it's not already set if it's not a repeated field.
  342. if !props.Repeated && !isNil(sv.Field(fi)) {
  343. return p.errorf("non-repeated field %q was repeated", tok.value)
  344. }
  345. if err := p.checkForColon(props, st.Field(fi).Type); err != nil {
  346. return err
  347. }
  348. // Parse into the field.
  349. if err := p.readAny(sv.Field(fi), props); err != nil {
  350. return err
  351. }
  352. if props.Required {
  353. reqCount--
  354. }
  355. }
  356. }
  357. if reqCount > 0 {
  358. return p.missingRequiredFieldError(sv)
  359. }
  360. return nil
  361. }
  362. func (p *textParser) readAny(v reflect.Value, props *Properties) *ParseError {
  363. tok := p.next()
  364. if tok.err != nil {
  365. return tok.err
  366. }
  367. if tok.value == "" {
  368. return p.errorf("unexpected EOF")
  369. }
  370. switch fv := v; fv.Kind() {
  371. case reflect.Slice:
  372. at := v.Type()
  373. if at.Elem().Kind() == reflect.Uint8 {
  374. // Special case for []byte
  375. if tok.value[0] != '"' {
  376. // Deliberately written out here, as the error after
  377. // this switch statement would write "invalid []byte: ...",
  378. // which is not as user-friendly.
  379. return p.errorf("invalid string: %v", tok.value)
  380. }
  381. bytes := []byte(tok.unquoted)
  382. fv.Set(reflect.ValueOf(bytes))
  383. return nil
  384. }
  385. // Repeated field. May already exist.
  386. flen := fv.Len()
  387. if flen == fv.Cap() {
  388. nav := reflect.MakeSlice(at, flen, 2*flen+1)
  389. reflect.Copy(nav, fv)
  390. fv.Set(nav)
  391. }
  392. fv.SetLen(flen + 1)
  393. // Read one.
  394. p.back()
  395. return p.readAny(fv.Index(flen), props)
  396. case reflect.Bool:
  397. // Either "true", "false", 1 or 0.
  398. switch tok.value {
  399. case "true", "1":
  400. fv.SetBool(true)
  401. return nil
  402. case "false", "0":
  403. fv.SetBool(false)
  404. return nil
  405. }
  406. case reflect.Float32, reflect.Float64:
  407. if f, err := strconv.ParseFloat(tok.value, fv.Type().Bits()); err == nil {
  408. fv.SetFloat(f)
  409. return nil
  410. }
  411. case reflect.Int32:
  412. if x, err := strconv.ParseInt(tok.value, 10, 32); err == nil {
  413. fv.SetInt(x)
  414. return nil
  415. }
  416. if len(props.Enum) == 0 {
  417. break
  418. }
  419. m, ok := enumValueMaps[props.Enum]
  420. if !ok {
  421. break
  422. }
  423. x, ok := m[tok.value]
  424. if !ok {
  425. break
  426. }
  427. fv.SetInt(int64(x))
  428. return nil
  429. case reflect.Int64:
  430. if x, err := strconv.ParseInt(tok.value, 10, 64); err == nil {
  431. fv.SetInt(x)
  432. return nil
  433. }
  434. case reflect.Ptr:
  435. // A basic field (indirected through pointer), or a repeated message/group
  436. p.back()
  437. fv.Set(reflect.New(fv.Type().Elem()))
  438. return p.readAny(fv.Elem(), props)
  439. case reflect.String:
  440. if tok.value[0] == '"' {
  441. fv.SetString(tok.unquoted)
  442. return nil
  443. }
  444. case reflect.Struct:
  445. var terminator string
  446. switch tok.value {
  447. case "{":
  448. terminator = "}"
  449. case "<":
  450. terminator = ">"
  451. default:
  452. return p.errorf("expected '{' or '<', found %q", tok.value)
  453. }
  454. return p.readStruct(fv, terminator)
  455. case reflect.Uint32:
  456. if x, err := strconv.ParseUint(tok.value, 10, 32); err == nil {
  457. fv.SetUint(uint64(x))
  458. return nil
  459. }
  460. case reflect.Uint64:
  461. if x, err := strconv.ParseUint(tok.value, 10, 64); err == nil {
  462. fv.SetUint(x)
  463. return nil
  464. }
  465. }
  466. return p.errorf("invalid %v: %v", v.Type(), tok.value)
  467. }
  468. var notPtrStruct error = &ParseError{"destination is not a pointer to a struct", 0, 0}
  469. // UnmarshalText reads a protobuffer in Text format.
  470. func UnmarshalText(s string, pb interface{}) error {
  471. v := reflect.ValueOf(pb)
  472. if v.Kind() != reflect.Ptr || v.Elem().Kind() != reflect.Struct {
  473. return notPtrStruct
  474. }
  475. if pe := newTextParser(s).readStruct(v.Elem(), ""); pe != nil {
  476. return pe
  477. }
  478. return nil
  479. }