/proto/text_parser.go
Go | 515 lines | 451 code | 16 blank | 48 comment | 54 complexity | e17c6b611d44164afd1dcfba6c6a7101 MD5 | raw file
Possible License(s): BSD-3-Clause
- // Go support for Protocol Buffers - Google's data interchange format
- //
- // Copyright 2010 Google Inc. All rights reserved.
- // http://code.google.com/p/goprotobuf/
- //
- // Redistribution and use in source and binary forms, with or without
- // modification, are permitted provided that the following conditions are
- // met:
- //
- // * Redistributions of source code must retain the above copyright
- // notice, this list of conditions and the following disclaimer.
- // * Redistributions in binary form must reproduce the above
- // copyright notice, this list of conditions and the following disclaimer
- // in the documentation and/or other materials provided with the
- // distribution.
- // * Neither the name of Google Inc. nor the names of its
- // contributors may be used to endorse or promote products derived from
- // this software without specific prior written permission.
- //
- // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- package proto
- // Functions for parsing the Text protocol buffer format.
- // TODO: message sets.
- import (
- "fmt"
- "reflect"
- "strconv"
- "strings"
- )
// ParseError describes a problem encountered while parsing text-format
// input, together with the position at which it was detected.
type ParseError struct {
	// Message is the human-readable description of the problem.
	Message string
	// Line is the line number of the problem, counting from 1.
	Line int
	// Offset is the byte offset of the problem from the start of the
	// input, counting from 0.
	Offset int
}
- func (p *ParseError) Error() string {
- if p.Line == 1 {
- // show offset only for first line
- return fmt.Sprintf("line 1.%d: %v", p.Offset, p.Message)
- }
- return fmt.Sprintf("line %d: %v", p.Line, p.Message)
- }
- type token struct {
- value string
- err *ParseError
- line int // line number
- offset int // byte number from start of input, not start of line
- unquoted string // the unquoted version of value, if it was a quoted string
- }
- func (t *token) String() string {
- if t.err == nil {
- return fmt.Sprintf("%q (line=%d, offset=%d)", t.value, t.line, t.offset)
- }
- return fmt.Sprintf("parse error: %v", t.err)
- }
- type textParser struct {
- s string // remaining input
- done bool // whether the parsing is finished (success or error)
- backed bool // whether back() was called
- offset, line int
- cur token
- }
- func newTextParser(s string) *textParser {
- p := new(textParser)
- p.s = s
- p.line = 1
- p.cur.line = 1
- return p
- }
- func (p *textParser) errorf(format string, a ...interface{}) *ParseError {
- pe := &ParseError{fmt.Sprintf(format, a...), p.cur.line, p.cur.offset}
- p.cur.err = pe
- p.done = true
- return pe
- }
// isIdentOrNumberChar reports whether c may appear in an identifier or a
// number, i.e. whether it matches [-+._A-Za-z0-9].
func isIdentOrNumberChar(c byte) bool {
	if c == '-' || c == '+' || c == '.' || c == '_' {
		return true
	}
	upper := c >= 'A' && c <= 'Z'
	lower := c >= 'a' && c <= 'z'
	digit := c >= '0' && c <= '9'
	return upper || lower || digit
}
// isWhitespace reports whether c is a space, tab, newline or carriage
// return.
func isWhitespace(c byte) bool {
	return c == ' ' || c == '\t' || c == '\n' || c == '\r'
}
- func (p *textParser) skipWhitespace() {
- i := 0
- for i < len(p.s) && (isWhitespace(p.s[i]) || p.s[i] == '#') {
- if p.s[i] == '#' {
- // comment; skip to end of line or input
- for i < len(p.s) && p.s[i] != '\n' {
- i++
- }
- if i == len(p.s) {
- break
- }
- }
- if p.s[i] == '\n' {
- p.line++
- }
- i++
- }
- p.offset += i
- p.s = p.s[i:len(p.s)]
- if len(p.s) == 0 {
- p.done = true
- }
- }
- func (p *textParser) advance() {
- // Skip whitespace
- p.skipWhitespace()
- if p.done {
- return
- }
- // Start of non-whitespace
- p.cur.err = nil
- p.cur.offset, p.cur.line = p.offset, p.line
- p.cur.unquoted = ""
- switch p.s[0] {
- case '<', '>', '{', '}', ':', '[', ']':
- // Single symbol
- p.cur.value, p.s = p.s[0:1], p.s[1:len(p.s)]
- case '"':
- // Quoted string
- i := 1
- for i < len(p.s) && p.s[i] != '"' && p.s[i] != '\n' {
- if p.s[i] == '\\' && i+1 < len(p.s) {
- // skip escaped char
- i++
- }
- i++
- }
- if i >= len(p.s) || p.s[i] != '"' {
- p.errorf("unmatched quote")
- return
- }
- unq, err := unquoteC(p.s[0 : i+1])
- if err != nil {
- p.errorf("invalid quoted string %v", p.s[0:i+1])
- return
- }
- p.cur.value, p.s = p.s[0:i+1], p.s[i+1:len(p.s)]
- p.cur.unquoted = unq
- default:
- i := 0
- for i < len(p.s) && isIdentOrNumberChar(p.s[i]) {
- i++
- }
- if i == 0 {
- p.errorf("unexpected byte %#x", p.s[0])
- return
- }
- p.cur.value, p.s = p.s[0:i], p.s[i:len(p.s)]
- }
- p.offset += len(p.cur.value)
- }
// unquoteC unquotes a double-quoted text-format string literal.
//
// Text-format strings differ from Go string literals in one notable way:
// the escape \' is valid in the text format but rejected by Go inside
// double quotes. Each \' escape is therefore rewritten to a plain
// apostrophe before the literal is handed to strconv.Unquote.
//
// The rewrite walks the escapes pairwise rather than doing a blanket
// textual replacement, so the second backslash of an escaped backslash is
// never mistaken for the start of a \' escape: the literal "\\'" (an
// escaped backslash followed by an apostrophe) unquotes to \' instead of
// being reported as invalid.
//
// It returns the unquoted string, or the error from strconv.Unquote if
// the literal is malformed.
func unquoteC(s string) (string, error) {
	// Fast path: nothing that could possibly need rewriting.
	if !strings.Contains(s, `\'`) {
		return strconv.Unquote(s)
	}

	buf := make([]byte, 0, len(s))
	for i := 0; i < len(s); i++ {
		c := s[i]
		if c != '\\' || i+1 == len(s) {
			buf = append(buf, c)
			continue
		}
		// c starts an escape sequence; consume the escaped byte with it
		// so that it cannot be reinterpreted on the next iteration.
		i++
		if s[i] != '\'' {
			buf = append(buf, '\\')
		}
		buf = append(buf, s[i])
	}
	return strconv.Unquote(string(buf))
}
- // Back off the parser by one token. Can only be done between calls to next().
- // It makes the next advance() a no-op.
- func (p *textParser) back() { p.backed = true }
- // Advances the parser and returns the new current token.
- func (p *textParser) next() *token {
- if p.backed || p.done {
- p.backed = false
- return &p.cur
- }
- p.advance()
- if p.done {
- p.cur.value = ""
- } else if len(p.cur.value) > 0 && p.cur.value[0] == '"' {
- // Look for multiple quoted strings separated by whitespace,
- // and concatenate them.
- cat := p.cur
- for {
- p.skipWhitespace()
- if p.done || p.s[0] != '"' {
- break
- }
- p.advance()
- if p.cur.err != nil {
- return &p.cur
- }
- cat.value += " " + p.cur.value
- cat.unquoted += p.cur.unquoted
- }
- p.done = false // parser may have seen EOF, but we want to return cat
- p.cur = cat
- }
- return &p.cur
- }
- // Return an error indicating which required field was not set.
- func (p *textParser) missingRequiredFieldError(sv reflect.Value) *ParseError {
- st := sv.Type()
- sprops := GetProperties(st)
- for i := 0; i < st.NumField(); i++ {
- if !isNil(sv.Field(i)) {
- continue
- }
- props := sprops.Prop[i]
- if props.Required {
- return p.errorf("message %v missing required field %q", st, props.OrigName)
- }
- }
- return p.errorf("message %v missing required field", st) // should not happen
- }
- // Returns the index in the struct for the named field, as well as the parsed tag properties.
- func structFieldByName(st reflect.Type, name string) (int, *Properties, bool) {
- sprops := GetProperties(st)
- i, ok := sprops.origNames[name]
- if ok {
- return i, sprops.Prop[i], true
- }
- return -1, nil, false
- }
- // Consume a ':' from the input stream (if the next token is a colon),
- // returning an error if a colon is needed but not present.
- func (p *textParser) checkForColon(props *Properties, typ reflect.Type) *ParseError {
- tok := p.next()
- if tok.err != nil {
- return tok.err
- }
- if tok.value != ":" {
- // Colon is optional when the field is a group or message.
- needColon := true
- switch props.Wire {
- case "group":
- needColon = false
- case "bytes":
- // A "bytes" field is either a message, a string, or a repeated field;
- // those three become *T, *string and []T respectively, so we can check for
- // this field being a pointer to a non-string.
- if typ.Kind() == reflect.Ptr {
- // *T or *string
- if typ.Elem().Kind() == reflect.String {
- break
- }
- } else if typ.Kind() == reflect.Slice {
- // []T or []*T
- if typ.Elem().Kind() != reflect.Ptr {
- break
- }
- }
- needColon = false
- }
- if needColon {
- return p.errorf("expected ':', found %q", tok.value)
- }
- p.back()
- }
- return nil
- }
- func (p *textParser) readStruct(sv reflect.Value, terminator string) *ParseError {
- st := sv.Type()
- reqCount := GetProperties(st).reqCount
- // A struct is a sequence of "name: value", terminated by one of
- // '>' or '}', or the end of the input. A name may also be
- // "[extension]".
- for {
- tok := p.next()
- if tok.err != nil {
- return tok.err
- }
- if tok.value == terminator {
- break
- }
- if tok.value == "[" {
- // Looks like an extension.
- //
- // TODO: Check whether we need to handle
- // namespace rooted names (e.g. ".something.Foo").
- tok = p.next()
- if tok.err != nil {
- return tok.err
- }
- var desc *ExtensionDesc
- // This could be faster, but it's functional.
- // TODO: Do something smarter than a linear scan.
- for _, d := range RegisteredExtensions(reflect.New(st).Interface()) {
- if d.Name == tok.value {
- desc = d
- break
- }
- }
- if desc == nil {
- return p.errorf("unrecognized extension %q", tok.value)
- }
- // Check the extension terminator.
- tok = p.next()
- if tok.err != nil {
- return tok.err
- }
- if tok.value != "]" {
- return p.errorf("unrecognized extension terminator %q", tok.value)
- }
- props := &Properties{}
- props.Parse(desc.Tag)
- typ := reflect.TypeOf(desc.ExtensionType)
- if err := p.checkForColon(props, typ); err != nil {
- return err
- }
- // Read the extension structure, and set it in
- // the value we're constructing.
- ext := reflect.New(typ).Elem()
- if err := p.readAny(ext, props); err != nil {
- return err
- }
- SetExtension(sv.Addr().Interface().(extendableProto),
- desc, ext.Interface())
- } else {
- // This is a normal, non-extension field.
- fi, props, ok := structFieldByName(st, tok.value)
- if !ok {
- return p.errorf("unknown field name %q in %v", tok.value, st)
- }
- // Check that it's not already set if it's not a repeated field.
- if !props.Repeated && !isNil(sv.Field(fi)) {
- return p.errorf("non-repeated field %q was repeated", tok.value)
- }
- if err := p.checkForColon(props, st.Field(fi).Type); err != nil {
- return err
- }
- // Parse into the field.
- if err := p.readAny(sv.Field(fi), props); err != nil {
- return err
- }
- if props.Required {
- reqCount--
- }
- }
- }
- if reqCount > 0 {
- return p.missingRequiredFieldError(sv)
- }
- return nil
- }
- func (p *textParser) readAny(v reflect.Value, props *Properties) *ParseError {
- tok := p.next()
- if tok.err != nil {
- return tok.err
- }
- if tok.value == "" {
- return p.errorf("unexpected EOF")
- }
- switch fv := v; fv.Kind() {
- case reflect.Slice:
- at := v.Type()
- if at.Elem().Kind() == reflect.Uint8 {
- // Special case for []byte
- if tok.value[0] != '"' {
- // Deliberately written out here, as the error after
- // this switch statement would write "invalid []byte: ...",
- // which is not as user-friendly.
- return p.errorf("invalid string: %v", tok.value)
- }
- bytes := []byte(tok.unquoted)
- fv.Set(reflect.ValueOf(bytes))
- return nil
- }
- // Repeated field. May already exist.
- flen := fv.Len()
- if flen == fv.Cap() {
- nav := reflect.MakeSlice(at, flen, 2*flen+1)
- reflect.Copy(nav, fv)
- fv.Set(nav)
- }
- fv.SetLen(flen + 1)
- // Read one.
- p.back()
- return p.readAny(fv.Index(flen), props)
- case reflect.Bool:
- // Either "true", "false", 1 or 0.
- switch tok.value {
- case "true", "1":
- fv.SetBool(true)
- return nil
- case "false", "0":
- fv.SetBool(false)
- return nil
- }
- case reflect.Float32, reflect.Float64:
- if f, err := strconv.ParseFloat(tok.value, fv.Type().Bits()); err == nil {
- fv.SetFloat(f)
- return nil
- }
- case reflect.Int32:
- if x, err := strconv.ParseInt(tok.value, 10, 32); err == nil {
- fv.SetInt(x)
- return nil
- }
- if len(props.Enum) == 0 {
- break
- }
- m, ok := enumValueMaps[props.Enum]
- if !ok {
- break
- }
- x, ok := m[tok.value]
- if !ok {
- break
- }
- fv.SetInt(int64(x))
- return nil
- case reflect.Int64:
- if x, err := strconv.ParseInt(tok.value, 10, 64); err == nil {
- fv.SetInt(x)
- return nil
- }
- case reflect.Ptr:
- // A basic field (indirected through pointer), or a repeated message/group
- p.back()
- fv.Set(reflect.New(fv.Type().Elem()))
- return p.readAny(fv.Elem(), props)
- case reflect.String:
- if tok.value[0] == '"' {
- fv.SetString(tok.unquoted)
- return nil
- }
- case reflect.Struct:
- var terminator string
- switch tok.value {
- case "{":
- terminator = "}"
- case "<":
- terminator = ">"
- default:
- return p.errorf("expected '{' or '<', found %q", tok.value)
- }
- return p.readStruct(fv, terminator)
- case reflect.Uint32:
- if x, err := strconv.ParseUint(tok.value, 10, 32); err == nil {
- fv.SetUint(uint64(x))
- return nil
- }
- case reflect.Uint64:
- if x, err := strconv.ParseUint(tok.value, 10, 64); err == nil {
- fv.SetUint(x)
- return nil
- }
- }
- return p.errorf("invalid %v: %v", v.Type(), tok.value)
- }
- var notPtrStruct error = &ParseError{"destination is not a pointer to a struct", 0, 0}
- // UnmarshalText reads a protobuffer in Text format.
- func UnmarshalText(s string, pb interface{}) error {
- v := reflect.ValueOf(pb)
- if v.Kind() != reflect.Ptr || v.Elem().Kind() != reflect.Struct {
- return notPtrStruct
- }
- if pe := newTextParser(s).readStruct(v.Elem(), ""); pe != nil {
- return pe
- }
- return nil
- }