PageRenderTime 20ms CodeModel.GetById 2ms app.highlight 14ms RepoModel.GetById 1ms app.codeStats 1ms

/http/lex.go

http://github.com/petar/GoHTTP
Go | 144 lines | 123 code | 7 blank | 14 comment | 12 complexity | 1788ee6aa2c347c7c903f371963c6a35 MD5 | raw file
  1// Copyright 2009 The Go Authors. All rights reserved.
  2// Use of this source code is governed by a BSD-style
  3// license that can be found in the LICENSE file.
  4
  5package http
  6
  7// This file deals with lexical matters of HTTP
  8
  9func isSeparator(c byte) bool {
 10	switch c {
 11	case '(', ')', '<', '>', '@', ',', ';', ':', '\\', '"', '/', '[', ']', '?', '=', '{', '}', ' ', '\t':
 12		return true
 13	}
 14	return false
 15}
 16
 17func isSpace(c byte) bool {
 18	switch c {
 19	case ' ', '\t', '\r', '\n':
 20		return true
 21	}
 22	return false
 23}
 24
 25func isCtl(c byte) bool { return (0 <= c && c <= 31) || c == 127 }
 26
 27func isChar(c byte) bool { return 0 <= c && c <= 127 }
 28
 29func isAnyText(c byte) bool { return !isCtl(c) }
 30
 31func isQdText(c byte) bool { return isAnyText(c) && c != '"' }
 32
 33func isToken(c byte) bool { return isChar(c) && !isCtl(c) && !isSeparator(c) }
 34
 35// Valid escaped sequences are not specified in RFC 2616, so for now, we assume
 36// that they coincide with the common sense ones used by GO. Malformed
 37// characters should probably not be treated as errors by a robust (forgiving)
 38// parser, so we replace them with the '?' character.
 39func httpUnquotePair(b byte) byte {
 40	// skip the first byte, which should always be '\'
 41	switch b {
 42	case 'a':
 43		return '\a'
 44	case 'b':
 45		return '\b'
 46	case 'f':
 47		return '\f'
 48	case 'n':
 49		return '\n'
 50	case 'r':
 51		return '\r'
 52	case 't':
 53		return '\t'
 54	case 'v':
 55		return '\v'
 56	case '\\':
 57		return '\\'
 58	case '\'':
 59		return '\''
 60	case '"':
 61		return '"'
 62	}
 63	return '?'
 64}
 65
 66// raw must begin with a valid quoted string. Only the first quoted string is
 67// parsed and is unquoted in result. eaten is the number of bytes parsed, or -1
 68// upon failure.
 69func httpUnquote(raw []byte) (eaten int, result string) {
 70	buf := make([]byte, len(raw))
 71	if raw[0] != '"' {
 72		return -1, ""
 73	}
 74	eaten = 1
 75	j := 0 // # of bytes written in buf
 76	for i := 1; i < len(raw); i++ {
 77		switch b := raw[i]; b {
 78		case '"':
 79			eaten++
 80			buf = buf[0:j]
 81			return i + 1, string(buf)
 82		case '\\':
 83			if len(raw) < i+2 {
 84				return -1, ""
 85			}
 86			buf[j] = httpUnquotePair(raw[i+1])
 87			eaten += 2
 88			j++
 89			i++
 90		default:
 91			if isQdText(b) {
 92				buf[j] = b
 93			} else {
 94				buf[j] = '?'
 95			}
 96			eaten++
 97			j++
 98		}
 99	}
100	return -1, ""
101}
102
103// This is a best effort parse, so errors are not returned, instead not all of
104// the input string might be parsed. result is always non-nil.
105func httpSplitFieldValue(fv string) (eaten int, result []string) {
106	result = make([]string, 0, len(fv))
107	raw := []byte(fv)
108	i := 0
109	chunk := ""
110	for i < len(raw) {
111		b := raw[i]
112		switch {
113		case b == '"':
114			eaten, unq := httpUnquote(raw[i:len(raw)])
115			if eaten < 0 {
116				return i, result
117			} else {
118				i += eaten
119				chunk += unq
120			}
121		case isSeparator(b):
122			if chunk != "" {
123				result = result[0 : len(result)+1]
124				result[len(result)-1] = chunk
125				chunk = ""
126			}
127			i++
128		case isToken(b):
129			chunk += string(b)
130			i++
131		case b == '\n' || b == '\r':
132			i++
133		default:
134			chunk += "?"
135			i++
136		}
137	}
138	if chunk != "" {
139		result = result[0 : len(result)+1]
140		result[len(result)-1] = chunk
141		chunk = ""
142	}
143	return i, result
144}