PageRenderTime 56ms CodeModel.GetById 27ms RepoModel.GetById 1ms app.codeStats 0ms

/Godeps/_workspace/src/github.com/BurntSushi/toml/parse.go

https://gitlab.com/davejlong/gitlab-ci-multi-runner
Go | 498 lines | 386 code | 48 blank | 64 comment | 69 complexity | 96aebcd357cd239cb69651f1f785cd6f MD5 | raw file
  1. package toml
  2. import (
  3. "fmt"
  4. "log"
  5. "strconv"
  6. "strings"
  7. "time"
  8. "unicode"
  9. "unicode/utf8"
  10. )
  11. type parser struct {
  12. mapping map[string]interface{}
  13. types map[string]tomlType
  14. lx *lexer
  15. // A list of keys in the order that they appear in the TOML data.
  16. ordered []Key
  17. // the full key for the current hash in scope
  18. context Key
  19. // the base key name for everything except hashes
  20. currentKey string
  21. // rough approximation of line number
  22. approxLine int
  23. // A map of 'key.group.names' to whether they were created implicitly.
  24. implicits map[string]bool
  25. }
  26. type parseError string
  27. func (pe parseError) Error() string {
  28. return string(pe)
  29. }
  30. func parse(data string) (p *parser, err error) {
  31. defer func() {
  32. if r := recover(); r != nil {
  33. var ok bool
  34. if err, ok = r.(parseError); ok {
  35. return
  36. }
  37. panic(r)
  38. }
  39. }()
  40. p = &parser{
  41. mapping: make(map[string]interface{}),
  42. types: make(map[string]tomlType),
  43. lx: lex(data),
  44. ordered: make([]Key, 0),
  45. implicits: make(map[string]bool),
  46. }
  47. for {
  48. item := p.next()
  49. if item.typ == itemEOF {
  50. break
  51. }
  52. p.topLevel(item)
  53. }
  54. return p, nil
  55. }
  56. func (p *parser) panicf(format string, v ...interface{}) {
  57. msg := fmt.Sprintf("Near line %d (last key parsed '%s'): %s",
  58. p.approxLine, p.current(), fmt.Sprintf(format, v...))
  59. panic(parseError(msg))
  60. }
  61. func (p *parser) next() item {
  62. it := p.lx.nextItem()
  63. if it.typ == itemError {
  64. p.panicf("%s", it.val)
  65. }
  66. return it
  67. }
  68. func (p *parser) bug(format string, v ...interface{}) {
  69. log.Fatalf("BUG: %s\n\n", fmt.Sprintf(format, v...))
  70. }
  71. func (p *parser) expect(typ itemType) item {
  72. it := p.next()
  73. p.assertEqual(typ, it.typ)
  74. return it
  75. }
  76. func (p *parser) assertEqual(expected, got itemType) {
  77. if expected != got {
  78. p.bug("Expected '%s' but got '%s'.", expected, got)
  79. }
  80. }
  81. func (p *parser) topLevel(item item) {
  82. switch item.typ {
  83. case itemCommentStart:
  84. p.approxLine = item.line
  85. p.expect(itemText)
  86. case itemTableStart:
  87. kg := p.next()
  88. p.approxLine = kg.line
  89. var key Key
  90. for ; kg.typ != itemTableEnd && kg.typ != itemEOF; kg = p.next() {
  91. key = append(key, p.keyString(kg))
  92. }
  93. p.assertEqual(itemTableEnd, kg.typ)
  94. p.establishContext(key, false)
  95. p.setType("", tomlHash)
  96. p.ordered = append(p.ordered, key)
  97. case itemArrayTableStart:
  98. kg := p.next()
  99. p.approxLine = kg.line
  100. var key Key
  101. for ; kg.typ != itemArrayTableEnd && kg.typ != itemEOF; kg = p.next() {
  102. key = append(key, p.keyString(kg))
  103. }
  104. p.assertEqual(itemArrayTableEnd, kg.typ)
  105. p.establishContext(key, true)
  106. p.setType("", tomlArrayHash)
  107. p.ordered = append(p.ordered, key)
  108. case itemKeyStart:
  109. kname := p.next()
  110. p.approxLine = kname.line
  111. p.currentKey = p.keyString(kname)
  112. val, typ := p.value(p.next())
  113. p.setValue(p.currentKey, val)
  114. p.setType(p.currentKey, typ)
  115. p.ordered = append(p.ordered, p.context.add(p.currentKey))
  116. p.currentKey = ""
  117. default:
  118. p.bug("Unexpected type at top level: %s", item.typ)
  119. }
  120. }
  121. // Gets a string for a key (or part of a key in a table name).
  122. func (p *parser) keyString(it item) string {
  123. switch it.typ {
  124. case itemText:
  125. return it.val
  126. case itemString, itemMultilineString,
  127. itemRawString, itemRawMultilineString:
  128. s, _ := p.value(it)
  129. return s.(string)
  130. default:
  131. p.bug("Unexpected key type: %s", it.typ)
  132. panic("unreachable")
  133. }
  134. }
  135. // value translates an expected value from the lexer into a Go value wrapped
  136. // as an empty interface.
  137. func (p *parser) value(it item) (interface{}, tomlType) {
  138. switch it.typ {
  139. case itemString:
  140. return p.replaceEscapes(it.val), p.typeOfPrimitive(it)
  141. case itemMultilineString:
  142. trimmed := stripFirstNewline(stripEscapedWhitespace(it.val))
  143. return p.replaceEscapes(trimmed), p.typeOfPrimitive(it)
  144. case itemRawString:
  145. return it.val, p.typeOfPrimitive(it)
  146. case itemRawMultilineString:
  147. return stripFirstNewline(it.val), p.typeOfPrimitive(it)
  148. case itemBool:
  149. switch it.val {
  150. case "true":
  151. return true, p.typeOfPrimitive(it)
  152. case "false":
  153. return false, p.typeOfPrimitive(it)
  154. }
  155. p.bug("Expected boolean value, but got '%s'.", it.val)
  156. case itemInteger:
  157. num, err := strconv.ParseInt(it.val, 10, 64)
  158. if err != nil {
  159. // See comment below for floats describing why we make a
  160. // distinction between a bug and a user error.
  161. if e, ok := err.(*strconv.NumError); ok &&
  162. e.Err == strconv.ErrRange {
  163. p.panicf("Integer '%s' is out of the range of 64-bit "+
  164. "signed integers.", it.val)
  165. } else {
  166. p.bug("Expected integer value, but got '%s'.", it.val)
  167. }
  168. }
  169. return num, p.typeOfPrimitive(it)
  170. case itemFloat:
  171. num, err := strconv.ParseFloat(it.val, 64)
  172. if err != nil {
  173. // Distinguish float values. Normally, it'd be a bug if the lexer
  174. // provides an invalid float, but it's possible that the float is
  175. // out of range of valid values (which the lexer cannot determine).
  176. // So mark the former as a bug but the latter as a legitimate user
  177. // error.
  178. //
  179. // This is also true for integers.
  180. if e, ok := err.(*strconv.NumError); ok &&
  181. e.Err == strconv.ErrRange {
  182. p.panicf("Float '%s' is out of the range of 64-bit "+
  183. "IEEE-754 floating-point numbers.", it.val)
  184. } else {
  185. p.bug("Expected float value, but got '%s'.", it.val)
  186. }
  187. }
  188. return num, p.typeOfPrimitive(it)
  189. case itemDatetime:
  190. t, err := time.Parse("2006-01-02T15:04:05Z", it.val)
  191. if err != nil {
  192. p.bug("Expected Zulu formatted DateTime, but got '%s'.", it.val)
  193. }
  194. return t, p.typeOfPrimitive(it)
  195. case itemArray:
  196. array := make([]interface{}, 0)
  197. types := make([]tomlType, 0)
  198. for it = p.next(); it.typ != itemArrayEnd; it = p.next() {
  199. if it.typ == itemCommentStart {
  200. p.expect(itemText)
  201. continue
  202. }
  203. val, typ := p.value(it)
  204. array = append(array, val)
  205. types = append(types, typ)
  206. }
  207. return array, p.typeOfArray(types)
  208. }
  209. p.bug("Unexpected value type: %s", it.typ)
  210. panic("unreachable")
  211. }
  212. // establishContext sets the current context of the parser,
  213. // where the context is either a hash or an array of hashes. Which one is
  214. // set depends on the value of the `array` parameter.
  215. //
  216. // Establishing the context also makes sure that the key isn't a duplicate, and
  217. // will create implicit hashes automatically.
  218. func (p *parser) establishContext(key Key, array bool) {
  219. var ok bool
  220. // Always start at the top level and drill down for our context.
  221. hashContext := p.mapping
  222. keyContext := make(Key, 0)
  223. // We only need implicit hashes for key[0:-1]
  224. for _, k := range key[0 : len(key)-1] {
  225. _, ok = hashContext[k]
  226. keyContext = append(keyContext, k)
  227. // No key? Make an implicit hash and move on.
  228. if !ok {
  229. p.addImplicit(keyContext)
  230. hashContext[k] = make(map[string]interface{})
  231. }
  232. // If the hash context is actually an array of tables, then set
  233. // the hash context to the last element in that array.
  234. //
  235. // Otherwise, it better be a table, since this MUST be a key group (by
  236. // virtue of it not being the last element in a key).
  237. switch t := hashContext[k].(type) {
  238. case []map[string]interface{}:
  239. hashContext = t[len(t)-1]
  240. case map[string]interface{}:
  241. hashContext = t
  242. default:
  243. p.panicf("Key '%s' was already created as a hash.", keyContext)
  244. }
  245. }
  246. p.context = keyContext
  247. if array {
  248. // If this is the first element for this array, then allocate a new
  249. // list of tables for it.
  250. k := key[len(key)-1]
  251. if _, ok := hashContext[k]; !ok {
  252. hashContext[k] = make([]map[string]interface{}, 0, 5)
  253. }
  254. // Add a new table. But make sure the key hasn't already been used
  255. // for something else.
  256. if hash, ok := hashContext[k].([]map[string]interface{}); ok {
  257. hashContext[k] = append(hash, make(map[string]interface{}))
  258. } else {
  259. p.panicf("Key '%s' was already created and cannot be used as "+
  260. "an array.", keyContext)
  261. }
  262. } else {
  263. p.setValue(key[len(key)-1], make(map[string]interface{}))
  264. }
  265. p.context = append(p.context, key[len(key)-1])
  266. }
  267. // setValue sets the given key to the given value in the current context.
  268. // It will make sure that the key hasn't already been defined, account for
  269. // implicit key groups.
  270. func (p *parser) setValue(key string, value interface{}) {
  271. var tmpHash interface{}
  272. var ok bool
  273. hash := p.mapping
  274. keyContext := make(Key, 0)
  275. for _, k := range p.context {
  276. keyContext = append(keyContext, k)
  277. if tmpHash, ok = hash[k]; !ok {
  278. p.bug("Context for key '%s' has not been established.", keyContext)
  279. }
  280. switch t := tmpHash.(type) {
  281. case []map[string]interface{}:
  282. // The context is a table of hashes. Pick the most recent table
  283. // defined as the current hash.
  284. hash = t[len(t)-1]
  285. case map[string]interface{}:
  286. hash = t
  287. default:
  288. p.bug("Expected hash to have type 'map[string]interface{}', but "+
  289. "it has '%T' instead.", tmpHash)
  290. }
  291. }
  292. keyContext = append(keyContext, key)
  293. if _, ok := hash[key]; ok {
  294. // Typically, if the given key has already been set, then we have
  295. // to raise an error since duplicate keys are disallowed. However,
  296. // it's possible that a key was previously defined implicitly. In this
  297. // case, it is allowed to be redefined concretely. (See the
  298. // `tests/valid/implicit-and-explicit-after.toml` test in `toml-test`.)
  299. //
  300. // But we have to make sure to stop marking it as an implicit. (So that
  301. // another redefinition provokes an error.)
  302. //
  303. // Note that since it has already been defined (as a hash), we don't
  304. // want to overwrite it. So our business is done.
  305. if p.isImplicit(keyContext) {
  306. p.removeImplicit(keyContext)
  307. return
  308. }
  309. // Otherwise, we have a concrete key trying to override a previous
  310. // key, which is *always* wrong.
  311. p.panicf("Key '%s' has already been defined.", keyContext)
  312. }
  313. hash[key] = value
  314. }
  315. // setType sets the type of a particular value at a given key.
  316. // It should be called immediately AFTER setValue.
  317. //
  318. // Note that if `key` is empty, then the type given will be applied to the
  319. // current context (which is either a table or an array of tables).
  320. func (p *parser) setType(key string, typ tomlType) {
  321. keyContext := make(Key, 0, len(p.context)+1)
  322. for _, k := range p.context {
  323. keyContext = append(keyContext, k)
  324. }
  325. if len(key) > 0 { // allow type setting for hashes
  326. keyContext = append(keyContext, key)
  327. }
  328. p.types[keyContext.String()] = typ
  329. }
  330. // addImplicit sets the given Key as having been created implicitly.
  331. func (p *parser) addImplicit(key Key) {
  332. p.implicits[key.String()] = true
  333. }
  334. // removeImplicit stops tagging the given key as having been implicitly
  335. // created.
  336. func (p *parser) removeImplicit(key Key) {
  337. p.implicits[key.String()] = false
  338. }
  339. // isImplicit returns true if the key group pointed to by the key was created
  340. // implicitly.
  341. func (p *parser) isImplicit(key Key) bool {
  342. return p.implicits[key.String()]
  343. }
  344. // current returns the full key name of the current context.
  345. func (p *parser) current() string {
  346. if len(p.currentKey) == 0 {
  347. return p.context.String()
  348. }
  349. if len(p.context) == 0 {
  350. return p.currentKey
  351. }
  352. return fmt.Sprintf("%s.%s", p.context, p.currentKey)
  353. }
  354. func stripFirstNewline(s string) string {
  355. if len(s) == 0 || s[0] != '\n' {
  356. return s
  357. }
  358. return s[1:len(s)]
  359. }
  360. func stripEscapedWhitespace(s string) string {
  361. esc := strings.Split(s, "\\\n")
  362. if len(esc) > 1 {
  363. for i := 1; i < len(esc); i++ {
  364. esc[i] = strings.TrimLeftFunc(esc[i], unicode.IsSpace)
  365. }
  366. }
  367. return strings.Join(esc, "")
  368. }
  369. func (p *parser) replaceEscapes(str string) string {
  370. var replaced []rune
  371. s := []byte(str)
  372. r := 0
  373. for r < len(s) {
  374. if s[r] != '\\' {
  375. c, size := utf8.DecodeRune(s[r:])
  376. r += size
  377. replaced = append(replaced, c)
  378. continue
  379. }
  380. r += 1
  381. if r >= len(s) {
  382. p.bug("Escape sequence at end of string.")
  383. return ""
  384. }
  385. switch s[r] {
  386. default:
  387. p.bug("Expected valid escape code after \\, but got %q.", s[r])
  388. return ""
  389. case 'b':
  390. replaced = append(replaced, rune(0x0008))
  391. r += 1
  392. case 't':
  393. replaced = append(replaced, rune(0x0009))
  394. r += 1
  395. case 'n':
  396. replaced = append(replaced, rune(0x000A))
  397. r += 1
  398. case 'f':
  399. replaced = append(replaced, rune(0x000C))
  400. r += 1
  401. case 'r':
  402. replaced = append(replaced, rune(0x000D))
  403. r += 1
  404. case '"':
  405. replaced = append(replaced, rune(0x0022))
  406. r += 1
  407. case '\\':
  408. replaced = append(replaced, rune(0x005C))
  409. r += 1
  410. case 'u':
  411. // At this point, we know we have a Unicode escape of the form
  412. // `uXXXX` at [r, r+5). (Because the lexer guarantees this
  413. // for us.)
  414. escaped := p.asciiEscapeToUnicode(s[r+1 : r+5])
  415. replaced = append(replaced, escaped)
  416. r += 5
  417. case 'U':
  418. // At this point, we know we have a Unicode escape of the form
  419. // `uXXXX` at [r, r+9). (Because the lexer guarantees this
  420. // for us.)
  421. escaped := p.asciiEscapeToUnicode(s[r+1 : r+9])
  422. replaced = append(replaced, escaped)
  423. r += 9
  424. }
  425. }
  426. return string(replaced)
  427. }
  428. func (p *parser) asciiEscapeToUnicode(bs []byte) rune {
  429. s := string(bs)
  430. hex, err := strconv.ParseUint(strings.ToLower(s), 16, 32)
  431. if err != nil {
  432. p.bug("Could not parse '%s' as a hexadecimal number, but the "+
  433. "lexer claims it's OK: %s", s, err)
  434. }
  435. // BUG(burntsushi)
  436. // I honestly don't understand how this works. I can't seem
  437. // to find a way to make this fail. I figured this would fail on invalid
  438. // UTF-8 characters like U+DCFF, but it doesn't.
  439. if !utf8.ValidString(string(rune(hex))) {
  440. p.panicf("Escaped character '\\u%s' is not valid UTF-8.", s)
  441. }
  442. return rune(hex)
  443. }
  444. func isStringType(ty itemType) bool {
  445. return ty == itemString || ty == itemMultilineString ||
  446. ty == itemRawString || ty == itemRawMultilineString
  447. }