PageRenderTime 53ms CodeModel.GetById 23ms RepoModel.GetById 0ms app.codeStats 0ms

/third_party/gofrontend/libgo/go/go/scanner/scanner_test.go

http://github.com/axw/llgo
Go | 810 lines | 749 code | 39 blank | 22 comment | 28 complexity | d1680d01a02d04e8d51c5be6172e23ad MD5 | raw file
Possible License(s): BSD-3-Clause, MIT
  1. // Copyright 2009 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package scanner
  5. import (
  6. "go/token"
  7. "io/ioutil"
  8. "os"
  9. "path/filepath"
  10. "runtime"
  11. "testing"
  12. )
  13. var fset = token.NewFileSet()
  14. const /* class */ (
  15. special = iota
  16. literal
  17. operator
  18. keyword
  19. )
  20. func tokenclass(tok token.Token) int {
  21. switch {
  22. case tok.IsLiteral():
  23. return literal
  24. case tok.IsOperator():
  25. return operator
  26. case tok.IsKeyword():
  27. return keyword
  28. }
  29. return special
  30. }
// elt describes one expected scan result: the token kind, its literal
// text exactly as it appears in the source, and its token class.
type elt struct {
	tok   token.Token
	lit   string
	class int
}

// tokens lists every token the scanner is expected to produce, in order,
// for the source generated below. It covers comments, identifiers, all
// basic literal kinds, every operator/delimiter, and every keyword.
var tokens = [...]elt{
	// Special tokens
	{token.COMMENT, "/* a comment */", special},
	{token.COMMENT, "// a comment \n", special},
	{token.COMMENT, "/*\r*/", special},
	{token.COMMENT, "//\r\n", special},

	// Identifiers and basic type literals
	{token.IDENT, "foobar", literal},
	{token.IDENT, "a۰۱۸", literal},
	{token.IDENT, "foo६४", literal},
	{token.IDENT, "bar9876", literal},
	{token.IDENT, "ŝ", literal},    // was bug (issue 4000)
	{token.IDENT, "ŝfoo", literal}, // was bug (issue 4000)
	{token.INT, "0", literal},
	{token.INT, "1", literal},
	{token.INT, "123456789012345678890", literal},
	{token.INT, "01234567", literal},
	{token.INT, "0xcafebabe", literal},
	{token.FLOAT, "0.", literal},
	{token.FLOAT, ".0", literal},
	{token.FLOAT, "3.14159265", literal},
	{token.FLOAT, "1e0", literal},
	{token.FLOAT, "1e+100", literal},
	{token.FLOAT, "1e-100", literal},
	{token.FLOAT, "2.71828e-1000", literal},
	{token.IMAG, "0i", literal},
	{token.IMAG, "1i", literal},
	{token.IMAG, "012345678901234567889i", literal},
	{token.IMAG, "123456789012345678890i", literal},
	{token.IMAG, "0.i", literal},
	{token.IMAG, ".0i", literal},
	{token.IMAG, "3.14159265i", literal},
	{token.IMAG, "1e0i", literal},
	{token.IMAG, "1e+100i", literal},
	{token.IMAG, "1e-100i", literal},
	{token.IMAG, "2.71828e-1000i", literal},
	{token.CHAR, "'a'", literal},
	{token.CHAR, "'\\000'", literal},
	{token.CHAR, "'\\xFF'", literal},
	{token.CHAR, "'\\uff16'", literal},
	{token.CHAR, "'\\U0000ff16'", literal},
	{token.STRING, "`foobar`", literal},
	// Raw string literal containing a real newline.
	{token.STRING, "`" + `foo
bar` +
		"`",
		literal,
	},
	{token.STRING, "`\r`", literal},
	{token.STRING, "`foo\r\nbar`", literal},

	// Operators and delimiters
	{token.ADD, "+", operator},
	{token.SUB, "-", operator},
	{token.MUL, "*", operator},
	{token.QUO, "/", operator},
	{token.REM, "%", operator},
	{token.AND, "&", operator},
	{token.OR, "|", operator},
	{token.XOR, "^", operator},
	{token.SHL, "<<", operator},
	{token.SHR, ">>", operator},
	{token.AND_NOT, "&^", operator},
	{token.ADD_ASSIGN, "+=", operator},
	{token.SUB_ASSIGN, "-=", operator},
	{token.MUL_ASSIGN, "*=", operator},
	{token.QUO_ASSIGN, "/=", operator},
	{token.REM_ASSIGN, "%=", operator},
	{token.AND_ASSIGN, "&=", operator},
	{token.OR_ASSIGN, "|=", operator},
	{token.XOR_ASSIGN, "^=", operator},
	{token.SHL_ASSIGN, "<<=", operator},
	{token.SHR_ASSIGN, ">>=", operator},
	{token.AND_NOT_ASSIGN, "&^=", operator},
	{token.LAND, "&&", operator},
	{token.LOR, "||", operator},
	{token.ARROW, "<-", operator},
	{token.INC, "++", operator},
	{token.DEC, "--", operator},
	{token.EQL, "==", operator},
	{token.LSS, "<", operator},
	{token.GTR, ">", operator},
	{token.ASSIGN, "=", operator},
	{token.NOT, "!", operator},
	{token.NEQ, "!=", operator},
	{token.LEQ, "<=", operator},
	{token.GEQ, ">=", operator},
	{token.DEFINE, ":=", operator},
	{token.ELLIPSIS, "...", operator},
	{token.LPAREN, "(", operator},
	{token.LBRACK, "[", operator},
	{token.LBRACE, "{", operator},
	{token.COMMA, ",", operator},
	{token.PERIOD, ".", operator},
	{token.RPAREN, ")", operator},
	{token.RBRACK, "]", operator},
	{token.RBRACE, "}", operator},
	{token.SEMICOLON, ";", operator},
	{token.COLON, ":", operator},

	// Keywords
	{token.BREAK, "break", keyword},
	{token.CASE, "case", keyword},
	{token.CHAN, "chan", keyword},
	{token.CONST, "const", keyword},
	{token.CONTINUE, "continue", keyword},
	{token.DEFAULT, "default", keyword},
	{token.DEFER, "defer", keyword},
	{token.ELSE, "else", keyword},
	{token.FALLTHROUGH, "fallthrough", keyword},
	{token.FOR, "for", keyword},
	{token.FUNC, "func", keyword},
	{token.GO, "go", keyword},
	{token.GOTO, "goto", keyword},
	{token.IF, "if", keyword},
	{token.IMPORT, "import", keyword},
	{token.INTERFACE, "interface", keyword},
	{token.MAP, "map", keyword},
	{token.PACKAGE, "package", keyword},
	{token.RANGE, "range", keyword},
	{token.RETURN, "return", keyword},
	{token.SELECT, "select", keyword},
	{token.STRUCT, "struct", keyword},
	{token.SWITCH, "switch", keyword},
	{token.TYPE, "type", keyword},
	{token.VAR, "var", keyword},
}
  160. const whitespace = " \t \n\n\n" // to separate tokens
  161. var source = func() []byte {
  162. var src []byte
  163. for _, t := range tokens {
  164. src = append(src, t.lit...)
  165. src = append(src, whitespace...)
  166. }
  167. return src
  168. }()
  169. func newlineCount(s string) int {
  170. n := 0
  171. for i := 0; i < len(s); i++ {
  172. if s[i] == '\n' {
  173. n++
  174. }
  175. }
  176. return n
  177. }
  178. func checkPos(t *testing.T, lit string, p token.Pos, expected token.Position) {
  179. pos := fset.Position(p)
  180. if pos.Filename != expected.Filename {
  181. t.Errorf("bad filename for %q: got %s, expected %s", lit, pos.Filename, expected.Filename)
  182. }
  183. if pos.Offset != expected.Offset {
  184. t.Errorf("bad position for %q: got %d, expected %d", lit, pos.Offset, expected.Offset)
  185. }
  186. if pos.Line != expected.Line {
  187. t.Errorf("bad line for %q: got %d, expected %d", lit, pos.Line, expected.Line)
  188. }
  189. if pos.Column != expected.Column {
  190. t.Errorf("bad column for %q: got %d, expected %d", lit, pos.Column, expected.Column)
  191. }
  192. }
// Verify that calling Scan() provides the correct results.
//
// The scanner is run over the generated source (every entry of the
// tokens table separated by whitespace) and each result is checked for
// position, token kind, token class, and literal value.
func TestScan(t *testing.T) {
	whitespace_linecount := newlineCount(whitespace)

	// error handler: any error reported by the scanner fails the test
	eh := func(_ token.Position, msg string) {
		t.Errorf("error handler called (msg = %s)", msg)
	}

	// verify scan
	var s Scanner
	s.Init(fset.AddFile("", fset.Base(), len(source)), source, eh, ScanComments|dontInsertSemis)

	// set up expected position
	epos := token.Position{
		Filename: "",
		Offset:   0,
		Line:     1,
		Column:   1,
	}

	index := 0
	for {
		pos, tok, lit := s.Scan()

		// check position
		if tok == token.EOF {
			// correction for EOF: the source ends with the trailing
			// whitespace of the last token, so the incremental update
			// below over-counts for the final iteration
			epos.Line = newlineCount(string(source))
			epos.Column = 2
		}
		checkPos(t, lit, pos, epos)

		// check token; past the end of the table, EOF is expected
		e := elt{token.EOF, "", special}
		if index < len(tokens) {
			e = tokens[index]
			index++
		}
		if tok != e.tok {
			t.Errorf("bad token for %q: got %s, expected %s", lit, tok, e.tok)
		}

		// check token class
		if tokenclass(tok) != e.class {
			t.Errorf("bad class for %q: got %d, expected %d", lit, tokenclass(tok), e.class)
		}

		// check literal
		elit := ""
		switch e.tok {
		case token.COMMENT:
			// no CRs in comments
			elit = string(stripCR([]byte(e.lit)))
			//-style comment literal doesn't contain newline
			if elit[1] == '/' {
				elit = elit[0 : len(elit)-1]
			}
		case token.IDENT:
			elit = e.lit
		case token.SEMICOLON:
			elit = ";"
		default:
			if e.tok.IsLiteral() {
				// no CRs in raw string literals
				elit = e.lit
				if elit[0] == '`' {
					elit = string(stripCR([]byte(elit)))
				}
			} else if e.tok.IsKeyword() {
				elit = e.lit
			}
		}
		if lit != elit {
			t.Errorf("bad literal for %q: got %q, expected %q", lit, lit, elit)
		}

		if tok == token.EOF {
			break
		}

		// update position for the next expected token: advance past
		// this literal plus the separating whitespace
		epos.Offset += len(e.lit) + len(whitespace)
		epos.Line += newlineCount(e.lit) + whitespace_linecount
	}

	if s.ErrorCount != 0 {
		t.Errorf("found %d errors", s.ErrorCount)
	}
}
// checkSemi scans line and verifies semicolon insertion. Per the lines
// table convention, '#' and '$' are illegal characters used as markers:
// '#' marks a semicolon written in the source (literal ";"), '$' marks
// an automatically inserted one (literal "\n"). At every marker the
// next token must be a SEMICOLON with the matching literal and position;
// anywhere else a SEMICOLON is a failure.
func checkSemi(t *testing.T, line string, mode Mode) {
	var S Scanner
	file := fset.AddFile("TestSemis", fset.Base(), len(line))
	S.Init(file, []byte(line), nil, mode)
	pos, tok, lit := S.Scan()
	for tok != token.EOF {
		if tok == token.ILLEGAL {
			// the illegal token literal indicates what
			// kind of semicolon literal to expect
			semiLit := "\n"
			if lit[0] == '#' {
				semiLit = ";"
			}
			// next token must be a semicolon, one byte past the marker
			semiPos := file.Position(pos)
			semiPos.Offset++
			semiPos.Column++
			pos, tok, lit = S.Scan()
			if tok == token.SEMICOLON {
				if lit != semiLit {
					t.Errorf(`bad literal for %q: got %q, expected %q`, line, lit, semiLit)
				}
				checkPos(t, line, pos, semiPos)
			} else {
				t.Errorf("bad token for %q: got %s, expected ;", line, tok)
			}
		} else if tok == token.SEMICOLON {
			// a semicolon without a preceding marker is unexpected
			t.Errorf("bad token for %q: got ;, expected no ;", line)
		}
		pos, tok, lit = S.Scan()
	}
}
// lines is the test table for semicolon insertion (see checkSemi).
// Each entry is a small source fragment annotated with marker bytes.
var lines = []string{
	// # indicates a semicolon present in the source
	// $ indicates an automatically inserted semicolon
	"",
	"\ufeff#;", // first BOM is ignored
	"#;",
	"foo$\n",
	"123$\n",
	"1.2$\n",
	"'x'$\n",
	`"x"` + "$\n",
	"`x`$\n",

	// binary operators never trigger semicolon insertion
	"+\n",
	"-\n",
	"*\n",
	"/\n",
	"%\n",

	"&\n",
	"|\n",
	"^\n",
	"<<\n",
	">>\n",
	"&^\n",

	"+=\n",
	"-=\n",
	"*=\n",
	"/=\n",
	"%=\n",

	"&=\n",
	"|=\n",
	"^=\n",
	"<<=\n",
	">>=\n",
	"&^=\n",

	"&&\n",
	"||\n",
	"<-\n",
	"++$\n", // ++ and -- do trigger insertion
	"--$\n",

	"==\n",
	"<\n",
	">\n",
	"=\n",
	"!\n",

	"!=\n",
	"<=\n",
	">=\n",
	":=\n",
	"...\n",

	"(\n",
	"[\n",
	"{\n",
	",\n",
	".\n",

	// closing delimiters trigger insertion
	")$\n",
	"]$\n",
	"}$\n",
	"#;\n",
	":\n",

	// only break/continue/fallthrough/return among keywords insert
	"break$\n",
	"case\n",
	"chan\n",
	"const\n",
	"continue$\n",

	"default\n",
	"defer\n",
	"else\n",
	"fallthrough$\n",
	"for\n",

	"func\n",
	"go\n",
	"goto\n",
	"if\n",
	"import\n",

	"interface\n",
	"map\n",
	"package\n",
	"range\n",
	"return$\n",

	"select\n",
	"struct\n",
	"switch\n",
	"type\n",
	"var\n",

	// comments interacting with semicolon insertion
	"foo$//comment\n",
	"foo$//comment",
	"foo$/*comment*/\n",
	"foo$/*\n*/",
	"foo$/*comment*/ \n",
	"foo$/*\n*/ ",

	"foo $// comment\n",
	"foo $// comment",
	"foo $/*comment*/\n",
	"foo $/*\n*/",
	"foo $/* */ /* \n */ bar$/**/\n",
	"foo $/*0*/ /*1*/ /*2*/\n",

	"foo $/*comment*/ \n",
	"foo $/*0*/ /*1*/ /*2*/ \n",
	"foo $/**/ /*-------------*/ /*----\n*/bar $/* \n*/baa$\n",
	"foo $/* an EOF terminates a line */",
	"foo $/* an EOF terminates a line */ /*",
	"foo $/* an EOF terminates a line */ //",

	"package main$\n\nfunc main() {\n\tif {\n\t\treturn /* */ }$\n}$\n",
	"package main$",
}
  409. func TestSemis(t *testing.T) {
  410. for _, line := range lines {
  411. checkSemi(t, line, 0)
  412. checkSemi(t, line, ScanComments)
  413. // if the input ended in newlines, the input must tokenize the
  414. // same with or without those newlines
  415. for i := len(line) - 1; i >= 0 && line[i] == '\n'; i-- {
  416. checkSemi(t, line[0:i], 0)
  417. checkSemi(t, line[0:i], ScanComments)
  418. }
  419. }
  420. }
// segment describes one chunk of appended source text together with
// the filename and line number the next scanned token should report.
type segment struct {
	srcline  string // a line of source text
	filename string // filename for current token
	line     int    // line number for current token
}

// segments drives TestLineComments: the srcline fields are concatenated
// into one source, and after each segment's single token is scanned its
// reported filename/line must match. Relative //line filenames are
// resolved against the file's directory ("dir").
var segments = []segment{
	// exactly one token per line since the test consumes one token per segment
	{" line1", filepath.Join("dir", "TestLineComments"), 1},
	{"\nline2", filepath.Join("dir", "TestLineComments"), 2},
	{"\nline3 //line File1.go:100", filepath.Join("dir", "TestLineComments"), 3}, // bad line comment, ignored
	{"\nline4", filepath.Join("dir", "TestLineComments"), 4},
	{"\n//line File1.go:100\n line100", filepath.Join("dir", "File1.go"), 100},
	{"\n//line \t :42\n line1", "", 42},
	{"\n//line File2.go:200\n line200", filepath.Join("dir", "File2.go"), 200},
	{"\n//line foo\t:42\n line42", filepath.Join("dir", "foo"), 42},
	{"\n //line foo:42\n line44", filepath.Join("dir", "foo"), 44},           // bad line comment, ignored
	{"\n//line foo 42\n line46", filepath.Join("dir", "foo"), 46},            // bad line comment, ignored
	{"\n//line foo:42 extra text\n line48", filepath.Join("dir", "foo"), 48}, // bad line comment, ignored
	{"\n//line ./foo:42\n line42", filepath.Join("dir", "foo"), 42},
	{"\n//line a/b/c/File1.go:100\n line100", filepath.Join("dir", "a", "b", "c", "File1.go"), 100},
}
// unixsegments holds //line cases with absolute Unix paths; only run
// on non-Windows platforms.
var unixsegments = []segment{
	{"\n//line /bar:42\n line42", "/bar", 42},
}

// winsegments holds //line cases with absolute Windows paths; only run
// when runtime.GOOS == "windows".
var winsegments = []segment{
	{"\n//line c:\\bar:42\n line42", "c:\\bar", 42},
	{"\n//line c:\\dir\\File1.go:100\n line100", "c:\\dir\\File1.go", 100},
}
// Verify that comments of the form "//line filename:line" are interpreted correctly.
func TestLineComments(t *testing.T) {
	segs := segments
	// add the platform-specific absolute-path cases
	if runtime.GOOS == "windows" {
		segs = append(segs, winsegments...)
	} else {
		segs = append(segs, unixsegments...)
	}

	// make source by concatenating all segment fragments
	var src string
	for _, e := range segs {
		src += e.srcline
	}

	// verify scan: one token per segment
	var S Scanner
	file := fset.AddFile(filepath.Join("dir", "TestLineComments"), fset.Base(), len(src))
	S.Init(file, []byte(src), nil, dontInsertSemis)
	for _, s := range segs {
		p, _, lit := S.Scan()
		pos := file.Position(p)
		// Offset and Column are taken from the actual position, so
		// only Filename and Line are effectively being checked here.
		checkPos(t, lit, p, token.Position{
			Filename: s.filename,
			Offset:   pos.Offset,
			Line:     s.line,
			Column:   pos.Column,
		})
	}

	if S.ErrorCount != 0 {
		t.Errorf("found %d errors", S.ErrorCount)
	}
}
  480. // Verify that initializing the same scanner more than once works correctly.
  481. func TestInit(t *testing.T) {
  482. var s Scanner
  483. // 1st init
  484. src1 := "if true { }"
  485. f1 := fset.AddFile("src1", fset.Base(), len(src1))
  486. s.Init(f1, []byte(src1), nil, dontInsertSemis)
  487. if f1.Size() != len(src1) {
  488. t.Errorf("bad file size: got %d, expected %d", f1.Size(), len(src1))
  489. }
  490. s.Scan() // if
  491. s.Scan() // true
  492. _, tok, _ := s.Scan() // {
  493. if tok != token.LBRACE {
  494. t.Errorf("bad token: got %s, expected %s", tok, token.LBRACE)
  495. }
  496. // 2nd init
  497. src2 := "go true { ]"
  498. f2 := fset.AddFile("src2", fset.Base(), len(src2))
  499. s.Init(f2, []byte(src2), nil, dontInsertSemis)
  500. if f2.Size() != len(src2) {
  501. t.Errorf("bad file size: got %d, expected %d", f2.Size(), len(src2))
  502. }
  503. _, tok, _ = s.Scan() // go
  504. if tok != token.GO {
  505. t.Errorf("bad token: got %s, expected %s", tok, token.GO)
  506. }
  507. if s.ErrorCount != 0 {
  508. t.Errorf("found %d errors", s.ErrorCount)
  509. }
  510. }
// TestStdErrorHander exercises ErrorList as an error handler: it scans
// a source full of illegal '@' characters and checks the raw, sorted,
// and de-duplicated (one per line) error counts.
// NOTE(review): the name is missing the 'l' of "Handler" — a historical
// typo; renaming would change the test's identity, so it is kept.
func TestStdErrorHander(t *testing.T) {
	const src = "@\n" + // illegal character, cause an error
		"@ @\n" + // two errors on the same line
		"//line File2:20\n" +
		"@\n" + // different file, but same line
		"//line File2:1\n" +
		"@ @\n" + // same file, decreasing line number
		"//line File1:1\n" +
		"@ @ @" // original file, line 1 again

	// collect every reported error in an ErrorList
	var list ErrorList
	eh := func(pos token.Position, msg string) { list.Add(pos, msg) }

	var s Scanner
	s.Init(fset.AddFile("File1", fset.Base(), len(src)), []byte(src), eh, dontInsertSemis)
	for {
		if _, tok, _ := s.Scan(); tok == token.EOF {
			break
		}
	}

	// the handler must have been called once per scanner error
	if len(list) != s.ErrorCount {
		t.Errorf("found %d errors, expected %d", len(list), s.ErrorCount)
	}

	if len(list) != 9 {
		t.Errorf("found %d raw errors, expected 9", len(list))
		PrintError(os.Stderr, list)
	}

	// sorting must not change the number of errors
	list.Sort()
	if len(list) != 9 {
		t.Errorf("found %d sorted errors, expected 9", len(list))
		PrintError(os.Stderr, list)
	}

	// RemoveMultiples keeps only the first error per line
	list.RemoveMultiples()
	if len(list) != 4 {
		t.Errorf("found %d one-per-line errors, expected 4", len(list))
		PrintError(os.Stderr, list)
	}
}
// errorCollector records what the scanner's error handler saw, for use
// by checkError.
type errorCollector struct {
	cnt int            // number of errors encountered
	msg string         // last error message encountered
	pos token.Position // last error position encountered
}
  552. func checkError(t *testing.T, src string, tok token.Token, pos int, lit, err string) {
  553. var s Scanner
  554. var h errorCollector
  555. eh := func(pos token.Position, msg string) {
  556. h.cnt++
  557. h.msg = msg
  558. h.pos = pos
  559. }
  560. s.Init(fset.AddFile("", fset.Base(), len(src)), []byte(src), eh, ScanComments|dontInsertSemis)
  561. _, tok0, lit0 := s.Scan()
  562. if tok0 != tok {
  563. t.Errorf("%q: got %s, expected %s", src, tok0, tok)
  564. }
  565. if tok0 != token.ILLEGAL && lit0 != lit {
  566. t.Errorf("%q: got literal %q, expected %q", src, lit0, lit)
  567. }
  568. cnt := 0
  569. if err != "" {
  570. cnt = 1
  571. }
  572. if h.cnt != cnt {
  573. t.Errorf("%q: got cnt %d, expected %d", src, h.cnt, cnt)
  574. }
  575. if h.msg != err {
  576. t.Errorf("%q: got msg %q, expected %q", src, h.msg, err)
  577. }
  578. if h.pos.Offset != pos {
  579. t.Errorf("%q: got offset %d, expected %d", src, h.pos.Offset, pos)
  580. }
  581. }
// errors is the table for TestScanErrors: for each src fragment, the
// expected token kind, the offset of the expected error (if any), the
// expected literal, and the expected error message ("" = no error).
var errors = []struct {
	src string
	tok token.Token
	pos int
	lit string
	err string
}{
	// illegal characters
	{"\a", token.ILLEGAL, 0, "", "illegal character U+0007"},
	{`#`, token.ILLEGAL, 0, "", "illegal character U+0023 '#'"},
	{`…`, token.ILLEGAL, 0, "", "illegal character U+2026 '…'"},

	// rune literals and escape sequences
	{`' '`, token.CHAR, 0, `' '`, ""},
	{`''`, token.CHAR, 0, `''`, "illegal rune literal"},
	{`'12'`, token.CHAR, 0, `'12'`, "illegal rune literal"},
	{`'123'`, token.CHAR, 0, `'123'`, "illegal rune literal"},
	{`'\0'`, token.CHAR, 3, `'\0'`, "illegal character U+0027 ''' in escape sequence"},
	{`'\07'`, token.CHAR, 4, `'\07'`, "illegal character U+0027 ''' in escape sequence"},
	{`'\8'`, token.CHAR, 2, `'\8'`, "unknown escape sequence"},
	{`'\08'`, token.CHAR, 3, `'\08'`, "illegal character U+0038 '8' in escape sequence"},
	{`'\x'`, token.CHAR, 3, `'\x'`, "illegal character U+0027 ''' in escape sequence"},
	{`'\x0'`, token.CHAR, 4, `'\x0'`, "illegal character U+0027 ''' in escape sequence"},
	{`'\x0g'`, token.CHAR, 4, `'\x0g'`, "illegal character U+0067 'g' in escape sequence"},
	{`'\u'`, token.CHAR, 3, `'\u'`, "illegal character U+0027 ''' in escape sequence"},
	{`'\u0'`, token.CHAR, 4, `'\u0'`, "illegal character U+0027 ''' in escape sequence"},
	{`'\u00'`, token.CHAR, 5, `'\u00'`, "illegal character U+0027 ''' in escape sequence"},
	{`'\u000'`, token.CHAR, 6, `'\u000'`, "illegal character U+0027 ''' in escape sequence"},
	{`'\u000`, token.CHAR, 6, `'\u000`, "escape sequence not terminated"},
	{`'\u0000'`, token.CHAR, 0, `'\u0000'`, ""},
	{`'\U'`, token.CHAR, 3, `'\U'`, "illegal character U+0027 ''' in escape sequence"},
	{`'\U0'`, token.CHAR, 4, `'\U0'`, "illegal character U+0027 ''' in escape sequence"},
	{`'\U00'`, token.CHAR, 5, `'\U00'`, "illegal character U+0027 ''' in escape sequence"},
	{`'\U000'`, token.CHAR, 6, `'\U000'`, "illegal character U+0027 ''' in escape sequence"},
	{`'\U0000'`, token.CHAR, 7, `'\U0000'`, "illegal character U+0027 ''' in escape sequence"},
	{`'\U00000'`, token.CHAR, 8, `'\U00000'`, "illegal character U+0027 ''' in escape sequence"},
	{`'\U000000'`, token.CHAR, 9, `'\U000000'`, "illegal character U+0027 ''' in escape sequence"},
	{`'\U0000000'`, token.CHAR, 10, `'\U0000000'`, "illegal character U+0027 ''' in escape sequence"},
	{`'\U0000000`, token.CHAR, 10, `'\U0000000`, "escape sequence not terminated"},
	{`'\U00000000'`, token.CHAR, 0, `'\U00000000'`, ""},
	{`'\Uffffffff'`, token.CHAR, 2, `'\Uffffffff'`, "escape sequence is invalid Unicode code point"},
	{`'`, token.CHAR, 0, `'`, "rune literal not terminated"},
	{`'\`, token.CHAR, 2, `'\`, "escape sequence not terminated"},
	{"'\n", token.CHAR, 0, "'", "rune literal not terminated"},
	{"'\n ", token.CHAR, 0, "'", "rune literal not terminated"},

	// string literals
	{`""`, token.STRING, 0, `""`, ""},
	{`"abc`, token.STRING, 0, `"abc`, "string literal not terminated"},
	{"\"abc\n", token.STRING, 0, `"abc`, "string literal not terminated"},
	{"\"abc\n ", token.STRING, 0, `"abc`, "string literal not terminated"},
	{"``", token.STRING, 0, "``", ""},
	{"`", token.STRING, 0, "`", "raw string literal not terminated"},

	// comments
	{"/**/", token.COMMENT, 0, "/**/", ""},
	{"/*", token.COMMENT, 0, "/*", "comment not terminated"},

	// numeric literals
	{"077", token.INT, 0, "077", ""},
	{"078.", token.FLOAT, 0, "078.", ""},
	{"07801234567.", token.FLOAT, 0, "07801234567.", ""},
	{"078e0", token.FLOAT, 0, "078e0", ""},
	{"078", token.INT, 0, "078", "illegal octal number"},
	{"07800000009", token.INT, 0, "07800000009", "illegal octal number"},
	{"0x", token.INT, 0, "0x", "illegal hexadecimal number"},
	{"0X", token.INT, 0, "0X", "illegal hexadecimal number"},

	// bad bytes inside string literals
	{"\"abc\x00def\"", token.STRING, 4, "\"abc\x00def\"", "illegal character NUL"},
	{"\"abc\x80def\"", token.STRING, 4, "\"abc\x80def\"", "illegal UTF-8 encoding"},

	// byte order marks
	{"\ufeff\ufeff", token.ILLEGAL, 3, "\ufeff\ufeff", "illegal byte order mark"},                        // only first BOM is ignored
	{"//\ufeff", token.COMMENT, 2, "//\ufeff", "illegal byte order mark"},                                // only first BOM is ignored
	{"'\ufeff" + `'`, token.CHAR, 1, "'\ufeff" + `'`, "illegal byte order mark"},                         // only first BOM is ignored
	{`"` + "abc\ufeffdef" + `"`, token.STRING, 4, `"` + "abc\ufeffdef" + `"`, "illegal byte order mark"}, // only first BOM is ignored
}
  647. func TestScanErrors(t *testing.T) {
  648. for _, e := range errors {
  649. checkError(t, e.src, e.tok, e.pos, e.lit, e.err)
  650. }
  651. }
// Verify that no comments show up as literal values when skipping comments.
func TestIssue10213(t *testing.T) {
	// Source with comments in several positions: trailing a declaration,
	// alone inside a declaration group, and inside a function body.
	var src = `
var (
A = 1 // foo
)
var (
B = 2
// foo
)
var C = 3 // foo
var D = 4
// foo
func anycode() {
// foo
}
`
	var s Scanner
	// mode 0: comments are skipped, not returned as tokens
	s.Init(fset.AddFile("", fset.Base(), len(src)), []byte(src), nil, 0)
	for {
		pos, tok, lit := s.Scan()
		class := tokenclass(tok)
		// only literals, keywords, and semicolons may carry a literal string
		if lit != "" && class != keyword && class != literal && tok != token.SEMICOLON {
			t.Errorf("%s: tok = %s, lit = %q", fset.Position(pos), tok, lit)
		}
		if tok <= token.EOF {
			break
		}
	}
}
  682. func BenchmarkScan(b *testing.B) {
  683. b.StopTimer()
  684. fset := token.NewFileSet()
  685. file := fset.AddFile("", fset.Base(), len(source))
  686. var s Scanner
  687. b.StartTimer()
  688. for i := 0; i < b.N; i++ {
  689. s.Init(file, source, nil, ScanComments)
  690. for {
  691. _, tok, _ := s.Scan()
  692. if tok == token.EOF {
  693. break
  694. }
  695. }
  696. }
  697. }
// BenchmarkScanFile measures scanning throughput (bytes/op via SetBytes)
// over this package's own scanner.go source file.
func BenchmarkScanFile(b *testing.B) {
	b.StopTimer()
	const filename = "scanner.go"
	// NOTE(review): ioutil.ReadFile is deprecated in modern Go in favor
	// of os.ReadFile; kept here for compatibility with the Go version
	// this file targets.
	src, err := ioutil.ReadFile(filename)
	if err != nil {
		panic(err)
	}
	fset := token.NewFileSet()
	file := fset.AddFile(filename, fset.Base(), len(src))
	b.SetBytes(int64(len(src)))
	var s Scanner
	b.StartTimer()
	for i := 0; i < b.N; i++ {
		s.Init(file, src, nil, ScanComments)
		for {
			_, tok, _ := s.Scan()
			if tok == token.EOF {
				break
			}
		}
	}
}