PageRenderTime 37ms CodeModel.GetById 20ms RepoModel.GetById 1ms app.codeStats 0ms

/test/test_lexer.rb

https://github.com/kung-fu-tzu/parser
Ruby | 2651 lines | 2297 code | 318 blank | 36 comment | 7 complexity | 6999d628ef5e21b13cf6c748bcb9231c MD5 | raw file

Large files are truncated, but you can click here to view the full file

  1. # encoding: ascii-8bit
  2. require 'helper'
  3. class TestLexer < Minitest::Test
  # Builds a fresh lexer for the given grammar +version+ and stores it in @lex.
  #
  # The lexer starts with an empty comment list and a diagnostics engine on
  # which all_errors_are_fatal is switched on.
  def setup_lexer(version)
    @lex = Parser::Lexer.new(version)
    @lex.comments = []

    engine = Parser::Diagnostic::Engine.new
    @lex.diagnostics = engine
    # Uncomment to print every diagnostic while debugging:
    # engine.consumer = lambda { |diag| $stderr.puts "", diag.render }
    engine.all_errors_are_fatal = true
  end
  # Per-test setup hook: every test starts with a lexer built for version 18
  # unless it calls setup_lexer again itself.
  def setup
    setup_lexer(18)
  end
  14. #
  15. # Additional matchers
  16. #
  # Asserts that lexing +s+ raises Parser::SyntaxError.
  #
  # +args+ are the token/value pairs expected before the error; they are
  # passed straight through to util_lex_token.
  def util_bad_token(s, *args)
    assert_raises(Parser::SyntaxError) { util_lex_token(s, *args) }
  end
  # Lexes the double-quoted literal "\<input>" and asserts that the first
  # token is a :tSTRING whose value equals +expected+.
  #
  # The value is re-tagged as ASCII-8BIT (when it supports force_encoding)
  # before the comparison. The source text is used as the failure message.
  def util_escape(expected, input)
    buffer = Parser::Source::Buffer.new('(util_escape)')
    buffer.source = "\"\\#{input}\""

    @lex.reset
    @lex.source_buffer = buffer

    type, (value, *) = @lex.advance
    value.force_encoding('ASCII-8BIT') if value.respond_to?(:force_encoding)

    assert_equal([:tSTRING, expected], [type, value], buffer.source)
  end
  # Asserts that the escape sequence "\<input>" inside a %Q[] literal raises
  # Parser::SyntaxError. The lexer is put into :expr_beg before lexing.
  def util_escape_bad(input)
    assert_raises(Parser::SyntaxError) do
      @lex.state = :expr_beg
      util_lex_token("%Q[\\#{input}]")
    end
  end
  # Lexes "def <name> " and expects kDEF followed by a token of +type+ whose
  # value is +name+; afterwards the lexer must be in :expr_endfn.
  def util_lex_fname(name, type)
    source = "def #{name} "
    util_lex_token(source, :kDEF, 'def', type, name)

    assert_equal(:expr_endfn, @lex.state)
  end
  # Lexes +input+ and checks the resulting token stream.
  #
  # +args+ is a flat list of alternating token types and values. Each pair
  # must match the next token returned by @lex.advance, and once all pairs
  # are consumed the lexer must have no further token.
  def util_lex_token(input, *args)
    buffer = Parser::Source::Buffer.new('(util_lex_token)')
    buffer.source = input

    @lex.reset(false)
    @lex.source_buffer = buffer

    args.each_slice(2) do |token, value|
      lex_token, (lex_value, *) = @lex.advance
      assert lex_token, 'no more tokens'
      assert_equal [token, value], [lex_token, lex_value], input
    end

    # One extra advance: anything still pending is an unexpected token.
    lex_token, (lex_value, *) = @lex.advance
    refute lex_token, "must be empty, but had #{[lex_token, lex_value].inspect}"
  end
  59. #
  60. # Tests
  61. #
  # Simple backslash escapes: each row is [expected string value, escape body].
  def test_read_escape
    [
      ["\\",   "\\"],
      ["\n",   "n"],
      ["\t",   "t"],
      ["\r",   "r"],
      ["\f",   "f"],
      ["\13",  "v"],
      ["\0",   "0"],
      ["\07",  "a"],
      ["\007", "a"],
      ["\033", "e"],
      ["\377", "377"],
      ["\377", "xff"],
      ["\010", "b"],
      [" ",    "s"],
      ["q",    "q"] # plain vanilla escape
    ].each do |expected, input|
      util_escape(expected, input)
    end
  end
  # Control-character escapes (\C-x, \cx), optionally combined with \M-.
  def test_read_escape_c
    util_escape("\030", "C-x")
    util_escape("\030", "cx")
    util_escape("\230", 'C-\M-x')
    util_escape("\230", 'c\M-x')

    util_escape("\177", "C-?")
    util_escape("\177", "c?")
  end

  # Meta escapes (\M-x), optionally combined with a control escape.
  def test_read_escape_m
    util_escape("\370", "M-x")
    util_escape("\230", 'M-\C-x')
    util_escape("\230", 'M-\cx')
  end
  # Truncated or malformed escape bodies must all raise a syntax error.
  def test_read_escape_errors
    ["", "M", "M-", "Mx", "Cx", "C", "C-", "c"].each do |input|
      util_escape_bad(input)
    end
  end
  # \uXXXX and \u{...} escapes; only exercised when running on Ruby >= 1.9.
  def test_read_escape_unicode__19
    return unless RUBY_VERSION >= '1.9'

    util_escape("\xc4\xa3", 'u0123')
    util_escape("\xc4\xa3\xc3\xb0\xeb\x84\xa3", 'u{123 f0 B123}')
  end

  # Malformed \u escapes; only exercised when running on Ruby >= 1.9.
  def test_read_escape_unicode_bad__19
    return unless RUBY_VERSION >= '1.9'

    util_escape_bad('u123')
    util_escape_bad('u{}')
    util_escape_bad('u{123 f0h}')
    util_escape_bad('u{123 f0')
  end
  # "m -3": the minus is expected as tUMINUS_NUM ahead of the integer.
  def test_ambiguous_uminus
    util_lex_token("m -3",
                   :tIDENTIFIER, "m",
                   :tUMINUS_NUM, "-",
                   :tINTEGER,    3)
  end

  # "m +3": no separate token is expected for the plus sign.
  def test_ambiguous_uplus
    util_lex_token("m +3",
                   :tIDENTIFIER, "m",
                   :tINTEGER,    3)
  end
  # A lone "&" in the default state is tAMPER.
  def test_and
    util_lex_token("&", :tAMPER, "&")
  end

  # "&&" after an expression end is tANDOP.
  def test_and2
    @lex.state = :expr_end

    util_lex_token("&&", :tANDOP, "&&")
  end

  # "&&=" is tOP_ASGN carrying "&&".
  def test_and2_equals
    @lex.state = :expr_end

    util_lex_token("&&=", :tOP_ASGN, "&&")
  end

  # " &y" in argument position: "&" is tAMPER.
  def test_and_arg
    @lex.state = :expr_arg

    util_lex_token(" &y",
                   :tAMPER,      "&",
                   :tIDENTIFIER, "y")
  end

  # "&=" is tOP_ASGN carrying "&".
  def test_and_equals
    @lex.state = :expr_end

    util_lex_token("&=", :tOP_ASGN, "&")
  end

  # "x & y": "&" between operands is tAMPER2.
  def test_and_expr
    @lex.state = :expr_arg

    util_lex_token("x & y",
                   :tIDENTIFIER, "x",
                   :tAMPER2,     "&",
                   :tIDENTIFIER, "y")
  end

  # "def & " yields tAMPER2 as the method name.
  def test_and_meth
    util_lex_fname("&", :tAMPER2)
  end
  # "=>" is tASSOC.
  def test_assoc
    util_lex_token("=>", :tASSOC, "=>")
  end

  # With the version-18 lexer "a:b" is an identifier followed by a symbol.
  def test_label__18
    util_lex_token("{a:b",
                   :tLBRACE,     "{",
                   :tIDENTIFIER, "a",
                   :tSYMBOL,     "b")
  end

  # Same as above, inside a parenthesised argument list.
  def test_label_in_params__18
    util_lex_token("foo(a:b",
                   :tIDENTIFIER, "foo",
                   :tLPAREN2,    "(",
                   :tIDENTIFIER, "a",
                   :tSYMBOL,     "b")
  end

  # With the version-19 lexer "a:" is a tLABEL.
  def test_label__19
    setup_lexer(19)

    util_lex_token("{a:b",
                   :tLBRACE,     "{",
                   :tLABEL,      "a",
                   :tIDENTIFIER, "b")
  end

  # Version 19: label inside a parenthesised argument list.
  def test_label_in_params__19
    setup_lexer(19)

    util_lex_token("foo(a:b",
                   :tIDENTIFIER, "foo",
                   :tLPAREN2,    "(",
                   :tLABEL,      "a",
                   :tIDENTIFIER, "b")
  end

  # Version 19: a label whose name ends in "?".
  def test_label_fid__19
    setup_lexer(19)

    util_lex_token("{a?:true",
                   :tLBRACE, '{',
                   :tLABEL,  'a?',
                   :kTRUE,   'true')
  end
  # Version 19: after each of these keywords "a:b" must lex as an identifier
  # followed by a symbol, not as a label.
  def test_command_start__19
    setup_lexer(19)

    keywords = %w[case elsif for in until when while if unless and or]
    keywords.each do |keyword|
      @lex.reset

      util_lex_token("#{keyword} a:b",
                     :"k#{keyword.upcase}", keyword,
                     :tIDENTIFIER,          "a",
                     :tSYMBOL,              "b")
    end
  end
  # Version 19: after a modifier keyword (lexed from :expr_end) "a:" is still
  # a label.
  def test_mod_not_command_start__19
    setup_lexer(19)

    %w[if unless while until rescue].each do |keyword|
      @lex.state = :expr_end

      util_lex_token("#{keyword} a:b",
                     :"k#{keyword.upcase}_MOD", keyword,
                     :tLABEL,                   "a",
                     :tIDENTIFIER,              "b")
    end
  end
  # $&, $`, $' and $+ are all tBACK_REF.
  def test_back_ref
    util_lex_token("[$&, $`, $', $+]",
                   :tLBRACK,   "[",
                   :tBACK_REF, "$&", :tCOMMA, ",",
                   :tBACK_REF, "$`", :tCOMMA, ",",
                   :tBACK_REF, "$'", :tCOMMA, ",",
                   :tBACK_REF, "$+",
                   :tRBRACK,   "]")
  end

  # A backslash-newline produces no token of its own.
  def test_backslash
    util_lex_token("1 \\\n+ 2",
                   :tINTEGER, 1,
                   :tPLUS,    "+",
                   :tINTEGER, 2)
  end

  # A backslash that is not followed by a newline is an error.
  def test_backslash_bad
    util_bad_token("1 \\ + 2",
                   :tINTEGER, 1)
  end
  # `ls` is an xstring: begin, content, end.
  def test_backtick
    util_lex_token("`ls`",
                   :tXSTRING_BEG,    "`",
                   :tSTRING_CONTENT, "ls",
                   :tSTRING_END,     "`")
  end

  # A backtick lexed from :expr_dot is tBACK_REF2 and leaves :expr_arg.
  def test_backtick_cmdarg
    @lex.state = :expr_dot

    util_lex_token("\n`", :tBACK_REF2, "`") # \n ensures expr_cmd

    assert_equal(:expr_arg, @lex.state)
  end

  # a.`(3): the backtick after a dot is a method name (tBACK_REF2).
  def test_backtick_dot
    @lex.state = :expr_dot

    util_lex_token("a.`(3)",
                   :tIDENTIFIER, "a",
                   :tDOT,        ".",
                   :tBACK_REF2,  "`",
                   :tLPAREN2,    "(",
                   :tINTEGER,    3,
                   :tRPAREN,     ")")
  end

  # A backtick in :expr_fname is tBACK_REF2 and leaves :expr_endfn.
  def test_backtick_method
    @lex.state = :expr_fname

    util_lex_token("`", :tBACK_REF2, "`")

    assert_equal(:expr_endfn, @lex.state)
  end
  # A raw \010 byte in the source is rejected.
  def test_bad_char
    util_bad_token(" \010 ")
  end

  def test_bang
    util_lex_token("!", :tBANG, "!")
  end

  def test_bang_equals
    util_lex_token("!=", :tNEQ, "!=")
  end

  def test_bang_tilde
    util_lex_token("!~", :tNMATCH, "!~")
  end

  def test_carat
    util_lex_token("^", :tCARET, "^")
  end

  # "^=" is tOP_ASGN carrying "^".
  def test_carat_equals
    util_lex_token("^=", :tOP_ASGN, "^")
  end
  # "::" is tCOLON2 between constants, and also at the start of the input
  # when the lexer is in :expr_arg.
  def test_colon2
    util_lex_token("A::B",
                   :tCONSTANT, "A",
                   :tCOLON2,   "::",
                   :tCONSTANT, "B")

    @lex.state = :expr_arg

    util_lex_token("::Array",
                   :tCOLON2,   "::",
                   :tCONSTANT, "Array")
  end

  # "::" is tCOLON3 at the start of the input in the default state, and
  # after a leading space in :expr_arg.
  def test_colon3
    util_lex_token("::Array",
                   :tCOLON3,   "::",
                   :tCONSTANT, "Array")

    @lex.state = :expr_arg

    util_lex_token(" ::Array",
                   :tCOLON3,   "::",
                   :tCONSTANT, "Array")
  end

  def test_comma
    util_lex_token(",", :tCOMMA, ",")
  end
  # Line comments produce no tokens but are collected in @lex.comments.
  def test_comment
    util_lex_token("1 # one\n# two\n2",
                   :tINTEGER, 1,
                   :tNL,      nil,
                   :tINTEGER, 2)

    comments = @lex.comments
    assert_equal(2, comments.length)
    assert_equal('# one', comments[0].text)
    assert_equal('# two', comments[1].text)
  end

  # A comment directly after "{" produces no token.
  def test_comment_expr_beg
    util_lex_token("{#1\n}",
                   :tLBRACE, "{",
                   :tRCURLY, "}")
  end

  # An =begin/=end block is collected as a single comment.
  def test_comment_begin
    util_lex_token("=begin\nblah\nblah\n=end\n42",
                   :tINTEGER, 42)

    comments = @lex.comments
    assert_equal(1, comments.length)
    assert_equal("=begin\nblah\nblah\n=end\n", comments[0].text)
  end

  # An =begin block without =end is an error.
  def test_comment_begin_bad
    util_bad_token("=begin\nblah\nblah\n")
  end

  # "=beginfoo" at the start of a continued line is not a comment opener.
  def test_comment_begin_not_comment
    util_lex_token("beginfoo = 5\np x \\\n=beginfoo",
                   :tIDENTIFIER, "beginfoo",
                   :tEQL,        "=",
                   :tINTEGER,    5,
                   :tNL,         nil,
                   :tIDENTIFIER, "p",
                   :tIDENTIFIER, "x",
                   :tEQL,        "=",
                   :tIDENTIFIER, "beginfoo")
  end

  # Text after "=begin " on the same line belongs to the comment.
  def test_comment_begin_space
    util_lex_token("=begin blah\nblah\n=end\n")

    comments = @lex.comments
    assert_equal(1, comments.length)
    assert_equal("=begin blah\nblah\n=end\n", comments[0].text)
  end

  # Text after "=end " on the same line belongs to the comment.
  def test_comment_end_space_and_text
    util_lex_token("=begin blah\nblah\n=end blab\n")

    comments = @lex.comments
    assert_equal(1, comments.length)
    assert_equal("=begin blah\nblah\n=end blab\n", comments[0].text)
  end

  # A comment that runs to the end of input yields no tokens.
  def test_comment_eos
    util_lex_token("# comment")
  end
  def test_constant
    util_lex_token("ArgumentError", :tCONSTANT, "ArgumentError")
  end

  def test_constant_semi
    util_lex_token("ArgumentError;",
                   :tCONSTANT, "ArgumentError",
                   :tSEMI,     ";")
  end

  def test_cvar
    util_lex_token("@@blah", :tCVAR, "@@blah")
  end

  # A class variable name may not start with a digit.
  def test_cvar_bad
    util_bad_token("@@1")
  end
  def test_div
    util_lex_token("a / 2",
                   :tIDENTIFIER, "a",
                   :tDIVIDE,     "/",
                   :tINTEGER,    2)
  end

  # "/=" is tOP_ASGN carrying "/".
  def test_div_equals
    util_lex_token("a /= 2",
                   :tIDENTIFIER, "a",
                   :tOP_ASGN,    "/",
                   :tINTEGER,    2)
  end

  # A plain "do" after an identifier is kDO.
  def test_do
    util_lex_token("x do 42 end",
                   :tIDENTIFIER, "x",
                   :kDO,         "do",
                   :tINTEGER,    42,
                   :kEND,        "end")
  end
  # With a true value pushed on the lexer's cond stack, "do" is kDO_COND.
  #
  # This test used to be defined twice with identical bodies; the second
  # definition replaced the first, so only one copy is kept.
  def test_do_cond
    @lex.cond.push(true)

    util_lex_token("x do 42 end",
                   :tIDENTIFIER, "x",
                   :kDO_COND,    "do",
                   :tINTEGER,    42,
                   :kEND,        "end")
  end

  # "do" lexed from :expr_endarg is kDO_BLOCK.
  def test_do_block
    @lex.state = :expr_endarg

    util_lex_token("do 42 end",
                   :kDO_BLOCK, "do",
                   :tINTEGER,  42,
                   :kEND,      "end")
  end
  def test_dot
    util_lex_token(".", :tDOT, ".")
  end

  def test_dot2
    util_lex_token("..", :tDOT2, "..")
  end

  def test_dot3
    util_lex_token("...", :tDOT3, "...")
  end

  def test_equals
    util_lex_token("=", :tEQL, "=")
  end

  def test_equals2
    util_lex_token("==", :tEQ, "==")
  end

  def test_equals3
    util_lex_token("===", :tEQQ, "===")
  end

  def test_equals_tilde
    util_lex_token("=~", :tMATCH, "=~")
  end
  def test_float
    util_lex_token("1.0", :tFLOAT, 1.0)
  end

  # Consecutive underscores are rejected.
  def test_float_bad_no_underscores
    util_bad_token("1__0.0")
  end

  # A float must have a digit before the dot.
  def test_float_bad_no_zero_leading
    util_bad_token(".0")
  end

  # An underscore directly before the dot is rejected.
  def test_float_bad_trailing_underscore
    util_bad_token("123_.0")
  end

  # A method call on a float literal.
  def test_float_call
    util_lex_token("1.0.to_s",
                   :tFLOAT,      1.0,
                   :tDOT,        ".",
                   :tIDENTIFIER, "to_s")
  end

  def test_float_dot_E
    util_lex_token("1.0E10", :tFLOAT, 1.0e10)
  end

  def test_float_dot_E_neg
    util_lex_token("-1.0E10",
                   :tUMINUS_NUM, "-",
                   :tFLOAT,      1.0e10)
  end

  def test_float_dot_e
    util_lex_token("1.0e10", :tFLOAT, 1.0e10)
  end

  def test_float_dot_e_neg
    util_lex_token("-1.0e10",
                   :tUMINUS_NUM, "-",
                   :tFLOAT,      1.0e10)
  end

  def test_float_e
    util_lex_token("1e10", :tFLOAT, 1e10)
  end

  # An underscore directly before the exponent is rejected.
  def test_float_e_bad_trailing_underscore
    util_bad_token("123_e10")
  end

  def test_float_e_minus
    util_lex_token("1e-10", :tFLOAT, 1e-10)
  end

  def test_float_e_neg
    util_lex_token("-1e10",
                   :tUMINUS_NUM, "-",
                   :tFLOAT,      1e10)
  end

  def test_float_e_neg_minus
    util_lex_token("-1e-10",
                   :tUMINUS_NUM, "-",
                   :tFLOAT,      1e-10)
  end

  def test_float_e_neg_plus
    util_lex_token("-1e+10",
                   :tUMINUS_NUM, "-",
                   :tFLOAT,      1e10)
  end

  def test_float_e_plus
    util_lex_token("1e+10", :tFLOAT, 1e10)
  end

  def test_float_e_zero
    util_lex_token("0e0", :tFLOAT, 0e0)
  end

  def test_float_neg
    util_lex_token("-1.0",
                   :tUMINUS_NUM, "-",
                   :tFLOAT,      1.0)
  end
  def test_ge
    util_lex_token("a >= 2",
                   :tIDENTIFIER, "a",
                   :tGEQ,        ">=",
                   :tINTEGER,    2)
  end

  def test_global
    util_lex_token("$blah", :tGVAR, "$blah")
  end

  # $` on its own is a back-reference, not a plain global.
  def test_global_backref
    util_lex_token("$`", :tBACK_REF, "$`")
  end
  510. # This was removed in 2.1.
  511. # def test_global_dash_nothing
  512. # util_lex_token("$- ", :tGVAR, "$-")
  513. # end
  def test_global_dash_something
    util_lex_token("$-x", :tGVAR, "$-x")
  end

  # $<digits> is tNTH_REF with an integer value.
  def test_global_number
    util_lex_token("$10", :tNTH_REF, 10)
  end

  # The punctuation globals are all tGVAR.
  def test_global_other
    util_lex_token("[$~, $*, $$, $?, $!, $@, $/, $\\, $;, $,, $., $=, $:, $<, $>, $\"]",
                   :tLBRACK, "[",
                   :tGVAR,   "$~",  :tCOMMA, ",",
                   :tGVAR,   "$*",  :tCOMMA, ",",
                   :tGVAR,   "$$",  :tCOMMA, ",",
                   :tGVAR,   "$\?", :tCOMMA, ",",
                   :tGVAR,   "$!",  :tCOMMA, ",",
                   :tGVAR,   "$@",  :tCOMMA, ",",
                   :tGVAR,   "$/",  :tCOMMA, ",",
                   :tGVAR,   "$\\", :tCOMMA, ",",
                   :tGVAR,   "$;",  :tCOMMA, ",",
                   :tGVAR,   "$,",  :tCOMMA, ",",
                   :tGVAR,   "$.",  :tCOMMA, ",",
                   :tGVAR,   "$=",  :tCOMMA, ",",
                   :tGVAR,   "$:",  :tCOMMA, ",",
                   :tGVAR,   "$<",  :tCOMMA, ",",
                   :tGVAR,   "$>",  :tCOMMA, ",",
                   :tGVAR,   "$\"",
                   :tRBRACK, "]")
  end

  def test_global_underscore
    util_lex_token("$_", :tGVAR, "$_")
  end

  def test_global_wierd
    util_lex_token("$__blah", :tGVAR, "$__blah")
  end

  # $0 is a plain global, unlike $1 and up.
  def test_global_zero
    util_lex_token("$0", :tGVAR, "$0")
  end

  def test_gt
    util_lex_token("a > 2",
                   :tIDENTIFIER, "a",
                   :tGT,         ">",
                   :tINTEGER,    2)
  end
  # <<`EOF` opens an xstring heredoc.
  def test_heredoc_backtick
    util_lex_token("a = <<`EOF`\n blah blah\nEOF\n",
                   :tIDENTIFIER,     "a",
                   :tEQL,            "=",
                   :tXSTRING_BEG,    "`",
                   :tSTRING_CONTENT, " blah blah\n",
                   :tSTRING_END,     "EOF",
                   :tNL,             nil)
  end

  # <<"EOF" opens a double-quoted heredoc.
  def test_heredoc_double
    util_lex_token("a = <<\"EOF\"\n blah blah\nEOF\n",
                   :tIDENTIFIER,     "a",
                   :tEQL,            "=",
                   :tSTRING_BEG,     "\"",
                   :tSTRING_CONTENT, " blah blah\n",
                   :tSTRING_END,     "EOF",
                   :tNL,             nil)
  end

  # <<-"EOF" accepts an indented terminator.
  def test_heredoc_double_dash
    util_lex_token("a = <<-\"EOF\"\n blah blah\n EOF\n",
                   :tIDENTIFIER,     "a",
                   :tEQL,            "=",
                   :tSTRING_BEG,     "\"",
                   :tSTRING_CONTENT, " blah blah\n",
                   :tSTRING_END,     "EOF",
                   :tNL,             nil)
  end

  # Input ends inside the heredoc body, without a trailing newline.
  def test_heredoc_double_eos
    util_bad_token("a = <<\"EOF\"\nblah",
                   :tIDENTIFIER, "a",
                   :tEQL,        "=",
                   :tSTRING_BEG, "\"")
  end

  # Input ends inside the heredoc body, with a trailing newline.
  def test_heredoc_double_eos_nl
    util_bad_token("a = <<\"EOF\"\nblah\n",
                   :tIDENTIFIER, "a",
                   :tEQL,        "=",
                   :tSTRING_BEG, "\"")
  end

  # Interpolation of an ivar, a gvar and a #{} expression in a heredoc body.
  def test_heredoc_double_interp
    util_lex_token("a = <<\"EOF\"\n#x a \#@a b \#$b c \#{3} \nEOF\n",
                   :tIDENTIFIER,     "a",
                   :tEQL,            "=",
                   :tSTRING_BEG,     "\"",
                   :tSTRING_CONTENT, "#x a ",
                   :tSTRING_DVAR,    nil,
                   :tIVAR,           "@a",
                   :tSTRING_CONTENT, " b ",
                   :tSTRING_DVAR,    nil,
                   :tGVAR,           "$b",
                   :tSTRING_CONTENT, " c ",
                   :tSTRING_DBEG,    '#{',
                   :tINTEGER,        3,
                   :tRCURLY,         "}",
                   :tSTRING_CONTENT, " \n",
                   :tSTRING_END,     "EOF",
                   :tNL,             nil)
  end
  # <<"" uses the empty string as terminator, so a blank line ends the body.
  def test_heredoc_empty
    util_lex_token("<<\"\"\n\#{x}\nblah2\n\n",
                   :tSTRING_BEG,     "\"",
                   :tSTRING_DBEG,    "\#{",
                   :tIDENTIFIER,     "x",
                   :tRCURLY,         "}",
                   :tSTRING_CONTENT, "\n",
                   :tSTRING_CONTENT, "blah2\n",
                   :tSTRING_END,     "",
                   :tNL,             nil)
  end

  # A bare <<EOF behaves like a double-quoted heredoc.
  def test_heredoc_none
    util_lex_token("a = <<EOF\nblah\nblah\nEOF",
                   :tIDENTIFIER,     "a",
                   :tEQL,            "=",
                   :tSTRING_BEG,     "\"",
                   :tSTRING_CONTENT, "blah\n",
                   :tSTRING_CONTENT, "blah\n",
                   :tSTRING_END,     "EOF",
                   :tNL,             nil)
  end

  # A bare <<-EOF accepts an indented terminator.
  def test_heredoc_none_dash
    util_lex_token("a = <<-EOF\nblah\nblah\n EOF",
                   :tIDENTIFIER,     "a",
                   :tEQL,            "=",
                   :tSTRING_BEG,     "\"",
                   :tSTRING_CONTENT, "blah\n",
                   :tSTRING_CONTENT, "blah\n",
                   :tSTRING_END,     "EOF",
                   :tNL,             nil)
  end

  # <<'EOF' opens a single-quoted heredoc.
  def test_heredoc_single
    util_lex_token("a = <<'EOF'\n blah blah\nEOF\n",
                   :tIDENTIFIER,     "a",
                   :tEQL,            "=",
                   :tSTRING_BEG,     "'",
                   :tSTRING_CONTENT, " blah blah\n",
                   :tSTRING_END,     "EOF",
                   :tNL,             nil)
  end

  # Input ends inside a single-quoted heredoc body.
  def test_heredoc_single_bad_eos_body
    util_bad_token("a = <<'EOF'\nblah",
                   :tIDENTIFIER, "a",
                   :tEQL,        "=",
                   :tSTRING_BEG, "'")
  end

  # <<-'EOF' accepts an indented terminator.
  def test_heredoc_single_dash
    util_lex_token("a = <<-'EOF'\n blah blah\n EOF\n",
                   :tIDENTIFIER,     "a",
                   :tEQL,            "=",
                   :tSTRING_BEG,     "'",
                   :tSTRING_CONTENT, " blah blah\n",
                   :tSTRING_END,     "EOF",
                   :tNL,             nil)
  end

  # A one-letter terminator that also occurs inside the body text.
  def test_heredoc_one_character
    util_lex_token("a = <<E\nABCDEF\nE\n",
                   :tIDENTIFIER,     "a",
                   :tEQL,            "=",
                   :tSTRING_BEG,     "\"",
                   :tSTRING_CONTENT, "ABCDEF\n",
                   :tSTRING_END,     "E",
                   :tNL,             nil)
  end
  def test_identifier
    util_lex_token("identifier", :tIDENTIFIER, "identifier")
  end

  # A trailing "!" makes a tFID, unless it is the start of "!=".
  def test_identifier_bang
    util_lex_token("identifier!",
                   :tFID, "identifier!")

    util_lex_token("identifier!=",
                   :tFID, "identifier",
                   :tNEQ, "!=")
  end

  def test_identifier_cmp
    util_lex_fname("<=>", :tCMP)
  end

  def test_identifier_def
    util_lex_fname("identifier", :tIDENTIFIER)
  end

  # A trailing "?" makes a tFID.
  def test_identifier_eh
    util_lex_token("identifier?", :tFID, "identifier?")
  end

  # ":blah==>" is the symbol :blah= followed by "=>".
  def test_identifier_equals_arrow
    util_lex_token(":blah==>",
                   :tSYMBOL, "blah=",
                   :tASSOC,  "=>")
  end

  # ":a===b" is the symbol :a followed by "===".
  def test_identifier_equals3
    util_lex_token(":a===b",
                   :tSYMBOL,     "a",
                   :tEQQ,        "===",
                   :tIDENTIFIER, "b")
  end

  # ":a==>b" is the symbol :a= followed by "=>".
  def test_identifier_equals_equals_arrow
    util_lex_token(":a==>b",
                   :tSYMBOL,     "a=",
                   :tASSOC,      "=>",
                   :tIDENTIFIER, "b")
  end

  def test_identifier_equals_caret
    util_lex_fname("^", :tCARET)
  end

  # A setter name in a def is a single tIDENTIFIER including the "=".
  def test_identifier_equals_def
    util_lex_fname("identifier=", :tIDENTIFIER)
  end

  def test_identifier_equals_def2
    util_lex_fname("==", :tEQ)
  end

  # "y = arg" lexed from :expr_dot ends in :expr_arg.
  def test_identifier_equals_expr
    @lex.state = :expr_dot

    util_lex_token("y = arg",
                   :tIDENTIFIER, "y",
                   :tEQL,        "=",
                   :tIDENTIFIER, "arg")

    assert_equal(:expr_arg, @lex.state)
  end

  def test_identifier_equals_or
    util_lex_fname("|", :tPIPE)
  end

  def test_identifier_equals_slash
    util_lex_fname("/", :tDIVIDE)
  end

  # In :expr_fname "identifier=~" is the setter name followed by "~".
  def test_identifier_equals_tilde
    @lex.state = :expr_fname

    util_lex_token("identifier=~",
                   :tIDENTIFIER, "identifier=",
                   :tTILDE,      "~")
  end

  def test_identifier_gt
    util_lex_fname(">", :tGT)
  end

  def test_identifier_le
    util_lex_fname("<=", :tLEQ)
  end

  def test_identifier_lt
    util_lex_fname("<", :tLT)
  end

  def test_identifier_tilde
    util_lex_fname("~", :tTILDE)
  end

  def test_index
    util_lex_fname("[]", :tAREF)
  end

  def test_index_equals
    util_lex_fname("[]=", :tASET)
  end
  def test_integer
    util_lex_token("42", :tINTEGER, 42)
  end

  def test_integer_bin
    util_lex_token("0b101010", :tINTEGER, 42)
  end

  # A binary prefix with no digits is rejected.
  def test_integer_bin_bad_none
    util_bad_token("0b ")
  end

  def test_integer_bin_bad_underscores
    util_bad_token("0b10__01")
  end

  def test_integer_dec
    util_lex_token("42", :tINTEGER, 42)
  end

  def test_integer_dec_bad_underscores
    util_bad_token("42__24")
  end

  def test_integer_dec_d
    util_lex_token("0d42", :tINTEGER, 42)
  end

  # A decimal prefix with no digits is rejected.
  def test_integer_dec_d_bad_none
    util_bad_token("0d")
  end

  def test_integer_dec_d_bad_underscores
    util_bad_token("0d42__24")
  end
  # Version 18: a character literal is an integer.
  def test_question_eh_a__18
    setup_lexer(18)

    util_lex_token("?a", :tINTEGER, 97)
  end

  # Version 19: a character literal is a one-character string.
  def test_question_eh_a__19
    setup_lexer(19)

    util_lex_token('?a', :tSTRING, "a")
  end

  # Version 18: a meta-control character literal is an integer.
  def test_question_eh_escape_M_escape_C__18
    setup_lexer(18)

    util_lex_token('?\M-\C-a', :tINTEGER, 129)
  end

  # Version 19: a meta-control character literal is a string.
  def test_question_eh_escape_M_escape_C__19
    setup_lexer(19)

    util_lex_token('?\M-\C-a', :tSTRING, "\M-\C-a")
  end
  def test_integer_hex
    util_lex_token("0x2a", :tINTEGER, 42)
  end

  # A hex prefix with no digits is rejected.
  def test_integer_hex_bad_none
    util_bad_token("0x ")
  end

  def test_integer_hex_bad_underscores
    util_bad_token("0xab__cd")
  end

  def test_integer_oct
    util_lex_token("052", :tINTEGER, 42)
  end

  # 8 is not an octal digit.
  def test_integer_oct_bad_range
    util_bad_token("08")
  end

  def test_integer_oct_bad_underscores
    util_bad_token("01__23")
  end

  def test_integer_oct_O
    util_lex_token("0O52", :tINTEGER, 42)
  end

  def test_integer_oct_O_bad_range
    util_bad_token("0O1238")
  end

  def test_integer_oct_O_bad_underscores
    util_bad_token("0O1__23")
  end

  # Unlike the other prefixes, "0O" with no digits lexes as integer 0.
  def test_integer_oct_O_not_bad_none
    util_lex_token("0O ", :tINTEGER, 0)
  end

  def test_integer_oct_o
    util_lex_token("0o52", :tINTEGER, 42)
  end

  def test_integer_oct_o_bad_range
    util_bad_token("0o1283")
  end

  def test_integer_oct_o_bad_underscores
    util_bad_token("0o1__23")
  end

  # "0o" with no digits lexes as integer 0.
  def test_integer_oct_o_not_bad_none
    util_lex_token("0o ", :tINTEGER, 0)
  end

  # A method call on an integer literal.
  def test_integer_trailing
    util_lex_token("1.to_s",
                   :tINTEGER,    1,
                   :tDOT,        '.',
                   :tIDENTIFIER, 'to_s')
  end

  def test_integer_underscore
    util_lex_token("4_2", :tINTEGER, 42)
  end

  def test_integer_underscore_bad
    util_bad_token("4__2")
  end

  def test_integer_zero
    util_lex_token("0", :tINTEGER, 0)
  end
  def test_ivar
    util_lex_token("@blah", :tIVAR, "@blah")
  end

  # An instance variable name may not start with a digit.
  def test_ivar_bad
    util_bad_token("@1")
  end

  # A bare "@" with no name is an error.
  def test_ivar_bad_0_length
    util_bad_token("1+@\n", :tINTEGER, 1, :tPLUS, "+")
  end

  # "if" lexed from :expr_endarg is the modifier form and leaves :expr_beg.
  def test_keyword_expr
    @lex.state = :expr_endarg

    util_lex_token("if", :kIF_MOD, "if")

    assert_equal(:expr_beg, @lex.state)
  end

  def test_lt
    util_lex_token("<", :tLT, "<")
  end

  def test_lt2
    util_lex_token("a <\< b",
                   :tIDENTIFIER, "a",
                   :tLSHFT,      "<\<",
                   :tIDENTIFIER, "b")
  end

  # The shift-assign operator is tOP_ASGN carrying the shift operator.
  def test_lt2_equals
    util_lex_token("a <\<= b",
                   :tIDENTIFIER, "a",
                   :tOP_ASGN,    "<\<",
                   :tIDENTIFIER, "b")
  end

  def test_lt_equals
    util_lex_token("<=", :tLEQ, "<=")
  end
  def test_minus
    util_lex_token("1 - 2",
                   :tINTEGER, 1,
                   :tMINUS,   "-",
                   :tINTEGER, 2)
  end

  # "-=" is tOP_ASGN carrying "-".
  def test_minus_equals
    @lex.state = :expr_end

    util_lex_token("-=", :tOP_ASGN, "-")
  end

  # "-" as a method name.
  def test_minus_method
    @lex.state = :expr_fname

    util_lex_token("-", :tMINUS, "-")
  end

  # "-@" as a method name is tUMINUS.
  def test_minus_unary_method
    @lex.state = :expr_fname

    util_lex_token("-@", :tUMINUS, "-@")
  end

  # A leading minus on a number is tUMINUS_NUM.
  def test_minus_unary_number
    util_lex_token("-42",
                   :tUMINUS_NUM, "-",
                   :tINTEGER,    42)
  end

  # $1, $2, $3 are tNTH_REF with integer values.
  def test_nth_ref
    util_lex_token('[$1, $2, $3]',
                   :tLBRACK,  "[",
                   :tNTH_REF, 1, :tCOMMA, ",",
                   :tNTH_REF, 2, :tCOMMA, ",",
                   :tNTH_REF, 3,
                   :tRBRACK,  "]")
  end
  # "(" at the start of input is tLPAREN.
  def test_open_bracket
    util_lex_token("(", :tLPAREN, "(")
  end

  # "(" after an identifier and a space is tLPAREN_ARG.
  def test_open_bracket_cmdarg
    util_lex_token("m (",
                   :tIDENTIFIER, "m",
                   :tLPAREN_ARG, "(")
  end

  # "(" directly after an identifier is tLPAREN2.
  def test_open_bracket_exprarg
    util_lex_token("m(",
                   :tIDENTIFIER, "m",
                   :tLPAREN2,    "(")
  end

  # "{" at the start of input is tLBRACE.
  def test_open_curly_bracket
    util_lex_token("{", :tLBRACE, "{")
  end

  # "{" after an identifier is tLCURLY.
  def test_open_curly_bracket_arg
    util_lex_token("m { 3 }",
                   :tIDENTIFIER, "m",
                   :tLCURLY,     "{",
                   :tINTEGER,    3,
                   :tRCURLY,     "}")
  end

  # "{" lexed from :expr_endarg is tLBRACE_ARG.
  def test_open_curly_bracket_block
    @lex.state = :expr_endarg # seen m(3)

    util_lex_token("{ 4 }",
                   :tLBRACE_ARG, "{",
                   :tINTEGER,    4,
                   :tRCURLY,     "}")
  end

  # "[" after an identifier and a space is tLBRACK.
  def test_open_square_bracket_arg
    util_lex_token("m [ 3 ]",
                   :tIDENTIFIER, "m",
                   :tLBRACK,     "[",
                   :tINTEGER,    3,
                   :tRBRACK,     "]")
  end

  # "[" at the start of input is tLBRACK.
  def test_open_square_bracket_ary
    util_lex_token("[1, 2, 3]",
                   :tLBRACK,  "[",
                   :tINTEGER, 1,
                   :tCOMMA,   ",",
                   :tINTEGER, 2,
                   :tCOMMA,   ",",
                   :tINTEGER, 3,
                   :tRBRACK,  "]")
  end

  # "[" directly after an identifier is tLBRACK2.
  def test_open_square_bracket_meth
    util_lex_token("m[3]",
                   :tIDENTIFIER, "m",
                   :tLBRACK2,    "[",
                   :tINTEGER,    3,
                   :tRBRACK,     "]")
  end
  def test_or
    util_lex_token("|", :tPIPE, "|")
  end

  def test_or2
    util_lex_token("||", :tOROP, "||")
  end

  # "||=" is tOP_ASGN carrying "||".
  def test_or2_equals
    util_lex_token("||=", :tOP_ASGN, "||")
  end

  # "|=" is tOP_ASGN carrying "|".
  def test_or_equals
    util_lex_token("|=", :tOP_ASGN, "|")
  end

  def test_percent
    util_lex_token("a % 2",
                   :tIDENTIFIER, "a",
                   :tPERCENT,    "%",
                   :tINTEGER,    2)
  end

  # "%=" is tOP_ASGN carrying "%".
  def test_percent_equals
    util_lex_token("a %= 2",
                   :tIDENTIFIER, "a",
                   :tOP_ASGN,    "%",
                   :tINTEGER,    2)
  end

  def test_plus
    util_lex_token("1 + 1",
                   :tINTEGER, 1,
                   :tPLUS,    "+",
                   :tINTEGER, 1)
  end

  # "+=" is tOP_ASGN carrying "+".
  def test_plus_equals
    @lex.state = :expr_end

    util_lex_token("+=", :tOP_ASGN, "+")
  end

  # "+" as a method name.
  def test_plus_method
    @lex.state = :expr_fname

    util_lex_token("+", :tPLUS, "+")
  end

  # "+@" as a method name is tUPLUS.
  def test_plus_unary_method
    @lex.state = :expr_fname

    util_lex_token("+@", :tUPLUS, "+@")
  end
  # Integer-literal prefixes in both letter cases, followed by a list of
  # malformed numeric literals that must raise a syntax error.
  #
  # The "0o" / "0O" cases used to be asserted twice in a row; each input is
  # now checked once.
  def test_numbers
    # [source, expected integer value]
    [
      ["0b10", 2],  ["0B10", 2],
      ["0d10", 10], ["0D10", 10],
      ["0x10", 16], ["0X10", 16],
      ["0o10", 8],  ["0O10", 8],
      ["0o",   0],  ["0O",   0],
      ["0",    0]
    ].each do |source, value|
      util_lex_token(source, :tINTEGER, value)
    end

    %w[
      0x 0X 0b 0B 0d 0D
      08 09 0o8 0o9 0O8 0O9
      1_e1 1_.1 1__1
      1end 1.1end
    ].each do |source|
      util_bad_token(source)
    end
  end
  # A leading plus on a number produces no token of its own.
  def test_plus_unary_number
    util_lex_token("+42", :tINTEGER, 42)
  end

  # Version 18: ?* is the integer 42.
  def test_question__18
    setup_lexer(18)

    util_lex_token("?*", :tINTEGER, 42)
  end

  # Version 19: ?* is the string "*".
  def test_question__19
    setup_lexer(19)

    util_lex_token("?*", :tSTRING, "*")
  end

  # "?" at the end of input is an error.
  def test_question_bad_eos
    util_bad_token("?")
  end

  # "?" followed by unescaped whitespace is the tEH operator.
  def test_question_bad_ws
    ["? ", "?\n", "?\t", "?\v", "?\r", "?\f"].each do |source|
      util_lex_token(source, :tEH, "?")
    end
  end
  # Version 18: ?\<ws> literals are integers. Rows are [source, value];
  # the state is reset to :expr_beg before every case.
  def test_question_ws_backslashed__18
    setup_lexer(18)

    [
      ["?\\ ", 32],
      ["?\\n", 10],
      ["?\\t", 9],
      ["?\\v", 11],
      ["?\\r", 13],
      ["?\\f", 12]
    ].each do |source, code|
      @lex.state = :expr_beg
      util_lex_token(source, :tINTEGER, code)
    end
  end

  # Version 19: ?\<ws> literals are one-character strings. Rows are
  # [source, value]; the state is reset to :expr_beg before every case.
  def test_question_ws_backslashed__19
    setup_lexer(19)

    [
      ["?\\ ", " "],
      ["?\\n", "\n"],
      ["?\\t", "\t"],
      ["?\\v", "\v"],
      ["?\\r", "\r"],
      ["?\\f", "\f"]
    ].each do |source, string|
      @lex.state = :expr_beg
      util_lex_token(source, :tSTRING, string)
    end
  end
  1106. def test_rbracket
  1107. util_lex_token "]", :tRBRACK, "]"
  1108. end
  1109. def test_rcurly
  1110. util_lex_token "}", :tRCURLY, "}"
  1111. end
  1112. def test_regexp
  1113. util_lex_token("/regexp/",
  1114. :tREGEXP_BEG, "/",
  1115. :tSTRING_CONTENT, "regexp",
  1116. :tSTRING_END, "/",
  1117. :tREGEXP_OPT, "")
  1118. end
  1119. def test_regexp_ambiguous
  1120. util_lex_token("method /regexp/",
  1121. :tIDENTIFIER, "method",
  1122. :tREGEXP_BEG, "/",
  1123. :tSTRING_CONTENT, "regexp",
  1124. :tSTRING_END, "/",
  1125. :tREGEXP_OPT, "")
  1126. end
  1127. def test_regexp_bad
  1128. util_bad_token("/.*/xyz",
  1129. :tREGEXP_BEG, "/",
  1130. :tSTRING_CONTENT, ".*",
  1131. :tSTRING_END, "/")
  1132. end
  # Expects a `\C-x` escape inside a regexp to be passed through verbatim in
  # the content token.
  def test_regexp_escape_C
    source = '/regex\\C-x/'
    expected = [
      :tREGEXP_BEG,     "/",
      :tSTRING_CONTENT, "regex\\C-x",
      :tSTRING_END,     "/",
      :tREGEXP_OPT,     ""
    ]

    util_lex_token(source, *expected)
  end
  # Expects a combined `\C-\M-x` escape inside a regexp to be passed through
  # verbatim in the content token.
  def test_regexp_escape_C_M
    source = '/regex\\C-\\M-x/'
    expected = [
      :tREGEXP_BEG,     "/",
      :tSTRING_CONTENT, "regex\\C-\\M-x",
      :tSTRING_END,     "/",
      :tREGEXP_OPT,     ""
    ]

    util_lex_token(source, *expected)
  end
  # Expects a backslash-newline placed between `\C-` and `\M-x` to be dropped
  # from the regexp content.
  def test_regexp_escape_C_M_craaaazy
    source = "/regex\\C-\\\n\\M-x/"
    expected = [
      :tREGEXP_BEG,     "/",
      :tSTRING_CONTENT, "regex\\C-\\M-x",
      :tSTRING_END,     "/",
      :tREGEXP_OPT,     ""
    ]

    util_lex_token(source, *expected)
  end
  # Expects a Parser::SyntaxError for `\C` followed by `x` rather than a dash.
  def test_regexp_escape_C_bad_dash
    malformed = '/regex\\Cx/'
    util_bad_token(malformed, :tREGEXP_BEG, "/")
  end
  # Expects a Parser::SyntaxError for `\C-` followed directly by the closing
  # slash.
  def test_regexp_escape_C_bad_dash_eos
    malformed = '/regex\\C-/'
    util_bad_token(malformed, :tREGEXP_BEG, "/")
  end
  # Expects a Parser::SyntaxError when the input ends right after `\C-`.
  def test_regexp_escape_C_bad_dash_eos2
    malformed = '/regex\\C-'
    util_bad_token(malformed, :tREGEXP_BEG, "/")
  end
  # Expects a Parser::SyntaxError for `\C` followed directly by the closing
  # slash.
  def test_regexp_escape_C_bad_eos
    malformed = '/regex\\C/'
    util_bad_token(malformed, :tREGEXP_BEG, "/")
  end
  # Expects a Parser::SyntaxError when the input ends right after `\c`.
  def test_regexp_escape_C_bad_eos2
    malformed = '/regex\\c'
    util_bad_token(malformed, :tREGEXP_BEG, "/")
  end
  # Expects a `\M-x` escape inside a regexp to be passed through verbatim in
  # the content token.
  def test_regexp_escape_M
    source = '/regex\\M-x/'
    expected = [
      :tREGEXP_BEG,     "/",
      :tSTRING_CONTENT, "regex\\M-x",
      :tSTRING_END,     "/",
      :tREGEXP_OPT,     ""
    ]

    util_lex_token(source, *expected)
  end
  # Expects a combined `\M-\C-x` escape inside a regexp to be passed through
  # verbatim in the content token.
  def test_regexp_escape_M_C
    source = '/regex\\M-\\C-x/'
    expected = [
      :tREGEXP_BEG,     "/",
      :tSTRING_CONTENT, "regex\\M-\\C-x",
      :tSTRING_END,     "/",
      :tREGEXP_OPT,     ""
    ]

    util_lex_token(source, *expected)
  end
  # Expects a Parser::SyntaxError for `\M` followed by `x` rather than a dash.
  def test_regexp_escape_M_bad_dash
    malformed = '/regex\\Mx/'
    util_bad_token(malformed, :tREGEXP_BEG, "/")
  end
  # Expects a Parser::SyntaxError for `\M-` followed directly by the closing
  # slash.
  def test_regexp_escape_M_bad_dash_eos
    malformed = '/regex\\M-/'
    util_bad_token(malformed, :tREGEXP_BEG, "/")
  end
  # Expects a Parser::SyntaxError when the input ends right after `\M-`.
  def test_regexp_escape_M_bad_dash_eos2
    malformed = '/regex\\M-'
    util_bad_token(malformed, :tREGEXP_BEG, "/")
  end
  # Expects a Parser::SyntaxError for `\M` followed directly by the closing
  # slash.
  def test_regexp_escape_M_bad_eos
    malformed = '/regex\\M/'
    util_bad_token(malformed, :tREGEXP_BEG, "/")
  end
  # Expects an escaped slash inside a `/.../` regexp to stay in the content
  # (backslash included) instead of terminating the literal.
  def test_regexp_escape_backslash_slash
    source = '/\\//'
    expected = [
      :tREGEXP_BEG,     "/",
      :tSTRING_CONTENT, '\\/',
      :tSTRING_END,     "/",
      :tREGEXP_OPT,     ""
    ]

    util_lex_token(source, *expected)
  end
  # Expects an escaped `%` inside a `%r%...%` regexp to stay in the content
  # (backslash included) instead of terminating the literal.
  def test_regexp_escape_backslash_terminator
    source = '%r%blah\\%blah%'
    expected = [
      :tREGEXP_BEG,     "%r%",
      :tSTRING_CONTENT, "blah\\%blah",
      :tSTRING_END,     "%",
      :tREGEXP_OPT,     ""
    ]

    util_lex_token(source, *expected)
  end
  # Expects an escaped `}` inside a `%r{...}` regexp to stay in the content
  # (backslash included) instead of terminating the literal.
  def test_regexp_escape_backslash_terminator_meta1
    source = '%r{blah\\}blah}'
    expected = [
      :tREGEXP_BEG,     "%r{",
      :tSTRING_CONTENT, "blah\\}blah",
      :tSTRING_END,     "}",
      :tREGEXP_OPT,     ""
    ]

    util_lex_token(source, *expected)
  end
  # Expects an escaped `/` inside a `%r/.../` regexp to stay in the content
  # (backslash included) instead of terminating the literal.
  def test_regexp_escape_backslash_terminator_meta2
    source = '%r/blah\\/blah/'
    expected = [
      :tREGEXP_BEG,     "%r/",
      :tSTRING_CONTENT, "blah\\/blah",
      :tSTRING_END,     "/",
      :tREGEXP_OPT,     ""
    ]

    util_lex_token(source, *expected)
  end
  # Expects an escaped `%` inside a `%r/.../` regexp (where `%` is not the
  # delimiter) to be kept verbatim in the content.
  def test_regexp_escape_backslash_terminator_meta3
    source = '%r/blah\\%blah/'
    expected = [
      :tREGEXP_BEG,     "%r/",
      :tSTRING_CONTENT, "blah\\%blah",
      :tSTRING_END,     "/",
      :tREGEXP_OPT,     ""
    ]

    util_lex_token(source, *expected)
  end
  # Expects a Parser::SyntaxError when the input ends right after a lone
  # backslash inside a regexp.
  def test_regexp_escape_bad_eos
    malformed = '/regex\\'
    util_bad_token(malformed, :tREGEXP_BEG, "/")
  end
  # Expects an escaped backslash inside a regexp to be kept as two
  # backslashes in the content token.
  def test_regexp_escape_bs
    source = '/regex\\\\regex/'
    expected = [
      :tREGEXP_BEG,     "/",
      :tSTRING_CONTENT, "regex\\\\regex",
      :tSTRING_END,     "/",
      :tREGEXP_OPT,     ""
    ]

    util_lex_token(source, *expected)
  end
  # Expects a `\cx` escape inside a regexp to be passed through verbatim in
  # the content token.
  def test_regexp_escape_c
    source = '/regex\\cxxx/'
    expected = [
      :tREGEXP_BEG,     "/",
      :tSTRING_CONTENT, "regex\\cxxx",
      :tSTRING_END,     "/",
      :tREGEXP_OPT,     ""
    ]

    util_lex_token(source, *expected)
  end
  # Expects `\c` followed by another escape (`\n`) inside a regexp to be
  # passed through verbatim in the content token.
  def test_regexp_escape_c_backslash
    source = '/regex\\c\\n/'
    expected = [
      :tREGEXP_BEG,     "/",
      :tSTRING_CONTENT, "regex\\c\\n",
      :tSTRING_END,     "/",
      :tREGEXP_OPT,     ""
    ]

    util_lex_token(source, *expected)
  end
  # Expects `\t` and `\n` escapes inside a regexp to be kept unexpanded in
  # the content token.
  def test_regexp_escape_chars
    source = '/re\\tge\\nxp/'
    expected = [
      :tREGEXP_BEG,     "/",
      :tSTRING_CONTENT, "re\\tge\\nxp",
      :tSTRING_END,     "/",
      :tREGEXP_OPT,     ""
    ]

    util_lex_token(source, *expected)
  end
  # Expects a character class holding an escaped slash and an escaped
  # backslash to be passed through verbatim as the regexp content.
  def test_regexp_escape_double_backslash
    regexp = '/[\\/\\\\]$/'
    # Everything between the two delimiting slashes.
    inner = regexp[1...-1]

    util_lex_token(regexp,
                   :tREGEXP_BEG,     "/",
                   :tSTRING_CONTENT, inner,
                   :tSTRING_END,     "/",
                   :tREGEXP_OPT,     "")
  end
  # Expects a two-digit `\x61` escape inside a regexp to be kept unexpanded
  # in the content token.
  def test_regexp_escape_hex
    source = '/regex\\x61xp/'
    expected = [
      :tREGEXP_BEG,     "/",
      :tSTRING_CONTENT, "regex\\x61xp",
      :tSTRING_END,     "/",
      :tREGEXP_OPT,     ""
    ]

    util_lex_token(source, *expected)
  end
  # Expects a Parser::SyntaxError for `\x` followed by a non-hex character.
  def test_regexp_escape_hex_bad
    malformed = '/regex\\xzxp/'
    util_bad_token(malformed, :tREGEXP_BEG, "/")
  end
  # Expects single-digit `\xd` / `\xa` escapes to be kept unexpanded in the
  # content, and the trailing `on` to be reported as regexp options.
  def test_regexp_escape_hex_one
    source = '/^[\\xd\\xa]{2}/on'
    expected = [
      :tREGEXP_BEG,     '/',
      :tSTRING_CONTENT, '^[\\xd\\xa]{2}',
      :tSTRING_END,     "/",
      :tREGEXP_OPT,     'on'
    ]

    util_lex_token(source, *expected)
  end
  # Expects a one-digit `\0` escape inside a regexp to be kept unexpanded in
  # the content token.
  def test_regexp_escape_oct1
    source = '/regex\\0xp/'
    expected = [
      :tREGEXP_BEG,     "/",
      :tSTRING_CONTENT, "regex\\0xp",
      :tSTRING_END,     "/",
      :tREGEXP_OPT,     ""
    ]

    util_lex_token(source, *expected)
  end
  # Expects a two-digit `\07` escape inside a regexp to be kept unexpanded in
  # the content token.
  def test_regexp_escape_oct2
    source = '/regex\\07xp/'
    expected = [
      :tREGEXP_BEG,     "/",
      :tSTRING_CONTENT, "regex\\07xp",
      :tSTRING_END,     "/",
      :tREGEXP_OPT,     ""
    ]

    util_lex_token(source, *expected)
  end
  # Expects a backslash followed by the digit run `10142` inside a regexp to
  # be kept unexpanded in the content token.
  def test_regexp_escape_oct3
    source = '/regex\\10142/'
    expected = [
      :tREGEXP_BEG,     "/",
      :tSTRING_CONTENT, "regex\\10142",
      :tSTRING_END,     "/",
      :tREGEXP_OPT,     ""
    ]

    util_lex_token(source, *expected)
  end
  # Expects a backslash-newline inside a regexp to be removed, joining the
  # text on either side into one content token.
  def test_regexp_escape_return
    source = "/regex\\\nregex/"
    expected = [
      :tREGEXP_BEG,     "/",
      :tSTRING_CONTENT, "regexregex",
      :tSTRING_END,     "/",
      :tREGEXP_OPT,     ""
    ]

    util_lex_token(source, *expected)
  end
  # Expects the trailing `nm` after a regexp to be reported as its options.
  def test_regexp_nm
    expected = [
      :tREGEXP_BEG,     "/",
      :tSTRING_CONTENT, ".*",
      :tSTRING_END,     "/",
      :tREGEXP_OPT,     "nm"
    ]

    util_lex_token("/.*/nm", *expected)
  end
  # Expects a lone `)` to lex as tRPAREN.
  def test_rparen
    source = ")"
    util_lex_token(source, :tRPAREN, ")")
  end
  # Expects `>>` between an identifier and an integer to lex as tRSHFT.
  def test_rshft
    expected = [
      :tIDENTIFIER, "a",
      :tRSHFT,      ">>",
      :tINTEGER,    2
    ]

    util_lex_token("a >> 2", *expected)
  end
  # Expects `>>=` to lex as a tOP_ASGN whose value is the operator `>>`.
  def test_rshft_equals
    expected = [
      :tIDENTIFIER, "a",
      :tOP_ASGN,    ">>",
      :tINTEGER,    2
    ]

    util_lex_token("a >>= 2", *expected)
  end
  # Expects a binary `*` after an identifier to lex as tSTAR2 and to leave
  # the lexer in :expr_beg.
  def test_star
    expected = [
      :tIDENTIFIER, "a",
      :tSTAR2,      "*"
    ]

    util_lex_token("a * ", *expected)
    assert_equal :expr_beg, @lex.state
  end
  # Expects a binary `**` after an identifier to lex as tPOW and to leave the
  # lexer in :expr_beg.
  def test_star2
    expected = [
      :tIDENTIFIER, "a",
      :tPOW,        "**"
    ]

    util_lex_token("a ** ", *expected)
    assert_equal :expr_beg, @lex.state
  end
  # Expects `**=` to lex as a tOP_ASGN whose value is `**`, leaving the lexer
  # in :expr_beg.
  def test_star2_equals
    expected = [
      :tIDENTIFIER, "a",
      :tOP_ASGN,    "**"
    ]

    util_lex_token("a **= ", *expected)
    assert_equal :expr_beg, @lex.state
  end
  # Expects a leading `**` to lex as tDSTAR and to leave the lexer in
  # :expr_beg.
  def test_star2_beg
    source = "** "

    util_lex_token(source, :tDSTAR, "**")
    assert_equal :expr_beg, @lex.state
  end
  # Starting in :expr_arg, expects ` *a` to lex as a tSTAR followed by an
  # identifier, ending in :expr_arg.
  def test_star_arg
    @lex.state = :expr_arg

    expected = [
      :tSTAR,       "*",
      :tIDENTIFIER, "a"
    ]
    util_lex_token(" *a", *expected)

    assert_equal :expr_arg, @lex.state
  end
  # Starting in :expr_beg, expects `*a` to lex as a tSTAR followed by an
  # identifier, ending in :expr_arg.
  def test_star_arg_beg
    @lex.state = :expr_beg

    expected = [
      :tSTAR,       "*",
      :tIDENTIFIER, "a"
    ]
    util_lex_token("*a", *expected)

    assert_equal :expr_arg, @lex.state
  end
  # Starting in :expr_fname, expects `*a` to lex as a tSTAR2 followed by an
  # identifier, ending in :expr_arg.
  def test_star_arg_beg_fname
    @lex.state = :expr_fname

    expected = [
      :tSTAR2,      "*",
      :tIDENTIFIER, "a"
    ]
    util_lex_token("*a", *expected)

    assert_equal :expr_arg, @lex.state
  end
  # Expects `*=` to lex as a tOP_ASGN whose value is `*`, leaving the lexer
  # in :expr_beg.
  def test_star_equals
    expected = [
      :tIDENTIFIER, "a",
      :tOP_ASGN,    "*"
    ]

    util_lex_token("a *= ", *expected)
    assert_equal :expr_beg, @lex.state
  end
  # Expects a Parser::SyntaxError when the whole input is a single `%`.
  def test_string_bad_eos
    malformed = '%'
    util_bad_token(malformed, :tSTRING_BEG, '%')
  end
  # Expects a Parser::SyntaxError for `%{nest`, a percent-string whose closing
  # `}` never appears before the end of input.
  def test_string_bad_eos_quote
    # The opening token of a `%{` literal carries the value `%{` (the same
    # value test_string_double_nested_curlies expects), not `%}`.
    util_bad_token('%{nest',
                   :tSTRING_BEG, '%{')
  end
  # Expects a simple double-quoted string to lex as a single tSTRING.
  def test_string_double
    source = '"string"'
    util_lex_token(source, :tSTRING, "string")
  end
  # Expects `\C-a` in a double-quoted string to produce byte 001 (octal).
  def test_string_double_escape_C
    source = '"\\C-a"'
    util_lex_token(source, :tSTRING, "\001")
  end
  # Expects `\C-` applied to an escaped backslash to produce byte 034 (octal).
  def test_string_double_escape_C_backslash
    source = '"\\C-\\\\"'
    util_lex_token(source, :tSTRING, "\034")
  end
  # Expects `\C-\M-a` in a double-quoted string to produce byte 201 (octal).
  def test_string_double_escape_C_escape
    source = '"\\C-\\M-a"'
    util_lex_token(source, :tSTRING, "\201")
  end
  # Expects `\C-?` in a double-quoted string to produce byte 177 (octal).
  def test_string_double_escape_C_question
    source = '"\\C-?"'
    util_lex_token(source, :tSTRING, "\177")
  end
  # Expects `\M-a` in a double-quoted string to produce byte 341 (octal).
  def test_string_double_escape_M
    source = '"\\M-a"'
    util_lex_token(source, :tSTRING, "\341")
  end
  # Expects `\M-` applied to an escaped backslash to produce byte 334 (octal).
  def test_string_double_escape_M_backslash
    source = '"\\M-\\\\"'
    util_lex_token(source, :tSTRING, "\334")
  end
  # Expects `\M-\C-a` in a double-quoted string to produce byte 201 (octal).
  def test_string_double_escape_M_escape
    source = '"\\M-\\C-a"'
    util_lex_token(source, :tSTRING, "\201")
  end
  # Expects each `\a` escape in a double-quoted string to be expanded.
  def test_string_double_escape_bs1
    source = '"a\\a\\a"'
    util_lex_token(source, :tSTRING, "a\a\a")
  end
  # Expects an escaped backslash in a double-quoted string to collapse into
  # a single backslash.
  def test_string_double_escape_bs2
    source = '"a\\\\a"'
    util_lex_token(source, :tSTRING, "a\\a")
  end
  # Expects `\ca` in a double-quoted string to produce byte 001 (octal).
  def test_string_double_escape_c
    source = '"\\ca"'
    util_lex_token(source, :tSTRING, "\001")
  end
  # Expects `\c\M-a` in a double-quoted string to produce byte 201 (octal).
  def test_string_double_escape_c_escape
    source = '"\\c\\M-a"'
    util_lex_token(source, :tSTRING, "\201")
  end
  # Expects `\c?` in a double-quoted string to produce byte 177 (octal).
  def test_string_double_escape_c_question
    source = '"\\c?"'
    util_lex_token(source, :tSTRING, "\177")
  end
  # Expects `\t` and `\n` in a double-quoted string to be expanded to a tab
  # and a newline.
  def test_string_double_escape_chars
    source = '"s\\tri\\ng"'
    util_lex_token(source, :tSTRING, "s\tri\ng")
  end
  # Expects `\x61\x62\x63` in a double-quoted string to be expanded to "abc".
  def test_string_double_escape_hex
    source = '"n = \\x61\\x62\\x63"'
    util_lex_token(source, :tSTRING, "n = abc")
  end
  # Expects `\101\102\103` in a double-quoted string to be expanded to "ABC".
  def test_string_double_escape_octal
    source = '"n = \\101\\102\\103"'
    util_lex_token(source, :tSTRING, "n = ABC")
  end
  # Expects a double-quoted string mixing plain `#`, `#@ivar`, `#$gvar` and
  # `#{...}` to split into content, tSTRING_DVAR and tSTRING_DBEG segments;
  # a `#` not followed by `@`, `$` or `{` stays in the content.
  def test_string_double_interp
    source = "\"blah #x a \#@a b \#$b c \#{3} # \""
    expected = [
      :tSTRING_BEG,     "\"",
      :tSTRING_CONTENT, "blah #x a ",
      :tSTRING_DVAR,    nil,
      :tIVAR,           "@a",
      :tSTRING_CONTENT, " b ",
      :tSTRING_DVAR,    nil,
      :tGVAR,           "$b",
      :tSTRING_CONTENT, " c ",
      :tSTRING_DBEG,    '#{',
      :tINTEGER,        3,
      :tRCURLY,         "}",
      :tSTRING_CONTENT, " # ",
      :tSTRING_END,     "\""
    ]

    util_lex_token(source, *expected)
  end
  # Expects `foo:bar` inside an interpolation to lex as an identifier
  # followed by a symbol.
  def test_string_double_interp_label
    source = '"#{foo:bar}"'
    expected = [
      :tSTRING_BEG,  '"',
      :tSTRING_DBEG, '#{',
      :tIDENTIFIER,  'foo',
      :tSYMBOL,      'bar',
      :tRCURLY,      '}',
      :tSTRING_END,  '"'
    ]

    util_lex_token(source, *expected)
  end
  # Expects balanced nested braces inside a `%{...}` string to stay part of
  # the content, with only the outermost `}` closing the literal.
  def test_string_double_nested_curlies
    source = '%{nest{one{two}one}nest}'
    expected = [
      :tSTRING_BEG,     '%{',
      :tSTRING_CONTENT, "nest{one{two}one}nest",
      :tSTRING_END,     '}'
    ]

    util_lex_token(source, *expected)
  end
  # Expects a `#` that does not start an interpolation to be ordinary string
  # content, whether it comes first or later in the string.
  def test_string_double_no_interp
    [
      ["\"# blah\"",      "# blah"],      # pound first
      ["\"blah # blah\"", "blah # blah"]  # pound not first
    ].each do |source, text|
      util_lex_token(source, :tSTRING, text)
    end
  end
  # Expects a one-digit `\x0` escape in a double-quoted string to produce a
  # NUL byte.
  def test_string_escape_x_single
    source = '"\\x0"'
    util_lex_token(source, :tSTRING, "\000")
  end
  # Expects a `%Q[...]` literal to lex as begin / content / end, with the
  # inner space kept in the content.
  def test_string_pct_Q
    expected = [
      :tSTRING_BEG,     '%Q[',
      :tSTRING_CONTENT, "s1 s2",
      :tSTRING_END,     ']'
    ]

    util_lex_token("%Q[s1 s2]", *expected)
  end
  # Expects `%W[...]` to split on both a space and a newline, with a tSPACE
  # after every word, including the last.
  def test_string_pct_W
    expected = [
      :tWORDS_BEG,      "%W[",
      :tSTRING_CONTENT, "s1",
      :tSPACE,          nil,
      :tSTRING_CONTENT, "s2",
      :tSPACE,          nil,
      :tSTRING_CONTENT, "s3",
      :tSPACE,          nil,
      :tSTRING_END,     ']'
    ]

    util_lex_token("%W[s1 s2\ns3]", *expected)
  end
  # Expects a backslash-newline inside `%W[...]` to become a literal newline
  # at the start of the next word rather than a word separator.
  def test_string_pct_W_bs_nl
    source = "%W[s1 \\\ns2]"
    expected = [
      :tWORDS_BEG,      "%W[",
      :tSTRING_CONTENT, "s1",
      :tSPACE,          nil,
      :tSTRING_CONTENT, "\ns2",
      :tSPACE,          nil,
      :tSTRING_END,     ']'
    ]

    util_lex_token(source, *expected)
  end
  # Expects interpolations inside `%W[...]` to be lexed: two adjacent `#{}`
  # segments form one word (no tSPACE between them) and `#@a` forms the next.
  def test_string_pct_W_interp
    source = '%W[#{1}#{2} #@a]'
    expected = [
      :tWORDS_BEG,   '%W[',
      :tSTRING_DBEG, '#{',
      :tINTEGER,     1,
      :tRCURLY,      '}',
      :tSTRING_DBEG, '#{',
      :tINTEGER,     2,
      :tRCURLY,      '}',
      :tSPACE,       nil,
      :tSTRING_DVAR, nil,
      :tIVAR,        '@a',
      :tSPACE,       nil,
      :tSTRING_END,  ']'
    ]

    util_lex_token(source, *expected)
  end
  # Expects `%I(...)` to open with tSYMBOLS_BEG and yield one content token
  # plus a tSPACE per word.
  def test_string_pct_I
    expected = [
      :tSYMBOLS_BEG,    "%I(",
      :tSTRING_CONTENT, "s1",
      :tSPACE,          nil,
      :tSTRING_CONTENT, "s2",
      :tSPACE,          nil,
      :tSTRING_END,     ')'
    ]

    util_lex_token("%I(s1 s2)", *expected)
  end
  # Expects `%<...>` to lex as a string delimited by angle brackets.
  def test_string_pct_angle
    expected = [
      :tSTRING_BEG,     '%<',
      :tSTRING_CONTENT, "blah",
      :tSTRING_END,     '>'
    ]

    util_lex_token("%<blah>", *expected)
  end
  # Expects `%%...%` to lex as a string whose reported begin and end tokens
  # are both `%`.
  def test_string_pct_pct
    expected = [
      :tSTRING_BEG,     '%',
      :tSTRING_CONTENT, "blah",
      :tSTRING_END,     '%'
    ]

    util_lex_token("%%blah%", *expected)
  end
  # Expects `%w[...]` with a trailing space before `]` to yield exactly one
  # tSPACE after each word.
  def test_string_pct_w
    expected = [
      :tQWORDS_BEG,     "%w[",
      :tSTRING_CONTENT, "s1",
      :tSPACE,          nil,
      :tSTRING_CONTENT, "s2",
      :tSPACE,          nil,
      :tSTRING_END,     "]"
    ]

    util_lex_token("%w[s1 s2 ]", *expected)
  end
  # Expects a Parser::SyntaxError for a `%w[` literal that is never closed.
  def test_string_pct_w_incomplete
    tokens_before_error = [
      :tQWORDS_BEG,     "%w[",
      :tSTRING_CONTENT, "s1",
      :tSPACE,          nil
    ]

    util_bad_token("%w[s1 ", *tokens_before_error)
  end
  # Expects a backslash-newline inside `%w[...]` to become a literal newline
  # at the start of the next word rather than a word separator.
  def test_string_pct_w_bs_nl
    source = "%w[s1 \\\ns2]"
    expected = [
      :tQWORDS_BEG,     "%w[",
      :tSTRING_CONTENT, "s1",
      :tSPACE,          nil,
      :tSTRING_CONTENT, "\ns2",
      :tSPACE,          nil,
      :tSTRING_END,     ']'
    ]

    util_lex_token(source, *expected)
  end
  # Expects a backslash-escaped space inside `%w[...]` to be kept in the word
  # (without the backslash) rather than splitting it.
  def test_string_pct_w_bs_sp
    source = "%w[s\\ 1 s\\ 2]"
    expected = [
      :tQWORDS_BEG,     "%w[",
      :tSTRING_CONTENT, "s 1",
      :tSPACE,          nil,
      :tSTRING_CONTENT, "s 2",
      :tSPACE,          nil,
      :tSTRING_END,     ']'
    ]

    util_lex_token(source, *expected)
  end
  # Expects a tab character inside `%w[...]` to separate words.
  def test_string_pct_w_tab
    source = "%w[abc\tdef]"
    expected = [
      :tQWORDS_BEG,     "%w[",
      :tSTRING_CONTENT, "abc",
      :tSPACE,          nil,
      :tSTRING_CONTENT, "def",
      :tSPACE,          nil,
      :tSTRING_END,     ']'
    ]

    util_lex_token(source, *expected)
  end
  # Expects `%i(...)` to open with tQSYMBOLS_BEG and yield one content token
  # plus a tSPACE per word.
  def test_string_pct_i
    expected = [
      :tQSYMBOLS_BEG,   "%i(",
      :tSTRING_CONTENT, "s1",
      :tSPACE,          nil,
      :tSTRING_CONTENT, "s2",
      :tSPACE,          nil,
      :tSTRING_END,     ')'
    ]

    util_lex_token("%i(s1 s2)", *expected)
  end
  # Expects a simple single-quoted string to lex as a single tSTRING.
  def test_string_single
    source = "'string'"
    util_lex_token(source, :tSTRING, "string")
  end
  # Expects `\t` and `\n` inside a single-quoted string to stay as literal
  # backslash sequences.
  def test_string_single_escape_chars
    source = "'s\\tri\\ng'"
    util_lex_token(source, :tSTRING, "s\\tri\\ng")
  end
  # Expects a single-quoted string spanning a backslash-newline to be emitted
  # as separate content tokens, the first keeping the backslash and newline.
  def test_string_single_nl
    source = "'blah\\\nblah'"
    expected = [
      :tSTRING_BEG,     "'",
      :tSTRING_CONTENT, "blah\\\n",
      :tSTRING_CONTENT, "blah",
      :tSTRING_END,     "'"
    ]

    util_lex_token(source, *expected)
  end
  # Expects `:symbol` to lex as a single tSYMBOL whose value omits the colon.
  def test_symbol
    source = ":symbol"
    util_lex_token(source, :tSYMBOL, "symbol")
  end
  # Expects a Parser::SyntaxError for a quoted symbol containing a NUL byte.
  def test_symbol_bad_zero
    malformed = ":\"blah\0\""
    util_bad_token(malformed, :tSYMBEG, ":")
  end
  1635. def test_symbol_double
  1636. util_lex_token(":\"symbol\"",
  1637. :tSYMBEG,

Large files are truncated, but you can click here to view the full file