PageRenderTime 52ms CodeModel.GetById 19ms RepoModel.GetById 0ms app.codeStats 1ms

/projects/jruby-1.7.3/test/externals/ruby1.9/ruby/test_io_m17n.rb

https://gitlab.com/essere.lab.public/qualitas.class-corpus
Ruby | 1627 lines | 1471 code | 143 blank | 13 comment | 18 complexity | 171eaafad4945dc9e5d021baf550879d MD5 | raw file
  1. require 'test/unit'
  2. require 'tmpdir'
  3. require 'timeout'
  4. require_relative 'envutil'
  5. class TestIO_M17N < Test::Unit::TestCase
  # Encodings that the table-driven tests in this class iterate over.
  # Frozen so that no test can accidentally mutate the shared list.
  ENCS = [
    Encoding::ASCII_8BIT,
    Encoding::EUC_JP,
    Encoding::Shift_JIS,
    Encoding::UTF_8
  ].freeze
  # Creates a temporary directory, makes it the current working directory,
  # and yields its path to the caller's block.
  def with_tmpdir
    Dir.mktmpdir do |tmp_path|
      Dir.chdir(tmp_path) do
        yield tmp_path
      end
    end
  end
  # Opens IO.pipe(*args) and runs +wp+ (given the write end) and +rp+ (given
  # the read end) in two separate threads.
  #
  # An exception raised inside either proc is captured (its pipe end is
  # closed first) and re-raised from the calling thread once both threads
  # have been cleaned up; a writer error takes precedence over a reader
  # error. Calls flunk("timeout") if either thread fails to finish within
  # its 10 second join.
  def pipe(*args, wp, rp)
    reader_error = nil
    writer_error = nil
    read_io, write_io = IO.pipe(*args)
    reader = Thread.new {
      begin
        rp.call(read_io)
      rescue Exception => e
        read_io.close
        reader_error = e
      end
    }
    writer = Thread.new {
      begin
        wp.call(write_io)
      rescue Exception => e
        write_io.close
        writer_error = e
      end
    }
    finished = writer.join(10) && reader.join(10)
    flunk("timeout") unless finished
  ensure
    # Any of these locals may still be nil if setup failed part-way.
    write_io.close if write_io && !write_io.closed?
    read_io.close if read_io && !read_io.closed?
    if writer
      writer.kill
      writer.join
    end
    if reader
      reader.kill
      reader.join
    end
    raise writer_error if writer_error
    raise reader_error if reader_error
  end
  # Yields the read and write ends of IO.pipe(*args) and closes whichever
  # of them is still open once the block is done.
  def with_pipe(*args)
    read_io, write_io = IO.pipe(*args)
    begin
      yield read_io, write_io
    ensure
      [read_io, write_io].each {|io| io.close unless io.closed? }
    end
  end
  # Writes +content+ to +path+ in binary mode ("wb").
  #
  # File.open is used rather than Kernel#open so that +path+ is always
  # treated as a file name; Kernel#open may interpret a leading "|" as a
  # command to run.
  def generate_file(path, content)
    File.open(path, "wb") {|f| f.write content }
  end
  # Renders +str+ as text of the form
  # <dumped string>.force_encoding(<dumped encoding name>).
  def encdump(str)
    dumped_text = str.dump
    dumped_name = str.encoding.name.dump
    "#{dumped_text}.force_encoding(#{dumped_name})"
  end
  # Asserts expected == actual. The failure message shows both strings via
  # encdump so that their encodings are visible.
  def assert_str_equal(expected, actual, message=nil)
    failure_text = <<EOT
#{encdump expected} expected but not equal to
#{encdump actual}.
EOT
    full_message = build_message(message, failure_text)
    assert_block(full_message) do
      expected == actual
    end
  end
  # Plain "r" mode: the external encoding is expected to equal
  # Encoding.default_external and the internal encoding to be nil.
  def test_open_r
    with_tmpdir do
      generate_file('tmp', "")
      open("tmp", "r") do |io|
        assert_equal(Encoding.default_external, io.external_encoding)
        assert_equal(nil, io.internal_encoding)
      end
    end
  end
  # "rb" mode: the external encoding is expected to be ASCII-8BIT with no
  # internal encoding.
  def test_open_rb
    with_tmpdir do
      generate_file('tmp', "")
      open("tmp", "rb") do |io|
        binary = Encoding.find("ASCII-8BIT")
        assert_equal(binary, io.external_encoding)
        assert_equal(nil, io.internal_encoding)
      end
    end
  end
  # External encoding given in the mode string ("r:euc-jp").
  def test_open_r_enc
    with_tmpdir do
      generate_file('tmp', "")
      open("tmp", "r:euc-jp") do |io|
        assert_equal(Encoding::EUC_JP, io.external_encoding)
        assert_equal(nil, io.internal_encoding)
      end
    end
  end
  # External encoding given by name through the :encoding option.
  def test_open_r_enc_in_opt
    with_tmpdir do
      generate_file('tmp', "")
      open("tmp", "r", :encoding => "euc-jp") do |io|
        assert_equal(Encoding::EUC_JP, io.external_encoding)
        assert_equal(nil, io.internal_encoding)
      end
    end
  end
  # External encoding given as an Encoding object through the :encoding
  # option.
  def test_open_r_encname_in_opt
    with_tmpdir do
      generate_file('tmp', "")
      open("tmp", "r", :encoding => Encoding::EUC_JP) do |io|
        assert_equal(Encoding::EUC_JP, io.external_encoding)
        assert_equal(nil, io.internal_encoding)
      end
    end
  end
  # External encoding given as an Encoding object through the
  # :external_encoding option.
  def test_open_r_ext_enc_in_opt
    with_tmpdir do
      generate_file('tmp', "")
      open("tmp", "r", :external_encoding => Encoding::EUC_JP) do |io|
        assert_equal(Encoding::EUC_JP, io.external_encoding)
        assert_equal(nil, io.internal_encoding)
      end
    end
  end
  # External encoding given by name through the :external_encoding option.
  def test_open_r_ext_encname_in_opt
    with_tmpdir do
      generate_file('tmp', "")
      open("tmp", "r", :external_encoding => "euc-jp") do |io|
        assert_equal(Encoding::EUC_JP, io.external_encoding)
        assert_equal(nil, io.internal_encoding)
      end
    end
  end
  # External and internal encodings both given as Encoding objects via
  # options.
  def test_open_r_enc_enc
    with_tmpdir do
      generate_file('tmp', "")
      open("tmp", "r",
           :external_encoding => Encoding::EUC_JP,
           :internal_encoding => Encoding::UTF_8) do |io|
        assert_equal(Encoding::EUC_JP, io.external_encoding)
        assert_equal(Encoding::UTF_8, io.internal_encoding)
      end
    end
  end
  # External and internal encodings both given in the mode string
  # ("r:euc-jp:utf-8").
  def test_open_r_encname_encname
    with_tmpdir do
      generate_file('tmp', "")
      open("tmp", "r:euc-jp:utf-8") do |io|
        assert_equal(Encoding::EUC_JP, io.external_encoding)
        assert_equal(Encoding::UTF_8, io.internal_encoding)
      end
    end
  end
  # "external:internal" pair given by name through the :encoding option.
  def test_open_r_encname_encname_in_opt
    with_tmpdir do
      generate_file('tmp', "")
      open("tmp", "r", :encoding => "euc-jp:utf-8") do |io|
        assert_equal(Encoding::EUC_JP, io.external_encoding)
        assert_equal(Encoding::UTF_8, io.internal_encoding)
      end
    end
  end
  # :external_encoding and :internal_encoding options given as Encoding
  # objects.
  def test_open_r_enc_enc_in_opt
    with_tmpdir do
      generate_file('tmp', "")
      open("tmp", "r",
           :external_encoding => Encoding::EUC_JP,
           :internal_encoding => Encoding::UTF_8) do |io|
        assert_equal(Encoding::EUC_JP, io.external_encoding)
        assert_equal(Encoding::UTF_8, io.internal_encoding)
      end
    end
  end
  # :external_encoding and :internal_encoding options given by name.
  def test_open_r_externalencname_internalencname_in_opt
    with_tmpdir do
      generate_file('tmp', "")
      open("tmp", "r",
           :external_encoding => "euc-jp",
           :internal_encoding => "utf-8") do |io|
        assert_equal(Encoding::EUC_JP, io.external_encoding)
        assert_equal(Encoding::UTF_8, io.internal_encoding)
      end
    end
  end
  # Plain "w" mode: both external and internal encodings are expected to
  # be nil.
  def test_open_w
    with_tmpdir do
      open("tmp", "w") do |io|
        assert_equal(nil, io.external_encoding)
        assert_equal(nil, io.internal_encoding)
      end
    end
  end
  # "wb" mode: the external encoding is expected to be ASCII-8BIT with no
  # internal encoding.
  def test_open_wb
    with_tmpdir do
      open("tmp", "wb") do |io|
        binary = Encoding.find("ASCII-8BIT")
        assert_equal(binary, io.external_encoding)
        assert_equal(nil, io.internal_encoding)
      end
    end
  end
  # Write mode with the external encoding in the mode string ("w:euc-jp").
  def test_open_w_enc
    with_tmpdir do
      open("tmp", "w:euc-jp") do |io|
        assert_equal(Encoding::EUC_JP, io.external_encoding)
        assert_equal(nil, io.internal_encoding)
      end
    end
  end
  # Write mode with the external encoding named in the :encoding option.
  def test_open_w_enc_in_opt
    with_tmpdir do
      open("tmp", "w", :encoding => "euc-jp") do |io|
        assert_equal(Encoding::EUC_JP, io.external_encoding)
        assert_equal(nil, io.internal_encoding)
      end
    end
  end
  # Write mode with the external encoding named in the :external_encoding
  # option.
  def test_open_w_enc_in_opt2
    with_tmpdir do
      open("tmp", "w", :external_encoding => "euc-jp") do |io|
        assert_equal(Encoding::EUC_JP, io.external_encoding)
        assert_equal(nil, io.internal_encoding)
      end
    end
  end
  # Write mode with external and internal encodings in the mode string.
  def test_open_w_enc_enc
    with_tmpdir do
      open("tmp", "w:euc-jp:utf-8") do |io|
        assert_equal(Encoding::EUC_JP, io.external_encoding)
        assert_equal(Encoding::UTF_8, io.internal_encoding)
      end
    end
  end
  # Write mode with an "external:internal" pair in the :encoding option.
  def test_open_w_enc_enc_in_opt
    with_tmpdir do
      open("tmp", "w", :encoding => "euc-jp:utf-8") do |io|
        assert_equal(Encoding::EUC_JP, io.external_encoding)
        assert_equal(Encoding::UTF_8, io.internal_encoding)
      end
    end
  end
  # Write mode with separate :external_encoding / :internal_encoding names.
  def test_open_w_enc_enc_in_opt2
    with_tmpdir do
      open("tmp", "w",
           :external_encoding => "euc-jp",
           :internal_encoding => "utf-8") do |io|
        assert_equal(Encoding::EUC_JP, io.external_encoding)
        assert_equal(Encoding::UTF_8, io.internal_encoding)
      end
    end
  end
  # Encodings in the mode string combined with an explicit permission
  # argument (0600).
  def test_open_w_enc_enc_perm
    with_tmpdir do
      open("tmp", "w:euc-jp:utf-8", 0600) do |io|
        assert_equal(Encoding::EUC_JP, io.external_encoding)
        assert_equal(Encoding::UTF_8, io.internal_encoding)
      end
    end
  end
  # IO.new on a raw descriptor with mode "r:sjis": data read back is
  # expected to be tagged Windows_31J.
  def test_io_new_enc
    with_tmpdir do
      generate_file("tmp", "\xa1")
      io = IO.new(IO.sysopen("tmp"), "r:sjis")
      begin
        assert_equal(Encoding::Windows_31J, io.read.encoding)
      ensure
        io.close
      end
    end
  end
  # IO.pipe with :invalid=>:replace: the invalid byte "\x80" is expected
  # to be read back as "?".
  def test_s_pipe_invalid
    writer = proc do |w|
      w << "\x80"
      w.close
    end
    reader = proc {|r| assert_equal("?", r.read) }
    pipe("utf-8", "euc-jp", { :invalid=>:replace }, writer, reader)
  end
  # IO.pipe with :undef=>:replace: U+FFFD is expected to be read back
  # as "?".
  def test_s_pipe_undef
    writer = proc do |w|
      w << "\ufffd"
      w.close
    end
    reader = proc {|r| assert_equal("?", r.read) }
    pipe("utf-8:euc-jp", { :undef=>:replace }, writer, reader)
  end
  # Same as test_s_pipe_undef but with a custom :replace string ("X").
  def test_s_pipe_undef_replace_string
    writer = proc do |w|
      w << "\ufffd"
      w.close
    end
    reader = proc {|r| assert_equal("X", r.read) }
    pipe("utf-8:euc-jp", { :undef=>:replace, :replace=>"X" }, writer, reader)
  end
  # Reads through a dup of the read end; the dup is expected to return
  # the EUC-JP conversion of the data written.
  def test_dup
    writer = proc do |w|
      w << "\u3042"
      w.close
    end
    reader = proc do |r|
      copy = r.dup
      begin
        assert_equal("\xA4\xA2".force_encoding("euc-jp"), copy.read)
      ensure
        copy.close
      end
    end
    pipe("utf-8:euc-jp", writer, reader)
  end
  # Reads through a dup of the read end of a pipe created with
  # :undef=>:replace; "?" is expected for U+FFFD.
  def test_dup_undef
    writer = proc do |w|
      w << "\uFFFD"
      w.close
    end
    reader = proc do |r|
      copy = r.dup
      begin
        assert_equal("?", copy.read)
      ensure
        copy.close
      end
    end
    pipe("utf-8:euc-jp", { :undef=>:replace }, writer, reader)
  end
  # STDIN: external encoding expected to be Encoding.default_external,
  # internal encoding nil.
  def test_stdin
    expected_external = Encoding.default_external
    assert_equal(expected_external, STDIN.external_encoding)
    assert_equal(nil, STDIN.internal_encoding)
  end
  # STDOUT: both the external and the internal encoding are expected to
  # be nil.
  def test_stdout
    external = STDOUT.external_encoding
    assert_equal(nil, external)
    internal = STDOUT.internal_encoding
    assert_equal(nil, internal)
  end
  # STDERR: both the external and the internal encoding are expected to
  # be nil.
  def test_stderr
    external = STDERR.external_encoding
    assert_equal(nil, external)
    internal = STDERR.internal_encoding
    assert_equal(nil, internal)
  end
  # gets with a separator given in the internal encoding (ISO-8859-1)
  # while the file is read as UTF-8. See [ruby-core:14288].
  def test_terminator_conversion
    with_tmpdir do
      generate_file('tmp', "before \u00FF after")
      line = open("tmp", "r:utf-8:iso-8859-1") do |io|
        io.gets("\xFF".force_encoding("iso-8859-1"))
      end
      assert_equal(Encoding.find("iso-8859-1"), line.encoding)
      assert_str_equal("before \xFF".force_encoding("iso-8859-1"), line, '[ruby-core:14288]')
    end
  end
  # gets with a UTF-8 separator on an EUC-JP file; the whole content is
  # expected back, converted to UTF-8. See [ruby-core:14319].
  def test_terminator_conversion2
    with_tmpdir do
      generate_file('tmp', "before \xA1\xA2\xA2\xA3 after")
      line = open("tmp", "r:euc-jp:utf-8") do |io|
        io.gets("\xA2\xA2".force_encoding("euc-jp").encode("utf-8"))
      end
      assert_equal(Encoding.find("utf-8"), line.encoding)
      whole = "before \xA1\xA2\xA2\xA3 after".force_encoding("euc-jp").encode("utf-8")
      assert_str_equal(whole, line, '[ruby-core:14319]')
    end
  end
  # gets("0") on an ISO-2022-JP file converted to EUC-JP; the whole
  # converted content is expected back.
  def test_terminator_stateful_conversion
    with_tmpdir do
      src = "before \e$B\x23\x30\x23\x31\e(B after".force_encoding("iso-2022-jp")
      generate_file('tmp', src)
      line = open("tmp", "r:iso-2022-jp:euc-jp") do |io|
        io.gets("0".force_encoding("euc-jp"))
      end
      assert_equal(Encoding.find("euc-jp"), line.encoding)
      assert_str_equal(src.encode("euc-jp"), line)
    end
  end
  # gets with a non-ASCII separator tagged UTF-8 on an EUC-JP stream is
  # expected to raise ArgumentError.
  def test_nonascii_terminator
    with_tmpdir do
      generate_file('tmp', "before \xA2\xA2 after")
      open("tmp", "r:euc-jp") do |io|
        assert_raise(ArgumentError) do
          io.gets("\xA2\xA2".force_encoding("utf-8"))
        end
      end
    end
  end
  # gets with a converted separator on a pipe is expected to return
  # within one second with the text up to and including the separator.
  #
  # Uses Timeout.timeout (from the 'timeout' library this file requires)
  # instead of the bare Kernel#timeout, which is deprecated and no longer
  # available in newer Ruby versions.
  def test_pipe_terminator_conversion
    rs = "\xA2\xA2".encode("utf-8", "euc-jp")
    pipe("euc-jp:utf-8",
         proc do |w|
           w.write "before \xa2\xa2 after"
           w.close
         end,
         proc do |r|
           Timeout.timeout(1) {
             assert_equal("before \xa2\xa2".encode("utf-8", "euc-jp"),
                          r.gets(rs))
           }
         end)
  end
  # getc on an "euc-jp:utf-8" pipe is expected to return the converted
  # character.
  def test_pipe_conversion
    writer = proc {|w| w.write "\xa1\xa1" }
    reader = proc do |r|
      assert_equal("\xa1\xa1".encode("utf-8", "euc-jp"), r.getc)
    end
    pipe("euc-jp:utf-8", writer, reader)
  end
  # The two bytes of one EUC-JP character are written 0.1s apart; getc is
  # still expected to return the complete converted character.
  def test_pipe_convert_partial_read
    writer = proc do |w|
      w.write "\xa1"
      sleep 0.1
      w.write "\xa1"
    end
    reader = proc do |r|
      assert_equal("\xa1\xa1".encode("utf-8", "euc-jp"), r.getc)
    end
    pipe("euc-jp:utf-8", writer, reader)
  end
  # getc on an invalid EUC-JP byte is expected to raise
  # InvalidByteSequenceError reporting that byte; the rest of the stream
  # must remain readable.
  def test_getc_invalid
    writer = proc do |w|
      w << "\xa1xyz"
      w.close
    end
    reader = proc do |r|
      err = assert_raise(Encoding::InvalidByteSequenceError) { r.getc }
      assert_equal("\xA1".force_encoding("ascii-8bit"), err.error_bytes)
      assert_equal("xyz", r.read(10))
    end
    pipe("euc-jp:utf-8", writer, reader)
  end
  # Successive getc calls on an ISO-2022-JP file converted to EUC-JP are
  # expected to return one converted character each.
  def test_getc_stateful_conversion
    with_tmpdir do
      src = "\e$B\x23\x30\x23\x31\e(B".force_encoding("iso-2022-jp")
      generate_file('tmp', src)
      open("tmp", "r:iso-2022-jp:euc-jp") do |io|
        first = io.getc
        assert_equal("\xa3\xb0".force_encoding("euc-jp"), first)
        second = io.getc
        assert_equal("\xa3\xb1".force_encoding("euc-jp"), second)
      end
    end
  end
  # getc in text mode ("rt") with a UTF-8 default external encoding is
  # expected to return a valid, complete character.
  #
  # Encoding.default_external is process-wide state, so it is restored in
  # an ensure clause; otherwise a failing assertion would leak the UTF-8
  # setting into every test that runs afterwards.
  def test_getc_newlineconv
    with_tmpdir {
      src = "\u3042"
      generate_file('tmp', src)
      defext = Encoding.default_external
      begin
        Encoding.default_external = Encoding::UTF_8
        open("tmp", "rt") {|f|
          s = f.getc
          assert_equal(true, s.valid_encoding?)
          assert_equal("\u3042", s)
        }
      ensure
        Encoding.default_external = defext
      end
    }
  end
  # getc in text mode ("rt") on a truncated UTF-8 sequence is expected to
  # return each byte as a separate, invalid UTF-8 string.
  #
  # Encoding.default_external is process-wide state, so it is restored in
  # an ensure clause; otherwise a failing assertion would leak the UTF-8
  # setting into every test that runs afterwards.
  def test_getc_newlineconv_invalid
    with_tmpdir {
      src = "\xE3\x81"
      generate_file('tmp', src)
      defext = Encoding.default_external
      begin
        Encoding.default_external = Encoding::UTF_8
        open("tmp", "rt") {|f|
          s = f.getc
          assert_equal(false, s.valid_encoding?)
          assert_equal("\xE3".force_encoding("UTF-8"), s)
          s = f.getc
          assert_equal(false, s.valid_encoding?)
          assert_equal("\x81".force_encoding("UTF-8"), s)
        }
      ensure
        Encoding.default_external = defext
      end
    }
  end
  # ungetc with an Integer code point on a GB18030 stream: the pushed-back
  # character is expected in front of the file content.
  def test_ungetc_int
    with_tmpdir do
      generate_file('tmp', "A")
      data = open("tmp", "r:GB18030") do |io|
        io.ungetc(0x8431A439)
        io.read
      end
      assert_equal(Encoding::GB18030, data.encoding)
      assert_str_equal(0x8431A439.chr("GB18030")+"A", data)
    end
  end
  # ungetc with a GB18030 String: the pushed-back character is expected in
  # front of the file content.
  def test_ungetc_str
    with_tmpdir do
      generate_file('tmp', "A")
      data = open("tmp", "r:GB18030") do |io|
        io.ungetc(0x8431A439.chr("GB18030"))
        io.read
      end
      assert_equal(Encoding::GB18030, data.encoding)
      assert_str_equal(0x8431A439.chr("GB18030")+"A", data)
    end
  end
  # ungetc before any read on an ISO-2022-JP file converted to EUC-JP:
  # "0" is expected in front of the converted content.
  def test_ungetc_stateful_conversion
    with_tmpdir do
      src = "before \e$B\x23\x30\x23\x31\e(B after".force_encoding("iso-2022-jp")
      generate_file('tmp', src)
      data = open("tmp", "r:iso-2022-jp:euc-jp") do |io|
        io.ungetc("0".force_encoding("euc-jp"))
        io.read
      end
      assert_equal(Encoding.find("euc-jp"), data.encoding)
      assert_str_equal("0" + src.encode("euc-jp"), data)
    end
  end
  # ungetc after a partial gets on an ISO-2022-JP file converted to
  # EUC-JP: "0" is expected in front of the remaining converted content.
  def test_ungetc_stateful_conversion2
    with_tmpdir do
      src = "before \e$B\x23\x30\x23\x31\e(B after".force_encoding("iso-2022-jp")
      former = "before \e$B\x23\x30\e(B".force_encoding("iso-2022-jp")
      rs = "\e$B\x23\x30\e(B".force_encoding("iso-2022-jp")
      latter = "\e$B\x23\x31\e(B after".force_encoding("iso-2022-jp")
      generate_file('tmp', src)
      rest = open("tmp", "r:iso-2022-jp:euc-jp") do |io|
        head = io.gets(rs.encode("euc-jp", "iso-2022-jp"))
        assert_equal(former.encode("euc-jp", "iso-2022-jp"), head)
        io.ungetc("0")
        io.read
      end
      assert_equal(Encoding.find("euc-jp"), rest.encoding)
      assert_str_equal("0" + latter.encode("euc-jp"), rest)
    end
  end
  # An ASCII-only line read under each encoding in ENCS is expected to be
  # tagged with that encoding and compare equal to the source text.
  def test_open_ascii
    with_tmpdir do
      src = "abc\n"
      generate_file('tmp', "abc\n")
      ENCS.each do |enc|
        line = open('tmp', "r:#{enc}") {|io| io.gets }
        assert_equal(enc, line.encoding)
        assert_str_equal(src, line)
      end
    end
  end
  # A non-ASCII line read under each encoding in ENCS is expected to equal
  # the source bytes tagged with that encoding.
  def test_open_nonascii
    with_tmpdir do
      src = "\xc2\xa1\n"
      generate_file('tmp', src)
      ENCS.each do |enc|
        content = src.dup.force_encoding(enc)
        line = open('tmp', "r:#{enc}") {|io| io.gets }
        assert_equal(enc, line.encoding)
        assert_str_equal(content, line)
      end
    end
  end
  # For every encoding in ENCS, checks the encoding tag and content of the
  # strings returned by the various read methods. The file is reopened for
  # each method.
  #
  # getc/readchar/gets/readline/readlines/each_line/read are expected to
  # return strings tagged with the stream encoding; read(1),
  # readpartial(1) and sysread(1) are expected to return ASCII-8BIT.
  def test_read_encoding
    with_tmpdir do
      src = "\xc2\xa1\n".force_encoding("ASCII-8BIT")
      generate_file('tmp', "\xc2\xa1\n")
      ENCS.each do |enc|
        content = src.dup.force_encoding(enc)

        # Single-character readers.
        [:getc, :readchar].each do |reader|
          open('tmp', "r:#{enc}") do |f|
            s = f.public_send(reader)
            assert_equal(enc, s.encoding)
            assert_str_equal(content[0], s)
          end
        end

        # Single-line readers.
        [:gets, :readline].each do |reader|
          open('tmp', "r:#{enc}") do |f|
            s = f.public_send(reader)
            assert_equal(enc, s.encoding)
            assert_str_equal(content, s)
          end
        end

        open('tmp', "r:#{enc}") do |f|
          lines = f.readlines
          assert_equal(1, lines.length)
          s = lines[0]
          assert_equal(enc, s.encoding)
          assert_str_equal(content, s)
        end

        open('tmp', "r:#{enc}") do |f|
          f.each_line do |s|
            assert_equal(enc, s.encoding)
            assert_str_equal(content, s)
          end
        end

        open('tmp', "r:#{enc}") do |f|
          s = f.read
          assert_equal(enc, s.encoding)
          assert_str_equal(content, s)
        end

        # Length-limited readers.
        [:read, :readpartial, :sysread].each do |reader|
          open('tmp', "r:#{enc}") do |f|
            s = f.public_send(reader, 1)
            assert_equal(Encoding::ASCII_8BIT, s.encoding)
            assert_str_equal(src[0], s)
          end
        end
      end
    end
  end
  # Writing the same bytes tagged with each encoding in ENCS to a stream
  # opened with plain "w" is expected to store the bytes unchanged.
  def test_write_noenc
    src = "\xc2\xa1\n".force_encoding("ascii-8bit")
    with_tmpdir do
      open('tmp', "w") do |out|
        ENCS.each do |enc|
          out.write src.dup.force_encoding(enc)
        end
      end
      open('tmp', 'r:ascii-8bit') do |input|
        assert_equal(src*ENCS.length, input.read)
      end
    end
  end
  # A UTF-8 string printed to a "w:EUC-JP" stream is expected to be stored
  # as EUC-JP, and to read back as the original UTF-8 via "r:EUC-JP:UTF-8".
  def test_write_conversion
    utf8 = "\u6666"
    eucjp = "\xb3\xa2".force_encoding("EUC-JP")
    with_tmpdir do
      open('tmp', "w:EUC-JP") do |out|
        assert_equal(Encoding::EUC_JP, out.external_encoding)
        assert_equal(nil, out.internal_encoding)
        out.print utf8
      end
      stored = File.read('tmp').force_encoding("EUC-JP")
      assert_equal(eucjp, stored)
      open('tmp', 'r:EUC-JP:UTF-8') do |input|
        assert_equal(Encoding::EUC_JP, input.external_encoding)
        assert_equal(Encoding::UTF_8, input.internal_encoding)
        assert_equal(utf8, input.read)
      end
    end
  end
  # Exercises IO.pipe with no encoding argument, with an external encoding
  # only, with an "external:internal" pair, with encoding names in an
  # invalid (UTF-32BE) name encoding, and with every encoding in ENCS.
  def test_pipe
    utf8 = "\u6666"
    eucjp = "\xb3\xa2".force_encoding("EUC-JP")

    # No encoding arguments: the reader follows Encoding.default_external.
    pipe(proc {|w| w << utf8; w.close },
         proc {|r|
           assert_equal(Encoding.default_external, r.external_encoding)
           assert_equal(nil, r.internal_encoding)
           s = r.read
           assert_equal(Encoding.default_external, s.encoding)
           assert_str_equal(utf8.dup.force_encoding(Encoding.default_external), s)
         })

    # External encoding only.
    pipe("EUC-JP",
         proc {|w| w << eucjp; w.close },
         proc {|r|
           assert_equal(Encoding::EUC_JP, r.external_encoding)
           assert_equal(nil, r.internal_encoding)
           assert_equal(eucjp, r.read)
         })

    # A multibyte character in the middle of a long run of ASCII.
    pipe("UTF-8",
         proc {|w| w << "a" * 1023 + "\u3042" + "a" * 1022; w.close },
         proc {|r| assert_equal(true, r.read.valid_encoding?) })

    # External and internal encodings.
    pipe("UTF-8:EUC-JP",
         proc {|w| w << utf8; w.close },
         proc {|r|
           assert_equal(Encoding::UTF_8, r.external_encoding)
           assert_equal(Encoding::EUC_JP, r.internal_encoding)
           assert_equal(eucjp, r.read)
         })

    # Encoding names that are themselves UTF-32BE strings are rejected.
    e = assert_raise(ArgumentError) do
      with_pipe("UTF-8", "UTF-8".encode("UTF-32BE")) {}
    end
    assert_match(/invalid name encoding/, e.message)
    e = assert_raise(ArgumentError) do
      with_pipe("UTF-8".encode("UTF-32BE")) {}
    end
    assert_match(/invalid name encoding/, e.message)

    ENCS.each do |enc|
      pipe(enc,
           proc {|w| w << "\xc2\xa1"; w.close },
           proc {|r|
             s = r.getc
             assert_equal(enc, s.encoding)
           })
    end

    ENCS.each do |enc|
      next if [Encoding::ASCII_8BIT, Encoding::UTF_8].include?(enc)
      pipe("#{enc}:UTF-8",
           proc {|w| w << "\xc2\xa1"; w.close },
           proc {|r|
             s = r.read
             assert_equal(Encoding::UTF_8, s.encoding)
             assert_equal(s.encode("UTF-8"), s)
           })
    end
  end
  # Marshal.dump / Marshal.load across an "EUC-JP" pipe is expected to
  # round-trip the value without raising. See [ruby-dev:33264].
  def test_marshal
    data = 56225
    writer = proc do |w|
      Marshal.dump(data, w)
      w.close
    end
    reader = proc do |r|
      result = nil
      assert_nothing_raised("[ruby-dev:33264]") { result = Marshal.load(r) }
      assert_equal(data, result)
    end
    pipe("EUC-JP", writer, reader)
  end
  # gets(nil) on a "UTF-8:EUC-JP" pipe is expected to return the data
  # converted to EUC-JP.
  def test_gets_nil
    writer = proc do |w|
      w << "\u{3042}"
      w.close
    end
    reader = proc do |r|
      result = r.gets(nil)
      assert_equal("\u{3042}".encode("euc-jp"), result)
    end
    pipe("UTF-8:EUC-JP", writer, reader)
  end
  # gets(limit) on an EUC-JP pipe for limits 1..9. Each row pairs a byte
  # limit with the bytes expected back from a fresh pipe carrying the same
  # payload.
  def test_gets_limit
    payload = "\xa4\xa2\xa4\xa4\xa4\xa6\n\xa4\xa8\xa4\xaa"
    expectations = [
      [1, "\xa4\xa2"],
      [2, "\xa4\xa2"],
      [3, "\xa4\xa2\xa4\xa4"],
      [4, "\xa4\xa2\xa4\xa4"],
      [5, "\xa4\xa2\xa4\xa4\xa4\xa6"],
      [6, "\xa4\xa2\xa4\xa4\xa4\xa6"],
      [7, "\xa4\xa2\xa4\xa4\xa4\xa6\n"],
      [8, "\xa4\xa2\xa4\xa4\xa4\xa6\n"],
      [9, "\xa4\xa2\xa4\xa4\xa4\xa6\n"]
    ]
    expectations.each do |limit, bytes|
      pipe("euc-jp",
           proc {|w| w << payload; w.close },
           proc {|r| assert_equal(bytes.force_encoding("euc-jp"), r.gets(limit)) })
    end
  end
  # gets over an invalid UTF-8 byte is expected to raise
  # InvalidByteSequenceError reporting that byte; a second gets is
  # expected to return the converted text after it.
  def test_gets_invalid
    before = "\u{3042}\u{3044}"
    invalid = "\x80".force_encoding("utf-8")
    after = "\u{3046}\u{3048}"
    writer = proc do |w|
      w << before + invalid + after
      w.close
    end
    reader = proc do |r|
      err = assert_raise(Encoding::InvalidByteSequenceError) { r.gets }
      assert_equal(invalid.force_encoding("ascii-8bit"), err.error_bytes)
      assert_equal(after.encode("euc-jp"), r.gets)
    end
    pipe("utf-8:euc-jp", writer, reader)
  end
  # getc around an invalid UTF-8 byte: valid characters before and after
  # are expected to convert, and the invalid byte to raise
  # InvalidByteSequenceError.
  def test_getc_invalid2
    before1 = "\u{3042}"
    before2 = "\u{3044}"
    invalid = "\x80".force_encoding("utf-8")
    after1 = "\u{3046}"
    after2 = "\u{3048}"
    writer = proc do |w|
      w << before1 + before2 + invalid + after1 + after2
      w.close
    end
    reader = proc do |r|
      assert_equal(before1.encode("euc-jp"), r.getc)
      assert_equal(before2.encode("euc-jp"), r.getc)
      err = assert_raise(Encoding::InvalidByteSequenceError) { r.getc }
      assert_equal(invalid.force_encoding("ascii-8bit"), err.error_bytes)
      assert_equal(after1.encode("euc-jp"), r.getc)
      assert_equal(after2.encode("euc-jp"), r.getc)
    end
    pipe("utf-8:euc-jp", writer, reader)
  end
  # Same scenario as test_getc_invalid2 but with UTF-16LE input on a
  # binmode pipe.
  def test_getc_invalid3
    before1 = "\x42\x30".force_encoding("utf-16le")
    before2 = "\x44\x30".force_encoding("utf-16le")
    invalid = "\x00\xd8".force_encoding("utf-16le")
    after1 = "\x46\x30".force_encoding("utf-16le")
    after2 = "\x48\x30".force_encoding("utf-16le")
    writer = proc do |w|
      w << before1 + before2 + invalid + after1 + after2
      w.close
    end
    reader = proc do |r|
      assert_equal(before1.encode("euc-jp"), r.getc)
      assert_equal(before2.encode("euc-jp"), r.getc)
      err = assert_raise(Encoding::InvalidByteSequenceError) { r.getc }
      assert_equal(invalid.force_encoding("ascii-8bit"), err.error_bytes)
      assert_equal(after1.encode("euc-jp"), r.getc)
      assert_equal(after2.encode("euc-jp"), r.getc)
    end
    pipe("utf-16le:euc-jp", { :binmode => true }, writer, reader)
  end
  # read with no argument on a "utf-8:euc-jp" pipe is expected to return
  # everything converted to EUC-JP.
  def test_read_all
    str = "\u3042\u3044"
    writer = proc do |w|
      w << str
      w.close
    end
    reader = proc {|r| assert_equal(str.encode("euc-jp"), r.read) }
    pipe("utf-8:euc-jp", writer, reader)
  end
  # read over an invalid UTF-8 byte is expected to raise
  # InvalidByteSequenceError reporting that byte; a second read is
  # expected to return the converted text after it.
  def test_read_all_invalid
    before = "\u{3042}\u{3044}"
    invalid = "\x80".force_encoding("utf-8")
    after = "\u{3046}\u{3048}"
    writer = proc do |w|
      w << before + invalid + after
      w.close
    end
    reader = proc do |r|
      err = assert_raise(Encoding::InvalidByteSequenceError) { r.read }
      assert_equal(invalid.force_encoding("ascii-8bit"), err.error_bytes)
      assert_equal(after.encode("euc-jp"), r.read)
    end
    pipe("utf-8:euc-jp", writer, reader)
  end
  832. def test_file_foreach
  833. with_tmpdir {
  834. generate_file('tst', 'a' * 8191 + "\xa1\xa1")
  835. assert_nothing_raised {
  836. File.foreach('tst', :encoding=>"euc-jp") {|line| line.inspect }
  837. }
  838. }
  839. end
  # set_encoding("shift_jis:euc-jp") mid-stream: the following Shift_JIS
  # bytes are expected to convert to the same EUC-JP character as the
  # UTF-8 bytes read before the switch.
  def test_set_encoding
    writer = proc do |w|
      payload = "\u3042".force_encoding("ascii-8bit")
      payload << "\x82\xa0".force_encoding("ascii-8bit")
      w << payload
      w.close
    end
    reader = proc do |r|
      assert_equal("\xa4\xa2".force_encoding("euc-jp"), r.getc)
      r.set_encoding("shift_jis:euc-jp")
      assert_equal("\xa4\xa2".force_encoding("euc-jp"), r.getc)
    end
    pipe("utf-8:euc-jp", writer, reader)
  end
  # Same as test_set_encoding, but the external and internal encodings are
  # passed to set_encoding as two separate arguments.
  def test_set_encoding2
    writer = proc do |w|
      payload = "\u3042".force_encoding("ascii-8bit")
      payload << "\x82\xa0".force_encoding("ascii-8bit")
      w << payload
      w.close
    end
    reader = proc do |r|
      assert_equal("\xa4\xa2".force_encoding("euc-jp"), r.getc)
      r.set_encoding("shift_jis", "euc-jp")
      assert_equal("\xa4\xa2".force_encoding("euc-jp"), r.getc)
    end
    pipe("utf-8:euc-jp", writer, reader)
  end
  # set_encoding(nil) mid-stream: the remaining bytes are expected back
  # unconverted, tagged with Encoding.default_external.
  def test_set_encoding_nil
    writer = proc do |w|
      payload = "\u3042".force_encoding("ascii-8bit")
      payload << "\x82\xa0".force_encoding("ascii-8bit")
      w << payload
      w.close
    end
    reader = proc do |r|
      assert_equal("\xa4\xa2".force_encoding("euc-jp"), r.getc)
      r.set_encoding(nil)
      assert_equal("\x82\xa0".force_encoding(Encoding.default_external), r.read)
    end
    pipe("utf-8:euc-jp", writer, reader)
  end
  # set_encoding with a single Encoding object mid-stream: the next
  # character is expected back unconverted, tagged Shift_JIS.
  def test_set_encoding_enc
    writer = proc do |w|
      payload = "\u3042".force_encoding("ascii-8bit")
      payload << "\x82\xa0".force_encoding("ascii-8bit")
      w << payload
      w.close
    end
    reader = proc do |r|
      assert_equal("\xa4\xa2".force_encoding("euc-jp"), r.getc)
      r.set_encoding(Encoding::Shift_JIS)
      assert_equal("\x82\xa0".force_encoding(Encoding::Shift_JIS), r.getc)
    end
    pipe("utf-8:euc-jp", writer, reader)
  end
  # set_encoding with :invalid=>:replace: the invalid byte "\x80" is
  # expected to be read back as "?".
  def test_set_encoding_invalid
    writer = proc do |w|
      w << "\x80"
      w.close
    end
    reader = proc do |r|
      r.set_encoding("utf-8:euc-jp", :invalid=>:replace)
      assert_equal("?", r.read)
    end
    pipe(writer, reader)
  end
  # set_encoding with :undef=>:replace: U+FFFD is expected to be read back
  # as "?".
  def test_set_encoding_undef
    writer = proc do |w|
      w << "\ufffd"
      w.close
    end
    reader = proc do |r|
      r.set_encoding("utf-8", "euc-jp", :undef=>:replace)
      assert_equal("?", r.read)
    end
    pipe(writer, reader)
  end
  # set_encoding with :undef=>:replace and a custom :replace string, once
  # with two encoding arguments and once with the "ext:int" form.
  def test_set_encoding_undef_replace
    writer = proc do |w|
      w << "\ufffd"
      w.close
    end
    two_args = proc do |r|
      r.set_encoding("utf-8", "euc-jp", :undef=>:replace, :replace=>"ZZZ")
      assert_equal("ZZZ", r.read)
    end
    pipe(writer, two_args)

    writer = proc do |w|
      w << "\ufffd"
      w.close
    end
    one_arg = proc do |r|
      r.set_encoding("utf-8:euc-jp", :undef=>:replace, :replace=>"ZZZ")
      assert_equal("ZZZ", r.read)
    end
    pipe(writer, one_arg)
  end
  # set_encoding("iso-2022-jp") is expected to raise ArgumentError on
  # streams opened "rt" or "r", and to succeed on a stream opened "rb" or
  # switched to binmode first.
  def test_set_encoding_binmode
    %w[rt r].each do |mode|
      assert_raise(ArgumentError) do
        open(__FILE__, mode) do |f|
          f.set_encoding("iso-2022-jp")
        end
      end
    end

    assert_nothing_raised do
      open(__FILE__, "rb") do |f|
        f.set_encoding("iso-2022-jp")
      end
    end

    %w[r rt].each do |mode|
      assert_nothing_raised do
        open(__FILE__, mode) do |f|
          f.binmode
          f.set_encoding("iso-2022-jp")
        end
      end
    end
  end
  # Two separate writes to an "iso-2022-jp:utf-8" writer are expected to
  # produce a single escape-sequence-wrapped run of ISO-2022-JP bytes.
  def test_write_conversion_fixenc
    writer = proc do |w|
      w.set_encoding("iso-2022-jp:utf-8")
      w << "\u3042"
      w << "\u3044"
      w.close
    end
    reader = proc do |r|
      expected = "\e$B$\"$$\e(B".force_encoding("ascii-8bit")
      assert_equal(expected, r.read.force_encoding("ascii-8bit"))
    end
    pipe(writer, reader)
  end
  # Strings in different source encodings (UTF-8 and SJIS) written to an
  # "iso-2022-jp" writer are expected to produce one ISO-2022-JP run.
  def test_write_conversion_anyenc_stateful
    writer = proc do |w|
      w.set_encoding("iso-2022-jp")
      w << "\u3042"
      w << "\x82\xa2".force_encoding("sjis")
      w.close
    end
    reader = proc do |r|
      expected = "\e$B$\"$$\e(B".force_encoding("ascii-8bit")
      assert_equal(expected, r.read.force_encoding("ascii-8bit"))
    end
    pipe(writer, reader)
  end
  # Strings in different source encodings (UTF-8 and SJIS) written to an
  # "euc-jp" writer are expected to arrive as EUC-JP bytes.
  def test_write_conversion_anyenc_stateless
    writer = proc do |w|
      w.set_encoding("euc-jp")
      w << "\u3042"
      w << "\x82\xa2".force_encoding("sjis")
      w.close
    end
    reader = proc do |r|
      expected = "\xa4\xa2\xa4\xa4".force_encoding("ascii-8bit")
      assert_equal(expected, r.read.force_encoding("ascii-8bit"))
    end
    pipe(writer, reader)
  end
  # Same as test_write_conversion_anyenc_stateful, but with sync turned
  # off on the writer.
  def test_write_conversion_anyenc_stateful_nosync
    writer = proc do |w|
      w.sync = false
      w.set_encoding("iso-2022-jp")
      w << "\u3042"
      w << "\x82\xa2".force_encoding("sjis")
      w.close
    end
    reader = proc do |r|
      expected = "\e$B$\"$$\e(B".force_encoding("ascii-8bit")
      assert_equal(expected, r.read.force_encoding("ascii-8bit"))
    end
    pipe(writer, reader)
  end
  # Reading EUC-JP data through an "euc-jp:iso-2022-jp" pipe is expected
  # to return the escape-sequence-wrapped ISO-2022-JP form.
  def test_read_stateful
    writer = proc do |w|
      w << "\xA4\xA2"
      w.close
    end
    reader = proc do |r|
      assert_equal("\e$B$\"\e(B".force_encoding("iso-2022-jp"), r.read)
    end
    pipe("euc-jp:iso-2022-jp", writer, reader)
  end
  # Spawns a child Ruby whose fd 10 is the temp file; the child reopens
  # STDIN onto it, calls STDIN.external_encoding, then writes U+3042.
  # The bytes written are expected to appear in the file unchanged.
  # Skipped when RUBY_PLATFORM matches mswin or mingw.
  def test_stdin_external_encoding_with_reopen
    skip "passing non-stdio fds is not supported" if /mswin|mingw/ =~ RUBY_PLATFORM
    with_tmpdir do
      open("tst", "w+") do |f|
        # The heredoc body is the script handed to the child via -e.
        child = spawn(EnvUtil.rubybin, '-e', <<-'End', 10=>f)
io = IO.new(10, "r+")
STDIN.reopen(io)
STDIN.external_encoding
STDIN.write "\u3042"
STDIN.flush
        End
        Process.wait child
        f.rewind
        written = f.read.force_encoding("ascii-8bit")
        assert_equal("\u3042".force_encoding("ascii-8bit"), written)
      end
    end
  end
  # IO.popen with the external encoding in the mode string
  # ("r:ascii-8bit").
  def test_popen_r_enc
    command = "#{EnvUtil.rubybin} -e 'putc 255'"
    IO.popen(command, "r:ascii-8bit") do |child|
      assert_equal(Encoding::ASCII_8BIT, child.external_encoding)
      assert_equal(nil, child.internal_encoding)
      output = child.read
      assert_equal(Encoding::ASCII_8BIT, output.encoding)
      assert_equal("\xff".force_encoding("ascii-8bit"), output)
    end
  end
  # IO.popen with the external encoding given through the :encoding option.
  def test_popen_r_enc_in_opt
    command = "#{EnvUtil.rubybin} -e 'putc 255'"
    IO.popen(command, "r", encoding: "ascii-8bit") do |child|
      assert_equal(Encoding::ASCII_8BIT, child.external_encoding)
      assert_equal(nil, child.internal_encoding)
      output = child.read
      assert_equal(Encoding::ASCII_8BIT, output.encoding)
      assert_equal("\xff".force_encoding("ascii-8bit"), output)
    end
  end
  # IO.popen with the external encoding given through :external_encoding.
  def test_popen_r_enc_in_opt2
    command = "#{EnvUtil.rubybin} -e 'putc 255'"
    IO.popen(command, "r", external_encoding: "ascii-8bit") do |child|
      assert_equal(Encoding::ASCII_8BIT, child.external_encoding)
      assert_equal(nil, child.internal_encoding)
      output = child.read
      assert_equal(Encoding::ASCII_8BIT, output.encoding)
      assert_equal("\xff".force_encoding("ascii-8bit"), output)
    end
  end
  # IO.popen with "external:internal" encodings in the mode string; the
  # child's 0xa1 byte is expected to arrive as "\x8e\xa1" in EUC-JP.
  def test_popen_r_enc_enc
    command = "#{EnvUtil.rubybin} -e 'putc 0xa1'"
    IO.popen(command, "r:shift_jis:euc-jp") do |child|
      assert_equal(Encoding::Shift_JIS, child.external_encoding)
      assert_equal(Encoding::EUC_JP, child.internal_encoding)
      output = child.read
      assert_equal(Encoding::EUC_JP, output.encoding)
      assert_equal("\x8e\xa1".force_encoding("euc-jp"), output)
    end
  end
  # IO.popen with "external:internal" passed through the :encoding option.
  def test_popen_r_enc_enc_in_opt
    command = "#{EnvUtil.rubybin} -e 'putc 0xa1'"
    IO.popen(command, "r", encoding: "shift_jis:euc-jp") do |child|
      assert_equal(Encoding::Shift_JIS, child.external_encoding)
      assert_equal(Encoding::EUC_JP, child.internal_encoding)
      output = child.read
      assert_equal(Encoding::EUC_JP, output.encoding)
      assert_equal("\x8e\xa1".force_encoding("euc-jp"), output)
    end
  end
  # IO.popen with separate :external_encoding and :internal_encoding options.
  def test_popen_r_enc_enc_in_opt2
    command = "#{EnvUtil.rubybin} -e 'putc 0xa1'"
    IO.popen(command, "r", external_encoding: "shift_jis", internal_encoding: "euc-jp") do |child|
      assert_equal(Encoding::Shift_JIS, child.external_encoding)
      assert_equal(Encoding::EUC_JP, child.internal_encoding)
      output = child.read
      assert_equal(Encoding::EUC_JP, output.encoding)
      assert_equal("\x8e\xa1".force_encoding("euc-jp"), output)
    end
  end
  # Array (argv) form of IO.popen with separate external/internal encoding
  # options.
  def test_popenv_r_enc_enc_in_opt2
    argv = [EnvUtil.rubybin, "-e", "putc 0xa1"]
    IO.popen(argv, "r", external_encoding: "shift_jis", internal_encoding: "euc-jp") do |child|
      assert_equal(Encoding::Shift_JIS, child.external_encoding)
      assert_equal(Encoding::EUC_JP, child.internal_encoding)
      output = child.read
      assert_equal(Encoding::EUC_JP, output.encoding)
      assert_equal("\x8e\xa1".force_encoding("euc-jp"), output)
    end
  end
  # Kernel#open with a "|command" path and an encoding in the mode string.
  def test_open_pipe_r_enc
    target = "|#{EnvUtil.rubybin} -e 'putc 255'"
    open(target, "r:ascii-8bit") do |child|
      assert_equal(Encoding::ASCII_8BIT, child.external_encoding)
      assert_equal(nil, child.internal_encoding)
      output = child.read
      assert_equal(Encoding::ASCII_8BIT, output.encoding)
      assert_equal("\xff".force_encoding("ascii-8bit"), output)
    end
  end
  # Kernel#open with a "|command" path read as UTF-8; the child prints
  # "\u3042" via putc.
  def test_open_pipe_r_enc2
    target = "|#{EnvUtil.rubybin} -e 'putc \"\\u3042\"'"
    open(target, "r:UTF-8") do |child|
      assert_equal(Encoding::UTF_8, child.external_encoding)
      assert_equal(nil, child.internal_encoding)
      output = child.read
      assert_equal(Encoding::UTF_8, output.encoding)
      assert_equal("\u3042", output)
    end
  end
  # IO.foreach with the external encoding embedded in :mode.
  def test_s_foreach_enc
    with_tmpdir do
      generate_file("t", "\xff")
      IO.foreach("t", mode: "r:ascii-8bit") do |line|
        assert_equal(Encoding::ASCII_8BIT, line.encoding)
        assert_equal("\xff".force_encoding("ascii-8bit"), line)
      end
    end
  end
  # IO.foreach with the external encoding given through :encoding.
  def test_s_foreach_enc_in_opt
    with_tmpdir do
      generate_file("t", "\xff")
      IO.foreach("t", encoding: "ascii-8bit") do |line|
        assert_equal(Encoding::ASCII_8BIT, line.encoding)
        assert_equal("\xff".force_encoding("ascii-8bit"), line)
      end
    end
  end
  # IO.foreach with the external encoding given through :external_encoding.
  def test_s_foreach_enc_in_opt2
    with_tmpdir do
      generate_file("t", "\xff")
      IO.foreach("t", external_encoding: "ascii-8bit") do |line|
        assert_equal(Encoding::ASCII_8BIT, line.encoding)
        assert_equal("\xff".force_encoding("ascii-8bit"), line)
      end
    end
  end
  # IO.foreach with "external:internal" encodings embedded in :mode.
  def test_s_foreach_enc_enc
    with_tmpdir do
      generate_file("t", "\u3042")
      IO.foreach("t", mode: "r:utf-8:euc-jp") do |line|
        assert_equal(Encoding::EUC_JP, line.encoding)
        assert_equal("\xa4\xa2".force_encoding("euc-jp"), line)
      end
    end
  end
  # IO.foreach with "external:internal" passed through :encoding.
  def test_s_foreach_enc_enc_in_opt
    with_tmpdir do
      generate_file("t", "\u3042")
      IO.foreach("t", mode: "r", encoding: "utf-8:euc-jp") do |line|
        assert_equal(Encoding::EUC_JP, line.encoding)
        assert_equal("\xa4\xa2".force_encoding("euc-jp"), line)
      end
    end
  end
  # IO.foreach with separate :external_encoding / :internal_encoding options.
  def test_s_foreach_enc_enc_in_opt2
    with_tmpdir do
      generate_file("t", "\u3042")
      IO.foreach("t", mode: "r", external_encoding: "utf-8", internal_encoding: "euc-jp") do |line|
        assert_equal(Encoding::EUC_JP, line.encoding)
        assert_equal("\xa4\xa2".force_encoding("euc-jp"), line)
      end
    end
  end
  # IO.foreach with the encoding carried in the :open_args mode string.
  def test_s_foreach_open_args_enc
    with_tmpdir do
      generate_file("t", "\xff")
      IO.foreach("t", open_args: ["r:ascii-8bit"]) do |line|
        assert_equal(Encoding::ASCII_8BIT, line.encoding)
        assert_equal("\xff".force_encoding("ascii-8bit"), line)
      end
    end
  end
  # IO.foreach with :open_args holding a mode plus an :encoding option hash.
  def test_s_foreach_open_args_enc_in_opt
    with_tmpdir do
      generate_file("t", "\xff")
      IO.foreach("t", open_args: ["r", {encoding: "ascii-8bit"}]) do |line|
        assert_equal(Encoding::ASCII_8BIT, line.encoding)
        assert_equal("\xff".force_encoding("ascii-8bit"), line)
      end
    end
  end
  # IO.foreach with :open_args holding a mode plus :external_encoding.
  def test_s_foreach_open_args_enc_in_opt2
    with_tmpdir do
      generate_file("t", "\xff")
      IO.foreach("t", open_args: ["r", {external_encoding: "ascii-8bit"}]) do |line|
        assert_equal(Encoding::ASCII_8BIT, line.encoding)
        assert_equal("\xff".force_encoding("ascii-8bit"), line)
      end
    end
  end
  # IO.foreach with "external:internal" in the :open_args mode string.
  def test_s_foreach_open_args_enc_enc
    with_tmpdir do
      generate_file("t", "\u3042")
      IO.foreach("t", open_args: ["r:utf-8:euc-jp"]) do |line|
        assert_equal(Encoding::EUC_JP, line.encoding)
        assert_equal("\xa4\xa2".force_encoding("euc-jp"), line)
      end
    end
  end
  # IO.foreach with :open_args holding a mode plus an "external:internal"
  # :encoding option.
  def test_s_foreach_open_args_enc_enc_in_opt
    with_tmpdir do
      generate_file("t", "\u3042")
      IO.foreach("t", open_args: ["r", {encoding: "utf-8:euc-jp"}]) do |line|
        assert_equal(Encoding::EUC_JP, line.encoding)
        assert_equal("\xa4\xa2".force_encoding("euc-jp"), line)
      end
    end
  end
  # IO.foreach with :open_args holding a mode plus separate external and
  # internal encoding options.
  def test_s_foreach_open_args_enc_enc_in_opt2
    with_tmpdir do
      generate_file("t", "\u3042")
      IO.foreach("t", open_args: ["r", {external_encoding: "utf-8", internal_encoding: "euc-jp"}]) do |line|
        assert_equal(Encoding::EUC_JP, line.encoding)
        assert_equal("\xa4\xa2".force_encoding("euc-jp"), line)
      end
    end
  end
  # Asking for text mode and binary mode at once must raise ArgumentError.
  def test_both_textmode_binmode
    assert_raise(ArgumentError) do
      open("not-exist", "r", textmode: true, binmode: true)
    end
  end
  # Whole-file reads in text mode ("rt" or textmode: true) are expected to
  # yield "\n" line endings for CRLF, lone-CR and LF input, with or without
  # an encoding conversion; with universal_newline: false the CRLF bytes are
  # expected to come back untouched.
  def test_textmode_decode_universal_newline_read
    with_tmpdir {
      generate_file("t.crlf", "a\r\nb\r\nc\r\n")
      assert_equal("a\nb\nc\n", File.read("t.crlf", mode:"rt:euc-jp:utf-8"))
      assert_equal("a\nb\nc\n", File.read("t.crlf", mode:"rt"))
      open("t.crlf", "rt:euc-jp:utf-8") {|f| assert_equal("a\nb\nc\n", f.read) }
      open("t.crlf", "rt") {|f| assert_equal("a\nb\nc\n", f.read) }
      open("t.crlf", "r", :textmode=>true) {|f| assert_equal("a\nb\nc\n", f.read) }
      open("t.crlf", "r", textmode: true, universal_newline: false) {|f|
        assert_equal("a\r\nb\r\nc\r\n", f.read)
      }

      generate_file("t.cr", "a\rb\rc\r")
      assert_equal("a\nb\nc\n", File.read("t.cr", mode:"rt:euc-jp:utf-8"))
      assert_equal("a\nb\nc\n", File.read("t.cr", mode:"rt"))

      generate_file("t.lf", "a\nb\nc\n")
      # Read the LF fixture from its own file so plain "\n" input is
      # actually exercised.
      assert_equal("a\nb\nc\n", File.read("t.lf", mode:"rt:euc-jp:utf-8"))
      assert_equal("a\nb\nc\n", File.read("t.lf", mode:"rt"))
    }
  end
  # Character-at-a-time reads in "rt" mode: each of the CRLF, CR and LF
  # fixtures is expected to produce a, "\n", b, "\n", c, "\n" and then nil.
  def test_textmode_decode_universal_newline_getc
    fixtures = [
      ["t.crlf", "a\r\nb\r\nc\r\n"],
      ["t.cr", "a\rb\rc\r"],
      ["t.lf", "a\nb\nc\n"],
    ]
    with_tmpdir do
      fixtures.each do |name, raw|
        generate_file(name, raw)
        open(name, "rt") do |f|
          ["a", "\n", "b", "\n", "c", "\n", nil].each do |expected_char|
            assert_equal(expected_char, f.getc)
          end
        end
      end
    end
  end
  # Line-at-a-time reads in "rt" mode: each of the CRLF, CR and LF fixtures
  # is expected to produce "a\n", "b\n", "c\n" and then nil.
  def test_textmode_decode_universal_newline_gets
    fixtures = [
      ["t.crlf", "a\r\nb\r\nc\r\n"],
      ["t.cr", "a\rb\rc\r"],
      ["t.lf", "a\nb\nc\n"],
    ]
    with_tmpdir do
      fixtures.each do |name, raw|
        generate_file(name, raw)
        open(name, "rt") do |f|
          ["a\n", "b\n", "c\n", nil].each do |expected_line|
            assert_equal(expected_line, f.gets)
          end
        end
      end
    end
  end
  # Text-mode reads of UTF-16BE/LE fixtures (CRLF, CR and LF variants)
  # converted to UTF-8 are all expected to give "a\nb\nc\n".
  def test_textmode_decode_universal_newline_utf16
    # [file name, raw bytes written, mode used to read it back]
    cases = [
      ["t.utf16be.crlf", "\0a\0\r\0\n\0b\0\r\0\n\0c\0\r\0\n", "rt:utf-16be:utf-8"],
      ["t.utf16le.crlf", "a\0\r\0\n\0b\0\r\0\n\0c\0\r\0\n\0", "rt:utf-16le:utf-8"],
      ["t.utf16be.cr", "\0a\0\r\0b\0\r\0c\0\r", "rt:utf-16be:utf-8"],
      ["t.utf16le.cr", "a\0\r\0b\0\r\0c\0\r\0", "rt:utf-16le:utf-8"],
      ["t.utf16be.lf", "\0a\0\n\0b\0\n\0c\0\n", "rt:utf-16be:utf-8"],
      ["t.utf16le.lf", "a\0\n\0b\0\n\0c\0\n\0", "rt:utf-16le:utf-8"],
    ]
    with_tmpdir do
      cases.each do |name, raw, read_mode|
        generate_file(name, raw)
        assert_equal("a\nb\nc\n", File.read(name, mode: read_mode))
      end
    end
  end
  # Cache for system_newline: empty until the first call, then holds the
  # bytes a text-mode write of "\n" produced.
  SYSTEM_NEWLINE = []

  # Returns the bytes that end up on disk when "\n" is printed to a file
  # opened in "wt" mode. The probe runs once; later calls reuse the cache.
  def system_newline
    if SYSTEM_NEWLINE.empty?
      with_tmpdir do
        open("newline", "wt") {|out| out.print "\n" }
        open("newline", "rb") {|inp| SYSTEM_NEWLINE << inp.read }
      end
    end
    SYSTEM_NEWLINE.first
  end
  # Lines written with puts in "wt" mode must end with the system newline
  # when the file is read back in binary mode.
  def test_textmode_encode_newline
    with_tmpdir do
      open("t.txt", "wt") do |out|
        out.puts "abc"
        out.puts "def"
      end
      raw = File.read("t.txt", mode: "rb")
      eol = system_newline
      assert_equal("abc#{eol}def#{eol}", raw)
    end
  end
  # Same as the plain text-mode newline check, but writing through an
  # EUC-JP external encoding and comparing the raw bytes.
  def test_textmode_encode_newline_enc
    with_tmpdir do
      open("t.txt", "wt:euc-jp") do |out|
        out.puts "abc\u3042"
        out.puts "def\u3044"
      end
      raw = File.read("t.txt", mode: "rb:ascii-8bit")
      eol = system_newline
      assert_equal("abc\xA4\xA2#{eol}def\xA4\xA4#{eol}", raw)
    end
  end
  # Reads a UTF-8 CRLF file converted to UTF-16BE in binary, text and
  # default mode; the default-mode expectation depends on system_newline.
  def test_read_newline_conversion_with_encoding_conversion
    with_tmpdir do
      generate_file("t.utf8.crlf", "a\r\nb\r\n")

      open("t.utf8.crlf", "rb:utf-8:utf-16be") do |f|
        data = f.read
        assert_equal("\0a\0\r\0\n\0b\0\r\0\n".force_encoding("UTF-16BE"), data)
      end

      open("t.utf8.crlf", "rt:utf-8:utf-16be") do |f|
        data = f.read
        assert_equal("\0a\0\n\0b\0\n".force_encoding("UTF-16BE"), data)
      end

      open("t.utf8.crlf", "r:utf-8:utf-16be") do |f|
        data = f.read
        # With "\n" as the system newline the CRLF pairs are expected
        # unchanged; otherwise they are expected to collapse to "\n".
        expected =
          if system_newline == "\n"
            "\0a\0\r\0\n\0b\0\r\0\n".force_encoding("UTF-16BE")
          else
            "\0a\0\n\0b\0\n".force_encoding("UTF-16BE")
          end
        assert_equal(expected, data)
      end
    end
  end
  # A UTF-16BE file read in binary mode without conversion must come back
  # byte-for-byte, CRLF pairs included.
  def test_read_newline_conversion_without_encoding_conversion
    with_tmpdir do
      generate_file("t.utf16.crlf", "\0a\0\r\0\n\0b\0\r\0\n")
      open("t.utf16.crlf", "rb:utf-16be") do |f|
        data = f.read
        expected = "\0a\0\r\0\n\0b\0\r\0\n".force_encoding("UTF-16BE")
        assert_equal(expected, data)
      end
    end
  end
  # Opening with an ASCII-incompatible encoding and no conversion requires
  # binmode, so both "rt" and plain "r" are expected to raise ArgumentError.
  def test_read_newline_conversion_error
    with_tmpdir do
      generate_file("empty.txt", "")
      assert_raise(ArgumentError) do
        open("empty.txt", "rt:utf-16be") {|f| }
      end
      assert_raise(ArgumentError) do
        open("empty.txt", "r:utf-16be") {|f| }
      end
    end
  end
  # Matrix of text/binary read modes crossed with external and internal
  # encodings, over five fixtures holding the same text in different forms.
  #
  # "\xc2\xa2" is valid as both EUC-JP and UTF-8:
  #
  #   EUC-JP    UTF-8      Unicode
  #   0xC2A2    0xE894B5   U+8535
  #   0xA1F1    0xC2A2     U+00A2
  def test_read_mode
    # Opens +path+ with +mode+ and compares the whole content to +expected+.
    check = lambda do |path, mode, expected|
      open(path, mode) {|f| assert_equal(expected, f.read) }
    end

    with_tmpdir do
      generate_file("t", "a\rb\r\nc\n\xc2\xa2")
      generate_file("ie", "a\rb\r\nc\n\e$B\x42\x22\e(B")
      generate_file("iu", "a\rb\r\nc\n\e$B\x21\x71\e(B")
      generate_file("be", "\0a\0\r\0b\0\r\0\n\0c\0\n\x85\x35")
      generate_file("bu", "\0a\0\r\0b\0\r\0\n\0c\0\n\0\xa2")

      # No conversion: only the external encoding is given (or defaulted).
      check.call("t", "rt", "a\nb\nc\n\xc2\xa2".force_encoding(Encoding.default_external))
      check.call("t", "rb", "a\rb\r\nc\n\xc2\xa2".force_encoding(Encoding::ASCII_8BIT))
      check.call("t", "rt:euc-jp", "a\nb\nc\n\xc2\xa2".force_encoding("EUC-JP"))
      check.call("t", "rb:euc-jp", "a\rb\r\nc\n\xc2\xa2".force_encoding("EUC-JP"))
      check.call("t", "rt:utf-8", "a\nb\nc\n\xc2\xa2".force_encoding("UTF-8"))
      check.call("t", "rb:utf-8", "a\rb\r\nc\n\xc2\xa2".force_encoding("UTF-8"))
      assert_raise(ArgumentError) { open("t", "rt:iso-2022-jp") {|f| } }
      check.call("t", "rb:iso-2022-jp", "a\rb\r\nc\n\xc2\xa2".force_encoding("ISO-2022-JP"))

      # ASCII-compatible external encoding converted to another encoding.
      check.call("t", "rt:euc-jp:utf-8", "a\nb\nc\n\u8535")
      check.call("t", "rt:utf-8:euc-jp", "a\nb\nc\n\xa1\xf1".force_encoding("EUC-JP"))
      check.call("t", "rb:euc-jp:utf-8", "a\rb\r\nc\n\u8535")
      check.call("t", "rb:utf-8:euc-jp", "a\rb\r\nc\n\xa1\xf1".force_encoding("EUC-JP"))
      check.call("t", "rt:euc-jp:iso-2022-jp", "a\nb\nc\n\e$B\x42\x22\e(B".force_encoding("ISO-2022-JP"))
      check.call("t", "rt:utf-8:iso-2022-jp", "a\nb\nc\n\e$B\x21\x71\e(B".force_encoding("ISO-2022-JP"))
      check.call("t", "rt:euc-jp:utf-16be", "\0a\0\n\0b\0\n\0c\0\n\x85\x35".force_encoding("UTF-16BE"))
      check.call("t", "rt:utf-8:utf-16be", "\0a\0\n\0b\0\n\0c\0\n\0\xa2".force_encoding("UTF-16BE"))
      check.call("t", "rb:euc-jp:iso-2022-jp", "a\rb\r\nc\n\e$B\x42\x22\e(B".force_encoding("ISO-2022-JP"))
      check.call("t", "rb:utf-8:iso-2022-jp", "a\rb\r\nc\n\e$B\x21\x71\e(B".force_encoding("ISO-2022-JP"))
      check.call("t", "rb:euc-jp:utf-16be", "\0a\0\r\0b\0\r\0\n\0c\0\n\x85\x35".force_encoding("UTF-16BE"))
      check.call("t", "rb:utf-8:utf-16be", "\0a\0\r\0b\0\r\0\n\0c\0\n\0\xa2".force_encoding("UTF-16BE"))

      # ISO-2022-JP or UTF-16BE external encoding converted to EUC-JP/UTF-8.
      check.call("ie", "rt:iso-2022-jp:euc-jp", "a\nb\nc\n\xc2\xa2".force_encoding("EUC-JP"))
      check.call("iu", "rt:iso-2022-jp:utf-8", "a\nb\nc\n\xc2\xa2".force_encoding("UTF-8"))
      check.call("be", "rt:utf-16be:euc-jp", "a\nb\nc\n\xc2\xa2".force_encoding("EUC-JP"))
      check.call("bu", "rt:utf-16be:utf-8", "a\nb\nc\n\xc2\xa2".force_encoding("UTF-8"))
      check.call("ie", "rb:iso-2022-jp:euc-jp", "a\rb\r\nc\n\xc2\xa2".force_encoding("EUC-JP"))
      check.call("iu", "rb:iso-2022-jp:utf-8", "a\rb\r\nc\n\xc2\xa2".force_encoding("UTF-8"))
      check.call("be", "rb:utf-16be:euc-jp", "a\rb\r\nc\n\xc2\xa2".force_encoding("EUC-JP"))
      check.call("bu", "rb:utf-16be:utf-8", "a\rb\r\nc\n\xc2\xa2".force_encoding("UTF-8"))

      # Conversions between ISO-2022-JP and UTF-16BE in both directions.
      check.call("ie", "rt:iso-2022-jp:utf-16be", "\0a\0\n\0b\0\n\0c\0\n\x85\x35".force_encoding("UTF-16BE"))
      check.call("be", "rt:utf-16be:iso-2022-jp", "a\nb\nc\n\e$B\x42\x22\e(B".force_encoding("ISO-2022-JP"))
      check.call("ie", "rb:iso-2022-jp:utf-16be", "\0a\0\r\0b\0\r\0\n\0c\0\n\x85\x35".force_encoding("UTF-16BE"))
      check.call("be", "rb:utf-16be:iso-2022-jp", "a\rb\r\nc\n\e$B\x42\x22\e(B".force_encoding("ISO-2022-JP"))
    end
  end
  # Prints each of +args+ to a fresh file "t" opened with +mode+, then
  # asserts that the file's raw bytes equal +expected+ (both compared as
  # ASCII-8BIT; +expected+ is duplicated so the caller's string is not
  # re-tagged).
  def assert_write(expected, mode, *args)
    with_tmpdir do
      open("t", mode) do |out|
        args.each {|piece| out.print piece }
      end
      written = File.read("t", mode: "rb:ascii-8bit")
      wanted = expected.dup.force_encoding("ascii-8bit")
      assert_equal(wanted, written.force_encoding("ascii-8bit"))
    end
  end
  1459. def test_write_mode
  1460. # "\xc2\xa2" is valid as EUC-JP and UTF-8
  1461. # EUC-JP UTF-8 Unicode
  1462. # 0xC2A2 0xE894B5 U+8535
  1463. # 0xA1F1 0xC2A2 U+00A2
  1464. a = "a\rb\r\nc\n"
  1465. e = "\xc2\xa2".force_encoding("euc-jp")
  1466. u8 = "\xc2\xa2".force_encoding("utf-8")
  1467. u16 = "\x85\x35\0\r\x00\xa2\0\r\0\n\0\n".force_encoding("utf-16be")
  1468. i = "\e$B\x42\x22\e(B\r\e$B\x21\x71\e(B\r\n\n".force_encoding("iso-2022-jp")
  1469. n = system_newline
  1470. un = n.encode("utf-16be").force_encoding("ascii-8bit")
  1471. assert_write("a\rb\r#{n}c#{n}", "wt", a)
  1472. assert_write("\xc2\xa2", "wt", e)
  1473. assert_write("\xc2\xa2", "wt", u8)
  1474. assert_write("a\rb\r\nc\n", "wb", a)
  1475. assert_write("\xc2\xa2", "wb", e)
  1476. assert_write("\xc2\xa2", "wb", u8)
  1477. #assert_write("\x85\x35\0\r\x00\xa2\0\r\0\n\0\n", "wt", u16) should raise
  1478. #assert_write("\e$B\x42\x22\e(B\r\e$B\x21\x71\e(B\r\n\n", "wt", i) should raise
  1479. assert_write("\x85\x35\0\r\x00\xa2\0\r\0\n\0\n", "wb", u16)
  1480. assert_write("\e$B\x42\x22\e(B\r\e$B\x21\x71\e(B\r\n\n", "wb", i)
  1481. t_write_mode_enc
  1482. t_