PageRenderTime 26ms CodeModel.GetById 18ms RepoModel.GetById 0ms app.codeStats 0ms

/test/ruby/test_io_m17n.rb

http://github.com/ruby/ruby
Ruby | 2747 lines | 2499 code | 226 blank | 22 comment | 35 complexity | b98bec9202b52aed94529f8d651dee9a MD5 | raw file
Possible License(s): GPL-2.0, BSD-3-Clause, AGPL-3.0

Large files are truncated, but you can click here to view the full file

  1. # coding: US-ASCII
  2. # frozen_string_literal: false
  3. require 'test/unit'
  4. require 'tmpdir'
  5. require 'tempfile'
  6. require 'timeout'
  7. class TestIO_M17N < Test::Unit::TestCase
  # Encodings iterated over by the encoding-parameterized tests in this class.
  ENCS = [Encoding::ASCII_8BIT, Encoding::EUC_JP, Encoding::Shift_JIS, Encoding::UTF_8]
  # Creates a temporary directory via Dir.mktmpdir, switches the working
  # directory into it for the duration of the block, and yields its path.
  # Returns the block's value.
  def with_tmpdir
    Dir.mktmpdir do |tmp_path|
      Dir.chdir(tmp_path) do
        yield tmp_path
      end
    end
  end
  # Runs +wp+ against the write end and +rp+ against the read end of a fresh
  # IO.pipe, each in its own thread.
  #
  # args:: forwarded to IO.pipe; a trailing Hash is passed as keyword options.
  # wp::   callable invoked with the write end.
  # rp::   callable invoked with the read end.
  #
  # Any exception raised inside either callable (including assertion
  # failures) is captured and re-raised in the calling thread, the writer's
  # first. Calls flunk("timeout") when either thread fails to finish within
  # 10 seconds. Both pipe ends are closed and both threads are killed and
  # joined before returning.
  def pipe(*args, wp, rp)
    reader_error = nil
    writer_error = nil
    options = args.last.is_a?(Hash) ? args.pop : {}
    read_io, write_io = IO.pipe(*args, **options)
    reader_thread = Thread.new do
      begin
        rp.call(read_io)
      rescue Exception => e
        # Deliberately broad: the error is handed over to the calling thread.
        read_io.close
        reader_error = e
      end
    end
    writer_thread = Thread.new do
      begin
        wp.call(write_io)
      rescue Exception => e
        write_io.close
        writer_error = e
      end
    end
    flunk("timeout") unless writer_thread.join(10) && reader_thread.join(10)
  ensure
    write_io.close if write_io && !write_io.closed?
    read_io.close if read_io && !read_io.closed?
    if writer_thread
      writer_thread.kill
      writer_thread.join
    end
    if reader_thread
      reader_thread.kill
      reader_thread.join
    end
    raise writer_error if writer_error
    raise reader_error if reader_error
  end
  # Opens an IO.pipe with +args+, yields the read and write ends, and closes
  # whichever ends are still open once the block finishes (even on error).
  def with_pipe(*args)
    read_io, write_io = IO.pipe(*args)
    begin
      yield read_io, write_io
    ensure
      read_io.close unless read_io.closed?
      write_io.close unless write_io.closed?
    end
  end
  # Writes +content+ to the file at +path+ in binary mode, creating or
  # truncating it. Returns the value of IO#write.
  #
  # File.open is used rather than Kernel#open so that +path+ is always
  # treated as a file name; Kernel#open would treat a path starting with "|"
  # as a command to spawn.
  def generate_file(path, content)
    File.open(path, "wb") { |f| f.write content }
  end
  # Renders +str+ as a Ruby expression: its String#dump form followed by a
  # force_encoding call naming the string's encoding. Used in failure
  # messages.
  def encdump(str)
    dumped = str.dump
    encoding_name = str.encoding.name.dump
    "#{dumped}.force_encoding(#{encoding_name})"
  end
  # Asserts +expected+ == +actual+, attaching a failure message that shows
  # both strings in encdump form (escaped bytes plus encoding name).
  def assert_str_equal(expected, actual, message=nil)
    detail = "#{encdump expected} expected but not equal to\n#{encdump actual}.\n"
    full_message = build_message(message, detail)
    assert_equal(expected, actual, full_message)
  end
  # Mode "r": external encoding is Encoding.default_external, internal is nil.
  def test_open_r
    with_tmpdir do
      generate_file('tmp', "")
      open("tmp", "r") do |io|
        assert_equal(Encoding.default_external, io.external_encoding)
        assert_equal(nil, io.internal_encoding)
      end
    end
  end
  # Mode "rb": external encoding is ASCII-8BIT, internal is nil.
  def test_open_rb
    with_tmpdir do
      generate_file('tmp', "")
      open("tmp", "rb") do |io|
        assert_equal(Encoding.find("ASCII-8BIT"), io.external_encoding)
        assert_equal(nil, io.internal_encoding)
      end
    end
  end
  # Mode "r:euc-jp": external encoding is EUC-JP, internal is nil.
  def test_open_r_enc
    with_tmpdir do
      generate_file('tmp', "")
      open("tmp", "r:euc-jp") do |io|
        assert_equal(Encoding::EUC_JP, io.external_encoding)
        assert_equal(nil, io.internal_encoding)
      end
    end
  end
  # With default_external set to ASCII-8BIT, checks that each of the modes
  # "r", "r:ascii-8bit" and "r:ascii-8bit:utf-16" yields an ASCII-8BIT
  # external encoding and a nil internal encoding, both when
  # default_internal is UTF-8 and when it is nil.
  def test_open_r_ascii8bit
    with_tmpdir do
      generate_file('tmp', "")
      EnvUtil.with_default_external(Encoding::ASCII_8BIT) do
        [Encoding::UTF_8, nil].each do |default_internal|
          EnvUtil.with_default_internal(default_internal) do
            ["r", "r:ascii-8bit", "r:ascii-8bit:utf-16"].each do |mode|
              open("tmp", mode) do |io|
                assert_equal(Encoding::ASCII_8BIT, io.external_encoding)
                assert_equal(nil, io.internal_encoding)
              end
            end
          end
        end
      end
    end
  end
  134. def test_open_r_enc_in_opt
  135. with_tmpdir {
  136. generate_file('tmp', "")
  137. open("tmp", "r", encoding: "euc-jp") {|f|
  138. assert_equal(Encoding::EUC_JP, f.external_encoding)
  139. assert_equal(nil, f.internal_encoding)
  140. }
  141. }
  142. end
  143. def test_open_r_encname_in_opt
  144. with_tmpdir {
  145. generate_file('tmp', "")
  146. open("tmp", "r", encoding: Encoding::EUC_JP) {|f|
  147. assert_equal(Encoding::EUC_JP, f.external_encoding)
  148. assert_equal(nil, f.internal_encoding)
  149. }
  150. }
  151. end
  152. def test_open_r_ext_enc_in_opt
  153. with_tmpdir {
  154. generate_file('tmp', "")
  155. open("tmp", "r", external_encoding: Encoding::EUC_JP) {|f|
  156. assert_equal(Encoding::EUC_JP, f.external_encoding)
  157. assert_equal(nil, f.internal_encoding)
  158. }
  159. }
  160. end
  161. def test_open_r_ext_encname_in_opt
  162. with_tmpdir {
  163. generate_file('tmp', "")
  164. open("tmp", "r", external_encoding: "euc-jp") {|f|
  165. assert_equal(Encoding::EUC_JP, f.external_encoding)
  166. assert_equal(nil, f.internal_encoding)
  167. }
  168. }
  169. end
  170. def test_open_r_enc_enc
  171. with_tmpdir {
  172. generate_file('tmp', "")
  173. open("tmp", "r", external_encoding: Encoding::EUC_JP, internal_encoding: Encoding::UTF_8) {|f|
  174. assert_equal(Encoding::EUC_JP, f.external_encoding)
  175. assert_equal(Encoding::UTF_8, f.internal_encoding)
  176. }
  177. }
  178. end
  # Mode "r:euc-jp:utf-8" sets external EUC-JP and internal UTF-8.
  def test_open_r_encname_encname
    with_tmpdir do
      generate_file('tmp', "")
      open("tmp", "r:euc-jp:utf-8") do |io|
        assert_equal(Encoding::EUC_JP, io.external_encoding)
        assert_equal(Encoding::UTF_8, io.internal_encoding)
      end
    end
  end
  188. def test_open_r_encname_encname_in_opt
  189. with_tmpdir {
  190. generate_file('tmp', "")
  191. open("tmp", "r", encoding: "euc-jp:utf-8") {|f|
  192. assert_equal(Encoding::EUC_JP, f.external_encoding)
  193. assert_equal(Encoding::UTF_8, f.internal_encoding)
  194. }
  195. }
  196. end
  197. def test_open_r_enc_enc_in_opt
  198. with_tmpdir {
  199. generate_file('tmp', "")
  200. open("tmp", "r", external_encoding: Encoding::EUC_JP, internal_encoding: Encoding::UTF_8) {|f|
  201. assert_equal(Encoding::EUC_JP, f.external_encoding)
  202. assert_equal(Encoding::UTF_8, f.internal_encoding)
  203. }
  204. }
  205. end
  206. def test_open_r_externalencname_internalencname_in_opt
  207. with_tmpdir {
  208. generate_file('tmp', "")
  209. open("tmp", "r", external_encoding: "euc-jp", internal_encoding: "utf-8") {|f|
  210. assert_equal(Encoding::EUC_JP, f.external_encoding)
  211. assert_equal(Encoding::UTF_8, f.internal_encoding)
  212. }
  213. }
  214. end
  # Mode "w": both external and internal encodings are nil.
  def test_open_w
    with_tmpdir do
      open("tmp", "w") do |io|
        assert_equal(nil, io.external_encoding)
        assert_equal(nil, io.internal_encoding)
      end
    end
  end
  # Mode "wb": external encoding is ASCII-8BIT, internal is nil.
  def test_open_wb
    with_tmpdir do
      open("tmp", "wb") do |io|
        assert_equal(Encoding.find("ASCII-8BIT"), io.external_encoding)
        assert_equal(nil, io.internal_encoding)
      end
    end
  end
  # Mode "w:euc-jp": external encoding is EUC-JP, internal is nil.
  def test_open_w_enc
    with_tmpdir do
      open("tmp", "w:euc-jp") do |io|
        assert_equal(Encoding::EUC_JP, io.external_encoding)
        assert_equal(nil, io.internal_encoding)
      end
    end
  end
  239. def test_open_w_enc_in_opt
  240. with_tmpdir {
  241. open("tmp", "w", encoding: "euc-jp") {|f|
  242. assert_equal(Encoding::EUC_JP, f.external_encoding)
  243. assert_equal(nil, f.internal_encoding)
  244. }
  245. }
  246. end
  247. def test_open_w_enc_in_opt2
  248. with_tmpdir {
  249. open("tmp", "w", external_encoding: "euc-jp") {|f|
  250. assert_equal(Encoding::EUC_JP, f.external_encoding)
  251. assert_equal(nil, f.internal_encoding)
  252. }
  253. }
  254. end
  # Mode "w:euc-jp:utf-8": external EUC-JP, internal UTF-8.
  def test_open_w_enc_enc
    with_tmpdir do
      open("tmp", "w:euc-jp:utf-8") do |io|
        assert_equal(Encoding::EUC_JP, io.external_encoding)
        assert_equal(Encoding::UTF_8, io.internal_encoding)
      end
    end
  end
  263. def test_open_w_enc_enc_in_opt
  264. with_tmpdir {
  265. open("tmp", "w", encoding: "euc-jp:utf-8") {|f|
  266. assert_equal(Encoding::EUC_JP, f.external_encoding)
  267. assert_equal(Encoding::UTF_8, f.internal_encoding)
  268. }
  269. }
  270. end
  271. def test_open_w_enc_enc_in_opt2
  272. with_tmpdir {
  273. open("tmp", "w", external_encoding: "euc-jp", internal_encoding: "utf-8") {|f|
  274. assert_equal(Encoding::EUC_JP, f.external_encoding)
  275. assert_equal(Encoding::UTF_8, f.internal_encoding)
  276. }
  277. }
  278. end
  # Encodings in the mode string are honoured when a permission argument
  # (0600) is also passed.
  def test_open_w_enc_enc_perm
    with_tmpdir do
      open("tmp", "w:euc-jp:utf-8", 0600) do |io|
        assert_equal(Encoding::EUC_JP, io.external_encoding)
        assert_equal(Encoding::UTF_8, io.internal_encoding)
      end
    end
  end
  287. def test_ignored_encoding_option
  288. enc = "\u{30a8 30f3 30b3 30fc 30c7 30a3 30f3 30b0}"
  289. pattern = /#{enc}/
  290. assert_warning(pattern) {
  291. open(IO::NULL, external_encoding: "us-ascii", encoding: enc) {}
  292. }
  293. assert_warning(pattern) {
  294. open(IO::NULL, internal_encoding: "us-ascii", encoding: enc) {}
  295. }
  296. end
  # IO.new on a raw descriptor with mode "r:sjis" yields strings tagged
  # Windows-31J.
  def test_io_new_enc
    with_tmpdir do
      generate_file("tmp", "\xa1")
      descriptor = IO.sysopen("tmp")
      io = IO.new(descriptor, "r:sjis")
      begin
        assert_equal(Encoding::Windows_31J, io.read.encoding)
      ensure
        io.close
      end
    end
  end
  # IO.pipe with :invalid=>:replace turns an invalid byte into "?".
  def test_s_pipe_invalid
    writer = proc do |w|
      w << "\x80"
      w.close
    end
    reader = proc do |r|
      assert_equal("?", r.read)
    end
    pipe("utf-8", "euc-jp", { :invalid=>:replace }, writer, reader)
  end
  # IO.pipe with :undef=>:replace turns an unconvertible character into "?".
  def test_s_pipe_undef
    writer = proc do |w|
      w << "\ufffd"
      w.close
    end
    reader = proc do |r|
      assert_equal("?", r.read)
    end
    pipe("utf-8:euc-jp", { :undef=>:replace }, writer, reader)
  end
  # :replace=>"X" supplies the substitute used for an unconvertible character.
  def test_s_pipe_undef_replace_string
    writer = proc do |w|
      w << "\ufffd"
      w.close
    end
    reader = proc do |r|
      assert_equal("X", r.read)
    end
    pipe("utf-8:euc-jp", { :undef=>:replace, :replace=>"X" }, writer, reader)
  end
  # A dup of a transcoding read end still converts UTF-8 input to EUC-JP.
  def test_dup
    writer = proc do |w|
      w << "\u3042"
      w.close
    end
    reader = proc do |r|
      copy = r.dup
      begin
        assert_equal("\xA4\xA2".force_encoding("euc-jp"), copy.read)
      ensure
        copy.close
      end
    end
    pipe("utf-8:euc-jp", writer, reader)
  end
  # A dup of the read end keeps the :undef=>:replace conversion option.
  def test_dup_undef
    writer = proc do |w|
      w << "\uFFFD"
      w.close
    end
    reader = proc do |r|
      copy = r.dup
      begin
        assert_equal("?", copy.read)
      ensure
        copy.close
      end
    end
    pipe("utf-8:euc-jp", { :undef=>:replace }, writer, reader)
  end
  # STDIN: external encoding is the default external, internal is nil.
  def test_stdin
    stream = STDIN
    assert_equal(Encoding.default_external, stream.external_encoding)
    assert_equal(nil, stream.internal_encoding)
  end
  # STDOUT: both external and internal encodings are nil.
  def test_stdout
    stream = STDOUT
    assert_equal(nil, stream.external_encoding)
    assert_equal(nil, stream.internal_encoding)
  end
  # STDERR: both external and internal encodings are nil.
  def test_stderr
    stream = STDERR
    assert_equal(nil, stream.external_encoding)
    assert_equal(nil, stream.internal_encoding)
  end
  # gets with a separator in the internal encoding (ISO-8859-1) matches data
  # stored externally as UTF-8. [ruby-core:14288]
  def test_terminator_conversion
    with_tmpdir do
      generate_file('tmp', "before \u00FF after")
      line = open("tmp", "r:utf-8:iso-8859-1") do |f|
        f.gets("\xFF".force_encoding("iso-8859-1"))
      end
      assert_equal(Encoding.find("iso-8859-1"), line.encoding)
      assert_str_equal("before \xFF".force_encoding("iso-8859-1"), line, '[ruby-core:14288]')
    end
  end
  # The separator must match whole converted characters: the bytes
  # \xA2\xA2 straddling two EUC-JP characters are not a match, so the entire
  # content is returned. [ruby-core:14319]
  def test_terminator_conversion2
    with_tmpdir do
      generate_file('tmp', "before \xA1\xA2\xA2\xA3 after")
      line = open("tmp", "r:euc-jp:utf-8") do |f|
        f.gets("\xA2\xA2".force_encoding("euc-jp").encode("utf-8"))
      end
      assert_equal(Encoding.find("utf-8"), line.encoding)
      assert_str_equal("before \xA1\xA2\xA2\xA3 after".force_encoding("euc-jp").encode("utf-8"), line, '[ruby-core:14319]')
    end
  end
  # An ASCII "0" separator must not match the \x30 byte that occurs inside
  # the ISO-2022-JP escape-shifted section; the whole converted content is
  # returned.
  def test_terminator_stateful_conversion
    with_tmpdir do
      src = "before \e$B\x23\x30\x23\x31\e(B after".force_encoding("iso-2022-jp")
      generate_file('tmp', src)
      line = open("tmp", "r:iso-2022-jp:euc-jp") do |f|
        f.gets("0".force_encoding("euc-jp"))
      end
      assert_equal(Encoding.find("euc-jp"), line.encoding)
      assert_str_equal(src.encode("euc-jp"), line)
    end
  end
  # gets raises ArgumentError for a non-ASCII separator whose encoding
  # (UTF-8) differs from the stream's (EUC-JP).
  def test_nonascii_terminator
    with_tmpdir do
      generate_file('tmp', "before \xA2\xA2 after")
      open("tmp", "r:euc-jp") do |f|
        assert_raise(ArgumentError) do
          f.gets("\xA2\xA2".force_encoding("utf-8"))
        end
      end
    end
  end
  # gets with a converted separator on a transcoding pipe returns the text up
  # to and including the separator, within a one-second timeout.
  def test_pipe_terminator_conversion
    rs = "\xA2\xA2".encode("utf-8", "euc-jp")
    writer = proc do |w|
      w.write "before \xa2\xa2 after"
      w.close
    end
    reader = proc do |r|
      Timeout.timeout(1) do
        assert_equal("before \xa2\xa2".encode("utf-8", "euc-jp"),
                     r.gets(rs))
      end
    end
    pipe("euc-jp:utf-8", writer, reader)
  end
  # getc on a transcoding pipe returns one converted character; the writer
  # leaves its end open.
  def test_pipe_conversion
    writer = proc do |w|
      w.write "\xa1\xa1"
    end
    reader = proc do |r|
      assert_equal("\xa1\xa1".encode("utf-8", "euc-jp"), r.getc)
    end
    pipe("euc-jp:utf-8", writer, reader)
  end
  # getc assembles a character whose two bytes arrive in separate writes.
  def test_pipe_convert_partial_read
    writer = proc do |w|
      w.write "\xa1"
      sleep 0.1
      w.write "\xa1"
    end
    reader = proc do |r|
      assert_equal("\xa1\xa1".encode("utf-8", "euc-jp"), r.getc)
    end
    pipe("euc-jp:utf-8", writer, reader)
  end
  # getc raises InvalidByteSequenceError for a truncated EUC-JP character,
  # reports the offending byte, and leaves the following data readable.
  def test_getc_invalid
    writer = proc do |w|
      w << "\xa1xyz"
      w.close
    end
    reader = proc do |r|
      err = assert_raise(Encoding::InvalidByteSequenceError) { r.getc }
      assert_equal("\xA1".force_encoding("ascii-8bit"), err.error_bytes)
      assert_equal("xyz", r.read(10))
    end
    pipe("euc-jp:utf-8", writer, reader)
  end
  # Successive getc calls each return one EUC-JP character converted from
  # ISO-2022-JP input.
  def test_getc_stateful_conversion
    with_tmpdir do
      src = "\e$B\x23\x30\x23\x31\e(B".force_encoding("iso-2022-jp")
      generate_file('tmp', src)
      open("tmp", "r:iso-2022-jp:euc-jp") do |f|
        assert_equal("\xa3\xb0".force_encoding("euc-jp"), f.getc)
        assert_equal("\xa3\xb1".force_encoding("euc-jp"), f.getc)
      end
    end
  end
  # In text mode ("rt") with a UTF-8 default external encoding, getc returns
  # a complete, valid multibyte character.
  def test_getc_newlineconv
    with_tmpdir do
      src = "\u3042"
      generate_file('tmp', src)
      EnvUtil.with_default_external(Encoding::UTF_8) do
        open("tmp", "rt") do |f|
          char = f.getc
          assert_equal(true, char.valid_encoding?)
          assert_equal("\u3042", char)
        end
      end
    end
  end
  # In text mode with a UTF-8 default external encoding, a truncated
  # sequence is returned by getc one invalid byte at a time.
  def test_getc_newlineconv_invalid
    with_tmpdir do
      src = "\xE3\x81"
      generate_file('tmp', src)
      EnvUtil.with_default_external(Encoding::UTF_8) do
        open("tmp", "rt") do |f|
          char = f.getc
          assert_equal(false, char.valid_encoding?)
          assert_equal("\xE3".force_encoding("UTF-8"), char)
          char = f.getc
          assert_equal(false, char.valid_encoding?)
          assert_equal("\x81".force_encoding("UTF-8"), char)
        end
      end
    end
  end
  # ungetc with an Integer code point pushes back the corresponding GB18030
  # character ahead of the file content.
  def test_ungetc_int
    with_tmpdir do
      generate_file('tmp', "A")
      content = open("tmp", "r:GB18030") do |f|
        f.ungetc(0x8431A439)
        f.read
      end
      assert_equal(Encoding::GB18030, content.encoding)
      assert_str_equal(0x8431A439.chr("GB18030")+"A", content)
    end
  end
  # ungetc with a GB18030 String pushes it back ahead of the file content.
  def test_ungetc_str
    with_tmpdir do
      generate_file('tmp', "A")
      content = open("tmp", "r:GB18030") do |f|
        f.ungetc(0x8431A439.chr("GB18030"))
        f.read
      end
      assert_equal(Encoding::GB18030, content.encoding)
      assert_str_equal(0x8431A439.chr("GB18030")+"A", content)
    end
  end
  # ungetc before any read on an ISO-2022-JP -> EUC-JP stream prepends the
  # pushed-back character to the fully converted content.
  def test_ungetc_stateful_conversion
    with_tmpdir do
      src = "before \e$B\x23\x30\x23\x31\e(B after".force_encoding("iso-2022-jp")
      generate_file('tmp', src)
      content = open("tmp", "r:iso-2022-jp:euc-jp") do |f|
        f.ungetc("0".force_encoding("euc-jp"))
        f.read
      end
      assert_equal(Encoding.find("euc-jp"), content.encoding)
      assert_str_equal("0" + src.encode("euc-jp"), content)
    end
  end
  # After gets has consumed up to a separator in the middle of the
  # escape-shifted section, ungetc followed by read yields the pushed-back
  # character plus the converted remainder.
  def test_ungetc_stateful_conversion2
    with_tmpdir do
      src    = "before \e$B\x23\x30\x23\x31\e(B after".force_encoding("iso-2022-jp")
      former = "before \e$B\x23\x30\e(B".force_encoding("iso-2022-jp")
      rs     = "\e$B\x23\x30\e(B".force_encoding("iso-2022-jp")
      latter = "\e$B\x23\x31\e(B after".force_encoding("iso-2022-jp")
      generate_file('tmp', src)
      rest = open("tmp", "r:iso-2022-jp:euc-jp") do |f|
        assert_equal(former.encode("euc-jp", "iso-2022-jp"),
                     f.gets(rs.encode("euc-jp", "iso-2022-jp")))
        f.ungetc("0")
        f.read
      end
      assert_equal(Encoding.find("euc-jp"), rest.encoding)
      assert_str_equal("0" + latter.encode("euc-jp"), rest)
    end
  end
  # An ASCII-only line read under each encoding in ENCS is tagged with that
  # encoding and equals the source text.
  def test_open_ascii
    with_tmpdir do
      src = "abc\n"
      generate_file('tmp', "abc\n")
      ENCS.each do |enc|
        line = open('tmp', "r:#{enc}") do |f|
          f.gets
        end
        assert_equal(enc, line.encoding)
        assert_str_equal(src, line)
      end
    end
  end
  # A non-ASCII line read under each encoding in ENCS is tagged with that
  # encoding and carries the same bytes as the source.
  def test_open_nonascii
    with_tmpdir do
      src = "\xc2\xa1\n"
      generate_file('tmp', src)
      ENCS.each do |enc|
        content = src.dup.force_encoding(enc)
        line = open('tmp', "r:#{enc}") do |f|
          f.gets
        end
        assert_equal(enc, line.encoding)
        assert_str_equal(content, line)
      end
    end
  end
  # For every encoding in ENCS, checks the encoding tag and content returned
  # by each reading method. Character and line readers, readlines,
  # each_line and read without a length are expected to tag their result
  # with the stream encoding; read(1), readpartial(1) and sysread(1) are
  # expected to return ASCII-8BIT.
  def test_read_encoding
    with_tmpdir do
      src = "\xc2\xa1\n".force_encoding("ASCII-8BIT")
      generate_file('tmp', "\xc2\xa1\n")
      ENCS.each do |enc|
        content = src.dup.force_encoding(enc)

        # Single-character readers.
        %i[getc readchar].each do |reader|
          open('tmp', "r:#{enc}") do |f|
            s = f.public_send(reader)
            assert_equal(enc, s.encoding)
            assert_str_equal(content[0], s)
          end
        end

        # Single-line readers.
        %i[gets readline].each do |reader|
          open('tmp', "r:#{enc}") do |f|
            s = f.public_send(reader)
            assert_equal(enc, s.encoding)
            assert_str_equal(content, s)
          end
        end

        open('tmp', "r:#{enc}") do |f|
          lines = f.readlines
          assert_equal(1, lines.length)
          s = lines[0]
          assert_equal(enc, s.encoding)
          assert_str_equal(content, s)
        end

        open('tmp', "r:#{enc}") do |f|
          f.each_line do |s|
            assert_equal(enc, s.encoding)
            assert_str_equal(content, s)
          end
        end

        open('tmp', "r:#{enc}") do |f|
          s = f.read
          assert_equal(enc, s.encoding)
          assert_str_equal(content, s)
        end

        # Length-limited readers return binary strings.
        %i[read readpartial sysread].each do |reader|
          open('tmp', "r:#{enc}") do |f|
            s = f.public_send(reader, 1)
            assert_equal(Encoding::ASCII_8BIT, s.encoding)
            assert_str_equal(src[0], s)
          end
        end
      end
    end
  end
  # Writing to a file opened with plain "w" stores the bytes unchanged
  # regardless of each string's encoding tag.
  def test_write_noenc
    src = "\xc2\xa1\n".force_encoding("ascii-8bit")
    with_tmpdir do
      open('tmp', "w") do |f|
        ENCS.each do |enc|
          f.write src.dup.force_encoding(enc)
        end
      end
      open('tmp', 'r:ascii-8bit') do |f|
        assert_equal(src*ENCS.length, f.read)
      end
    end
  end
  # A UTF-8 string printed to a "w:EUC-JP" file is stored as EUC-JP, and
  # reading it back with "r:EUC-JP:UTF-8" restores the UTF-8 string.
  def test_write_conversion
    utf8 = "\u6666"
    eucjp = "\xb3\xa2".force_encoding("EUC-JP")
    with_tmpdir do
      open('tmp', "w:EUC-JP") do |f|
        assert_equal(Encoding::EUC_JP, f.external_encoding)
        assert_equal(nil, f.internal_encoding)
        f.print utf8
      end
      assert_equal(eucjp, File.read('tmp').force_encoding("EUC-JP"))
      open('tmp', 'r:EUC-JP:UTF-8') do |f|
        assert_equal(Encoding::EUC_JP, f.external_encoding)
        assert_equal(Encoding::UTF_8, f.internal_encoding)
        assert_equal(utf8, f.read)
      end
    end
  end
  # Exercises IO.pipe encoding arguments: no argument, external only,
  # external:internal, encoding names in a non-ASCII-compatible encoding
  # (expected to raise ArgumentError), and every encoding in ENCS.
  def test_pipe
    utf8 = "\u6666"
    eucjp = "\xb3\xa2".force_encoding("EUC-JP")

    # No encoding argument: the read end uses the default external encoding.
    write_utf8 = proc do |w|
      w << utf8
      w.close
    end
    check_default = proc do |r|
      assert_equal(Encoding.default_external, r.external_encoding)
      assert_equal(nil, r.internal_encoding)
      s = r.read
      assert_equal(Encoding.default_external, s.encoding)
      assert_str_equal(utf8.dup.force_encoding(Encoding.default_external), s)
    end
    pipe(write_utf8, check_default)

    # External encoding only.
    write_eucjp = proc do |w|
      w << eucjp
      w.close
    end
    check_eucjp = proc do |r|
      assert_equal(Encoding::EUC_JP, r.external_encoding)
      assert_equal(nil, r.internal_encoding)
      assert_equal(eucjp, r.read)
    end
    pipe("EUC-JP", write_eucjp, check_eucjp)

    # A multibyte character placed after 1023 single-byte characters.
    write_long = proc do |w|
      w << "a" * 1023 + "\u3042" + "a" * 1022
      w.close
    end
    check_valid = proc do |r|
      assert_equal(true, r.read.valid_encoding?)
    end
    pipe("UTF-8", write_long, check_valid)

    # External and internal encodings: data is transcoded on read.
    write_for_conversion = proc do |w|
      w << utf8
      w.close
    end
    check_conversion = proc do |r|
      assert_equal(Encoding::UTF_8, r.external_encoding)
      assert_equal(Encoding::EUC_JP, r.internal_encoding)
      assert_equal(eucjp, r.read)
    end
    pipe("UTF-8:EUC-JP", write_for_conversion, check_conversion)

    assert_raise_with_message(ArgumentError, /invalid name encoding/) do
      with_pipe("UTF-8", "UTF-8".encode("UTF-32BE")) {}
    end
    assert_raise_with_message(ArgumentError, /invalid name encoding/) do
      with_pipe("UTF-8".encode("UTF-32BE")) {}
    end

    ENCS.each do |enc|
      writer = proc do |w|
        w << "\xc2\xa1"
        w.close
      end
      reader = proc do |r|
        s = r.getc
        assert_equal(enc, s.encoding)
      end
      pipe(enc, writer, reader)
    end

    ENCS.each do |enc|
      next if [Encoding::ASCII_8BIT, Encoding::UTF_8].include?(enc)
      writer = proc do |w|
        w << "\xc2\xa1"
        w.close
      end
      reader = proc do |r|
        s = r.read
        assert_equal(Encoding::UTF_8, s.encoding)
        assert_equal(s.encode("UTF-8"), s)
      end
      pipe("#{enc}:UTF-8", writer, reader)
    end
  end
  # Marshal data written to a pipe opened with an EUC-JP external encoding
  # loads back without error. [ruby-dev:33264]
  def test_marshal
    data = 56225
    writer = proc do |w|
      Marshal.dump(data, w)
      w.close
    end
    reader = proc do |r|
      loaded = nil
      assert_nothing_raised("[ruby-dev:33264]") { loaded = Marshal.load(r) }
      assert_equal(data, loaded)
    end
    pipe("EUC-JP", writer, reader)
  end
  # gets(nil) on a transcoding pipe returns the whole converted content.
  def test_gets_nil
    writer = proc do |w|
      w << "\u{3042}"
      w.close
    end
    reader = proc do |r|
      whole = r.gets(nil)
      assert_equal("\u{3042}".encode("euc-jp"), whole)
    end
    pipe("UTF-8:EUC-JP", writer, reader)
  end
  # gets(limit) on EUC-JP data never splits a two-byte character: an odd
  # byte limit is rounded up to the end of the current character, and the
  # result never extends past the newline.
  def test_gets_limit
    # Byte limit passed to gets => bytes expected back.
    expected_by_limit = {
      1 => "\xa4\xa2",
      2 => "\xa4\xa2",
      3 => "\xa4\xa2\xa4\xa4",
      4 => "\xa4\xa2\xa4\xa4",
      5 => "\xa4\xa2\xa4\xa4\xa4\xa6",
      6 => "\xa4\xa2\xa4\xa4\xa4\xa6",
      7 => "\xa4\xa2\xa4\xa4\xa4\xa6\n",
      8 => "\xa4\xa2\xa4\xa4\xa4\xa6\n",
      9 => "\xa4\xa2\xa4\xa4\xa4\xa6\n",
    }
    expected_by_limit.each do |limit, expected_bytes|
      writer = proc {|w| w << "\xa4\xa2\xa4\xa4\xa4\xa6\n\xa4\xa8\xa4\xaa"; w.close }
      reader = proc {|r| assert_equal(expected_bytes.force_encoding("euc-jp"), r.gets(limit)) }
      pipe("euc-jp", writer, reader)
    end
  end
  # gets raises InvalidByteSequenceError at an invalid byte, reports that
  # byte, and a following gets returns the converted text after it.
  def test_gets_invalid
    before = "\u{3042}\u{3044}"
    invalid = "\x80".force_encoding("utf-8")
    after = "\u{3046}\u{3048}"
    writer = proc do |w|
      w << before + invalid + after
      w.close
    end
    reader = proc do |r|
      err = assert_raise(Encoding::InvalidByteSequenceError) { r.gets }
      assert_equal(invalid.force_encoding("ascii-8bit"), err.error_bytes)
      assert_equal(after.encode("euc-jp"), r.gets)
    end
    pipe("utf-8:euc-jp", writer, reader)
  end
  # getc returns the valid characters before an invalid UTF-8 byte, raises
  # InvalidByteSequenceError for that byte, then continues with the
  # characters after it.
  def test_getc_invalid2
    before1 = "\u{3042}"
    before2 = "\u{3044}"
    invalid = "\x80".force_encoding("utf-8")
    after1 = "\u{3046}"
    after2 = "\u{3048}"
    writer = proc do |w|
      w << before1 + before2 + invalid + after1 + after2
      w.close
    end
    reader = proc do |r|
      assert_equal(before1.encode("euc-jp"), r.getc)
      assert_equal(before2.encode("euc-jp"), r.getc)
      err = assert_raise(Encoding::InvalidByteSequenceError) { r.getc }
      assert_equal(invalid.force_encoding("ascii-8bit"), err.error_bytes)
      assert_equal(after1.encode("euc-jp"), r.getc)
      assert_equal(after2.encode("euc-jp"), r.getc)
    end
    pipe("utf-8:euc-jp", writer, reader)
  end
  # Same scenario as the UTF-8 variant but with UTF-16LE input on a binmode
  # pipe; the invalid unit is the two-byte sequence \x00\xd8.
  def test_getc_invalid3
    before1 = "\x42\x30".force_encoding("utf-16le")
    before2 = "\x44\x30".force_encoding("utf-16le")
    invalid = "\x00\xd8".force_encoding("utf-16le")
    after1 = "\x46\x30".force_encoding("utf-16le")
    after2 = "\x48\x30".force_encoding("utf-16le")
    writer = proc do |w|
      w << before1 + before2 + invalid + after1 + after2
      w.close
    end
    reader = proc do |r|
      assert_equal(before1.encode("euc-jp"), r.getc)
      assert_equal(before2.encode("euc-jp"), r.getc)
      err = assert_raise(Encoding::InvalidByteSequenceError) { r.getc }
      assert_equal(invalid.force_encoding("ascii-8bit"), err.error_bytes)
      assert_equal(after1.encode("euc-jp"), r.getc)
      assert_equal(after2.encode("euc-jp"), r.getc)
    end
    pipe("utf-16le:euc-jp", { :binmode => true }, writer, reader)
  end
  # read without a length returns the whole content converted to EUC-JP.
  def test_read_all
    str = "\u3042\u3044"
    writer = proc do |w|
      w << str
      w.close
    end
    reader = proc do |r|
      assert_equal(str.encode("euc-jp"), r.read)
    end
    pipe("utf-8:euc-jp", writer, reader)
  end
  # read raises InvalidByteSequenceError at an invalid byte, reports that
  # byte, and a following read returns the converted text after it.
  def test_read_all_invalid
    before = "\u{3042}\u{3044}"
    invalid = "\x80".force_encoding("utf-8")
    after = "\u{3046}\u{3048}"
    writer = proc do |w|
      w << before + invalid + after
      w.close
    end
    reader = proc do |r|
      err = assert_raise(Encoding::InvalidByteSequenceError) { r.read }
      assert_equal(invalid.force_encoding("ascii-8bit"), err.error_bytes)
      assert_equal(after.encode("euc-jp"), r.read)
    end
    pipe("utf-8:euc-jp", writer, reader)
  end
  # File.foreach must not raise when a two-byte EUC-JP character follows
  # 8191 single-byte characters.
  def test_file_foreach
    with_tmpdir do
      generate_file('tst', 'a' * 8191 + "\xa1\xa1")
      assert_nothing_raised do
        File.foreach('tst', :encoding=>"euc-jp") do |line|
          line.inspect
        end
      end
    end
  end
  # set_encoding("shift_jis:euc-jp") in mid-stream makes the next getc
  # decode the remaining bytes as Shift_JIS.
  def test_set_encoding
    writer = proc do |w|
      payload = "\u3042".force_encoding("ascii-8bit")
      payload << "\x82\xa0".force_encoding("ascii-8bit")
      w << payload
      w.close
    end
    reader = proc do |r|
      assert_equal("\xa4\xa2".force_encoding("euc-jp"), r.getc)
      r.set_encoding("shift_jis:euc-jp")
      assert_equal("\xa4\xa2".force_encoding("euc-jp"), r.getc)
    end
    pipe("utf-8:euc-jp", writer, reader)
  end
  # Two-argument set_encoding("shift_jis", "euc-jp") in mid-stream makes the
  # next getc decode the remaining bytes as Shift_JIS.
  def test_set_encoding2
    writer = proc do |w|
      payload = "\u3042".force_encoding("ascii-8bit")
      payload << "\x82\xa0".force_encoding("ascii-8bit")
      w << payload
      w.close
    end
    reader = proc do |r|
      assert_equal("\xa4\xa2".force_encoding("euc-jp"), r.getc)
      r.set_encoding("shift_jis", "euc-jp")
      assert_equal("\xa4\xa2".force_encoding("euc-jp"), r.getc)
    end
    pipe("utf-8:euc-jp", writer, reader)
  end
  # set_encoding(nil) stops conversion: the remaining bytes are read
  # unchanged and tagged with the default external encoding.
  def test_set_encoding_nil
    writer = proc do |w|
      payload = "\u3042".force_encoding("ascii-8bit")
      payload << "\x82\xa0".force_encoding("ascii-8bit")
      w << payload
      w.close
    end
    reader = proc do |r|
      assert_equal("\xa4\xa2".force_encoding("euc-jp"), r.getc)
      r.set_encoding(nil)
      assert_equal("\x82\xa0".force_encoding(Encoding.default_external), r.read)
    end
    pipe("utf-8:euc-jp", writer, reader)
  end
  # set_encoding with a single Encoding object: the next getc returns the
  # raw bytes tagged Shift_JIS, without conversion.
  def test_set_encoding_enc
    writer = proc do |w|
      payload = "\u3042".force_encoding("ascii-8bit")
      payload << "\x82\xa0".force_encoding("ascii-8bit")
      w << payload
      w.close
    end
    reader = proc do |r|
      assert_equal("\xa4\xa2".force_encoding("euc-jp"), r.getc)
      r.set_encoding(Encoding::Shift_JIS)
      assert_equal("\x82\xa0".force_encoding(Encoding::Shift_JIS), r.getc)
    end
    pipe("utf-8:euc-jp", writer, reader)
  end
  # set_encoding accepts :invalid=>:replace; the invalid byte reads as "?".
  def test_set_encoding_invalid
    writer = proc do |w|
      w << "\x80"
      w.close
    end
    reader = proc do |r|
      r.set_encoding("utf-8:euc-jp", :invalid=>:replace)
      assert_equal("?", r.read)
    end
    pipe(writer, reader)
  end
  # set_encoding must emit no warning when external and internal encodings
  # name the same encoding, however they are spelled (names, Encoding
  # objects, or objects responding to to_str). [ruby-core:44455]
  def test_set_encoding_identical
    #bug5568 = '[ruby-core:40727]'
    bug6324 = '[ruby-core:44455]'
    open(__FILE__, "r") do |f|
      argument_lists = [
        ["eucjp:euc-jp"],
        ["eucjp", "euc-jp"],
        [Encoding::EUC_JP, "euc-jp"],
        ["eucjp", Encoding::EUC_JP],
        [Encoding::EUC_JP, Encoding::EUC_JP],
      ]
      argument_lists.each do |enc_args|
        assert_warning('', bug6324) do
          f.set_encoding(*enc_args)
        end
      end
      nonstr = Object.new
      def nonstr.to_str; "eucjp"; end
      assert_warning('', bug6324) do
        f.set_encoding(nonstr, nonstr)
      end
    end
  end
  # set_encoding accepts :undef=>:replace; the unconvertible character reads
  # as "?".
  def test_set_encoding_undef
    writer = proc do |w|
      w << "\ufffd"
      w.close
    end
    reader = proc do |r|
      r.set_encoding("utf-8", "euc-jp", :undef=>:replace)
      assert_equal("?", r.read)
    end
    pipe(writer, reader)
  end
  # set_encoding accepts a custom :replace string, with both the
  # two-argument and the "ext:int" forms.
  def test_set_encoding_undef_replace
    writer = proc do |w|
      w << "\ufffd"
      w.close
    end
    two_arg_reader = proc do |r|
      r.set_encoding("utf-8", "euc-jp", :undef=>:replace, :replace=>"ZZZ")
      assert_equal("ZZZ", r.read)
    end
    pipe(writer, two_arg_reader)

    second_writer = proc do |w|
      w << "\ufffd"
      w.close
    end
    colon_form_reader = proc do |r|
      r.set_encoding("utf-8:euc-jp", :undef=>:replace, :replace=>"ZZZ")
      assert_equal("ZZZ", r.read)
    end
    pipe(second_writer, colon_form_reader)
  end
  # set_encoding("iso-2022-jp") is expected to raise ArgumentError on streams
  # opened with "rt" or "r", and to succeed once the stream is in binary
  # mode (mode "rb", a binmode call, or the binmode: true option). Giving
  # both "rb" and a binmode: option is expected to raise ArgumentError.
  def test_set_encoding_binmode
    # Opened with "rt" or "r" and no binmode: rejected.
    %w[rt r].each do |mode|
      assert_raise(ArgumentError) do
        open(__FILE__, mode) do |f|
          f.set_encoding("iso-2022-jp")
        end
      end
    end

    # Opened with "rb": accepted.
    assert_nothing_raised do
      open(__FILE__, "rb") do |f|
        f.set_encoding("iso-2022-jp")
      end
    end

    # Switched to binary mode after opening: accepted.
    %w[r rt].each do |mode|
      assert_nothing_raised do
        open(__FILE__, mode) do |f|
          f.binmode
          f.set_encoding("iso-2022-jp")
        end
      end
    end

    # binmode: true option: accepted, and external encoding is ASCII-8BIT.
    assert_nothing_raised do
      open(__FILE__, "r", binmode: true) do |f|
        assert_equal(Encoding::ASCII_8BIT, f.external_encoding)
        f.set_encoding("iso-2022-jp")
      end
    end

    # "rb" combined with a binmode: option: rejected.
    [true, false].each do |flag|
      assert_raise(ArgumentError) do
        open(__FILE__, "rb", binmode: flag) do |f|
          f.set_encoding("iso-2022-jp")
        end
      end
    end
  end
  # set_encoding with an unknown encoding name is expected to emit an
  # "Unsupported" warning rather than raise, whichever argument position the
  # unknown name appears in.
  def test_set_encoding_unsupported
    bug5567 = '[ruby-core:40726]'
    argument_lists = [
      ["fffffffffffxx"],
      ["fffffffffffxx", "us-ascii"],
      ["us-ascii", "fffffffffffxx"],
    ]
    IO.pipe do |r, _w|
      assert_nothing_raised(bug5567) do
        argument_lists.each do |enc_args|
          assert_warning(/Unsupported/, bug5567) { r.set_encoding(*enc_args) }
        end
      end
    end
  end
  # Opening with mode "rt" plus an explicit :textmode option (true or false)
  # is expected to raise ArgumentError.
  def test_textmode_twice
    [true, false].each do |flag|
      assert_raise(ArgumentError) do
        open(__FILE__, "rt", textmode: flag) { |io| io.set_encoding("iso-2022-jp") }
      end
    end
  end
  # Two separate writes through an "iso-2022-jp:utf-8" writer are expected to
  # arrive as the single escape-delimited byte sequence asserted below.
  def test_write_conversion_fixenc
    writer = proc do |w|
      w.set_encoding("iso-2022-jp:utf-8")
      w << "\u3042"
      w << "\u3044"
      w.close
    end
    reader = proc do |r|
      expected = "\e$B$\"$$\e(B".force_encoding("ascii-8bit")
      assert_equal(expected, r.read.force_encoding("ascii-8bit"))
    end
    pipe(writer, reader)
  end
  # A \u-escaped string and a Shift_JIS-tagged string written to an
  # "iso-2022-jp" writer are expected to arrive as the byte sequence below.
  def test_write_conversion_anyenc_stateful
    writer = proc do |w|
      w.set_encoding("iso-2022-jp")
      w << "\u3042"
      w << "\x82\xa2".force_encoding("sjis")
      w.close
    end
    reader = proc do |r|
      expected = "\e$B$\"$$\e(B".force_encoding("ascii-8bit")
      assert_equal(expected, r.read.force_encoding("ascii-8bit"))
    end
    pipe(writer, reader)
  end
  # A \u-escaped string and a Shift_JIS-tagged string written to an "euc-jp"
  # writer are expected to arrive as the EUC-JP bytes asserted below.
  def test_write_conversion_anyenc_stateless
    writer = proc do |w|
      w.set_encoding("euc-jp")
      w << "\u3042"
      w << "\x82\xa2".force_encoding("sjis")
      w.close
    end
    reader = proc do |r|
      expected = "\xa4\xa2\xa4\xa4".force_encoding("ascii-8bit")
      assert_equal(expected, r.read.force_encoding("ascii-8bit"))
    end
    pipe(writer, reader)
  end
  # Same as the stateful "iso-2022-jp" write test, but with sync disabled on
  # the writer before the strings are written.
  def test_write_conversion_anyenc_stateful_nosync
    writer = proc do |w|
      w.sync = false
      w.set_encoding("iso-2022-jp")
      w << "\u3042"
      w << "\x82\xa2".force_encoding("sjis")
      w.close
    end
    reader = proc do |r|
      expected = "\e$B$\"$$\e(B".force_encoding("ascii-8bit")
      assert_equal(expected, r.read.force_encoding("ascii-8bit"))
    end
    pipe(writer, reader)
  end
  # Bytes written to a pipe created with "euc-jp:iso-2022-jp" are expected to
  # be read back as the ISO-2022-JP string asserted below.
  def test_read_stateful
    writer = proc do |w|
      w << "\xA4\xA2"
      w.close
    end
    reader = proc do |r|
      expected = "\e$B$\"\e(B".force_encoding("iso-2022-jp")
      assert_equal(expected, r.read)
    end
    pipe("euc-jp:iso-2022-jp", writer, reader)
  end
  # Spawns a child interpreter with a temp file passed as fd 10; the child
  # reopens STDIN onto it, queries STDIN.external_encoding and writes
  # "\u3042". The parent then checks the bytes that landed in the file.
  def test_stdin_external_encoding_with_reopen
    child_script = <<-'End'
      io = IO.new(10, "r+")
      STDIN.reopen(io)
      STDIN.external_encoding
      STDIN.write "\u3042"
      STDIN.flush
    End
    with_tmpdir do
      open("tst", "w+") do |f|
        pid = spawn(EnvUtil.rubybin, '-e', child_script, 10=>f)
        Process.wait(pid)
        f.rewind
        written = f.read.force_encoding("ascii-8bit")
        assert_equal("\u3042".force_encoding("ascii-8bit"), written)
      end
    end
  end unless /mswin|mingw/ =~ RUBY_PLATFORM # passing non-stdio fds is not supported
  # IO.popen with the external encoding given in the mode string
  # ("r:ascii-8bit"): no internal encoding, data read as ASCII-8BIT.
  def test_popen_r_enc
    command = "#{EnvUtil.rubybin} -e 'putc 255'"
    IO.popen(command, "r:ascii-8bit") do |child|
      assert_equal(Encoding::ASCII_8BIT, child.external_encoding)
      assert_equal(nil, child.internal_encoding)
      output = child.read
      assert_equal(Encoding::ASCII_8BIT, output.encoding)
      assert_equal("\xff".force_encoding("ascii-8bit"), output)
    end
  end
  # IO.popen with the encoding given through the :encoding option:
  # no internal encoding, data read as ASCII-8BIT.
  def test_popen_r_enc_in_opt
    command = "#{EnvUtil.rubybin} -e 'putc 255'"
    IO.popen(command, "r", encoding: "ascii-8bit") do |child|
      assert_equal(Encoding::ASCII_8BIT, child.external_encoding)
      assert_equal(nil, child.internal_encoding)
      output = child.read
      assert_equal(Encoding::ASCII_8BIT, output.encoding)
      assert_equal("\xff".force_encoding("ascii-8bit"), output)
    end
  end
  # IO.popen with the encoding given through the :external_encoding option:
  # no internal encoding, data read as ASCII-8BIT.
  def test_popen_r_enc_in_opt2
    command = "#{EnvUtil.rubybin} -e 'putc 255'"
    IO.popen(command, "r", external_encoding: "ascii-8bit") do |child|
      assert_equal(Encoding::ASCII_8BIT, child.external_encoding)
      assert_equal(nil, child.internal_encoding)
      output = child.read
      assert_equal(Encoding::ASCII_8BIT, output.encoding)
      assert_equal("\xff".force_encoding("ascii-8bit"), output)
    end
  end
  # IO.popen with external and internal encodings in the mode string
  # ("r:shift_jis:euc-jp"): the byte 0xa1 is expected to be read as the
  # EUC-JP string "\x8e\xa1".
  def test_popen_r_enc_enc
    command = "#{EnvUtil.rubybin} -e 'putc 0xa1'"
    IO.popen(command, "r:shift_jis:euc-jp") do |child|
      assert_equal(Encoding::Shift_JIS, child.external_encoding)
      assert_equal(Encoding::EUC_JP, child.internal_encoding)
      output = child.read
      assert_equal(Encoding::EUC_JP, output.encoding)
      assert_equal("\x8e\xa1".force_encoding("euc-jp"), output)
    end
  end
  # IO.popen with "ext:int" given through the :encoding option: the byte 0xa1
  # is expected to be read as the EUC-JP string "\x8e\xa1".
  def test_popen_r_enc_enc_in_opt
    command = "#{EnvUtil.rubybin} -e 'putc 0xa1'"
    IO.popen(command, "r", encoding: "shift_jis:euc-jp") do |child|
      assert_equal(Encoding::Shift_JIS, child.external_encoding)
      assert_equal(Encoding::EUC_JP, child.internal_encoding)
      output = child.read
      assert_equal(Encoding::EUC_JP, output.encoding)
      assert_equal("\x8e\xa1".force_encoding("euc-jp"), output)
    end
  end
  # IO.popen with separate :external_encoding and :internal_encoding options:
  # the byte 0xa1 is expected to be read as the EUC-JP string "\x8e\xa1".
  def test_popen_r_enc_enc_in_opt2
    command = "#{EnvUtil.rubybin} -e 'putc 0xa1'"
    IO.popen(command, "r", external_encoding: "shift_jis", internal_encoding: "euc-jp") do |child|
      assert_equal(Encoding::Shift_JIS, child.external_encoding)
      assert_equal(Encoding::EUC_JP, child.internal_encoding)
      output = child.read
      assert_equal(Encoding::EUC_JP, output.encoding)
      assert_equal("\x8e\xa1".force_encoding("euc-jp"), output)
    end
  end
  # Array-form IO.popen (argv list instead of a command string) with separate
  # :external_encoding and :internal_encoding options.
  def test_popenv_r_enc_enc_in_opt2
    argv = [EnvUtil.rubybin, "-e", "putc 0xa1"]
    IO.popen(argv, "r", external_encoding: "shift_jis", internal_encoding: "euc-jp") do |child|
      assert_equal(Encoding::Shift_JIS, child.external_encoding)
      assert_equal(Encoding::EUC_JP, child.internal_encoding)
      output = child.read
      assert_equal(Encoding::EUC_JP, output.encoding)
      assert_equal("\x8e\xa1".force_encoding("euc-jp"), output)
    end
  end
  # Kernel#open on a "|command" string with mode "r:ascii-8bit": no internal
  # encoding, data read as ASCII-8BIT.
  def test_open_pipe_r_enc
    pipe_command = "|#{EnvUtil.rubybin} -e 'putc 255'"
    open(pipe_command, "r:ascii-8bit") do |child|
      assert_equal(Encoding::ASCII_8BIT, child.external_encoding)
      assert_equal(nil, child.internal_encoding)
      output = child.read
      assert_equal(Encoding::ASCII_8BIT, output.encoding)
      assert_equal("\xff".force_encoding("ascii-8bit"), output)
    end
  end
  # Kernel#open on a "|command" string with mode "r:UTF-8": no internal
  # encoding, data read as the UTF-8 string "\u3042".
  def test_open_pipe_r_enc2
    pipe_command = "|#{EnvUtil.rubybin} -e 'putc \"\\u3042\"'"
    open(pipe_command, "r:UTF-8") do |child|
      assert_equal(Encoding::UTF_8, child.external_encoding)
      assert_equal(nil, child.internal_encoding)
      output = child.read
      assert_equal(Encoding::UTF_8, output.encoding)
      assert_equal("\u3042", output)
    end
  end
  # IO.foreach with the encoding in :mode ("r:ascii-8bit") is expected to
  # yield ASCII-8BIT strings.
  def test_s_foreach_enc
    with_tmpdir do
      generate_file("t", "\xff")
      IO.foreach("t", mode: "r:ascii-8bit") do |line|
        assert_equal(Encoding::ASCII_8BIT, line.encoding)
        assert_equal("\xff".force_encoding("ascii-8bit"), line)
      end
    end
  end
  # IO.foreach with the :encoding option is expected to yield ASCII-8BIT
  # strings.
  def test_s_foreach_enc_in_opt
    with_tmpdir do
      generate_file("t", "\xff")
      IO.foreach("t", encoding: "ascii-8bit") do |line|
        assert_equal(Encoding::ASCII_8BIT, line.encoding)
        assert_equal("\xff".force_encoding("ascii-8bit"), line)
      end
    end
  end
  # IO.foreach with the :external_encoding option is expected to yield
  # ASCII-8BIT strings.
  def test_s_foreach_enc_in_opt2
    with_tmpdir do
      generate_file("t", "\xff")
      IO.foreach("t", external_encoding: "ascii-8bit") do |line|
        assert_equal(Encoding::ASCII_8BIT, line.encoding)
        assert_equal("\xff".force_encoding("ascii-8bit"), line)
      end
    end
  end
  # IO.foreach with "r:utf-8:euc-jp" in :mode is expected to yield the file's
  # "\u3042" as the EUC-JP string "\xa4\xa2".
  def test_s_foreach_enc_enc
    with_tmpdir do
      generate_file("t", "\u3042")
      IO.foreach("t", mode: "r:utf-8:euc-jp") do |line|
        assert_equal(Encoding::EUC_JP, line.encoding)
        assert_equal("\xa4\xa2".force_encoding("euc-jp"), line)
      end
    end
  end
  # IO.foreach with mode "r" and :encoding "utf-8:euc-jp" is expected to yield
  # the file's "\u3042" as the EUC-JP string "\xa4\xa2".
  def test_s_foreach_enc_enc_in_opt
    with_tmpdir do
      generate_file("t", "\u3042")
      IO.foreach("t", mode: "r", encoding: "utf-8:euc-jp") do |line|
        assert_equal(Encoding::EUC_JP, line.encoding)
        assert_equal("\xa4\xa2".force_encoding("euc-jp"), line)
      end
    end
  end
  # IO.foreach with mode "r" and separate :external_encoding /
  # :internal_encoding options is expected to yield EUC-JP strings.
  def test_s_foreach_enc_enc_in_opt2
    with_tmpdir do
      generate_file("t", "\u3042")
      IO.foreach("t", mode: "r", external_encoding: "utf-8", internal_encoding: "euc-jp") do |line|
        assert_equal(Encoding::EUC_JP, line.encoding)
        assert_equal("\xa4\xa2".force_encoding("euc-jp"), line)
      end
    end
  end
  # IO.foreach with :open_args carrying the mode string "r:ascii-8bit" is
  # expected to yield ASCII-8BIT strings.
  def test_s_foreach_open_args_enc
    with_tmpdir do
      generate_file("t", "\xff")
      IO.foreach("t", open_args: ["r:ascii-8bit"]) do |line|
        assert_equal(Encoding::ASCII_8BIT, line.encoding)
        assert_equal("\xff".force_encoding("ascii-8bit"), line)
      end
    end
  end
  # IO.foreach with :open_args carrying mode "r" plus an :encoding option is
  # expected to yield ASCII-8BIT strings.
  def test_s_foreach_open_args_enc_in_opt
    with_tmpdir do
      generate_file("t", "\xff")
      IO.foreach("t", open_args: ["r", { encoding: "ascii-8bit" }]) do |line|
        assert_equal(Encoding::ASCII_8BIT, line.encoding)
        assert_equal("\xff".force_encoding("ascii-8bit"), line)
      end
    end
  end
  # IO.foreach with :open_args carrying mode "r" plus an :external_encoding
  # option is expected to yield ASCII-8BIT strings.
  def test_s_foreach_open_args_enc_in_opt2
    with_tmpdir do
      generate_file("t", "\xff")
      IO.foreach("t", open_args: ["r", { external_encoding: "ascii-8bit" }]) do |line|
        assert_equal(Encoding::ASCII_8BIT, line.encoding)
        assert_equal("\xff".force_encoding("ascii-8bit"), line)
      end
    end
  end
  # IO.foreach with :open_args carrying "r:utf-8:euc-jp" is expected to yield
  # the file's "\u3042" as the EUC-JP string "\xa4\xa2".
  def test_s_foreach_open_args_enc_enc
    with_tmpdir do
      generate_file("t", "\u3042")
      IO.foreach("t", open_args: ["r:utf-8:euc-jp"]) do |line|
        assert_equal(Encoding::EUC_JP, line.encoding)
        assert_equal("\xa4\xa2".force_encoding("euc-jp"), line)
      end
    end
  end
  # IO.foreach with :open_args carrying mode "r" plus :encoding
  # "utf-8:euc-jp" is expected to yield EUC-JP strings.
  def test_s_foreach_open_args_enc_enc_in_opt
    with_tmpdir do
      generate_file("t", "\u3042")
      IO.foreach("t", open_args: ["r", { encoding: "utf-8:euc-jp" }]) do |line|
        assert_equal(Encoding::EUC_JP, line.encoding)
        assert_equal("\xa4\xa2".force_encoding("euc-jp"), line)
      end
    end
  end
  # IO.foreach with :open_args carrying mode "r" plus separate
  # :external_encoding / :internal_encoding options is expected to yield
  # EUC-JP strings.
  def test_s_foreach_open_args_enc_enc_in_opt2
    with_tmpdir do
      generate_file("t", "\u3042")
      open_args = ["r", { external_encoding: "utf-8", internal_encoding: "euc-jp" }]
      IO.foreach("t", open_args: open_args) do |line|
        assert_equal(Encoding::EUC_JP, line.encoding)
        assert_equal("\xa4\xa2".force_encoding("euc-jp"), line)
      end
    end
  end
  # Every combination below mixes a text/binary mode flag with a :textmode or
  # :binmode option; each open call is expected to raise ArgumentError.
  def test_both_textmode_binmode
    bug5918 = '[ruby-core:42199]'
    conflicting = [
      ["r",  { textmode: true, binmode: true }],
      ["rt", { binmode: true }],
      ["rt", { binmode: false }],
      ["rb", { textmode: true }],
      ["rb", { textmode: false }],
    ]
    conflicting.each do |mode, opts|
      assert_raise(ArgumentError, bug5918) { open("not-exist", mode, **opts) }
    end
  end
  # Reading in text mode ("rt" or textmode: true) is expected to decode CRLF,
  # CR and LF line endings to "\n"; with universal_newline: false the raw
  # CRLF bytes are expected back unchanged.
  def test_textmode_decode_universal_newline_read
    with_tmpdir {
      generate_file("t.crlf", "a\r\nb\r\nc\r\n")
      assert_equal("a\nb\nc\n", File.read("t.crlf", mode:"rt:euc-jp:utf-8"))
      assert_equal("a\nb\nc\n", File.read("t.crlf", mode:"rt"))
      open("t.crlf", "rt:euc-jp:utf-8") {|f| assert_equal("a\nb\nc\n", f.read) }
      open("t.crlf", "rt") {|f| assert_equal("a\nb\nc\n", f.read) }
      open("t.crlf", "r", :textmode=>true) {|f| assert_equal("a\nb\nc\n", f.read) }
      open("t.crlf", "r", textmode: true, universal_newline: false) {|f|
        assert_equal("a\r\nb\r\nc\r\n", f.read)
      }

      generate_file("t.cr", "a\rb\rc\r")
      assert_equal("a\nb\nc\n", File.read("t.cr", mode:"rt:euc-jp:utf-8"))
      assert_equal("a\nb\nc\n", File.read("t.cr", mode:"rt"))

      generate_file("t.lf", "a\nb\nc\n")
      # Read the LF fixture that was just generated, so the LF case is
      # actually exercised.
      assert_equal("a\nb\nc\n", File.read("t.lf", mode:"rt:euc-jp:utf-8"))
      assert_equal("a\nb\nc\n", File.read("t.lf", mode:"rt"))
    }
  end
  # getc on a file opened with "rt" is expected to return "\n" for each line
  # ending, whether the file uses CRLF, CR or LF, and nil at end of file.
  def test_textmode_decode_universal_newline_getc
    expected_chars = ["a", "\n", "b", "\n", "c", "\n", nil]
    fixtures = [
      ["t.crlf", "a\r\nb\r\nc\r\n"],
      ["t.cr", "a\rb\rc\r"],
      ["t.lf", "a\nb\nc\n"],
    ]
    with_tmpdir do
      fixtures.each do |path, raw|
        generate_file(path, raw)
        open(path, "rt") do |io|
          expected_chars.each { |ch| assert_equal(ch, io.getc) }
        end
      end
    end
  end
  # gets on a file opened with "rt" is expected to return "\n"-terminated
  # lines, whether the file uses CRLF, CR or LF, and nil at end of file.
  def test_textmode_decode_universal_newline_gets
    expected_lines = ["a\n", "b\n", "c\n", nil]
    fixtures = [
      ["t.crlf", "a\r\nb\r\nc\r\n"],
      ["t.cr", "a\rb\rc\r"],
      ["t.lf", "a\nb\nc\n"],
    ]
    with_tmpdir do
      fixtures.each do |path, raw|
        generate_file(path, raw)
        open(path, "rt") do |io|
          expected_lines.each { |line| assert_equal(line, io.gets) }
        end
      end
    end
  end
  # Text-mode reads of UTF-16BE/LE files converted to UTF-8 are expected to
  # decode CRLF, CR and LF line endings to "\n".
  def test_textmode_decode_universal_newline_utf16
    # Each row: file name, raw bytes written, mode string used to read it.
    fixtures = [
      ["t.utf16be.crlf", "\0a\0\r\0\n\0b\0\r\0\n\0c\0\r\0\n", "rt:utf-16be:utf-8"],
      ["t.utf16le.crlf", "a\0\r\0\n\0b\0\r\0\n\0c\0\r\0\n\0", "rt:utf-16le:utf-8"],
      ["t.utf16be.cr", "\0a\0\r\0b\0\r\0c\0\r", "rt:utf-16be:utf-8"],
      ["t.utf16le.cr", "a\0\r\0b\0\r\0c\0\r\0", "rt:utf-16le:utf-8"],
      ["t.utf16be.lf", "\0a\0\n\0b\0\n\0c\0\n", "rt:utf-16be:utf-8"],
      ["t.utf16le.lf", "a\0\n\0b\0\n\0c\0\n\0", "rt:utf-16le:utf-8"],
    ]
    with_tmpdir do
      fixtures.each do |path, raw, read_mode|
        generate_file(path, raw)
        assert_equal("a\nb\nc\n", File.read(path, mode: read_mode))
      end
    end
  end
  # Memo for system_newline: empty until the first probe, then holds the bytes
  # that a text-mode write of "\n" produced.
  SYSTEM_NEWLINE = []

  # Returns the byte sequence written for "\n" by a "wt"-mode file, probing it
  # once via a temp file and reusing the memoized value afterwards.
  def system_newline
    if SYSTEM_NEWLINE.empty?
      with_tmpdir do
        open("newline", "wt") { |out| out.print "\n" }
        open("newline", "rb") { |raw| SYSTEM_NEWLINE << raw.read }
      end
    end
    SYSTEM_NEWLINE.first
  end
  # Lines written with puts to a "wt" file are expected to end with the
  # sequence reported by system_newline when read back in binary mode.
  def test_textmode_encode_newline
    with_tmpdir do
      open("t.txt", "wt") do |out|
        out.puts "abc"
        out.puts "def"
      end
      raw = File.read("t.txt", mode: "rb")
      eol = system_newline
      assert_equal("abc#{eol}def#{eol}", raw)
    end
  end
  # Lines written with puts to a "wt:euc-jp" file are expected to contain the
  # EUC-JP bytes for the text followed by the system newline sequence.
  def test_textmode_encode_newline_enc
    with_tmpdir do
      open("t.txt", "wt:euc-jp") do |out|
        out.puts "abc\u3042"
        out.puts "def\u3044"
      end
      raw = File.read("t.txt", mode: "rb:ascii-8bit")
      eol = system_newline
      assert_equal("abc\xA4\xA2#{eol}def\xA4\xA4#{eol}", raw)
    end
  end
  # Opening with mode "rb" together with newline: :universal is expected to
  # raise ArgumentError.
  def test_binmode_decode_universal_newline
    with_tmpdir do
      generate_file("t.txt", "a\n")
      assert_raise(ArgumentError) do
        open("t.txt", "rb", newline: :universal) {}
      end
    end
  end
  # gets on a file opened with mode "r" and newline: :universal is expected to
  # return "\n"-terminated lines for CRLF, CR and LF input, then nil.
  def test_default_mode_decode_universal_newline_gets
    expected_lines = ["a\n", "b\n", "c\n", nil]
    fixtures = [
      ["t.crlf", "a\r\nb\r\nc\r\n"],
      ["t.cr", "a\rb\rc\r"],
      ["t.lf", "a\nb\nc\n"],
    ]
    with_tmpdir do
      fixtures.each do |path, raw|
        generate_file(path, raw)
        open(path, "r", newline: :universal) do |io|
          expected_lines.each { |line| assert_equal(line, io.gets) }
        end
      end
    end
  end
  # Reads a CRLF UTF-8 file with conversion to UTF-16BE: "rb" is expected to
  # keep CRLF, "rt" is expected to yield LF, and plain "r" is expected to keep
  # CRLF only when system_newline is "\n".
  def test_read_newline_conversion_with_encoding_conversion
    crlf_utf16 = "\0a\0\r\0\n\0b\0\r\0\n".force_encoding("UTF-16BE")
    lf_utf16 = "\0a\0\n\0b\0\n".force_encoding("UTF-16BE")
    with_tmpdir do
      generate_file("t.utf8.crlf", "a\r\nb\r\n")
      open("t.utf8.crlf", "rb:utf-8:utf-16be") do |io|
        assert_equal(crlf_utf16, io.read)
      end
      open("t.utf8.crlf", "rt:utf-8:utf-16be") do |io|
        assert_equal(lf_utf16, io.read)
      end
      open("t.utf8.crlf", "r:utf-8:utf-16be") do |io|
        data = io.read
        expected = system_newline == "\n" ? crlf_utf16 : lf_utf16
        assert_equal(expected, data)
      end
    end
  end
  # Reading a UTF-16BE CRLF file with "rb:utf-16be" (no internal encoding) is
  # expected to return the bytes unchanged, tagged as UTF-16BE.
  def test_read_newline_conversion_without_encoding_conversion
    with_tmpdir do
      generate_file("t.utf16.crlf", "\0a\0\r\0\n\0b\0\r\0\n")
      open("t.utf16.crlf", "rb:utf-16be") do |io|
        data = io.read
        expected = "\0a\0\r\0\n\0b\0\r\0\n".force_encoding("UTF-16BE")
        assert_equal(expected, data)
      end
    end
  end
  # Opening with "rt:utf-16be" or "r:utf-16be" (no internal encoding) is
  # expected to raise ArgumentError.
  def test_read_newline_conversion_error
    with_tmpdir do
      generate_file("empty.txt", "")
      # ascii incompatible encoding without conversion needs binmode.
      %w[rt:utf-16be r:utf-16be].each do |mode|
        assert_raise(ArgumentError) do
          open("empty.txt", mode) { |io| }
        end
      end
    end
  end
  1553. def test_read_mode
  1554. with_tmpdir {
  1555. generate_file("t", "a\rb\r\nc\n\xc2\xa2")
  1556. generate_file("ie", "a\rb\r\nc\n\e$B\x42\x22\e(B")
  1557. generate_file("iu", "a\rb\r\nc\n\e$B\x21\x71\e(B")
  1558. generate_file("be", "\0a\0\r\0b\0\r\0\n\0c\0\n\x85\x35")
  1559. generate_file("bu", "\0a\0\r\0b\0\r\0\n\0c\0\n\0\xa2")
  1560. # "\xc2\xa2" is valid as EUC-JP and UTF-8
  1561. # EUC-JP UTF-8 Unicode
  1562. # 0xC2A2 0xE894B5 U+8535
  1563. # 0xA1F1 0xC2A2 U+00A2
  1564. open("t","rt") {|f| assert_equal("a\nb\nc\n\xc2…

Large files are truncated, but you can click here to view the full file