PageRenderTime 42ms CodeModel.GetById 1ms RepoModel.GetById 0ms app.codeStats 1ms

/test/ruby/test_io_m17n.rb

http://github.com/ruby/ruby
Ruby | 2747 lines | 2499 code | 226 blank | 22 comment | 35 complexity | b98bec9202b52aed94529f8d651dee9a MD5 | raw file
Possible License(s): GPL-2.0, BSD-3-Clause, AGPL-3.0
  1. # coding: US-ASCII
  2. # frozen_string_literal: false
  3. require 'test/unit'
  4. require 'tmpdir'
  5. require 'tempfile'
  6. require 'timeout'
  7. class TestIO_M17N < Test::Unit::TestCase
  # Encodings iterated over by the ENCS.each loops in this class.
  ENCS = [Encoding::ASCII_8BIT, Encoding::EUC_JP, Encoding::Shift_JIS, Encoding::UTF_8]
  # Creates a temporary directory, makes it the current working directory
  # for the duration of the block, and yields its path.
  def with_tmpdir
    Dir.mktmpdir do |tmp_path|
      Dir.chdir(tmp_path) { yield tmp_path }
    end
  end
  # Runs a writer and a reader concurrently over a freshly created IO.pipe.
  #
  # args - positional arguments forwarded to IO.pipe; a trailing Hash is
  #        popped off and forwarded as keyword options instead.
  # wp   - callable invoked with the write end on its own thread.
  # rp   - callable invoked with the read end on its own thread.
  #
  # An exception raised inside either callable is captured (after closing
  # that callable's pipe end) and re-raised from the calling thread once
  # cleanup is done; the writer's exception takes precedence. Calls
  # flunk("timeout") when a thread has not finished within its 10 second
  # join.
  def pipe(*args, wp, rp)
    reader_error = nil
    writer_error = nil
    options = args.last.is_a?(Hash) ? args.pop : {}
    r, w = IO.pipe(*args, **options)
    reader_thread = Thread.new do
      begin
        rp.call(r)
      rescue Exception => e
        # Deliberately broad: assertion failures must reach the caller too.
        r.close
        reader_error = e
      end
    end
    writer_thread = Thread.new do
      begin
        wp.call(w)
      rescue Exception => e
        w.close
        writer_error = e
      end
    end
    finished = writer_thread.join(10) && reader_thread.join(10)
    flunk("timeout") unless finished
  ensure
    w.close if w && !w.closed?
    r.close if r && !r.closed?
    if writer_thread
      writer_thread.kill
      writer_thread.join
    end
    if reader_thread
      reader_thread.kill
      reader_thread.join
    end
    raise writer_error if writer_error
    raise reader_error if reader_error
  end
  # Creates a pipe with IO.pipe(*args), yields its read and write ends, and
  # closes whichever ends are still open once the block has finished.
  def with_pipe(*args)
    read_end, write_end = IO.pipe(*args)
    begin
      yield read_end, write_end
    ensure
      [read_end, write_end].each do |io|
        io.close unless io.closed?
      end
    end
  end
  # Writes +content+ to +path+ in binary mode, creating or truncating the
  # file, and returns the number of bytes written.
  #
  # Uses File.binwrite rather than Kernel#open so that a path beginning
  # with "|" is always treated as a file name and never as a command.
  def generate_file(path, content)
    File.binwrite(path, content)
  end
  # Returns a Ruby-expression-like description of +str+: its dumped form
  # followed by a force_encoding call naming its encoding.
  def encdump(str)
    dumped = str.dump
    encoding_name = str.encoding.name.dump
    "#{dumped}.force_encoding(#{encoding_name})"
  end
  # Asserts that +expected+ equals +actual+, with a failure message that
  # shows both strings through encdump.
  def assert_str_equal(expected, actual, message=nil)
    detail = <<~EOT
      #{encdump expected} expected but not equal to
      #{encdump actual}.
    EOT
    assert_equal(expected, actual, build_message(message, detail))
  end
  # Mode "r": default external encoding, no internal encoding.
  def test_open_r
    with_tmpdir do
      generate_file('tmp', "")
      open("tmp", "r") do |io|
        assert_equal(Encoding.default_external, io.external_encoding)
        assert_equal(nil, io.internal_encoding)
      end
    end
  end
  # Mode "rb": external encoding is ASCII-8BIT, no internal encoding.
  def test_open_rb
    with_tmpdir do
      generate_file('tmp', "")
      open("tmp", "rb") do |io|
        assert_equal(Encoding.find("ASCII-8BIT"), io.external_encoding)
        assert_equal(nil, io.internal_encoding)
      end
    end
  end
  # Mode "r:euc-jp": external encoding comes from the mode string.
  def test_open_r_enc
    with_tmpdir do
      generate_file('tmp', "")
      open("tmp", "r:euc-jp") do |io|
        assert_equal(Encoding::EUC_JP, io.external_encoding)
        assert_equal(nil, io.internal_encoding)
      end
    end
  end
  # With ASCII-8BIT as the default external encoding, every read mode below
  # reports ASCII-8BIT externally and no internal encoding, whether the
  # default internal encoding is UTF-8 or nil.
  def test_open_r_ascii8bit
    with_tmpdir do
      generate_file('tmp', "")
      EnvUtil.with_default_external(Encoding::ASCII_8BIT) do
        [Encoding::UTF_8, nil].each do |default_internal|
          EnvUtil.with_default_internal(default_internal) do
            ["r", "r:ascii-8bit", "r:ascii-8bit:utf-16"].each do |mode|
              open("tmp", mode) do |io|
                assert_equal(Encoding::ASCII_8BIT, io.external_encoding)
                assert_equal(nil, io.internal_encoding)
              end
            end
          end
        end
      end
    end
  end
  134. def test_open_r_enc_in_opt
  135. with_tmpdir {
  136. generate_file('tmp', "")
  137. open("tmp", "r", encoding: "euc-jp") {|f|
  138. assert_equal(Encoding::EUC_JP, f.external_encoding)
  139. assert_equal(nil, f.internal_encoding)
  140. }
  141. }
  142. end
  143. def test_open_r_encname_in_opt
  144. with_tmpdir {
  145. generate_file('tmp', "")
  146. open("tmp", "r", encoding: Encoding::EUC_JP) {|f|
  147. assert_equal(Encoding::EUC_JP, f.external_encoding)
  148. assert_equal(nil, f.internal_encoding)
  149. }
  150. }
  151. end
  152. def test_open_r_ext_enc_in_opt
  153. with_tmpdir {
  154. generate_file('tmp', "")
  155. open("tmp", "r", external_encoding: Encoding::EUC_JP) {|f|
  156. assert_equal(Encoding::EUC_JP, f.external_encoding)
  157. assert_equal(nil, f.internal_encoding)
  158. }
  159. }
  160. end
  161. def test_open_r_ext_encname_in_opt
  162. with_tmpdir {
  163. generate_file('tmp', "")
  164. open("tmp", "r", external_encoding: "euc-jp") {|f|
  165. assert_equal(Encoding::EUC_JP, f.external_encoding)
  166. assert_equal(nil, f.internal_encoding)
  167. }
  168. }
  169. end
  # External and internal encodings both given as Encoding objects.
  def test_open_r_enc_enc
    with_tmpdir do
      generate_file('tmp', "")
      options = { external_encoding: Encoding::EUC_JP, internal_encoding: Encoding::UTF_8 }
      open("tmp", "r", **options) do |io|
        assert_equal(Encoding::EUC_JP, io.external_encoding)
        assert_equal(Encoding::UTF_8, io.internal_encoding)
      end
    end
  end
  # Mode "r:euc-jp:utf-8" sets both external and internal encodings.
  def test_open_r_encname_encname
    with_tmpdir do
      generate_file('tmp', "")
      open("tmp", "r:euc-jp:utf-8") do |io|
        assert_equal(Encoding::EUC_JP, io.external_encoding)
        assert_equal(Encoding::UTF_8, io.internal_encoding)
      end
    end
  end
  188. def test_open_r_encname_encname_in_opt
  189. with_tmpdir {
  190. generate_file('tmp', "")
  191. open("tmp", "r", encoding: "euc-jp:utf-8") {|f|
  192. assert_equal(Encoding::EUC_JP, f.external_encoding)
  193. assert_equal(Encoding::UTF_8, f.internal_encoding)
  194. }
  195. }
  196. end
  197. def test_open_r_enc_enc_in_opt
  198. with_tmpdir {
  199. generate_file('tmp', "")
  200. open("tmp", "r", external_encoding: Encoding::EUC_JP, internal_encoding: Encoding::UTF_8) {|f|
  201. assert_equal(Encoding::EUC_JP, f.external_encoding)
  202. assert_equal(Encoding::UTF_8, f.internal_encoding)
  203. }
  204. }
  205. end
  206. def test_open_r_externalencname_internalencname_in_opt
  207. with_tmpdir {
  208. generate_file('tmp', "")
  209. open("tmp", "r", external_encoding: "euc-jp", internal_encoding: "utf-8") {|f|
  210. assert_equal(Encoding::EUC_JP, f.external_encoding)
  211. assert_equal(Encoding::UTF_8, f.internal_encoding)
  212. }
  213. }
  214. end
  215. def test_open_w
  216. with_tmpdir {
  217. open("tmp", "w") {|f|
  218. assert_equal(nil, f.external_encoding)
  219. assert_equal(nil, f.internal_encoding)
  220. }
  221. }
  222. end
  # Mode "wb": external encoding is ASCII-8BIT, no internal encoding.
  def test_open_wb
    with_tmpdir do
      open("tmp", "wb") do |io|
        assert_equal(Encoding.find("ASCII-8BIT"), io.external_encoding)
        assert_equal(nil, io.internal_encoding)
      end
    end
  end
  # Mode "w:euc-jp": external encoding comes from the mode string.
  def test_open_w_enc
    with_tmpdir do
      open("tmp", "w:euc-jp") do |io|
        assert_equal(Encoding::EUC_JP, io.external_encoding)
        assert_equal(nil, io.internal_encoding)
      end
    end
  end
  239. def test_open_w_enc_in_opt
  240. with_tmpdir {
  241. open("tmp", "w", encoding: "euc-jp") {|f|
  242. assert_equal(Encoding::EUC_JP, f.external_encoding)
  243. assert_equal(nil, f.internal_encoding)
  244. }
  245. }
  246. end
  247. def test_open_w_enc_in_opt2
  248. with_tmpdir {
  249. open("tmp", "w", external_encoding: "euc-jp") {|f|
  250. assert_equal(Encoding::EUC_JP, f.external_encoding)
  251. assert_equal(nil, f.internal_encoding)
  252. }
  253. }
  254. end
  # Mode "w:euc-jp:utf-8" sets both external and internal encodings.
  def test_open_w_enc_enc
    with_tmpdir do
      open("tmp", "w:euc-jp:utf-8") do |io|
        assert_equal(Encoding::EUC_JP, io.external_encoding)
        assert_equal(Encoding::UTF_8, io.internal_encoding)
      end
    end
  end
  263. def test_open_w_enc_enc_in_opt
  264. with_tmpdir {
  265. open("tmp", "w", encoding: "euc-jp:utf-8") {|f|
  266. assert_equal(Encoding::EUC_JP, f.external_encoding)
  267. assert_equal(Encoding::UTF_8, f.internal_encoding)
  268. }
  269. }
  270. end
  271. def test_open_w_enc_enc_in_opt2
  272. with_tmpdir {
  273. open("tmp", "w", external_encoding: "euc-jp", internal_encoding: "utf-8") {|f|
  274. assert_equal(Encoding::EUC_JP, f.external_encoding)
  275. assert_equal(Encoding::UTF_8, f.internal_encoding)
  276. }
  277. }
  278. end
  # An encoding-carrying mode string still works when a permission argument
  # follows it.
  def test_open_w_enc_enc_perm
    with_tmpdir do
      open("tmp", "w:euc-jp:utf-8", 0600) do |io|
        assert_equal(Encoding::EUC_JP, io.external_encoding)
        assert_equal(Encoding::UTF_8, io.internal_encoding)
      end
    end
  end
  287. def test_ignored_encoding_option
  288. enc = "\u{30a8 30f3 30b3 30fc 30c7 30a3 30f3 30b0}"
  289. pattern = /#{enc}/
  290. assert_warning(pattern) {
  291. open(IO::NULL, external_encoding: "us-ascii", encoding: enc) {}
  292. }
  293. assert_warning(pattern) {
  294. open(IO::NULL, internal_encoding: "us-ascii", encoding: enc) {}
  295. }
  296. end
  # IO.new with mode "r:sjis" yields strings tagged Windows-31J.
  def test_io_new_enc
    with_tmpdir do
      generate_file("tmp", "\xa1")
      io = IO.new(IO.sysopen("tmp"), "r:sjis")
      begin
        assert_equal(Encoding::Windows_31J, io.read.encoding)
      ensure
        io.close
      end
    end
  end
  # IO.pipe with invalid: :replace turns an invalid byte into "?".
  def test_s_pipe_invalid
    writer = proc do |w|
      w << "\x80"
      w.close
    end
    reader = proc { |r| assert_equal("?", r.read) }
    pipe("utf-8", "euc-jp", { invalid: :replace }, writer, reader)
  end
  # IO.pipe with undef: :replace turns an unconvertible character into "?".
  def test_s_pipe_undef
    writer = proc do |w|
      w << "\ufffd"
      w.close
    end
    reader = proc { |r| assert_equal("?", r.read) }
    pipe("utf-8:euc-jp", { undef: :replace }, writer, reader)
  end
  # The replace: option supplies the replacement string for undef: :replace.
  def test_s_pipe_undef_replace_string
    writer = proc do |w|
      w << "\ufffd"
      w.close
    end
    reader = proc { |r| assert_equal("X", r.read) }
    pipe("utf-8:euc-jp", { undef: :replace, replace: "X" }, writer, reader)
  end
  # A dup of a transcoding pipe end still converts UTF-8 input to EUC-JP.
  def test_dup
    writer = proc do |w|
      w << "\u3042"
      w.close
    end
    reader = proc do |r|
      copy = r.dup
      begin
        assert_equal("\xA4\xA2".force_encoding("euc-jp"), copy.read)
      ensure
        copy.close
      end
    end
    pipe("utf-8:euc-jp", writer, reader)
  end
  # A dup of the read end keeps the undef: :replace conversion option.
  def test_dup_undef
    writer = proc do |w|
      w << "\uFFFD"
      w.close
    end
    reader = proc do |r|
      copy = r.dup
      begin
        assert_equal("?", copy.read)
      ensure
        copy.close
      end
    end
    pipe("utf-8:euc-jp", { undef: :replace }, writer, reader)
  end
  # STDIN uses the default external encoding and has no internal encoding.
  def test_stdin
    external = STDIN.external_encoding
    internal = STDIN.internal_encoding
    assert_equal(Encoding.default_external, external)
    assert_equal(nil, internal)
  end
  # STDOUT has neither an external nor an internal encoding.
  def test_stdout
    external = STDOUT.external_encoding
    internal = STDOUT.internal_encoding
    assert_equal(nil, external)
    assert_equal(nil, internal)
  end
  # STDERR has neither an external nor an internal encoding.
  def test_stderr
    external = STDERR.external_encoding
    internal = STDERR.internal_encoding
    assert_equal(nil, external)
    assert_equal(nil, internal)
  end
  # gets with a terminator expressed in the internal encoding (ISO-8859-1)
  # while the file is read as UTF-8.
  def test_terminator_conversion
    with_tmpdir do
      generate_file('tmp', "before \u00FF after")
      terminator = "\xFF".force_encoding("iso-8859-1")
      line = open("tmp", "r:utf-8:iso-8859-1") { |f| f.gets(terminator) }
      assert_equal(Encoding.find("iso-8859-1"), line.encoding)
      assert_str_equal("before \xFF".force_encoding("iso-8859-1"), line, '[ruby-core:14288]')
    end
  end
  # The terminator's bytes straddle two EUC-JP characters in the file, so
  # gets must not match it and returns the whole content.
  def test_terminator_conversion2
    with_tmpdir do
      generate_file('tmp', "before \xA1\xA2\xA2\xA3 after")
      terminator = "\xA2\xA2".force_encoding("euc-jp").encode("utf-8")
      line = open("tmp", "r:euc-jp:utf-8") { |f| f.gets(terminator) }
      assert_equal(Encoding.find("utf-8"), line.encoding)
      assert_str_equal("before \xA1\xA2\xA2\xA3 after".force_encoding("euc-jp").encode("utf-8"), line, '[ruby-core:14319]')
    end
  end
  # gets("0") on an ISO-2022-JP file converted to EUC-JP returns the whole
  # converted content.
  def test_terminator_stateful_conversion
    with_tmpdir do
      src = "before \e$B\x23\x30\x23\x31\e(B after".force_encoding("iso-2022-jp")
      generate_file('tmp', src)
      terminator = "0".force_encoding("euc-jp")
      line = open("tmp", "r:iso-2022-jp:euc-jp") { |f| f.gets(terminator) }
      assert_equal(Encoding.find("euc-jp"), line.encoding)
      assert_str_equal(src.encode("euc-jp"), line)
    end
  end
  # A non-ASCII terminator tagged UTF-8 raises ArgumentError on a file
  # opened as EUC-JP.
  def test_nonascii_terminator
    with_tmpdir do
      generate_file('tmp', "before \xA2\xA2 after")
      open("tmp", "r:euc-jp") do |f|
        assert_raise(ArgumentError) do
          f.gets("\xA2\xA2".force_encoding("utf-8"))
        end
      end
    end
  end
  # gets with a converted terminator on a transcoding pipe returns the text
  # up to and including the terminator within one second.
  def test_pipe_terminator_conversion
    rs = "\xA2\xA2".encode("utf-8", "euc-jp")
    writer = proc do |w|
      w.write "before \xa2\xa2 after"
      w.close
    end
    reader = proc do |r|
      Timeout.timeout(1) do
        assert_equal("before \xa2\xa2".encode("utf-8", "euc-jp"),
                     r.gets(rs))
      end
    end
    pipe("euc-jp:utf-8", writer, reader)
  end
  # getc on a transcoding pipe returns one converted character.
  def test_pipe_conversion
    writer = proc { |w| w.write "\xa1\xa1" }
    reader = proc do |r|
      assert_equal("\xa1\xa1".encode("utf-8", "euc-jp"), r.getc)
    end
    pipe("euc-jp:utf-8", writer, reader)
  end
  # getc assembles a character whose two bytes are written separately with
  # a pause in between.
  def test_pipe_convert_partial_read
    writer = proc do |w|
      w.write "\xa1"
      sleep 0.1
      w.write "\xa1"
    end
    reader = proc do |r|
      assert_equal("\xa1\xa1".encode("utf-8", "euc-jp"), r.getc)
    end
    pipe("euc-jp:utf-8", writer, reader)
  end
  # getc raises InvalidByteSequenceError on a bad byte; the error exposes
  # that byte and reading continues with the data after it.
  def test_getc_invalid
    writer = proc do |w|
      w << "\xa1xyz"
      w.close
    end
    reader = proc do |r|
      err = assert_raise(Encoding::InvalidByteSequenceError) { r.getc }
      assert_equal("\xA1".force_encoding("ascii-8bit"), err.error_bytes)
      assert_equal("xyz", r.read(10))
    end
    pipe("euc-jp:utf-8", writer, reader)
  end
  # getc returns successive EUC-JP characters from an ISO-2022-JP file.
  def test_getc_stateful_conversion
    with_tmpdir do
      src = "\e$B\x23\x30\x23\x31\e(B".force_encoding("iso-2022-jp")
      generate_file('tmp', src)
      open("tmp", "r:iso-2022-jp:euc-jp") do |f|
        assert_equal("\xa3\xb0".force_encoding("euc-jp"), f.getc)
        assert_equal("\xa3\xb1".force_encoding("euc-jp"), f.getc)
      end
    end
  end
  # getc in text mode ("rt") returns a complete, valid UTF-8 character.
  def test_getc_newlineconv
    with_tmpdir do
      src = "\u3042"
      generate_file('tmp', src)
      EnvUtil.with_default_external(Encoding::UTF_8) do
        open("tmp", "rt") do |f|
          char = f.getc
          assert_equal(true, char.valid_encoding?)
          assert_equal("\u3042", char)
        end
      end
    end
  end
  # getc in text mode returns the bytes of a truncated UTF-8 sequence one at
  # a time, each flagged as invalid.
  def test_getc_newlineconv_invalid
    with_tmpdir do
      src = "\xE3\x81"
      generate_file('tmp', src)
      EnvUtil.with_default_external(Encoding::UTF_8) do
        open("tmp", "rt") do |f|
          first = f.getc
          assert_equal(false, first.valid_encoding?)
          assert_equal("\xE3".force_encoding("UTF-8"), first)
          second = f.getc
          assert_equal(false, second.valid_encoding?)
          assert_equal("\x81".force_encoding("UTF-8"), second)
        end
      end
    end
  end
  # ungetc with an Integer pushes back that GB18030 character, which is then
  # read ahead of the file content.
  def test_ungetc_int
    with_tmpdir do
      generate_file('tmp', "A")
      content = open("tmp", "r:GB18030") do |f|
        f.ungetc(0x8431A439)
        f.read
      end
      assert_equal(Encoding::GB18030, content.encoding)
      assert_str_equal(0x8431A439.chr("GB18030")+"A", content)
    end
  end
  # ungetc with a GB18030 String pushes it back ahead of the file content.
  def test_ungetc_str
    with_tmpdir do
      generate_file('tmp', "A")
      content = open("tmp", "r:GB18030") do |f|
        f.ungetc(0x8431A439.chr("GB18030"))
        f.read
      end
      assert_equal(Encoding::GB18030, content.encoding)
      assert_str_equal(0x8431A439.chr("GB18030")+"A", content)
    end
  end
  # ungetc before any read on an ISO-2022-JP to EUC-JP stream: the pushed
  # character precedes the converted content.
  def test_ungetc_stateful_conversion
    with_tmpdir do
      src = "before \e$B\x23\x30\x23\x31\e(B after".force_encoding("iso-2022-jp")
      generate_file('tmp', src)
      content = open("tmp", "r:iso-2022-jp:euc-jp") do |f|
        f.ungetc("0".force_encoding("euc-jp"))
        f.read
      end
      assert_equal(Encoding.find("euc-jp"), content.encoding)
      assert_str_equal("0" + src.encode("euc-jp"), content)
    end
  end
  # ungetc after a gets that stopped in the middle of an ISO-2022-JP stream:
  # the pushed character precedes the remaining converted content.
  def test_ungetc_stateful_conversion2
    with_tmpdir do
      src = "before \e$B\x23\x30\x23\x31\e(B after".force_encoding("iso-2022-jp")
      former = "before \e$B\x23\x30\e(B".force_encoding("iso-2022-jp")
      rs = "\e$B\x23\x30\e(B".force_encoding("iso-2022-jp")
      latter = "\e$B\x23\x31\e(B after".force_encoding("iso-2022-jp")
      generate_file('tmp', src)
      rest = open("tmp", "r:iso-2022-jp:euc-jp") do |f|
        assert_equal(former.encode("euc-jp", "iso-2022-jp"),
                     f.gets(rs.encode("euc-jp", "iso-2022-jp")))
        f.ungetc("0")
        f.read
      end
      assert_equal(Encoding.find("euc-jp"), rest.encoding)
      assert_str_equal("0" + latter.encode("euc-jp"), rest)
    end
  end
  # An ASCII-only line read under each encoding in ENCS carries that
  # encoding and compares equal to the source.
  def test_open_ascii
    with_tmpdir do
      src = "abc\n"
      generate_file('tmp', "abc\n")
      ENCS.each do |enc|
        line = open('tmp', "r:#{enc}") { |f| f.gets }
        assert_equal(enc, line.encoding)
        assert_str_equal(src, line)
      end
    end
  end
  # A non-ASCII line read under each encoding in ENCS equals the same bytes
  # tagged with that encoding.
  def test_open_nonascii
    with_tmpdir do
      src = "\xc2\xa1\n"
      generate_file('tmp', src)
      ENCS.each do |enc|
        expected = src.dup.force_encoding(enc)
        line = open('tmp', "r:#{enc}") { |f| f.gets }
        assert_equal(enc, line.encoding)
        assert_str_equal(expected, line)
      end
    end
  end
  # Checks the encoding of what each read method returns, for every
  # encoding in ENCS: character and line readers tag results with the
  # file's external encoding, while length-limited reads return ASCII-8BIT.
  def test_read_encoding
    with_tmpdir do
      src = "\xc2\xa1\n".force_encoding("ASCII-8BIT")
      generate_file('tmp', "\xc2\xa1\n")
      ENCS.each do |enc|
        content = src.dup.force_encoding(enc)
        mode = "r:#{enc}"

        # Single-character readers.
        %i[getc readchar].each do |reader|
          open('tmp', mode) do |f|
            s = f.public_send(reader)
            assert_equal(enc, s.encoding)
            assert_str_equal(content[0], s)
          end
        end

        # Single-line readers.
        %i[gets readline].each do |reader|
          open('tmp', mode) do |f|
            s = f.public_send(reader)
            assert_equal(enc, s.encoding)
            assert_str_equal(content, s)
          end
        end

        open('tmp', mode) do |f|
          lines = f.readlines
          assert_equal(1, lines.length)
          s = lines[0]
          assert_equal(enc, s.encoding)
          assert_str_equal(content, s)
        end

        open('tmp', mode) do |f|
          f.each_line do |s|
            assert_equal(enc, s.encoding)
            assert_str_equal(content, s)
          end
        end

        open('tmp', mode) do |f|
          s = f.read
          assert_equal(enc, s.encoding)
          assert_str_equal(content, s)
        end

        # Length-limited readers return binary strings.
        %i[read readpartial sysread].each do |reader|
          open('tmp', mode) do |f|
            s = f.public_send(reader, 1)
            assert_equal(Encoding::ASCII_8BIT, s.encoding)
            assert_str_equal(src[0], s)
          end
        end
      end
    end
  end
  # Writing to a file opened with plain "w" stores the bytes unchanged,
  # whatever encoding the written string is tagged with.
  def test_write_noenc
    src = "\xc2\xa1\n".force_encoding("ascii-8bit")
    with_tmpdir do
      open('tmp', "w") do |f|
        ENCS.each { |enc| f.write src.dup.force_encoding(enc) }
      end
      open('tmp', 'r:ascii-8bit') do |f|
        assert_equal(src*ENCS.length, f.read)
      end
    end
  end
  # A UTF-8 string printed to a "w:EUC-JP" file is stored as EUC-JP and
  # reads back as UTF-8 through "r:EUC-JP:UTF-8".
  def test_write_conversion
    utf8 = "\u6666"
    eucjp = "\xb3\xa2".force_encoding("EUC-JP")
    with_tmpdir do
      open('tmp', "w:EUC-JP") do |out|
        assert_equal(Encoding::EUC_JP, out.external_encoding)
        assert_equal(nil, out.internal_encoding)
        out.print utf8
      end
      assert_equal(eucjp, File.read('tmp').force_encoding("EUC-JP"))
      open('tmp', 'r:EUC-JP:UTF-8') do |input|
        assert_equal(Encoding::EUC_JP, input.external_encoding)
        assert_equal(Encoding::UTF_8, input.internal_encoding)
        assert_equal(utf8, input.read)
      end
    end
  end
  # Exercises IO.pipe with no encoding, an external encoding only, an
  # external:internal pair, encoding names in an invalid encoding, and each
  # encoding in ENCS.
  def test_pipe
    utf8 = "\u6666"
    eucjp = "\xb3\xa2".force_encoding("EUC-JP")
    # Builds a writer proc that sends +data+ and closes the write end.
    writer_for = lambda do |data|
      proc do |w|
        w << data
        w.close
      end
    end

    # No encoding given: the read end uses the default external encoding.
    default_reader = proc do |r|
      assert_equal(Encoding.default_external, r.external_encoding)
      assert_equal(nil, r.internal_encoding)
      s = r.read
      assert_equal(Encoding.default_external, s.encoding)
      assert_str_equal(utf8.dup.force_encoding(Encoding.default_external), s)
    end
    pipe(writer_for.call(utf8), default_reader)

    # External encoding only.
    eucjp_reader = proc do |r|
      assert_equal(Encoding::EUC_JP, r.external_encoding)
      assert_equal(nil, r.internal_encoding)
      assert_equal(eucjp, r.read)
    end
    pipe("EUC-JP", writer_for.call(eucjp), eucjp_reader)

    # One multibyte character inside a long run of ASCII stays valid.
    long_reader = proc { |r| assert_equal(true, r.read.valid_encoding?) }
    pipe("UTF-8", writer_for.call("a" * 1023 + "\u3042" + "a" * 1022), long_reader)

    # External:internal pair converts on read.
    converting_reader = proc do |r|
      assert_equal(Encoding::UTF_8, r.external_encoding)
      assert_equal(Encoding::EUC_JP, r.internal_encoding)
      assert_equal(eucjp, r.read)
    end
    pipe("UTF-8:EUC-JP", writer_for.call(utf8), converting_reader)

    # Encoding names that are themselves UTF-32BE strings are rejected.
    assert_raise_with_message(ArgumentError, /invalid name encoding/) do
      with_pipe("UTF-8", "UTF-8".encode("UTF-32BE")) {}
    end
    assert_raise_with_message(ArgumentError, /invalid name encoding/) do
      with_pipe("UTF-8".encode("UTF-32BE")) {}
    end

    ENCS.each do |enc|
      getc_reader = proc do |r|
        s = r.getc
        assert_equal(enc, s.encoding)
      end
      pipe(enc, writer_for.call("\xc2\xa1"), getc_reader)
    end

    ENCS.each do |enc|
      next if enc == Encoding::ASCII_8BIT
      next if enc == Encoding::UTF_8
      utf8_reader = proc do |r|
        s = r.read
        assert_equal(Encoding::UTF_8, s.encoding)
        assert_equal(s.encode("UTF-8"), s)
      end
      pipe("#{enc}:UTF-8", writer_for.call("\xc2\xa1"), utf8_reader)
    end
  end
  # Marshal data round-trips through a pipe whose read end is EUC-JP.
  def test_marshal
    data = 56225
    writer = proc do |w|
      Marshal.dump(data, w)
      w.close
    end
    reader = proc do |r|
      loaded = nil
      assert_nothing_raised("[ruby-dev:33264]") { loaded = Marshal.load(r) }
      assert_equal(data, loaded)
    end
    pipe("EUC-JP", writer, reader)
  end
  # gets(nil) on a transcoding pipe returns the whole converted content.
  def test_gets_nil
    writer = proc do |w|
      w << "\u{3042}"
      w.close
    end
    reader = proc do |r|
      whole = r.gets(nil)
      assert_equal("\u{3042}".encode("euc-jp"), whole)
    end
    pipe("UTF-8:EUC-JP", writer, reader)
  end
  # gets(limit) on EUC-JP input: for each byte limit from 1 to 9, the
  # expected result never ends in the middle of a two-byte character and
  # never extends past the newline.
  def test_gets_limit
    data = "\xa4\xa2\xa4\xa4\xa4\xa6\n\xa4\xa8\xa4\xaa"
    expected_by_limit = {
      1 => "\xa4\xa2",
      2 => "\xa4\xa2",
      3 => "\xa4\xa2\xa4\xa4",
      4 => "\xa4\xa2\xa4\xa4",
      5 => "\xa4\xa2\xa4\xa4\xa4\xa6",
      6 => "\xa4\xa2\xa4\xa4\xa4\xa6",
      7 => "\xa4\xa2\xa4\xa4\xa4\xa6\n",
      8 => "\xa4\xa2\xa4\xa4\xa4\xa6\n",
      9 => "\xa4\xa2\xa4\xa4\xa4\xa6\n",
    }
    expected_by_limit.each do |limit, bytes|
      pipe("euc-jp",
           proc {|w| w << data; w.close },
           proc {|r| assert_equal(bytes.force_encoding("euc-jp"), r.gets(limit)) })
    end
  end
  # gets raises on an invalid byte, reports it through error_bytes, and the
  # next gets returns the data after it.
  def test_gets_invalid
    before = "\u{3042}\u{3044}"
    invalid = "\x80".force_encoding("utf-8")
    after = "\u{3046}\u{3048}"
    writer = proc do |w|
      w << before + invalid + after
      w.close
    end
    reader = proc do |r|
      err = assert_raise(Encoding::InvalidByteSequenceError) { r.gets }
      assert_equal(invalid.force_encoding("ascii-8bit"), err.error_bytes)
      assert_equal(after.encode("euc-jp"), r.gets)
    end
    pipe("utf-8:euc-jp", writer, reader)
  end
  # getc returns the characters before an invalid UTF-8 byte, raises on the
  # byte itself, then carries on with the characters after it.
  def test_getc_invalid2
    before1 = "\u{3042}"
    before2 = "\u{3044}"
    invalid = "\x80".force_encoding("utf-8")
    after1 = "\u{3046}"
    after2 = "\u{3048}"
    writer = proc do |w|
      w << before1 + before2 + invalid + after1 + after2
      w.close
    end
    reader = proc do |r|
      assert_equal(before1.encode("euc-jp"), r.getc)
      assert_equal(before2.encode("euc-jp"), r.getc)
      err = assert_raise(Encoding::InvalidByteSequenceError) { r.getc }
      assert_equal(invalid.force_encoding("ascii-8bit"), err.error_bytes)
      assert_equal(after1.encode("euc-jp"), r.getc)
      assert_equal(after2.encode("euc-jp"), r.getc)
    end
    pipe("utf-8:euc-jp", writer, reader)
  end
  # Same sequence as test_getc_invalid2, but with UTF-16LE input in binmode
  # and a two-byte invalid sequence.
  def test_getc_invalid3
    before1 = "\x42\x30".force_encoding("utf-16le")
    before2 = "\x44\x30".force_encoding("utf-16le")
    invalid = "\x00\xd8".force_encoding("utf-16le")
    after1 = "\x46\x30".force_encoding("utf-16le")
    after2 = "\x48\x30".force_encoding("utf-16le")
    writer = proc do |w|
      w << before1 + before2 + invalid + after1 + after2
      w.close
    end
    reader = proc do |r|
      assert_equal(before1.encode("euc-jp"), r.getc)
      assert_equal(before2.encode("euc-jp"), r.getc)
      err = assert_raise(Encoding::InvalidByteSequenceError) { r.getc }
      assert_equal(invalid.force_encoding("ascii-8bit"), err.error_bytes)
      assert_equal(after1.encode("euc-jp"), r.getc)
      assert_equal(after2.encode("euc-jp"), r.getc)
    end
    pipe("utf-16le:euc-jp", { binmode: true }, writer, reader)
  end
  # read with no length on a transcoding pipe returns the converted data.
  def test_read_all
    str = "\u3042\u3044"
    writer = proc do |w|
      w << str
      w.close
    end
    reader = proc { |r| assert_equal(str.encode("euc-jp"), r.read) }
    pipe("utf-8:euc-jp", writer, reader)
  end
  # read raises on an invalid byte, reports it through error_bytes, and a
  # second read returns the data after it.
  def test_read_all_invalid
    before = "\u{3042}\u{3044}"
    invalid = "\x80".force_encoding("utf-8")
    after = "\u{3046}\u{3048}"
    writer = proc do |w|
      w << before + invalid + after
      w.close
    end
    reader = proc do |r|
      err = assert_raise(Encoding::InvalidByteSequenceError) { r.read }
      assert_equal(invalid.force_encoding("ascii-8bit"), err.error_bytes)
      assert_equal(after.encode("euc-jp"), r.read)
    end
    pipe("utf-8:euc-jp", writer, reader)
  end
  # File.foreach with an encoding must not raise on a line of 8191 ASCII
  # bytes followed by one two-byte EUC-JP character.
  def test_file_foreach
    with_tmpdir do
      generate_file('tst', 'a' * 8191 + "\xa1\xa1")
      assert_nothing_raised do
        File.foreach('tst', encoding: "euc-jp") { |line| line.inspect }
      end
    end
  end
  # set_encoding("ext:int") switches the source encoding mid-stream: the
  # Shift_JIS bytes that follow are converted to the same EUC-JP character.
  def test_set_encoding
    writer = proc do |w|
      payload = "\u3042".force_encoding("ascii-8bit")
      payload << "\x82\xa0".force_encoding("ascii-8bit")
      w << payload
      w.close
    end
    reader = proc do |r|
      assert_equal("\xa4\xa2".force_encoding("euc-jp"), r.getc)
      r.set_encoding("shift_jis:euc-jp")
      assert_equal("\xa4\xa2".force_encoding("euc-jp"), r.getc)
    end
    pipe("utf-8:euc-jp", writer, reader)
  end
  # Same as test_set_encoding, passing external and internal encodings to
  # set_encoding as two separate arguments.
  def test_set_encoding2
    writer = proc do |w|
      payload = "\u3042".force_encoding("ascii-8bit")
      payload << "\x82\xa0".force_encoding("ascii-8bit")
      w << payload
      w.close
    end
    reader = proc do |r|
      assert_equal("\xa4\xa2".force_encoding("euc-jp"), r.getc)
      r.set_encoding("shift_jis", "euc-jp")
      assert_equal("\xa4\xa2".force_encoding("euc-jp"), r.getc)
    end
    pipe("utf-8:euc-jp", writer, reader)
  end
  # set_encoding(nil) drops the conversion: the remaining bytes come back
  # unconverted, tagged with the default external encoding.
  def test_set_encoding_nil
    writer = proc do |w|
      payload = "\u3042".force_encoding("ascii-8bit")
      payload << "\x82\xa0".force_encoding("ascii-8bit")
      w << payload
      w.close
    end
    reader = proc do |r|
      assert_equal("\xa4\xa2".force_encoding("euc-jp"), r.getc)
      r.set_encoding(nil)
      assert_equal("\x82\xa0".force_encoding(Encoding.default_external), r.read)
    end
    pipe("utf-8:euc-jp", writer, reader)
  end
  # set_encoding with a single Encoding object: the remaining bytes are
  # returned unconverted in that encoding.
  def test_set_encoding_enc
    writer = proc do |w|
      payload = "\u3042".force_encoding("ascii-8bit")
      payload << "\x82\xa0".force_encoding("ascii-8bit")
      w << payload
      w.close
    end
    reader = proc do |r|
      assert_equal("\xa4\xa2".force_encoding("euc-jp"), r.getc)
      r.set_encoding(Encoding::Shift_JIS)
      assert_equal("\x82\xa0".force_encoding(Encoding::Shift_JIS), r.getc)
    end
    pipe("utf-8:euc-jp", writer, reader)
  end
  # set_encoding accepts invalid: :replace; an invalid byte reads as "?".
  def test_set_encoding_invalid
    writer = proc do |w|
      w << "\x80"
      w.close
    end
    reader = proc do |r|
      r.set_encoding("utf-8:euc-jp", invalid: :replace)
      assert_equal("?", r.read)
    end
    pipe(writer, reader)
  end
  # Setting identical external and internal encodings, however they are
  # spelled (name, alias, Encoding object, or to_str object), must not
  # produce any warning.
  def test_set_encoding_identical
    #bug5568 = '[ruby-core:40727]'
    bug6324 = '[ruby-core:44455]'
    open(__FILE__, "r") do |f|
      spellings = [
        ["eucjp:euc-jp"],
        ["eucjp", "euc-jp"],
        [Encoding::EUC_JP, "euc-jp"],
        ["eucjp", Encoding::EUC_JP],
        [Encoding::EUC_JP, Encoding::EUC_JP],
      ]
      spellings.each do |encodings|
        assert_warning('', bug6324) do
          f.set_encoding(*encodings)
        end
      end
      nonstr = Object.new
      def nonstr.to_str; "eucjp"; end
      assert_warning('', bug6324) do
        f.set_encoding(nonstr, nonstr)
      end
    end
  end
  # set_encoding accepts undef: :replace; an unconvertible character reads
  # as "?".
  def test_set_encoding_undef
    writer = proc do |w|
      w << "\ufffd"
      w.close
    end
    reader = proc do |r|
      r.set_encoding("utf-8", "euc-jp", undef: :replace)
      assert_equal("?", r.read)
    end
    pipe(writer, reader)
  end
  # set_encoding honours a custom replace: string, for both the
  # two-argument form and the "ext:int" form.
  def test_set_encoding_undef_replace
    [["utf-8", "euc-jp"], ["utf-8:euc-jp"]].each do |encodings|
      writer = proc do |w|
        w << "\ufffd"
        w.close
      end
      reader = proc do |r|
        r.set_encoding(*encodings, undef: :replace, replace: "ZZZ")
        assert_equal("ZZZ", r.read)
      end
      pipe(writer, reader)
    end
  end
  # set_encoding("iso-2022-jp") raises ArgumentError on files opened with
  # "rt" or "r", and succeeds on files opened with "rb", switched with
  # #binmode, or opened with binmode: true. Passing binmode: together with
  # mode "rb" raises ArgumentError.
  def test_set_encoding_binmode
    %w[rt r].each do |mode|
      assert_raise(ArgumentError) do
        open(__FILE__, mode) { |f| f.set_encoding("iso-2022-jp") }
      end
    end

    assert_nothing_raised do
      open(__FILE__, "rb") { |f| f.set_encoding("iso-2022-jp") }
    end

    %w[r rt].each do |mode|
      assert_nothing_raised do
        open(__FILE__, mode) do |f|
          f.binmode
          f.set_encoding("iso-2022-jp")
        end
      end
    end

    assert_nothing_raised do
      open(__FILE__, "r", binmode: true) do |f|
        assert_equal(Encoding::ASCII_8BIT, f.external_encoding)
        f.set_encoding("iso-2022-jp")
      end
    end

    [true, false].each do |flag|
      assert_raise(ArgumentError) do
        open(__FILE__, "rb", binmode: flag) { |f| f.set_encoding("iso-2022-jp") }
      end
    end
  end
  # Unknown encoding names passed to set_encoding only produce an
  # "Unsupported" warning; no exception is raised. [ruby-core:40726]
  def test_set_encoding_unsupported
    bug5567 = '[ruby-core:40726]'
    encoding_args = [
      ["fffffffffffxx"],
      ["fffffffffffxx", "us-ascii"],
      ["us-ascii", "fffffffffffxx"],
    ]
    IO.pipe do |r, _w|
      assert_nothing_raised(bug5567) do
        encoding_args.each do |names|
          assert_warning(/Unsupported/, bug5567) { r.set_encoding(*names) }
        end
      end
    end
  end
  # Giving "t" in the mode string together with a textmode: option (true or
  # false) must raise ArgumentError.
  def test_textmode_twice
    [true, false].each do |flag|
      assert_raise(ArgumentError) do
        open(__FILE__, "rt", textmode: flag) do |f|
          f.set_encoding("iso-2022-jp")
        end
      end
    end
  end
  # Writing UTF-8 strings through an "iso-2022-jp:utf-8" writer yields a single
  # ISO-2022-JP escape-sequence-delimited run on the wire.
  def test_write_conversion_fixenc
    writer = proc do |w|
      w.set_encoding("iso-2022-jp:utf-8")
      w << "\u3042"
      w << "\u3044"
      w.close
    end
    reader = proc do |r|
      expected = "\e$B$\"$$\e(B".force_encoding("ascii-8bit")
      actual = r.read.force_encoding("ascii-8bit")
      assert_equal(expected, actual)
    end
    pipe(writer, reader)
  end
  # With only an external encoding (iso-2022-jp) set, strings of different
  # source encodings (UTF-8 and Shift_JIS) are each converted on write.
  def test_write_conversion_anyenc_stateful
    writer = proc do |w|
      w.set_encoding("iso-2022-jp")
      w << "\u3042"
      w << "\x82\xa2".force_encoding("sjis")
      w.close
    end
    reader = proc do |r|
      expected = "\e$B$\"$$\e(B".force_encoding("ascii-8bit")
      actual = r.read.force_encoding("ascii-8bit")
      assert_equal(expected, actual)
    end
    pipe(writer, reader)
  end
  # Same as the stateful variant but with the stateless EUC-JP as the
  # external encoding.
  def test_write_conversion_anyenc_stateless
    writer = proc do |w|
      w.set_encoding("euc-jp")
      w << "\u3042"
      w << "\x82\xa2".force_encoding("sjis")
      w.close
    end
    reader = proc do |r|
      expected = "\xa4\xa2\xa4\xa4".force_encoding("ascii-8bit")
      actual = r.read.force_encoding("ascii-8bit")
      assert_equal(expected, actual)
    end
    pipe(writer, reader)
  end
  # Stateful (iso-2022-jp) write conversion with sync disabled on the writer
  # produces the same bytes as the synchronous case.
  def test_write_conversion_anyenc_stateful_nosync
    writer = proc do |w|
      w.sync = false
      w.set_encoding("iso-2022-jp")
      w << "\u3042"
      w << "\x82\xa2".force_encoding("sjis")
      w.close
    end
    reader = proc do |r|
      expected = "\e$B$\"$$\e(B".force_encoding("ascii-8bit")
      actual = r.read.force_encoding("ascii-8bit")
      assert_equal(expected, actual)
    end
    pipe(writer, reader)
  end
  # Reading EUC-JP bytes from a pipe created as "euc-jp:iso-2022-jp" returns an
  # ISO-2022-JP string including the escape sequences.
  def test_read_stateful
    writer = proc do |w|
      w << "\xA4\xA2"
      w.close
    end
    reader = proc do |r|
      expected = "\e$B$\"\e(B".force_encoding("iso-2022-jp")
      assert_equal(expected, r.read)
    end
    pipe("euc-jp:iso-2022-jp", writer, reader)
  end
  # A child process reopens STDIN onto fd 10 (the temp file), queries its
  # external encoding, then writes through STDIN; the parent checks the bytes
  # that ended up in the file.
  def test_stdin_external_encoding_with_reopen
    child_script = <<~'End'
      io = IO.new(10, "r+")
      STDIN.reopen(io)
      STDIN.external_encoding
      STDIN.write "\u3042"
      STDIN.flush
    End
    with_tmpdir do
      open("tst", "w+") do |f|
        pid = spawn(EnvUtil.rubybin, '-e', child_script, 10=>f)
        Process.wait pid
        f.rewind
        result = f.read.force_encoding("ascii-8bit")
        assert_equal("\u3042".force_encoding("ascii-8bit"), result)
      end
    end
  end unless /mswin|mingw/ =~ RUBY_PLATFORM # passing non-stdio fds is not supported
  # IO.popen with an "r:ascii-8bit" mode string: external encoding is binary,
  # no internal encoding, and the byte is read unchanged.
  def test_popen_r_enc
    command = "#{EnvUtil.rubybin} -e 'putc 255'"
    IO.popen(command, "r:ascii-8bit") do |f|
      assert_equal(Encoding::ASCII_8BIT, f.external_encoding)
      assert_equal(nil, f.internal_encoding)
      output = f.read
      assert_equal(Encoding::ASCII_8BIT, output.encoding)
      assert_equal("\xff".force_encoding("ascii-8bit"), output)
    end
  end
  # Same as test_popen_r_enc, with the encoding given via the encoding: option.
  def test_popen_r_enc_in_opt
    command = "#{EnvUtil.rubybin} -e 'putc 255'"
    IO.popen(command, "r", encoding: "ascii-8bit") do |f|
      assert_equal(Encoding::ASCII_8BIT, f.external_encoding)
      assert_equal(nil, f.internal_encoding)
      output = f.read
      assert_equal(Encoding::ASCII_8BIT, output.encoding)
      assert_equal("\xff".force_encoding("ascii-8bit"), output)
    end
  end
  # Same as test_popen_r_enc, with the encoding given via external_encoding:.
  def test_popen_r_enc_in_opt2
    command = "#{EnvUtil.rubybin} -e 'putc 255'"
    IO.popen(command, "r", external_encoding: "ascii-8bit") do |f|
      assert_equal(Encoding::ASCII_8BIT, f.external_encoding)
      assert_equal(nil, f.internal_encoding)
      output = f.read
      assert_equal(Encoding::ASCII_8BIT, output.encoding)
      assert_equal("\xff".force_encoding("ascii-8bit"), output)
    end
  end
  # IO.popen with "r:shift_jis:euc-jp": the Shift_JIS byte 0xa1 from the child
  # is transcoded to EUC-JP on read.
  def test_popen_r_enc_enc
    command = "#{EnvUtil.rubybin} -e 'putc 0xa1'"
    IO.popen(command, "r:shift_jis:euc-jp") do |f|
      assert_equal(Encoding::Shift_JIS, f.external_encoding)
      assert_equal(Encoding::EUC_JP, f.internal_encoding)
      output = f.read
      assert_equal(Encoding::EUC_JP, output.encoding)
      assert_equal("\x8e\xa1".force_encoding("euc-jp"), output)
    end
  end
  1187. def test_popen_r_enc_enc_in_opt
  1188. IO.popen("#{EnvUtil.rubybin} -e 'putc 0xa1'", "r", encoding: "shift_jis:euc-jp") {|f|
  1189. assert_equal(Encoding::Shift_JIS, f.external_encoding)
  1190. assert_equal(Encoding::EUC_JP, f.internal_encoding)
  1191. s = f.read
  1192. assert_equal(Encoding::EUC_JP, s.encoding)
  1193. assert_equal("\x8e\xa1".force_encoding("euc-jp"), s)
  1194. }
  1195. end
  1196. def test_popen_r_enc_enc_in_opt2
  1197. IO.popen("#{EnvUtil.rubybin} -e 'putc 0xa1'", "r", external_encoding: "shift_jis", internal_encoding: "euc-jp") {|f|
  1198. assert_equal(Encoding::Shift_JIS, f.external_encoding)
  1199. assert_equal(Encoding::EUC_JP, f.internal_encoding)
  1200. s = f.read
  1201. assert_equal(Encoding::EUC_JP, s.encoding)
  1202. assert_equal("\x8e\xa1".force_encoding("euc-jp"), s)
  1203. }
  1204. end
  1205. def test_popenv_r_enc_enc_in_opt2
  1206. IO.popen([EnvUtil.rubybin, "-e", "putc 0xa1"], "r", external_encoding: "shift_jis", internal_encoding: "euc-jp") {|f|
  1207. assert_equal(Encoding::Shift_JIS, f.external_encoding)
  1208. assert_equal(Encoding::EUC_JP, f.internal_encoding)
  1209. s = f.read
  1210. assert_equal(Encoding::EUC_JP, s.encoding)
  1211. assert_equal("\x8e\xa1".force_encoding("euc-jp"), s)
  1212. }
  1213. end
  # Kernel#open on a "|command" string honours the encoding in the mode string.
  def test_open_pipe_r_enc
    command = "|#{EnvUtil.rubybin} -e 'putc 255'"
    open(command, "r:ascii-8bit") do |f|
      assert_equal(Encoding::ASCII_8BIT, f.external_encoding)
      assert_equal(nil, f.internal_encoding)
      output = f.read
      assert_equal(Encoding::ASCII_8BIT, output.encoding)
      assert_equal("\xff".force_encoding("ascii-8bit"), output)
    end
  end
  # Kernel#open on a "|command" string with "r:UTF-8": output is tagged UTF-8
  # and there is no internal encoding.
  def test_open_pipe_r_enc2
    command = "|#{EnvUtil.rubybin} -e 'putc \"\\u3042\"'"
    open(command, "r:UTF-8") do |f|
      assert_equal(Encoding::UTF_8, f.external_encoding)
      assert_equal(nil, f.internal_encoding)
      output = f.read
      assert_equal(Encoding::UTF_8, output.encoding)
      assert_equal("\u3042", output)
    end
  end
  # IO.foreach with mode: "r:ascii-8bit" yields binary-encoded lines.
  def test_s_foreach_enc
    with_tmpdir do
      generate_file("t", "\xff")
      IO.foreach("t", mode: "r:ascii-8bit") do |line|
        assert_equal(Encoding::ASCII_8BIT, line.encoding)
        assert_equal("\xff".force_encoding("ascii-8bit"), line)
      end
    end
  end
  1241. def test_s_foreach_enc_in_opt
  1242. with_tmpdir {
  1243. generate_file("t", "\xff")
  1244. IO.foreach("t", :encoding => "ascii-8bit") {|s|
  1245. assert_equal(Encoding::ASCII_8BIT, s.encoding)
  1246. assert_equal("\xff".force_encoding("ascii-8bit"), s)
  1247. }
  1248. }
  1249. end
  1250. def test_s_foreach_enc_in_opt2
  1251. with_tmpdir {
  1252. generate_file("t", "\xff")
  1253. IO.foreach("t", :external_encoding => "ascii-8bit") {|s|
  1254. assert_equal(Encoding::ASCII_8BIT, s.encoding)
  1255. assert_equal("\xff".force_encoding("ascii-8bit"), s)
  1256. }
  1257. }
  1258. end
  # IO.foreach with mode: "r:utf-8:euc-jp" transcodes each line to EUC-JP.
  def test_s_foreach_enc_enc
    with_tmpdir do
      generate_file("t", "\u3042")
      IO.foreach("t", mode: "r:utf-8:euc-jp") do |line|
        assert_equal(Encoding::EUC_JP, line.encoding)
        assert_equal("\xa4\xa2".force_encoding("euc-jp"), line)
      end
    end
  end
  1268. def test_s_foreach_enc_enc_in_opt
  1269. with_tmpdir {
  1270. generate_file("t", "\u3042")
  1271. IO.foreach("t", :mode => "r", :encoding => "utf-8:euc-jp") {|s|
  1272. assert_equal(Encoding::EUC_JP, s.encoding)
  1273. assert_equal("\xa4\xa2".force_encoding("euc-jp"), s)
  1274. }
  1275. }
  1276. end
  1277. def test_s_foreach_enc_enc_in_opt2
  1278. with_tmpdir {
  1279. generate_file("t", "\u3042")
  1280. IO.foreach("t", :mode => "r", :external_encoding => "utf-8", :internal_encoding => "euc-jp") {|s|
  1281. assert_equal(Encoding::EUC_JP, s.encoding)
  1282. assert_equal("\xa4\xa2".force_encoding("euc-jp"), s)
  1283. }
  1284. }
  1285. end
  # IO.foreach with open_args: ["r:ascii-8bit"] yields binary-encoded lines.
  def test_s_foreach_open_args_enc
    with_tmpdir do
      generate_file("t", "\xff")
      IO.foreach("t", open_args: ["r:ascii-8bit"]) do |line|
        assert_equal(Encoding::ASCII_8BIT, line.encoding)
        assert_equal("\xff".force_encoding("ascii-8bit"), line)
      end
    end
  end
  1295. def test_s_foreach_open_args_enc_in_opt
  1296. with_tmpdir {
  1297. generate_file("t", "\xff")
  1298. IO.foreach("t", :open_args => ["r", encoding: "ascii-8bit"]) {|s|
  1299. assert_equal(Encoding::ASCII_8BIT, s.encoding)
  1300. assert_equal("\xff".force_encoding("ascii-8bit"), s)
  1301. }
  1302. }
  1303. end
  1304. def test_s_foreach_open_args_enc_in_opt2
  1305. with_tmpdir {
  1306. generate_file("t", "\xff")
  1307. IO.foreach("t", :open_args => ["r", external_encoding: "ascii-8bit"]) {|s|
  1308. assert_equal(Encoding::ASCII_8BIT, s.encoding)
  1309. assert_equal("\xff".force_encoding("ascii-8bit"), s)
  1310. }
  1311. }
  1312. end
  # IO.foreach with open_args: ["r:utf-8:euc-jp"] transcodes each line to EUC-JP.
  def test_s_foreach_open_args_enc_enc
    with_tmpdir do
      generate_file("t", "\u3042")
      IO.foreach("t", open_args: ["r:utf-8:euc-jp"]) do |line|
        assert_equal(Encoding::EUC_JP, line.encoding)
        assert_equal("\xa4\xa2".force_encoding("euc-jp"), line)
      end
    end
  end
  1322. def test_s_foreach_open_args_enc_enc_in_opt
  1323. with_tmpdir {
  1324. generate_file("t", "\u3042")
  1325. IO.foreach("t", :open_args => ["r", encoding: "utf-8:euc-jp"]) {|s|
  1326. assert_equal(Encoding::EUC_JP, s.encoding)
  1327. assert_equal("\xa4\xa2".force_encoding("euc-jp"), s)
  1328. }
  1329. }
  1330. end
  # IO.foreach with open_args carrying separate external/internal encodings.
  def test_s_foreach_open_args_enc_enc_in_opt2
    with_tmpdir do
      generate_file("t", "\u3042")
      open_args = ["r", { external_encoding: "utf-8", internal_encoding: "euc-jp" }]
      IO.foreach("t", open_args: open_args) do |line|
        assert_equal(Encoding::EUC_JP, line.encoding)
        assert_equal("\xa4\xa2".force_encoding("euc-jp"), line)
      end
    end
  end
  # Any combination that specifies text/binary mode twice (mode letter plus
  # option, or both options) must raise ArgumentError. [ruby-core:42199]
  def test_both_textmode_binmode
    bug5918 = '[ruby-core:42199]'
    conflicting = [
      ["r",  { textmode: true, binmode: true }],
      ["rt", { binmode: true }],
      ["rt", { binmode: false }],
      ["rb", { textmode: true }],
      ["rb", { textmode: false }],
    ]
    conflicting.each do |mode, opts|
      assert_raise(ArgumentError, bug5918) { open("not-exist", mode, **opts) }
    end
  end
  # Whole-file reads in text mode ("rt" or textmode: true) turn CRLF and bare
  # CR into LF, with and without an encoding conversion, while
  # universal_newline: false keeps the raw bytes. LF-only input must come
  # back unchanged.
  def test_textmode_decode_universal_newline_read
    with_tmpdir {
      generate_file("t.crlf", "a\r\nb\r\nc\r\n")
      assert_equal("a\nb\nc\n", File.read("t.crlf", mode:"rt:euc-jp:utf-8"))
      assert_equal("a\nb\nc\n", File.read("t.crlf", mode:"rt"))
      open("t.crlf", "rt:euc-jp:utf-8") {|f| assert_equal("a\nb\nc\n", f.read) }
      open("t.crlf", "rt") {|f| assert_equal("a\nb\nc\n", f.read) }
      open("t.crlf", "r", :textmode=>true) {|f| assert_equal("a\nb\nc\n", f.read) }
      open("t.crlf", "r", textmode: true, universal_newline: false) {|f|
        assert_equal("a\r\nb\r\nc\r\n", f.read)
      }

      generate_file("t.cr", "a\rb\rc\r")
      assert_equal("a\nb\nc\n", File.read("t.cr", mode:"rt:euc-jp:utf-8"))
      assert_equal("a\nb\nc\n", File.read("t.cr", mode:"rt"))

      generate_file("t.lf", "a\nb\nc\n")
      # Read the LF fixture that was just generated so that it is actually
      # exercised by the assertions.
      assert_equal("a\nb\nc\n", File.read("t.lf", mode:"rt:euc-jp:utf-8"))
      assert_equal("a\nb\nc\n", File.read("t.lf", mode:"rt"))
    }
  end
  # Character-wise reads in "rt" mode see every line terminator (CRLF, CR or
  # LF) as a single "\n", followed by nil at end of file.
  def test_textmode_decode_universal_newline_getc
    fixtures = [
      ["t.crlf", "a\r\nb\r\nc\r\n"],
      ["t.cr", "a\rb\rc\r"],
      ["t.lf", "a\nb\nc\n"],
    ]
    with_tmpdir do
      fixtures.each do |name, raw|
        generate_file(name, raw)
        open(name, "rt") do |f|
          ["a", "\n", "b", "\n", "c", "\n", nil].each do |expected_char|
            assert_equal(expected_char, f.getc)
          end
        end
      end
    end
  end
  # Line-wise reads in "rt" mode return LF-terminated lines for CRLF, CR and
  # LF input alike, then nil at end of file.
  def test_textmode_decode_universal_newline_gets
    fixtures = [
      ["t.crlf", "a\r\nb\r\nc\r\n"],
      ["t.cr", "a\rb\rc\r"],
      ["t.lf", "a\nb\nc\n"],
    ]
    with_tmpdir do
      fixtures.each do |name, raw|
        generate_file(name, raw)
        open(name, "rt") do |f|
          ["a\n", "b\n", "c\n", nil].each do |expected_line|
            assert_equal(expected_line, f.gets)
          end
        end
      end
    end
  end
  # Universal newline decoding also applies when the external encoding is
  # UTF-16 (BE and LE) and the data is converted to UTF-8 on read.
  def test_textmode_decode_universal_newline_utf16
    be_mode = "rt:utf-16be:utf-8"
    le_mode = "rt:utf-16le:utf-8"
    fixtures = [
      ["t.utf16be.crlf", "\0a\0\r\0\n\0b\0\r\0\n\0c\0\r\0\n", be_mode],
      ["t.utf16le.crlf", "a\0\r\0\n\0b\0\r\0\n\0c\0\r\0\n\0", le_mode],
      ["t.utf16be.cr", "\0a\0\r\0b\0\r\0c\0\r", be_mode],
      ["t.utf16le.cr", "a\0\r\0b\0\r\0c\0\r\0", le_mode],
      ["t.utf16be.lf", "\0a\0\n\0b\0\n\0c\0\n", be_mode],
      ["t.utf16le.lf", "a\0\n\0b\0\n\0c\0\n\0", le_mode],
    ]
    with_tmpdir do
      fixtures.each do |name, raw, mode|
        generate_file(name, raw)
        assert_equal("a\nb\nc\n", File.read(name, mode: mode))
      end
    end
  end
  # One-element cache for the platform newline sequence; empty until
  # system_newline has run once.
  SYSTEM_NEWLINE = []

  # Returns the bytes that a text-mode write of "\n" produces on this
  # platform. The value is probed once by writing a file in "wt" mode and
  # reading it back in "rb" mode, then served from SYSTEM_NEWLINE.
  def system_newline
    if SYSTEM_NEWLINE.empty?
      with_tmpdir do
        open("newline", "wt") { |f| f.print "\n" }
        open("newline", "rb") { |f| SYSTEM_NEWLINE << f.read }
      end
    end
    SYSTEM_NEWLINE.first
  end
  # Lines written with puts in "wt" mode end with the platform newline.
  def test_textmode_encode_newline
    with_tmpdir do
      open("t.txt", "wt") do |f|
        %w[abc def].each { |text| f.puts text }
      end
      content = File.read("t.txt", mode: "rb")
      nl = system_newline
      assert_equal("abc#{nl}def#{nl}", content)
    end
  end
  # In "wt:euc-jp" mode the text is converted to EUC-JP and each line ends
  # with the platform newline.
  def test_textmode_encode_newline_enc
    with_tmpdir do
      open("t.txt", "wt:euc-jp") do |f|
        ["abc\u3042", "def\u3044"].each { |text| f.puts text }
      end
      content = File.read("t.txt", mode: "rb:ascii-8bit")
      nl = system_newline
      assert_equal("abc\xA4\xA2#{nl}def\xA4\xA4#{nl}", content)
    end
  end
  # Requesting newline: :universal together with "rb" mode raises ArgumentError.
  def test_binmode_decode_universal_newline
    with_tmpdir do
      generate_file("t.txt", "a\n")
      assert_raise(ArgumentError) do
        open("t.txt", "rb", newline: :universal) {}
      end
    end
  end
  # With mode "r" and newline: :universal, gets returns LF-terminated lines
  # for CRLF, CR and LF input, then nil at end of file.
  def test_default_mode_decode_universal_newline_gets
    fixtures = [
      ["t.crlf", "a\r\nb\r\nc\r\n"],
      ["t.cr", "a\rb\rc\r"],
      ["t.lf", "a\nb\nc\n"],
    ]
    with_tmpdir do
      fixtures.each do |name, raw|
        generate_file(name, raw)
        open(name, "r", newline: :universal) do |f|
          ["a\n", "b\n", "c\n", nil].each do |expected_line|
            assert_equal(expected_line, f.gets)
          end
        end
      end
    end
  end
  # Reading a UTF-8 CRLF file converted to UTF-16BE: "rb" keeps CRLF, "rt"
  # collapses it to LF, and plain "r" follows the platform default.
  def test_read_newline_conversion_with_encoding_conversion
    with_tmpdir do
      generate_file("t.utf8.crlf", "a\r\nb\r\n")

      open("t.utf8.crlf", "rb:utf-8:utf-16be") do |f|
        data = f.read
        assert_equal("\0a\0\r\0\n\0b\0\r\0\n".force_encoding("UTF-16BE"), data)
      end

      open("t.utf8.crlf", "rt:utf-8:utf-16be") do |f|
        data = f.read
        assert_equal("\0a\0\n\0b\0\n".force_encoding("UTF-16BE"), data)
      end

      open("t.utf8.crlf", "r:utf-8:utf-16be") do |f|
        data = f.read
        # Where the platform newline is already LF, nothing is translated.
        expected =
          if system_newline == "\n"
            "\0a\0\r\0\n\0b\0\r\0\n".force_encoding("UTF-16BE")
          else
            "\0a\0\n\0b\0\n".force_encoding("UTF-16BE")
          end
        assert_equal(expected, data)
      end
    end
  end
  # A UTF-16BE file read in "rb:utf-16be" mode is returned byte-for-byte,
  # CRLF included.
  def test_read_newline_conversion_without_encoding_conversion
    with_tmpdir do
      generate_file("t.utf16.crlf", "\0a\0\r\0\n\0b\0\r\0\n")
      open("t.utf16.crlf", "rb:utf-16be") do |f|
        data = f.read
        expected = "\0a\0\r\0\n\0b\0\r\0\n".force_encoding("UTF-16BE")
        assert_equal(expected, data)
      end
    end
  end
  # Opening with a UTF-16BE external encoding and no conversion in "rt" or
  # "r" mode raises ArgumentError.
  def test_read_newline_conversion_error
    with_tmpdir do
      generate_file("empty.txt", "")
      # ascii incompatible encoding without conversion needs binmode.
      %w[rt:utf-16be r:utf-16be].each do |mode|
        assert_raise(ArgumentError) do
          open("empty.txt", mode) { |f| }
        end
      end
    end
  end
  # Matrix of read modes: text ("rt") vs. binary ("rb"), with and without an
  # encoding conversion, across EUC-JP, UTF-8, ISO-2022-JP and UTF-16BE.
  # Text mode normalizes newlines to LF; binary mode leaves them untouched.
  def test_read_mode
    with_tmpdir do
      generate_file("t", "a\rb\r\nc\n\xc2\xa2")
      generate_file("ie", "a\rb\r\nc\n\e$B\x42\x22\e(B")
      generate_file("iu", "a\rb\r\nc\n\e$B\x21\x71\e(B")
      generate_file("be", "\0a\0\r\0b\0\r\0\n\0c\0\n\x85\x35")
      generate_file("bu", "\0a\0\r\0b\0\r\0\n\0c\0\n\0\xa2")
      # "\xc2\xa2" is valid as EUC-JP and UTF-8
      #   EUC-JP        UTF-8           Unicode
      #   0xC2A2        0xE894B5        U+8535
      #   0xA1F1        0xC2A2          U+00A2

      # Opens +file+ with +mode+ and compares the whole content to +expected+.
      check = lambda do |file, mode, expected|
        open(file, mode) { |f| assert_equal(expected, f.read) }
      end

      # No conversion: only the newline handling and the encoding tag differ.
      check.("t", "rt", "a\nb\nc\n\xc2\xa2".force_encoding(Encoding.default_external))
      check.("t", "rb", "a\rb\r\nc\n\xc2\xa2".force_encoding(Encoding::ASCII_8BIT))
      check.("t", "rt:euc-jp", "a\nb\nc\n\xc2\xa2".force_encoding("EUC-JP"))
      check.("t", "rb:euc-jp", "a\rb\r\nc\n\xc2\xa2".force_encoding("EUC-JP"))
      check.("t", "rt:utf-8", "a\nb\nc\n\xc2\xa2".force_encoding("UTF-8"))
      check.("t", "rb:utf-8", "a\rb\r\nc\n\xc2\xa2".force_encoding("UTF-8"))
      assert_raise(ArgumentError) { open("t", "rt:iso-2022-jp") { |f| } }
      check.("t", "rb:iso-2022-jp", "a\rb\r\nc\n\xc2\xa2".force_encoding("ISO-2022-JP"))

      # Conversion between the ASCII-compatible encodings.
      check.("t", "rt:euc-jp:utf-8", "a\nb\nc\n\u8535")
      check.("t", "rt:utf-8:euc-jp", "a\nb\nc\n\xa1\xf1".force_encoding("EUC-JP"))
      check.("t", "rb:euc-jp:utf-8", "a\rb\r\nc\n\u8535")
      check.("t", "rb:utf-8:euc-jp", "a\rb\r\nc\n\xa1\xf1".force_encoding("EUC-JP"))

      # Conversion to ISO-2022-JP / UTF-16BE as the internal encoding.
      check.("t", "rt:euc-jp:iso-2022-jp", "a\nb\nc\n\e$B\x42\x22\e(B".force_encoding("ISO-2022-JP"))
      check.("t", "rt:utf-8:iso-2022-jp", "a\nb\nc\n\e$B\x21\x71\e(B".force_encoding("ISO-2022-JP"))
      check.("t", "rt:euc-jp:utf-16be", "\0a\0\n\0b\0\n\0c\0\n\x85\x35".force_encoding("UTF-16BE"))
      check.("t", "rt:utf-8:utf-16be", "\0a\0\n\0b\0\n\0c\0\n\0\xa2".force_encoding("UTF-16BE"))
      check.("t", "rb:euc-jp:iso-2022-jp", "a\rb\r\nc\n\e$B\x42\x22\e(B".force_encoding("ISO-2022-JP"))
      check.("t", "rb:utf-8:iso-2022-jp", "a\rb\r\nc\n\e$B\x21\x71\e(B".force_encoding("ISO-2022-JP"))
      check.("t", "rb:euc-jp:utf-16be", "\0a\0\r\0b\0\r\0\n\0c\0\n\x85\x35".force_encoding("UTF-16BE"))
      check.("t", "rb:utf-8:utf-16be", "\0a\0\r\0b\0\r\0\n\0c\0\n\0\xa2".force_encoding("UTF-16BE"))

      # Conversion from ISO-2022-JP / UTF-16BE as the external encoding.
      check.("ie", "rt:iso-2022-jp:euc-jp", "a\nb\nc\n\xc2\xa2".force_encoding("EUC-JP"))
      check.("iu", "rt:iso-2022-jp:utf-8", "a\nb\nc\n\xc2\xa2".force_encoding("UTF-8"))
      check.("be", "rt:utf-16be:euc-jp", "a\nb\nc\n\xc2\xa2".force_encoding("EUC-JP"))
      check.("bu", "rt:utf-16be:utf-8", "a\nb\nc\n\xc2\xa2".force_encoding("UTF-8"))
      check.("ie", "rb:iso-2022-jp:euc-jp", "a\rb\r\nc\n\xc2\xa2".force_encoding("EUC-JP"))
      check.("iu", "rb:iso-2022-jp:utf-8", "a\rb\r\nc\n\xc2\xa2".force_encoding("UTF-8"))
      check.("be", "rb:utf-16be:euc-jp", "a\rb\r\nc\n\xc2\xa2".force_encoding("EUC-JP"))
      check.("bu", "rb:utf-16be:utf-8", "a\rb\r\nc\n\xc2\xa2".force_encoding("UTF-8"))

      # Conversion between ISO-2022-JP and UTF-16BE.
      check.("ie", "rt:iso-2022-jp:utf-16be", "\0a\0\n\0b\0\n\0c\0\n\x85\x35".force_encoding("UTF-16BE"))
      check.("be", "rt:utf-16be:iso-2022-jp", "a\nb\nc\n\e$B\x42\x22\e(B".force_encoding("ISO-2022-JP"))
      check.("ie", "rb:iso-2022-jp:utf-16be", "\0a\0\r\0b\0\r\0\n\0c\0\n\x85\x35".force_encoding("UTF-16BE"))
      check.("be", "rb:utf-16be:iso-2022-jp", "a\rb\r\nc\n\e$B\x42\x22\e(B".force_encoding("ISO-2022-JP"))
    end
  end
  # Writes each of +args+ with IO#print to a scratch file opened in +mode+,
  # then asserts that the raw bytes on disk equal +expected+ (both sides are
  # compared as binary strings).
  def assert_write(expected, mode, *args)
    with_tmpdir do
      open("t", mode) do |f|
        args.each { |piece| f.print piece }
      end
      written = File.read("t", mode: "rb:ascii-8bit")
      assert_equal(expected.b, written.b)
    end
  end
  # Checks the bytes written in "wt" and "wb" modes when no external encoding
  # is given, then runs the per-encoding matrix in t_write_mode_enc both
  # without and with a ":utf-8" internal encoding suffix.
  def test_write_mode
    # "\xc2\xa2" is valid as EUC-JP and UTF-8
    #   EUC-JP        UTF-8           Unicode
    #   0xC2A2        0xE894B5        U+8535
    #   0xA1F1        0xC2A2          U+00A2
    a = "a\rb\r\nc\n"
    e = "\xc2\xa2".force_encoding("euc-jp")
    u8 = "\xc2\xa2".force_encoding("utf-8")
    u16 = "\x85\x35\0\r\x00\xa2\0\r\0\n\0\n".force_encoding("utf-16be")
    i = "\e$B\x42\x22\e(B\r\e$B\x21\x71\e(B\r\n\n".force_encoding("iso-2022-jp")
    n = system_newline

    # Text mode: only "\n" is replaced by the platform newline.
    assert_write("a\rb\r#{n}c#{n}", "wt", a)
    assert_write("\xc2\xa2", "wt", e)
    assert_write("\xc2\xa2", "wt", u8)

    # Binary mode: bytes are written unchanged.
    assert_write("a\rb\r\nc\n", "wb", a)
    assert_write("\xc2\xa2", "wb", e)
    assert_write("\xc2\xa2", "wb", u8)

    #assert_write("\x85\x35\0\r\x00\xa2\0\r\0\n\0\n", "wt", u16) should raise
    #assert_write("\e$B\x42\x22\e(B\r\e$B\x21\x71\e(B\r\n\n", "wt", i) should raise
    assert_write("\x85\x35\0\r\x00\xa2\0\r\0\n\0\n", "wb", u16)
    assert_write("\e$B\x42\x22\e(B\r\e$B\x21\x71\e(B\r\n\n", "wb", i)

    t_write_mode_enc
    t_write_mode_enc(":utf-8")
  end
  # Helper for test_write_mode: checks the bytes written for every combination
  # of text/binary mode and external encoding (EUC-JP, UTF-16BE, ISO-2022-JP).
  # +enc+ is appended verbatim to each mode string, e.g. ":utf-8".
  def t_write_mode_enc(enc="")
    # "\xc2\xa2" is valid as EUC-JP and UTF-8
    #   EUC-JP        UTF-8           Unicode
    #   0xC2A2        0xE894B5        U+8535
    #   0xA1F1        0xC2A2          U+00A2
    ascii = "a\rb\r\nc\n"
    euc = "\xc2\xa2".force_encoding("euc-jp")
    utf8 = "\xc2\xa2".force_encoding("utf-8")
    utf16 = "\x85\x35\0\r\x00\xa2\0\r\0\n\0\n".force_encoding("utf-16be")
    iso = "\e$B\x42\x22\e(B\r\e$B\x21\x71\e(B\r\n\n".force_encoding("iso-2022-jp")
    nl = system_newline
    nl16 = nl.encode("utf-16be").force_encoding("ascii-8bit")

    wt_euc = "wt:euc-jp#{enc}"
    wb_euc = "wb:euc-jp#{enc}"
    wt_u16 = "wt:utf-16be#{enc}"
    wb_u16 = "wb:utf-16be#{enc}"
    wt_iso = "wt:iso-2022-jp#{enc}"
    wb_iso = "wb:iso-2022-jp#{enc}"

    # EUC-JP target, single-character and ASCII sources.
    assert_write("a\rb\r#{nl}c#{nl}", wt_euc, ascii)
    assert_write("\xc2\xa2", wt_euc, euc)
    assert_write("\xa1\xf1", wt_euc, utf8)
    assert_write("a\rb\r\nc\n", wb_euc, ascii)
    assert_write("\xc2\xa2", wb_euc, euc)
    assert_write("\xa1\xf1", wb_euc, utf8)

    # EUC-JP target, ASCII-incompatible / stateful sources.
    [utf16, iso].each do |src|
      assert_write("\xc2\xa2\r\xa1\xf1\r#{nl}#{nl}", wt_euc, src)
    end
    [utf16, iso].each do |src|
      assert_write("\xc2\xa2\r\xa1\xf1\r\n\n", wb_euc, src)
    end

    # Text mode, UTF-16BE and ISO-2022-JP targets.
    assert_write("\0a\0\r\0b\0\r#{nl16}\0c#{nl16}", wt_u16, ascii)
    assert_write("\x85\x35", wt_u16, euc)
    assert_write("\x00\xa2", wt_u16, utf8)
    assert_write("a\rb\r#{nl}c#{nl}", wt_iso, ascii)
    assert_write("\e$B\x42\x22\e(B", wt_iso, euc)
    assert_write("\e$B\x21\x71\e(B", wt_iso, utf8)

    # Binary mode, UTF-16BE and ISO-2022-JP targets.
    assert_write("\0a\0\r\0b\0\r\0\n\0c\0\n", wb_u16, ascii)
    assert_write("\x85\x35", wb_u16, euc)
    assert_write("\x00\xa2", wb_u16, utf8)
    assert_write("a\rb\r\nc\n", wb_iso, ascii)
    assert_write("\e$B\x42\x22\e(B", wb_iso, euc)
    assert_write("\e$B\x21\x71\e(B", wb_iso, utf8)

    # UTF-16BE target, ASCII-incompatible / stateful sources.
    [utf16, iso].each do |src|
      assert_write("\x85\x35\0\r\x00\xa2\0\r#{nl16}#{nl16}", wt_u16, src)
    end
    [utf16, iso].each do |src|
      assert_write("\x85\x35\0\r\x00\xa2\0\r\0\n\0\n", wb_u16, src)
    end

    # ISO-2022-JP target, ASCII-incompatible / stateful sources.
    [utf16, iso].each do |src|
      assert_write("\e$B\x42\x22\e(B\r\e$B\x21\x71\e(B\r#{nl}#{nl}", wt_iso, src)
    end
    [utf16, iso].each do |src|
      assert_write("\e$B\x42\x22\e(B\r\e$B\x21\x71\e(B\r\n\n", wb_iso, src)
    end
  end
  # Where the platform newline is not LF, printing a UTF-16BE string to a
  # plain "wt" handle raises ArgumentError. Skipped where the newline is LF.
  def test_write_mode_fail
    return if system_newline == "\n"

    with_tmpdir do
      open("t", "wt") do |f|
        assert_raise(ArgumentError) do
          f.print "\0\r\0\r\0\n\0\n".force_encoding("utf-16be")
        end
      end
    end
  end
  # Opening for write with UTF-16BE as internal or external encoding works in
  # "wb", "wt" and "w" modes (no exception is expected).
  def test_write_ascii_incompat
    with_tmpdir do
      %w[wb wt w].each do |prefix|
        open("t.utf8", "#{prefix}:utf-8:utf-16be") { |f| }
      end
      %w[wb wt w].each do |prefix|
        open("t.utf16", "#{prefix}:utf-16be") { |f| }
      end
    end
  end
  # Binary-mode writes of a UTF-16BE string: converted to UTF-8 when
  # "utf-8:utf-16be" is given, written as-is when only "utf-16be" is given.
  def test_binmode_write_ascii_incompat_internal
    cases = [
      ["wb:utf-8:utf-16be", "a\nb\n"],
      ["wb:utf-16be", "\0a\0\n\0b\0\n"],
    ]
    with_tmpdir do
      cases.each do |mode, expected_bytes|
        open("t.utf8.lf", mode) do |f|
          f.print "\0a\0\n\0b\0\n".force_encoding("UTF-16BE")
        end
        content = File.read("t.utf8.lf", mode: "rb:ascii-8bit")
        assert_equal(expected_bytes, content)
      end
    end
  end
  # Binary reads ("rb" or binmode: true) return the file unchanged; plain "r"
  # does too where the platform newline is LF.
  def test_binary
    with_tmpdir do
      src = "a\nb\rc\r\nd\n"
      generate_file("t.txt", src)

      open("t.txt", "rb") { |f| assert_equal(src, f.read) }
      open("t.txt", "r", binmode: true) { |f| assert_equal(src, f.read) }

      if system_newline == "\n"
        open("t.txt", "r") { |f| assert_equal(src, f.read) }
      end
    end
  end
  # Switching a text-mode handle to binmode mid-stream stops newline
  # conversion for the remaining data.
  def test_binmode
    with_tmpdir do
      src = "a\r\nb\r\nc\r\n"
      generate_file("t.txt", src)
      open("t.txt", "rt") do |f|
        # CRLF is still collapsed before the switch.
        ["a", "\n"].each { |ch| assert_equal(ch, f.getc) }
        f.binmode
        # After the switch CR and LF are returned separately.
        ["b", "\r", "\n", "c", "\r", "\n", nil].each do |ch|
          assert_equal(ch, f.getc)
        end
      end
    end
  end
  # Same as test_binmode, on a handle opened with an encoding conversion
  # ("rt:euc-jp:utf-8").
  def test_binmode2
    with_tmpdir do
      src = "a\r\nb\r\nc\r\n"
      generate_file("t.txt", src)
      open("t.txt", "rt:euc-jp:utf-8") do |f|
        # CRLF is still collapsed before the switch.
        ["a", "\n"].each { |ch| assert_equal(ch, f.getc) }
        f.binmode
        # After the switch CR and LF are returned separately.
        ["b", "\r", "\n", "c", "\r", "\n", nil].each do |ch|
          assert_equal(ch, f.getc)
        end
      end
    end
  end
  # Calling binmode on a converting text handle before reading yields the raw
  # file bytes tagged ASCII-8BIT.
  def test_binmode3
    with_tmpdir do
      src = "\u3042\r\n"
      generate_file("t.txt", src)
      raw_bytes = src.b
      open("t.txt", "rt:utf-8:euc-jp") do |f|
        f.binmode
        data = f.read
        assert_str_equal(raw_bytes, data)
        assert_equal(Encoding::ASCII_8BIT, data.encoding)
      end
    end
  end
  # Reading an invalid byte sequence: :invalid=>:replace substitutes it,
  # while :undef=>:replace alone still raises InvalidByteSequenceError and a
  # following read returns the rest ("b").
  def test_invalid_r
    mode = "r:utf-8:euc-jp"
    with_tmpdir do
      generate_file("t.txt", "a\x80b")

      replacing = [
        [{ invalid: :replace }, "a?b"],
        [{ invalid: :replace, replace: "" }, "ab"],
      ]
      replacing.each do |opts, expected|
        open("t.txt", mode, **opts) { |f| assert_equal(expected, f.read) }
      end

      not_covering = [
        { undef: :replace },
        { undef: :replace, replace: "" },
      ]
      not_covering.each do |opts|
        open("t.txt", mode, **opts) do |f|
          assert_raise(Encoding::InvalidByteSequenceError) { f.read }
          assert_equal("b", f.read)
        end
      end
    end
  end
  # Reading an unconvertible character: :undef=>:replace substitutes it,
  # while :invalid=>:replace alone still raises UndefinedConversionError and
  # a following read returns the rest ("b").
  def test_undef_r
    mode = "r:utf-8:euc-jp"
    with_tmpdir do
      generate_file("t.txt", "a\uFFFDb")

      replacing = [
        [{ undef: :replace }, "a?b"],
        [{ undef: :replace, replace: "" }, "ab"],
      ]
      replacing.each do |opts, expected|
        open("t.txt", mode, **opts) { |f| assert_equal(expected, f.read) }
      end

      not_covering = [
        { invalid: :replace },
        { invalid: :replace, replace: "" },
      ]
      not_covering.each do |opts|
        open("t.txt", mode, **opts) do |f|
          assert_raise(Encoding::UndefinedConversionError) { f.read }
          assert_equal("b", f.read)
        end
      end
    end
  end
  # Writing an invalid UTF-8 string to an EUC-JP file: :invalid=>:replace
  # substitutes the bad byte, :undef=>:replace alone still raises
  # InvalidByteSequenceError.
  def test_invalid_w
    mode = "w:euc-jp"
    with_tmpdir do
      invalid_utf8 = "a\x80b".force_encoding("utf-8")

      replacing = [
        [{ invalid: :replace }, "a?b"],
        [{ invalid: :replace, replace: "" }, "ab"],
      ]
      replacing.each do |opts, expected|
        open("t.txt", mode, **opts) do |f|
          assert_nothing_raised { f.write invalid_utf8 }
        end
        assert_equal(expected, File.read("t.txt"))
      end

      not_covering = [
        { undef: :replace },
        { undef: :replace, replace: "" },
      ]
      not_covering.each do |opts|
        open("t.txt", mode, **opts) do |f|
          assert_raise(Encoding::InvalidByteSequenceError) { f.write invalid_utf8 }
        end
      end
    end
  end
  # Writes "a\uFFFDb" to a "w:euc-jp:utf-8" file.
  # With +undef: :replace+ the write succeeds and the file holds the replaced
  # text; with only +invalid: :replace+ the write raises
  # Encoding::UndefinedConversionError.
  def test_undef_w_stateless
    with_tmpdir do
      text = "a\uFFFDb"
      mode = "w:euc-jp:utf-8"
      generate_file("t.txt", text)

      [
        [{ undef: :replace }, "a?b"],
        [{ undef: :replace, replace: "" }, "ab"],
      ].each do |opts, written|
        open("t.txt", mode, **opts) do |io|
          assert_nothing_raised { io.write text }
        end
        assert_equal(written, File.read("t.txt"))
      end

      [{ invalid: :replace }, { invalid: :replace, replace: "" }].each do |opts|
        open("t.txt", mode, **opts) do |io|
          assert_raise(Encoding::UndefinedConversionError) { io.write text }
        end
      end
    end
  end
  # Same checks as the euc-jp variant, but writing through
  # "w:iso-2022-jp:utf-8": +undef: :replace+ lets the write succeed with the
  # replaced text, while only +invalid: :replace+ raises
  # Encoding::UndefinedConversionError.
  def test_undef_w_stateful
    with_tmpdir do
      text = "a\uFFFDb"
      mode = "w:iso-2022-jp:utf-8"
      generate_file("t.txt", text)

      [
        [{ undef: :replace }, "a?b"],
        [{ undef: :replace, replace: "" }, "ab"],
      ].each do |opts, written|
        open("t.txt", mode, **opts) do |io|
          assert_nothing_raised { io.write text }
        end
        assert_equal(written, File.read("t.txt"))
      end

      [{ invalid: :replace }, { invalid: :replace, replace: "" }].each do |opts|
        open("t.txt", mode, **opts) do |io|
          assert_raise(Encoding::UndefinedConversionError) { io.write text }
        end
      end
    end
  end
  # Writes through files opened with +xml: :attr+ and compares the raw bytes
  # on disk. The output is wrapped in double quotes, with &, <, > and "
  # written as entities; characters the target encoding cannot represent are
  # written as hexadecimal character references.
  def test_w_xml_attr
    with_tmpdir do
      slurp = ->(name) { File.read(name, mode: "rb:ascii-8bit") }

      [
        ["raw.txt", "wb",
         "\"&amp;&lt;&gt;&quot;'\u4E02\u3042\n\""],
        ["ascii.txt", "wb:us-ascii",
         "\"&amp;&lt;&gt;&quot;'&#x4E02;&#x3042;\n\""],
        ["iso-2022-jp.txt", "wb:iso-2022-jp",
         "\"&amp;&lt;&gt;&quot;'&#x4E02;\e$B$\"\e(B\n\""],
        ["utf-16be.txt", "wb:utf-16be",
         "\0\"\0&\0a\0m\0p\0;\0&\0l\0t\0;\0&\0g\0t\0;\0&\0q\0u\0o\0t\0;\0'\x4E\x02\x30\x42\0\n\0\""],
      ].each do |name, mode, expected|
        open(name, mode, xml: :attr) do |io|
          io.print '&<>"\''
          io.puts "\u4E02\u3042"
        end
        assert_equal(expected.force_encoding("ascii-8bit"), slurp.call(name))
      end

      [
        ["eucjp.txt", "w:euc-jp:utf-8", "\"\x8F\xB0\xA1\""],
        ["sjis.txt", "w:sjis:utf-8", "\"&#x4E02;\""],
        ["iso-2022-jp.txt", "w:iso-2022-jp:utf-8", "\"&#x4E02;\""],
      ].each do |name, mode, expected|
        open(name, mode, xml: :attr) do |io|
          io.print "\u4E02" # U+4E02 is 0x3021 in JIS X 0212
        end
        assert_equal(expected.force_encoding("ascii-8bit"), slurp.call(name))
      end
    end
  end
  # Defines one "test_strip_bom:<encoding>" method per Unicode encoding.
  # Each writes a BOM-prefixed text in that encoding and checks that:
  # * reading with 'rb:BOM|UTF-8' yields the detected encoding, BOM removed;
  # * reading with "<mode>:BOM|UTF-8:UTF-8" yields the UTF-8 text without BOM;
  # * IO#set_encoding_by_bom returns the detected encoding on a plain "rb"
  #   stream and raises ArgumentError when an encoding was already given.
  %w[UTF-8 UTF-16BE UTF-16LE UTF-32BE UTF-32LE].each do |name|
    define_method("test_strip_bom:#{name}") do
      path = "#{name}-bom.txt"
      with_tmpdir do
        without_bom = "\u0100a"
        encoded = "\uFEFF\u0100a".encode(name)
        detected = Encoding.find(name)
        generate_file(path, encoded)

        raw = File.read(path, mode: 'rb:BOM|UTF-8')
        assert_equal(detected, raw.encoding, name)
        assert_equal(encoded[1..-1].b, raw.b, name)

        %w[rb rt r].each do |mode|
          message = "#{name}, mode: #{mode.dump}"
          converted = File.read(path, mode: "#{mode}:BOM|UTF-8:UTF-8")
          assert_equal(Encoding::UTF_8, converted.encoding, message)
          assert_equal(without_bom, converted, message)
        end

        File.open(path, "rb") do |io|
          assert_equal(Encoding.find(name), io.set_encoding_by_bom)
        end
        File.open(path, "rb", encoding: "iso-8859-1") do |io|
          assert_raise(ArgumentError) { io.set_encoding_by_bom }
        end
      end
    end
  end
  # [ruby-core:30641] Reading a BOM-prefixed UTF-8 file with
  # encoding 'BOM|UTF-8' returns the text without the BOM, and
  # set_encoding_by_bom raises ArgumentError once an encoding is already set.
  def test_strip_bom_no_conv
    with_tmpdir do
      bom_file = 'UTF-8-bom.txt'
      ticket = '[ruby-core:30641]'
      generate_file(bom_file, "\uFEFFa")

      text = File.read(bom_file, encoding: 'BOM|UTF-8')
      assert_equal("a", text.b, ticket)

      File.open(bom_file, "rb", encoding: "iso-8859-1") do |io|
        assert_raise(ArgumentError) { io.set_encoding_by_bom }
      end
    end
  end
  # [Bug #8323] A BOM-prefixed file with a trailing invalid byte (\xff) is
  # read with the BOM removed and the invalid byte kept, so the result is
  # not valid_encoding?, both with and without an internal encoding.
  def test_strip_bom_invalid
    with_tmpdir do
      ticket = '[ruby-core:54563] [Bug #8323]'
      bom_file = 'UTF-8-bom.txt'
      generate_file(bom_file, "\uFEFFa")
      want = "a\xff".force_encoding("utf-8")
      open(bom_file, 'ab') { |io| io.write("\xff") }

      ['BOM|UTF-8', 'BOM|UTF-8:UTF-8'].each do |enc_spec|
        got = File.read(bom_file, encoding: enc_spec)
        assert_not_predicate(got, :valid_encoding?, ticket)
        assert_equal(want, got, ticket)
      end
    end
  end
  # [Bug #8323] A file without a BOM reads back unchanged under
  # 'BOM|UTF-8' specs; set_encoding_by_bom returns nil on it, and raises
  # ArgumentError when the stream already has an encoding.
  def test_strip_bom_no_bom
    with_tmpdir do
      ticket = '[ruby-core:54563] [Bug #8323]'
      plain = 'ascii.txt'
      body = "a"
      generate_file(plain, body)

      ['BOM|UTF-8', 'BOM|UTF-8:UTF-8'].each do |enc_spec|
        got = File.read(plain, encoding: enc_spec)
        assert_equal(body, got, ticket)
      end

      File.open(plain, "rb") do |io|
        assert_nil(io.set_encoding_by_bom)
      end
      File.open(plain, "rb", encoding: "iso-8859-1") do |io|
        assert_raise(ArgumentError) { io.set_encoding_by_bom }
      end
    end
  end
  # Opening IO::NULL with an encoding name of "bom|utf-" followed by 10000
  # "x" characters is expected to emit an "Unsupported encoding" warning,
  # both when the name is part of the mode string and when it is passed via
  # the +encoding:+ keyword. Each check is passed as source text to
  # assert_separately (defined outside this chunk; presumably it evaluates
  # the text in a separate process -- confirm against the test helper).
  def test_bom_too_long_utfname
    # Two heredocs start on this line and are read in order: the "begin;"
    # line immediately terminates the first (empty text), and everything up
    # to "end;" becomes the second.
    assert_separately([], "#{<<~"begin;"}\n#{<<~'end;'}")
    begin;
      assert_warn(/Unsupported encoding/) {
      open(IO::NULL, "r:bom|utf-" + "x" * 10000) {}
      }
    end;
    # Same check with the encoding name given through the keyword argument.
    assert_separately([], "#{<<~"begin;"}\n#{<<~'end;'}")
    begin;
      assert_warn(/Unsupported encoding/) {
      open(IO::NULL, encoding: "bom|utf-" + "x" * 10000) {}
      }
    end;
  end
  # A "bom|" prefix on a non-UTF encoding name warns (message matching /BOM/)
  # and the stream's external encoding is the named one (US-ASCII here), for
  # both read and write modes and both mode-string and keyword spellings.
  # With an unknown encoding name the warning mentions that name and the
  # external encoding is nil.
  def test_bom_non_utf
    enc = nil
    capture = proc { |io| enc = io.external_encoding }

    assert_warn(/BOM/) { open(__FILE__, "r:bom|us-ascii", &capture) }
    assert_equal(Encoding::US_ASCII, enc)

    enc = nil
    assert_warn(/BOM/) { open(__FILE__, "r", encoding: "bom|us-ascii", &capture) }
    assert_equal(Encoding::US_ASCII, enc)

    enc = nil
    assert_warn(/BOM/) { open(IO::NULL, "w:bom|us-ascii", &capture) }
    assert_equal(Encoding::US_ASCII, enc)

    enc = nil
    assert_warn(/BOM/) { open(IO::NULL, "w", encoding: "bom|us-ascii", &capture) }
    assert_equal(Encoding::US_ASCII, enc)

    unknown_name = "\u{f8e4 f8d9 f8d7 f8dc f8d0 f8db} \u{f8d6 f8dd f8d9}"
    assert_warn(/#{unknown_name}/) do
      EnvUtil.with_default_internal(nil) do
        open(IO::NULL, "w:bom|#{unknown_name}", &capture)
      end
    end
    assert_nil(enc)
  end
  # Opening a file for writing with "w:bom|utf-8" must not raise IOError;
  # the external encoding is UTF-8 and the written bytes are exactly "abc".
  def test_bom_non_reading
    with_tmpdir do
      seen = nil
      assert_nothing_raised(IOError) do
        open("test", "w:bom|utf-8") do |io|
          seen = io.external_encoding
          io.print("abc")
        end
      end
      assert_equal(Encoding::UTF_8, seen)
      assert_equal("abc", File.binread("test"))
    end
  end
  # After ungetc on a text-mode ("r+t") stream, the byte-oriented and
  # sys-level operations listed below raise IOError ([ruby-dev:40493]),
  # while a plain read still returns the whole content.
  def test_cbuf
    with_tmpdir do
      name = "tst"
      open(name, "w") { |out| out.print "foo" }

      open(name, "r+t") do |io|
        io.ungetc(io.getc)
        assert_raise(IOError, "[ruby-dev:40493]") { io.readpartial(2) }
        assert_raise(IOError) { io.read(2) }
        assert_raise(IOError) { io.each_byte { } }
        assert_raise(IOError) { io.getbyte }
        assert_raise(IOError) { io.ungetbyte(0) }
        assert_raise(IOError) { io.sysread(2) }
        assert_raise(IOError) { IO.copy_stream(io, "tmpout") }
        assert_raise(IOError) { io.sysseek(2) }
      end

      open(name, "r+t") do |io|
        io.ungetc(io.getc)
        assert_equal("foo", io.read)
      end
    end
  end
  # [ruby-dev:40506] After ungetc on an empty text-mode file, eof? is false.
  def test_text_mode_ungetc_eof
    with_tmpdir do
      open("ff", "w") { |_out| }
      open("ff", "rt") do |io|
        io.ungetc "a"
        assert_not_predicate(io, :eof?, "[ruby-dev:40506] (3)")
      end
    end
  end
  # After getc + ungetc on a transcoding, universal-newline pipe reader,
  # IO.select reports the reader as readable.
  def test_cbuf_select
    writer = proc { |w| w << "\r\n" }
    reader = proc do |r|
      r.ungetc(r.getc)
      assert_equal([[r], [], []], IO.select([r], nil, nil, 1))
    end
    pipe("US-ASCII:UTF-8", { universal_newline: true }, writer, reader)
  end
  # [ruby-core:23723] Paragraph-mode gets on a universal-newline pipe:
  # CRLF input is seen as "\n", the first paragraph ends with "\n\n", and
  # the remaining text is returned by the next call.
  def test_textmode_paragraphmode
    writer = proc do |w|
      w << "a\n\n\nc".gsub(/\n/, "\r\n")
      w.close
    end
    reader = proc do |r|
      assert_equal("a\n\n", r.gets(""))
      assert_equal("c", r.gets(""), "[ruby-core:23723] (18)")
    end
    pipe("US-ASCII:UTF-8", { universal_newline: true }, writer, reader)
  end
  # After a paragraph-mode gets and a getc on a universal-newline pipe,
  # readpartial returns the rest of the data.
  def test_textmode_paragraph_binaryread
    writer = proc do |w|
      w << "a\n\n\ncdefgh".gsub(/\n/, "\r\n")
      w.close
    end
    reader = proc do |r|
      assert_equal("a\n\n", r.gets(""))
      assert_equal("c", r.getc)
      assert_equal("defgh", r.readpartial(10))
    end
    pipe("US-ASCII:UTF-8", { universal_newline: true }, writer, reader)
  end
  # [ruby-dev:41803] [Bug #3534] For each internal encoding (UTF-32, UTF-16
  # and UTF-8), paragraph-mode gets on a pipe is expected to return the first
  # line followed by "\n\n", and the next gets the second line, both encoded
  # in the internal encoding.
  def test_textmode_paragraph_nonasciicompat
    bug3534 = ['[ruby-dev:41803]', '[Bug #3534]']
    IO.pipe {|r, w|
      [Encoding::UTF_32BE, Encoding::UTF_32LE,
       Encoding::UTF_16BE, Encoding::UTF_16LE,
       Encoding::UTF_8].each do |e|
        r.set_encoding(Encoding::US_ASCII, e)
        wthr = Thread.new{ w.print(bug3534[0], "\n\n\n\n", bug3534[1], "\n") }
        begin
          assert_equal((bug3534[0]+"\n\n").encode(e), r.gets(""), bug3534[0])
          assert_equal((bug3534[1]+"\n").encode(e), r.gets(), bug3534[1])
        ensure
          # Join even when an assertion raises, so the writer thread is not
          # left running once the pipe block closes both ends.
          wthr.join
        end
      end
    }
  end
  # [ruby-dev:41803] [Bug #3534] Same paragraph-mode check as the text-mode
  # variant, but with both pipe ends switched to binmode first: for each
  # internal encoding, gets("") is expected to return the first line plus
  # "\n\n" and the next gets the second line, in the internal encoding.
  def test_binmode_paragraph_nonasciicompat
    bug3534 = ['[ruby-dev:41803]', '[Bug #3534]']
    IO.pipe {|r, w|
      r.binmode
      w.binmode
      [Encoding::UTF_32BE, Encoding::UTF_32LE,
       Encoding::UTF_16BE, Encoding::UTF_16LE,
       Encoding::UTF_8].each do |e|
        r.set_encoding(Encoding::US_ASCII, e)
        wthr = Thread.new{ w.print(bug3534[0], "\n\n\n\n", bug3534[1], "\n") }
        begin
          assert_equal((bug3534[0]+"\n\n").encode(e), r.gets(""), bug3534[0])
          assert_equal((bug3534[1]+"\n").encode(e), r.gets(), bug3534[1])
        ensure
          # Join even when an assertion raises, so the writer thread is not
          # left running once the pipe block closes both ends.
          wthr.join
        end
      end
    }
  end
  # [ruby-dev:42212] puts of single UTF-32/UTF-16 characters whose bytes
  # contain 0x0a: the reader expects each character's bytes followed by one
  # "\n" byte, and nothing after the fourth.
  def test_puts_widechar
    bug = '[ruby-dev:42212]'

    writer = proc do |w|
      w.binmode
      [
        [0x010a, Encoding::UTF_32BE],
        [0x010a, Encoding::UTF_16BE],
        [0x0a01, Encoding::UTF_32LE],
        [0x0a01, Encoding::UTF_16LE],
      ].each do |codepoint, encoding|
        w.puts(codepoint.chr(encoding))
      end
      w.close
    end

    reader = proc do |r|
      r.binmode
      assert_equal("\x00\x00\x01\x0a\n", r.read(5), bug)
      assert_equal("\x01\x0a\n", r.read(3), bug)
      assert_equal("\x01\x0a\x00\x00\n", r.read(5), bug)
      assert_equal("\x01\x0a\n", r.read(3), bug)
      assert_equal("", r.read, bug)
      r.close
    end

    pipe(Encoding::ASCII_8BIT, writer, reader)
  end
  # [ruby-core:35630] getc on a text-mode file holding "a" returns a string
  # that reports ascii_only?.
  def test_getc_ascii_only
    ticket = '[ruby-core:35630]'
    first_char = with_tmpdir do
      open("a", "wb") { |out| out.puts "a" }
      open("a", "rt", &:getc)
    end
    assert_predicate(first_char, :ascii_only?, ticket)
  end
  # [Bug #8516] getc through an iso-8859-1 -> utf-8 conversion of the byte
  # \xe1 returns a single, non-ASCII character.
  def test_getc_conversion
    ticket = '[ruby-core:55444] [Bug #8516]'
    converted = with_tmpdir do
      open("a", "wb") { |out| out.putc "\xe1" }
      open("a", "r:iso-8859-1:utf-8", &:getc)
    end
    assert_not_predicate(converted, :ascii_only?, ticket)
    assert_equal(1, converted.size, ticket)
  end
  # On mswin/mingw only: writing "\n" in the default "w" mode stores "\r\n".
  def test_default_mode_on_dosish
    with_tmpdir do
      open("a", "w") { |out| out.write "\n" }
      on_disk = IO.binread("a")
      assert_equal("\r\n", on_disk)
    end
  end if /mswin|mingw/ =~ RUBY_PLATFORM
  # On platforms other than mswin/mingw: writing "\n" in the default "w"
  # mode stores "\n" unchanged.
  def test_default_mode_on_unix
    with_tmpdir do
      open("a", "w") { |out| out.write "\n" }
      on_disk = IO.binread("a")
      assert_equal("\n", on_disk)
    end
  end unless /mswin|mingw/ =~ RUBY_PLATFORM
  # Reading "\r\n" in "rt" mode yields "\n".
  def test_text_mode
    with_tmpdir do
      open("a", "wb") { |out| out.write "\r\n" }
      content = open("a", "rt", &:read)
      assert_equal("\n", content)
    end
  end
  # Reading "\r\n" in "rb" mode yields "\r\n" unchanged.
  def test_binary_mode
    with_tmpdir do
      open("a", "wb") { |out| out.write "\r\n" }
      content = open("a", "rb", &:read)
      assert_equal("\r\n", content)
    end
  end
  # On mswin/mingw only: a child ruby fed a script on stdin writes "abc" to
  # STDOUT and "def" to STDERR with puts; both streams share one pipe and
  # the bytes read back are expected to use "\r\n" line endings.
  def test_default_stdout_stderr_mode
    with_pipe do |stdin_r, stdin_w|
      with_pipe do |output_r, output_w|
        child = Process.spawn({}, EnvUtil.rubybin, in: stdin_r, out: output_w, err: output_w)
        stdin_r.close
        output_w.close
        script = <<~EOS
          STDOUT.puts "abc"
          STDOUT.flush
          STDERR.puts "def"
          STDERR.flush
        EOS
        stdin_w.write script
        stdin_w.close
        Process.wait child
        assert_equal "abc\r\ndef\r\n", output_r.binmode.read
        output_r.close
      end
    end
  end if /mswin|mingw/ =~ RUBY_PLATFORM
  # A child ruby sets +newline: :cr+ on STDOUT and puts "abc"; the bytes
  # read back from its stdout are expected to be "abc\r".
  def test_cr_decorator_on_stdout
    with_pipe do |stdin_r, stdin_w|
      with_pipe do |output_r, output_w|
        child = Process.spawn({}, EnvUtil.rubybin, in: stdin_r, out: output_w)
        stdin_r.close
        output_w.close
        script = <<~EOS
          STDOUT.set_encoding('locale', nil, newline: :cr)
          STDOUT.puts "abc"
          STDOUT.flush
        EOS
        stdin_w.write script
        stdin_w.close
        Process.wait child
        assert_equal "abc\r", output_r.binmode.read
        output_r.close
      end
    end
  end
  # A child ruby sets +newline: :lf+ on STDOUT and puts "abc"; the bytes
  # read back from its stdout are expected to be "abc\n".
  def test_lf_decorator_on_stdout
    with_pipe do |stdin_r, stdin_w|
      with_pipe do |output_r, output_w|
        child = Process.spawn({}, EnvUtil.rubybin, in: stdin_r, out: output_w)
        stdin_r.close
        output_w.close
        script = <<~EOS
          STDOUT.set_encoding('locale', nil, newline: :lf)
          STDOUT.puts "abc"
          STDOUT.flush
        EOS
        stdin_w.write script
        stdin_w.close
        Process.wait child
        assert_equal "abc\n", output_r.binmode.read
        output_r.close
      end
    end
  end
  # A child ruby sets +newline: :crlf+ on STDOUT and puts "abc"; the bytes
  # read back from its stdout are expected to be "abc\r\n".
  def test_crlf_decorator_on_stdout
    with_pipe do |stdin_r, stdin_w|
      with_pipe do |output_r, output_w|
        child = Process.spawn({}, EnvUtil.rubybin, in: stdin_r, out: output_w)
        stdin_r.close
        output_w.close
        script = <<~EOS
          STDOUT.set_encoding('locale', nil, newline: :crlf)
          STDOUT.puts "abc"
          STDOUT.flush
        EOS
        stdin_w.write script
        stdin_w.close
        Process.wait child
        assert_equal "abc\r\n", output_r.binmode.read
        output_r.close
      end
    end
  end
  # On mswin/mingw only: the first two getc calls on the pipe reader see
  # "a" and "\n" (CRLF translated); after r.binmode the remaining bytes come
  # back untranslated, ending with nil at EOF.
  def test_binmode_with_pipe
    with_pipe do |r, w|
      payload = "a\r\nb\r\nc\r\n"
      w.binmode.write payload
      w.close

      ["a", "\n"].each do |ch|
        assert_equal(ch, r.getc)
      end

      r.binmode
      ["b", "\r", "\n", "c", "\r", "\n", nil].each do |ch|
        assert_equal(ch, r.getc)
      end
      r.close
    end
  end if /mswin|mingw/ =~ RUBY_PLATFORM
  # On mswin/mingw only: a child ruby echoes two characters from STDIN,
  # switches STDIN to binmode, then echoes seven more. Given
  # "a\r\nb\r\nc\r\n" on stdin the expected output is "a\nb\r\nc\r\n".
  def test_stdin_binmode
    with_pipe do |stdin_r, stdin_w|
      with_pipe do |output_r, output_w|
        script = <<~'End'
          STDOUT.binmode
          STDOUT.write STDIN.getc
          STDOUT.write STDIN.getc
          STDIN.binmode
          STDOUT.write STDIN.getc
          STDOUT.write STDIN.getc
          STDOUT.write STDIN.getc
          STDOUT.write STDIN.getc
          STDOUT.write STDIN.getc
          STDOUT.write STDIN.getc
          STDOUT.write STDIN.getc
        End
        child = Process.spawn({}, EnvUtil.rubybin, '-e', script, in: stdin_r, out: output_w)
        stdin_r.close
        output_w.close
        payload = "a\r\nb\r\nc\r\n"
        stdin_w.binmode.write payload
        stdin_w.close
        Process.wait child
        assert_equal "a\nb\r\nc\r\n", output_r.binmode.read
        output_r.close
      end
    end
  end if /mswin|mingw/ =~ RUBY_PLATFORM
  # On mswin/mingw only: read(3) on a default-mode file containing "a\nb"
  # returns the whole content.
  def test_read_with_length
    with_tmpdir do
      content = "a\nb"
      generate_file("tmp", content)
      open("tmp", "r") { |io| assert_equal(content, io.read(3)) }
    end
  end if /mswin|mingw/ =~ RUBY_PLATFORM
  # On mswin/mingw only: read with a length returns raw bytes (CRLF kept),
  # while the following read without a length translates CRLF to "\n".
  def test_read_with_length_binmode
    with_tmpdir do
      content = "a\r\nb\r\nc\r\n\r\n"
      generate_file("tmp", content)
      open("tmp", "r") { |io|
        # read with length should be binary mode
        assert_equal("a\r\n", io.read(3)) # binary
        assert_equal("b\nc\n\n", io.read) # text
      }
    end
  end if /mswin|mingw/ =~ RUBY_PLATFORM
  # On mswin/mingw only: gets translates CRLF, read(n) returns raw bytes,
  # and a final read without a length translates again.
  def test_gets_and_read_with_binmode
    with_tmpdir do
      content = "a\r\nb\r\nc\r\n\n\r\n"
      generate_file("tmp", content)
      open("tmp", "r") { |io|
        assert_equal("a\n", io.gets) # text
        assert_equal("b\r\n", io.read(3)) # binary
        assert_equal("c\r\n", io.read(3)) # binary
        assert_equal("\n\n", io.read) # text
      }
    end
  end if /mswin|mingw/ =~ RUBY_PLATFORM
  # On mswin/mingw only: getc translates CRLF, read(n) returns raw bytes,
  # and a final read without a length translates the remainder.
  def test_getc_and_read_with_binmode
    with_tmpdir do
      content = "a\r\nb\r\nc\n\n\r\n\r\n"
      generate_file("tmp", content)
      open("tmp", "r") { |io|
        assert_equal("a", io.getc) # text
        assert_equal("\n", io.getc) # text
        assert_equal("b\r\n", io.read(3)) # binary
        assert_equal("c\n\n\n\n", io.read) # text
      }
    end
  end if /mswin|mingw/ =~ RUBY_PLATFORM
  # On mswin/mingw only: after a raw read(n) in the middle of the stream,
  # gets goes back to translating CRLF.
  def test_read_with_binmode_and_gets
    with_tmpdir do
      content = "a\r\nb\r\nc\r\n\r\n"
      open("tmp", "wb") { |out| out.write content }
      open("tmp", "r") { |io|
        assert_equal("a", io.getc) # text
        assert_equal("\n", io.getc) # text
        assert_equal("b\r\n", io.read(3)) # binary
        assert_equal("c\n", io.gets) # text
        assert_equal("\n", io.gets) # text
      }
    end
  end if /mswin|mingw/ =~ RUBY_PLATFORM
  # On mswin/mingw only: after a raw read(n) in the middle of the stream,
  # getc goes back to translating CRLF.
  def test_read_with_binmode_and_getc
    with_tmpdir do
      content = "a\r\nb\r\nc\r\n\r\n"
      open("tmp", "wb") { |out| out.write content }
      open("tmp", "r") { |io|
        assert_equal("a", io.getc) # text
        assert_equal("\n", io.getc) # text
        assert_equal("b\r\n", io.read(3)) # binary
        assert_equal("c", io.getc) # text
        assert_equal("\n", io.getc) # text
        assert_equal("\n", io.getc) # text
      }
    end
  end if /mswin|mingw/ =~ RUBY_PLATFORM
  # On mswin/mingw only: on an "r+" stream a raw read(n) is followed by a
  # text-mode write; the file then reads back as "a\nb\n\n" in text mode
  # and "a\r\nb\r\n\r\n" after binmode.
  def test_read_write_with_binmode
    with_tmpdir do
      content = "a\r\n"
      generate_file("tmp", content)
      open("tmp", "r+") { |io|
        assert_equal("a\r\n", io.read(3)) # binary
        io.write("b\n\n") # text
        io.rewind
        assert_equal("a\nb\n\n", io.read) # text
        io.rewind
        assert_equal("a\r\nb\r\n\r\n", io.binmode.read) # binary
      }
    end
  end if /mswin|mingw/ =~ RUBY_PLATFORM
  # On mswin/mingw only: a text-mode gets followed by a raw read(3) on a
  # file with many trailing newlines returns the second line's raw bytes.
  def test_seek_with_setting_binmode
    with_tmpdir do
      content = "a\r\nb\r\nc\r\n\r\n\n\n\n\n\n\n\n"
      generate_file("tmp", content)
      open("tmp", "r") { |io|
        assert_equal("a\n", io.gets) # text
        assert_equal("b\r\n", io.read(3)) # binary
      }
    end
  end if /mswin|mingw/ =~ RUBY_PLATFORM
  # [ruby-dev:45279] Opening a missing file whose name is in sjis or
  # iso-8859-1 raises an error whose message has the path's encoding.
  def test_error_nonascii
    ticket = '[ruby-dev:45279]'
    names = ["\u{3042}".encode("sjis"), "\u{ff}".encode("iso-8859-1")]
    message_encodings = with_tmpdir do
      names.map do |name|
        begin
          open(name)
        rescue => err
          err.message.encoding
        end
      end
    end
    assert_equal(names.map(&:encoding), message_encodings, ticket)
  end
  # [ruby-dev:45280] inspect on a file opened under an sjis or iso-8859-1
  # path returns a string in the path's encoding.
  def test_inspect_nonascii
    ticket = '[ruby-dev:45280]'
    names = ["\u{3042}".encode("sjis"), "\u{ff}".encode("iso-8859-1")]
    inspect_encodings = with_tmpdir do
      names.map do |name|
        open(name, "wb") { |io| io.inspect.encoding }
      end
    end
    assert_equal(names.map(&:encoding), inspect_encodings, ticket)
  end
  # [ruby-core:43497] On mswin/mingw only: calling pos between readline
  # calls on a CRLF file reports the raw byte offset and does not disturb
  # the following reads.
  def test_pos_dont_move_cursor_position
    ticket = '[ruby-core:43497]'
    with_tmpdir do
      content = "line one\r\nline two\r\nline three\r\n"
      generate_file("tmp", content)
      open("tmp", "r") { |io|
        assert_equal("line one\n", io.readline)
        assert_equal(10, io.pos, ticket)
        assert_equal("line two\n", io.readline, ticket)
        assert_equal(20, io.pos, ticket)
        assert_equal("line three\n", io.readline, ticket)
      }
    end
  end if /mswin|mingw/ =~ RUBY_PLATFORM
  # [ruby-core:44874] On mswin/mingw only: calling pos before each readline
  # must not break reading when a "\r" falls at the read-buffer boundary.
  def test_pos_with_buffer_end_cr
    ticket = '[ruby-core:44874]'
    with_tmpdir do
      # Read buffer size is 8191. This generates '\r' at 8191.
      expected_lines = ["X" * 8187, "X"]
      generate_file("tmp", expected_lines.join("\r\n") + "\r\n")
      open("tmp", "r") { |io|
        expected_lines.each { |expected|
          io.pos
          assert_equal(expected, io.readline.chomp, ticket)
        }
      }
    end
  end if /mswin|mingw/ =~ RUBY_PLATFORM
  # [ruby-core:44189] On mswin/mingw only: alternating eof? and read(1)
  # walks through every byte of a CRLF file, one byte per read.
  def test_read_crlf_and_eof
    ticket = '[ruby-core:44189]'
    with_tmpdir do
      content = "a\r\nb\r\nc\r\n"
      generate_file("tmp", content)
      open("tmp", "r") { |io|
        offset = 0
        loop do
          break if io.eof?
          assert_equal(content[offset], io.read(1), ticket)
          offset += 1
        end
        assert_equal(content.size, offset, ticket)
      }
    end
  end if /mswin|mingw/ =~ RUBY_PLATFORM
  # Reading one non-ASCII byte into an existing buffer with read(1, buf)
  # must update the buffer's ascii_only? answer: it is true for the empty
  # buffer and false after the byte \xE2 has been read into it.
  def test_read_with_buf_broken_ascii_only
    a, b = IO.pipe
    a.binmode
    b.binmode
    b.write("\xE2\x9C\x93")
    b.close
    buf = "".force_encoding("binary")
    # Query ascii_only? before the read so the later answer has to change.
    assert buf.ascii_only?, "should have been ascii_only?"
    a.read(1, buf)
    assert !buf.ascii_only?, "should not have been ascii_only?"
  ensure
    # Nil-safe closes instead of "rescue nil": only a failed IO.pipe (nil
    # ends) is tolerated, and any other cleanup error stays visible.
    a&.close
    b&.close
  end
  # [Bug #11444] Runs each_codepoint in a child ruby over two inputs, in
  # both "rb" and "rt" modes: a truncated multibyte character (expected to
  # report "invalid byte sequence in UTF-8" on stderr) and a multibyte
  # character placed after 8190 "x" bytes (expected to print its codepoint).
  # Failures are collected so every mode/input combination runs before the
  # test is flunked.
  def test_each_codepoint_need_more
    ticket = '[ruby-core:70379] [Bug #11444]'
    cases = [
      ["incomplete multibyte", "\u{1f376}".b[0,3], [], ["invalid byte sequence in UTF-8"]],
      ["multibyte at boundary", "x"*8190+"\u{1f376}", ["1f376"], []],
    ]
    failures = []
    ["bin", "text"].product(cases).each do |mode, (label, data, out, err)|
      # The script lines are kept flush with each other so the text passed
      # to -e is unchanged.
      script = <<~"end;"
        c = nil
        begin
        open(ARGV[0], "r#{mode[0]}:utf-8") do |f|
        f.each_codepoint{|i| c = i}
        end
        rescue ArgumentError => e
        STDERR.puts e.message
        else
        printf "%x", c
        end
      end;
      Tempfile.create("codepoint") do |tmp|
        child_args = ['-e', script, tmp.path]
        tmp.print data
        tmp.close
        begin
          assert_in_out_err(child_args, "", out, err,
                            "#{ticket}: #{label} in #{mode} mode",
                            timeout: 10)
        rescue Exception => e
          failures << e
        end
      end
    end
    flunk failures.join("\n---\n") unless failures.empty?
  end
  2508. end