PageRenderTime 41ms CodeModel.GetById 0ms RepoModel.GetById 0ms app.codeStats 1ms

/test/ruby/test_econv.rb

http://github.com/ruby/ruby
Ruby | 943 lines | 851 code | 91 blank | 1 comment | 1 complexity | 255ff937b6a7c05506f7ba8ed6933c93 MD5 | raw file
Possible License(s): GPL-2.0, BSD-3-Clause, AGPL-3.0
  1. # frozen_string_literal: false
  2. require 'test/unit'
  3. class TestEncodingConverter < Test::Unit::TestCase
  # Runs one primitive_convert step on +ec+ and asserts the resulting
  # destination buffer, remaining source and return symbol in one go.
  # +opts+ may be nil, an Integer flag set, or a Hash of keyword options.
  def check_ec(edst, esrc, eres, dst, src, ec, off, len, opts=nil)
    res =
      if opts.is_a?(Hash)
        # A Hash has to be splatted so it is received as keyword options.
        ec.primitive_convert(src, dst, off, len, **opts)
      else
        ec.primitive_convert(src, dst, off, len, opts)
      end
    expected = [edst.b, esrc.b, eres]
    actual = [dst.b, src.b, res]
    assert_equal(expected, actual)
  end
  # Converts +consumed+ + +rest+ with +ec+ (a converter, or an Array of
  # arguments for Encoding::Converter.new) into a fresh buffer limited to
  # +obuf_bytesize+ bytes, then asserts the converted output, the return
  # symbol and the unconsumed remainder of the input.
  #
  # +opts+ may be nil, an Integer flag set, or a Hash of options. A Hash is
  # splatted as keywords, the same way check_ec does, because
  # primitive_convert only accepts an Integer as its fifth positional
  # argument.
  def assert_econv(converted, eres, obuf_bytesize, ec, consumed, rest, opts=nil)
    ec = Encoding::Converter.new(*ec) if Array === ec
    i = consumed + rest
    o = ""
    ret =
      case opts
      when Hash
        ec.primitive_convert(i, o, 0, obuf_bytesize, **opts)
      else
        ec.primitive_convert(i, o, 0, obuf_bytesize, opts)
      end
    assert_equal([converted, eres, rest],
                 [o, ret, i])
  end
  # Asserts that ec.primitive_errinfo equals the expected five-element
  # tuple. The expected byte strings are compared as binary; nil is kept
  # as nil.
  def assert_errinfo(e_res, e_enc1, e_enc2, e_error_bytes, e_readagain_bytes, ec)
    expected = [e_res, e_enc1, e_enc2]
    expected << e_error_bytes&.b
    expected << e_readagain_bytes&.b
    assert_equal(expected, ec.primitive_errinfo)
  end
  # Converter.asciicompat_encoding returns the ASCII-compatible counterpart
  # of an encoding, or nil when there is none.
  def test_s_asciicompat_encoding
    ["ISO-2022-JP", Encoding::ISO_2022_JP].each do |enc|
      assert_equal(Encoding::STATELESS_ISO_2022_JP, Encoding::Converter.asciicompat_encoding(enc))
    end

    ["UTF-16BE", "UTF-16LE", "UTF-32BE", "UTF-32LE"].each do |name|
      assert_equal(Encoding::UTF_8, Encoding::Converter.asciicompat_encoding(name))
    end

    ["EUC-JP", "UTF-8", Encoding::UTF_8, "xml_attr_escape", "encoding-not-exist"].each do |enc|
      assert_nil(Encoding::Converter.asciicompat_encoding(enc))
    end
  end
  # An ISO-2022-JP string must survive a round trip through the encoding
  # reported by asciicompat_encoding.
  def test_asciicompat_encoding_iso2022jp
    stateless = Encoding::Converter.asciicompat_encoding("ISO-2022-JP")
    original = "\e$B~~\e(B".force_encoding("iso-2022-jp")
    intermediate = original.encode(stateless)
    round_trip = intermediate.encode("ISO-2022-JP")
    assert_equal(original, round_trip)
  end
  # Converter.new accepts encoding names as well as Encoding objects.
  def test_s_new
    [["UTF-8", "EUC-JP"], [Encoding::UTF_8, Encoding::EUC_JP]].each do |from, to|
      assert_kind_of(Encoding::Converter, Encoding::Converter.new(from, to))
    end
  end
  # Converter.new(convpath) normalises names to Encoding objects and
  # expands indirect conversions into their individual steps.
  def test_s_new_convpath
    convpath_of = ->(spec) { Encoding::Converter.new(spec).convpath }
    utf8_to_eucjp = [Encoding::UTF_8, Encoding::EUC_JP]
    latin1_to_eucjp = [[Encoding::ISO_8859_1, Encoding::UTF_8], utf8_to_eucjp]

    assert_equal([], convpath_of.call([]))
    assert_equal([utf8_to_eucjp], convpath_of.call([["UTF-8", "EUC-JP"]]))
    assert_equal([[Encoding::UTF_8, Encoding::WINDOWS_31J]],
                 convpath_of.call([["utf-8", "cp932"]]))
    assert_equal([utf8_to_eucjp],
                 convpath_of.call([[Encoding::UTF_8, Encoding::EUC_JP]]))
    assert_equal(latin1_to_eucjp, convpath_of.call([["iso-8859-1", "euc-jp"]]))
    assert_equal(latin1_to_eucjp + ["universal_newline"],
                 convpath_of.call([["iso-8859-1", "euc-jp"], "universal_newline"]))
    assert_equal(["universal_newline"] + latin1_to_eucjp + ["universal_newline"],
                 convpath_of.call(["universal_newline", ["iso-8859-1", "euc-jp"], "universal_newline"]))
  end
  # Unknown encoding names raise ConverterNotFoundError and must not be
  # registered as encodings as a side effect.
  def test_s_new_fail
    bogus_names = ["encoding-which-is-not-exist-1", "encoding-which-is-not-exist-2"]
    assert_raise(Encoding::ConverterNotFoundError) do
      Encoding::Converter.new(*bogus_names)
    end
    known_names = Encoding.list.map(&:name)
    bogus_names.each { |bogus| assert_not_include(known_names, bogus) }
  end
  # Every newline decorator can be combined with an ASCII-incompatible
  # encoding on either side of the conversion.
  def test_newline_converter_with_ascii_incompatible
    [
      ["UTF-8", "UTF-16BE", Encoding::Converter::UNIVERSAL_NEWLINE_DECORATOR],
      ["UTF-16BE", "UTF-8", Encoding::Converter::CRLF_NEWLINE_DECORATOR],
      ["UTF-16BE", "UTF-8", Encoding::Converter::CR_NEWLINE_DECORATOR],
      ["UTF-16BE", "UTF-8", Encoding::Converter::UNIVERSAL_NEWLINE_DECORATOR],
      ["UTF-8", "UTF-16BE", Encoding::Converter::CRLF_NEWLINE_DECORATOR],
      ["UTF-8", "UTF-16BE", Encoding::Converter::CR_NEWLINE_DECORATOR],
    ].each do |from, to, decorator|
      assert_nothing_raised do
        Encoding::Converter.new(from, to, decorator)
      end
    end
  end
  # source_encoding / destination_encoding report the endpoints passed to new.
  def test_get_encoding
    converter = Encoding::Converter.new("UTF-8", "EUC-JP")
    source = converter.source_encoding
    assert_equal(Encoding::UTF_8, source)
    destination = converter.destination_encoding
    assert_equal(Encoding::EUC_JP, destination)
  end
  # primitive_convert retags the destination buffer with the destination
  # encoding.
  def test_result_encoding
    converter = Encoding::Converter.new("UTF-8", "EUC-JP")
    output = "".force_encoding("ASCII-8BIT")
    assert_equal(Encoding::ASCII_8BIT, output.encoding)

    converter.primitive_convert("\u{3042}", output, nil, 10)
    assert_equal(Encoding::EUC_JP, output.encoding)
  end
  # Exercises the destination_byteoffset / destination_bytesize arguments
  # of primitive_convert: nil appends, an offset truncates the buffer to
  # that position first, and out-of-range values raise ArgumentError.
  def test_output_region
    ec = Encoding::Converter.new("UTF-8", "EUC-JP")

    # [byteoffset, expected buffer] for a destination that starts as "b".
    [[nil, "ba"], [0, "a"], [1, "ba"]].each do |offset, expected|
      dst = "b"
      ec.primitive_convert("a", dst, offset, 1, :partial_input=>true)
      assert_equal(expected, dst)
    end

    # [byteoffset, bytesize] pairs that fall outside the buffer or are negative.
    [[2, 1], [-1, 1], [1, -1]].each do |offset, size|
      assert_raise(ArgumentError) {
        ec.primitive_convert("a", "b", offset, size, :partial_input=>true)
      }
    end
  end
  # A nil source buffer finishes the conversion immediately.
  def test_nil_source_buffer
    converter = Encoding::Converter.new("UTF-8", "EUC-JP")
    assert_equal(:finished, converter.primitive_convert(nil, "", nil, 10))
  end
  # With an explicit nil bytesize the destination buffer is unbounded.
  def test_nil_destination_bytesize
    converter = Encoding::Converter.new("Shift_JIS", "UTF-8")
    count = 10000
    input = "\xa1".force_encoding("Shift_JIS") * count
    output = ""
    status = converter.primitive_convert(input, output, nil, nil)
    assert_equal(:finished, status)
    assert_equal("\xEF\xBD\xA1".force_encoding("UTF-8") * count, output)
  end
  # Omitting byteoffset and bytesize altogether behaves like passing nil.
  def test_nil_destination_bytesize2
    converter = Encoding::Converter.new("Shift_JIS", "UTF-8")
    count = 10000
    input = "\xa1".force_encoding("Shift_JIS") * count
    output = ""
    status = converter.primitive_convert(input, output)
    assert_equal(:finished, status)
    assert_equal("\xEF\xBD\xA1".force_encoding("UTF-8") * count, output)
  end
  # A byteoffset combined with a nil bytesize keeps the first bytes of the
  # buffer and writes an unbounded amount of output after them.
  def test_nil_destination_bytesize_with_nonnil_byteoffset
    converter = Encoding::Converter.new("Shift_JIS", "UTF-8")
    count = 2000
    input = "\xa1".force_encoding("Shift_JIS") * count
    output = "abcd" * 2000
    status = converter.primitive_convert(input, output, 3, nil)
    assert_equal(:finished, status)
    assert_equal("abc" + "\xEF\xBD\xA1".force_encoding("UTF-8") * count, output)
  end
  # With :partial_input an exhausted source reports :source_buffer_empty;
  # without it the same empty input finishes the conversion.
  def test_partial_input
    ec = Encoding::Converter.new("UTF-8", "EUC-JP")
    ret = ec.primitive_convert("", "", nil, 10, :partial_input=>true)
    assert_equal(:source_buffer_empty, ret)
    ret = ec.primitive_convert("", "", nil, 10)
    assert_equal(:finished, ret)
  end
  # Output accumulates in the destination buffer when it is drained one
  # byte per call.
  def test_accumulate_dst1
    ec = Encoding::Converter.new("UTF-8", "EUC-JP")
    dst = ""
    src = "abc\u{3042}def"
    # [expected dst, expected remaining src, expected result]
    [
      ["a", "c\u{3042}def", :destination_buffer_full],
      ["ab", "\u{3042}def", :destination_buffer_full],
      ["abc", "def", :destination_buffer_full],
      ["abc\xA4", "def", :destination_buffer_full],
      ["abc\xA4\xA2", "ef", :destination_buffer_full],
      ["abc\xA4\xA2d", "f", :destination_buffer_full],
      ["abc\xA4\xA2de", "", :destination_buffer_full],
      ["abc\xA4\xA2def", "", :finished],
    ].each do |edst, esrc, eres|
      check_ec(edst, esrc, eres, dst, src, ec, nil, 1)
    end
  end
  # Same as test_accumulate_dst1, draining two bytes per call.
  def test_accumulate_dst2
    ec = Encoding::Converter.new("UTF-8", "EUC-JP")
    dst = ""
    src = "abc\u{3042}def"
    # [expected dst, expected remaining src, expected result]
    [
      ["ab", "\u{3042}def", :destination_buffer_full],
      ["abc\xA4", "def", :destination_buffer_full],
      ["abc\xA4\xA2d", "f", :destination_buffer_full],
      ["abc\xA4\xA2def", "", :finished],
    ].each do |edst, esrc, eres|
      check_ec(edst, esrc, eres, dst, src, ec, nil, 2)
    end
  end
  # Trivial conversions through a converter built from ["UTF-8", "EUC-JP"]:
  # empty input and a single ASCII character.
  def test_eucjp_to_utf8
    ["", "a"].each do |text|
      assert_econv(text, :finished, 100, ["UTF-8", "EUC-JP"], text, "")
    end
  end
  # Empty input converts to empty output for Shift_JIS -> ISO-2022-JP.
  def test_iso2022jp
    path = ["Shift_JIS", "ISO-2022-JP"]
    assert_econv("", :finished, 100, path, "", "")
  end
  # Feeds EUC-JP input piecewise and checks the ISO-2022-JP escape
  # sequences emitted when switching character sets, including the final
  # return to ASCII once input is no longer partial.
  def test_iso2022jp_encode
    ec = Encoding::Converter.new("EUC-JP", "ISO-2022-JP")
    dst = ""
    src = ""
    partial = { partial_input: true }
    # [bytes appended to src, expected dst so far]
    [
      ["a", "a"],
      ["\xA2", "a"],
      ["\xA4", "a\e$B\"$"],
      ["\xA1", "a\e$B\"$"],
      ["\xA2", "a\e$B\"$!\""],
      ["b", "a\e$B\"$!\"\e(Bb"],
      ["\xA2\xA6", "a\e$B\"$!\"\e(Bb\e$B\"&"],
    ].each do |chunk, edst|
      src << chunk
      check_ec(edst, "", :source_buffer_empty, dst, src, ec, nil, 50, partial)
    end
    # Flags 0 (no partial input) lets the converter finish.
    check_ec("a\e$B\"$!\"\e(Bb\e$B\"&\e(B", "", :finished, dst, src, ec, nil, 50, 0)
  end
  # Feeds ISO-2022-JP input one byte at a time and checks the EUC-JP
  # output, including the invalid newline inside a double-byte section.
  def test_iso2022jp_decode
    ec = Encoding::Converter.new("ISO-2022-JP", "EUC-JP")
    dst = ""
    src = ""
    partial = { partial_input: true }
    # [byte appended to src, expected dst so far, expected result]
    [
      ["a", "a", :source_buffer_empty],
      ["\e", "a", :source_buffer_empty],
      ["$", "a", :source_buffer_empty],
      ["B", "a", :source_buffer_empty],
      ["\x21", "a", :source_buffer_empty],
      ["\x22", "a\xA1\xA2", :source_buffer_empty],
      ["\n", "a\xA1\xA2", :invalid_byte_sequence],
      ["\x23", "a\xA1\xA2", :source_buffer_empty],
      ["\x24", "a\xA1\xA2\xA3\xA4", :source_buffer_empty],
      ["\e", "a\xA1\xA2\xA3\xA4", :source_buffer_empty],
      ["(", "a\xA1\xA2\xA3\xA4", :source_buffer_empty],
      ["B", "a\xA1\xA2\xA3\xA4", :source_buffer_empty],
      ["c", "a\xA1\xA2\xA3\xA4c", :source_buffer_empty],
      ["\n", "a\xA1\xA2\xA3\xA4c\n", :source_buffer_empty],
    ].each do |chunk, edst, eres|
      src << chunk
      check_ec(edst, "", eres, dst, src, ec, nil, 50, partial)
    end
  end
  # Invalid bytes stop the conversion with :invalid_byte_sequence, leaving
  # the text before them converted and the text after them unconsumed.
  def test_invalid
    # [expected output, converter arguments, consumed input, remaining input]
    [
      ["", ["UTF-8", "EUC-JP"], "\x80", ""],
      ["a", ["UTF-8", "EUC-JP"], "a\x80", ""],
      ["a", ["UTF-8", "EUC-JP"], "a\x80", "\x80"],
      ["abc", ["UTF-8", "EUC-JP"], "abc\xFF", "def"],
      ["abc", ["Shift_JIS", "EUC-JP"], "abc\xFF", "def"],
      ["abc", ["ISO-2022-JP", "EUC-JP"], "abc\xFF", "def"],
    ].each do |converted, path, consumed, rest|
      assert_econv(converted, :invalid_byte_sequence, 100, path, consumed, rest)
    end
  end
  # Invalid byte in the middle of the input, drained one byte per call.
  def test_invalid2
    ec = Encoding::Converter.new("Shift_JIS", "EUC-JP")
    dst = ""
    src = "abc\xFFdef"
    # [expected dst, expected remaining src, expected result]
    [
      ["a", "c\xFFdef", :destination_buffer_full],
      ["ab", "\xFFdef", :destination_buffer_full],
      ["abc", "def", :invalid_byte_sequence],
      ["abcd", "f", :destination_buffer_full],
      ["abcde", "", :destination_buffer_full],
      ["abcdef", "", :finished],
    ].each do |edst, esrc, eres|
      check_ec(edst, esrc, eres, dst, src, ec, nil, 1)
    end
  end
  # Invalid byte in the middle of the input with a roomy output buffer.
  def test_invalid3
    ec = Encoding::Converter.new("Shift_JIS", "EUC-JP")
    dst = ""
    src = "abc\xFFdef"
    check_ec("abc", "def", :invalid_byte_sequence, dst, src, ec, nil, 10)
    check_ec("abcdef", "", :finished, dst, src, ec, nil, 10)
  end
  # Invalid byte in the middle of the input while stopping after every
  # piece of output (:after_output).
  def test_invalid4
    ec = Encoding::Converter.new("Shift_JIS", "EUC-JP")
    dst = ""
    src = "abc\xFFdef"
    opts = { after_output: true }
    # [expected dst, expected remaining src, expected result]
    [
      ["a", "bc\xFFdef", :after_output],
      ["ab", "c\xFFdef", :after_output],
      ["abc", "\xFFdef", :after_output],
      ["abc", "def", :invalid_byte_sequence],
      ["abcd", "ef", :after_output],
      ["abcde", "f", :after_output],
      ["abcdef", "", :after_output],
      ["abcdef", "", :finished],
    ].each do |edst, esrc, eres|
      check_ec(edst, esrc, eres, dst, src, ec, nil, 10, opts)
    end
  end
  # Feeds UTF-16LE one byte at a time, including unpaired surrogates that
  # must be reported as invalid.
  def test_invalid_utf16le
    ec = Encoding::Converter.new("UTF-16LE", "UTF-8")
    dst = ""
    src = ""
    partial = { partial_input: true }
    # [byte appended to src, expected dst so far, expected result]
    [
      ["A", "", :source_buffer_empty],
      ["\x00", "A", :source_buffer_empty],
      ["\x00", "A", :source_buffer_empty],
      ["\xd8", "A", :source_buffer_empty],
      ["\x01", "A", :source_buffer_empty],
      ["\x02", "A", :invalid_byte_sequence],
      ["\x03", "A\u{0201}", :source_buffer_empty],
      ["\x04", "A\u{0201}\u{0403}", :source_buffer_empty],
      ["\x00", "A\u{0201}\u{0403}", :source_buffer_empty],
      ["\xd8", "A\u{0201}\u{0403}", :source_buffer_empty],
      ["\x00", "A\u{0201}\u{0403}", :source_buffer_empty],
      ["\xd8", "A\u{0201}\u{0403}", :invalid_byte_sequence],
      ["\x00", "A\u{0201}\u{0403}", :source_buffer_empty],
      ["\xdc", "A\u{0201}\u{0403}\u{10000}", :source_buffer_empty],
    ].each do |chunk, edst, eres|
      src << chunk
      check_ec(edst, "", eres, dst, src, ec, nil, 50, partial)
    end
  end
  # Feeds UTF-16BE one byte at a time, including unpaired surrogates that
  # must be reported as invalid.
  def test_invalid_utf16be
    ec = Encoding::Converter.new("UTF-16BE", "UTF-8")
    dst = ""
    src = ""
    partial = { partial_input: true }
    # [byte appended to src, expected dst so far, expected result]
    [
      ["\x00", "", :source_buffer_empty],
      ["A", "A", :source_buffer_empty],
      ["\xd8", "A", :source_buffer_empty],
      ["\x00", "A", :source_buffer_empty],
      ["\x02", "A", :invalid_byte_sequence],
      ["\x01", "A\u{0201}", :source_buffer_empty],
      ["\x04", "A\u{0201}", :source_buffer_empty],
      ["\x03", "A\u{0201}\u{0403}", :source_buffer_empty],
      ["\xd8", "A\u{0201}\u{0403}", :source_buffer_empty],
      ["\x00", "A\u{0201}\u{0403}", :source_buffer_empty],
      ["\xd8", "A\u{0201}\u{0403}", :invalid_byte_sequence],
      ["\x00", "A\u{0201}\u{0403}", :source_buffer_empty],
      ["\xdc", "A\u{0201}\u{0403}", :source_buffer_empty],
      ["\x00", "A\u{0201}\u{0403}\u{10000}", :source_buffer_empty],
    ].each do |chunk, edst, eres|
      src << chunk
      check_ec(edst, "", eres, dst, src, ec, nil, 50, partial)
    end
  end
  # Feeds UTF-32BE one byte at a time, including a surrogate code point
  # that must be reported as invalid.
  def test_invalid_utf32be
    ec = Encoding::Converter.new("UTF-32BE", "UTF-8")
    dst = ""
    src = ""
    partial = { partial_input: true }
    # [byte appended to src, expected dst so far, expected result]
    [
      ["\x00", "", :source_buffer_empty],
      ["\x00", "", :source_buffer_empty],
      ["\x00", "", :source_buffer_empty],
      ["A", "A", :source_buffer_empty],
      ["\x00", "A", :source_buffer_empty],
      ["\x00", "A", :source_buffer_empty],
      ["\xdc", "A", :source_buffer_empty],
      ["\x00", "A", :invalid_byte_sequence],
      ["\x00", "A", :source_buffer_empty],
      ["\x00", "A", :source_buffer_empty],
      ["\x00", "A", :source_buffer_empty],
      ["B", "AB", :source_buffer_empty],
      ["\x00", "AB", :source_buffer_empty],
      ["\x00", "AB", :source_buffer_empty],
      ["\x00", "AB", :source_buffer_empty],
      ["C", "ABC", :source_buffer_empty],
    ].each do |chunk, edst, eres|
      src << chunk
      check_ec(edst, "", eres, dst, src, ec, nil, 50, partial)
    end
  end
  # Feeds UTF-32LE one byte at a time, including a surrogate code point
  # that must be reported as invalid.
  def test_invalid_utf32le
    ec = Encoding::Converter.new("UTF-32LE", "UTF-8")
    dst = ""
    src = ""
    partial = { partial_input: true }
    # [byte appended to src, expected dst so far, expected result]
    [
      ["A", "", :source_buffer_empty],
      ["\x00", "", :source_buffer_empty],
      ["\x00", "", :source_buffer_empty],
      ["\x00", "A", :source_buffer_empty],
      ["\x00", "A", :source_buffer_empty],
      ["\xdc", "A", :source_buffer_empty],
      ["\x00", "A", :source_buffer_empty],
      ["\x00", "A", :invalid_byte_sequence],
      ["B", "A", :source_buffer_empty],
      ["\x00", "A", :source_buffer_empty],
      ["\x00", "A", :source_buffer_empty],
      ["\x00", "AB", :source_buffer_empty],
      ["C", "AB", :source_buffer_empty],
      ["\x00", "AB", :source_buffer_empty],
      ["\x00", "AB", :source_buffer_empty],
      ["\x00", "ABC", :source_buffer_empty],
    ].each do |chunk, edst, eres|
      src << chunk
      check_ec(edst, "", eres, dst, src, ec, nil, 50, partial)
    end
  end
  # An undefined conversion followed by an invalid sequence in one input:
  # each error is reported by its own primitive_convert call.
  def test_errors
    ec = Encoding::Converter.new("UTF-16BE", "EUC-JP")
    dst = ""
    src = "\xFF\xFE\x00A\xDC\x00\x00B"
    check_ec("", "\x00A\xDC\x00\x00B", :undefined_conversion, dst, src, ec, nil, 10)
    # \xDC\x00 is invalid as UTF-16BE
    check_ec("A", "\x00B", :invalid_byte_sequence, dst, src, ec, nil, 10)
    check_ec("AB", "", :finished, dst, src, ec, nil, 10)
  end
  # Same input as test_errors, additionally stopping after every piece of
  # output (:after_output).
  def test_errors2
    ec = Encoding::Converter.new("UTF-16BE", "EUC-JP")
    dst = ""
    src = "\xFF\xFE\x00A\xDC\x00\x00B"
    opts = { after_output: true }
    # [expected dst, expected remaining src, expected result]
    [
      ["", "\x00A\xDC\x00\x00B", :undefined_conversion],
      ["A", "\xDC\x00\x00B", :after_output],
      ["A", "\x00B", :invalid_byte_sequence],
      ["AB", "", :after_output],
      ["AB", "", :finished],
    ].each do |edst, esrc, eres|
      check_ec(edst, esrc, eres, dst, src, ec, nil, 10, opts)
    end
  end
  # universal_newline turns CRLF and CR into LF, also when a CRLF pair is
  # split across two input chunks.
  def test_universal_newline
    ec = Encoding::Converter.new("UTF-8", "EUC-JP", universal_newline: true)
    dst = ""
    src = ""
    partial = { partial_input: true }
    # [chunk appended to src, expected dst so far]
    [
      ["abc\r\ndef", "abc\ndef"],
      ["ghi\njkl", "abc\ndefghi\njkl"],
      ["mno\rpqr", "abc\ndefghi\njklmno\npqr"],
      ["stu\r", "abc\ndefghi\njklmno\npqrstu"],
      ["\nvwx", "abc\ndefghi\njklmno\npqrstu\nvwx"],
      ["\nyz", "abc\ndefghi\njklmno\npqrstu\nvwx\nyz"],
    ].each do |chunk, edst|
      src << chunk
      check_ec(edst, "", :source_buffer_empty, dst, src, ec, nil, 50, partial)
    end
  end
  # universal_newline on a converter created with empty encoding names.
  def test_universal_newline2
    ec = Encoding::Converter.new("", "", universal_newline: true)
    dst = ""
    src = ""
    partial = { partial_input: true }
    # [chunk appended to src, expected dst so far]
    [
      ["abc\r\ndef", "abc\ndef"],
      ["ghi\njkl", "abc\ndefghi\njkl"],
      ["mno\rpqr", "abc\ndefghi\njklmno\npqr"],
      ["stu\r", "abc\ndefghi\njklmno\npqrstu"],
      ["\nvwx", "abc\ndefghi\njklmno\npqrstu\nvwx"],
      ["\nyz", "abc\ndefghi\njklmno\npqrstu\nvwx\nyz"],
    ].each do |chunk, edst|
      src << chunk
      check_ec(edst, "", :source_buffer_empty, dst, src, ec, nil, 50, partial)
    end
  end
  # Like test_universal_newline2, plus a trailing CR that is only emitted
  # (as LF) once the input is no longer partial.
  def test_universal_newline3
    ec = Encoding::Converter.new("", "", universal_newline: true)
    dst = ""
    src = ""
    partial = { partial_input: true }
    # [chunk appended to src, expected dst so far]
    [
      ["abc\r\ndef", "abc\ndef"],
      ["ghi\njkl", "abc\ndefghi\njkl"],
      ["mno\rpqr", "abc\ndefghi\njklmno\npqr"],
      ["stu\r", "abc\ndefghi\njklmno\npqrstu"],
      ["\nvwx", "abc\ndefghi\njklmno\npqrstu\nvwx"],
      ["\nyz", "abc\ndefghi\njklmno\npqrstu\nvwx\nyz"],
      ["\r", "abc\ndefghi\njklmno\npqrstu\nvwx\nyz"],
    ].each do |chunk, edst|
      src << chunk
      check_ec(edst, "", :source_buffer_empty, dst, src, ec, nil, 50, partial)
    end
    # No options on the last call: the pending CR is flushed.
    src << ""
    check_ec("abc\ndefghi\njklmno\npqrstu\nvwx\nyz\n", "", :finished, dst, src, ec, nil, 50, nil)
  end
  # crlf_newline expands LF to CRLF.
  def test_crlf_newline
    converter = Encoding::Converter.new("UTF-8", "EUC-JP", crlf_newline: true)
    expected = "abc\r\ndef"
    assert_econv(expected, :finished, 50, converter, "abc\ndef", "")
  end
  # crlf_newline on a converter created with empty encoding names.
  def test_crlf_newline2
    converter = Encoding::Converter.new("", "", crlf_newline: true)
    expected = "abc\r\ndef"
    assert_econv(expected, :finished, 50, converter, "abc\ndef", "")
  end
  # cr_newline replaces LF with CR.
  def test_cr_newline
    converter = Encoding::Converter.new("UTF-8", "EUC-JP", cr_newline: true)
    expected = "abc\rdef"
    assert_econv(expected, :finished, 50, converter, "abc\ndef", "")
  end
  # cr_newline on a converter created with empty encoding names.
  def test_cr_newline2
    converter = Encoding::Converter.new("", "", cr_newline: true)
    expected = "abc\rdef"
    assert_econv(expected, :finished, 50, converter, "abc\ndef", "")
  end
  # universal_newline: false leaves CRLF untouched.
  def test_no_universal_newline1
    converter = Encoding::Converter.new("UTF-8", "EUC-JP", universal_newline: false)
    expected = "abc\r\ndef"
    assert_econv(expected, :finished, 50, converter, "abc\r\ndef", "")
  end
  # universal_newline: false on a converter created with empty encoding names.
  def test_no_universal_newline2
    converter = Encoding::Converter.new("", "", universal_newline: false)
    expected = "abc\r\ndef"
    assert_econv(expected, :finished, 50, converter, "abc\r\ndef", "")
  end
  # :after_output makes primitive_convert return after each piece of
  # output, one character at a time.
  def test_after_output
    ec = Encoding::Converter.new("UTF-8", "EUC-JP")
    dst = ""
    src = "abc\u{3042}def"
    opts = { after_output: true }
    # [expected dst, expected remaining src, expected result]
    [
      ["a", "bc\u{3042}def", :after_output],
      ["ab", "c\u{3042}def", :after_output],
      ["abc", "\u{3042}def", :after_output],
      ["abc\xA4\xA2", "def", :after_output],
      ["abc\xA4\xA2d", "ef", :after_output],
      ["abc\xA4\xA2de", "f", :after_output],
      ["abc\xA4\xA2def", "", :after_output],
      ["abc\xA4\xA2def", "", :finished],
    ].each do |edst, esrc, eres|
      check_ec(edst, esrc, eres, dst, src, ec, nil, 100, opts)
    end
  end
  # primitive_errinfo after an invalid EUC-JP byte in a direct conversion.
  def test_errinfo_invalid_euc_jp
    converter = Encoding::Converter.new("EUC-JP", "Shift_JIS")
    converter.primitive_convert("\xff", "", nil, 10)
    assert_errinfo(:invalid_byte_sequence, "EUC-JP", "Shift_JIS", "\xFF", "", converter)
  end
  # For EUC-JP -> ISO-8859-1 the invalid byte is reported against the
  # EUC-JP -> UTF-8 step.
  def test_errinfo_invalid_euc_jp2
    converter = Encoding::Converter.new("EUC-JP", "ISO-8859-1")
    converter.primitive_convert("\xff", "", nil, 10)
    assert_errinfo(:invalid_byte_sequence, "EUC-JP", "UTF-8", "\xFF", "", converter)
  end
  # A hiragana character has no ISO-8859-1 mapping; the error is reported
  # against the UTF-8 -> ISO-8859-1 step with the UTF-8 bytes.
  def test_errinfo_undefined_hiragana
    converter = Encoding::Converter.new("EUC-JP", "ISO-8859-1")
    converter.primitive_convert("\xa4\xa2", "", nil, 10)
    assert_errinfo(:undefined_conversion, "UTF-8", "ISO-8859-1", "\xE3\x81\x82", "", converter)
  end
  # A truncated character at the end of complete input is :incomplete_input.
  def test_errinfo_invalid_partial_character
    converter = Encoding::Converter.new("EUC-JP", "ISO-8859-1")
    converter.primitive_convert("\xa4", "", nil, 10)
    assert_errinfo(:incomplete_input, "EUC-JP", "UTF-8", "\xA4", "", converter)
  end
  # With :partial_input a truncated character is not an error.
  def test_errinfo_valid_partial_character
    converter = Encoding::Converter.new("EUC-JP", "ISO-8859-1")
    converter.primitive_convert("\xa4", "", nil, 10, partial_input: true)
    assert_errinfo(:source_buffer_empty, nil, nil, nil, nil, converter)
  end
  # Unpaired high surrogate in UTF-16BE: the error info carries the error
  # bytes and the read-again byte, and the rest of the source is left over.
  def test_errinfo_invalid_utf16be
    converter = Encoding::Converter.new("UTF-16BE", "UTF-8")
    src = "\xd8\x00\x00@"
    converter.primitive_convert(src, "", nil, 10)
    assert_errinfo(:invalid_byte_sequence, "UTF-16BE", "UTF-8", "\xD8\x00", "\x00", converter)
    assert_equal("@", src)
  end
  # Unpaired high surrogate in UTF-16LE: both following bytes are reported
  # as read-again bytes and the source is fully consumed.
  def test_errinfo_invalid_utf16le
    converter = Encoding::Converter.new("UTF-16LE", "UTF-8")
    src = "\x00\xd8@\x00"
    converter.primitive_convert(src, "", nil, 10)
    assert_errinfo(:invalid_byte_sequence, "UTF-16LE", "UTF-8", "\x00\xD8", "@\x00", converter)
    assert_equal("", src)
  end
  # Interleaves primitive_convert with insert_output on a stateful
  # destination encoding: inserted text must get the right escape
  # sequences around it, and text that cannot be represented raises
  # without touching the output.
  def test_output_iso2022jp
    ec = Encoding::Converter.new("EUC-JP", "ISO-2022-JP")
    dst = ""
    ec.primitive_convert("\xa1\xa1", dst, nil, 10, :partial_input=>true)
    assert_equal("\e$B!!".force_encoding("ISO-2022-JP"), dst)

    # ASCII text inserted while in JIS X 0208 mode forces a switch to ASCII.
    assert_nil(ec.insert_output("???"))
    ec.primitive_convert("", dst, nil, 10, :partial_input=>true)
    assert_equal("\e$B!!\e(B???".force_encoding("ISO-2022-JP"), dst)

    ec.primitive_convert("\xa1\xa2", dst, nil, 10, :partial_input=>true)
    assert_equal("\e$B!!\e(B???\e$B!\"".force_encoding("ISO-2022-JP"), dst)

    # Inserted text is converted to the destination encoding first.
    assert_nil(ec.insert_output("\xA1\xA1".force_encoding("EUC-JP")))
    ec.primitive_convert("", dst, nil, 10, :partial_input=>true)
    assert_equal("\e$B!!\e(B???\e$B!\"!!".force_encoding("ISO-2022-JP"), dst)

    ec.primitive_convert("\xa1\xa3", dst, nil, 10, :partial_input=>true)
    assert_equal("\e$B!!\e(B???\e$B!\"!!!\#".force_encoding("ISO-2022-JP"), dst)

    assert_nil(ec.insert_output("\u3042"))
    ec.primitive_convert("", dst, nil, 10, :partial_input=>true)
    assert_equal("\e$B!!\e(B???\e$B!\"!!!\#$\"".force_encoding("ISO-2022-JP"), dst)

    # U+FFFD is rejected and dst stays unchanged.
    assert_raise(Encoding::UndefinedConversionError) {
      ec.insert_output("\uFFFD")
    }
    assert_equal("\e$B!!\e(B???\e$B!\"!!!\#$\"".force_encoding("ISO-2022-JP"), dst)

    # Finishing (no :partial_input) returns the stream to ASCII.
    ec.primitive_convert("", dst, nil, 10)
    assert_equal("\e$B!!\e(B???\e$B!\"!!!\#$\"\e(B".force_encoding("ISO-2022-JP"), dst)
  end
  # InvalidByteSequenceError raised by String#encode exposes the failing
  # conversion step, the offending bytes and the bytes to read again.
  def test_exc_invalid
    err = assert_raise(Encoding::InvalidByteSequenceError) do
      "abc\xa4def".encode("ISO-8859-1", "EUC-JP")
    end

    assert_equal("EUC-JP", err.source_encoding_name)
    assert_equal("UTF-8", err.destination_encoding_name)
    assert_equal(Encoding::EUC_JP, err.source_encoding)
    assert_equal(Encoding::UTF_8, err.destination_encoding)

    error_bytes = "\xA4".force_encoding("ASCII-8BIT")
    assert_equal(error_bytes, err.error_bytes)
    assert_equal("d", err.readagain_bytes)
    assert_equal(false, err.incomplete_input?)
  end
  # A truncated character at the end of the input raises
  # InvalidByteSequenceError flagged as incomplete input, with no
  # read-again bytes.
  def test_exc_incomplete
    err = assert_raise(Encoding::InvalidByteSequenceError) {
      "abc\xa4".encode("ISO-8859-1", "EUC-JP")
    }
    assert_equal("EUC-JP", err.source_encoding_name)
    assert_equal("UTF-8", err.destination_encoding_name)
    assert_equal(Encoding::EUC_JP, err.source_encoding)
    assert_equal(Encoding::UTF_8, err.destination_encoding)
    assert_equal("\xA4".force_encoding("ASCII-8BIT"), err.error_bytes)
    assert_nil(err.readagain_bytes)
    assert_equal(true, err.incomplete_input?)
  end
  # UndefinedConversionError raised by String#encode reports the
  # UTF-8 -> ISO-8859-1 step and the character that has no mapping.
  def test_exc_undef
    err = assert_raise(Encoding::UndefinedConversionError) do
      "abc\xa4\xa2def".encode("ISO-8859-1", "EUC-JP")
    end

    assert_equal("UTF-8", err.source_encoding_name)
    assert_equal("ISO-8859-1", err.destination_encoding_name)
    assert_equal(Encoding::UTF_8, err.source_encoding)
    assert_equal(Encoding::ISO_8859_1, err.destination_encoding)
    assert_equal("\u{3042}", err.error_char)
  end
  # putback returns the bytes that were read ahead when the error was
  # detected, so the caller can prepend them and resume the conversion.
  def test_putback
    ec = Encoding::Converter.new("EUC-JP", "ISO-8859-1")
    src = "abc\xa1def"
    dst = ""
    status = ec.primitive_convert(src, dst, nil, 10)
    assert_equal(:invalid_byte_sequence, status)
    assert_equal(["abc", "ef"], [dst, src])

    src = ec.putback + src
    assert_equal(["abc", "def"], [dst, src])

    status = ec.primitive_convert(src, dst, nil, 10)
    assert_equal(:finished, status)
    assert_equal(["abcdef", ""], [dst, src])
  end
  # putback(n) hands back at most n bytes per call, tagged with the source
  # encoding, and an empty string once nothing is left.
  def test_putback2
    ec = Encoding::Converter.new("utf-16le", "euc-jp")
    status = ec.primitive_convert("\x00\xd8\x21\x00", "", nil, nil)
    assert_equal(:invalid_byte_sequence, status)

    ["\x00", "\x21"].each do |byte|
      assert_equal(byte.force_encoding("utf-16le"), ec.putback(1))
    end
    assert_equal("", ec.putback(1))
  end
  # invalid: :replace substitutes the default replacement for invalid bytes.
  def test_invalid_replace
    converter = Encoding::Converter.new("UTF-8", "EUC-JP", invalid: :replace)
    input = "abc\x80def"
    output = ""
    status = converter.primitive_convert(input, output, nil, 100)
    assert_equal(:finished, status)
    assert_equal("", input)
    assert_equal("abc?def", output)
  end
  # An empty replacement string effectively drops invalid bytes.
  def test_invalid_ignore
    converter = Encoding::Converter.new("UTF-8", "EUC-JP", invalid: :replace, replace: "")
    input = "abc\x80def"
    output = ""
    status = converter.primitive_convert(input, output, nil, 100)
    assert_equal(:finished, status)
    assert_equal("", input)
    assert_equal("abcdef", output)
  end
  # undef: :replace substitutes the default replacement for characters
  # that have no mapping in the destination encoding.
  def test_undef_replace
    converter = Encoding::Converter.new("UTF-8", "EUC-JP", undef: :replace)
    input = "abc\u{fffd}def"
    output = ""
    status = converter.primitive_convert(input, output, nil, 100)
    assert_equal(:finished, status)
    assert_equal("", input)
    assert_equal("abc?def", output)
  end
  # An empty replacement string effectively drops unmappable characters.
  def test_undef_ignore
    converter = Encoding::Converter.new("UTF-8", "EUC-JP", undef: :replace, replace: "")
    input = "abc\u{fffd}def"
    output = ""
    status = converter.primitive_convert(input, output, nil, 100)
    assert_equal(:finished, status)
    assert_equal("", input)
    assert_equal("abcdef", output)
  end
  # A converter created with empty encoding names has no source or
  # destination encoding and copies its input through unchanged, still
  # honouring the destination buffer size.
  def test_noconv
    ec = Encoding::Converter.new("", "")
    assert_nil(ec.source_encoding)
    assert_nil(ec.destination_encoding)
    assert_equal([:source_buffer_empty, nil, nil, nil, nil], ec.primitive_errinfo)
    a = ["", "abcdefg", ec, nil, 2]
    check_ec("ab", "cdefg", :destination_buffer_full, *a)
    check_ec("abcd", "efg", :destination_buffer_full, *a)
    check_ec("abcdef", "g", :destination_buffer_full, *a)
    check_ec("abcdefg", "", :finished, *a)
  end
  # Pass-through converter with :partial_input: the last call reports
  # :source_buffer_empty instead of :finished.
  def test_noconv_partial
    ec = Encoding::Converter.new("", "")
    dst = ""
    src = "abcdefg"
    partial = { partial_input: true }
    # [expected dst, expected remaining src, expected result]
    [
      ["ab", "cdefg", :destination_buffer_full],
      ["abcd", "efg", :destination_buffer_full],
      ["abcdef", "g", :destination_buffer_full],
      ["abcdefg", "", :source_buffer_empty],
    ].each do |edst, esrc, eres|
      check_ec(edst, esrc, eres, dst, src, ec, nil, 2, partial)
    end
  end
  # Pass-through converter with :after_output: one byte is emitted per
  # call, followed by a final :finished.
  def test_noconv_after_output
    ec = Encoding::Converter.new("", "")
    dst = ""
    src = "abcdefg"
    opts = { after_output: true }
    # [expected dst, expected remaining src, expected result]
    [
      ["a", "bcdefg", :after_output],
      ["ab", "cdefg", :after_output],
      ["abc", "defg", :after_output],
      ["abcd", "efg", :after_output],
      ["abcde", "fg", :after_output],
      ["abcdef", "g", :after_output],
      ["abcdefg", "", :after_output],
      ["abcdefg", "", :finished],
    ].each do |edst, esrc, eres|
      check_ec(edst, esrc, eres, dst, src, ec, nil, 2, opts)
    end
  end
  # Text queued with insert_output comes out ahead of the converted input.
  def test_noconv_insert_output
    converter = Encoding::Converter.new("", "")
    converter.insert_output("xyz")
    input = "abc"
    output = ""
    status = converter.primitive_convert(input, output, nil, 20)
    assert_equal(:finished, status)
    assert_equal(["xyzabc", ""], [output, input])
  end
  # convert raises on invalid or unmappable input, and raises ArgumentError
  # once the converter has been finished.
  def test_convert
    converter = Encoding::Converter.new("utf-8", "euc-jp")
    assert_raise(Encoding::InvalidByteSequenceError) do
      converter.convert("a\x80")
    end
    assert_raise(Encoding::UndefinedConversionError) do
      converter.convert("\ufffd")
    end

    status = converter.primitive_convert(nil, "", nil, nil)
    assert_equal(:finished, status)
    assert_raise(ArgumentError) do
      converter.convert("a")
    end
  end
  # After converting U+3042 to ISO-2022-JP, finish must return the trailing
  # "\e(B" escape sequence.
  def test_finish_iso2022jp
    converter = Encoding::Converter.new("utf-8", "iso-2022-jp")
    converted = "\e$B$\"".force_encoding("iso-2022-jp")
    trailer = "\e(B".force_encoding("iso-2022-jp")
    assert_equal(converted, converter.convert("\u3042"))
    assert_equal(trailer, converter.finish)
  end
  # A lone "\xEF" byte is accepted by convert, but finish must then raise
  # InvalidByteSequenceError.
  def test_finish_incomplete_error
    converter = Encoding::Converter.new("utf-8", "euc-jp")
    converter.convert("\xEF")
    assert_raise(Encoding::InvalidByteSequenceError) do
      converter.finish
    end
  end
  # last_error is nil on a fresh converter and becomes an
  # InvalidByteSequenceError after primitive_convert reports
  # :incomplete_input for a source ending in the lone byte "\x81".
  def test_last_error1
    ec = Encoding::Converter.new("sjis", "euc-jp")
    # assert_nil is the dedicated nil assertion already used elsewhere in
    # this file.
    assert_nil(ec.last_error)
    assert_equal(:incomplete_input, ec.primitive_convert("fo\x81", "", nil, nil))
    assert_kind_of(Encoding::InvalidByteSequenceError, ec.last_error)
  end
  # convert returns the complete part ("fo") of a source ending in the lone
  # byte "\x81"; the subsequent finish raises, and the error stays available
  # through last_error.
  def test_last_error2
    converter = Encoding::Converter.new("sjis", "euc-jp")
    converted = converter.convert("fo\x81")
    assert_equal("fo", converted)
    assert_raise(Encoding::InvalidByteSequenceError) do
      converter.finish
    end
    assert_kind_of(Encoding::InvalidByteSequenceError, converter.last_error)
  end
  # Converting U+3042 to US-ASCII records an UndefinedConversionError whose
  # error_char is the offending character.
  def test_us_ascii
    converter = Encoding::Converter.new("UTF-8", "US-ASCII")
    converter.primitive_convert("\u{3042}", "")
    failure = converter.last_error
    assert_kind_of(Encoding::UndefinedConversionError, failure)
    assert_equal("\u{3042}", failure.error_char)
  end
  # Converting U+3042 to ISO-8859-1 records an UndefinedConversionError
  # whose error_char is the offending character.
  def test_88591
    converter = Encoding::Converter.new("UTF-8", "ISO-8859-1")
    converter.primitive_convert("\u{3042}", "")
    failure = converter.last_error
    assert_kind_of(Encoding::UndefinedConversionError, failure)
    assert_equal("\u{3042}", failure.error_char)
  end
  # The default replacement string depends on the destination encoding:
  # "?" for ISO-8859-1 and U+FFFD for UTF-8.
  def test_get_replacement
    defaults = [
      ["iso-8859-1", "?"],
      ["utf-8",      "\uFFFD"],
    ]
    defaults.each do |destination, expected|
      converter = Encoding::Converter.new("euc-jp", destination)
      assert_equal(expected, converter.replacement)
    end
  end
  # A replacement assigned through replacement= is what convert substitutes
  # for undefined characters.
  def test_set_replacement
    converter = Encoding::Converter.new("utf-8", "us-ascii", undef: :replace)
    converter.replacement = "<undef>"
    converted = converter.convert("a \u3042 b")
    assert_equal("a <undef> b", converted)
  end
  # Converter.new accepts an options hash: :undef => :replace substitutes
  # "?" unless :replace supplies a different string.
  def test_econv_new_hash
    source = "a \u3042 b"
    default_sub = Encoding::Converter.new("utf-8", "us-ascii", undef: :replace)
    assert_equal("a ? b", default_sub.convert(source))
    custom_sub = Encoding::Converter.new("utf-8", "us-ascii", undef: :replace, replace: "X")
    assert_equal("a X b", custom_sub.convert(source))
  end
  # With UNDEF_HEX_CHARREF, characters that have no mapping in the
  # destination encoding are written as &#xHHHH; references, while a literal
  # "&" in the input passes through untouched.
  def test_hex_charref
    hex_charref = Encoding::Converter::UNDEF_HEX_CHARREF
    converter_for = lambda do |from, to|
      Encoding::Converter.new(from, to, hex_charref)
    end
    utf8_text = "\u{306f 3041 3044 2665 3002}"
    eucjp_text = "\xB8\xF2\xB4\xB9\xCB\xA1\xC2\xA7: n\xA1\xDFm=m\xA1\xDFn"

    converter = converter_for.call("UTF-8", "US-ASCII")
    assert_equal("&#x3042;", converter.convert("\u3042"))

    converter = converter_for.call("UTF-8", "EUC-JP")
    expected = "\xa4\xcf\xa4\xa1\xa4\xa4&#x2665;\xa1\xa3".force_encoding("euc-jp")
    assert_equal(expected, converter.convert(utf8_text))

    converter = converter_for.call("UTF-8", "ISO-2022-JP")
    expected = "\e$B$O$!$$\e(B&#x2665;\e$B!#".force_encoding("ISO-2022-JP")
    assert_equal(expected, converter.convert(utf8_text))
    assert_equal("\e(B".force_encoding("ISO-2022-JP"), converter.finish)

    converter = converter_for.call("EUC-JP", "US-ASCII")
    expected = "&#x4EA4;&#x63DB;&#x6CD5;&#x5247;: n&#xD7;m=m&#xD7;n".force_encoding("ISO-8859-1")
    assert_equal(expected, converter.convert(eucjp_text))

    converter = converter_for.call("EUC-JP", "ISO-8859-1")
    expected = "&#x4EA4;&#x63DB;&#x6CD5;&#x5247;: n\xD7m=m\xD7n".force_encoding("ISO-8859-1")
    assert_equal(expected, converter.convert(eucjp_text))

    converter = converter_for.call("UTF-8", "US-ASCII")
    assert_equal("&", converter.convert("&"))
  end
  # The "amp_escape" decorator escapes only "&"; "xml_text_escape" escapes
  # "&", "<" and ">" but leaves the double quote alone. Neither leaves
  # anything for finish to return.
  def test_xml_escape_text
    raw = "&<>\""
    expectations = [
      ["amp_escape",      '&amp;<>"'],
      ["xml_text_escape", '&amp;&lt;&gt;"'],
    ]
    expectations.each do |decorator, escaped|
      converter = Encoding::Converter.new("", decorator)
      assert_equal(escaped, converter.convert(raw))
      assert_equal('', converter.finish)
    end
  end
  # "xml_attr_content_escape" escapes "&", "<", ">" and the double quote,
  # and finish never returns any extra output.
  def test_xml_escape_attr_content
    fresh = -> { Encoding::Converter.new("", "xml_attr_content_escape") }

    # finish on a converter that never saw input.
    assert_equal('', fresh.call.finish)

    cases = [
      ["",      ''],
      ['"',     '&quot;'],
      ["&<>\"", '&amp;&lt;&gt;&quot;'],
    ]
    cases.each do |raw, escaped|
      converter = fresh.call
      assert_equal(escaped, converter.convert(raw))
      assert_equal('', converter.finish)
    end
  end
  # "xml_attr_quote" wraps the output in double quotes without escaping
  # anything: the opening quote is emitted with the first output and the
  # closing quote by finish; with no output at all, finish returns both.
  def test_xml_escape_attr_quote
    fresh = -> { Encoding::Converter.new("", "xml_attr_quote") }

    # finish on a converter that never saw input.
    assert_equal('""', fresh.call.finish)

    cases = [
      ["",      '',      '""'],
      ['"',     '""',    '"'],
      ["&<>\"", '"&<>"', '"'],
    ]
    cases.each do |raw, converted, finished|
      converter = fresh.call
      assert_equal(converted, converter.convert(raw))
      assert_equal(finished, converter.finish)
    end
  end
  # XML decorators can be combined with UNDEF_HEX_CHARREF by OR-ing the
  # flag constants; the XML text decorator also works with the ISO-2022-JP
  # destination.
  def test_xml_escape_with_charref
    flags = Encoding::Converter
    hearts = "<\u2665>&\"\u2661\""

    text_mode = flags::XML_TEXT_DECORATOR | flags::UNDEF_HEX_CHARREF
    converter = Encoding::Converter.new("utf-8", "euc-jp", text_mode)
    assert_equal('&lt;&#x2665;&gt;&amp;"&#x2661;"', converter.convert(hearts))
    assert_equal('', converter.finish)

    attr_mode = flags::XML_ATTR_CONTENT_DECORATOR |
                flags::XML_ATTR_QUOTE_DECORATOR |
                flags::UNDEF_HEX_CHARREF
    converter = Encoding::Converter.new("utf-8", "euc-jp", attr_mode)
    assert_equal('"&lt;&#x2665;&gt;&amp;&quot;&#x2661;&quot;', converter.convert(hearts))
    assert_equal('"', converter.finish)

    converter = Encoding::Converter.new("utf-8", "iso-2022-jp", flags::XML_TEXT_DECORATOR)
    expected = "&amp;\e$B$&\e(B&amp;".force_encoding("iso-2022-jp")
    assert_equal(expected, converter.convert("&\u3046&"))
    assert_equal('', converter.finish)
  end
  733. def test_xml_hasharg
  734. assert_equal("&amp;\e$B$&\e(B&#x2665;&amp;\"'".force_encoding("iso-2022-jp"),
  735. "&\u3046\u2665&\"'".encode("iso-2022-jp", xml: :text))
  736. assert_equal("\"&amp;\e$B$&\e(B&#x2661;&amp;&quot;'\"".force_encoding("iso-2022-jp"),
  737. "&\u3046\u2661&\"'".encode("iso-2022-jp", xml: :attr))
  738. assert_equal("&amp;\u3046\u2661&amp;\"'".force_encoding("utf-8"),
  739. "&\u3046\u2661&\"'".encode("utf-8", xml: :text))
  740. end
  # An invalid stateless-ISO-2022-JP byte pair is replaced by a single "?"
  # when encoding to ISO-2022-JP with :invalid => :replace.
  def test_iso2022jp_invalid_replace
    expected = "?x".force_encoding("iso-2022-jp")
    actual = "\222\xA1x".encode("iso-2022-jp", "stateless-iso-2022-jp", invalid: :replace)
    assert_equal(expected, actual)
  end
  # Converter#convpath lists the conversion steps as [from, to] encoding
  # pairs, with decorators such as "universal_newline" appearing as strings
  # at the position where they are applied.
  def test_convpath
    eucjp, utf8 = Encoding::EUC_JP, Encoding::UTF_8
    utf16be, utf16le = Encoding::UTF_16BE, Encoding::UTF_16LE
    iso88591 = Encoding::ISO_8859_1
    iso2022jp = Encoding::ISO_2022_JP
    siso2022jp = Encoding::STATELESS_ISO_2022_JP
    newline = {universal_newline: true}

    # [expected path, [source, destination], options]
    cases = [
      [[], ["", ""], {}],
      [[[eucjp, utf8], [utf8, iso88591]], [eucjp, iso88591], {}],
      [[[eucjp, siso2022jp], [siso2022jp, iso2022jp]], [eucjp, iso2022jp], {}],
      [[[iso2022jp, siso2022jp], [siso2022jp, eucjp], [eucjp, utf8], [utf8, iso88591]],
       [iso2022jp, iso88591], {}],
      [["universal_newline", [utf8, utf16be]], [utf8, utf16be], newline],
      [[[utf16be, utf8], "universal_newline"], [utf16be, utf8], newline],
      [[[utf16be, utf8], "universal_newline", [utf8, utf16le]], [utf16be, utf16le], newline],
    ]
    cases.each do |expected, endpoints, options|
      converter = Encoding::Converter.new(*endpoints, **options)
      assert_equal(expected, converter.convpath)
    end
  end
  # Converter.search_convpath accepts encoding names or Encoding objects
  # and returns the same kind of path as convpath, including the
  # "universal_newline" decorator when requested.
  def test_search_convpath
    eucjp = Encoding::EUC_JP
    utf8 = Encoding::UTF_8
    utf32be = Encoding::UTF_32BE
    iso88591 = Encoding::ISO_8859_1
    via_utf8 = [[iso88591, utf8], [utf8, eucjp]]
    newline = {universal_newline: true}

    # [expected path, [source, destination], options]
    cases = [
      [via_utf8, ["ISO-8859-1", "EUC-JP"], {}],
      [via_utf8, [iso88591, eucjp], {}],
      [via_utf8 + ["universal_newline"], ["ISO-8859-1", "EUC-JP"], newline],
      [[[iso88591, utf8], "universal_newline", [utf8, utf32be]], ["ISO-8859-1", "UTF-32BE"], newline],
    ]
    cases.each do |expected, endpoints, options|
      assert_equal(expected, Encoding::Converter.search_convpath(*endpoints, **options))
    end
  end
  # Passing a replacement string that is itself malformed in the target
  # encoding must raise ArgumentError.
  def test_invalid_replace2
    assert_raise(ArgumentError) do
      malformed = "\x80".force_encoding("euc-jp")
      "".encode("euc-jp", undef: :replace, replace: malformed)
    end
  end
  # newline: :universal must build the same converter as
  # universal_newline: true; an unknown :newline value raises ArgumentError
  # mentioning that value; enabling several *_newline flags together raises
  # ConverterNotFoundError; and :newline combined with one *_newline flag
  # warns while still producing the :newline converter.
  def test_newline_option
    by_flag = Encoding::Converter.new("", "", universal_newline: true)
    by_name = Encoding::Converter.new("", "", newline: :universal)
    assert_equal(by_flag, by_name)

    assert_raise_with_message(ArgumentError, /\u{3042}/) do
      Encoding::Converter.new("", "", newline: "\u{3042}".to_sym)
    end

    newline_flags = %i[universal_newline crlf_newline cr_newline]
    2.upto(newline_flags.size) do |count|
      newline_flags.combination(count) do |flags|
        assert_raise(Encoding::ConverterNotFoundError, "#{flags} are mutually exclusive") do
          enabled = flags.each_with_object({}) { |flag, acc| acc[flag] = true }
          Encoding::Converter.new("", "", **enabled)
        end
      end
    end

    newline_flags.each do |flag|
      options = {newline: :universal, flag => true}
      by_both = assert_warning(/:newline option preceds/, options.inspect) do
        Encoding::Converter.new("", "", **options)
      end
      assert_equal(by_flag, by_both)
    end
  end
  # For every encoding in Encoding.list whose name matches ISO-8859-<digits>,
  # hands the heredoc script to assert_separately with the encoding name as
  # its first argument. The script makes that encoding the default external
  # encoding, makes 'utf-8' the default internal one, and asserts that
  # building a converter between the two raises nothing.
  # NOTE(review): assert_separately is defined outside this file; presumably
  # it runs the script in a separate interpreter so the changed defaults do
  # not leak into this process -- confirm.
  def test_default_external
    Encoding.list.grep(->(enc) {/\AISO-8859-\d+\z/i =~ enc.name}) do |enc|
      assert_separately(%W[--disable=gems -d - #{enc.name}], <<-EOS, ignore_stderr: true)
      Encoding.default_external = ext = ARGV[0]
      Encoding.default_internal = int ='utf-8'
      assert_nothing_raised do
        Encoding::Converter.new(ext, int)
      end
      EOS
    end
  end
  824. end