PageRenderTime 26ms CodeModel.GetById 23ms RepoModel.GetById 0ms app.codeStats 0ms

/projects/jruby-1.7.3/build.eclipse/externals/ruby1.9/ruby/enc/test_utf16.rb

https://gitlab.com/essere.lab.public/qualitas.class-corpus
Ruby | 384 lines | 334 code | 47 blank | 3 comment | 7 complexity | f27866badc93a2d3840ae6b0ccdef707 MD5 | raw file
  1. require 'test/unit'
  2. class TestUTF16 < Test::Unit::TestCase
  # Returns a Ruby-source-like description of +obj+ that makes its encoding
  # explicit, for use in assertion messages.
  #
  # obj:: a String or a Regexp.
  #
  # Returns a String. For a String argument the result always ends in a
  # single force_encoding("...") call; for a Regexp it is a
  # "Regexp.new(<dumped source>, <options>)" expression.
  #
  # Raises ArgumentError for any other kind of object.
  def encdump(obj)
    case obj
    when String
      dumped = obj.dump
      # Reuse the dump as-is when it already ends with a force_encoding call,
      # so the encoding is never mentioned twice.
      if /\.force_encoding\("[A-Za-z0-9.:_+-]*"\)\z/ =~ dumped
        dumped
      else
        "#{dumped}.force_encoding(#{obj.encoding.name.dump})"
      end
    when Regexp
      "Regexp.new(#{encdump(obj.source)}, #{obj.options})"
    else
      # The original raised the undefined constant +Argument+, which surfaced
      # as a NameError and hid the intended message.
      raise ArgumentError, "unexpected: #{obj.inspect}"
    end
  end
  # Sends +meth+ to +recv+ with +args+ inside assert_nothing_raised and
  # returns whatever the call returned.
  #
  # The assertion message is a textual rendering of the call: String
  # receivers/arguments are rendered with encdump, everything else with
  # #inspect.
  def enccall(recv, meth, *args)
    render = lambda { |value| String === value ? encdump(value) : value.inspect }

    desc = ''
    desc << render.call(recv)
    desc << '.' << meth.to_s
    unless args.empty?
      desc << '('
      args.each_with_index do |arg, idx|
        desc << ',' if idx > 0
        desc << render.call(arg)
      end
      desc << ')'
    end

    outcome = nil
    assert_nothing_raised(desc) do
      outcome = recv.send(meth, *args)
    end
    outcome
  end
  # Asserts that +expected+ == +actual+. On failure the message shows both
  # values rendered through encdump, so their encodings are visible.
  #
  # message:: optional extra text handed to build_message.
  def assert_str_equal(expected, actual, message=nil)
    template = "#{encdump expected} expected but not equal to\n" \
               "#{encdump actual}.\n"
    failure_text = build_message(message, template)
    assert_block(failure_text) do
      expected == actual
    end
  end
  51. # tests start
  # String#valid_encoding? for UTF-16BE: the first list must be reported as
  # valid, the second (odd byte counts, lone or misordered surrogates) as
  # invalid.
  def test_utf16be_valid_encoding
    well_formed = [
      "\x00\x00",
      "\xd7\xff",
      "\xd8\x00\xdc\x00",
      "\xdb\xff\xdf\xff",
      "\xe0\x00",
      "\xff\xff",
    ]
    malformed = [
      "\x00",
      "\xd7",
      "\xd8\x00",
      "\xd8\x00\xd8\x00",
      "\xdc\x00",
      "\xdc\x00\xd8\x00",
      "\xdc\x00\xdc\x00",
      "\xe0",
      "\xff",
    ]

    well_formed.each do |bytes|
      bytes.force_encoding("utf-16be")
      assert_equal(true, bytes.valid_encoding?, "#{encdump bytes}.valid_encoding?")
    end
    malformed.each do |bytes|
      bytes.force_encoding("utf-16be")
      assert_equal(false, bytes.valid_encoding?, "#{encdump bytes}.valid_encoding?")
    end
  end
  # String#valid_encoding? for UTF-16LE: the first list must be reported as
  # valid, the second (odd byte counts, lone or misordered surrogates) as
  # invalid.
  def test_utf16le_valid_encoding
    well_formed = [
      "\x00\x00",
      "\xff\xd7",
      "\x00\xd8\x00\xdc",
      "\xff\xdb\xff\xdf",
      "\x00\xe0",
      "\xff\xff",
    ]
    malformed = [
      "\x00",
      "\xd7",
      "\x00\xd8",
      "\x00\xd8\x00\xd8",
      "\x00\xdc",
      "\x00\xdc\x00\xd8",
      "\x00\xdc\x00\xdc",
      "\xe0",
      "\xff",
    ]

    well_formed.each do |bytes|
      bytes.force_encoding("utf-16le")
      assert_equal(true, bytes.valid_encoding?, "#{encdump bytes}.valid_encoding?")
    end
    malformed.each do |bytes|
      bytes.force_encoding("utf-16le")
      assert_equal(false, bytes.valid_encoding?, "#{encdump bytes}.valid_encoding?")
    end
  end
  # Time#strftime must reject a UTF-16BE format string with ArgumentError.
  def test_strftime
    format = "aa".force_encoding("utf-16be")
    label = "Time.now.strftime(#{encdump format})"
    assert_raise(ArgumentError, label) do
      Time.now.strftime(format)
    end
  end
  # A UTF-16BE string keeps its encoding through intern followed by to_s.
  def test_intern
    str = "aaaa".force_encoding("utf-16be")
    assert_equal(
      str.encoding,
      str.intern.to_s.encoding,
      "#{encdump str}.intern.to_s.encoding"
    )
  end
  # The symbol interned from the bytes "aa" tagged as UTF-16LE must differ
  # from the plain symbol :aa.
  def test_sym_eq
    str = "aa".force_encoding("utf-16le")
    differs = str.intern != :aa
    assert(differs, "#{encdump str}.intern != :aa")
  end
  # Encoding.compatible? returns nil for a UTF-16BE string paired with a
  # US-ASCII string.
  def test_compatible
    wide = "aa".force_encoding("utf-16be")
    ascii = "z".force_encoding("us-ascii")
    verdict = Encoding.compatible?(wide, ascii)
    assert_nil(verdict, "Encoding.compatible?(#{encdump wide}, #{encdump ascii})")
  end
  # casecmp between a UTF-16BE string and a plain "AA" literal must not
  # report equality (0).
  def test_casecmp
    wide = "aa".force_encoding("utf-16be")
    plain = "AA"
    comparison = wide.casecmp(plain)
    assert_not_equal(0, comparison, "#{encdump wide}.casecmp(#{encdump plain})")
  end
  # end_with? is false when the suffix is the single byte "b" tagged as
  # UTF-16BE, even though the receiver's bytes end in "b".
  def test_end_with
    whole = "ab".force_encoding("utf-16be")
    suffix = "b".force_encoding("utf-16be")
    outcome = whole.end_with?(suffix)
    assert_equal(false, outcome, "#{encdump whole}.end_with?(#{encdump suffix})")
  end
  # String#hex on a UTF-16 (LE, then BE) string raises
  # Encoding::CompatibilityError.
  def test_hex
    %w[utf-16le utf-16be].each do |enc_name|
      assert_raise(Encoding::CompatibilityError) do
        "ff".encode(enc_name).hex
      end
    end
  end
  # String#oct on a UTF-16 (LE, then BE) string raises
  # Encoding::CompatibilityError.
  def test_oct
    %w[utf-16le utf-16be].each do |enc_name|
      assert_raise(Encoding::CompatibilityError) do
        "77".encode(enc_name).oct
      end
    end
  end
  # String#count on a UTF-16BE receiver with a plain-literal character set
  # raises Encoding::CompatibilityError.
  def test_count
    wide = "aa".force_encoding("utf-16be")
    charset = "aa"
    label = "#{encdump wide}.count(#{encdump charset})"
    assert_raise(Encoding::CompatibilityError, label) do
      wide.count(charset)
    end
  end
  # String#+ between a US-ASCII string and a non-empty UTF-16BE string raises
  # Encoding::CompatibilityError.
  def test_plus
    ascii = "a".force_encoding("us-ascii")
    wide = "aa".force_encoding("utf-16be")
    label = "#{encdump ascii} + #{encdump wide}"
    assert_raise(Encoding::CompatibilityError, label) do
      ascii + wide
    end
  end
  # Encoding.find raises ArgumentError when the name string itself is tagged
  # as UTF-16BE.
  def test_encoding_find
    assert_raise(ArgumentError) do
      name = "utf-8".force_encoding("utf-16be")
      Encoding.find(name)
    end
  end
  # Interpolating a UTF-16BE string into a literal that also contains "a"
  # raises Encoding::CompatibilityError.
  def test_interpolation
    inner = "aa".force_encoding("utf-16be")
    label = "\"a\#{#{encdump inner}}\""
    assert_raise(Encoding::CompatibilityError, label) do
      "a#{inner}"
    end
  end
  # slice!(-1) on a UTF-16BE string must not raise (checked through enccall).
  def test_slice!
    subject = "aa".force_encoding("UTF-16BE")
    enccall(subject, :slice!, -1)
  end
  # String#+ with an empty receiver and a UTF-16BE argument must not raise.
  def test_plus_empty1
    s1 = ""
    s2 = "aa".force_encoding("utf-16be")
    # The message names the operator under test (+); it previously said "<<",
    # copied from the concat tests.
    assert_nothing_raised("#{encdump s1} + #{encdump s2}") {
      s1 + s2
    }
  end
  # String#+ with a plain receiver and an empty UTF-16BE argument must not
  # raise.
  def test_plus_empty2
    s1 = "aa"
    s2 = "".force_encoding("utf-16be")
    # The message names the operator under test (+); it previously said "<<",
    # copied from the concat tests.
    assert_nothing_raised("#{encdump s1} + #{encdump s2}") {
      s1 + s2
    }
  end
  # String#+ between a non-empty plain string and a non-empty UTF-16BE string
  # raises Encoding::CompatibilityError.
  def test_plus_nonempty
    s1 = "aa"
    s2 = "bb".force_encoding("utf-16be")
    # The message names the operator under test (+); it previously said "<<",
    # copied from the concat tests.
    assert_raise(Encoding::CompatibilityError, "#{encdump s1} + #{encdump s2}") {
      s1 + s2
    }
  end
  # String#<< with an empty receiver and a UTF-16BE argument must not raise.
  def test_concat_empty1
    target = ""
    addition = "aa".force_encoding("utf-16be")
    label = "#{encdump target} << #{encdump addition}"
    assert_nothing_raised(label) do
      target << addition
    end
  end
  205. def test_concat_empty2
  206. s1 = "aa"
  207. s2 = "".force_encoding("utf-16be")
  208. assert_nothing_raised("#{encdump s1} << #{encdump s2}") {
  209. s1 << s2
  210. }
  211. end
  # String#<< between a non-empty plain string and a non-empty UTF-16BE
  # string raises Encoding::CompatibilityError.
  def test_concat_nonempty
    target = "aa"
    addition = "bb".force_encoding("utf-16be")
    label = "#{encdump target} << #{encdump addition}"
    assert_raise(Encoding::CompatibilityError, label) do
      target << addition
    end
  end
  # String#chomp on UTF-16BE data: the bytes "\1\n" are left untouched, while
  # "\0\n" and "\0\r\0\n" are chomped down to an empty string.
  def test_chomp
    untouched = "\1\n".force_encoding("utf-16be")
    assert_equal(untouched, untouched.chomp, "#{encdump untouched}.chomp")

    ["\0\n", "\0\r\0\n"].each do |bytes|
      terminated = bytes.force_encoding("utf-16be")
      assert_equal("", terminated.chomp, "#{encdump terminated}.chomp")
    end
  end
  # String#succ must yield a validly encoded string for both UTF-16BE inputs
  # below (a two-byte value and a four-byte surrogate pair).
  def test_succ
    ["\xff\xff", "\xdb\xff\xdf\xff"].each do |bytes|
      str = bytes.force_encoding("utf-16be")
      assert(str.succ.valid_encoding?, "#{encdump str}.succ.valid_encoding?")
    end
  end
  # Regexp.union of two UTF-16BE strings must not raise (checked through
  # enccall).
  def test_regexp_union
    first = "aa".force_encoding("utf-16be")
    second = "bb".force_encoding("utf-16be")
    enccall(Regexp, :union, first, second)
  end
  # A Regexp built from an empty UTF-16BE string reports UTF-16BE as its
  # encoding.
  def test_empty_regexp
    empty = "".force_encoding("utf-16be")
    wanted = Encoding.find("utf-16be")
    assert_equal(wanted, Regexp.new(empty).encoding,
                 "Regexp.new(#{encdump empty}).encoding")
  end
  # Matching a UTF-16BE regexp against a plain "aa" literal raises
  # Encoding::CompatibilityError.
  def test_regexp_match
    assert_raise(Encoding::CompatibilityError) do
      # The regexp is built inside the block, as in the original, so an error
      # from construction is also seen by assert_raise.
      pattern = Regexp.new("aa".force_encoding("utf-16be"))
      pattern =~ "aa"
    end
  end
  # gsub with a UTF-16BE "." regexp and a plain "xy" replacement: expected to
  # succeed on the bytes "abcd" and to raise Encoding::CompatibilityError on
  # "ab\0\ncd".
  def test_gsub
    passing = "abcd".force_encoding("utf-16be")
    assert_nothing_raised do
      passing.gsub(Regexp.new(".".encode("utf-16be")), "xy")
    end

    failing = "ab\0\ncd".force_encoding("utf-16be")
    assert_raise(Encoding::CompatibilityError) do
      failing.gsub(Regexp.new(".".encode("utf-16be")), "xy")
    end
  end
  # Splitting " ab cd " (UTF-16BE) on a UTF-16BE single space yields exactly
  # the two fields "ab" and "cd".
  def test_split_awk
    padded = " ab cd ".encode("utf-16be")
    fields = padded.split(" ".encode("utf-16be"))
    assert_equal(2, fields.length)
    %w[ab cd].each_with_index do |word, idx|
      assert_str_equal(word.encode("utf-16be"), fields[idx])
    end
  end
  # A negated count set ("^b") gives the same result in UTF-16BE and UTF-16LE
  # as on the plain literal.
  def test_count2
    baseline = "abc".count("^b")
    %w[utf-16be utf-16le].each do |enc_name|
      subject = "abc".encode(enc_name)
      assert_equal(baseline, subject.count("^b".encode(enc_name)))
    end
  end
  # Source text whose magic comment names UTF-16LE or UTF-16BE is rejected
  # with ArgumentError. Only the fixed literals below are evaluated.
  def test_header
    ["# encoding:utf-16le\nfoo", "# encoding:utf-16be\nfoo"].each do |source|
      assert_raise(ArgumentError) { eval(source) }
    end
  end
  # Newline recognition in UTF-16LE/BE, checked through String#lines and
  # String#chomp.
  def test_is_mbc_newline
    le = lambda { |bytes| bytes.force_encoding("utf-16le") }
    be = lambda { |bytes| bytes.force_encoding("utf-16be") }

    # lines splits "foo\nbar\nbaz\n" into three newline-terminated pieces.
    le_lines = le.call("f\0o\0o\0\n\0b\0a\0r\0\n\0b\0a\0z\0\n\0").lines.to_a
    be_lines = be.call("\0f\0o\0o\0\n\0b\0a\0r\0\n\0b\0a\0z\0\n").lines.to_a
    ["f\0o\0o\0\n\0", "b\0a\0r\0\n\0", "b\0a\0z\0\n\0"].each do |piece|
      assert_equal(le.call(piece), le_lines.shift)
    end
    ["\0f\0o\0o\0\n", "\0b\0a\0r\0\n", "\0b\0a\0z\0\n"].each do |piece|
      assert_equal(be.call(piece), be_lines.shift)
    end

    # chomp removes one trailing newline; chomping again changes nothing.
    le_terminated = le.call("f\0o\0o\0\n\0")
    be_terminated = be.call("\0f\0o\0o\0\n")
    le_bare = le.call("f\0o\0o\0")
    be_bare = be.call("\0f\0o\0o")
    assert_equal(le_bare, le_terminated.chomp)
    assert_equal(le_bare, le_terminated.chomp.chomp)
    assert_equal(be_bare, be_terminated.chomp)
    assert_equal(be_bare, be_terminated.chomp.chomp)

    # A trailing "\n" byte that is not a whole newline code unit must survive
    # chomp.
    le_partial = le.call("f\0o\0o\0\n")
    be_partial = be.call("\0f\0o\0o\n")
    assert_equal(le_partial, le_partial.chomp)
    assert_equal(be_partial, be_partial.chomp)
  end
  # Integer#chr with a UTF-16 encoding name produces the expected two-byte
  # sequence for the code point of "a".
  def test_code_to_mbc
    code = "a".ord
    [["a\0", "utf-16le"], ["\0a", "utf-16be"]].each do |bytes, enc_name|
      assert_equal(bytes.force_encoding(enc_name), code.chr(enc_name))
    end
  end
  # Re-encodes +s+ one character at a time: each character's code point is
  # turned into a string in encoding +e+ via Integer#chr, and the pieces are
  # joined.
  #
  # s:: source string.
  # e:: target encoding (name or Encoding) accepted by Integer#chr.
  def utf8_to_utf16(s, e)
    pieces = s.each_char.map do |char|
      char.ord.chr(e)
    end
    pieces.join
  end
  # Case-insensitive regexps with back-references, built from UTF-16LE/BE
  # source text, keep their encoding and match the corresponding UTF-16 text.
  def test_mbc_case_fold
    pattern_text = "^(\u3042)(a)\\1\\2$"
    subject_text = "\u3042a\u3042a"

    le_regexp = Regexp.new(utf8_to_utf16(pattern_text, "utf-16le"), "i")
    be_regexp = Regexp.new(utf8_to_utf16(pattern_text, "utf-16be"), "i")

    assert_equal(Encoding.find("utf-16le"), le_regexp.encoding)
    assert_equal(Encoding.find("utf-16be"), be_regexp.encoding)
    assert_match(le_regexp, utf8_to_utf16(subject_text, "utf-16le"))
    assert_match(be_regexp, utf8_to_utf16(subject_text, "utf-16be"))
  end
  # A four-byte surrogate pair is one character with code point 0x20bb7: it
  # round-trips through ord/chr and is removed entirely by chop.
  def test_surrogate_pair
    code_point = 0x20bb7
    le_pair = "\x42\xd8\xb7\xdf".force_encoding("utf-16le")
    be_pair = "\xd8\x42\xdf\xb7".force_encoding("utf-16be")

    assert_equal(1, le_pair.size)
    assert_equal(1, be_pair.size)
    assert_equal(code_point, le_pair.ord)
    assert_equal(code_point, be_pair.ord)
    assert_equal(le_pair, code_point.chr("utf-16le"))
    assert_equal(be_pair, code_point.chr("utf-16be"))
    assert_equal("", le_pair.chop)
    assert_equal("", be_pair.chop)
  end
  # A regexp built from Regexp.escape of a UTF-16BE "*" must match that same
  # string.
  def test_regexp_escape
    star = "\0*".force_encoding("UTF-16BE")
    escaped = Regexp.escape(star)
    pattern = Regexp.new(escaped)
    assert(pattern =~ star, "#{encdump(pattern)} =~ #{encdump(star)}")
  end
  # casecmp folds case only when the bytes form "A"/"a" in the string's own
  # byte order, and it sorts "01"/"10" (UTF-16LE) the same way <=> does.
  def test_casecmp2
    be = lambda { |bytes| bytes.force_encoding("UTF-16BE") }
    le = lambda { |bytes| bytes.force_encoding("UTF-16LE") }

    assert_equal(0, be.call("\0A").casecmp(be.call("\0a")))
    assert_not_equal(0, le.call("\0A").casecmp(le.call("\0a")))
    assert_not_equal(0, be.call("A\0").casecmp(be.call("a\0")))
    assert_equal(0, le.call("A\0").casecmp(le.call("a\0")))

    pair = [le.call("01"),
            le.call("10")]
    by_spaceship = pair.sort { |x, y| x <=> y }
    by_casecmp = pair.sort { |x, y| x.casecmp(y) }
    assert_equal(by_spaceship, by_casecmp)
  end
  337. end