PageRenderTime 59ms CodeModel.GetById 20ms RepoModel.GetById 0ms app.codeStats 0ms

/projects/jruby-1.7.3/test/externals/ruby1.9/ruby/test_m17n.rb

https://gitlab.com/essere.lab.public/qualitas.class-corpus
Ruby | 1423 lines | 1218 code | 189 blank | 16 comment | 19 complexity | 69fd33cd05c25838050974c129ff31dd MD5 | raw file
  1. require 'test/unit'
  2. require 'stringio'
  3. class TestM17N < Test::Unit::TestCase
  # Asserts that +actual+ is the Encoding registered under +encname+.
  #
  # encname:: encoding name handed to Encoding.find
  # actual::  the Encoding object under test
  # message:: optional failure message forwarded to assert_equal
  def assert_encoding(encname, actual, message=nil)
    expected = Encoding.find(encname)
    assert_equal(expected, actual, message)
  end
  # Helpers that return a copy of a string re-tagged with a fixed encoding.
  # The argument itself is never modified: each helper works on a +dup+.
  module AESU
    # Copy of +str+ tagged as US-ASCII.
    def ua(str)
      str.dup.force_encoding("US-ASCII")
    end

    # Copy of +str+ tagged as ASCII-8BIT.
    def a(str)
      str.dup.force_encoding("ASCII-8BIT")
    end

    # Copy of +str+ tagged as EUC-JP.
    def e(str)
      str.dup.force_encoding("EUC-JP")
    end

    # Copy of +str+ tagged as Windows-31J.
    def s(str)
      str.dup.force_encoding("Windows-31J")
    end

    # Copy of +str+ tagged as UTF-8.
    def u(str)
      str.dup.force_encoding("UTF-8")
    end
  end
  14. include AESU
  15. extend AESU
  # Asserts that +actual+ is a String with the given encoding and bytes.
  #
  # bytes::   expected content; compared after both sides are re-tagged as
  #           ASCII-8BIT, so only the bytes matter
  # enc::     expected encoding, either an Encoding or an encoding name
  # actual::  the string under test
  # message:: optional failure message used by every assertion
  def assert_strenc(bytes, enc, actual, message=nil)
    assert_instance_of(String, actual, message)
    expected_enc = enc.kind_of?(String) ? Encoding.find(enc) : enc
    assert_equal(expected_enc, actual.encoding, message)
    assert_equal(a(bytes), a(actual), message)
  end
  # Runs the block with $stderr redirected to a buffer, then asserts that
  # whatever was written there matches +pat+.
  #
  # pat::  pattern the captured output must match
  # mesg:: optional failure message forwarded to assert_match
  def assert_warning(pat, mesg=nil)
    captured = ''
    saved_stderr = $stderr
    begin
      $stderr = StringIO.new(captured)
      yield
    ensure
      # Always put the real stream back, even when the block raises.
      $stderr = saved_stderr
    end
    assert_match(pat, captured, mesg)
  end
  # Asserts that +r+ is not a fixed-encoding regexp and that matching it
  # against a non-ASCII string raises nothing in any of the four encodings.
  def assert_regexp_generic_encoding(r)
    assert(!r.fixed_encoding?)
    # "\xc2\xa1" is a valid sequence for ASCII-8BIT, EUC-JP, Windows-31J and UTF-8.
    %w[ASCII-8BIT EUC-JP Windows-31J UTF-8].each do |ename|
      subject = "\xc2\xa1".force_encoding(ename)
      assert_nothing_raised { r =~ subject }
    end
  end
  # Asserts that +r+ is a fixed-encoding regexp: matching a non-ASCII string
  # works only when the string is in the regexp's own encoding and raises
  # Encoding::CompatibilityError for the other encodings tried.
  def assert_regexp_fixed_encoding(r)
    assert(r.fixed_encoding?)
    encodings = %w[ASCII-8BIT EUC-JP Windows-31J UTF-8].map { |ename| Encoding.find(ename) }
    encodings.each do |enc|
      subject = "\xc2\xa1".force_encoding(enc)
      if enc != r.encoding
        assert_raise(Encoding::CompatibilityError) { r =~ subject }
      else
        assert_nothing_raised { r =~ subject }
      end
    end
  end
  # Asserts that +r+ is a US-ASCII regexp without a fixed encoding.
  def assert_regexp_generic_ascii(r)
    assert_equal(Encoding::US_ASCII, r.encoding)
    assert_regexp_generic_encoding(r)
  end
  # Asserts that +r+ is a fixed-encoding ASCII-8BIT regexp.
  def assert_regexp_fixed_ascii8bit(r)
    assert_equal(Encoding::ASCII_8BIT, r.encoding)
    assert_regexp_fixed_encoding(r)
  end
  # Asserts that +r+ is a fixed-encoding EUC-JP regexp.
  def assert_regexp_fixed_eucjp(r)
    assert_equal(Encoding::EUC_JP, r.encoding)
    assert_regexp_fixed_encoding(r)
  end
  # Asserts that +r+ is a fixed-encoding Windows-31J regexp.
  def assert_regexp_fixed_sjis(r)
    assert_equal(Encoding::Windows_31J, r.encoding)
    assert_regexp_fixed_encoding(r)
  end
  # Asserts that +r+ is a fixed-encoding UTF-8 regexp.
  def assert_regexp_fixed_utf8(r)
    assert_equal(Encoding::UTF_8, r.encoding)
    assert_regexp_fixed_encoding(r)
  end
  # Evaluates "<r>.encoding" as source carrying a US-ASCII magic comment.
  #
  # r::   text of the expression (interpolated into the evaluated source)
  # enc:: encoding the expression is expected to report
  # ex::  when given, the exception class the evaluation must raise instead
  def assert_regexp_usascii_literal(r, enc, ex = nil)
    code = "# -*- encoding: US-ASCII -*-\n#{r}.encoding"
    return assert_raise(ex) { eval(code) } if ex
    assert_equal(enc, eval(code))
  end
  # Returns source text for +str+ that always ends in a force_encoding call:
  # String#dump output is used as is when it already ends with one, otherwise
  # a force_encoding call naming the string's encoding is appended.
  def encdump(str)
    dumped = str.dump
    return dumped if dumped =~ /\.force_encoding\("[A-Za-z0-9.:_+-]*"\)\z/
    "#{dumped}.force_encoding(#{str.encoding.name.dump})"
  end
  # Renders +args+ as a parenthesised, comma-separated argument list.
  # Strings are rendered with encdump, everything else with #inspect.
  def encdumpargs(args)
    out = '('
    args.each_with_index do |arg, idx|
      out << ',' unless idx.zero?
      out << (String === arg ? encdump(arg) : arg.inspect)
    end
    out << ')'
  end
  # Asserts that result +t+ carries the encoding of the first operand that
  # is not ASCII-only (+s1+ first, then +s2+). When both operands are
  # ASCII-only, either operand's encoding is accepted.
  def assert_str_enc_propagation(t, s1, s2)
    decisive = [s1, s2].find { |operand| !operand.ascii_only? }
    if decisive
      assert_equal(decisive.encoding, t.encoding)
    else
      assert([s1.encoding, s2.encoding].include?(t.encoding))
    end
  end
  # Asserts that +actual_proc+ behaves like +expected_proc+: when the
  # expected proc raises a StandardError, the actual proc must raise that
  # exception class; otherwise both must return equal values.
  def assert_same_result(expected_proc, actual_proc)
    failure = nil
    expected = begin
      expected_proc.call
    rescue => failure
      nil
    end
    return assert_raise(failure.class) { actual_proc.call } if failure
    assert_equal(expected, actual_proc.call)
  end
  # True when the strings that are not ASCII-only all share one encoding
  # (which is also the case when there are no such strings at all).
  def str_enc_compatible?(*strs)
    strs.reject(&:ascii_only?).map(&:encoding).uniq.length <= 1
  end
  129. # tests start
  # String literals evaluated from ASCII-8BIT source report ASCII-8BIT.
  def test_string_ascii_literal
    [%{""}, %{"a"}].each do |src|
      assert_encoding("ASCII-8BIT", eval(a(src)).encoding)
    end
  end
  # String literals evaluated from EUC-JP source report EUC-JP, whether
  # they contain nothing, plain ASCII, raw high bytes or escape sequences.
  def test_string_eucjp_literal
    sources = [
      %{""},
      %{"a"},
      %{"\xa1\xa1"},
      %{"\\xa1\\xa1"},
      %{"\\x20"},
      %{"\\n"},
      %{"\\x80"},
    ]
    sources.each do |src|
      assert_encoding("EUC-JP", eval(e(src)).encoding)
    end
  end
  # A \u escape yields a UTF-8 string; the ASCII-8BIT source mixing \u and
  # \x80 is rejected with SyntaxError.
  def test_utf8_literal
    hiragana = "\u3042"
    assert_equal(Encoding::UTF_8, hiragana.encoding, "[ruby-dev:33406] \"\\u3042\".encoding")
    mixed_source = a('\u3052\x80')
    assert_raise(SyntaxError) { eval(mixed_source) }
  end
  # A string literal combining raw high bytes with a \u{...} escape is a
  # SyntaxError in ASCII-8BIT, EUC-JP and Windows-31J source, and accepted
  # in UTF-8 source, whichever part comes first.
  def test_string_mixed_unicode
    [%{"\xc2\xa1\\u{6666}"}, %{"\\u{6666}\xc2\xa1"}].each do |src|
      [a(src), e(src), s(src)].each do |code|
        assert_raise(SyntaxError) { eval(code) }
      end
      assert_nothing_raised { eval(u(src)) }
    end
  end
  # String#inspect renders the listed byte sequences as \xNN escapes, with
  # and without a trailing space. Each row is [expected inspect, string].
  def test_string_inspect_invalid
    cases = [
      ['"\xFE"', e("\xfe")],
      ['"\x8E"', e("\x8e")],
      ['"\x8F"', e("\x8f")],
      ['"\x8F\xA1"', e("\x8f\xa1")],
      ['"\xEF"', s("\xef")],
      ['"\xC2"', u("\xc2")],
      ['"\xE0\x80"', u("\xe0\x80")],
      ['"\xF0\x80\x80"', u("\xf0\x80\x80")],
      ['"\xF8\x80\x80\x80"', u("\xf8\x80\x80\x80")],
      ['"\xFC\x80\x80\x80\x80"', u("\xfc\x80\x80\x80\x80")],
      ['"\xFE "', e("\xfe ")],
      ['"\x8E "', e("\x8e ")],
      ['"\x8F "', e("\x8f ")],
      ['"\x8F\xA1 "', e("\x8f\xa1 ")],
      ['"\xEF "', s("\xef ")],
      ['"\xC2 "', u("\xc2 ")],
      ['"\xE0\x80 "', u("\xe0\x80 ")],
      ['"\xF0\x80\x80 "', u("\xf0\x80\x80 ")],
      ['"\xF8\x80\x80\x80 "', u("\xf8\x80\x80\x80 ")],
      ['"\xFC\x80\x80\x80\x80 "', u("\xfc\x80\x80\x80\x80 ")],
      ['"\x81."', s("\x81.")],
      ['"\xFC"', u("\xfc")],
    ]
    cases.each do |expected, str|
      assert_equal(expected, str.inspect)
    end
  end
  # String#inspect output depends on Encoding.default_external: a string is
  # shown with its characters when its encoding is the default external one
  # and with escapes otherwise. Both defaults are restored on exit.
  def test_string_inspect_encoding
    orig_int = Encoding.default_internal
    orig_ext = Encoding.default_external
    Encoding.default_internal = nil
    externals = [Encoding::UTF_8, Encoding::EUC_JP, Encoding::Windows_31J, Encoding::GB18030]
    externals.each do |ext|
      Encoding.default_external = ext

      str = "\x81\x30\x81\x30".force_encoding('GB18030')
      want = Encoding::GB18030 == ext ? %{"#{str}"} : '"\x{81308130}"'
      assert_equal(want, str.inspect)

      str = e("\xa1\x8f\xa1\xa1")
      expected = "\"\\xA1\x8F\xA1\xA1\"".force_encoding("EUC-JP")
      want = Encoding::EUC_JP == ext ? expected : "\"\\xA1\\x{8FA1A1}\""
      assert_equal(want, str.inspect)

      str = s("\x81@")
      want = Encoding::Windows_31J == ext ? %{"#{str}"} : '"\x{8140}"'
      assert_equal(want, str.inspect)

      str = "\u3042\u{10FFFD}"
      want = Encoding::UTF_8 == ext ? %{"#{str}"} : '"\u3042\u{10FFFD}"'
      assert_equal(want, str.inspect)
    end

    # ASCII-only text in these encodings inspects as plain ASCII.
    Encoding.default_external = Encoding::UTF_8
    wide_encodings = [Encoding::UTF_16BE, Encoding::UTF_16LE, Encoding::UTF_32BE,
                      Encoding::UTF_32LE, Encoding::UTF8_SOFTBANK]
    wide_encodings.each do |enc|
      str = "abc".encode(enc)
      assert_equal('"abc"', str.inspect)
    end
  ensure
    Encoding.default_internal = orig_int
    Encoding.default_external = orig_ext
  end
  # Inspecting strings tagged with the endian-less "UTF-16"/"UTF-32" names.
  def test_utf_16_32_inspect
    variants = %w/UTF-16 UTF-32/.product(%w/BE LE/)

    # Input without a BOM: the result must equal the inspect output of the
    # same bytes tagged as ISO-2022-JP (i.e. inspected as a dummy encoding
    # string).
    str = "\u3042"
    variants.each do |enc, endian|
      encoded = str.encode(enc + endian)
      assert_equal(encoded.dup.force_encoding("ISO-2022-JP").inspect,
                   encoded.dup.force_encoding(enc).inspect)
    end

    # Input starting with U+FEFF: the result must equal the inspect output
    # of the string in its explicit-endian encoding.
    str = "\uFEFF\u3042"
    variants.each do |enc, endian|
      encoded = str.encode(enc + endian)
      assert_equal(encoded.inspect,
                   encoded.dup.force_encoding(enc).inspect)
    end
  end
  # Evaluating String#dump output must give back a string equal to the
  # original, for each of the sample strings below.
  def test_str_dump
    samples = [
      e("\xfe"),
      e("\x8e"),
      e("\x8f"),
      e("\x8f\xa1"),
      s("\xef"),
      u("\xc2"),
      u("\xe0\x80"),
      u("\xf0\x80\x80"),
      u("\xf8\x80\x80\x80"),
      u("\xfc\x80\x80\x80\x80"),
      e("\xfe "),
      e("\x8e "),
      e("\x8f "),
      e("\x8f\xa1 "),
      s("\xef "),
      u("\xc2 "),
      u("\xe0\x80 "),
      u("\xf0\x80\x80 "),
      u("\xf8\x80\x80\x80 "),
      u("\xfc\x80\x80\x80\x80 "),
      e("\xa1\x8f\xa1\xa1"),
      s("\x81."),
      s("\x81@"),
      u("\xfc"),
      "\u3042",
      "ascii",
      "\u3042".encode("UTF-16LE"),
      "\u3042".encode("UTF-16BE"),
    ]
    samples.each do |str|
      assert_equal(str, eval(str.dump), "[ruby-dev:33142]")
    end
  end
  # Builds byte sequences from bit templates and checks String#valid_encoding?
  # for UTF-8. In a template, "x" and "X" are variable bits: every "x" is
  # filled with the same digit, the "X" bits are filled as described below.
  def test_validate_redundant_utf8
    bits_0x10ffff = "11110100 10001111 10111111 10111111"
    # Turns a space-separated bit string into a UTF-8 tagged byte string.
    decode = lambda { |bits| [bits.delete(" ")].pack("B*").force_encoding("utf-8") }
    templates = [
      "0xxxxxxx",
      "110XXXXx 10xxxxxx",
      "1110XXXX 10Xxxxxx 10xxxxxx",
      "11110XXX 10XXxxxx 10xxxxxx 10xxxxxx",
      "111110XX 10XXXxxx 10xxxxxx 10xxxxxx 10xxxxxx",
      "1111110X 10XXXXxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx",
      "11111110 10XXXXXx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx",
      "11111111 10XXXXXX 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx",
    ]
    templates.each do |pat0|
      %w[1 0].each do |fill|
        pat1 = pat0.gsub(/x/, fill)
        only_last_x_set = pat1.sub(/X([^X]*)\z/, '1\1').gsub(/X/, "0")
        every_x_set = pat1.gsub(/X/, "1")
        [only_last_x_set, every_x_set].each do |pat2|
          str = decode.call(pat2)
          # Bit strings are compared as text against the pattern for U+10FFFF.
          expect_valid = pat2 <= bits_0x10ffff
          assert(str.valid_encoding? == expect_valid, "#{pat2}")
        end
        # Multi-byte templates with every "X" cleared must be rejected.
        if pat0.include?(" ")
          pat3 = pat1.gsub(/X/, "0")
          assert(!decode.call(pat3).valid_encoding?, "#{pat3}")
        end
      end
    end
  end
  # Three-byte sequences in the surrogate ranges are invalid UTF-8, while
  # the sequences just before and just after those ranges are valid.
  def test_validate_surrogate
    # 1110XXXX 10Xxxxxx 10xxxxxx : 3 bytes UTF-8
    decode = lambda { |bits| [bits.delete(" ")].pack("B*").force_encoding("utf-8") }
    surrogate_templates = [
      "11101101 1010xxxx 10xxxxxx", # surrogate high
      "11101101 1011xxxx 10xxxxxx", # surrogate low
    ]
    neighbours = [
      "11101101 10011111 10111111", # just before surrogate high
      "11101110 10000000 10000000", # just after surrogate low
    ]
    surrogate_templates.each do |template|
      %w[0 1].each do |fill|
        bits = template.gsub(/x/, fill)
        assert(!decode.call(bits).valid_encoding?, "#{bits}")
      end
    end
    neighbours.each do |bits|
      assert(decode.call(bits).valid_encoding?, "#{bits}")
    end
  end
  # Each regexp source below must be rejected with SyntaxError.
  def test_regexp_too_short_multibyte_character
    escaped = [
      '/\xfe/e',
      '/\x8e/e',
      '/\x8f/e',
      '/\x8f\xa1/e',
      '/\xef/s',
      '/\xc2/u',
      '/\xe0\x80/u',
      '/\xf0\x80\x80/u',
      '/\xf8\x80\x80\x80/u',
      '/\xfc\x80\x80\x80\x80/u',
    ]
    # raw 8bit
    raw = ["/\xfe/e", "/\xc2/u"]
    # invalid suffix
    invalid_suffix = ['/\xc2\xff/u', '/\xc2 /u', '/\xc2\x20/u']

    (escaped + raw + invalid_suffix).each do |src|
      assert_raise(SyntaxError) { eval(src) }
    end
  end
  # ASCII-only regexps are generic: they match "a" in every encoding and
  # simply fail to match a non-ASCII string.
  def test_regexp_generic
    assert_regexp_generic_ascii(/a/)
    [a("a"), e("a"), s("a"), u("a")].each do |src|
      assert_regexp_generic_ascii(Regexp.new(src))
    end

    [/a/, Regexp.new(a("a"))].each do |r|
      [a("a"), e("a"), s("a"), u("a")].each do |str|
        assert_equal(0, r =~ str)
      end
      [a("\xc2\xa1"), e("\xc2\xa1"), s("\xc2\xa1"), u("\xc2\xa1")].each do |str|
        assert_equal(nil, r =~ str)
      end
    end
  end
  # An ASCII-only /.../n regexp stays generic, and matching it against a
  # non-ASCII string in EUC-JP, Windows-31J or UTF-8 writes a warning.
  def test_regexp_ascii_none
    r = /a/n
    assert_warning(%r{regexp match /.../n against to}) do
      assert_regexp_generic_ascii(r)
    end

    [a("a"), e("a"), s("a"), u("a")].each do |str|
      assert_equal(0, r =~ str)
    end

    assert_equal(nil, r =~ a("\xc2\xa1"))
    [
      [%r{regexp match /.../n against to EUC-JP string}, e("\xc2\xa1")],
      [%r{regexp match /.../n against to Windows-31J string}, s("\xc2\xa1")],
      [%r{regexp match /.../n against to UTF-8 string}, u("\xc2\xa1")],
    ].each do |warning, str|
      assert_warning(warning) do
        assert_equal(nil, r =~ str)
      end
    end

    assert_nothing_raised { eval(e("/\\x80/n")) }
  end
  # Regexps holding high bytes with /n, or built from ASCII-8BIT source,
  # are fixed to ASCII-8BIT.
  def test_regexp_ascii
    [
      /\xc2\xa1/n,
      eval(a(%{/\xc2\xa1/})),
      eval(a(%{/\xc2\xa1/n})),
      eval(a(%q{/\xc2\xa1/})),
    ].each do |r|
      assert_regexp_fixed_ascii8bit(r)
    end

    assert_raise(SyntaxError) { eval("/\xa1\xa1/n".force_encoding("euc-jp")) }

    [/\xc2\xa1/n, eval(a(%{/\xc2\xa1/})), eval(a(%{/\xc2\xa1/n}))].each do |r|
      [a("a"), e("a"), s("a"), u("a")].each do |str|
        assert_equal(nil, r =~ str)
      end
      assert_equal(0, r =~ a("\xc2\xa1"))
      [e("\xc2\xa1"), s("\xc2\xa1"), u("\xc2\xa1")].each do |str|
        assert_raise(Encoding::CompatibilityError) { r =~ str }
      end
    end
  end
  # /e regexps and regexps built from EUC-JP source are fixed to EUC-JP.
  def test_regexp_euc
    [
      /a/e,
      /\xc2\xa1/e,
      eval(e(%{/\xc2\xa1/})),
      eval(e(%q{/\xc2\xa1/})),
    ].each do |r|
      assert_regexp_fixed_eucjp(r)
    end

    # Checks one regexp against ASCII text in every encoding and against
    # "\xc2\xa1", which only the EUC-JP string may be matched against.
    check = lambda do |r, ascii_result, eucjp_result|
      [a("a"), e("a"), s("a"), u("a")].each do |str|
        assert_equal(ascii_result, r =~ str)
      end
      assert_raise(Encoding::CompatibilityError) { r =~ a("\xc2\xa1") }
      assert_equal(eucjp_result, r =~ e("\xc2\xa1"))
      assert_raise(Encoding::CompatibilityError) { r =~ s("\xc2\xa1") }
      assert_raise(Encoding::CompatibilityError) { r =~ u("\xc2\xa1") }
    end

    [/a/e].each { |r| check.call(r, 0, nil) }
    [/\xc2\xa1/e, eval(e(%{/\xc2\xa1/})), eval(e(%q{/\xc2\xa1/}))].each do |r|
      check.call(r, nil, 0)
    end
  end
  # /s regexps and regexps built from Windows-31J source are fixed to
  # Windows-31J.
  def test_regexp_sjis
    [
      /a/s,
      /\xc2\xa1/s,
      eval(s(%{/\xc2\xa1/})),
      eval(s(%q{/\xc2\xa1/})),
    ].each do |r|
      assert_regexp_fixed_sjis(r)
    end
  end
  # Matching a Windows-31J regexp against an EUC-JP string must raise
  # Encoding::CompatibilityError with a message naming "windows-31j"
  # (case-insensitively).
  def test_regexp_windows_31j
    # assert_raise hands back the exception it caught, so a missing exception
    # is reported as an assertion failure rather than as a NoMethodError
    # from calling #message on nil.
    err = assert_raise(Encoding::CompatibilityError) do
      Regexp.new("\xa1".force_encoding("windows-31j")) =~ "\xa1\xa1".force_encoding("euc-jp")
    end
    assert_match(/windows-31j/i, err.message)
  end
  # Embedding one regexp in another: an EUC-JP regexp cannot be embedded
  # in a /s regexp that holds high bytes, and embedding among US-ASCII and
  # ASCII-8BIT sources gives the encodings listed in the table below.
  #
  # The locals +r+ and +r1+ must keep these names: the evaluated source
  # text interpolates them.
  def test_regexp_embed
    [eval(e("/\xc2\xa1/")), /\xc2\xa1/e].each do |r|
      assert_raise(RegexpError) { eval(s("/\xc2\xa1\#{r}/s")) }
      assert_raise(RegexpError) { eval(s("/\#{r}\xc2\xa1/s")) }
    end

    [eval(e("/\xc2\xa1/")), /\xc2\xa1/e].each do |r|
      assert_raise(RegexpError) { /\xc2\xa1#{r}/s }
    end

    # [inner source, inner encoding, outer source, outer encoding, expected]
    [
      ['foo',  "ascii-8bit", '/bar#{r1}/',  'ascii-8bit', Encoding::US_ASCII],
      ['foo',  "us-ascii",   '/bar#{r1}/',  'ascii-8bit', Encoding::US_ASCII],
      ['foo',  "ascii-8bit", '/bar#{r1}/',  'us-ascii',   Encoding::US_ASCII],
      ['foo',  "us-ascii",   '/bar#{r1}/',  'us-ascii',   Encoding::US_ASCII],
      ['\xa1', "ascii-8bit", '/bar#{r1}/',  'ascii-8bit', Encoding::ASCII_8BIT],
      ['\xa1', "ascii-8bit", '/bar#{r1}/',  'us-ascii',   Encoding::ASCII_8BIT],
      ['foo',  "ascii-8bit", '/\xa1#{r1}/', 'ascii-8bit', Encoding::ASCII_8BIT],
      ['foo',  "us-ascii",   '/\xa1#{r1}/', 'ascii-8bit', Encoding::ASCII_8BIT],
      ['\xa1', "ascii-8bit", '/\xa1#{r1}/', 'ascii-8bit', Encoding::ASCII_8BIT],
    ].each do |inner_src, inner_enc, outer_src, outer_enc, expected|
      r1 = Regexp.new(inner_src.force_encoding(inner_enc))
      r2 = eval(outer_src.force_encoding(outer_enc))
      assert_equal(expected, r2.encoding)
    end
  end
  # [[:space:]] matches U+00A0, with and without the /u flag.
  def test_regexp_named_class
    [/[[:space:]]/u, /[[:space:]]/].each do |pattern|
      assert_match(pattern, "\u{00a0}")
    end
  end
  # \p{...} properties in EUC-JP regexps: the regexp has a fixed encoding
  # and matches EUC-JP text; adding a \u{...} escape makes the regexp UTF-8.
  def test_regexp_property
    src = '\p{Hiragana}'.force_encoding("euc-jp")
    assert_equal(Encoding::EUC_JP, src.encoding)
    from_string = nil
    assert_nothing_raised {
      from_string = Regexp.new(src)
    }

    # [regexp, text that must match once tagged as EUC-JP]
    [
      [from_string, "\xa4\xa2"],
      [eval('/\p{Hiragana}/'.force_encoding("euc-jp")), "\xa4\xa2"],
      [/\p{Hiragana}/e, "\xa4\xa2"],
      [/\p{AsciI}/e, "a"],
      [/\p{hiraganA}/e, "\xa4\xa2"],
    ].each do |r, text|
      assert(r.fixed_encoding?)
      assert_match(r, text.force_encoding("euc-jp"))
    end

    ['/\u{3042}\p{Hiragana}/', '/\p{Hiragana}\u{3042}/'].each do |code|
      r = eval(code.force_encoding("euc-jp"))
      assert(r.fixed_encoding?)
      assert_equal(Encoding::UTF_8, r.encoding)
    end
  end
  # The source of a regexp built by embedding another regexp contains the
  # embedded regexp's source.
  def test_regexp_embed_preprocess
    inner = /\xa4\xa2/e
    outer = /#{inner}/
    assert(outer.source.include?(inner.source))
  end
  # Match positions in an EUC-JP string: the trailing "a" is reported at
  # offsets 5..6 for both the whole match and the first group.
  def test_begin_end_offset
    str = e("\244\242\244\244\244\246\244\250\244\252a")
    assert(/(a)/ =~ str)
    match = $~
    assert_equal("a", match[0])
    [0, 1].each do |group|
      assert_equal(5, match.begin(group))
      assert_equal(6, match.end(group))
      assert_equal([5,6], match.offset(group))
    end
  end
  # In the Windows-31J string "\x81@@", /@/ matches only the final "@":
  # the text before the match is "\x81@" and the match offset is [1, 2].
  def test_begin_end_offset_sjis
    str = s("\x81@@")
    assert(/@/ =~ str)
    match = $~
    assert_equal(s("\x81@"), match.pre_match)
    assert_equal("@", match[0])
    assert_equal("", match.post_match)
    assert_equal([1,2], match.offset(0))
  end
  # Regexp.quote returns US-ASCII for ASCII-only input and keeps the
  # input's encoding when it holds non-ASCII bytes.
  def test_quote
    assert_regexp_generic_ascii(/#{Regexp.quote(a("a"))}#{Regexp.quote(e("e"))}/)

    ["", "a"].each do |text|
      [a(text), e(text), s(text), u(text)].each do |str|
        assert_encoding("US-ASCII", Regexp.quote(str).encoding)
      end
    end

    [
      ["ASCII-8BIT", a("\xc2\xa1")],
      ["EUC-JP", e("\xc2\xa1")],
      ["Windows-31J", s("\xc2\xa1")],
      ["UTF-8", u("\xc2\xa1")],
    ].each do |encname, str|
      assert_encoding(encname, Regexp.quote(str).encoding)
    end
  end
  # Regexp.union without arguments is a generic ASCII regexp that does not
  # match the empty string in any of the four encodings.
  def test_union_0
    r = Regexp.union
    assert_regexp_generic_ascii(r)
    [a(""), e(""), s(""), u("")].each do |empty|
      assert(r !~ empty)
    end
  end
  # Regexp.union of a single ASCII-only string is a generic ASCII regexp,
  # whatever encoding the string is tagged with.
  def test_union_1_asciionly_string
    ["", "a", "\t"].each do |text|
      [a(text), e(text), s(text), u(text)].each do |str|
        assert_regexp_generic_ascii(Regexp.union(str))
      end
    end
  end
  # Regexp.union of a single non-ASCII string is fixed to that string's
  # encoding.
  def test_union_1_nonascii_string
    bytes = "\xc2\xa1"
    assert_regexp_fixed_ascii8bit(Regexp.union(a(bytes)))
    assert_regexp_fixed_eucjp(Regexp.union(e(bytes)))
    assert_regexp_fixed_sjis(Regexp.union(s(bytes)))
    assert_regexp_fixed_utf8(Regexp.union(u(bytes)))
  end
  # Regexp.union of a single empty regexp: generic for // and //n (the
  # latter under assert_warning), fixed for //e, //s and //u.
  def test_union_1_regexp
    plain = Regexp.union(//)
    assert_regexp_generic_ascii(plain)

    assert_warning(%r{regexp match /.../n against to}) do
      assert_regexp_generic_ascii(Regexp.union(//n))
    end

    euc = Regexp.union(//e)
    assert_regexp_fixed_eucjp(euc)
    sjis = Regexp.union(//s)
    assert_regexp_fixed_sjis(sjis)
    utf8 = Regexp.union(//u)
    assert_regexp_fixed_utf8(utf8)
  end
  # Regexp.union of every pair of empty / non-ASCII strings in the four
  # encodings:
  # * two empty strings give a generic ASCII regexp;
  # * one empty string, or two strings in the same encoding, give a regexp
  #   fixed to the non-empty (or shared) encoding;
  # * two non-empty strings in different encodings raise ArgumentError.
  def test_union_2
    samples = ["", "\xc2\xa1"].map { |bytes| [a(bytes), e(bytes), s(bytes), u(bytes)] }.flatten
    samples.product(samples).each do |s1, s2|
      if s1.empty? && s2.empty?
        assert_regexp_generic_ascii(Regexp.union(s1, s2))
      elsif s1.empty? || s2.empty? || s1.encoding == s2.encoding
        r = Regexp.union(s1, s2)
        assert_regexp_fixed_encoding(r)
        assert_equal((s1.empty? ? s2 : s1).encoding, r.encoding)
      else
        assert_raise(ArgumentError) { Regexp.union(s1, s2) }
      end
    end
  end
  # Interpolated /.../n regexps: generic when ASCII-only, fixed to
  # ASCII-8BIT when they hold high bytes.
  def test_dynamic_ascii_regexp
    assert_warning(%r{regexp match /.../n against to}) do
      assert_regexp_generic_ascii(/#{ }/n)
    end

    [/#{ }\xc2\xa1/n, /\xc2\xa1#{ }/n].each do |r|
      assert_regexp_fixed_ascii8bit(r)
    end

    assert_nothing_raised do
      head, tail = a('\xc2'), a('\xa1')
      /#{head}#{tail}/
    end
  end
  # Interpolated /.../e regexps are fixed to EUC-JP; a character split
  # around the interpolation is a SyntaxError, and one split across two
  # interpolated strings raises ArgumentError.
  def test_dynamic_eucjp_regexp
    [/#{ }/e, /#{ }\xc2\xa1/e, /\xc2\xa1#{ }/e].each do |r|
      assert_regexp_fixed_eucjp(r)
    end

    ['/\xc2#{ }/e', '/#{ }\xc2/e', '/\xc2#{ }\xa1/e'].each do |src|
      assert_raise(SyntaxError) { eval(src) }
    end

    assert_raise(ArgumentError) do
      head, tail = e('\xc2'), e('\xa1')
      /#{head}#{tail}/
    end
  end
  # Interpolated /.../s regexps are fixed to Windows-31J; a character split
  # around the interpolation is a SyntaxError, and one split across two
  # interpolated strings raises ArgumentError.
  def test_dynamic_sjis_regexp
    [/#{ }/s, /#{ }\xc2\xa1/s, /\xc2\xa1#{ }/s].each do |r|
      assert_regexp_fixed_sjis(r)
    end

    ['/\x81#{ }/s', '/#{ }\x81/s', '/\x81#{ }\xa1/s'].each do |src|
      assert_raise(SyntaxError) { eval(src) }
    end

    assert_raise(ArgumentError) do
      head, tail = s('\x81'), s('\xa1')
      /#{head}#{tail}/
    end
  end
  # Interpolated /.../u regexps are fixed to UTF-8; a character split
  # around the interpolation is a SyntaxError, and one split across two
  # interpolated strings raises ArgumentError.
  def test_dynamic_utf8_regexp
    [/#{ }/u, /#{ }\xc2\xa1/u, /\xc2\xa1#{ }/u].each do |r|
      assert_regexp_fixed_utf8(r)
    end

    ['/\xc2#{ }/u', '/#{ }\xc2/u', '/\xc2#{ }\xa1/u'].each do |src|
      assert_raise(SyntaxError) { eval(src) }
    end

    assert_raise(ArgumentError) do
      head, tail = u('\xc2'), u('\xa1')
      /#{head}#{tail}/
    end
  end
  # \u{...} in a /u regexp: the surrogate boundaries D800 and DFFF and the
  # value 110000 are rejected with SyntaxError; the other values are accepted.
  def test_regexp_unicode
    # [regexp source, whether it must be accepted]
    [
      ['/\u{0}/u', true],
      ['/\u{D7FF}/u', true],
      ['/\u{D800}/u', false],
      ['/\u{DFFF}/u', false],
      ['/\u{E000}/u', true],
      ['/\u{10FFFF}/u', true],
      ['/\u{110000}/u', false],
    ].each do |src, accepted|
      if accepted
        assert_nothing_raised { eval src }
      else
        assert_raise(SyntaxError) { eval src }
      end
    end
  end
  # A regexp literal combining the bytes \xc2\xa1 (raw or written as \x
  # escapes) with \u{6666} is a SyntaxError in ASCII-8BIT, EUC-JP and
  # Windows-31J source and accepted in UTF-8 source. The "#{ }" in the
  # last four sources interpolates nothing while the source text is built.
  def test_regexp_mixed_unicode
    sources = [
      %{/\xc2\xa1\\u{6666}/},
      %{/\\u{6666}\xc2\xa1/},
      %{/\\xc2\\xa1\\u{6666}/},
      %{/\\u{6666}\\xc2\\xa1/},
      %{/\xc2\xa1#{ }\\u{6666}/},
      %{/\\u{6666}#{ }\xc2\xa1/},
      %{/\\xc2\\xa1#{ }\\u{6666}/},
      %{/\\u{6666}#{ }\\xc2\\xa1/},
    ]
    sources.each do |src|
      [a(src), e(src), s(src)].each do |code|
        assert_raise(SyntaxError) { eval(code) }
      end
      assert_nothing_raised { eval(u(src)) }
    end
  end
  # A string from String.allocate is tagged ASCII-8BIT.
  def test_str_allocate
    assert_equal(Encoding::ASCII_8BIT, String.allocate.encoding)
  end
  # Kernel#String applied to an Integer gives a US-ASCII string.
  def test_str_String
    assert_equal(Encoding::US_ASCII, String(10).encoding)
  end
  # String#% with the %c directive in each encoding, plus the error raised
  # when %s arguments have incompatible encodings.
  def test_sprintf_c
    binary = a("%c") % 128
    assert_strenc("\x80", 'ASCII-8BIT', binary)
    # Disabled check, kept for reference:
    #assert_raise(ArgumentError) { a("%c") % 0xc2a1 }

    eucjp = e("%c") % 0xc2a1
    assert_strenc("\xc2\xa1", 'EUC-JP', eucjp)
    assert_raise(ArgumentError) { e("%c") % 0xc2 }

    sjis = s("%c") % 0xc2
    assert_strenc("\xc2", 'Windows-31J', sjis)
    # Disabled check, kept for reference:
    #assert_raise(ArgumentError) { s("%c") % 0xc2a1 }

    [["\u{c2a1}", 0xc2a1], ["\u{c2}", 0xc2]].each do |expected, codepoint|
      assert_strenc(expected, 'UTF-8', u("%c") % codepoint)
    end

    assert_raise(Encoding::CompatibilityError) do
      "%s%s" % [s("\xc2\xa1"), e("\xc2\xa1")]
    end
  end
  # For every known encoding, "%p" formats a string like String#inspect,
  # and "%10p" formats it like "%10s" applied to the inspect output.
  def test_sprintf_p
    Encoding.list.each do |encoding|
      fmt = "%p".force_encoding(encoding)
      ['', 'a', "\xC2\xA1", "\x00"].each do |str|
        str.force_encoding(encoding)
        result_enc = (''.force_encoding(encoding) + str.inspect).encoding
        assert_strenc(str.inspect, result_enc, fmt % str)
      end

      str = "\xC2\xA1".force_encoding(encoding)
      result_enc = ('' + str.inspect).encoding
      assert_strenc('%10s' % str.inspect, result_enc, "%10p" % str)
    end
  end
  # String#% with the %s directive: checks the bytes and the encoding of the
  # result for ASCII-8BIT, EUC-JP, Windows-31J and UTF-8 operands.
  def test_sprintf_s
    assert_strenc('', 'ASCII-8BIT', a("%s") % a(""))
    assert_strenc('', 'EUC-JP', e("%s") % e(""))
    assert_strenc('', 'Windows-31J', s("%s") % s(""))
    assert_strenc('', 'UTF-8', u("%s") % u(""))

    assert_strenc('a', 'ASCII-8BIT', a("%s") % a("a"))
    assert_strenc('a', 'EUC-JP', e("%s") % e("a"))
    assert_strenc('a', 'Windows-31J', s("%s") % s("a"))
    assert_strenc('a', 'UTF-8', u("%s") % u("a"))

    assert_strenc("\xC2\xA1", 'ASCII-8BIT', a("%s") % a("\xc2\xa1"))
    assert_strenc("\xC2\xA1", 'EUC-JP', e("%s") % e("\xc2\xa1"))
    # Disabled check, kept for reference:
    #assert_strenc("\xC2\xA1", 'Windows-31J', s("%s") % s("\xc2\xa1"))
    assert_strenc("\xC2\xA1", 'UTF-8', u("%s") % u("\xc2\xa1"))

    # "%10s" pads to a width of ten characters. The ASCII-8BIT operand is
    # two characters long (eight spaces of padding); the EUC-JP and UTF-8
    # operands are one character each (nine spaces). The padding is spelled
    # as " " * n so that its width cannot be lost to whitespace collapsing.
    assert_strenc(" " * 8 + "\xC2\xA1", 'ASCII-8BIT', "%10s" % a("\xc2\xa1"))
    assert_strenc(" " * 9 + "\xA1\xA1", 'EUC-JP', "%10s" % e("\xa1\xa1"))
    # Disabled check, kept for reference (its padding has not been verified):
    #assert_strenc(" \xC2\xA1", 'Windows-31J', "%10s" % s("\xc2\xa1"))
    assert_strenc(" " * 9 + "\xC2\xA1", 'UTF-8', "%10s" % u("\xc2\xa1"))

    assert_strenc("\x00", 'ASCII-8BIT', a("%s") % a("\x00"))
    assert_strenc("\x00", 'EUC-JP', e("%s") % e("\x00"))
    assert_strenc("\x00", 'Windows-31J', s("%s") % s("\x00"))
    assert_strenc("\x00", 'UTF-8', u("%s") % u("\x00"))

    assert_equal("EUC-JP", (e("\xc2\xa1 %s") % "foo").encoding.name)
  end
  # "a" sorts before "\xa1" when the operands are ASCII-8BIT and/or
  # Windows-31J.
  def test_str_lt
    [
      [a("a"), a("\xa1")],
      [a("a"), s("\xa1")],
      [s("a"), a("\xa1")],
    ].each do |lhs, rhs|
      assert(lhs < rhs)
    end
  end
  # String#* : zero copies of a non-ASCII string are ASCII-only, one or
  # more copies are not.
  def test_str_multiply
    str = "\u3042"
    assert_equal(true, (str * 0).ascii_only?, "[ruby-dev:33895]")
    [1, 2].each do |count|
      assert_equal(false, (str * count).ascii_only?)
    end
  end
  # String#[] with a character index: "\xc2\xa1" is two characters in
  # ASCII-8BIT and Windows-31J, and one character in EUC-JP and UTF-8.
  def test_str_aref
    bytes = "\xc2\xa1"
    # [expected, receiver, index]
    [
      [a("\xc2"), a(bytes), 0],
      [a("\xa1"), a(bytes), 1],
      [nil, a(bytes), 2],
      [e("\xc2\xa1"), e(bytes), 0],
      [nil, e(bytes), 1],
      [s("\xc2"), s(bytes), 0],
      [s("\xa1"), s(bytes), 1],
      [nil, s(bytes), 2],
      [u("\xc2\xa1"), u(bytes), 0],
      [nil, u(bytes), 1],
    ].each do |expected, receiver, index|
      assert_equal(expected, receiver[index])
    end

    str = "\u3042"
    assert_equal(true, str[0, 0].ascii_only?, "[ruby-dev:33895]")
    assert_equal(false, str[0, 1].ascii_only?)
    assert_equal(false, str[0..-1].ascii_only?)
  end
  # Character indexing into strings that start with ASCII letters and end
  # with multibyte characters written as \u escapes.
  def test_utf8str_aref
    text = "abcdefghijklmnopqrstuvwxyz\u{3042 3044 3046 3048 304A}"
    lookups = [
      [0, "a"],
      [7, "h"],
      [8, "i"],
      [9, "j"],
      [27, "\u{3044}"],
      [28, "\u{3046}"],
      [29, "\u{3048}"],
    ]
    lookups.each do |index, char|
      assert_equal(char, text[index])
    end

    shorter = "abcdefghijklmnopqrstuvw\u{3042 3044 3046 3048 304A}"
    assert_equal("\u{3044}", shorter[24])
  end
  # str[1, 1] and str[1, 2] on the bytes C2 A1 C2 A2 C2 A3 under each test
  # encoding. Each row names the encoding helper and the two expected slices.
  def test_str_aref_len
    bytes = "\xc2\xa1\xc2\xa2\xc2\xa3"
    cases = [
      [:a, "\xa1", "\xa1\xc2"],
      [:e, "\xc2\xa2", "\xc2\xa2\xc2\xa3"],
      [:s, "\xa1", "\xa1\xc2"],
      [:u, "\xc2\xa2", "\xc2\xa2\xc2\xa3"],
    ]
    cases.each do |helper, one_char, two_chars|
      assert_equal(send(helper, one_char), send(helper, bytes)[1, 1])
      assert_equal(send(helper, two_chars), send(helper, bytes)[1, 2])
    end
  end
  # str[substring] lookups: same-encoding lookups return the substring or
  # nil, while the mixed-encoding lookups here must raise
  # Encoding::CompatibilityError. Ends with slice checks for [ruby-core:26787].
  def test_str_aref_substr
    haystack = "\xc2\xa1\xc2\xa2\xc2\xa3"
    needle = "\xa1\xc2"
    incompatible = Encoding::CompatibilityError

    assert_equal(a(needle), a(haystack)[a(needle)])
    assert_raise(incompatible) { a(haystack)[e(needle)] }
    assert_equal(nil, e(haystack)[e(needle)])
    assert_raise(incompatible) { e(haystack)[s(needle)] }
    assert_equal(s(needle), s(haystack)[s(needle)])
    assert_raise(incompatible) { s(haystack)[u(needle)] }
    assert_equal(nil, u(haystack)[u(needle)])
    assert_raise(incompatible) { u(haystack)[a(needle)] }

    euc = e("\xa1\xa2\xa3\xa4")
    assert_nil(euc[e("\xa2\xa3")])

    bug2379 = '[ruby-core:26787]'
    expected = "\u{439}"
    assert_equal(expected, "\u{439}"[0, 30], bug2379)
    assert_equal(expected, "a\u{439}"[1, 30], bug2379)
    assert_equal(expected, "a\u{439}bcdefghijklmnop"[1, 1][0, 1], bug2379)
  end
  # String#[]= with a substring key that is not encoding-compatible must
  # raise; replacing a regexp match in a US-ASCII string with an empty UTF-8
  # string must leave the receiver US-ASCII.
  def test_aset
    euc = e("\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4")
    assert_raise(Encoding::CompatibilityError) do
      euc["\xb0\xa3"] = "foo"
    end

    ascii = ua("a")
    ascii[/a/] = u("")
    assert_equal(Encoding::US_ASCII, ascii.encoding)
  end
  # String#center must produce an EUC-JP result when either the padding or
  # the receiver is EUC-JP.
  def test_str_center
    padded_with_euc = "a".center(5, e("\xa1\xa2"))
    assert_encoding("EUC-JP", padded_with_euc.encoding)

    euc_receiver = e("\xa3\xb0").center(10)
    assert_encoding("EUC-JP", euc_receiver.encoding)
  end
  # String#squeeze on EUC-JP text must collapse the repeated two-byte
  # character A3 B1 into a single occurrence.
  def test_squeeze
    repeated = e("\xa3\xb0\xa3\xb1\xa3\xb1\xa3\xb3\xa3\xb4")
    squeezed = e("\xa3\xb0\xa3\xb1\xa3\xb3\xa3\xb4")
    assert_equal(squeezed, repeated.squeeze)
  end
  # String#tr / String#tr_s across encodings.
  #
  # * A Windows-31J string made of the bytes 81 41 must come back unchanged
  #   from tr("A", "B") and tr_s("A", "B"), although its second byte is the
  #   ASCII code for "A".
  # * tr on ASCII-8BIT operands must not raise.
  # * tr with an EUC-JP replacement yields the EUC-JP string.
  # * Negated sets and long UTF-8 strings translate as expected.
  #
  # The reference value is passed as assert_equal's first (expected) argument
  # so that failure output labels the two sides correctly.
  def test_tr
    sjis = s("\x81\x41")
    assert_equal(sjis, sjis.tr("A", "B"))
    assert_equal(sjis, sjis.tr_s("A", "B"))

    assert_nothing_raised {
      "a".force_encoding("ASCII-8BIT").tr(a("a"), a("a"))
    }

    assert_equal(e("\xA1\xA1"), a("a").tr(a("a"), e("\xA1\xA1")))

    assert_equal("X\u3042\u3044X", "A\u3042\u3044\u3046".tr("^\u3042\u3044", "X"))
    assert_equal("\u3042\u3046" * 100, ("\u3042\u3044" * 100).tr("\u3044", "\u3046"))
    assert_equal("Y", "\u3042".tr("^X", "Y"))
  end
  # String#tr_s with an ASCII-8BIT receiver and source set and an EUC-JP
  # replacement: the result must equal the EUC-JP replacement character.
  #
  # The body previously called String#tr, so tr_s was never exercised by
  # this method despite its name.
  def test_tr_s
    receiver = "a".force_encoding("ASCII-8BIT")
    from = "a".force_encoding("ASCII-8BIT")
    to = "\xA1\xA1".force_encoding("EUC-JP")

    assert_equal("\xA1\xA1".force_encoding("EUC-JP"), receiver.tr_s(from, to))
  end
  # String#count: an ASCII set that does not occur counts zero in EUC-JP
  # text, and an ASCII-8BIT set with high bytes must raise against EUC-JP.
  def test_count
    assert_equal(0, e("\xa1\xa2").count("z"))

    euc = e("\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4")
    assert_raise(Encoding::CompatibilityError) do
      euc.count(a("\xa3\xb0"))
    end
  end
  # String#delete / #delete!: deleting an absent ASCII character keeps the
  # one EUC-JP character, an incompatible set raises, and multiple set
  # arguments (one of them negated) are combined.
  def test_delete
    untouched = e("\xa1\xa2").delete("z")
    assert_equal(1, untouched.length)

    euc = e("\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4")
    assert_raise(Encoding::CompatibilityError) do
      euc.delete(a("\xa3\xb2"))
    end

    text = "\u3042\u3044\u3046\u3042\u3044\u3046"
    text.delete!("\u3042\u3044", "^\u3044")
    assert_equal("\u3044\u3046\u3044\u3046", text)
  end
  # String#include? must be false for a lone byte, and for a byte pair that
  # straddles two characters of the EUC-JP receiver.
  def test_include?
    short = e("\xa1\xa2\xa3\xa4")
    assert_equal(false, short.include?(e("\xa3")))

    long = e("\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4")
    straddling = e("\xb0\xa3")
    assert_equal(false, long.include?(straddling))
  end
  # String#index / #rindex must return nil for byte sequences that do not
  # line up with the EUC-JP receiver's characters, and rindex must raise for
  # an incompatible ASCII-8BIT argument.
  def test_index
    bytes = "\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4"

    euc = e(bytes)
    assert_nil(euc.index(e("\xb3\xa3")))

    lone_byte = "\xa3"
    assert_nil(e("\xa1\xa2\xa3\xa4").index(e(lone_byte)))
    assert_nil(e("\xa1\xa2\xa3\xa4").rindex(e(lone_byte)))

    euc = e(bytes)
    assert_raise(Encoding::CompatibilityError) do
      euc.rindex(a("\xb1\xa3"))
    end
  end
  # Applies String#next! 94*94+94 times to a copy of an EUC-JP character and
  # checks the copy no longer equals the starting value.
  def test_next
    start = e("\xa1\xa1")
    advanced = start.dup
    steps = 94 * 94 + 94
    steps.times do
      advanced.next!
    end
    assert_not_equal(start, advanced)
  end
  # String#sub / #gsub: an EUC-JP replacement makes the result EUC-JP, and a
  # '\&' back-reference on an EUC-JP receiver keeps it EUC-JP.
  def test_sub
    replacement = "\xa1\xa1".force_encoding("euc-jp")
    substituted = "abc".sub(/b/, replacement)
    assert_encoding("EUC-JP", substituted.encoding)

    via_sub = "\xa4\xa2".force_encoding("euc-jp").sub(/./, '\&')
    assert_equal(Encoding::EUC_JP, via_sub.encoding)

    via_gsub = "\xa4\xa2".force_encoding("euc-jp").gsub(/./, '\&')
    assert_equal(Encoding::EUC_JP, via_gsub.encoding)
  end
  # Removing the only (non-ASCII) byte of an ASCII-8BIT string with sub must
  # leave an empty string that reports ascii_only?.
  def test_sub2
    binary = "\x80".force_encoding("ASCII-8BIT")
    pattern = Regexp.new("\x80".force_encoding("ASCII-8BIT"))
    remainder = binary.sub(pattern, "")
    assert(remainder.empty?)
    assert(remainder.ascii_only?)
  end
  # Substitutes "a" in "a@" with the byte 0x81 tagged as sjis (asserted to
  # be invalid on its own) and checks the combined result is valid.
  def test_sub3
    lead_byte = "\x81".force_encoding("sjis")
    assert_equal(false, lead_byte.valid_encoding?)

    combined = "a@".sub(/a/, lead_byte)
    assert(combined.valid_encoding?)
  end
  # String#insert(-1, "a") must append the ASCII character to EUC-JP text.
  def test_insert
    euc = e("\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4")
    appended = e("\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4a")
    assert_equal(appended, euc.insert(-1, "a"))
  end
  # String#scan(/a/) on EUC-JP text containing one ASCII "a" must find
  # exactly that character.
  def test_scan
    euc = e("\xa1\xa2a\xa3\xa4")
    matches = euc.scan(/a/)
    assert_equal(["a"], matches)
  end
  # Scanning a dup that was re-tagged as ASCII-8BIT must yield ASCII-8BIT
  # matches even though the original string is EUC-JP.
  def test_dup_scan
    euc = e("\xa4\xa2") * 100
    binary = euc.dup.force_encoding("ascii-8bit")
    binary.scan(/\A./n) do |match|
      assert_equal(Encoding::ASCII_8BIT, match.encoding)
    end
  end
  # Slicing a dup that was re-tagged as ASCII-8BIT must return an ASCII-8BIT
  # substring even though the original string is EUC-JP.
  def test_dup_aref
    euc = e("\xa4\xa2") * 100
    binary = euc.dup.force_encoding("ascii-8bit")
    tail = binary[10..-1]
    assert_equal(Encoding::ASCII_8BIT, tail.encoding)
  end
  # String#upto between an EUC-JP and a Windows-31J string must raise
  # Encoding::CompatibilityError.
  def test_upto
    first = e("\xa1\xa2")
    last = s("\xa1\xa2")
    assert_raise(Encoding::CompatibilityError) do
      first.upto(last) { |_item| break }
    end
  end
  # casecmp must not report equality for two Windows-31J strings whose
  # second bytes are 0x41 and 0x61 (the ASCII codes of "A" and "a").
  def test_casecmp
    upper_trail = s("\x81\x41")
    lower_trail = s("\x81\x61")
    comparison = upper_trail.casecmp(lower_trail)
    assert_not_equal(0, comparison)
  end
  # String#reverse on a UTF-8 string that ends with the stray byte 0xF0.
  def test_reverse
    original = u("abcdefghij\xf0")
    reversed = u("\xf0jihgfedcba")
    assert_equal(reversed, original.reverse)
  end
  # In-place String#reverse! on a UTF-8 string that ends with the stray
  # byte 0xF0.
  def test_reverse_bang
    text = u("abcdefghij\xf0")
    expected = u("\xf0jihgfedcba")
    text.reverse!
    assert_equal(expected, text)
  end
  # Concatenating a non-ASCII UTF-8 string with a non-ASCII ASCII-8BIT
  # string via + must raise Encoding::CompatibilityError.
  def test_plus
    utf8 = u("\xe3\x81\x82")
    binary = a("\xa1")
    assert_raise(Encoding::CompatibilityError) do
      utf8 + binary
    end
  end
  # String#chomp on EUC-JP text with a Windows-31J separator must raise
  # Encoding::CompatibilityError.
  def test_chomp
    euc = e("\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4")
    separator = s("\xa3\xb4")
    assert_raise(Encoding::CompatibilityError) do
      euc.chomp(separator)
    end
  end
  # String#gsub / #gsub! encoding behaviour: ascii_only? after inserting a
  # high byte, incompatible replacements from a block, an EUC-JP regexp on
  # EUC-JP text, and the result encoding when a block returns empty EUC-JP.
  def test_gsub
    text = 'abc'
    # Deliberately queried before the mutation; presumably this primes a
    # cached ASCII-only state that gsub! must then invalidate - TODO confirm.
    text.ascii_only?
    text.gsub!(/b/, "\x80")
    assert_equal(false, text.ascii_only?, "[ruby-core:14566] reported by Sam Ruby")

    binary = "abc".force_encoding(Encoding::ASCII_8BIT)
    assert_equal(Encoding::ASCII_8BIT, binary.encoding)

    # The block returns EUC-JP for "a" and UTF-8 for "c".
    assert_raise(Encoding::CompatibilityError) do
      "abc".gsub(/[ac]/) do
        if $& == "a"
          "\xc2\xa1".force_encoding("euc-jp")
        else
          "\xc2\xa1".force_encoding("utf-8")
        end
      end
    end

    euc = e("\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4")
    replaced = e("\xa3\xb0z\xa3\xb2\xa3\xb3\xa3\xb4")
    assert_equal(replaced, euc.gsub(/\xa3\xb1/e, "z"))

    from_empty = a("").gsub(//) { e("") }
    assert_equal(Encoding::ASCII_8BIT, from_empty.encoding)
    from_single = a("a").gsub(/a/) { e("") }
    assert_equal(Encoding::ASCII_8BIT, from_single.encoding)
  end
  # String#end_with?: a Windows-31J string with bytes 81 40 must not end
  # with "@" (whose ASCII code is 0x40). Then, for every encoding yielded by
  # each_encoding, a string must end with its own transcoded suffix.
  # Failure messages are built with encdump.
  def test_end_with
    sjis = s("\x81\x40")
    at_sign = "@"
    assert_equal(false, sjis.end_with?(at_sign), "#{encdump sjis}.end_with?(#{encdump at_sign})")

    pairs = [
      ["\u3042\u3044", "\u3044"],
      ["\u3042a\u3044", "a\u3044"],
    ]
    pairs.each do |whole, tail|
      each_encoding(whole, tail) do |str, suffix|
        assert_equal(true, str.end_with?(suffix), "#{encdump str}.end_with?(#{encdump suffix})")
      end
    end
  end
  # String#each_line: an ASCII-8BIT separator on EUC-JP text must raise, and
  # the default separator must split EUC-JP text at the newline.
  def test_each_line
    euc = e("\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4")
    assert_raise(Encoding::CompatibilityError) do
      euc.each_line(a("\xa3\xb1")) { |_line| }
    end

    two_lines = e("\xa4\xa2\nfoo")
    collected = []
    two_lines.each_line do |line|
      collected << line
    end
    assert_equal([e("\xa4\xa2\n"), e("foo")], collected)
  end
  # String#each_char on EUC-JP text that mixes two-byte characters with
  # ASCII letters must yield one element per character.
  def test_each_char
    expected = [e("\xa4\xa2"), "b", e("\xa4\xa4"), "c"]
    text = "\xa4\xa2b\xa4\xa4c".force_encoding("euc-jp")
    chars = text.each_char.to_a
    assert_equal(expected, chars, "[ruby-dev:33211] #{encdump text}.each_char.to_a")
  end
  # Appending integer code points with String#concat / #<<: size and
  # encoding after appending 0xA2, and a four-byte GB18030 code point.
  def test_str_concat
    appended = "".concat(0xA2)
    assert_equal(1, appended.size)

    from_ascii = "".force_encoding("US-ASCII").concat(0xA2)
    assert_equal(Encoding::ASCII_8BIT, from_ascii.encoding)

    expected = "A\x84\x31\xA4\x39".force_encoding("GB18030")
    actual = "A".force_encoding("GB18030") << 0x8431A439
    assert_equal(expected, actual)
  end
  # The empty regexp matched at position -1 of a one-character EUC-JP string
  # must report offset [0, 0]; matching it against a Symbol returns 0.
  def test_regexp_match
    euc = "\xa1\xa1".force_encoding("euc-jp")
    match = //.match(euc, -1)
    assert_equal([0, 0], match.offset(0))

    assert_equal(0, // =~ :a)
  end
  # String#split:
  # * splitting EUC-JP text on the empty regexp yields one element per
  #   two-byte character ([ruby-dev:32452]);
  # * for every encoding yielded by each_encoding, "abc,def" split on ","
  #   (limit -1) yields the transcoded "abc" and "def".
  #
  # The reference array is passed as assert_equal's first (expected)
  # argument so that failure output labels the two sides correctly.
  def test_split
    assert_equal([e("\xa1\xa2"), e("\xa1\xa3")],
                 e("\xa1\xa2\xa1\xa3").split(//),
                 '[ruby-dev:32452]')
    each_encoding("abc,def", ",", "abc", "def") do |str, sep, *expected|
      assert_equal(expected, str.split(sep, -1))
    end
  end
  # Defines two methods whose names are the same bytes (C2 A1) tagged as
  # EUC-JP and as UTF-8, and checks that they are distinct methods: each one
  # sets @nonascii_method_name to its own marker, and their names and
  # inspect strings differ.
  def test_nonascii_method_name
    eval(e("def \xc2\xa1() @nonascii_method_name = :e end"))
    eval(u("def \xc2\xa1() @nonascii_method_name = :u end"))

    eval(e("\xc2\xa1()"))
    assert_equal(:e, @nonascii_method_name)
    eval(u("\xc2\xa1()"))
    assert_equal(:u, @nonascii_method_name)

    euc_method = method(e("\xc2\xa1"))
    utf8_method = method(u("\xc2\xa1"))
    assert_not_equal(euc_method.name, utf8_method.name)
    assert_not_equal(euc_method.inspect, utf8_method.inspect)
    assert_equal(e("\xc2\xa1"), euc_method.name.to_s)
    assert_equal(u("\xc2\xa1"), utf8_method.name.to_s)
  end
  # Interning the same bytes under EUC-JP and under UTF-8 must produce two
  # different symbols.
  def test_symbol
    bytes = "\xc2\xa1"
    euc_symbol = bytes.dup.force_encoding("euc-jp").intern
    utf8_symbol = bytes.dup.force_encoding("utf-8").intern
    assert_not_equal(euc_symbol, utf8_symbol)
  end
  # Every operator-like token in the list, once interned, must have a
  # US-ASCII encoded symbol.
  def test_symbol_op
    operators = %w"
      .. ... + - +(binary) -(binary) * / % ** +@ -@ | ^ & ! <=> > >= < <= ==
      === != =~ !~ ~ ! [] []= << >> :: `
    "
    operators.each do |operator|
      symbol = operator.intern
      assert_equal(Encoding::US_ASCII, symbol.encoding, "[ruby-dev:33449]")
    end
  end
  # Integer#chr: every byte value matches pack("C"), a four-byte GB18030
  # code point round-trips, the RangeError message for a large value does
  # not show a negative number, and several out-of-range code points raise
  # RangeError for their encoding.
  def test_chr
    (0..255).each do |byte|
      assert_equal([byte].pack("C"), byte.chr)
    end

    gb18030 = "\x84\x31\xA4\x39".force_encoding("GB18030")
    assert_equal(gb18030, 0x8431A439.chr("GB18030"))

    error = assert_raise(RangeError) do
      2206368128.chr(Encoding::UTF_8)
    end
    assert_not_match(/-\d+ out of char range/, error.message)

    out_of_range = [
      [0x80, "US-ASCII"],
      [0x80, "SHIFT_JIS"],
      [0xE0, "SHIFT_JIS"],
      [0x100, "SHIFT_JIS"],
      [0xA0, "EUC-JP"],
      [0x100, "EUC-JP"],
      [0xA1A0, "EUC-JP"],
    ]
    out_of_range.each do |code, encoding_name|
      assert_raise(RangeError) { code.chr(encoding_name) }
    end
  end
  # An EUC-JP string must compare equal to itself after a Marshal
  # dump/load round trip.
  def test_marshal
    original = "\xa1\xa1".force_encoding("euc-jp")
    dumped = Marshal.dump(original)
    restored = Marshal.load(dumped)
    assert_equal(original, restored)
  end
  # Every ENV key and value must be tagged with the encoding that
  # Encoding.find("locale") returns.
  def test_env
    expected = Encoding.find("locale")
    ENV.each do |name, value|
      assert_equal(expected, name.encoding)
      assert_equal(expected, value.encoding)
    end
  end
  # The empty string literal in this file must be US-ASCII.
  def test_empty_string
    actual = "".encoding
    assert_equal(Encoding::US_ASCII, actual)
  end
  # nil.to_s must return a US-ASCII string.
  def test_nil_to_s
    text = nil.to_s
    assert_equal(Encoding::US_ASCII, text.encoding)
  end
  # nil.inspect must return a US-ASCII string.
  def test_nil_inspect
    text = nil.inspect
    assert_equal(Encoding::US_ASCII, text.encoding)
  end
  # true.to_s must return a US-ASCII string.
  def test_true_to_s
    text = true.to_s
    assert_equal(Encoding::US_ASCII, text.encoding)
  end
  # false.to_s must return a US-ASCII string.
  def test_false_to_s
    text = false.to_s
    assert_equal(Encoding::US_ASCII, text.encoding)
  end
  # to_s on the small integer 1 must return a US-ASCII string.
  def test_fixnum_to_s
    text = 1.to_s
    assert_equal(Encoding::US_ASCII, text.encoding)
  end
  # to_s on the float 1.0 must return a US-ASCII string.
  def test_float_to_s
    text = 1.0.to_s
    assert_equal(Encoding::US_ASCII, text.encoding)
  end
  # to_s on the large integer 1 << 129 must return a US-ASCII string.
  def test_bignum_to_s
    big = 1 << 129
    assert_equal(Encoding::US_ASCII, big.to_s.encoding)
  end
  # Array#to_s encoding: US-ASCII for arrays without strings and for the
  # mixed array below; for a one-string array it must match the encoding of
  # that string's inspect.
  def test_array_to_s
    [[], [nil], [1]].each do |array|
      assert_equal(Encoding::US_ASCII, array.to_s.encoding)
    end

    ["", "a"].each do |str|
      assert_equal(str.inspect.encoding, [str].to_s.encoding)
    end

    mixed = [nil, 1, "", "a", "\x20", []]
    assert_equal(Encoding::US_ASCII, mixed.to_s.encoding)
  end
  # Hash#to_s must be US-ASCII for an empty hash and for a small hash with
  # integer, nil and ASCII string entries.
  def test_hash_to_s
    empty = {}
    assert_equal(Encoding::US_ASCII, empty.to_s.encoding)

    populated = {1=>nil,"foo"=>""}
    assert_equal(Encoding::US_ASCII, populated.to_s.encoding)
  end
  # Encoding.find must raise TypeError for nil, an integer, an array and a
  # hash.
  def test_encoding_find
    [nil, 0, [], {}].each do |invalid|
      assert_raise(TypeError) { Encoding.find(invalid) }
    end
  end
  # Encoding#to_s and Encoding#inspect must themselves be US-ASCII strings.
  def test_encoding_to_s
    us_ascii = Encoding::US_ASCII
    assert_equal(us_ascii, us_ascii.to_s.encoding)
    assert_equal(us_ascii, us_ascii.inspect.encoding)
  end
  # Regexp#source of a regexp built from an EUC-JP string must equal that
  # string.
  def test_regexp_source
    pattern = "\xa4\xa2".force_encoding("euc-jp")
    regexp = Regexp.new(pattern)
    source = regexp.source
    assert_equal(pattern, source, "[ruby-dev:33377] Regexp.new(#{encdump pattern}).source")
  end
  # __ENCODING__ inside eval: without a magic comment it follows the
  # encoding of the evaluated string; with an emacs-style magic comment on
  # the first line it follows the comment instead.
  # Each row is [expected encoding, source text, encoding forced on the text].
  def test_magic_comment
    cases = [
      [Encoding::US_ASCII, "__ENCODING__", "US-ASCII"],
      [Encoding::ASCII_8BIT, "__ENCODING__", "ASCII-8BIT"],
      [Encoding::US_ASCII, "# -*- encoding: US-ASCII -*-\n__ENCODING__", "ASCII-8BIT"],
      [Encoding::ASCII_8BIT, "# -*- encoding: ASCII-8BIT -*-\n__ENCODING__", "US-ASCII"],
    ]
    cases.each do |expected, source, forced|
      assert_equal(expected, eval(source.force_encoding(forced)))
    end
  end
  # A vim-style modeline containing fileencoding on the first line must
  # determine __ENCODING__ instead of the evaluated string's own encoding.
  # Each row is [expected encoding, source text, encoding forced on the text].
  def test_magic_comment_vim
    cases = [
      [Encoding::US_ASCII,
       "# vim: filetype=ruby, fileencoding: US-ASCII, ts=3, sw=3\n__ENCODING__",
       "ASCII-8BIT"],
      [Encoding::ASCII_8BIT,
       "# vim: filetype=ruby, fileencoding: ASCII-8BIT, ts=3, sw=3\n__ENCODING__",
       "US-ASCII"],
    ]
    cases.each do |expected, source, forced|
      assert_equal(expected, eval(source.force_encoding(forced)))
    end
  end
  # Where a magic comment is honoured: it takes effect on the line after a
  # shebang, but not after a blank first line and not when it trails an
  # expression. In the latter two cases __ENCODING__ follows the evaluated
  # string's own encoding.
  # Each row is [expected encoding, source text, encoding forced on the text].
  def test_magic_comment_at_various_positions
    us_ascii = Encoding::US_ASCII
    binary = Encoding::ASCII_8BIT

    cases = [
      # after shebang
      [us_ascii, "#!/usr/bin/ruby\n# -*- encoding: US-ASCII -*-\n__ENCODING__", "ASCII-8BIT"],
      [binary, "#!/usr/bin/ruby\n# -*- encoding: ASCII-8BIT -*-\n__ENCODING__", "US-ASCII"],
      # wrong position
      [binary, "\n# -*- encoding: US-ASCII -*-\n__ENCODING__", "ASCII-8BIT"],
      [us_ascii, "\n# -*- encoding: ASCII-8BIT -*-\n__ENCODING__", "US-ASCII"],
      # leading expressions
      [binary, "v=1 # -*- encoding: US-ASCII -*-\n__ENCODING__", "ASCII-8BIT"],
      [us_ascii, "v=1 # -*- encoding: ASCII-8BIT -*-\n__ENCODING__", "US-ASCII"],
    ]
    cases.each do |expected, source, forced|
      assert_equal(expected, eval(source.force_encoding(forced)))
    end
  end
  # Feeds regexp literal source texts to assert_regexp_usascii_literal.
  # Each row holds the literal's source, the second argument (an Encoding or
  # nil) and, for some rows, a third argument naming an exception class.
  # Rows are splatted so every call receives exactly the arguments listed.
  def test_regexp_usascii
    cases = [
      ['//', Encoding::US_ASCII],
      ['/#{ }/', Encoding::US_ASCII],
      ['/#{"a"}/', Encoding::US_ASCII],
      ['/#{%q"\x80"}/', Encoding::ASCII_8BIT],
      ['/#{"\x80"}/', nil, SyntaxError],

      ['/a/', Encoding::US_ASCII],
      ['/a#{ }/', Encoding::US_ASCII],
      ['/a#{"a"}/', Encoding::US_ASCII],
      ['/a#{%q"\x80"}/', Encoding::ASCII_8BIT],
      ['/a#{"\x80"}/', nil, SyntaxError],

      ['/\x80/', Encoding::ASCII_8BIT],
      ['/\x80#{ }/', Encoding::ASCII_8BIT],
      ['/\x80#{"a"}/', Encoding::ASCII_8BIT],
      ['/\x80#{%q"\x80"}/', Encoding::ASCII_8BIT],
      ['/\x80#{"\x80"}/', nil, SyntaxError],

      ['/\u1234/', Encoding::UTF_8],
      ['/\u1234#{ }/', Encoding::UTF_8],
      ['/\u1234#{"a"}/', Encoding::UTF_8],
      ['/\u1234#{%q"\x80"}/', nil, SyntaxError],
      ['/\u1234#{"\x80"}/', nil, SyntaxError],
      ['/\u1234\x80/', nil, SyntaxError],
      ['/\u1234#{ }\x80/', nil, RegexpError],
    ]
    cases.each do |arguments|
      assert_regexp_usascii_literal(*arguments)
    end
  end
  # String#chop on the two bytes 81 40 tagged as GBK must leave an empty
  # string.
  def test_gbk
    gbk = "\x81\x40".force_encoding("GBK")
    assert_equal("", gbk.chop)
  end
  # String#chop on "a" followed by the four bytes 8E A2 A1 A1 tagged as
  # EUC-TW must leave just "a".
  def test_euc_tw
    euc_tw = "a\x8e\xa2\xa1\xa1".force_encoding("euc-tw")
    assert_equal("a", euc_tw.chop)
  end
  # String#valid_encoding? must be recomputed for derived and mutated
  # strings: two invalid halves joined in various ways become valid, a
  # reversed string becomes valid, and appending a stray byte to a valid
  # string makes it invalid ([ruby-core:33027]).
  def test_valid_encoding
    half = "\xa1".force_encoding("euc-jp")
    assert_equal(false, half.valid_encoding?)
    assert_equal(true, (half + half).valid_encoding?, "[ruby-dev:33826]")
    assert_equal(true, (half * 2).valid_encoding?, "[ruby-dev:33826]")
    assert_equal(true, ("%s%s" % [half, half]).valid_encoding?)
    assert_equal(true, (half.dup << half).valid_encoding?)
    assert_equal(true, "".center(2, half).valid_encoding?)

    trailing_lead = "\xa1\xa1\x8f".force_encoding("euc-jp")
    assert_equal(false, trailing_lead.valid_encoding?)
    assert_equal(true, trailing_lead.reverse.valid_encoding?)

    bug4018 = '[ruby-core:33027]'
    append_cases = [
      ["\xa1\xa1", "\x8f", "euc-jp"],
      ["aa", "\xff", "utf-16be"],
    ]
    append_cases.each do |valid_part, stray_part, encoding_name|
      str = valid_part.force_encoding(encoding_name)
      # valid_encoding? is queried before the append, as in the bug report
      # scenario, so the check after << cannot rely on the earlier answer.
      assert_equal(true, str.valid_encoding?)
      str << stray_part.force_encoding(encoding_name)
      assert_equal(false, str.valid_encoding?, bug4018)
    end
  end
  # String#getbyte on a six-byte UTF-8 string: positive index, negative
  # index, and an out-of-range index (which must return nil).
  def test_getbyte
    bytes = "\xE3\x81\x82\xE3\x81\x84"
    assert_equal(0x82, u(bytes).getbyte(2))
    assert_equal(0x82, u(bytes).getbyte(-4))
    assert_nil(u(bytes).getbyte(100))
  end
  # String#setbyte on a six-byte UTF-8 string: positive and negative indexes
  # overwrite the same byte, and an out-of-range index raises IndexError.
  def test_setbyte
    bytes = "\xE3\x81\x82\xE3\x81\x84"
    patched = "\xE3\x81\x84\xE3\x81\x84"

    by_positive = u(bytes)
    by_positive.setbyte(2, 0x84)
    assert_equal(u(patched), by_positive)

    untouched = u(bytes)
    assert_raise(IndexError) do
      untouched.setbyte(100, 0)
    end

    by_negative = u(bytes)
    by_negative.setbyte(-4, 0x84)
    assert_equal(u(patched), by_negative)
  end
  # Encoding.compatible? with a non-string argument, with strings, and with
  # Encoding objects on both sides.
  def test_compatible
    utf8 = Encoding::UTF_8
    us_ascii = Encoding::US_ASCII
    binary = Encoding::ASCII_8BIT

    assert_nil(Encoding.compatible?("", 0))
    assert_equal(utf8, Encoding.compatible?(u(""), ua("abc")))

    assert_equal(utf8, Encoding.compatible?(utf8, utf8))
    assert_equal(utf8, Encoding.compatible?(utf8, us_ascii))
    assert_equal(binary, Encoding.compatible?(binary, us_ascii))
    assert_nil(Encoding.compatible?(utf8, binary))
  end
  # force_encoding regressions: centering with an invalid UTF-8 pad string
  # must simply complete ([ruby-dev:33807]), and two bytes appended to an
  # ASCII-8BIT string must count as one character once re-tagged as UTF-8
  # ([ruby-core:22437]).
  def test_force_encoding
    # Only completion matters here; the assertion itself is always true.
    "".center(1, "\x80".force_encoding("utf-8"))
    assert(true, "moved from btest/knownbug, [ruby-dev:33807]")

    from_integers = "".force_encoding("ascii-8bit") << 0xC3 << 0xB6
    assert_equal(1, from_integers.force_encoding("utf-8").size, '[ruby-core:22437]')

    from_chars = "".force_encoding("ascii-8bit") << 0xC3.chr << 0xB6.chr
    assert_equal(1, from_chars.force_encoding("utf-8").size, '[ruby-core:22437]')
  end
  # String#codepoints on the two-code-point string U+30BB U+309A must return
  # both code points separately.
  def test_combchar_codepoint
    text = "\u30BB\u309A"
    codepoints = text.codepoints.to_a
    assert_equal([0x30BB, 0x309A], codepoints)
  end
  # Yields the given strings transcoded into each non-dummy encoding in
  # Encoding.list, one yield per encoding.
  #
  # strings - the strings to transcode with String#encode.
  #
  # An encoding is skipped when transcoding any of the strings into it
  # raises an EncodingError (for example an undefined conversion or a
  # missing converter). Other errors, such as passing a non-String, are
  # not swallowed, and neither are errors raised by the caller's block.
  def each_encoding(*strings)
    Encoding.list.each do |enc|
      next if enc.dummy?
      begin
        converted = strings.map { |str| str.encode(enc) }
      rescue EncodingError
        # This encoding cannot take one of the strings; move on.
        next
      end
      yield(*converted)
    end
  end
  1224. end