PageRenderTime 49ms CodeModel.GetById 17ms RepoModel.GetById 0ms app.codeStats 1ms

/test/externals/ruby1.9/ruby/test_m17n.rb

https://bitbucket.org/nicksieger/jruby
Ruby | 1339 lines | 1148 code | 179 blank | 12 comment | 20 complexity | da5cdc0832ef3f4caf490bb0a775445a MD5 | raw file
Possible License(s): GPL-3.0, JSON
  1. require 'test/unit'
  2. require 'stringio'
  3. class TestM17N < Test::Unit::TestCase
  # Asserts that +actual+ is the Encoding registered under the name +encname+.
  #
  # encname - encoding name (or alias) resolved through Encoding.find.
  # actual  - the Encoding object under test.
  # message - optional failure message forwarded to assert_equal.
  def assert_encoding(encname, actual, message=nil)
    expected = Encoding.find(encname)
    assert_equal(expected, actual, message)
  end
  # Encoding-tagging helpers. Each method returns a copy of +str+ whose bytes
  # are unchanged but whose encoding is forced to the one named below; the
  # argument itself is never modified (it is dup'ed first).
  module AESU
    # US-ASCII
    def ua(str) str.dup.force_encoding("US-ASCII") end
    # ASCII-8BIT (binary)
    def a(str) str.dup.force_encoding("ASCII-8BIT") end
    # EUC-JP
    def e(str) str.dup.force_encoding("EUC-JP") end
    # Windows-31J
    def s(str) str.dup.force_encoding("Windows-31J") end
    # UTF-8
    def u(str) str.dup.force_encoding("UTF-8") end
  end
  14. include AESU
  15. extend AESU
  # Asserts that +actual+ is a String with encoding +enc+ and exactly the
  # bytes of +bytes+.
  #
  # bytes   - String holding the expected byte sequence (its encoding is ignored).
  # enc     - expected encoding, either an Encoding or a name for Encoding.find.
  # actual  - the value under test.
  # message - optional failure message forwarded to every assertion.
  def assert_strenc(bytes, enc, actual, message=nil)
    assert_instance_of(String, actual, message)
    expected_enc = String === enc ? Encoding.find(enc) : enc
    assert_equal(expected_enc, actual.encoding, message)
    # Compare as ASCII-8BIT so only the raw bytes take part in the comparison.
    assert_equal(a(bytes), a(actual), message)
  end
  # Runs the given block with $stderr redirected to an in-memory buffer and
  # asserts that everything written there matches +pat+.
  #
  # pat  - pattern handed to assert_match.
  # mesg - optional failure message.
  #
  # $stderr is restored even when the block raises; in that case the
  # exception propagates and the match assertion is not evaluated.
  def assert_warning(pat, mesg=nil)
    captured = StringIO.new
    org_stderr = $stderr
    begin
      $stderr = captured
      yield
    ensure
      $stderr = org_stderr
    end
    assert_match(pat, captured.string, mesg)
  end
  # Asserts that regexp +r+ is not bound to a fixed encoding and that matching
  # it against a non-ASCII string in each of the four tested encodings does
  # not raise.
  def assert_regexp_generic_encoding(r)
    assert(!r.fixed_encoding?)
    %w[ASCII-8BIT EUC-JP Windows-31J UTF-8].each {|ename|
      # "\xc2\xa1" is a valid sequence for ASCII-8BIT, EUC-JP, Windows-31J and UTF-8.
      assert_nothing_raised { r =~ "\xc2\xa1".force_encoding(ename) }
    }
  end
  39. def assert_regexp_fixed_encoding(r)
  40. assert(r.fixed_encoding?)
  41. %w[ASCII-8BIT EUC-JP Windows-31J UTF-8].each {|ename|
  42. enc = Encoding.find(ename)
  43. if enc == r.encoding
  44. assert_nothing_raised { r =~ "\xc2\xa1".force_encoding(enc) }
  45. else
  46. assert_raise(Encoding::CompatibilityError) { r =~ "\xc2\xa1".force_encoding(enc) }
  47. end
  48. }
  49. end
  # Asserts that +r+ reports US-ASCII as its encoding and behaves as an
  # encoding-generic regexp.
  def assert_regexp_generic_ascii(r)
    assert_encoding("US-ASCII", r.encoding)
    assert_regexp_generic_encoding(r)
  end
  # Asserts that +r+ reports ASCII-8BIT as its encoding and behaves as a
  # fixed-encoding regexp.
  def assert_regexp_fixed_ascii8bit(r)
    assert_encoding("ASCII-8BIT", r.encoding)
    assert_regexp_fixed_encoding(r)
  end
  # Asserts that +r+ reports EUC-JP as its encoding and behaves as a
  # fixed-encoding regexp.
  def assert_regexp_fixed_eucjp(r)
    assert_encoding("EUC-JP", r.encoding)
    assert_regexp_fixed_encoding(r)
  end
  # Asserts that +r+ reports Windows-31J as its encoding and behaves as a
  # fixed-encoding regexp.
  def assert_regexp_fixed_sjis(r)
    assert_encoding("Windows-31J", r.encoding)
    assert_regexp_fixed_encoding(r)
  end
  # Asserts that +r+ reports UTF-8 as its encoding and behaves as a
  # fixed-encoding regexp.
  def assert_regexp_fixed_utf8(r)
    assert_encoding("UTF-8", r.encoding)
    assert_regexp_fixed_encoding(r)
  end
  # Evaluates the regexp literal source +r+ beneath a US-ASCII magic comment
  # and checks the outcome.
  #
  # r   - Ruby source text of a regexp literal (interpolated into the code).
  # enc - encoding the evaluated literal must report (used when +ex+ is nil).
  # ex  - when given, the exception class that evaluating the code must raise.
  def assert_regexp_usascii_literal(r, enc, ex = nil)
    code = "# -*- encoding: US-ASCII -*-\n#{r}.encoding"
    if ex
      assert_raise(ex) { eval(code) }
    else
      assert_equal(enc, eval(code))
    end
  end
  # Returns Ruby source text that rebuilds +str+ including its encoding.
  #
  # String#dump is used for the bytes. When the dump does not already end in
  # a .force_encoding("...") call, one naming str's encoding is appended.
  def encdump(str)
    d = str.dump
    if /\.force_encoding\("[A-Za-z0-9.:_+-]*"\)\z/ =~ d
      d
    else
      "#{d}.force_encoding(#{str.encoding.name.dump})"
    end
  end
  # Renders +args+ as a parenthesised, comma-separated argument list.
  # String elements are rendered with encdump (so their encoding is
  # explicit); every other element is rendered with #inspect.
  def encdumpargs(args)
    rendered = args.map {|arg|
      String === arg ? encdump(arg) : arg.inspect
    }
    "(#{rendered.join(',')})"
  end
  # Checks the encoding of +t+, a string produced from +s1+ and +s2+.
  #
  # If s1 has non-ASCII content, t must carry s1's encoding; otherwise, if s2
  # has non-ASCII content, t must carry s2's encoding; when both operands are
  # ASCII-only, either operand's encoding is accepted.
  def assert_str_enc_propagation(t, s1, s2)
    if !s1.ascii_only?
      assert_equal(s1.encoding, t.encoding)
    elsif !s2.ascii_only?
      assert_equal(s2.encoding, t.encoding)
    else
      assert([s1.encoding, s2.encoding].include?(t.encoding))
    end
  end
  # Asserts that +actual_proc+ behaves like +expected_proc+.
  #
  # expected_proc is called first. If it raises a StandardError, actual_proc
  # must raise an exception of the same class; otherwise both procs must
  # return equal values.
  def assert_same_result(expected_proc, actual_proc)
    begin
      expected = expected_proc.call
    rescue => error
      # The reference raised: only the exception class is compared.
      return assert_raise(error.class) { actual_proc.call }
    end
    assert_equal(expected, actual_proc.call)
  end
  # Returns true when the given strings share at most one encoding among
  # those that contain non-ASCII content. ASCII-only strings are ignored,
  # whatever their encoding.
  def str_enc_compatible?(*strs)
    non_ascii_encodings = strs.reject {|str| str.ascii_only? }.map {|str| str.encoding }
    non_ascii_encodings.uniq.length <= 1
  end
  129. # tests start
  # String literals evaluated from ASCII-8BIT source text are expected to be
  # ASCII-8BIT, both when empty and when ASCII-only.
  def test_string_ascii_literal
    assert_encoding("ASCII-8BIT", eval(a(%{""})).encoding)
    assert_encoding("ASCII-8BIT", eval(a(%{"a"})).encoding)
  end
  # String literals evaluated from EUC-JP source text are expected to be
  # EUC-JP, whether they hold raw bytes or backslash escapes (the doubled
  # backslashes reach eval as a single-backslash escape sequence).
  def test_string_eucjp_literal
    assert_encoding("EUC-JP", eval(e(%{""})).encoding)
    assert_encoding("EUC-JP", eval(e(%{"a"})).encoding)
    assert_encoding("EUC-JP", eval(e(%{"\xa1\xa1"})).encoding)
    assert_encoding("EUC-JP", eval(e(%{"\\xa1\\xa1"})).encoding)
    assert_encoding("EUC-JP", eval(e(%{"\\x20"})).encoding)
    assert_encoding("EUC-JP", eval(e(%{"\\n"})).encoding)
    assert_encoding("EUC-JP", eval(e(%{"\\x80"})).encoding)
  end
  # A literal written with a \u escape is expected to be UTF-8, and
  # evaluating ASCII-8BIT source that combines \u with \x80 is expected to be
  # a SyntaxError.
  def test_utf8_literal
    assert_equal(Encoding::UTF_8, "\u3042".encoding, "[ruby-dev:33406] \"\\u3042\".encoding")
    assert_raise(SyntaxError) { eval(a('\u3052\x80')) }
  end
  # A string literal mixing raw non-ASCII bytes with a \u{...} escape is
  # expected to be a SyntaxError unless the source text is UTF-8; both orders
  # (bytes first, escape first) are checked.
  def test_string_mixed_unicode
    assert_raise(SyntaxError) { eval(a(%{"\xc2\xa1\\u{6666}"})) }
    assert_raise(SyntaxError) { eval(e(%{"\xc2\xa1\\u{6666}"})) }
    assert_raise(SyntaxError) { eval(s(%{"\xc2\xa1\\u{6666}"})) }
    assert_nothing_raised { eval(u(%{"\xc2\xa1\\u{6666}"})) }
    assert_raise(SyntaxError) { eval(a(%{"\\u{6666}\xc2\xa1"})) }
    assert_raise(SyntaxError) { eval(e(%{"\\u{6666}\xc2\xa1"})) }
    assert_raise(SyntaxError) { eval(s(%{"\\u{6666}\xc2\xa1"})) }
    assert_nothing_raised { eval(u(%{"\\u{6666}\xc2\xa1"})) }
  end
  # String#inspect is expected to render each byte of these byte sequences as
  # an upper-case \xNN escape, both on their own and when followed by an
  # ordinary character. The expected values are single-quoted, so they
  # contain the literal backslash text.
  def test_string_inspect_invalid
    assert_equal('"\xFE"', e("\xfe").inspect)
    assert_equal('"\x8E"', e("\x8e").inspect)
    assert_equal('"\x8F"', e("\x8f").inspect)
    assert_equal('"\x8F\xA1"', e("\x8f\xa1").inspect)
    assert_equal('"\xEF"', s("\xef").inspect)
    assert_equal('"\xC2"', u("\xc2").inspect)
    assert_equal('"\xE0\x80"', u("\xe0\x80").inspect)
    assert_equal('"\xF0\x80\x80"', u("\xf0\x80\x80").inspect)
    assert_equal('"\xF8\x80\x80\x80"', u("\xf8\x80\x80\x80").inspect)
    assert_equal('"\xFC\x80\x80\x80\x80"', u("\xfc\x80\x80\x80\x80").inspect)
    # Same sequences followed by a space.
    assert_equal('"\xFE "', e("\xfe ").inspect)
    assert_equal('"\x8E "', e("\x8e ").inspect)
    assert_equal('"\x8F "', e("\x8f ").inspect)
    assert_equal('"\x8F\xA1 "', e("\x8f\xa1 ").inspect)
    assert_equal('"\xEF "', s("\xef ").inspect)
    assert_equal('"\xC2 "', u("\xc2 ").inspect)
    assert_equal('"\xE0\x80 "', u("\xe0\x80 ").inspect)
    assert_equal('"\xF0\x80\x80 "', u("\xf0\x80\x80 ").inspect)
    assert_equal('"\xF8\x80\x80\x80 "', u("\xf8\x80\x80\x80 ").inspect)
    assert_equal('"\xFC\x80\x80\x80\x80 "', u("\xfc\x80\x80\x80\x80 ").inspect)
    assert_equal('"\x81."', s("\x81.").inspect)
    assert_equal('"\xFC"', u("\xfc").inspect)
  end
  # Checks how String#inspect renders multibyte characters depending on
  # Encoding.default_external: the expected output contains the characters
  # verbatim when the string's encoding equals the default external
  # encoding, and an escaped form otherwise. Also checks that "abc" encoded
  # in several UTF-16/32 encodings and UTF8_SOFTBANK inspects as "abc".
  #
  # The default internal/external encodings are saved first and restored in
  # the ensure clause.
  def test_string_inspect_encoding
    orig_int = Encoding.default_internal
    orig_ext = Encoding.default_external
    Encoding.default_internal = nil
    # The loop variable is deliberately not named `e`, so the e(...) helper
    # call below is not confused with it.
    [Encoding::UTF_8, Encoding::EUC_JP, Encoding::Windows_31J, Encoding::GB18030].
      each do |ext|
      Encoding.default_external = ext
      str = "\x81\x30\x81\x30".force_encoding('GB18030')
      assert_equal(Encoding::GB18030 == ext ? %{"#{str}"} : '"\x{81308130}"', str.inspect)
      str = e("\xa1\x8f\xa1\xa1")
      expected = "\"\\xA1\x8F\xA1\xA1\"".force_encoding("EUC-JP")
      assert_equal(Encoding::EUC_JP == ext ? expected : "\"\\xA1\\x{8FA1A1}\"", str.inspect)
      str = s("\x81@")
      assert_equal(Encoding::Windows_31J == ext ? %{"#{str}"} : '"\x{8140}"', str.inspect)
      str = "\u3042\u{10FFFD}"
      assert_equal(Encoding::UTF_8 == ext ? %{"#{str}"} : '"\u3042\u{10FFFD}"', str.inspect)
    end
    Encoding.default_external = Encoding::UTF_8
    [Encoding::UTF_16BE, Encoding::UTF_16LE, Encoding::UTF_32BE, Encoding::UTF_32LE,
     Encoding::UTF8_SOFTBANK].each do |enc|
      str = "abc".encode(enc)
      assert_equal('"abc"', str.inspect)
    end
  ensure
    Encoding.default_internal = orig_int
    Encoding.default_external = orig_ext
  end
  # String#dump round trip: evaluating the dump of each sample string must
  # give back an equal string. The samples are the byte sequences used by the
  # inspect tests plus plain UTF-8, ASCII and UTF-16 strings.
  def test_str_dump
    [
      e("\xfe"),
      e("\x8e"),
      e("\x8f"),
      e("\x8f\xa1"),
      s("\xef"),
      u("\xc2"),
      u("\xe0\x80"),
      u("\xf0\x80\x80"),
      u("\xf8\x80\x80\x80"),
      u("\xfc\x80\x80\x80\x80"),
      e("\xfe "),
      e("\x8e "),
      e("\x8f "),
      e("\x8f\xa1 "),
      s("\xef "),
      u("\xc2 "),
      u("\xe0\x80 "),
      u("\xf0\x80\x80 "),
      u("\xf8\x80\x80\x80 "),
      u("\xfc\x80\x80\x80\x80 "),
      e("\xa1\x8f\xa1\xa1"),
      s("\x81."),
      s("\x81@"),
      u("\xfc"),
      "\u3042",
      "ascii",
      "\u3042".encode("UTF-16LE"),
      "\u3042".encode("UTF-16BE"),
    ].each do |str|
      assert_equal(str, eval(str.dump), "[ruby-dev:33142]")
    end
  end
  # Generates UTF-8-shaped byte sequences from bit templates and checks
  # String#valid_encoding? on each.
  #
  # In a template, "x" and "X" are placeholder bits. Lower-case x bits are
  # filled with all ones or all zeros (pat1). The X bits are then filled so
  # that either only the last X is set or every X is set (pat2); such a
  # sequence must be valid exactly when its bit string does not sort after
  # the one for U+10FFFF. For multi-byte templates, the variant with every X
  # cleared (pat3) must be invalid.
  def test_validate_redundant_utf8
    bits_0x10ffff = "11110100 10001111 10111111 10111111"
    [
      "0xxxxxxx",
      "110XXXXx 10xxxxxx",
      "1110XXXX 10Xxxxxx 10xxxxxx",
      "11110XXX 10XXxxxx 10xxxxxx 10xxxxxx",
      "111110XX 10XXXxxx 10xxxxxx 10xxxxxx 10xxxxxx",
      "1111110X 10XXXXxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx",
      "11111110 10XXXXXx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx",
      "11111111 10XXXXXX 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx",
    ].each {|pat0|
      [
        pat0.gsub(/x/, '1'),
        pat0.gsub(/x/, '0')
      ].each {|pat1|
        [
          pat1.sub(/X([^X]*)\z/, '1\1').gsub(/X/, "0"),
          pat1.gsub(/X/, "1"),
        ].each {|pat2|
          str = [pat2.gsub(/ /, "")].pack("B*").force_encoding("utf-8")
          if pat2 <= bits_0x10ffff
            assert(str.valid_encoding?, "#{pat2}")
          else
            assert(!str.valid_encoding?, "#{pat2}")
          end
        }
        # Only templates with a space, i.e. more than one byte, have a pat3.
        if / / =~ pat0
          pat3 = pat1.gsub(/X/, "0")
          str = [pat3.gsub(/ /, "")].pack("B*").force_encoding("utf-8")
          assert(!str.valid_encoding?, "#{pat3}")
        end
      }
    }
  end
  # Three-byte UTF-8 sequences in the surrogate ranges must be reported as
  # invalid, while the sequences just before and just after those ranges
  # must be valid.
  def test_validate_surrogate
    # 1110XXXX 10Xxxxxx 10xxxxxx : 3 bytes UTF-8
    pats = [
      "11101101 10011111 10111111", # just before surrogate high
      "11101101 1010xxxx 10xxxxxx", # surrogate high
      "11101101 1011xxxx 10xxxxxx", # surrogate low
      "11101110 10000000 10000000", # just after surrogate low
    ]
    # Surrogate templates: check both ends of each range (x bits all 0 / all 1).
    pats.values_at(1,2).each {|pat0|
      [
        pat0.gsub(/x/, '0'),
        pat0.gsub(/x/, '1'),
      ].each {|pat1|
        str = [pat1.gsub(/ /, "")].pack("B*").force_encoding("utf-8")
        assert(!str.valid_encoding?, "#{pat1}")
      }
    }
    pats.values_at(0,3).each {|pat|
      str = [pat.gsub(/ /, "")].pack("B*").force_encoding("utf-8")
      assert(str.valid_encoding?, "#{pat}")
    }
  end
  # Regexp literals with an explicit encoding flag whose bytes form a
  # truncated or malformed multibyte character are expected to be rejected
  # with SyntaxError.
  def test_regexp_too_short_multibyte_character
    # Single-quoted: the \x escapes are interpreted by the regexp literal.
    assert_raise(SyntaxError) { eval('/\xfe/e') }
    assert_raise(SyntaxError) { eval('/\x8e/e') }
    assert_raise(SyntaxError) { eval('/\x8f/e') }
    assert_raise(SyntaxError) { eval('/\x8f\xa1/e') }
    assert_raise(SyntaxError) { eval('/\xef/s') }
    assert_raise(SyntaxError) { eval('/\xc2/u') }
    assert_raise(SyntaxError) { eval('/\xe0\x80/u') }
    assert_raise(SyntaxError) { eval('/\xf0\x80\x80/u') }
    assert_raise(SyntaxError) { eval('/\xf8\x80\x80\x80/u') }
    assert_raise(SyntaxError) { eval('/\xfc\x80\x80\x80\x80/u') }
    # raw 8bit
    assert_raise(SyntaxError) { eval("/\xfe/e") }
    assert_raise(SyntaxError) { eval("/\xc2/u") }
    # invalid suffix
    assert_raise(SyntaxError) { eval('/\xc2\xff/u') }
    assert_raise(SyntaxError) { eval('/\xc2 /u') }
    assert_raise(SyntaxError) { eval('/\xc2\x20/u') }
  end
  # ASCII-only regexps, however their source string is tagged, are expected
  # to be US-ASCII and encoding-generic: they match "a" in every tested
  # encoding and fail to match (without raising) non-ASCII strings.
  def test_regexp_generic
    assert_regexp_generic_ascii(/a/)
    assert_regexp_generic_ascii(Regexp.new(a("a")))
    assert_regexp_generic_ascii(Regexp.new(e("a")))
    assert_regexp_generic_ascii(Regexp.new(s("a")))
    assert_regexp_generic_ascii(Regexp.new(u("a")))
    [/a/, Regexp.new(a("a"))].each {|r|
      assert_equal(0, r =~ a("a"))
      assert_equal(0, r =~ e("a"))
      assert_equal(0, r =~ s("a"))
      assert_equal(0, r =~ u("a"))
      assert_equal(nil, r =~ a("\xc2\xa1"))
      assert_equal(nil, r =~ e("\xc2\xa1"))
      assert_equal(nil, r =~ s("\xc2\xa1"))
      assert_equal(nil, r =~ u("\xc2\xa1"))
    }
  end
  # An ASCII-only regexp with the /n flag is expected to stay
  # encoding-generic, but matching it against a non-ASCII string in an
  # encoding other than ASCII-8BIT must emit a warning on $stderr naming
  # that encoding.
  def test_regexp_ascii_none
    r = /a/n
    assert_warning(%r{regexp match /.../n against to}) {
      assert_regexp_generic_ascii(r)
    }
    assert_equal(0, r =~ a("a"))
    assert_equal(0, r =~ e("a"))
    assert_equal(0, r =~ s("a"))
    assert_equal(0, r =~ u("a"))
    assert_equal(nil, r =~ a("\xc2\xa1"))
    assert_warning(%r{regexp match /.../n against to EUC-JP string}) {
      assert_equal(nil, r =~ e("\xc2\xa1"))
    }
    assert_warning(%r{regexp match /.../n against to Windows-31J string}) {
      assert_equal(nil, r =~ s("\xc2\xa1"))
    }
    assert_warning(%r{regexp match /.../n against to UTF-8 string}) {
      assert_equal(nil, r =~ u("\xc2\xa1"))
    }
    assert_nothing_raised { eval(e("/\\x80/n")) }
  end
  # Regexps containing non-ASCII bytes with the /n flag, or evaluated from
  # ASCII-8BIT source, are expected to be fixed to ASCII-8BIT: they match
  # only ASCII-8BIT non-ASCII strings and raise for the other encodings.
  def test_regexp_ascii
    assert_regexp_fixed_ascii8bit(/\xc2\xa1/n)
    assert_regexp_fixed_ascii8bit(eval(a(%{/\xc2\xa1/})))
    assert_regexp_fixed_ascii8bit(eval(a(%{/\xc2\xa1/n})))
    assert_regexp_fixed_ascii8bit(eval(a(%q{/\xc2\xa1/})))
    # Raw non-ASCII bytes in EUC-JP source combined with /n are rejected.
    assert_raise(SyntaxError) { eval("/\xa1\xa1/n".force_encoding("euc-jp")) }
    [/\xc2\xa1/n, eval(a(%{/\xc2\xa1/})), eval(a(%{/\xc2\xa1/n}))].each {|r|
      assert_equal(nil, r =~ a("a"))
      assert_equal(nil, r =~ e("a"))
      assert_equal(nil, r =~ s("a"))
      assert_equal(nil, r =~ u("a"))
      assert_equal(0, r =~ a("\xc2\xa1"))
      assert_raise(Encoding::CompatibilityError) { r =~ e("\xc2\xa1") }
      assert_raise(Encoding::CompatibilityError) { r =~ s("\xc2\xa1") }
      assert_raise(Encoding::CompatibilityError) { r =~ u("\xc2\xa1") }
    }
  end
  # Regexps with the /e flag, or evaluated from EUC-JP source with non-ASCII
  # content, are expected to be fixed to EUC-JP. ASCII-only subjects can be
  # matched in any encoding; non-ASCII subjects in another encoding raise
  # Encoding::CompatibilityError.
  def test_regexp_euc
    assert_regexp_fixed_eucjp(/a/e)
    assert_regexp_fixed_eucjp(/\xc2\xa1/e)
    assert_regexp_fixed_eucjp(eval(e(%{/\xc2\xa1/})))
    assert_regexp_fixed_eucjp(eval(e(%q{/\xc2\xa1/})))
    # ASCII-only pattern fixed to EUC-JP.
    [/a/e].each {|r|
      assert_equal(0, r =~ a("a"))
      assert_equal(0, r =~ e("a"))
      assert_equal(0, r =~ s("a"))
      assert_equal(0, r =~ u("a"))
      assert_raise(Encoding::CompatibilityError) { r =~ a("\xc2\xa1") }
      assert_equal(nil, r =~ e("\xc2\xa1"))
      assert_raise(Encoding::CompatibilityError) { r =~ s("\xc2\xa1") }
      assert_raise(Encoding::CompatibilityError) { r =~ u("\xc2\xa1") }
    }
    # Non-ASCII pattern fixed to EUC-JP.
    [/\xc2\xa1/e, eval(e(%{/\xc2\xa1/})), eval(e(%q{/\xc2\xa1/}))].each {|r|
      assert_equal(nil, r =~ a("a"))
      assert_equal(nil, r =~ e("a"))
      assert_equal(nil, r =~ s("a"))
      assert_equal(nil, r =~ u("a"))
      assert_raise(Encoding::CompatibilityError) { r =~ a("\xc2\xa1") }
      assert_equal(0, r =~ e("\xc2\xa1"))
      assert_raise(Encoding::CompatibilityError) { r =~ s("\xc2\xa1") }
      assert_raise(Encoding::CompatibilityError) { r =~ u("\xc2\xa1") }
    }
  end
  # Regexps with the /s flag, or evaluated from Windows-31J source with
  # non-ASCII content, are expected to be fixed to Windows-31J.
  def test_regexp_sjis
    assert_regexp_fixed_sjis(/a/s)
    assert_regexp_fixed_sjis(/\xc2\xa1/s)
    assert_regexp_fixed_sjis(eval(s(%{/\xc2\xa1/})))
    assert_regexp_fixed_sjis(eval(s(%q{/\xc2\xa1/})))
  end
  # Matching a Windows-31J regexp against an EUC-JP string must raise
  # Encoding::CompatibilityError, and the error message must mention the
  # regexp's encoding (matched case-insensitively).
  #
  # assert_raise is used to capture the exception, so a match that does not
  # raise is reported as an assertion failure rather than as a NoMethodError
  # on nil.
  def test_regexp_windows_31j
    err = assert_raise(Encoding::CompatibilityError) {
      Regexp.new("\xa1".force_encoding("windows-31j")) =~ "\xa1\xa1".force_encoding("euc-jp")
    }
    assert_match(/windows-31j/i, err.message)
  end
  # Interpolating one regexp into another.
  #
  # First part: embedding a fixed EUC-JP regexp into a Windows-31J (/s)
  # regexp that also has non-ASCII content must raise RegexpError, both when
  # evaluated from source text and when written as a literal.
  #
  # Second part: for regexps built from ASCII-8BIT / US-ASCII sources, the
  # result is expected to be US-ASCII while every part is ASCII-only and
  # ASCII-8BIT as soon as the outer or the embedded regexp contains \xa1.
  def test_regexp_embed
    r = eval(e("/\xc2\xa1/"))
    assert_raise(RegexpError) { eval(s("/\xc2\xa1\#{r}/s")) }
    assert_raise(RegexpError) { eval(s("/\#{r}\xc2\xa1/s")) }
    r = /\xc2\xa1/e
    assert_raise(RegexpError) { eval(s("/\xc2\xa1\#{r}/s")) }
    assert_raise(RegexpError) { eval(s("/\#{r}\xc2\xa1/s")) }
    r = eval(e("/\xc2\xa1/"))
    assert_raise(RegexpError) { /\xc2\xa1#{r}/s }
    r = /\xc2\xa1/e
    assert_raise(RegexpError) { /\xc2\xa1#{r}/s }

    # ASCII-only inner and outer parts.
    r1 = Regexp.new('foo'.force_encoding("ascii-8bit"))
    r2 = eval('/bar#{r1}/'.force_encoding('ascii-8bit'))
    assert_equal(Encoding::US_ASCII, r2.encoding)
    r1 = Regexp.new('foo'.force_encoding("us-ascii"))
    r2 = eval('/bar#{r1}/'.force_encoding('ascii-8bit'))
    assert_equal(Encoding::US_ASCII, r2.encoding)
    r1 = Regexp.new('foo'.force_encoding("ascii-8bit"))
    r2 = eval('/bar#{r1}/'.force_encoding('us-ascii'))
    assert_equal(Encoding::US_ASCII, r2.encoding)
    r1 = Regexp.new('foo'.force_encoding("us-ascii"))
    r2 = eval('/bar#{r1}/'.force_encoding('us-ascii'))
    assert_equal(Encoding::US_ASCII, r2.encoding)

    # \xa1 in the embedded regexp.
    r1 = Regexp.new('\xa1'.force_encoding("ascii-8bit"))
    r2 = eval('/bar#{r1}/'.force_encoding('ascii-8bit'))
    assert_equal(Encoding::ASCII_8BIT, r2.encoding)
    r1 = Regexp.new('\xa1'.force_encoding("ascii-8bit"))
    r2 = eval('/bar#{r1}/'.force_encoding('us-ascii'))
    assert_equal(Encoding::ASCII_8BIT, r2.encoding)

    # \xa1 in the outer regexp.
    r1 = Regexp.new('foo'.force_encoding("ascii-8bit"))
    r2 = eval('/\xa1#{r1}/'.force_encoding('ascii-8bit'))
    assert_equal(Encoding::ASCII_8BIT, r2.encoding)
    r1 = Regexp.new('foo'.force_encoding("us-ascii"))
    r2 = eval('/\xa1#{r1}/'.force_encoding('ascii-8bit'))
    assert_equal(Encoding::ASCII_8BIT, r2.encoding)
    r1 = Regexp.new('\xa1'.force_encoding("ascii-8bit"))
    r2 = eval('/\xa1#{r1}/'.force_encoding('ascii-8bit'))
    assert_equal(Encoding::ASCII_8BIT, r2.encoding)
  end
  # The [[:space:]] bracket class is expected to match U+00A0, with and
  # without the /u flag.
  def test_regexp_named_class
    assert_match(/[[:space:]]/u, "\u{00a0}")
    assert_match(/[[:space:]]/, "\u{00a0}")
  end
  # A regexp using \p{Hiragana} is expected to have a fixed encoding: EUC-JP
  # when built from an EUC-JP string/source or with /e (and it must then
  # match the EUC-JP bytes "\xa4\xa2"), and UTF-8 when the EUC-JP source also
  # contains a \u{...} escape.
  def test_regexp_property
    s = '\p{Hiragana}'.force_encoding("euc-jp")
    assert_equal(Encoding::EUC_JP, s.encoding)
    r = nil
    assert_nothing_raised {
      r = Regexp.new(s)
    }
    assert(r.fixed_encoding?)
    assert_match(r, "\xa4\xa2".force_encoding("euc-jp"))

    r = eval('/\p{Hiragana}/'.force_encoding("euc-jp"))
    assert(r.fixed_encoding?)
    assert_match(r, "\xa4\xa2".force_encoding("euc-jp"))

    r = /\p{Hiragana}/e
    assert(r.fixed_encoding?)
    assert_match(r, "\xa4\xa2".force_encoding("euc-jp"))

    r = eval('/\u{3042}\p{Hiragana}/'.force_encoding("euc-jp"))
    assert(r.fixed_encoding?)
    assert_equal(Encoding::UTF_8, r.encoding)

    r = eval('/\p{Hiragana}\u{3042}/'.force_encoding("euc-jp"))
    assert(r.fixed_encoding?)
    assert_equal(Encoding::UTF_8, r.encoding)
  end
  # The source of a regexp that interpolates an /e regexp must still contain
  # the embedded regexp's source text.
  def test_regexp_embed_preprocess
    r1 = /\xa4\xa2/e
    r2 = /#{r1}/
    assert(r2.source.include?(r1.source))
  end
  # MatchData#begin, #end and #offset on an EUC-JP string made of five
  # two-byte sequences followed by "a". The asserted position of "a" is 5..6
  # for both the whole match and group 1, i.e. not the byte position 10..11.
  def test_begin_end_offset
    str = e("\244\242\244\244\244\246\244\250\244\252a")
    assert(/(a)/ =~ str)
    assert_equal("a", $&)
    assert_equal(5, $~.begin(0))
    assert_equal(6, $~.end(0))
    assert_equal([5,6], $~.offset(0))
    assert_equal(5, $~.begin(1))
    assert_equal(6, $~.end(1))
    assert_equal([5,6], $~.offset(1))
  end
  # In the Windows-31J string "\x81@@", /@/ must not match the "@" byte that
  # follows \x81: the expected pre-match is "\x81@", the match is the last
  # "@", and its asserted offset is [1, 2].
  def test_begin_end_offset_sjis
    str = s("\x81@@")
    assert(/@/ =~ str)
    assert_equal(s("\x81@"), $`)
    assert_equal("@", $&)
    assert_equal("", $')
    assert_equal([1,2], $~.offset(0))
  end
  # Regexp.quote is expected to return a US-ASCII string for ASCII-only
  # input in any encoding, and to keep the input's encoding when the input
  # has non-ASCII content.
  def test_quote
    assert_regexp_generic_ascii(/#{Regexp.quote(a("a"))}#{Regexp.quote(e("e"))}/)

    assert_encoding("US-ASCII", Regexp.quote(a("")).encoding)
    assert_encoding("US-ASCII", Regexp.quote(e("")).encoding)
    assert_encoding("US-ASCII", Regexp.quote(s("")).encoding)
    assert_encoding("US-ASCII", Regexp.quote(u("")).encoding)
    assert_encoding("US-ASCII", Regexp.quote(a("a")).encoding)
    assert_encoding("US-ASCII", Regexp.quote(e("a")).encoding)
    assert_encoding("US-ASCII", Regexp.quote(s("a")).encoding)
    assert_encoding("US-ASCII", Regexp.quote(u("a")).encoding)

    assert_encoding("ASCII-8BIT", Regexp.quote(a("\xc2\xa1")).encoding)
    assert_encoding("EUC-JP", Regexp.quote(e("\xc2\xa1")).encoding)
    assert_encoding("Windows-31J", Regexp.quote(s("\xc2\xa1")).encoding)
    assert_encoding("UTF-8", Regexp.quote(u("\xc2\xa1")).encoding)
  end
  # Regexp.union with no arguments is expected to be a generic US-ASCII
  # regexp that does not match the empty string in any tested encoding.
  def test_union_0
    r = Regexp.union
    assert_regexp_generic_ascii(r)
    assert(r !~ a(""))
    assert(r !~ e(""))
    assert(r !~ s(""))
    assert(r !~ u(""))
  end
  # Regexp.union of a single ASCII-only string (empty, "a" or a tab) is
  # expected to be a generic US-ASCII regexp whatever the string's encoding.
  def test_union_1_asciionly_string
    assert_regexp_generic_ascii(Regexp.union(a("")))
    assert_regexp_generic_ascii(Regexp.union(e("")))
    assert_regexp_generic_ascii(Regexp.union(s("")))
    assert_regexp_generic_ascii(Regexp.union(u("")))
    assert_regexp_generic_ascii(Regexp.union(a("a")))
    assert_regexp_generic_ascii(Regexp.union(e("a")))
    assert_regexp_generic_ascii(Regexp.union(s("a")))
    assert_regexp_generic_ascii(Regexp.union(u("a")))
    assert_regexp_generic_ascii(Regexp.union(a("\t")))
    assert_regexp_generic_ascii(Regexp.union(e("\t")))
    assert_regexp_generic_ascii(Regexp.union(s("\t")))
    assert_regexp_generic_ascii(Regexp.union(u("\t")))
  end
  # Regexp.union of a single non-ASCII string is expected to be fixed to
  # that string's encoding.
  def test_union_1_nonascii_string
    assert_regexp_fixed_ascii8bit(Regexp.union(a("\xc2\xa1")))
    assert_regexp_fixed_eucjp(Regexp.union(e("\xc2\xa1")))
    assert_regexp_fixed_sjis(Regexp.union(s("\xc2\xa1")))
    assert_regexp_fixed_utf8(Regexp.union(u("\xc2\xa1")))
  end
  # Regexp.union of a single empty regexp is expected to keep that regexp's
  # encoding behaviour: generic for // and //n (the latter with the /n
  # match warning), fixed for //e, //s and //u.
  def test_union_1_regexp
    assert_regexp_generic_ascii(Regexp.union(//))
    assert_warning(%r{regexp match /.../n against to}) {
      assert_regexp_generic_ascii(Regexp.union(//n))
    }
    assert_regexp_fixed_eucjp(Regexp.union(//e))
    assert_regexp_fixed_sjis(Regexp.union(//s))
    assert_regexp_fixed_utf8(Regexp.union(//u))
  end
  # Regexp.union of every ordered pair taken from empty and non-ASCII
  # strings in the four tested encodings.
  #
  # Expectations:
  # * both empty: generic US-ASCII regexp;
  # * exactly one empty: fixed to the encoding of the non-empty string;
  # * both non-empty, same encoding: fixed to that encoding;
  # * both non-empty, different encodings: ArgumentError.
  def test_union_2
    ary = [
      a(""), e(""), s(""), u(""),
      a("\xc2\xa1"), e("\xc2\xa1"), s("\xc2\xa1"), u("\xc2\xa1")
    ]
    ary.each {|s1|
      ary.each {|s2|
        if s1.empty? && s2.empty?
          assert_regexp_generic_ascii(Regexp.union(s1, s2))
        elsif s1.empty?
          r = Regexp.union(s1, s2)
          assert_regexp_fixed_encoding(r)
          assert_equal(s2.encoding, r.encoding)
        elsif s2.empty? || s1.encoding == s2.encoding
          r = Regexp.union(s1, s2)
          assert_regexp_fixed_encoding(r)
          assert_equal(s1.encoding, r.encoding)
        else
          assert_raise(ArgumentError) { Regexp.union(s1, s2) }
        end
      }
    }
  end
  # Interpolated (dynamic) regexps with the /n flag: generic when ASCII-only
  # (with the /n match warning), fixed to ASCII-8BIT when they contain
  # non-ASCII bytes. Joining two ASCII-8BIT fragments must not raise.
  def test_dynamic_ascii_regexp
    assert_warning(%r{regexp match /.../n against to}) {
      assert_regexp_generic_ascii(/#{ }/n)
    }
    assert_regexp_fixed_ascii8bit(/#{ }\xc2\xa1/n)
    assert_regexp_fixed_ascii8bit(/\xc2\xa1#{ }/n)
    assert_nothing_raised { s1, s2 = a('\xc2'), a('\xa1'); /#{s1}#{s2}/ }
  end
  # Interpolated regexps with the /e flag are expected to be fixed to EUC-JP.
  # A multibyte character split by an interpolation is a SyntaxError in a
  # literal and an ArgumentError when the halves come from interpolated
  # strings.
  def test_dynamic_eucjp_regexp
    assert_regexp_fixed_eucjp(/#{ }/e)
    assert_regexp_fixed_eucjp(/#{ }\xc2\xa1/e)
    assert_regexp_fixed_eucjp(/\xc2\xa1#{ }/e)
    assert_raise(SyntaxError) { eval('/\xc2#{ }/e') }
    assert_raise(SyntaxError) { eval('/#{ }\xc2/e') }
    assert_raise(SyntaxError) { eval('/\xc2#{ }\xa1/e') }
    assert_raise(ArgumentError) { s1, s2 = e('\xc2'), e('\xa1'); /#{s1}#{s2}/ }
  end
  # Interpolated regexps with the /s flag are expected to be fixed to
  # Windows-31J. A multibyte character split by an interpolation is a
  # SyntaxError in a literal and an ArgumentError when the halves come from
  # interpolated strings.
  def test_dynamic_sjis_regexp
    assert_regexp_fixed_sjis(/#{ }/s)
    assert_regexp_fixed_sjis(/#{ }\xc2\xa1/s)
    assert_regexp_fixed_sjis(/\xc2\xa1#{ }/s)
    assert_raise(SyntaxError) { eval('/\x81#{ }/s') }
    assert_raise(SyntaxError) { eval('/#{ }\x81/s') }
    assert_raise(SyntaxError) { eval('/\x81#{ }\xa1/s') }
    assert_raise(ArgumentError) { s1, s2 = s('\x81'), s('\xa1'); /#{s1}#{s2}/ }
  end
  # Interpolated regexps with the /u flag are expected to be fixed to UTF-8.
  # A multibyte character split by an interpolation is a SyntaxError in a
  # literal and an ArgumentError when the halves come from interpolated
  # strings.
  def test_dynamic_utf8_regexp
    assert_regexp_fixed_utf8(/#{ }/u)
    assert_regexp_fixed_utf8(/#{ }\xc2\xa1/u)
    assert_regexp_fixed_utf8(/\xc2\xa1#{ }/u)
    assert_raise(SyntaxError) { eval('/\xc2#{ }/u') }
    assert_raise(SyntaxError) { eval('/#{ }\xc2/u') }
    assert_raise(SyntaxError) { eval('/\xc2#{ }\xa1/u') }
    assert_raise(ArgumentError) { s1, s2 = u('\xc2'), u('\xa1'); /#{s1}#{s2}/ }
  end
  # \u{...} escapes in /u regexps: code points up to U+10FFFF are accepted
  # except the range U+D800..U+DFFF, whose two ends are checked here, and
  # U+110000 is rejected. Rejections are SyntaxError.
  def test_regexp_unicode
    assert_nothing_raised { eval '/\u{0}/u' }
    assert_nothing_raised { eval '/\u{D7FF}/u' }
    assert_raise(SyntaxError) { eval '/\u{D800}/u' }
    assert_raise(SyntaxError) { eval '/\u{DFFF}/u' }
    assert_nothing_raised { eval '/\u{E000}/u' }
    assert_nothing_raised { eval '/\u{10FFFF}/u' }
    assert_raise(SyntaxError) { eval '/\u{110000}/u' }
  end
  # A regexp literal mixing non-ASCII bytes (raw or written as \x escapes)
  # with a \u{...} escape is expected to be a SyntaxError unless the source
  # text is UTF-8. Each group of four lines tests one arrangement against
  # ASCII-8BIT, EUC-JP, Windows-31J and UTF-8 source.
  #
  # Note that the strings are %{...} literals, so "#{ }" is interpolated by
  # this method before the text reaches eval.
  def test_regexp_mixed_unicode
    # Raw bytes, then \u.
    assert_raise(SyntaxError) { eval(a(%{/\xc2\xa1\\u{6666}/})) }
    assert_raise(SyntaxError) { eval(e(%{/\xc2\xa1\\u{6666}/})) }
    assert_raise(SyntaxError) { eval(s(%{/\xc2\xa1\\u{6666}/})) }
    assert_nothing_raised { eval(u(%{/\xc2\xa1\\u{6666}/})) }

    # \u, then raw bytes.
    assert_raise(SyntaxError) { eval(a(%{/\\u{6666}\xc2\xa1/})) }
    assert_raise(SyntaxError) { eval(e(%{/\\u{6666}\xc2\xa1/})) }
    assert_raise(SyntaxError) { eval(s(%{/\\u{6666}\xc2\xa1/})) }
    assert_nothing_raised { eval(u(%{/\\u{6666}\xc2\xa1/})) }

    # \x escapes, then \u.
    assert_raise(SyntaxError) { eval(a(%{/\\xc2\\xa1\\u{6666}/})) }
    assert_raise(SyntaxError) { eval(e(%{/\\xc2\\xa1\\u{6666}/})) }
    assert_raise(SyntaxError) { eval(s(%{/\\xc2\\xa1\\u{6666}/})) }
    assert_nothing_raised { eval(u(%{/\\xc2\\xa1\\u{6666}/})) }

    # \u, then \x escapes.
    assert_raise(SyntaxError) { eval(a(%{/\\u{6666}\\xc2\\xa1/})) }
    assert_raise(SyntaxError) { eval(e(%{/\\u{6666}\\xc2\\xa1/})) }
    assert_raise(SyntaxError) { eval(s(%{/\\u{6666}\\xc2\\xa1/})) }
    assert_nothing_raised { eval(u(%{/\\u{6666}\\xc2\\xa1/})) }

    # The same four arrangements with "#{ }" between the two parts.
    assert_raise(SyntaxError) { eval(a(%{/\xc2\xa1#{ }\\u{6666}/})) }
    assert_raise(SyntaxError) { eval(e(%{/\xc2\xa1#{ }\\u{6666}/})) }
    assert_raise(SyntaxError) { eval(s(%{/\xc2\xa1#{ }\\u{6666}/})) }
    assert_nothing_raised { eval(u(%{/\xc2\xa1#{ }\\u{6666}/})) }

    assert_raise(SyntaxError) { eval(a(%{/\\u{6666}#{ }\xc2\xa1/})) }
    assert_raise(SyntaxError) { eval(e(%{/\\u{6666}#{ }\xc2\xa1/})) }
    assert_raise(SyntaxError) { eval(s(%{/\\u{6666}#{ }\xc2\xa1/})) }
    assert_nothing_raised { eval(u(%{/\\u{6666}#{ }\xc2\xa1/})) }

    assert_raise(SyntaxError) { eval(a(%{/\\xc2\\xa1#{ }\\u{6666}/})) }
    assert_raise(SyntaxError) { eval(e(%{/\\xc2\\xa1#{ }\\u{6666}/})) }
    assert_raise(SyntaxError) { eval(s(%{/\\xc2\\xa1#{ }\\u{6666}/})) }
    assert_nothing_raised { eval(u(%{/\\xc2\\xa1#{ }\\u{6666}/})) }

    assert_raise(SyntaxError) { eval(a(%{/\\u{6666}#{ }\\xc2\\xa1/})) }
    assert_raise(SyntaxError) { eval(e(%{/\\u{6666}#{ }\\xc2\\xa1/})) }
    assert_raise(SyntaxError) { eval(s(%{/\\u{6666}#{ }\\xc2\\xa1/})) }
    assert_nothing_raised { eval(u(%{/\\u{6666}#{ }\\xc2\\xa1/})) }
  end
  # A string created with String.allocate is expected to be ASCII-8BIT.
  def test_str_allocate
    s = String.allocate
    assert_equal(Encoding::ASCII_8BIT, s.encoding)
  end
  # Kernel#String applied to an Integer is expected to return a US-ASCII
  # string.
  def test_str_String
    s = String(10)
    assert_equal(Encoding::US_ASCII, s.encoding)
  end
  # "%c" with an Integer argument: the expected result is the character with
  # that code in the format string's encoding (raw byte value for
  # ASCII-8BIT, EUC-JP and Windows-31J; code point for UTF-8). A value that
  # is not a character in EUC-JP raises ArgumentError, and formatting
  # non-ASCII strings of two different encodings with "%s%s" raises
  # Encoding::CompatibilityError.
  def test_sprintf_c
    assert_strenc("\x80", 'ASCII-8BIT', a("%c") % 128)
    #assert_raise(ArgumentError) { a("%c") % 0xc2a1 }
    assert_strenc("\xc2\xa1", 'EUC-JP', e("%c") % 0xc2a1)
    assert_raise(ArgumentError) { e("%c") % 0xc2 }
    assert_strenc("\xc2", 'Windows-31J', s("%c") % 0xc2)
    #assert_raise(ArgumentError) { s("%c") % 0xc2a1 }
    assert_strenc("\u{c2a1}", 'UTF-8', u("%c") % 0xc2a1)
    assert_strenc("\u{c2}", 'UTF-8', u("%c") % 0xc2)
    assert_raise(Encoding::CompatibilityError) {
      "%s%s" % [s("\xc2\xa1"), e("\xc2\xa1")]
    }
  end
  # "%p" formatting for every encoding in Encoding.list.
  #
  # For each sample string, "%p" (with the format string tagged with the
  # target encoding) must produce the bytes of the sample's #inspect. The
  # expected result encoding is the target encoding when it is
  # ASCII-compatible and "".inspect is US-ASCII, and otherwise the encoding
  # of "".inspect. "%10p" must equal "%10s" applied to the #inspect output.
  def test_sprintf_p
    enc = "".inspect.encoding
    asc = Encoding::US_ASCII
    # Loop variables avoid the names `e` and `s`, which are helper methods.
    Encoding.list.each do |target|
      format = "%p".force_encoding(target)
      ['', 'a', "\xC2\xA1", "\x00"].each do |str|
        str.force_encoding(target)
        assert_strenc(str.inspect, target.ascii_compatible? && enc == asc ? target : enc, format % str)
      end
      str = "\xC2\xA1".force_encoding(target)
      assert_strenc('%10s' % str.inspect, enc, "%10p" % str)
    end
  end
  # "%s" formatting: the result must carry the bytes of the argument and the
  # encoding of the format string / argument, for empty, ASCII, non-ASCII
  # and NUL-containing strings.
  #
  # For "%10s" the expected padding is written as an explicit repetition so
  # the number of spaces is unambiguous: the field is ten characters wide,
  # so a two-character argument ("\xc2\xa1" as ASCII-8BIT) gets 8 spaces and
  # a one-character argument (the same two bytes as a single EUC-JP or UTF-8
  # character) gets 9.
  def test_sprintf_s
    assert_strenc('', 'ASCII-8BIT', a("%s") % a(""))
    assert_strenc('', 'EUC-JP', e("%s") % e(""))
    assert_strenc('', 'Windows-31J', s("%s") % s(""))
    assert_strenc('', 'UTF-8', u("%s") % u(""))

    assert_strenc('a', 'ASCII-8BIT', a("%s") % a("a"))
    assert_strenc('a', 'EUC-JP', e("%s") % e("a"))
    assert_strenc('a', 'Windows-31J', s("%s") % s("a"))
    assert_strenc('a', 'UTF-8', u("%s") % u("a"))

    assert_strenc("\xC2\xA1", 'ASCII-8BIT', a("%s") % a("\xc2\xa1"))
    assert_strenc("\xC2\xA1", 'EUC-JP', e("%s") % e("\xc2\xa1"))
    #assert_strenc("\xC2\xA1", 'Windows-31J', s("%s") % s("\xc2\xa1"))
    assert_strenc("\xC2\xA1", 'UTF-8', u("%s") % u("\xc2\xa1"))

    assert_strenc(" " * 8 + "\xC2\xA1", 'ASCII-8BIT', "%10s" % a("\xc2\xa1"))
    assert_strenc(" " * 9 + "\xA1\xA1", 'EUC-JP', "%10s" % e("\xa1\xa1"))
    #assert_strenc(" " * 8 + "\xC2\xA1", 'Windows-31J', "%10s" % s("\xc2\xa1"))
    assert_strenc(" " * 9 + "\xC2\xA1", 'UTF-8', "%10s" % u("\xc2\xa1"))

    assert_strenc("\x00", 'ASCII-8BIT', a("%s") % a("\x00"))
    assert_strenc("\x00", 'EUC-JP', e("%s") % e("\x00"))
    assert_strenc("\x00", 'Windows-31J', s("%s") % s("\x00"))
    assert_strenc("\x00", 'UTF-8', u("%s") % u("\x00"))
    assert_equal("EUC-JP", (e("\xc2\xa1 %s") % "foo").encoding.name)
  end
  # String#< between "a" and the byte "\xa1" must hold when both sides are
  # ASCII-8BIT and when either side is Windows-31J.
  def test_str_lt
    assert(a("a") < a("\xa1"))
    assert(a("a") < s("\xa1"))
    assert(s("a") < a("\xa1"))
  end
  # String#* on a non-ASCII string: zero repetitions give an ASCII-only
  # (empty) result, one or more repetitions do not.
  def test_str_multiply
    str = "\u3042"
    assert_equal(true, (str * 0).ascii_only?, "[ruby-dev:33895]")
    assert_equal(false, (str * 1).ascii_only?)
    assert_equal(false, (str * 2).ascii_only?)
  end
  # String#[] with an integer index is character based: the bytes
  # "\xc2\xa1" are two characters in ASCII-8BIT and Windows-31J but a single
  # character in EUC-JP and UTF-8. Also checks ascii_only? on substrings of a
  # non-ASCII string.
  def test_str_aref
    assert_equal(a("\xc2"), a("\xc2\xa1")[0])
    assert_equal(a("\xa1"), a("\xc2\xa1")[1])
    assert_equal(nil, a("\xc2\xa1")[2])
    assert_equal(e("\xc2\xa1"), e("\xc2\xa1")[0])
    assert_equal(nil, e("\xc2\xa1")[1])
    assert_equal(s("\xc2"), s("\xc2\xa1")[0])
    assert_equal(s("\xa1"), s("\xc2\xa1")[1])
    assert_equal(nil, s("\xc2\xa1")[2])
    assert_equal(u("\xc2\xa1"), u("\xc2\xa1")[0])
    assert_equal(nil, u("\xc2\xa1")[1])

    str = "\u3042"
    assert_equal(true, str[0, 0].ascii_only?, "[ruby-dev:33895]")
    assert_equal(false, str[0, 1].ascii_only?)
    assert_equal(false, str[0..-1].ascii_only?)
  end
  # Character indexing into UTF-8 strings that consist of an ASCII prefix
  # followed by multibyte characters, for indexes inside the prefix and
  # inside the multibyte tail.
  def test_utf8str_aref
    s = "abcdefghijklmnopqrstuvwxyz\u{3042 3044 3046 3048 304A}"
    assert_equal("a", s[0])
    assert_equal("h", s[7])
    assert_equal("i", s[8])
    assert_equal("j", s[9])
    assert_equal("\u{3044}", s[27])
    assert_equal("\u{3046}", s[28])
    assert_equal("\u{3048}", s[29])
    s = "abcdefghijklmnopqrstuvw\u{3042 3044 3046 3048 304A}"
    assert_equal("\u{3044}", s[24])
  end
  # String#[start, length] counts characters: on the same six bytes, the
  # slices are single bytes for ASCII-8BIT and Windows-31J and two-byte
  # characters for EUC-JP and UTF-8.
  def test_str_aref_len
    assert_equal(a("\xa1"), a("\xc2\xa1\xc2\xa2\xc2\xa3")[1, 1])
    assert_equal(a("\xa1\xc2"), a("\xc2\xa1\xc2\xa2\xc2\xa3")[1, 2])
    assert_equal(e("\xc2\xa2"), e("\xc2\xa1\xc2\xa2\xc2\xa3")[1, 1])
    assert_equal(e("\xc2\xa2\xc2\xa3"), e("\xc2\xa1\xc2\xa2\xc2\xa3")[1, 2])
    assert_equal(s("\xa1"), s("\xc2\xa1\xc2\xa2\xc2\xa3")[1, 1])
    assert_equal(s("\xa1\xc2"), s("\xc2\xa1\xc2\xa2\xc2\xa3")[1, 2])
    assert_equal(u("\xc2\xa2"), u("\xc2\xa1\xc2\xa2\xc2\xa3")[1, 1])
    assert_equal(u("\xc2\xa2\xc2\xa3"), u("\xc2\xa1\xc2\xa2\xc2\xa3")[1, 2])
  end
  # str[substring]: returns the substring, nil, or raises
  # Encoding::CompatibilityError depending on the encodings of receiver and
  # argument. Also covers the [ruby-core:26787] slices.
  def test_str_aref_substr
    incompatible = Encoding::CompatibilityError
    # Each row: [expected result or the error class, receiver, needle]
    [
      [a("\xa1\xc2"), a("\xc2\xa1\xc2\xa2\xc2\xa3"), a("\xa1\xc2")],
      [incompatible,  a("\xc2\xa1\xc2\xa2\xc2\xa3"), e("\xa1\xc2")],
      [nil,           e("\xc2\xa1\xc2\xa2\xc2\xa3"), e("\xa1\xc2")],
      [incompatible,  e("\xc2\xa1\xc2\xa2\xc2\xa3"), s("\xa1\xc2")],
      [s("\xa1\xc2"), s("\xc2\xa1\xc2\xa2\xc2\xa3"), s("\xa1\xc2")],
      [incompatible,  s("\xc2\xa1\xc2\xa2\xc2\xa3"), u("\xa1\xc2")],
      [nil,           u("\xc2\xa1\xc2\xa2\xc2\xa3"), u("\xa1\xc2")],
      [incompatible,  u("\xc2\xa1\xc2\xa2\xc2\xa3"), a("\xa1\xc2")],
    ].each do |expected, receiver, needle|
      if incompatible.equal?(expected)
        assert_raise(incompatible) { receiver[needle] }
      else
        assert_equal(expected, receiver[needle])
      end
    end

    assert_nil(e("\xa1\xa2\xa3\xa4")[e("\xa2\xa3")])

    ticket = '[ruby-core:26787]'
    assert_equal("\u{439}", "\u{439}"[0, 30], ticket)
    assert_equal("\u{439}", "a\u{439}"[1, 30], ticket)
    inner = "a\u{439}bcdefghijklmnop"[1, 1]
    assert_equal("\u{439}", inner[0, 1], ticket)
  end
  786. def test_aset
  787. s = e("\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4")
  788. assert_raise(Encoding::CompatibilityError){s["\xb0\xa3"] = "foo"}
  789. end
  # String#center yields an EUC-JP result when either the pad string or the
  # receiver is EUC-JP.
  def test_str_center
    padded_with_euc = "a".center(5, e("\xa1\xa2"))
    assert_encoding("EUC-JP", padded_with_euc.encoding)

    centered_euc = e("\xa3\xb0").center(10)
    assert_encoding("EUC-JP", centered_euc.encoding)
  end
  # String#squeeze collapses the repeated "\xa3\xb1" pair in an EUC-JP string.
  def test_squeeze
    doubled = e("\xa3\xb0\xa3\xb1\xa3\xb1\xa3\xb3\xa3\xb4")
    squeezed = e("\xa3\xb0\xa3\xb1\xa3\xb3\xa3\xb4")
    assert_equal(squeezed, doubled.squeeze)
  end
  # Exercises String#tr (and String#tr_s) across encodings:
  # * translating "A" must leave the Windows-31J bytes "\x81\x41" untouched
  #   (presumably because 0x41 is a trail byte there -- confirm);
  # * tr between ASCII-8BIT strings must not raise;
  # * an ASCII-8BIT receiver translated with an EUC-JP replacement compares
  #   equal to the EUC-JP replacement;
  # * negated sets and long repeated UTF-8 strings translate correctly.
  def test_tr
    sjis = s("\x81\x41")
    # Expected value first, actual second, so failure messages are not inverted.
    assert_equal(sjis, sjis.tr("A", "B"))
    assert_equal(sjis, sjis.tr_s("A", "B"))

    assert_nothing_raised {
      "a".force_encoding("ASCII-8BIT").tr(a("a"), a("a"))
    }

    assert_equal(e("\xA1\xA1"), a("a").tr(a("a"), e("\xA1\xA1")))

    assert_equal("X\u3042\u3044X", "A\u3042\u3044\u3046".tr("^\u3042\u3044", "X"))
    assert_equal("\u3042\u3046" * 100, ("\u3042\u3044" * 100).tr("\u3044", "\u3046"))
  end
  # String#tr_s on an ASCII-8BIT receiver with an EUC-JP replacement must
  # compare equal to the EUC-JP replacement.
  #
  # The previous body called String#tr, duplicating an assertion already
  # present in test_tr, so String#tr_s was never exercised here.
  def test_tr_s
    assert_equal(e("\xA1\xA1"), a("a").tr_s(a("a"), e("\xA1\xA1")))
  end
  # String#count: zero matches for "z" in an EUC-JP string, and an
  # Encoding::CompatibilityError for an ASCII-8BIT argument.
  def test_count
    occurrences = e("\xa1\xa2").count("z")
    assert_equal(0, occurrences)

    digits = e("\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4")
    assert_raise(Encoding::CompatibilityError) do
      digits.count(a("\xa3\xb0"))
    end
  end
  # String#delete / #delete!: no-op deletion keeps length 1, an ASCII-8BIT
  # argument raises, and a two-set delete! removes only the intersection.
  def test_delete
    untouched = e("\xa1\xa2").delete("z")
    assert_equal(1, untouched.length)

    digits = e("\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4")
    assert_raise(Encoding::CompatibilityError) do
      digits.delete(a("\xa3\xb2"))
    end

    kana = "\u3042\u3044\u3046\u3042\u3044\u3046"
    kana.delete!("\u3042\u3044", "^\u3044")
    assert_equal("\u3044\u3046\u3044\u3046", kana)
  end
  # String#include? must report false for these EUC-JP needles.
  def test_include?
    lone_byte_found = e("\xa1\xa2\xa3\xa4").include?(e("\xa3"))
    assert_equal(false, lone_byte_found)

    digits = e("\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4")
    straddling = e("\xb0\xa3")
    assert_equal(false, digits.include?(straddling))
  end
  # String#index / #rindex return nil for these EUC-JP needles, and rindex
  # raises Encoding::CompatibilityError for an ASCII-8BIT needle.
  def test_index
    digits = e("\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4")
    assert_nil(digits.index(e("\xb3\xa3")))

    assert_nil(e("\xa1\xa2\xa3\xa4").index(e("\xa3")))
    assert_nil(e("\xa1\xa2\xa3\xa4").rindex(e("\xa3")))

    fresh_digits = e("\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4")
    assert_raise(Encoding::CompatibilityError) do
      fresh_digits.rindex(a("\xb1\xa3"))
    end
  end
  # After 94*94+94 in-place next! calls, the string must differ from the
  # EUC-JP value it started from.
  def test_next
    start = e("\xa1\xa1")
    advanced = start.dup
    steps = 94*94+94
    steps.times do
      advanced.next!
    end
    assert_not_equal(start, advanced)
  end
  # String#sub / #gsub results are EUC-JP when the replacement or the
  # receiver is EUC-JP.
  def test_sub
    substituted = "abc".sub(/b/, "\xa1\xa1".force_encoding("euc-jp"))
    assert_encoding("EUC-JP", substituted.encoding)

    via_sub = "\xa4\xa2".force_encoding("euc-jp").sub(/./, '\&')
    assert_equal(Encoding::EUC_JP, via_sub.encoding)

    via_gsub = "\xa4\xa2".force_encoding("euc-jp").gsub(/./, '\&')
    assert_equal(Encoding::EUC_JP, via_gsub.encoding)
  end
  # Substituting away the only (non-ASCII) byte of an ASCII-8BIT string
  # leaves an empty, ascii_only? string.
  def test_sub2
    high_byte = "\x80".force_encoding("ASCII-8BIT")
    pattern = Regexp.new("\x80".force_encoding("ASCII-8BIT"))
    stripped = high_byte.sub(pattern, "")
    assert(stripped.empty?)
    assert(stripped.ascii_only?)
  end
  # Substituting an invalid sjis replacement ("\x81") in front of "@" must
  # produce a string that reports valid_encoding?.
  def test_sub3
    lead_byte = "\x81".force_encoding("sjis")
    assert_equal(false, lead_byte.valid_encoding?)

    combined = "a@".sub(/a/, lead_byte)
    assert(combined.valid_encoding?)
  end
  # String#insert(-1, "a") appends "a" to an EUC-JP string.
  def test_insert
    digits = e("\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4")
    with_suffix = e("\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4a")
    assert_equal(with_suffix, digits.insert(-1, "a"))
  end
  # String#scan finds the single ASCII "a" embedded in an EUC-JP string.
  def test_scan
    matches = e("\xa1\xa2a\xa3\xa4").scan(/a/)
    assert_equal(["a"], matches)
  end
  # Scanning a dup that was re-tagged as ascii-8bit must yield ASCII-8BIT
  # matches.
  def test_dup_scan
    euc = e("\xa4\xa2")*100
    binary = euc.dup.force_encoding("ascii-8bit")
    binary.scan(/\A./n) do |match|
      assert_equal(Encoding::ASCII_8BIT, match.encoding)
    end
  end
  # Slicing a dup that was re-tagged as ascii-8bit must yield an ASCII-8BIT
  # substring.
  def test_dup_aref
    euc = e("\xa4\xa2")*100
    binary = euc.dup.force_encoding("ascii-8bit")
    tail = binary[10..-1]
    assert_equal(Encoding::ASCII_8BIT, tail.encoding)
  end
  # String#upto between an EUC-JP and a Windows-31J string must raise
  # Encoding::CompatibilityError.
  def test_upto
    from_euc = e("\xa1\xa2")
    to_sjis = s("\xa1\xa2")
    assert_raise(Encoding::CompatibilityError) do
      from_euc.upto(to_sjis) {|x| break }
    end
  end
  # casecmp must not treat the Windows-31J byte pairs "\x81\x41" and
  # "\x81\x61" as equal.
  def test_casecmp
    upper_trail = s("\x81\x41")
    lower_trail = s("\x81\x61")
    comparison = upper_trail.casecmp(lower_trail)
    assert_not_equal(0, comparison)
  end
  # String#reverse on a UTF-8 string ending in the stray byte "\xf0".
  def test_reverse
    reversed = u("abcdefghij\xf0").reverse
    assert_equal(u("\xf0jihgfedcba"), reversed)
  end
  # In-place counterpart of test_reverse: String#reverse! on a UTF-8 string
  # ending in the stray byte "\xf0".
  def test_reverse_bang
    text = u("abcdefghij\xf0")
    text.reverse!
    expected = u("\xf0jihgfedcba")
    assert_equal(expected, text)
  end
  # Concatenating a non-ASCII UTF-8 string with a non-ASCII ASCII-8BIT
  # string must raise Encoding::CompatibilityError.
  def test_plus
    assert_raise(Encoding::CompatibilityError) do
      u("\xe3\x81\x82") + a("\xa1")
    end
  end
  # String#chomp with a Windows-31J separator on an EUC-JP receiver must
  # raise Encoding::CompatibilityError.
  def test_chomp
    digits = e("\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4")
    assert_raise(Encoding::CompatibilityError) do
      digits.chomp(s("\xa3\xb4"))
    end
  end
  # String#gsub / #gsub! encoding behaviour: ascii_only? after an in-place
  # substitution, the receiver's encoding after a non-destructive gsub,
  # incompatible block results, and EUC-JP results.
  def test_gsub
    text = 'abc'
    # Result intentionally discarded.
    # NOTE(review): presumably primes cached state before gsub! -- confirm
    # against [ruby-core:14566].
    text.ascii_only?
    text.gsub!(/b/, "\x80")
    assert_equal(false, text.ascii_only?, "[ruby-core:14566] reported by Sam Ruby")

    binary = "abc".force_encoding(Encoding::ASCII_8BIT)
    # The gsub result is not inspected; only the receiver's encoding is.
    binary.gsub(/b/, "\xa1\xa1".force_encoding("euc-jp"))
    assert_equal(Encoding::ASCII_8BIT, binary.encoding)

    assert_raise(Encoding::CompatibilityError) do
      "abc".gsub(/[ac]/) do
        if $& == "a"
          "\xc2\xa1".force_encoding("euc-jp")
        else
          "\xc2\xa1".force_encoding("utf-8")
        end
      end
    end

    digits = e("\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4")
    assert_equal(e("\xa3\xb0z\xa3\xb2\xa3\xb3\xa3\xb4"), digits.gsub(/\xa3\xb1/e, "z"))

    from_empty = a("").gsub(//) { e("") }
    assert_equal(Encoding::EUC_JP, from_empty.encoding)
    from_single = a("a").gsub(/a/) { e("") }
    assert_equal(Encoding::EUC_JP, from_single.encoding)
  end
  # The Windows-31J bytes "\x81\x40" must not be reported as ending with "@".
  def test_end_with
    sjis = s("\x81\x40")
    at_sign = "@"
    ends = sjis.end_with?(at_sign)
    assert_equal(false, ends, "#{encdump sjis}.end_with?(#{encdump at_sign})")
  end
  # String#each_line: an ASCII-8BIT separator on an EUC-JP receiver raises,
  # and default line splitting yields EUC-JP lines.
  def test_each_line
    digits = e("\xa3\xb0\xa3\xb1\xa3\xb2\xa3\xb3\xa3\xb4")
    assert_raise(Encoding::CompatibilityError) do
      digits.each_line(a("\xa3\xb1")) {|l| }
    end

    two_lines = e("\xa4\xa2\nfoo")
    collected = []
    two_lines.each_line do |line|
      collected << line
    end
    assert_equal([e("\xa4\xa2\n"), e("foo")], collected)
  end
  # String#each_char on a mixed EUC-JP / ASCII string yields one element per
  # character.
  def test_each_char
    expected_chars = [e("\xa4\xa2"), "b", e("\xa4\xa4"), "c"]
    mixed = "\xa4\xa2b\xa4\xa4c".force_encoding("euc-jp")
    chars = mixed.each_char.to_a
    assert_equal(expected_chars, chars, "[ruby-dev:33211] #{encdump mixed}.each_char.to_a")
  end
  # The empty regexp matched at position -1 of an EUC-JP string reports
  # offset [0, 0], and it matches the symbol :a at 0.
  def test_regexp_match
    match = //.match("\xa1\xa1".force_encoding("euc-jp"),-1)
    assert_equal([0,0], match.offset(0))

    position = (// =~ :a)
    assert_equal(0, position)
  end
  # Splitting an EUC-JP string on the empty regexp must yield one element
  # per character. [ruby-dev:32452]
  def test_split
    # Expected value first, actual second, so a failure message is not inverted.
    assert_equal([e("\xa1\xa2"), e("\xa1\xa3")],
                 e("\xa1\xa2\xa1\xa3").split(//),
                 '[ruby-dev:32452]')
  end
  # Defines two methods whose names are the same bytes ("\xc2\xa1") tagged as
  # EUC-JP and as UTF-8, then checks that they are dispatched and reported
  # as distinct methods.
  def test_nonascii_method_name
    eval(e("def \xc2\xa1() @nonascii_method_name = :e end"))
    eval(u("def \xc2\xa1() @nonascii_method_name = :u end"))

    eval(e("\xc2\xa1()"))
    assert_equal(:e, @nonascii_method_name)
    eval(u("\xc2\xa1()"))
    assert_equal(:u, @nonascii_method_name)

    euc_method = method(e("\xc2\xa1"))
    utf8_method = method(u("\xc2\xa1"))
    assert_not_equal(euc_method.name, utf8_method.name)
    assert_not_equal(euc_method.inspect, utf8_method.inspect)
    assert_equal(e("\xc2\xa1"), euc_method.name.to_s)
    assert_equal(u("\xc2\xa1"), utf8_method.name.to_s)
  end
  # Symbols interned from the same bytes under euc-jp and utf-8 must differ.
  def test_symbol
    euc_sym = "\xc2\xa1".force_encoding("euc-jp").intern
    utf8_sym = "\xc2\xa1".force_encoding("utf-8").intern
    assert_not_equal(euc_sym, utf8_sym)
  end
  # Every operator name in the list, once interned, must be a US-ASCII
  # symbol. [ruby-dev:33449]
  def test_symbol_op
    operators = %w"
      .. ... + - +(binary) -(binary) * / % ** +@ -@ | ^ & ! <=> > >= < <= ==
      === != =~ !~ ~ ! [] []= << >> :: `
    "
    operators.each { |operator|
      assert_equal(Encoding::US_ASCII, operator.intern.encoding, "[ruby-dev:33449]")
    }
  end
  # For every byte value 0..255, Integer#chr must equal the one-byte string
  # produced by Array#pack("C").
  def test_chr
    (0..255).each do |byte|
      packed = [byte].pack("C")
      assert_equal(packed, byte.chr)
    end
  end
  # An euc-jp string must survive a Marshal dump/load round trip.
  def test_marshal
    original = "\xa1\xa1".force_encoding("euc-jp")
    dumped = Marshal.dump(original)
    restored = Marshal.load(dumped)
    assert_equal(original, restored)
  end
  # Every ENV key and value must carry the "locale" encoding.
  def test_env
    locale = Encoding.find("locale")
    ENV.each do |name, value|
      assert_equal(locale, name.encoding)
      assert_equal(locale, value.encoding)
    end
  end
  # An empty string literal in this file must be US-ASCII.
  def test_empty_string
    literal_encoding = "".encoding
    assert_equal(Encoding::US_ASCII, literal_encoding)
  end
  # nil.to_s must be US-ASCII.
  def test_nil_to_s
    rendered = nil.to_s
    assert_equal(Encoding::US_ASCII, rendered.encoding)
  end
  # nil.inspect must be US-ASCII.
  def test_nil_inspect
    rendered = nil.inspect
    assert_equal(Encoding::US_ASCII, rendered.encoding)
  end
  # true.to_s must be US-ASCII.
  def test_true_to_s
    rendered = true.to_s
    assert_equal(Encoding::US_ASCII, rendered.encoding)
  end
  # false.to_s must be US-ASCII.
  def test_false_to_s
    rendered = false.to_s
    assert_equal(Encoding::US_ASCII, rendered.encoding)
  end
  # The string form of the integer 1 must be US-ASCII.
  def test_fixnum_to_s
    rendered = 1.to_s
    assert_equal(Encoding::US_ASCII, rendered.encoding)
  end
  # The string form of the float 1.0 must be US-ASCII.
  def test_float_to_s
    rendered = 1.0.to_s
    assert_equal(Encoding::US_ASCII, rendered.encoding)
  end
  # The string form of the large integer 1 << 129 must be US-ASCII.
  def test_bignum_to_s
    rendered = (1 << 129).to_s
    assert_equal(Encoding::US_ASCII, rendered.encoding)
  end
  # Array#to_s is US-ASCII for arrays of non-string (and mixed ASCII)
  # elements, and follows String#inspect's encoding for one-string arrays.
  def test_array_to_s
    [[], [nil], [1]].each do |ary|
      assert_equal(Encoding::US_ASCII, ary.to_s.encoding)
    end

    assert_equal("".inspect.encoding, [""].to_s.encoding)
    assert_equal("a".inspect.encoding, ["a"].to_s.encoding)

    mixed = [nil,1,"","a","\x20",[]]
    assert_equal(Encoding::US_ASCII, mixed.to_s.encoding)
  end
  # Hash#to_s must be US-ASCII for an empty hash and for a small hash of
  # ASCII content.
  def test_hash_to_s
    empty_rendered = {}.to_s
    assert_equal(Encoding::US_ASCII, empty_rendered.encoding)

    filled_rendered = {1=>nil,"foo"=>""}.to_s
    assert_equal(Encoding::US_ASCII, filled_rendered.encoding)
  end
  # Encoding.find must raise TypeError for nil, an Integer, an Array and a
  # Hash argument.
  def test_encoding_find
    [nil, 0, [], {}].each do |bad_argument|
      assert_raise(TypeError) { Encoding.find(bad_argument) }
    end
  end
  # Encoding#to_s and Encoding#inspect must themselves be US-ASCII strings.
  def test_encoding_to_s
    ascii = Encoding::US_ASCII
    assert_equal(ascii, Encoding::US_ASCII.to_s.encoding)
    assert_equal(ascii, Encoding::US_ASCII.inspect.encoding)
  end
  # Regexp#source must give back the euc-jp string the regexp was built
  # from. [ruby-dev:33377]
  def test_regexp_source
    pattern_text = "\xa4\xa2".force_encoding("euc-jp")
    regexp = Regexp.new(pattern_text)
    source = regexp.source
    assert_equal(pattern_text, source, "[ruby-dev:33377] Regexp.new(#{encdump pattern_text}).source")
  end
  # __ENCODING__ inside eval follows the encoding of the evaluated string,
  # unless a first-line "-*- encoding: ... -*-" comment overrides it.
  def test_magic_comment
    # Each row: [expected __ENCODING__, source string passed to eval]
    [
      [Encoding::US_ASCII,   "__ENCODING__".force_encoding("US-ASCII")],
      [Encoding::ASCII_8BIT, "__ENCODING__".force_encoding("ASCII-8BIT")],
      [Encoding::US_ASCII,   "# -*- encoding: US-ASCII -*-\n__ENCODING__".force_encoding("ASCII-8BIT")],
      [Encoding::ASCII_8BIT, "# -*- encoding: ASCII-8BIT -*-\n__ENCODING__".force_encoding("US-ASCII")],
    ].each do |expected, source|
      assert_equal(expected, eval(source))
    end
  end
  1058. def test_magic_comment_vim
  1059. assert_equal(Encoding::US_ASCII, eval("# vim: filetype=ruby, fileencoding: US-ASCII, ts=3, sw=3\n__ENCODING__".force_encoding("ASCII-8BIT")))
  1060. assert_equal(Encoding::ASCII_8BIT, eval("# vim: filetype=ruby, fileencoding: ASCII-8BIT, ts=3, sw=3\n__ENCODING__".force_encoding("US-ASCII")))
  1061. end
  # The encoding comment is honoured directly after a shebang line, but not
  # after a blank first line nor when it trails an expression.
  def test_magic_comment_at_various_positions
    # Each row: [expected __ENCODING__, source string passed to eval]
    [
      # after shebang
      [Encoding::US_ASCII,   "#!/usr/bin/ruby\n# -*- encoding: US-ASCII -*-\n__ENCODING__".force_encoding("ASCII-8BIT")],
      [Encoding::ASCII_8BIT, "#!/usr/bin/ruby\n# -*- encoding: ASCII-8BIT -*-\n__ENCODING__".force_encoding("US-ASCII")],
      # wrong position
      [Encoding::ASCII_8BIT, "\n# -*- encoding: US-ASCII -*-\n__ENCODING__".force_encoding("ASCII-8BIT")],
      [Encoding::US_ASCII,   "\n# -*- encoding: ASCII-8BIT -*-\n__ENCODING__".force_encoding("US-ASCII")],
      # leading expressions
      [Encoding::ASCII_8BIT, "v=1 # -*- encoding: US-ASCII -*-\n__ENCODING__".force_encoding("ASCII-8BIT")],
      [Encoding::US_ASCII,   "v=1 # -*- encoding: ASCII-8BIT -*-\n__ENCODING__".force_encoding("US-ASCII")],
    ].each do |expected, source|
      assert_equal(expected, eval(source))
    end
  end
  # Feeds a table of regexp literal sources to assert_regexp_usascii_literal.
  # Each row is the argument list for one call: the literal source, the
  # expected encoding (or nil), and optionally an exception class.
  def test_regexp_usascii
    [
      ['//', Encoding::US_ASCII],
      ['/#{ }/', Encoding::US_ASCII],
      ['/#{"a"}/', Encoding::US_ASCII],
      ['/#{%q"\x80"}/', Encoding::ASCII_8BIT],
      ['/#{"\x80"}/', nil, SyntaxError],

      ['/a/', Encoding::US_ASCII],
      ['/a#{ }/', Encoding::US_ASCII],
      ['/a#{"a"}/', Encoding::US_ASCII],
      ['/a#{%q"\x80"}/', Encoding::ASCII_8BIT],
      ['/a#{"\x80"}/', nil, SyntaxError],

      ['/\x80/', Encoding::ASCII_8BIT],
      ['/\x80#{ }/', Encoding::ASCII_8BIT],
      ['/\x80#{"a"}/', Encoding::ASCII_8BIT],
      ['/\x80#{%q"\x80"}/', Encoding::ASCII_8BIT],
      ['/\x80#{"\x80"}/', nil, SyntaxError],

      ['/\u1234/', Encoding::UTF_8],
      ['/\u1234#{ }/', Encoding::UTF_8],
      ['/\u1234#{"a"}/', Encoding::UTF_8],
      ['/\u1234#{%q"\x80"}/', nil, SyntaxError],
      ['/\u1234#{"\x80"}/', nil, SyntaxError],
      ['/\u1234\x80/', nil, SyntaxError],
      ['/\u1234#{ }\x80/', nil, RegexpError],
    ].each do |arguments|
      # Splat keeps each call's original arity (two or three arguments).
      assert_regexp_usascii_literal(*arguments)
    end
  end
  # String#chop on the GBK bytes "\x81\x40" must leave an empty string.
  def test_gbk
    chopped = "\x81\x40".force_encoding("GBK").chop
    assert_equal("", chopped)
  end
  # String#chop on "a" followed by the four euc-tw bytes "\x8e\xa2\xa1\xa1"
  # must leave just "a".
  def test_euc_tw
    chopped = "a\x8e\xa2\xa1\xa1".force_encoding("euc-tw").chop
    assert_equal("a", chopped)
  end
  # valid_encoding? must be recomputed for strings assembled from invalid
  # euc-jp fragments (concatenation, repetition, formatting, appending,
  # padding, reversal).
  def test_valid_encoding
    half = "\xa1".force_encoding("euc-jp")
    assert_equal(false, half.valid_encoding?)
    assert_equal(true, (half+half).valid_encoding?, "[ruby-dev:33826]")
    assert_equal(true, (half*2).valid_encoding?, "[ruby-dev:33826]")
    assert_equal(true, ("%s%s" % [half, half]).valid_encoding?)
    assert_equal(true, (half.dup << half).valid_encoding?)
    assert_equal(true, "".center(2, half).valid_encoding?)

    trailing_lead = "\xa1\xa1\x8f".force_encoding("euc-jp")
    assert_equal(false, trailing_lead.valid_encoding?)
    assert_equal(true, trailing_lead.reverse.valid_encoding?)
  end
  # String#getbyte with a positive, a negative and an out-of-range index on
  # a six-byte UTF-8 string.
  def test_getbyte
    # Build a fresh receiver for every assertion.
    fresh = lambda { u("\xE3\x81\x82\xE3\x81\x84") }

    assert_equal(0x82, fresh.call.getbyte(2))
    assert_equal(0x82, fresh.call.getbyte(-4))
    assert_nil(fresh.call.getbyte(100))
  end
  # String#setbyte with a positive index, an out-of-range index (IndexError)
  # and a negative index on a six-byte UTF-8 string.
  def test_setbyte
    # Build a fresh receiver for every scenario.
    fresh = lambda { u("\xE3\x81\x82\xE3\x81\x84") }

    by_positive = fresh.call
    by_positive.setbyte(2, 0x84)
    assert_equal(u("\xE3\x81\x84\xE3\x81\x84"), by_positive)

    out_of_range = fresh.call
    assert_raise(IndexError) do
      out_of_range.setbyte(100, 0)
    end

    by_negative = fresh.call
    by_negative.setbyte(-4, 0x84)
    assert_equal(u("\xE3\x81\x84\xE3\x81\x84"), by_negative)
  end
  # Encoding.compatible? for a non-string argument, for strings, and for
  # pairs of Encoding objects.
  def test_compatible
    utf8 = Encoding::UTF_8

    assert_nil(Encoding.compatible?("",0))
    assert_equal(utf8, Encoding.compatible?(u(""), ua("abc")))
    assert_equal(utf8, Encoding.compatible?(Encoding::UTF_8, Encoding::UTF_8))
    assert_equal(utf8, Encoding.compatible?(Encoding::UTF_8, Encoding::US_ASCII))

    binary_vs_ascii = Encoding.compatible?(Encoding::ASCII_8BIT, Encoding::US_ASCII)
    assert_equal(Encoding::ASCII_8BIT, binary_vs_ascii)

    assert_nil(Encoding.compatible?(Encoding::UTF_8, Encoding::ASCII_8BIT))
  end
  # force_encoding regressions: centering with an invalid utf-8 pad string
  # must simply complete, and bytes appended to an ascii-8bit string must
  # count as one character once re-tagged as utf-8. [ruby-core:22437]
  def test_force_encoding
    # Only completion matters here; an exception would error the test.
    "".center(1, "\x80".force_encoding("utf-8"))
    assert(true, "moved from btest/knownbug, [ruby-dev:33807]")

    from_integers = "".force_encoding("ascii-8bit")
    from_integers << 0xC3
    from_integers << 0xB6
    assert_equal(1, from_integers.force_encoding("utf-8").size, '[ruby-core:22437]')

    from_chars = "".force_encoding("ascii-8bit")
    from_chars << 0xC3.chr
    from_chars << 0xB6.chr
    assert_equal(1, from_chars.force_encoding("utf-8").size, '[ruby-core:22437]')
  end
  # String#codepoints must report U+30BB and U+309A as two separate code
  # points.
  def test_combchar_codepoint
    points = "\u30BB\u309A".codepoints.to_a
    assert_equal([0x30BB, 0x309A], points)
  end
  1150. end