PageRenderTime 35ms CodeModel.GetById 0ms RepoModel.GetById 0ms app.codeStats 0ms

/test/ruby/test_dir_m17n.rb

http://github.com/ruby/ruby
Ruby | 449 lines | 409 code | 33 blank | 7 comment | 48 complexity | 0d35c2b59bfa34bad85b72bff342368a MD5 | raw file
Possible License(s): GPL-2.0, BSD-3-Clause, AGPL-3.0
  1. # frozen_string_literal: false
  2. require 'test/unit'
  3. require 'tmpdir'
  4. require '-test-/file'
  5. class TestDir_M17N < Test::Unit::TestCase
  # Creates a temporary directory with Dir.mktmpdir, makes it the current
  # working directory for the duration of the block, and yields its path.
  # The value of the block is passed back to the caller.
  def with_tmpdir
    Dir.mktmpdir {|dir|
      Dir.chdir(dir) {
        yield dir
      }
    }
  end
  # Checks that a file named after the single code point +code+ is created
  # and then listed by Dir.entries.
  #
  # Two snippets are handed to assert_separately (presumably each runs in its
  # own Ruby process started with the given options, in the temporary
  # directory -- confirm against the helper):
  #
  # 1. with <tt>-E#{encoding}</tt>: create the file and look it up by name;
  # 2. with <tt>-EASCII-8BIT</tt>: look the same file up as raw bytes, with
  #    Windows-specific fallbacks to the name converted to the filesystem
  #    encoding.
  #
  # The second snippet is skipped on Cygwin.
  #
  # code::     Integer code point used as the file name.
  # encoding:: Encoding name passed to -E and forced onto the file name.
  def assert_raw_file_name(code, encoding)
    with_tmpdir { |dir|
      assert_separately(["-E#{encoding}"], <<-EOS, :chdir=>dir)
        filename = #{code}.chr('UTF-8').force_encoding("#{encoding}")
        File.open(filename, "w") {}
        opts = {:encoding => Encoding.default_external} if /mswin|mingw/ =~ RUBY_PLATFORM
        ents = Dir.entries(".", **(opts||{}))
        assert_include(ents, filename)
      EOS

      return if /cygwin/ =~ RUBY_PLATFORM
      assert_separately(%w[-EASCII-8BIT], <<-EOS, :chdir=>dir)
        filename = #{code}.chr('UTF-8').force_encoding("ASCII-8BIT")
        opts = {:encoding => Encoding.default_external} if /mswin|mingw/ =~ RUBY_PLATFORM
        ents = Dir.entries(".", **(opts||{}))
        expected_filename = #{code}.chr('UTF-8').encode(Encoding.find("filesystem")) rescue expected_filename = "?"
        expected_filename = expected_filename.force_encoding("ASCII-8BIT")
        if /mswin|mingw/ =~ RUBY_PLATFORM
          case
          when ents.include?(filename)
          when ents.include?(expected_filename)
            filename = expected_filename
          else
            ents = Dir.entries(".", :encoding => Encoding.find("filesystem"))
            filename = expected_filename
          end
        end
        assert_include(ents, filename)
      EOS
    }
  end
  ## UTF-8 default_external, no default_internal
  # With -EUTF-8, a file created under the name U+3042 must appear in
  # Dir.entries under that same name.
  def test_filename_extutf8
    with_tmpdir {|d|
      assert_separately(%w[-EUTF-8], <<-'EOS', :chdir=>d)
        filename = "\u3042"
        File.open(filename, "w") {}
        opts = {:encoding => Encoding.default_external} if /mswin|mingw/ =~ RUBY_PLATFORM
        ents = Dir.entries(".", **(opts||{}))
        assert_include(ents, filename)
      EOS
    }
  end
  # A file whose name is the single byte 0xFF (not valid UTF-8) must be
  # listed by Dir.entries, both with -EASCII-8BIT and with -EUTF-8.
  # On darwin the entry "%FF" is accepted instead when it is present.
  #
  # Not defined at all on mswin/mingw; returns early on Cygwin and when the
  # temporary directory lives on APFS.
  def test_filename_extutf8_invalid
    return if /cygwin/ =~ RUBY_PLATFORM
    # High Sierra's APFS cannot use invalid filenames
    return if Bug::File::Fs.fsname(Dir.tmpdir) == "apfs"
    with_tmpdir {|d|
      assert_separately(%w[-EASCII-8BIT], <<-'EOS', :chdir=>d)
        filename = "\xff".force_encoding("ASCII-8BIT") # invalid byte sequence as UTF-8
        File.open(filename, "w") {}
        opts = {:encoding => Encoding.default_external} if /mswin|mingw/ =~ RUBY_PLATFORM
        ents = Dir.entries(".", **(opts||{}))
        filename = "%FF" if /darwin/ =~ RUBY_PLATFORM && ents.include?("%FF")
        assert_include(ents, filename)
      EOS
      assert_separately(%w[-EUTF-8], <<-'EOS', :chdir=>d)
        filename = "\xff".force_encoding("UTF-8") # invalid byte sequence as UTF-8
        File.open(filename, "w") {}
        opts = {:encoding => Encoding.default_external} if /mswin|mingw/ =~ RUBY_PLATFORM
        ents = Dir.entries(".", **(opts||{}))
        filename = "%FF" if /darwin/ =~ RUBY_PLATFORM && ents.include?("%FF")
        assert_include(ents, filename)
      EOS
    }
  end unless /mswin|mingw/ =~ RUBY_PLATFORM
  # Creates a file named by the bytes C2 A1 tagged as UTF-8, then checks that
  # it can be opened again through a file name tagged as EUC-JP:
  #
  # * on mswin/mingw/darwin the EUC-JP name is the bytes 8F A2 C2;
  # * elsewhere it is the same bytes C2 A1 merely relabelled as EUC-JP.
  #
  # On the remaining platforms a third snippet additionally checks that the
  # two same-byte names exist while their transcoded counterparts do not.
  def test_filename_as_bytes_extutf8
    with_tmpdir {|d|
      assert_separately(%w[-EUTF-8], <<-'EOS', :chdir=>d)
        filename = "\xc2\xa1".force_encoding("utf-8")
        File.open(filename, "w") {}
        opts = {:encoding => Encoding.default_external} if /mswin|mingw/ =~ RUBY_PLATFORM
        ents = Dir.entries(".", **(opts||{}))
        assert_include(ents, filename)
      EOS
      assert_separately(%w[-EUTF-8], <<-'EOS', :chdir=>d)
        if /mswin|mingw|darwin/ =~ RUBY_PLATFORM
          filename = "\x8f\xa2\xc2".force_encoding("euc-jp")
        else
          filename = "\xc2\xa1".force_encoding("euc-jp")
        end
        assert_nothing_raised(Errno::ENOENT) do
          open(filename) {}
        end
      EOS
      # no meaning test on windows
      unless /mswin|mingw|darwin/ =~ RUBY_PLATFORM
        assert_separately(%W[-EUTF-8], <<-'EOS', :chdir=>d)
          filename1 = "\xc2\xa1".force_encoding("utf-8")
          filename2 = "\xc2\xa1".force_encoding("euc-jp")
          filename3 = filename1.encode("euc-jp")
          filename4 = filename2.encode("utf-8")
          assert_file.stat(filename1)
          assert_file.stat(filename2)
          assert_file.not_exist?(filename3)
          assert_file.not_exist?(filename4)
        EOS
      end
    }
  end
  ## UTF-8 default_external, EUC-JP default_internal
  # Creates a file named U+3042 with -EUTF-8, then, with -EUTF-8:EUC-JP,
  # checks that Dir.entries lists it as the EUC-JP bytes A4 A2 and that the
  # file can be opened through that EUC-JP name.
  def test_filename_extutf8_inteucjp_representable
    with_tmpdir {|d|
      assert_separately(%w[-EUTF-8], <<-'EOS', :chdir=>d)
        filename = "\u3042"
        File.open(filename, "w") {}
        opts = {:encoding => Encoding.default_external} if /mswin|mingw/ =~ RUBY_PLATFORM
        ents = Dir.entries(".", **(opts||{}))
        assert_include(ents, filename)
      EOS
      assert_separately(%w[-EUTF-8:EUC-JP], <<-'EOS', :chdir=>d)
        filename = "\xA4\xA2".force_encoding("euc-jp")
        opts = {:encoding => Encoding.default_external} if /mswin|mingw/ =~ RUBY_PLATFORM
        ents = Dir.entries(".", **(opts||{}))
        assert_include(ents, filename)
      EOS
      assert_separately(%w[-EUTF-8:EUC-JP], <<-'EOS', :chdir=>d)
        filename = "\xA4\xA2".force_encoding("euc-jp")
        assert_nothing_raised(Errno::ENOENT) do
          open(filename) {}
        end
      EOS
    }
  end
  # Creates two files with -EUTF-8: one named U+2661 (not representable in
  # EUC-JP) and one named U+3042 (representable). With -EUTF-8:EUC-JP,
  # Dir.entries must then list the first under its UTF-8 name and the second
  # under its EUC-JP name, and both files must be reachable via
  # assert_file.stat, the second through either spelling.
  def test_filename_extutf8_inteucjp_unrepresentable
    with_tmpdir {|d|
      assert_separately(%w[-EUTF-8], <<-'EOS', :chdir=>d)
        filename1 = "\u2661" # WHITE HEART SUIT which is not representable in EUC-JP
        filename2 = "\u3042" # HIRAGANA LETTER A which is representable in EUC-JP
        File.open(filename1, "w") {}
        File.open(filename2, "w") {}
        opts = {:encoding => Encoding.default_external} if /mswin|mingw/ =~ RUBY_PLATFORM
        ents = Dir.entries(".", **(opts||{}))
        assert_include(ents, filename1)
        assert_include(ents, filename2)
      EOS
      assert_separately(%w[-EUTF-8:EUC-JP], <<-'EOS', :chdir=>d)
        filename1 = "\u2661" # WHITE HEART SUIT which is not representable in EUC-JP
        filename2 = "\xA4\xA2".force_encoding("euc-jp") # HIRAGANA LETTER A in EUC-JP
        opts = {:encoding => Encoding.default_external} if /mswin|mingw/ =~ RUBY_PLATFORM
        ents = Dir.entries(".", **(opts||{}))
        assert_include(ents, filename1)
        assert_include(ents, filename2)
      EOS
      assert_separately(%w[-EUTF-8:EUC-JP], <<-'EOS', :chdir=>d)
        filename1 = "\u2661" # WHITE HEART SUIT which is not representable in EUC-JP
        filename2 = "\u3042" # HIRAGANA LETTER A which is representable in EUC-JP
        filename3 = "\xA4\xA2".force_encoding("euc-jp") # HIRAGANA LETTER A in EUC-JP
        assert_file.stat(filename1)
        assert_file.stat(filename2)
        assert_file.stat(filename3)
      EOS
    }
  end
  ## others
  # With -EEUC-JP, creates a file named by the EUC-JP bytes A4 A2 and checks
  # that those bytes show up among the Dir.entries results once every entry
  # is relabelled as ASCII-8BIT. On darwin the name is transcoded to UTF-8
  # before the comparison. Returns early on Cygwin.
  def test_filename_bytes_euc_jp
    return if /cygwin/ =~ RUBY_PLATFORM
    with_tmpdir {|d|
      assert_separately(%w[-EEUC-JP], <<-'EOS', :chdir=>d)
        filename = "\xA4\xA2".force_encoding("euc-jp")
        File.open(filename, "w") {}
        opts = {:encoding => Encoding.default_external} if /mswin|mingw/ =~ RUBY_PLATFORM
        ents = Dir.entries(".", **(opts||{}))
        ents.each {|e| e.force_encoding("ASCII-8BIT") }
        if /darwin/ =~ RUBY_PLATFORM
          filename = filename.encode("utf-8")
        end
        assert_include(ents, filename.force_encoding("ASCII-8BIT"))
      EOS
    }
  end
  # With -EEUC-JP, creates a file named by the EUC-JP bytes A4 A2 and checks
  # that Dir.entries lists it; then repeats the lookup with -EASCII-8BIT.
  #
  # Platform adjustments made inside the snippets:
  # * darwin: the expected name is the UTF-8 transcoding of the EUC-JP name;
  # * mswin/mingw: if the raw bytes are not listed, falls back to the name
  #   converted to the filesystem encoding ("?" if that conversion fails).
  #
  # Returns early on Cygwin.
  def test_filename_euc_jp
    return if /cygwin/ =~ RUBY_PLATFORM
    with_tmpdir {|d|
      assert_separately(%w[-EEUC-JP], <<-'EOS', :chdir=>d)
        filename = "\xA4\xA2".force_encoding("euc-jp")
        File.open(filename, "w") {}
        opts = {:encoding => Encoding.default_external} if /mswin|mingw/ =~ RUBY_PLATFORM
        ents = Dir.entries(".", **(opts||{}))
        if /darwin/ =~ RUBY_PLATFORM
          filename = filename.encode("utf-8").force_encoding("euc-jp")
        end
        assert_include(ents, filename)
      EOS
      assert_separately(%w[-EASCII-8BIT], <<-'EOS', :chdir=>d)
        filename = "\xA4\xA2".force_encoding('ASCII-8BIT')
        win_expected_filename = filename.encode(Encoding.find("filesystem"), "euc-jp") rescue "?"
        opts = {:encoding => Encoding.default_external} if /mswin|mingw/ =~ RUBY_PLATFORM
        ents = Dir.entries(".", **(opts||{}))
        unless ents.include?(filename)
          case RUBY_PLATFORM
          when /darwin/
            filename = filename.encode("utf-8", "euc-jp").b
          when /mswin|mingw/
            if ents.include?(win_expected_filename.b)
              ents = Dir.entries(".", :encoding => Encoding.find("filesystem"))
              filename = win_expected_filename
            end
          end
        end
        assert_include(ents, filename)
      EOS
    }
  end
  # Raw file name check for U+3042 (a Hiragana letter) under UTF-8.
  def test_filename_utf8_raw_jp_name
    assert_raw_file_name(0x3042, "UTF-8")
  end
  # Raw file name check for U+0424 (a Cyrillic letter; per the test name, a
  # character covered by Windows-1251) under UTF-8.
  def test_filename_utf8_raw_windows_1251_name
    assert_raw_file_name(0x0424, "UTF-8")
  end
  # Raw file name check for U+00C6 (per the test name, a character covered
  # by Windows-1252) under UTF-8.
  def test_filename_utf8_raw_windows_1252_name
    assert_raw_file_name(0x00c6, "UTF-8")
  end
  # With -EEUC-JP, creates a file named by the EUC-JP bytes A4 A2; then, with
  # -EEUC-JP:UTF-8, checks that Dir.entries lists it as U+3042.
  # On darwin the expected names are the UTF-8 bytes relabelled as EUC-JP.
  # Returns early on Cygwin.
  def test_filename_ext_euc_jp_and_int_utf_8
    return if /cygwin/ =~ RUBY_PLATFORM
    with_tmpdir {|d|
      assert_separately(%w[-EEUC-JP], <<-'EOS', :chdir=>d)
        filename = "\xA4\xA2".force_encoding("euc-jp")
        File.open(filename, "w") {}
        opts = {:encoding => Encoding.default_external} if /mswin|mingw/ =~ RUBY_PLATFORM
        ents = Dir.entries(".", **(opts||{}))
        if /darwin/ =~ RUBY_PLATFORM
          filename = filename.encode("utf-8", "euc-jp").force_encoding("euc-jp")
        end
        assert_include(ents, filename)
      EOS
      assert_separately(%w[-EEUC-JP:UTF-8], <<-'EOS', :chdir=>d)
        filename = "\u3042"
        opts = {:encoding => Encoding.default_external} if /mswin|mingw/ =~ RUBY_PLATFORM
        ents = Dir.entries(".", **(opts||{}))
        if /darwin/ =~ RUBY_PLATFORM
          filename = filename.force_encoding("euc-jp")
        end
        assert_include(ents, filename)
      EOS
    }
  end
  # [ruby-dev:45279] When Dir.open fails on a non-ASCII path, the encoding of
  # the exception message must equal the encoding of the path (checked for a
  # Shift_JIS and an ISO-8859-1 path).
  def test_error_nonascii
    bug6071 = '[ruby-dev:45279]'
    paths = ["\u{3042}".encode("sjis"), "\u{ff}".encode("iso-8859-1")]
    encs = with_tmpdir {
      paths.map {|path|
        # The paths do not exist in the fresh directory, so the rescue
        # branch is the one expected to supply the value.
        Dir.open(path) rescue $!.message.encoding
      }
    }
    assert_equal(paths.map(&:encoding), encs, bug6071)
  end
  # [ruby-dev:45280] Dir#inspect on a directory opened through a non-ASCII
  # path must return a string in the encoding of that path (checked for a
  # Shift_JIS and an ISO-8859-1 path).
  def test_inspect_nonascii
    bug6072 = '[ruby-dev:45280]'
    paths = ["\u{3042}".encode("sjis"), "\u{ff}".encode("iso-8859-1")]
    encs = with_tmpdir {
      paths.map {|path|
        Dir.mkdir(path)
        Dir.open(path) {|d| d.inspect.encoding}
      }
    }
    assert_equal(paths.map(&:encoding), encs, bug6072)
  end
  270. def test_glob_incompatible
  271. d = "\u{3042}\u{3044}".encode("utf-16le")
  272. assert_raise(Encoding::CompatibilityError) {Dir.glob(d)}
  273. m = Class.new {define_method(:to_path) {d}}
  274. assert_raise(Encoding::CompatibilityError) {Dir.glob(m.new)}
  275. end
  # [Bug #7267] For each created file, the first Dir.glob match for
  # "<first character>*" must equal the name the file was created with.
  # (The bug title suggests this is about composed/decomposed forms --
  # confirm against the ticket.)
  def test_glob_compose
    bug7267 = '[ruby-core:48745] [Bug #7267]'

    # Local assertion object whose failure messages show names via String#dump.
    pp = Object.new.extend(Test::Unit::Assertions)
    def pp.mu_pp(str) #:nodoc:
      str.dump
    end

    with_tmpdir {|d|
      orig = %W"d\u{e9}tente x\u{304c 304e 3050 3052 3054}"
      orig.each {|n| open(n, "w") {}}
      orig.each do |o|
        n = Dir.glob("#{o[0..0]}*")[0]
        pp.assert_equal(o, n, bug7267)
      end
    }
  end
  # Inside a fresh temporary directory, tries to create one directory per
  # non-ASCII name (five Greek capitals, five Hiragana letters) while the
  # default external encoding is set to UTF-8 through
  # EnvUtil.with_default_external, and yields each created name.
  #
  # A name whose mkdir fails is skipped. After the block, the directory's
  # mode is set back to 0700 -- presumably so that callers which remove
  # permissions do not prevent cleanup of the temporary directory.
  def with_enc_path
    with_tmpdir do |d|
      names = %W"\u{391 392 393 394 395} \u{3042 3044 3046 3048 304a}"
      names.each do |dir|
        EnvUtil.with_default_external(Encoding::UTF_8) do
          Dir.mkdir(dir) rescue next
          begin
            yield(dir)
          ensure
            File.chmod(0700, dir)
          end
        end
      end
    end
  end
  # With the directory's mode set to 0300, Dir.glob("<dir>/*") must emit a
  # warning that mentions the directory name. Skipped for a directory that is
  # still readable after the chmod.
  def test_glob_warning_opendir
    with_enc_path do |dir|
      open("#{dir}/x", "w") {}
      File.chmod(0300, dir)
      next if File.readable?(dir)
      assert_warning(/#{dir}/) do
        Dir.glob("#{dir}/*")
      end
    end
  end
  # With the directory's mode set to 0000, Dir.glob("<dir>/x") for a file
  # inside it must emit a warning that mentions the directory name. Skipped
  # for a directory that is still readable after the chmod.
  def test_glob_warning_match_all
    with_enc_path do |dir|
      open("#{dir}/x", "w") {}
      File.chmod(0000, dir)
      next if File.readable?(dir)
      assert_warning(/#{dir}/) do
        Dir.glob("#{dir}/x")
      end
    end
  end
  # With the directory's mode set to 0000, Dir.glob("<dir>/x/") for a
  # subdirectory inside it must emit a warning that mentions the directory
  # name. Skipped for a directory that is still readable after the chmod.
  def test_glob_warning_match_dir
    with_enc_path do |dir|
      Dir.mkdir("#{dir}/x")
      File.chmod(0000, dir)
      next if File.readable?(dir)
      assert_warning(/#{dir}/) do
        Dir.glob("#{dir}/x/")
      end
    end
  end
  # Uses a Shift_JIS file name whose second byte is 0x5C (the ASCII
  # backslash) and checks that a glob pattern with a leading escape
  # backslash still matches exactly that file.
  #
  # Does nothing if the file cannot be created, or if the plain glob does not
  # return a Shift_JIS-encoded match.
  def test_glob_escape_multibyte
    name = "\x81\\".force_encoding(Encoding::Shift_JIS)
    with_tmpdir do
      open(name, "w") {} rescue next
      match, = Dir.glob("#{name}*")
      next unless match and match.encoding == Encoding::Shift_JIS
      assert_equal([name], Dir.glob("\\#{name}*"))
    end
  end
  # Checks two things about the encoding of Dir[] results:
  #
  # 1. each result carries the encoding of the pattern that produced it
  #    (IBM437 and EUC-JP patterns are used);
  # 2. [Bug #12081] with a US-ASCII "*" pattern, every entry (including
  #    non-ASCII file names and a directory whose name may contain an
  #    unassigned code point) is returned and, once normalised to UTF-8,
  #    equals the list of created names.
  def test_glob_encoding
    with_tmpdir do
      list = %W"file_one.ext file_two.ext \u{6587 4ef6}1.txt \u{6587 4ef6}2.txt"
      list.each {|f| open(f, "w") {}}
      a = "file_one*".force_encoding Encoding::IBM437
      b = "file_two*".force_encoding Encoding::EUC_JP
      assert_equal([a, b].map(&:encoding), Dir[a, b].map(&:encoding))
      if Bug::File::Fs.fsname(Dir.pwd) == "apfs"
        # High Sierra's APFS cannot use filenames with undefined character
        dir = "\u{76EE}"
      else
        dir = "\u{76EE 5F551}"
      end
      Dir.mkdir(dir)
      list << dir
      bug12081 = '[ruby-core:73868] [Bug #12081]'
      a = "*".force_encoding("us-ascii")
      result = Dir[a].map {|n|
        # Binary, ISO-8859-1 or invalid names are relabelled as UTF-8;
        # anything else is transcoded to UTF-8.
        if n.encoding == Encoding::ASCII_8BIT ||
            n.encoding == Encoding::ISO_8859_1 ||
            !n.valid_encoding?
          n.force_encoding(Encoding::UTF_8)
        else
          n.encode(Encoding::UTF_8)
        end
      }
      assert_equal(list, result.sort!, bug12081)
    end
  end
  # Assertion object whose mu_pp renders an array of strings as
  # ["dump"(ENCODING), ...], so that a failure message shows both the bytes
  # and the encoding of every name.
  PP = Object.new.extend(Test::Unit::Assertions)
  def PP.mu_pp(ary) #:nodoc:
    # Built by interpolation so that no string literal is mutated.
    "[#{ary.map {|str| "#{str.dump}(#{str.encoding})"}.join(', ')}]"
  end
  # [Bug #7267] Dir.entries must return the names the files were created
  # with. The expected names are expressed in the filesystem encoding
  # (ASCII-8BIT when that is US-ASCII): transcoded on mswin/mingw, with "?"
  # substituted when transcoding fails, and merely relabelled elsewhere.
  # Dot entries are ignored.
  def test_entries_compose
    bug7267 = '[ruby-core:48745] [Bug #7267]'

    with_tmpdir {|d|
      orig = %W"d\u{e9}tente x\u{304c 304e 3050 3052 3054}"
      orig.each {|n| open(n, "w") {}}
      enc = Encoding.find("filesystem")
      enc = Encoding::ASCII_8BIT if enc == Encoding::US_ASCII
      if /mswin|mingw/ =~ RUBY_PLATFORM
        opts = {:encoding => enc}
        orig.map! {|o| o.encode("filesystem") rescue o.tr("^a-z", "?")}
      else
        orig.each {|o| o.force_encoding(enc) }
      end
      ents = Dir.entries(".", **(opts||{})).reject {|n| /\A\./ =~ n}
      ents.sort!
      PP.assert_equal(orig, ents, bug7267)
    }
  end
  # After chdir into a directory with a non-ASCII name, the basename of
  # Dir.pwd must equal that name expressed in the filesystem encoding
  # (ASCII-8BIT when that is US-ASCII). On mswin/mingw, names that cannot be
  # transcoded to the filesystem encoding are skipped.
  def test_pwd
    orig = %W"d\u{e9}tente x\u{304c 304e 3050 3052 3054}"
    expected = []
    results = []
    orig.each {|o|
      if /mswin|mingw/ =~ RUBY_PLATFORM
        n = (o.encode("filesystem") rescue next)
      else
        enc = Encoding.find("filesystem")
        enc = Encoding::ASCII_8BIT if enc == Encoding::US_ASCII
        n = o.dup.force_encoding(enc)
      end
      expected << n

      with_tmpdir {
        Dir.mkdir(o)
        results << File.basename(Dir.chdir(o) {Dir.pwd})
      }
    }
    PP.assert_equal(expected, results)
  end
  416. end