PageRenderTime 71ms CodeModel.GetById 40ms RepoModel.GetById 1ms app.codeStats 0ms

/spec/ruby/core/string/split_spec.rb

https://github.com/pwnall/rubinius
Ruby | 379 lines | 357 code | 19 blank | 3 comment | 24 complexity | 49d4b9a04dc908c3483509512ae540b2 MD5 | raw file
  1. # -*- encoding: utf-8 -*-
  2. require File.expand_path('../../../spec_helper', __FILE__)
  3. require File.expand_path('../fixtures/classes.rb', __FILE__)
  # Specs for String#split when the separator is a String (or an object that
  # converts to one via #to_str).
  describe "String#split with String" do
    # No example in this group assigns $KCODE; the hooks only guarantee that the
    # global is left exactly as it was found.
    before :each do
      @kcode = $KCODE
    end

    after :each do
      $KCODE = @kcode
    end

    it "returns an array of substrings based on splitting on the given string" do
      "mellow yellow".split("ello").should == ["m", "w y", "w"]
    end

    it "suppresses trailing empty fields when limit isn't given or 0" do
      "1,2,,3,4,,".split(',').should == ["1", "2", "", "3", "4"]
      "1,2,,3,4,,".split(',', 0).should == ["1", "2", "", "3", "4"]
      # The separator is TWO spaces (a plain string separator, not the special
      # single-space whitespace mode), so the leading empty field and the
      # embedded newline are both preserved.
      "  a  b  c\nd  ".split("  ").should == ["", "a", "b", "c\nd"]
      "hai".split("hai").should == []
      ",".split(",").should == []
      ",".split(",", 0).should == []
    end

    it "returns an array with one entry if limit is 1: the original string" do
      "hai".split("hai", 1).should == ["hai"]
      "x.y.z".split(".", 1).should == ["x.y.z"]
      "hello world ".split(" ", 1).should == ["hello world "]
      "hi!".split("", 1).should == ["hi!"]
    end

    it "returns at most limit fields when limit > 1" do
      "hai".split("hai", 2).should == ["", ""]

      "1,2,,3,4,,".split(',', 2).should == ["1", "2,,3,4,,"]
      "1,2,,3,4,,".split(',', 3).should == ["1", "2", ",3,4,,"]
      "1,2,,3,4,,".split(',', 4).should == ["1", "2", "", "3,4,,"]
      "1,2,,3,4,,".split(',', 5).should == ["1", "2", "", "3", "4,,"]
      "1,2,,3,4,,".split(',', 6).should == ["1", "2", "", "3", "4", ","]

      "x".split('x', 2).should == ["", ""]
      "xx".split('x', 2).should == ["", "x"]
      "xx".split('x', 3).should == ["", "", ""]
      "xxx".split('x', 2).should == ["", "xx"]
      "xxx".split('x', 3).should == ["", "", "x"]
      "xxx".split('x', 4).should == ["", "", "", ""]
    end

    it "doesn't suppress or limit fields when limit is negative" do
      "1,2,,3,4,,".split(',', -1).should == ["1", "2", "", "3", "4", "", ""]
      "1,2,,3,4,,".split(',', -5).should == ["1", "2", "", "3", "4", "", ""]
      # Two-space separator again; see the note in the limit-0 example.
      "  a  b  c\nd  ".split("  ", -1).should == ["", "a", "b", "c\nd", ""]
      ",".split(",", -1).should == ["", ""]
    end

    it "defaults to $; when string isn't given or nil" do
      begin
        old_fs = $;

        [",", ":", "", "XY", nil].each do |fs|
          $; = fs

          ["x,y,z,,,", "1:2:", "aXYbXYcXY", ""].each do |str|
            # A nil $; is compared against an explicit single-space separator.
            expected = str.split(fs || " ")

            str.split(nil).should == expected
            str.split.should == expected

            str.split(nil, -1).should == str.split(fs || " ", -1)
            str.split(nil, 0).should == str.split(fs || " ", 0)
            str.split(nil, 2).should == str.split(fs || " ", 2)
          end
        end
      ensure
        # Always put the global field separator back, even when an
        # expectation above fails.
        $; = old_fs
      end
    end

    it "ignores leading and continuous whitespace when string is a single space" do
      " now's the time ".split(' ').should == ["now's", "the", "time"]
      " now's the time ".split(' ', -1).should == ["now's", "the", "time", ""]
      " now's the time ".split(' ', 3).should == ["now's", "the", "time "]

      "\t\n a\t\tb \n\r\r\nc\v\vd\v ".split(' ').should == ["a", "b", "c", "d"]
      "a\x00a b".split(' ').should == ["a\x00a", "b"]
    end

    it "splits between characters when its argument is an empty string" do
      "hi!".split("").should == ["h", "i", "!"]
      "hi!".split("", -1).should == ["h", "i", "!", ""]
      "hi!".split("", 2).should == ["h", "i!"]
    end

    it "tries converting its pattern argument to a string via to_str" do
      obj = mock('::')
      obj.should_receive(:to_str).and_return("::")

      "hello::world".split(obj).should == ["hello", "world"]
    end

    it "tries converting limit to an integer via to_int" do
      obj = mock('2')
      obj.should_receive(:to_int).and_return(2)

      "1.2.3.4".split(".", obj).should == ["1", "2.3.4"]
    end

    it "doesn't set $~" do
      $~ = nil
      "x.y.z".split(".")
      $~.should == nil
    end

    it "returns subclass instances based on self" do
      ["", "x.y.z.", " x y "].each do |str|
        ["", ".", " "].each do |pat|
          [-1, 0, 1, 2].each do |limit|
            # The receiver's class decides the class of the fields ...
            StringSpecs::MyString.new(str).split(pat, limit).each do |x|
              x.should be_kind_of(StringSpecs::MyString)
            end

            # ... the separator's class is only checked against String here.
            str.split(StringSpecs::MyString.new(pat), limit).each do |x|
              x.should be_kind_of(String)
            end
          end
        end
      end
    end

    it "does not call constructor on created subclass instances" do
      # can't call should_not_receive on an object that doesn't yet exist
      # so failure here is signalled by exception, not expectation failure
      s = StringSpecs::StringWithRaisingConstructor.new('silly:string')
      s.split(':').first.should == 'silly'
    end

    it "taints the resulting strings if self is tainted" do
      ["", "x.y.z.", " x y "].each do |str|
        ["", ".", " "].each do |pat|
          [-1, 0, 1, 2].each do |limit|
            # limit is handed to #split so that each iteration exercises a
            # different limit instead of repeating the same call four times.
            str.dup.taint.split(pat, limit).each do |x|
              # Empty fields are skipped: the Regexp specs in this file note
              # that on 1.8 the final empty field produced by a negative
              # limit is not tainted.
              x.tainted?.should == true unless x.empty?
            end

            # A tainted separator must not taint the fields.
            str.split(pat.dup.taint, limit).each do |x|
              x.tainted?.should == false
            end
          end
        end
      end
    end
  end
  # Specs for String#split when the separator is a Regexp.
  describe "String#split with Regexp" do
    # Several examples below assign $KCODE. Save it before and restore it after
    # every example so the change cannot leak into specs that run later.
    before :each do
      @kcode = $KCODE
    end

    after :each do
      $KCODE = @kcode
    end

    it "divides self on regexp matches" do
      # Two spaces after "now's": the second one produces the empty field.
      " now's  the time".split(/ /).should == ["", "now's", "", "the", "time"]
      " x\ny ".split(/ /).should == ["", "x\ny"]
      "1, 2.34,56, 7".split(/,\s*/).should == ["1", "2.34", "56", "7"]
      "1x2X3".split(/x/i).should == ["1", "2", "3"]
    end

    it "treats negative limits as no limit" do
      "".split(%r!/+!, -1).should == []
    end

    it "suppresses trailing empty fields when limit isn't given or 0" do
      "1,2,,3,4,,".split(/,/).should == ["1", "2", "", "3", "4"]
      "1,2,,3,4,,".split(/,/, 0).should == ["1", "2", "", "3", "4"]
      " a b c\nd ".split(/\s+/).should == ["", "a", "b", "c", "d"]
      "hai".split(/hai/).should == []
      ",".split(/,/).should == []
      ",".split(/,/, 0).should == []
    end

    it "returns an array with one entry if limit is 1: the original string" do
      "hai".split(/hai/, 1).should == ["hai"]
      "xAyBzC".split(/[A-Z]/, 1).should == ["xAyBzC"]
      "hello world ".split(/\s+/, 1).should == ["hello world "]
      "hi!".split(//, 1).should == ["hi!"]
    end

    it "returns at most limit fields when limit > 1" do
      "hai".split(/hai/, 2).should == ["", ""]

      "1,2,,3,4,,".split(/,/, 2).should == ["1", "2,,3,4,,"]
      "1,2,,3,4,,".split(/,/, 3).should == ["1", "2", ",3,4,,"]
      "1,2,,3,4,,".split(/,/, 4).should == ["1", "2", "", "3,4,,"]
      "1,2,,3,4,,".split(/,/, 5).should == ["1", "2", "", "3", "4,,"]
      "1,2,,3,4,,".split(/,/, 6).should == ["1", "2", "", "3", "4", ","]

      "x".split(/x/, 2).should == ["", ""]
      "xx".split(/x/, 2).should == ["", "x"]
      "xx".split(/x/, 3).should == ["", "", ""]
      "xxx".split(/x/, 2).should == ["", "xx"]
      "xxx".split(/x/, 3).should == ["", "", "x"]
      "xxx".split(/x/, 4).should == ["", "", "", ""]
    end

    it "doesn't suppress or limit fields when limit is negative" do
      "1,2,,3,4,,".split(/,/, -1).should == ["1", "2", "", "3", "4", "", ""]
      "1,2,,3,4,,".split(/,/, -5).should == ["1", "2", "", "3", "4", "", ""]
      " a b c\nd ".split(/\s+/, -1).should == ["", "a", "b", "c", "d", ""]
      ",".split(/,/, -1).should == ["", ""]
    end

    it "defaults to $; when regexp isn't given or nil" do
      begin
        old_fs = $;

        [/,/, /:/, //, /XY/, /./].each do |fs|
          $; = fs

          ["x,y,z,,,", "1:2:", "aXYbXYcXY", ""].each do |str|
            expected = str.split(fs)

            str.split(nil).should == expected
            str.split.should == expected

            str.split(nil, -1).should == str.split(fs, -1)
            str.split(nil, 0).should == str.split(fs, 0)
            str.split(nil, 2).should == str.split(fs, 2)
          end
        end
      ensure
        # Always put the global field separator back, even when an
        # expectation above fails.
        $; = old_fs
      end
    end

    it "splits between characters when regexp matches a zero-length string" do
      "hello".split(//).should == ["h", "e", "l", "l", "o"]
      "hello".split(//, -1).should == ["h", "e", "l", "l", "o", ""]
      "hello".split(//, 2).should == ["h", "ello"]

      "hi mom".split(/\s*/).should == ["h", "i", "m", "o", "m"]

      # Zero-width lookahead: fields are cut in front of every "B".
      "AABCCBAA".split(/(?=B)/).should == ["AA", "BCC", "BAA"]
      "AABCCBAA".split(/(?=B)/, -1).should == ["AA", "BCC", "BAA"]
      "AABCCBAA".split(/(?=B)/, 2).should == ["AA", "BCCBAA"]
    end

    it "respects $KCODE when splitting between characters" do
      str = "こにちわ"
      reg = %r!!

      $KCODE = "utf-8"
      ary = str.split(reg)
      ary.size.should == 4
      ary.should == ["こ", "に", "ち", "わ"]
    end

    ruby_version_is ""..."1.9" do
      it "uses $KCODE when splitting invalid characters" do
        str = [129, 0].pack('C*')

        $KCODE = "SJIS"
        ary = str.split(//)
        ary.size.should == 1
        ary.should == [str]
      end
    end

    it "respects the encoding of the regexp when splitting between characters" do
      str = "\303\202"

      $KCODE = "a"
      ary = str.split(//u)
      ary.size.should == 1
      ary.should == ["\303\202"]
    end

    it "includes all captures in the result array" do
      "hello".split(/(el)/).should == ["h", "el", "lo"]
      "hi!".split(/()/).should == ["h", "", "i", "", "!"]
      "hi!".split(/()/, -1).should == ["h", "", "i", "", "!", "", ""]
      "hello".split(/((el))()/).should == ["h", "el", "el", "", "lo"]
      "AabB".split(/([a-z])+/).should == ["A", "b", "B"]
    end

    it "does not include non-matching captures in the result array" do
      "hello".split(/(el)|(xx)/).should == ["h", "el", "lo"]
    end

    it "tries converting limit to an integer via to_int" do
      obj = mock('2')
      obj.should_receive(:to_int).and_return(2)

      # A Regexp separator is used so this example covers the Regexp path; the
      # String-separator variant lives in the "with String" group.
      "1.2.3.4".split(/\./, obj).should == ["1", "2.3.4"]
    end

    it "returns a type error if limit can't be converted to an integer" do
      lambda { "1.2.3.4".split(".", "three") }.should raise_error(TypeError)
      lambda { "1.2.3.4".split(".", nil) }.should raise_error(TypeError)
      # Same expectation with a Regexp separator.
      lambda { "1.2.3.4".split(/\./, "three") }.should raise_error(TypeError)
      lambda { "1.2.3.4".split(/\./, nil) }.should raise_error(TypeError)
    end

    it "doesn't set $~" do
      $~ = nil
      "x:y:z".split(/:/)
      $~.should == nil
    end

    it "returns the original string if no matches are found" do
      "foo".split("\n").should == ["foo"]
      # Same expectation with a Regexp separator.
      "foo".split(/\n/).should == ["foo"]
    end

    it "returns subclass instances based on self" do
      ["", "x:y:z:", " x y "].each do |str|
        [//, /:/, /\s+/].each do |pat|
          [-1, 0, 1, 2].each do |limit|
            StringSpecs::MyString.new(str).split(pat, limit).each do |x|
              x.should be_kind_of(StringSpecs::MyString)
            end
          end
        end
      end
    end

    it "does not call constructor on created subclass instances" do
      # can't call should_not_receive on an object that doesn't yet exist
      # so failure here is signalled by exception, not expectation failure
      s = StringSpecs::StringWithRaisingConstructor.new('silly:string')
      s.split(/:/).first.should == 'silly'
    end

    it "taints the resulting strings if self is tainted" do
      ["", "x:y:z:", " x y "].each do |str|
        [//, /:/, /\s+/].each do |pat|
          [-1, 0, 1, 2].each do |limit|
            str.dup.taint.split(pat, limit).each do |x|
              # See the spec below for why the conditional is here
              x.tainted?.should be_true unless x.empty?
            end
          end
        end
      end
    end

    # When split is called with a limit of -1, empty fields are not suppressed
    # and a final empty field is *always* created (who knows why). This empty
    # string is not tainted (again, who knows why) on 1.8 but is on 1.9.
    ruby_bug "#", "1.8" do
      it "taints an empty string if self is tainted" do
        ":".taint.split(//, -1).last.tainted?.should be_true
      end
    end

    it "doesn't taints the resulting strings if the Regexp is tainted" do
      ["", "x:y:z:", " x y "].each do |str|
        [//, /:/, /\s+/].each do |pat|
          [-1, 0, 1, 2].each do |limit|
            str.split(pat.dup.taint, limit).each do |x|
              x.tainted?.should be_false
            end
          end
        end
      end
    end

    ruby_version_is "1.9" do
      it "retains the encoding of the source string" do
        ary = "а б в".split
        encodings = ary.map { |s| s.encoding }
        encodings.should == [Encoding::UTF_8, Encoding::UTF_8, Encoding::UTF_8]
      end

      it "returns an ArgumentError if an invalid UTF-8 string is supplied" do
        broken_str = 'проверка' # in russian, means "test"
        # Chop one BYTE off the end while the string is treated as binary, then
        # relabel it as UTF-8 so the last character is left incomplete.
        broken_str.force_encoding('binary')
        broken_str.chop!
        broken_str.force_encoding('utf-8')
        lambda { broken_str.split(/\r\n|\r|\n/) }.should raise_error(ArgumentError)
      end
    end
  end