PageRenderTime 57ms CodeModel.GetById 17ms RepoModel.GetById 0ms app.codeStats 0ms

/src/kernel/bootstrap/String.rb

https://github.com/MagLev/maglev
Ruby | 2116 lines | 1656 code | 181 blank | 279 comment | 184 complexity | af1c14ac7168ea75c9bb3a038240685b MD5 | raw file
Possible License(s): LGPL-2.1

Large files are truncated, but you can click here to view the full file

  1. class String
  # Builds a Regexp whose source is the receiver's text.
  def to_rx
    source = self
    Regexp.new(source)
  end
  5. primitive_nobridge '__copyfrom_to', 'copyFrom:to:'
  6. primitive_nobridge '__findStringStartingAt', 'findString:startingAt:'
  7. primitive_nobridge '__md5sum', 'md5sumDigest' # used by lib file digest/md5.rb
  8. primitive_nobridge '__remove_from_to', 'removeFrom:to:'
  9. class_primitive_nobridge '__withAll', 'withAll:'
  10. class_primitive_nobridge '__alloc', '_basicNew'
  11. class_primitive_nobridge '__new', 'new:'
  # Generic constructor: allocates a bare instance with __alloc, forwards
  # every argument to #initialize and returns the instance.
  # The unused local that captured the argument count has been removed.
  def self.new(*args)
    # this version gets bridge methods
    str = self.__alloc
    str.initialize(*args)
    str
  end
  # One-argument constructor, coded explicitly for performance.
  #
  # For String itself the new instance is filled here (directly from a String
  # argument, or after coercing the argument via to_str); for any other
  # receiver class an empty instance is allocated and #initialize is left to
  # do the work. #initialize is invoked in every case.
  def self.new(str)
    unless self._equal?(String)
      instance = __alloc
      instance.initialize(str)
      return instance
    end
    if str._isString
      instance = __withAll(str)
    else
      instance = __alloc
      str = Maglev::Type.coerce_to(str, String, :to_str)
      instance.replace(str)
    end
    instance.initialize(str)
    instance
  end
  # Zero-argument constructor, coded explicitly for performance: allocate,
  # run the no-argument #initialize, and hand back the instance.
  def self.new()
    fresh = __alloc
    fresh.initialize
    fresh
  end
  # Generic initializer (this version gets bridge methods); the block is
  # ignored.
  #
  # With no arguments nothing happens. With one argument, instances whose
  # class is not exactly String are filled from the argument (coerced via
  # to_str); instances of String itself are left untouched.
  # Raises ArgumentError when more than one argument is supplied.
  def initialize(*args, &block)
    argc = args.length
    raise ArgumentError, 'too many args' if argc > 1
    if argc._equal?(1)
      unless self.class._equal?(String)
        contents = Maglev::Type.coerce_to(args[0], String, :to_str)
        self.replace(contents)
      end
    end
    self
  end
  # One-argument initializer, coded explicitly for performance.
  # Instances of String itself are left as they are; instances of any other
  # class are replaced with the argument coerced via to_str.
  def initialize(str)
    unless self.class._equal?(String)
      self.replace(Maglev::Type.coerce_to(str, String, :to_str))
    end
    self
  end
  # One-argument-plus-block initializer, coded explicitly for performance.
  # The block is ignored; otherwise identical to the one-argument form.
  def initialize(str, &block)
    return self if self.class._equal?(String)
    contents = Maglev::Type.coerce_to(str, String, :to_str)
    self.replace(contents)
    self
  end
  # Zero-argument initializer, coded explicitly for performance.
  # There is nothing to set up; the receiver is returned as is.
  def initialize
    return self
  end
  # Block-only initializer, coded explicitly for performance.
  # The block is ignored and the receiver is returned as is.
  def initialize(&block)
    return self
  end
  primitive '__basic_dup', '_rubyBasicDup' # use non-singleton class
  # Returns a copy of the receiver: a basic duplicate is made first and is
  # then given the chance to finish copying through initialize_copy.
  def dup
    copy = self.__basic_dup
    copy.initialize_copy(self)
    copy
  end
  # Raises a RuntimeError that uses the receiver as its message.
  def signal
    message = self
    raise RuntimeError, message
  end
  94. # Class Methods
  95. # Instance Methods
  # Format operator: treats the receiver as a sprintf format string.
  # A non-Array argument is wrapped so that it becomes the single value.
  def %(arg)
    values = arg._isArray ? arg : [ arg ]
    sprintf(self, *values)
  end
  # Repetition: returns a new string (same class as the receiver) holding
  # n copies of the receiver.
  #
  # n is coerced with to_int. Raises RangeError when the coerced value is an
  # Integer but not a Fixnum, and ArgumentError when it is negative.
  def *(n)
    n = Maglev::Type.coerce_to(n, Integer, :to_int)
    if !n._isFixnum && n._isInteger
      raise RangeError , 'arg exceeds max Fixnum'
    end
    raise ArgumentError , 'arg must be positive' if n < 0
    result = self.class.__alloc
    remaining = n
    if remaining >= 64
      # Optimization for large counts: build a chunk holding several copies
      # by repeated doubling, then append whole chunks at a time.
      chunk = self.class.__alloc
      chunk.__append_internal(self)
      copies = 1
      copies_limit = remaining.__divide(16)
      # doubling stops once the chunk holds at least 1/16 of the requested
      # copies or its size reaches 8000
      while copies < copies_limit && chunk.__size < 8000
        chunk.__append_internal(chunk)
        copies = copies * 2
      end
      while remaining > copies
        result.__append_internal(chunk)
        remaining -= copies
      end
    end
    # whatever is left is appended one copy at a time
    while remaining > 0
      result.__append_internal(self)
      remaining -= 1
    end
    result
  end
  primitive '+', 'rubyConcatenate:'
  # note smalltalk addAll: returns arg, not receiver
  primitive '__append', '_rubyAddAll:'
  # Append operator; all of the work is delegated to __append_internal,
  # whose result (the receiver) is returned.
  def <<(arg)
    self.__append_internal(arg)
  end
  # Appends arg to the receiver in place and returns the receiver.
  # A Fixnum is handed to the primitive unchanged; anything else is first
  # coerced with to_str. Per the original notes, the frozen check and the
  # Fixnum range check happen inside the __append primitive. No taint
  # propagation is performed.
  def __append_internal(arg)
    other = arg._isFixnum ? arg : Maglev::Type.coerce_to(arg, String, :to_str)
    self.__append(other)
    self
  end
  primitive_env '<=>', '_rubyCompare' , ':'
  # Fallback invoked from Smalltalk code in _rubyCompare<env>: .
  # If o responds to both to_str and <=>, the comparison is retried with
  # the operands swapped and a non-nil result is negated; in every other
  # case nil is returned.
  def __prim_compare_failed(o)
    if o.respond_to?(:to_str) && o.respond_to?(:<=>)
      reversed = (o <=> self)
      return -reversed if reversed
    end
    nil
  end
  162. primitive_nobridge '__uppercaseAt', 'rubyUpperCaseAt:' # arg is one-based
  163. primitive 'bytesize', 'size' # added for 1.8.7
  # Calls the block with self.__at(n) for each zero-based position n, using
  # the size captured before the loop starts. Without a block a
  # StringByteEnumerator is returned. (Added for 1.8.7.)
  #
  # The loop runs inside a one-element Array#each so that a `break` from
  # the caller's block can be detected: the flag is set just before each
  # call and cleared right after, so it is still set only when the call
  # never returned normally. In that case the value produced by the
  # enclosing each is returned; otherwise the receiver is.
  def bytes(&block) # added for 1.8.7
    return StringByteEnumerator.new(self, :bytes) unless block_given?
    in_block = false
    each_result = [1].each { |ignore|
      pos = 0
      limit = self.__size
      while pos < limit
        byte = self.__at(pos)
        in_block = true
        block.call( byte )
        in_block = false
        pos += 1
      end
    }
    return each_result if in_block # the argument block did a break
    self
  end
  # Calls the block with a fresh one-character string for each position of
  # the receiver, using the size captured before the loop starts. Without a
  # block a StringCharEnumerator is returned. (Added for 1.8.7; Maglev is
  # not yet KCODE aware.)
  #
  # As in #bytes, the loop runs inside a one-element Array#each and a flag
  # brackets each block call so that a `break` from the caller's block can
  # be detected and its value returned.
  def chars(&block) # added for 1.8.7
    return StringCharEnumerator.new(self, :chars) unless block_given?
    in_block = false
    each_result = [1].each { |ignore|
      pos = 0
      limit = self.__size
      while pos < limit
        char = ' '
        char[0] = self.__at(pos)
        in_block = true
        block.call( char )
        in_block = false
        pos += 1
      end
    }
    return each_result if in_block # the argument block did a break
    self
  end
  # Case-insensitive version of String#<=>.
  #
  # Returns nil for nil and Symbol arguments; other non-String arguments are
  # coerced with to_str. The upper-cased characters are compared position by
  # position over the common prefix; if that is equal, the sizes decide.
  def casecmp(o)
    unless o._isString
      return nil if o._equal?(nil)
      return nil if o._isSymbol
      o = Maglev::Type.coerce_to(o, String, :to_str)
    end
    my_size = size
    other_size = o.__size
    common = my_size > other_size ? other_size : my_size # the smaller size
    pos = 1 # __uppercaseAt takes a one-based position
    while pos <= common
      cmp = self.__uppercaseAt(pos) <=> o.__uppercaseAt(pos)
      return cmp unless cmp._equal?(0)
      pos += 1
    end
    my_size <=> other_size
  end
  primitive_env '==', '_rubyEqual' , ':'
  # primitive assumes nil.respond_to?(:to_str) == false
  # primitive assumes a_symbol.respond_to?(:to_str) == false
  primitive_env '===', '_rubyEqual' , ':' # === same as == for String
  # Fallback invoked from Smalltalk code in _rubyEqual<env>: .
  # Objects that respond to to_str get to decide equality themselves
  # (other == self, per specs); everything else is simply not equal.
  def __prim_equal_failed(other)
    return false unless other.respond_to?(:to_str)
    other == self # per specs
  end
  243. # str =~ obj => fixnum or nil
  244. #
  245. # Match---If <i>obj</i> is a <code>Regexp</code>, use it as a pattern to match
  246. # against <i>str</i>,and returns the position the match starts, or
  247. # <code>nil</code> if there is no match. Otherwise, invokes
  248. # <i>obj.=~</i>, passing <i>str</i> as an argument. The default
  249. # <code>=~</code> in <code>Object</code> returns <code>false</code>.
  250. #
  251. # "cat o' 9 tails" =~ /\d/ #=> 7
  252. # "cat o' 9 tails" =~ 9 #=> false
  # Catch-all variant of =~ ; always raises ArgumentError.
  # Only the one-argument call is supported: any other invocation would have
  # a bridge method interposed, which would require different arguments to
  # __storeRubyVcGlobal.
  def =~(*args, &block)
    raise(ArgumentError, 'expected 1 arg')
  end
  # Match operator; there is no bridge method for this variant.
  # (=~ is mostly translated to a :match S-expression by the parser.)
  #
  # Regexp argument: searches the receiver from offset 0, stores the result
  # into the caller's $~ and returns the start of the match, or the
  # (falsy) search result when nothing matched.
  # String argument: raises TypeError.
  # Anything else: delegates to other =~ self.
  def =~(other)
    unless other._isRegexp
      raise TypeError, 'String given' if other._isString
      return other =~ self
    end
    m = other.__search(self, 0, nil)
    m.__storeRubyVcGlobal(0x20) # store into caller's $~
    m ? m.begin(0) : m
  end
  # Catch-all variant of #[] (the one that gets bridge methods); it always
  # raises ArgumentError.
  def [](*args)
    raise(ArgumentError, 'wrong number of arguments')
  end
  primitive_nobridge_env '[]' , '_rubyAt', ':'
  primitive_nobridge_env '__at' , '_rubyAt', ':'
  # Fallback invoked from the primitive failure code in _rubyAt<env>: .
  #
  # Range: resolved against the receiver's size into a [begin, length] pair
  #   (nil when the range cannot be resolved) and re-dispatched.
  # Integer: the primitive should have handled it, so ArgumentError is raised.
  # Anything else: coerced to a Fixnum via to_int and retried.
  def __prim_at_failed(index)
    if index._isRange
      span = index.__beg_len(self.__size)
      return nil if span._equal?(nil)
      return self.__at( span[0] , span[1] )
    end
    if index._isInteger
      raise ArgumentError, 'String#[index] primitive failed'
    end
    self.__at(Maglev::Type.__coerce_to_Fixnum_to_int(index))
  end
  primitive_nobridge_env '[]' , '_rubyAt', ':length:'
  primitive_nobridge_env '__at' , '_rubyAt', ':length:'
  # Fallback called from the Smalltalk primitive failure code for the
  # two-argument form of #[].
  #
  # Regexp first argument: the match is located via __match_regexp, which
  #   yields [m_begin, m_len] or nil, and the matched slice is returned.
  # Otherwise both arguments are coerced to integers as needed; if both were
  # already Fixnums the primitive should not have failed, so ArgumentError
  # is raised. A negative length yields nil. No taint logic is applied.
  def __prim_at_length_failed(start, length)
    if start._isRegexp
      span = self.__match_regexp(start, length, 0x40) # span is [m_begin, m_len]
      return nil if span._equal?(nil)
      return self.__at( span[0] , span[1] )
    end
    if !start._isFixnum
      start = Maglev::Type.__coerce_to_Fixnum_to_int(start)
      length = Maglev::Type.coerce_to(length, Fixnum, :to_int)
    elsif length._isFixnum
      raise ArgumentError, 'String#[start,length] primitive failed'
    else
      length = Maglev::Type.__coerce_to_Fixnum_to_int(length)
    end
    return nil if length < 0
    self.__at(start, length)
  end
  # Catch-all variant of #[]= (the one that gets bridge methods).
  # Two arguments are re-dispatched as self[index] = value, three as
  # self[index, length] = value; any other count raises ArgumentError.
  def []=(*args)
    argc = args.__size
    unless argc._equal?(2) || argc._equal?(3)
      raise ArgumentError, 'expected 2 or 3 args'
    end
    if argc._equal?(2)
      self[args.__at(0)] = args.__at(1)
    else
      self[args.__at(0), args.__at(1)] = args.__at(2)
    end
  end
  primitive_nobridge_env '[]=', '_rubyAt', ':put:'
  primitive_nobridge_env '__at_put', '_rubyAt', ':put:'
  # Smalltalk code handles Regexp and String first args
  #
  # Fallback called from Smalltalk when the two-argument store fails.
  #
  # The value is coerced with to_str unless it is already a Fixnum or a
  # String. Then, by index type:
  #   Fixnum - retried only if the value had to be coerced; otherwise the
  #            failure is reported as an out-of-range IndexError.
  #   Range  - resolved against the receiver's size and stored through the
  #            three-argument primitive; IndexError if it cannot be resolved.
  #   other  - coerced with to_int and retried.
  # Returns the (possibly coerced) value. No taint logic is applied.
  def __prim_at_put_failed(index, value)
    coerced = false
    unless value._isFixnum || value._isString
      value = Maglev::Type.__coerce_to_String_to_str( value )
      coerced = true
    end
    if index._isFixnum
      unless coerced
        raise IndexError, ('String#[index]=, ' + " index #{index} out of range")
      end
      self.__at_put(index, value)
    elsif index._isRange
      span = index.__beg_len(self.__size)
      if span._equal?(nil)
        raise IndexError, ('String#[range]=' + "start out of range for range=#{index}")
      end
      self.__at_length_put( span[0] , span[1], value)
    else
      index = Maglev::Type.coerce_to(index, Fixnum, :to_int)
      self.__at_put(index, value)
    end
    value
  end
  # Three-argument store: both Ruby names are bound to the same '_rubyAt'
  # primitive with the ':length:put:' selector suffix.
  primitive_nobridge_env '[]=', '_rubyAt', ':length:put:'
  primitive_nobridge_env '__at_length_put', '_rubyAt', ':length:put:'
  # smalltalk code handles Regexp and Fixnum first args
  # Fallback for the three-argument store (self[index, count] = value).
  #
  # Coerces index and count with to_int and value with to_str, then retries
  # the store through __at_length_put and returns its result.
  #
  # Fix: the retry previously passed an undefined local (`idx`) instead of
  # the coerced `index`, so this fallback raised NameError whenever it ran.
  # No taint logic is applied.
  def __prim_at_length_put_failed(index, count, value)
    index = Maglev::Type.coerce_to(index, Fixnum, :to_int)
    str_value = Maglev::Type.coerce_to(value, String, :to_str)
    count = Maglev::Type.coerce_to(count, Fixnum, :to_int)
    self.__at_length_put(index, count, str_value)
  end
  373. # MNI: String#~
  primitive '__capitalize', 'rubyCapitalize'
  # Returns the result of the __capitalize primitive.
  # No taint propagation is performed.
  def capitalize
    __capitalize
  end
  # In-place capitalize. Raises TypeError on a frozen receiver. Returns nil
  # when the capitalized text equals the receiver, otherwise the result of
  # replacing the receiver's contents with it.
  def capitalize!
    raise TypeError, "can't modify frozen string" if frozen?
    capitalized = __capitalize
    capitalized == self ? nil : replace(capitalized)
  end
  # Centers a copy of the receiver in a field of the given width, padding
  # with padstr; the work is done by #justify on the copy. (From Rubinius.)
  def center(width, padstr = " ") # from Rubinius
    self.dup.justify(width, :center, padstr)
  end
  390. # str.chomp(separator=$/) => new_str
  391. #
  392. # Returns a new <code>String</code> with the given record separator removed
  393. # from the end of <i>str</i> (if present). If <code>$/</code> has not been
  394. # changed from the default Ruby record separator, then <code>chomp</code> also
  395. # removes carriage return characters (that is it will remove <code>\n</code>,
  396. # <code>\r</code>, and <code>\r\n</code>).
  397. #
  398. # "hello".chomp #=> "hello"
  399. # "hello\n".chomp #=> "hello"
  400. # "hello\r\n".chomp #=> "hello"
  401. # "hello\n\r".chomp #=> "hello\n"
  402. # "hello\r".chomp #=> "hello"
  403. # "hello \n there".chomp #=> "hello \n there"
  404. # "hello".chomp("llo") #=> "he"
  # Non-destructive chomp: runs chomp! on a copy. When chomp! reports no
  # change (nil) the untouched copy is returned instead.
  def chomp(separator=$/)
    copy = self.dup
    chomped = copy.chomp!(separator)
    chomped._equal?(nil) ? copy : chomped
  end
  413. # str.chomp!(separator=$/) => str or nil
  414. #
  415. # Modifies <i>str</i> in place as described for <code>String#chomp</code>,
  416. # returning <i>str</i>, or <code>nil</code> if no modifications were made.
  # Removes a trailing record separator in place; returns the receiver, or
  # nil when nothing was removed.
  #
  # nil separator or empty receiver: nil.
  # "\n" separator: removes one trailing "\n", "\r\n" or "\r".
  # empty separator: removes every trailing "\n" / "\r\n".
  # any other separator: removed only if the receiver ends with exactly it.
  def chomp!(sep=$/)
    return nil if sep._equal?(nil)
    my_size = self.__size
    return nil if my_size._equal?(0)
    sep = Maglev::Type.coerce_to(sep, String, :to_str)
    if sep == "\n"
      tail = self.__at(-1)
      drop = 0
      if tail.eql?( ?\n )
        # a "\r" directly before the "\n" goes as well
        drop += 1 if self.__at(-2).eql?( ?\r ) && my_size > 1
      elsif tail.not_eql?( ?\r )
        return nil
      end
      drop += 1
      self.__size=(my_size - drop)
      return self
    end
    sep_len = sep.__size
    if sep_len._equal?(0)
      keep = my_size
      while keep > 0 && self.__at(keep-1).eql?( ?\n )
        if keep > 1 && self.__at(keep-2).eql?( ?\r )
          keep -= 2
        else
          keep -= 1
        end
      end
      return nil if keep._equal?( my_size )
      self.__size=(keep)
    else
      return nil if sep_len > my_size
      # compare the two tails using negative (from-the-end) offsets
      offset = -sep_len
      while offset < 0
        return nil if sep.__at(offset) != self.__at(offset)
        offset += 1
      end
      self.__size=(my_size - sep_len)
    end
    self
  end
  459. # call-seq:
  460. # str.chop => new_str
  461. #
  462. # Returns a new <code>String</code> with the last character removed. If the
  463. # string ends with <code>\r\n</code>, both characters are removed. Applying
  464. # <code>chop</code> to an empty string returns an empty
  465. # string. <code>String#chomp</code> is often a safer alternative, as it leaves
  466. # the string unchanged if it doesn't end in a record separator.
  467. #
  468. # "string\r\n".chop #=> "string"
  469. # "string\n\r".chop #=> "string\n"
  470. # "string\n".chop #=> "string"
  471. # "string".chop #=> "strin"
  472. # "x".chop.chop #=> ""
  # Non-destructive chop: builds a copy through the receiver's own class
  # (preserving species), chops that copy in place and returns it.
  def chop
    copy = self.class.__withAll(self) # preserve species
    copy.chop!
    copy
  end
  478. # str.chop! => str or nil
  479. #
  480. # Processes <i>str</i> as for <code>String#chop</code>, returning <i>str</i>,
  481. # or <code>nil</code> if <i>str</i> is the empty string. See also
  482. # <code>String#chomp!</code>.
  # Removes the last character in place; a trailing "\r\n" is removed as a
  # pair. Returns the receiver, or nil when the receiver is empty (no
  # modification made).
  def chop!
    my_size = self.__size
    return nil if my_size._equal?( 0 ) # no modification made
    ends_with_crlf = self.__at(-1).eql?( ?\n ) &&
                     my_size > 1 && self.__at(-2).eql?( ?\r )
    if ends_with_crlf
      self.__size=(my_size - 2)
    else
      self.__size=(my_size - 1)
    end
    self
  end
  # concat is another name for the append operator.
  alias concat <<
  # def count(*args); end
  # arg to rubyCount: is expected to be an Array , so declare as 'count*'
  primitive 'count*', 'rubyCount:'
  # MNI: crypt
  # NOTE(review): "MNI" presumably marks a method that is not implemented --
  # confirm.
  # delete / delete! use the same trailing-'*' declaration form as count.
  primitive 'delete*', 'rubyDelete:'
  primitive 'delete!*', 'rubyDeleteInPlace:'
  # asLowercase is a smalltalk to:do: loop in CharacterCollection
  # These two primitives are called by downcase and downcase! in this file.
  primitive '__downcase', 'asLowercase'
  primitive '__downcase!', 'rubyDowncaseInPlace'
  # Returns the result of the __downcase primitive.
  # No taint propagation is performed.
  def downcase
    __downcase
  end
  # In-place downcase. Raises TypeError on a frozen receiver; otherwise
  # returns whatever the __downcase! primitive returns.
  def downcase!
    if frozen?
      raise TypeError, "can't modify frozen string"
    end
    __downcase!
  end
  primitive '__dumpInto' , 'rubyDumpInto:'
  # Allocates an empty string of the receiver's class, has the __dumpInto
  # primitive fill it from the receiver, and returns it.
  def dump
    target = self.class.__alloc
    self.__dumpInto(target)
    target
  end
  524. # Splits <i>self</i> using the supplied parameter as the record separator
  525. # (<code>$/</code> by default), passing each substring in turn to the supplied
  526. # block. If a zero-length record separator is supplied, the string is split on
  527. # <code>\n</code> characters, except that multiple successive newlines are
  528. # appended together.
  529. #
  530. # print "Example one\n"
  531. # "hello\nworld".each {|s| p s}
  532. # print "Example two\n"
  533. # "hello\nworld".each('l') {|s| p s}
  534. # print "Example three\n"
  535. # "hello\n\n\nworld".each('') {|s| p s}
  536. #
  537. # <em>produces:</em>
  538. #
  539. # Example one
  540. # "hello\n"
  541. # "world"
  542. # Example two
  543. # "hel"
  544. # "l"
  545. # "o\nworl"
  546. # "d"
  547. # Example three
  548. # "hello\n\n\n"
  549. # "world"
  # Yields successive records of the receiver to the block, split on a_sep.
  # Returns a StringEachEnumerator when no block is given, otherwise the
  # receiver. A nil separator yields the whole receiver once.
  def each(a_sep=$/, &block)
    # Modified Rubinius
    unless block_given?
      return StringEachEnumerator.new(self, :each , a_sep) # for 1.8.7
    end
    if a_sep._equal?(nil)
      block.call(self)
      return self
    end
    sep = Maglev::Type.coerce_to(a_sep, String, :to_str)
    # algorithm replicated in StringEachEnumerator
    # id = self.__id__
    my_size = self.__size
    sep_size = sep.__size
    # `newline` is the last character of the separator ("\n" for an empty one)
    newline = sep_size._equal?(0) ? ?\n : sep.__at(sep_size - 1)
    # `last` is the zero-based start of the record not yet yielded;
    # `i` is the scan position.
    last = 0
    i = sep_size
    if sep_size._equal?(1)
      # single-character separator: a record ends right after each occurrence
      while i < my_size
        if self.__at(i-1)._equal?(newline)
          line = self.__at(last, i-last)
          block.call( line )
          # We don't have a way yet to check if the data was modified...
          # modified? id, my_size
          last = i
        end
        i += 1
      end
    elsif sep_size._equal?(0)
      # empty separator: a lone "\n" is skipped over; a run of two or more
      # "\n" is consumed whole before the record is yielded
      while i < my_size
        if self.__at(i).eql?( ?\n )
          if self.__at(i+=1).not_eql?( ?\n )
            i += 1
            next
          end
          i += 1 while i < my_size && self.__at(i).eql?( ?\n )
        end
        if i > 0 && self.__at(i-1)._equal?( newline )
          line = self.__at(last, i-last)
          # line.taint if tainted?
          block.call( line )
          # We don't have a way yet to check if the data was modified...
          #modified? id, my_size
          last = i
        end
        i += 1
      end
    else
      # multi-character separator: test the last character first, then
      # compare the whole separator via __at_equals
      i += 1
      while i < my_size
        if self.__at(i-1)._equal?(newline) &&
          (sep_size < 2 || self.__at_equals( i - sep_size + 1, sep))
          line = self.__at(last, i-last)
          # line.taint if tainted?
          block.call( line )
          # We don't have a way yet to check if the data was modified...
          #modified? id, my_size
          last = i
        end
        i += 1
      end
    end
    # yield whatever follows the last record boundary
    unless last._equal?(my_size)
      line = self.__at(last, my_size-last+1)
      # line.taint if tainted?
      block.call( line)
    end
    self
  end
  alias each_line each
  # Calls the block with self.__ordAt(n) for each position n and returns
  # the receiver; without a block an ArrayEnumerator is returned (1.8.7).
  #
  # The size is deliberately re-read on every iteration: specs require the
  # loop to follow the new end when the string grows or shrinks in the yield.
  def each_byte(&block)
    return ArrayEnumerator.new(self, :each_byte) unless block_given? # for 1.8.7
    pos = 0
    while pos < self.__size
      block.call( self.__ordAt(pos) )
      pos += 1
    end
    self
  end
  633. alias each_char chars # changed to an alias for 1.8.7
  634. primitive 'empty?', 'isEmpty'
  # True as soon as one of the given suffixes matches the end of the
  # receiver, false otherwise. (Added for 1.8.7.)
  #
  # Each argument is coerced with to_str; any error raised while coercing
  # or comparing a candidate is swallowed and that candidate is skipped.
  def end_with?(*args) # added for 1.8.7
    my_siz = self.__size
    count = args.__size
    pos = 0
    while pos < count
      candidate = args[pos]
      begin
        candidate = Maglev::Type.coerce_to(candidate, String, :to_str)
        return true if self.__at_equals(my_siz - candidate.__size + 1 , candidate)
      rescue
        # ignore non-coercable arg
      end
      pos += 1
    end
    false
  end
  653. primitive 'eql?', '='
  # Logical negation of eql? .
  def not_eql?(other)
    !eql?(other)
  end
  # Copies the text between a zero-based start offset and the zero-based
  # start of a match. The end is clamped to the receiver's size; `from` is
  # shifted by one because it is passed on to __copyfrom_to as from + 1.
  def _gsub_copyfrom_to(from, match_start)
    sz = self.__size
    to = match_start > sz ? sz : match_start # match_start is zero based
    self.__copyfrom_to( from + 1 , to )
  end
  664. # Returns a copy of <i>self</i> with <em>all</em> occurrences of <i>pattern</i>
  665. # replaced with either <i>replacement</i> or the value of the block. The
  666. # <i>pattern</i> will typically be a <code>Regexp</code>; if it is a
  667. # <code>String</code> then no regular expression metacharacters will be
  668. # interpreted (that is <code>/\d/</code> will match a digit, but
  669. # <code>'\d'</code> will match a backslash followed by a 'd').
  670. #
  671. # If a string is used as the replacement, special variables from the match
  672. # (such as <code>$&</code> and <code>$1</code>) cannot be substituted into it,
  673. # as substitution into the string occurs before the pattern match
  674. # starts. However, the sequences <code>\1</code>, <code>\2</code>, and so on
  675. # may be used to interpolate successive groups in the match.
  676. #
  677. # In the block form, the current match string is passed in as a parameter, and
  678. # variables such as <code>$1</code>, <code>$2</code>, <code>$`</code>,
  679. # <code>$&</code>, and <code>$'</code> will be set appropriately. The value
  680. # returned by the block will be substituted for the match on each call.
  681. #
  682. # "hello".gsub(/[aeiou]/, '*') #=> "h*ll*"
  683. # "hello".gsub(/([aeiou])/, '<\1>') #=> "h<e>ll<o>"
  684. # "hello".gsub(/./) {|s| s[0].to_s + ' '} #=> "104 101 108 108 111 "
  685. #
  686. # Generic version of gsub, for aliasing
  687. # BOTH BRANCHES COPIED FROM SPECIALIZED VERSIONS BELOW
  # Generic version of gsub, for aliasing.
  # BOTH BRANCHES COPIED FROM SPECIALIZED VERSIONS BELOW
  #
  # With an explicit replacement the substitution helper is used and its
  # first element (the new string) returned; with only a block the block
  # helper is used; with neither a StringGsubEnumerator is returned.
  def gsub(regex, replacement=MaglevUndefined, &block)
    unless replacement._equal?(MaglevUndefined)
      return __gsub_perform_substitution(regex, replacement)[0]
    end
    return __gsub_perform_block_substitution(regex, &block) if block
    StringGsubEnumerator.new(self, :gsub, regex)
  end
  697. # specialized version for invocation with block
  698. # COPY TO ELSE BRANCH OF GENERIC VERSION ABOVE IF CHANGED
  # Specialized gsub for invocation with a block; returns a
  # StringGsubEnumerator when the block is missing.
  # COPY TO ELSE BRANCH OF GENERIC VERSION ABOVE IF CHANGED
  def gsub(regex, &block)
    if block
      __gsub_perform_block_substitution(regex, &block)
    else
      StringGsubEnumerator.new(self, :gsub, regex)
    end
  end
  703. #-- Returns an array of [newvalue, modified], where modified is true if a
  704. # substitution was performed. The old gsub! code tried to compare self
  705. # == gsub(...) to see if a substitution was performed, but that returned
  706. # incorrect results for something like "replace the last 's' with an 's'"
  707. # (which breaks Rails routing...)
  708. #++
  # Specialized gsub for an explicit replacement: returns the new string,
  # i.e. the first element of the [newvalue, modified] pair produced by
  # __gsub_perform_substitution.
  def gsub(regex, replacement)
    pair = __gsub_perform_substitution(regex, replacement)
    pair[0]
  end
  # Replaces every match of regex in the receiver and returns
  # [new_string, modified], where modified is true if at least one match
  # was found.
  #
  # replacement may be a Hash (looked up with the matched text; non-String
  # values are converted with to_s) or anything coercible with to_str, in
  # which case escapes are expanded by __to_sub_replacement.
  # The last match (or nil) is stored into the caller's $~ .
  def __gsub_perform_substitution(regex, replacement)
    # Phase 1: convert arguments to the correct types
    hash = Maglev::Type.__coerce_to_Hash_to_hash_or_nil(replacement)
    if hash._equal?(nil)
      replacement = Maglev::Type.coerce_to(replacement, String, :to_str)
    end
    changed = false
    # Phase 2: prepare the substitution loop
    result = self.class.__alloc
    result.force_encoding(self.encoding) # TODO: if force encoding is implemented this should be tested
    cursor = 0
    pattern = self.__get_pattern(regex, true)
    final_match = nil
    # Phase 3: substitute
    pattern.__each_match(self) do |match|
      changed = true
      final_match = match
      # text between the previous match and this one
      result.__append_internal(self._gsub_copyfrom_to(cursor, match.begin(0)))
      if hash
        piece = hash[match.to_s]
        piece = piece.to_s unless piece.kind_of?(String)
      else
        piece = replacement.__to_sub_replacement(match)
      end
      piece.force_encoding(self.encoding) # TODO: if force encoding is implemented this should be tested
      result.__append_internal(piece)
      cursor = match.end(0)
    end
    # text from the last match to the end of the string
    result.__append_internal(self.__copyfrom_to(cursor + 1, self.__size))
    final_match.__storeRubyVcGlobal(0x30) # store into caller's $~
    return [result, changed]
  end
  # Block form of the substitution: each match is replaced by the to_s of
  # the block's result for the matched text. Returns the new string.
  #
  # $~ and related variables will be valid in the block if the block's home
  # method and the caller's home method are the same. Around each call the
  # block's $~ slot is set to the current match and restored afterwards,
  # even if the block raises. The last match (or nil) is stored into the
  # caller's $~ at the end.
  def __gsub_perform_block_substitution(regex, &block)
    cursor = 0
    result = self.class.__alloc
    final_match = nil
    self.__get_pattern(regex, true).__each_match_vcgl(self, 0x30) do |match|
      final_match = match
      result.__append_internal(self._gsub_copyfrom_to(cursor, match.begin(0)))
      saved_tilde = block.__fetchRubyVcGlobal(0)
      begin
        block.__setRubyVcGlobal(0, match)
        result.__append_internal(block.call(match.__at(0)).to_s)
      ensure
        block.__setRubyVcGlobal(0, saved_tilde)
      end
      cursor = match.end(0)
    end
    result.__append_internal(self.__copyfrom_to(cursor + 1, self.__size))
    final_match.__storeRubyVcGlobal(0x30) # store into caller's $~
    result
  end
  769. # From Rubinius
  # Expands the backslash escapes of a replacement template (the receiver)
  # against a match and returns the expanded string. (From Rubinius.)
  #
  #   \&  whole match        \`  text before the match
  #   \'  text after it      \+  last non-nil capture
  #   \0 .. \9  numbered group   \\  a single backslash
  # An unknown escape is copied through with its backslash; a lone
  # backslash at the very end is kept as is.
  def __to_sub_replacement(match)
    expanded = ""
    limit = self.__size
    pos = 0
    while pos < limit
      # copy the literal run up to the next backslash
      bslash = pos
      bslash += 1 while bslash < limit && self.__at(bslash) != ?\\
      expanded << self.__at(pos, bslash - pos)
      break if bslash == limit
      # found backslash escape, looking next
      if bslash == limit - 1
        expanded << ?\\ # backslash at end of string
        break
      end
      pos = bslash + 1
      cap = self.__at(pos)
      if cap.eql?( ?& )
        expanded << match.__at(0)
      elsif cap.eql?( ?` )
        expanded << match.pre_match
      elsif cap.eql?( ?' )
        expanded << match.post_match
      elsif cap.eql?( ?+ )
        expanded << match.captures.compact.__at(-1).to_s
      elsif cap >= ?0 && cap <= ?9
        expanded << match.__at(cap.to_i).to_s
      elsif cap.eql?( ?\\ ) # escaped backslash
        expanded << '\\'
      else # unknown escape
        expanded << '\\'
        expanded << cap
      end
      pos += 1
    end
    return expanded
  end
  # Returns a new string (same class as the receiver) in which the span
  # covered by match is replaced.
  #
  # A nil replacement just removes the matched span. Otherwise the
  # replacement is inserted either after escape expansion through
  # __to_sub_replacement (flag true, the default) or verbatim (flag false).
  def __replace_match_with(match, replacement, flag=true)
    result = self.class.__alloc
    result.__append_internal(self._gsub_copyfrom_to(0, match.begin(0) ))
    unless replacement._equal?(nil)
      insertion = flag ? replacement.__to_sub_replacement(match) : replacement
      result.__append_internal(insertion)
    end
    result.__append_internal(self.__copyfrom_to(match.end(0) + 1, self.__size))
    result
  end
  # In-place gsub with an explicit replacement. Returns nil when no
  # substitution was performed, otherwise the result of replacing the
  # receiver's contents (replace detects a frozen receiver).
  def gsub!(regex, str)
    result, modified = __gsub_perform_substitution(regex, str)
    return nil unless modified
    replace(result) # replace detects frozen
  end
  # In-place gsub with a block. Without a block a StringGsubEnumerator is
  # returned. Returns nil when the substituted text equals the receiver,
  # otherwise the result of replacing the receiver's contents (replace
  # detects a frozen receiver).
  def gsub!(regex, &block)
    unless block
      return StringGsubEnumerator.new(self, :gsub!, regex)
    end
    substituted = __gsub_perform_block_substitution(regex, &block)
    return nil if self == substituted
    replace(substituted) # replace detects frozen
  end
  # Returns the receiver with every underscore deleted and the result
  # stripped. The delete pass is skipped when __indexOfByte reports no
  # underscore (it returns 0; offsets and results are one-based).
  #
  # The dead initial assignment to the index local has been removed; it was
  # overwritten before ever being read.
  def __delete_underscore_strip
    str = self
    idx = str.__indexOfByte( ?_.ord , 1 )
    unless idx._equal?(0)
      str = str.delete('_')
    end
    str.strip
  end
  # Returns the receiver itself when it contains no underscore, otherwise
  # a copy with every underscore deleted.
  def __delete_underscore
    first = self.__indexOfByte( ?_.ord , 1 ) # one-based; 0 means not found
    return self if first._equal?(0)
    self.delete('_')
  end
  # Returns a copy of the receiver in which the first character of the
  # first "__" pair is replaced by "Z"; returns the receiver itself when
  # there is no such pair.
  #
  # An unused local (`dest_idx`) has been removed.
  def __zreplace_first_double_underscore
    idx = 0
    lim = self.__size
    ch = self.__at(idx)
    while idx < lim
      # look one character ahead so each character is fetched only once
      nxt = self.__at(idx + 1)
      if ch.eql?( ?_ ) && nxt.eql?( ?_ )
        str = self.dup
        str[idx] = ?Z
        return str
      end
      idx += 1
      ch = nxt
    end
    self
  end
  # Works on a copy of the receiver: drops every underscore that is not
  # directly followed by another underscore, then returns the copy stripped.
  #
  # Phase 1 scans for the first such underscore; if none exists the copy is
  # only stripped. Phase 2 compacts the remaining characters in place over
  # the dropped ones, after which the copy is truncated to the write position.
  def __delete_single_underscores_strip
    work = self.dup
    pos = 0
    write_at = nil
    limit = work.__size
    cur = work.__at(pos)
    # Phase 1: find the first underscore to drop
    while pos < limit
      following = work.__at(pos + 1)
      if cur.eql?( ?_ ) && following.not_eql?( ?_ )
        write_at = pos
        break
      end
      pos += 1
      cur = following
    end
    # Phase 2: shift the kept characters down over the dropped ones
    while pos < limit
      following = work.__at(pos + 1)
      unless cur.eql?( ?_ ) && following.not_eql?( ?_ )
        work[write_at] = cur
        write_at += 1
      end
      pos += 1
      cur = following
    end
    if write_at._not_equal?(nil)
      work.size=(write_at)
    end
    work.strip
  end
  # Interprets the leading part of the receiver as a hexadecimal number.
  #
  # Because 0b1 is a proper hex number, rather than the binary number 1,
  # the generic conversion is not reused here: only an optional sign and an
  # optional 0x / 0X prefix are recognised before the digits. Single
  # underscores are removed and the text stripped first (for 1.8.7).
  def hex
    s = self.__delete_single_underscores_strip # for 1.8.7
    s =~ /^([+-]?)(0[xX])?([[:xdigit:]]*)/
    sign_str = $1
    num = Integer.__from_string_radix( $3 , 16)
    sign_str[0].eql?( ?- ) ? num * -1 : num
  end
  915. primitive 'hash' , 'hash'
  # True when #index finds the item in the receiver.
  # A Fixnum item is first reduced modulo 256.
  def include?(item)
    item = item % 256 if item._isFixnum
    self.index(item)._not_equal?(nil)
  end
  primitive_nobridge '__indexOfByte', 'indexOfByte:startingAt:' # one-based offset/result
  # Generic variant of #index; calls come here via a bridge from args*, and
  # the block is ignored. The search starts at 0 when no offset is given,
  # otherwise at the offset coerced with to_int.
  def index(item, offset=MaglevUndefined, &block)
    start = 0
    unless offset._equal?(MaglevUndefined)
      start = Maglev::Type.coerce_to(offset, Integer, :to_int)
    end
    self.__index(item, start, 0x50)
  end
  # One-argument variant of #index: searches from offset 0.
  # The variants are coded explicitly so that the number of frames from
  # __index_string to the caller stays constant.
  def index(item)
    __index(item, 0, 0x40)
  end
  # One-argument-plus-block variant of #index: searches from offset 0.
  # The block is not used.
  def index(item, &block)
    __index(item, 0, 0x40)
  end
  # Two-argument variant of #index: the offset is coerced with to_int
  # before the search starts.
  def index(item, offset)
    start = Maglev::Type.coerce_to(offset, Integer, :to_int)
    self.__index(item, start, 0x40)
  end
  # Two-argument-plus-block variant of #index; the block is not used.
  #
  # Fix: the offset is now coerced with to_int, exactly as in the blockless
  # two-argument variant. Previously an offset that was not already an
  # Integer reached __index uncoerced.
  def index(item, offset, &block)
    offset = Maglev::Type.coerce_to(offset, Integer, :to_int)
    self.__index(item, offset, 0x40)
  end
  # Shared worker behind the #index variants.
  #
  # zoffset is a zero-based start offset; a negative one counts from the
  # end. Returns the zero-based position of the first occurrence at or
  # after the offset, or nil (also when the offset lies outside the
  # receiver). vcgl_idx is only passed on to the Regexp search.
  #
  #   String  - substring search; an empty item matches at the offset.
  #   Integer - byte search; values outside 0..255 never match.
  #   Regexp  - delegated to __index_string.
  #   other   - coerced to a string or integer and retried through #index
  #             (raises TypeError if item is a Symbol).
  def __index(item, zoffset, vcgl_idx)
    my_size = self.__size
    zoffset += my_size if zoffset < 0
    return nil if zoffset < 0 || zoffset > my_size
    if item._isString
      return zoffset if item.__size._equal?(0)
      found = self.__findStringStartingAt(item, zoffset + 1)
      found._equal?(0) ? nil : found - 1
    elsif item._isInteger
      return nil if item > 255 || item < 0
      found = self.__indexOfByte(item % 256, zoffset + 1)
      found._equal?(0) ? nil : found - 1
    elsif item._isRegexp
      item.__index_string(self, zoffset, vcgl_idx)
    else
      coerced = Maglev::Type.__coerce_to_string_or_integer(item)
      self.index(coerced, zoffset)
    end
  end
  primitive_nobridge '__insertall_at', 'insertAll:at:'
  # Inserts string into the receiver before the character at index and
  # returns the receiver. A negative index counts from the end, so -1
  # appends. Raises IndexError when the index falls outside the string.
  def insert(index, string)
    index = Maglev::Type.coerce_to(index, Integer, :to_int)
    string = Maglev::Type.coerce_to(string, String, :to_str)
    my_size = size
    # translate to the one-based position the smalltalk primitive expects
    st_idx = index < 0 ? index + my_size + 2 : index + 1
    unless st_idx > 0 && st_idx <= my_size + 1
      raise IndexError, "index #{index} out of string"
    end
    __insertall_at(string, st_idx) # Flip order of parameters
    self
  end
  # __as_symbol is bound to 'asSymbol'; unlike intern (below) it is not guarded
  # by an emptiness check.
  981. primitive '__as_symbol', 'asSymbol' # allows zero size Symbols
  # inspect is bound directly to the '_rubyInspect' selector.
  982. primitive 'inspect', '_rubyInspect'
  # Converts the receiver to a Symbol.
  # Raises ArgumentError for an empty string or one containing a zero byte.
  def intern
    raise ArgumentError , 'cannot intern zero sized String' if self.__size._equal?(0)
    # __indexOfByte answers 0 when the byte is absent
    unless self.__indexOfByte(0, 1)._equal?(0)
      raise ArgumentError, 'symbol string may not contain `\\0\' '
    end
    self.__as_symbol
  end
  992. # to_sym is aliased to intern, see below
  # Binds _paddedToWithString to the selector 'padded:to:withString:'. justify
  # (below) calls it with (direction, width, padstr).
  993. primitive "_paddedToWithString", "padded:to:withString:"
  # Pads the receiver IN PLACE to width using padstr and returns self; when the
  # receiver is already at least width long, returns an unmodified copy instead.
  # ljust/rjust call this on a dup.
  #   width     - target size; coerced with to_int unless already a Fixnum
  #   direction - passed through to the padding primitive (:left / :right here)
  #   padstr    - padding text, coerced with to_str
  # Raises ArgumentError when padstr is empty.
  #
  # This started off as Rubinius, but was heavily modified since most
  # work is done in smalltalk.
  def justify(width, direction, padstr=" ")
    padstr = Maglev::Type.coerce_to(padstr, String, :to_str)
    raise ArgumentError, "zero width padding" if padstr.__size._equal?(0)
    width = Maglev::Type.coerce_to(width, Integer, :to_int) unless width._isFixnum
    # nothing to pad: hand back a copy (the unused pad-size local was dropped)
    return dup unless width > size
    _paddedToWithString(direction, width, padstr)
    # taint if padstr.tainted?
    self
  end
  # length is bound to the 'size' selector.
  1010. primitive 'length', 'size'
  # lines is simply another name for each in this (1.8.7-era) implementation.
  1011. alias lines each # added for 1.8.7 , String#each goes away in 1.9
  # Returns the result of justify(width, :left, padstr) applied to a copy of
  # the receiver, so the receiver itself is never modified. (from Rubinius)
  def ljust(width, padstr = " ")
    dup.justify(width, :left, padstr)
  end
  # lstrip and lstrip! are bound to the '_rubyLstrip' and '_rubyLstripInPlace'
  # selectors respectively.
  1016. primitive 'lstrip', '_rubyLstrip'
  1017. primitive 'lstrip!', '_rubyLstripInPlace' # in .mcz
  # Matches the receiver against pattern.
  # A Regexp is used as is; a String is compiled with Regexp.new; anything else
  # is converted through to_str first.
  # Raises TypeError when that conversion (or compilation) fails with a
  # StandardError.
  def match(pattern)
    rx = pattern
    unless pattern._isRegexp
      if pattern._isString
        rx = Regexp.new(pattern)
      else
        begin
          rx = Regexp.new(pattern.to_str)
        rescue StandardError
          raise TypeError, "wrong argument type #{pattern.class} (expected Regexp)"
        end
      end
    end
    # 0x30 is passed through unchanged to __match_vcglobals
    rx.__match_vcglobals(self, 0x30)
  end
  1032. # MNI: next
  1033. # MNI: next!
  # Interprets the receiver as a number, defaulting to base 8.
  # __extract_base(8) is expected to answer [base, sign_string, body]
  # (inferred from how the three slots are used below).
  def oct
    cleaned = self.__zreplace_first_double_underscore.strip # for 1.8.7
    parts = cleaned.__extract_base(8)
    radix = parts.__at(0)
    sign_part = parts.__at(1)
    digits = parts.__at(2)
    lead = digits.__at(0)
    # redundant sign character is not an octal digit
    return 0 if lead.eql?( ?+ ) || lead.eql?( ?- )
    value = Integer.__from_string_radix(digits, radix)
    sign_part[0].eql?( ?- ) ? value * -1 : value
  end
  # Splits the receiver around the FIRST occurrence of pattern and returns
  # [before, match, after]; when nothing matches returns [copy_of_self, '', ''].
  # pattern may be a String, a Regexp, or anything coercible with to_str.
  # (added for 1.8.7)
  def partition(pattern)
    if pattern._isRegexp
      md = pattern.__search(self, 0, nil)
      md.__storeRubyVcGlobal(0x20) # store into caller's $~
      return [ self.dup , '', '' ] if md._equal?(nil)
      idx = md.begin(0)
      mid = md[0]
      return [ self[0, idx], mid , self[idx + mid.__size, self.__size] ]
    end
    unless pattern._isString
      pstr = Maglev::Type.coerce_to(pattern, String, :to_str)
      return self.partition(pstr)
    end
    sep_size = pattern.__size
    unless sep_size._equal?(0)
      # primitive answers a one-based position, 0 when absent
      found = self.__findStringStartingAt(pattern, 1)
      unless found._equal?(0)
        zpos = found - 1
        return [self[0, zpos ] , self[zpos , sep_size] , self[zpos + sep_size, self.__size]]
      end
    end
    [ self.dup , '', '' ]
  end
  # Block form: returns [elements for which the block is true, the rest],
  # iterating with each.
  # Reimplemented for 1.8.7, otherwise bridge methods hide the implementation
  # in Enumerable.
  def partition(&block)
    accepted = []
    rejected = []
    each do |elem|
      if block.call(elem)
        accepted.push(elem)
      else
        rejected.push(elem)
      end
    end
    [accepted, rejected]
  end
  # replace and reverse are bound to the '_rubyReplace:' and 'reverse' selectors.
  1082. primitive 'replace', '_rubyReplace:'
  1083. primitive 'reverse', 'reverse'
  # __reverse_from backs reverse! below, which passes the receiver itself.
  1084. primitive_nobridge '__reverse_from', '_reverseFrom:'
  # Reverses the receiver in place by handing the receiver to the
  # __reverse_from primitive, which returns self.
  def reverse!
    __reverse_from(self)
  end
  # Backward-search primitives used by __rindex and rpartition. Callers pass a
  # one-based start position and treat a result of 0 as "not found".
  1088. primitive_nobridge '__lastSubstring', 'findLastSubString:startingAt:'
  1089. primitive_nobridge '__indexOfLastByte', 'indexOfLastByte:startingAt:'
  1090. # Return the index of the last occurrence of the given substring,
  1091. # character or pattern in self. Returns nil if not found. If the second
  1092. # parameter is present, it specifies the position in the string to end
  1093. # the search -- characters beyond this point will not be considered.
  # Bridge variant of String#rindex (reached via a bridge from args*).
  # Delegates to __rindex, which itself handles an omitted offset
  # (MaglevUndefined) and the to_int coercion. The block is never used.
  #
  # The original delegated to __index: that searched forwards and tried to do
  # arithmetic on MaglevUndefined when no offset was given.
  def rindex(item, offset=MaglevUndefined, &block)
    self.__rindex(item, offset, 0x50)
  end
  # One-argument String#rindex: search backwards from the end of the receiver.
  def rindex(item)
    # Written out explicitly (rather than delegating to another overload) so
    # the number of frames to __rindex_string is known.
    self.__rindex(item, MaglevUndefined, 0x40)
  end
  # Same as rindex(item); the block is accepted but never used.
  def rindex(item, &block)
    self.__rindex(item, MaglevUndefined, 0x40)
  end
  # Two-argument String#rindex with a block (never used); offset coercion is
  # done inside __rindex.
  def rindex(item, original_offset, &block)
    self.__rindex(item, original_offset, 0x40)
  end
  # Two-argument String#rindex: original_offset is the last position considered;
  # it is coerced and range-checked inside __rindex.
  def rindex(item, original_offset)
    self.__rindex(item, original_offset, 0x40)
  end
  # Shared implementation behind every String#rindex variant.
  #   item            - String, Integer byte value, Regexp, or to_str-coercible
  #   original_offset - MaglevUndefined (search from the end) or a value
  #                     coercible with to_int; negative counts from the end
  #   vcgl_idx        - opaque flag passed through to Regexp#__rindex_string
  # Returns the zero-based index of the last occurrence at or before the
  # offset, or nil.
  #
  # Fix: an empty receiver used to answer nil unconditionally, so
  # "".rindex("") was nil although MRI answers 0. Only the empty-String needle
  # is let through for an empty receiver; every other item still answers nil
  # there, exactly as before. The ternary that re-tested for an empty receiver
  # after the early return was dead and is gone.
  def __rindex(item, original_offset, vcgl_idx)
    my_size = self.__size
    if my_size._equal?(0)
      # An empty receiver can only contain the empty string.
      return nil unless item._isString && item.__size._equal?(0)
    end
    if original_offset._equal?(MaglevUndefined)
      was_undef = true
      zoffset = my_size
    else
      was_undef = false
      zoffset = Maglev::Type.coerce_to(original_offset, Integer, :to_int)
      zoffset += my_size if zoffset < 0
    end
    return nil if zoffset < 0
    if item._isString
      zorig = zoffset
      zoffset = my_size - 1 if zoffset >= my_size
      if item.__size._equal?(0)
        # the empty string is found at the offset, clamped to the receiver size
        return my_size if was_undef || zorig >= my_size
        return zoffset
      end
      # primitive takes a one-based start and answers 0 when nothing is found
      st_idx = self.__lastSubstring(item, zoffset + 1)
      return st_idx._equal?(0) ? nil : st_idx - 1
    elsif item._isInteger
      return nil if item > 255 || item < 0
      zoffset = my_size - 1 if zoffset >= my_size
      st_idx = self.__indexOfLastByte(item % 256 , zoffset + 1)
      return st_idx._equal?(0) ? nil : st_idx - 1
    elsif item._isRegexp
      zoffset = my_size if zoffset > my_size # allow searching for end of string
      zidx = item.__rindex_string(self, zoffset, vcgl_idx)
      return zidx
    else
      coerced = Maglev::Type.coerce_to(item, String, :to_str)
      return self.rindex(coerced, original_offset)
    end
  end
  # Returns the result of justify(width, :right, padstr) applied to a copy of
  # the receiver, so the receiver itself is never modified. (from Rubinius)
  def rjust(width, padstr = " ")
    dup.justify(width, :right, padstr)
  end
  # Splits the receiver around the LAST occurrence of pattern and returns
  # [before, match, after]; when nothing matches returns ['', '', copy_of_self].
  # pattern may be a String, a Regexp, or anything coercible with to_str.
  # (added for 1.8.7)
  def rpartition(pattern)
    my_siz = self.__size
    if pattern._isRegexp
      md = pattern.__search(self, my_siz , 0)
      md.__storeRubyVcGlobal(0x20) # store into caller's $~
      return [ '', '', self.dup ] if md._equal?(nil)
      idx = md.begin(0)
      mid = md[0]
      return [ self[0, idx], mid , self[idx + mid.__size, self.__size] ]
    end
    unless pattern._isString
      pstr = Maglev::Type.coerce_to(pattern, String, :to_str)
      return self.rpartition(pstr)
    end
    sep_size = pattern.__size
    unless sep_size._equal?(0)
      # primitive answers a one-based position, 0 when absent
      found = self.__lastSubstring(pattern, my_siz)
      unless found._equal?(0)
        zpos = found - 1
        return [self[0, zpos], self[zpos , sep_size], self[zpos + sep_size, self.__size]]
      end
    end
    [ '', '', self.dup ]
  end
  # rstrip and rstrip! are bound to '_rubyRstrip' and '_rubyRstripInPlace'.
  1181. primitive 'rstrip', '_rubyRstrip'
  1182. primitive 'rstrip!', '_rubyRstripInPlace' # in .mcz
  1183. # def scan # implemented in common/string.rb
  # size and __size are both bound to the 'size' selector; size= and __size=
  # are both bound to 'size:'.
  1184. primitive 'size', 'size'
  1185. primitive '__size', 'size'
  1186. primitive 'size=', 'size:' # Note size=() not in MRI
  1187. primitive '__size=', 'size:'
  # slice is another name for [].
  1188. alias slice []
  1189. # str.slice!(fixnum) => fixnum or nil
  1190. # str.slice!(fixnum, fixnum) => new_str or nil
  1191. # str.slice!(range) => new_str or nil
  1192. # str.slice!(regexp) => new_str or nil
  1193. # str.slice!(other_str) => new_str or nil
  1194. #
  1195. # Deletes the specified portion from <i>str</i>, and returns the portion
  1196. # deleted. The forms that take a <code>Fixnum</code> will raise an
  1197. # <code>IndexError</code> if the value is out of range; the <code>Range</code>
  1198. # form will raise a <code>RangeError</code>, and the <code>Regexp</code> and
  1199. # <code>String</code> forms will silently ignore the assignment.
  1200. #
  1201. # string = "this is a string"
  1202. # string.slice!(2) #=> 105
  1203. # string.slice!(3..6) #=> " is "
  1204. # string.slice!(/s.*t/) #=> "sa st"
  1205. # string.slice!("r") #=> "r"
  1206. # string #=> "thing"
  # Bridge variant of slice!: forwards to the one- or two-argument
  # implementation depending on whether a2 was supplied. The block is unused.
  def slice!(a1, a2=MaglevUndefined, &block)
    return self.slice!(a1) if a2._equal?(MaglevUndefined)
    self.slice!(a1, a2)
  end
  # Two-argument slice!.
  #   slice!(regexp, group) - removes and returns the text of the given group
  #   slice!(start, len)    - removes and returns len characters from start
  # Returns nil when the region does not exist (negative length, start out of
  # range, no match); returns an empty instance of the receiver's class for an
  # empty region.
  def slice!(start, a_len)
    sz = self.__size
    if start._isRegexp
      span = self.__match_regexp(start, a_len, 0x30) # span is [ m_begin, m_len]
      return nil if span._equal?(nil)
      # r.taint if self.tainted? or start.tainted?
      return slice!(span.__at(0), span.__at(1))
    end
    start = Maglev::Type.coerce_to(start, Integer, :to_int)
    len = Maglev::Type.coerce_to(a_len, Integer, :to_int)
    return nil if len < 0
    return self.class.__alloc if len._equal?(0)
    start += sz if start < 0
    return nil if start < 0 || start > sz
    return self.class.__alloc if start._equal?(sz)
    # __remove_from_to will detect frozen if changes would occur
    removed = __at(start, len)
    last = start + len
    last = sz if last > sz
    __remove_from_to(start + 1, last) # convert to smalltalk indexing
    removed._equal?(nil) ? self.class.__alloc : removed
  end
  # One-argument slice!: arg may be a Regexp (first match is removed), a Range,
  # a String (first occurrence is removed) or an Integer index (the single
  # element at that index is removed and returned). Returns nil when nothing
  # matches.
  def slice!(arg)
    # Do NOT check for frozen here...fails specs
    if arg._isRegexp
      md = arg.__search(self, 0, nil) # inline Regexp#match to update $~
      md.__storeRubyVcGlobal(0x20) # store into caller's $~
      return nil if md._equal?(nil)
      raise TypeError, "can't modify frozen string" if self.frozen?
      from = md.begin(0)
      return slice!(from, md.end(0) - from)
    end
    if arg._isRange
      first, len = arg.__beg_len(self.__size)
      return nil if first._equal?(nil)
      return slice!(first, len)
    end
    if arg._isString
      found = self.__findStringStartingAt(arg, 1)
      return nil if found._equal?(0)
      return slice!(found - 1, arg.__size) # adjust coming from smalltalk
    end
    arg = Maglev::Type.coerce_to(arg, Integer, :to_int)
    piece = slice!(arg, 1)
    return nil if piece._equal?(nil)
    piece.__at(0)
  end
  # Matches regexp against the receiver and locates one capture group.
  #   regexp   - the Regexp to match
  #   length   - despite its name, the capture-group index (coerced with to_int)
  #   vcgl_idx - flag handed to __storeRubyVcGlobal when publishing $~
  # Returns [begin_offset, length] of the group, or nil when there is no match,
  # the index is out of range, or the group did not take part in the match.
  def __match_regexp(regexp, length, vcgl_idx)
    md = regexp.match(self)
    md.__storeRubyVcGlobal( vcgl_idx ) # update $~
    return nil if md._equal?(nil)
    idx = Maglev::Type.coerce_to(length, Integer, :to_int)
    return nil if idx >= md.size or idx < 0
    m_begin = md.begin(idx)
    # An optional group that did not participate has no offsets (MatchData#begin
    # answers nil in MRI); answer nil instead of failing on the subtraction.
    return nil if m_begin._equal?(nil)
    [m_begin, md.end(idx) - m_begin]
  end
  1275. # call-seq:
  1276. # str.split(pattern=$;, [limit]) => anArray
  1277. #
  1278. # Divides <i>str</i> into substrings based on a delimiter, returning an array
  1279. # of these substrings.
  1280. #
  1281. # If <i>pattern</i> is a <code>String</code>, then its contents are used as
  1282. # the delimiter when splitting <i>str</i>. If <i>pattern</i> is a single
  1283. # space, <i>str</i> is split on whitespace, with leading whitespace and runs
  1284. # of contiguous whitespace characters ignored.
  1285. #
  1286. # If <i>pattern</i> is a <code>Regexp</code>, <i>str</i> is divided where the
  1287. # pattern matches. Whenever the pattern matches a zero-length string,
  1288. # <i>str</i> is split into individual characters.
  1289. #
  1290. # If <i>pattern</i> is omitted, the value of <code>$;</code> is used. If
  1291. # <code>$;</code> is <code>nil</code> (which is the default), <i>str</i> is
  1292. # split on whitespace as if ` ' were specified.
  1293. #
  1294. # If the <i>limit</i> parameter is omitted, trailing null fields are
  1295. # suppressed. If <i>limit</i> is a positive number, at most that number of
  1296. # fields will be returned (if <i>limit</i> is <code>1</code>, the entire
  1297. # string is returned as the only entry in an array). If negative, there is no
  1298. # limit to the number of fields returned, and trailing null fields are not
  1299. # suppressed.
  1300. #
  1301. # " now's the time".split #=> ["now's", "the", "time"]
  1302. # " now's the time".split(' ') #=> ["now's", "the", "time"]
  1303. # " now's the time".split(/ /) #=> ["", "now's", "", "the", "time"]
  1304. # "1, 2.34,56, 7".split(%r{,\s*}) #=> ["1", "2.34", "56", "7"]
  1305. # "hello".split(//) #=> ["h", "e", "l", "l", "o"]
  1306. # "hello".split(//, 3) #=> ["h", "e", "llo"]
  1307. # "hi mom".split(%r{\s*}) #=> ["h", "i", "m", "o", "m"]
  1308. #
  1309. # "mellow yellow".split("ello") #=> ["m", "w y", "w"]
  1310. # "1,2,,3,4,,".split(',') #=> ["1", "2", "", "3", "4"]
  1311. # "1,2,,3,4,,".split(',', 4) #=> ["1", "2", "", "3,4,,"]
  1312. # "1,2,,3,4,,".split(',', -4) #=> ["1", "2", "", "3", "4", "", ""]
  # Entry point for String#split: normalizes limit, then dispatches on the kind
  # of pattern ('' -> per character, ' ' -> whitespace runs, other String ->
  # literal delimiter, anything else -> regexp path).
  def split(pattern=nil, limit=MaglevUndefined)
    return [] if self.__size._equal?(0)
    if limit._equal?(MaglevUndefined)
      # no limit given: unlimited fields, trailing empty fields dropped
      limit = nil
      limited = false
      suppress_trailing_empty = true
    else
      limit = Maglev::Type.coerce_to(limit, Integer, :to_int)
      return [self.dup] if limit._equal?(1)
      limited = (limit > 0)
      suppress_trailing_empty = limit._equal?(0)
    end
    pattern ||= ($; || " ")
    if pattern == ''
      return __split_chars(limit, limited, suppress_trailing_empty)
    end
    if pattern == ' '
      return __split_on_contiguous_whitespace(limit, limited, suppress_trailing_empty)
    end
    if pattern._isString
      return __split_string_on(pattern, limit, limited, suppress_trailing_empty)
    end
    __split_regex(pattern, limit, limited, suppress_trailing_empty)
  end
  # Binds __at_equals to the selector 'at:equals:'. start_with? calls it with
  # position 1 to test for a prefix.
  1337. primitive '__at_equals', 'at:equals:' # first arg is one-based offset, no coercion
  # Splits the receiver on the literal String delim.
  #   limit / limited         - when limited, at most limit fields are produced
  #   suppress_trailing_empty - drop empty fields at the end of the result
  # Returns an Array of substrings.
  def __split_string_on(delim, limit, limited, suppress_trailing_empty)
    fields = []
    delim_size = delim.__size
    lead = delim.__at(0)
    eos = self.__size
    max_splits = limited ? limit - 1 : 0
    splits = 0
    field_start = 0
    pos = 0
    while pos < eos
      # cheap first-element test before comparing the whole delimiter
      if self.__at(pos).eql?(lead) && self.__at(pos, delim_size).eql?(delim)
        fields << self.__at(field_start, (pos - field_start))
        splits += 1
        field_start = pos = pos + delim_size
        break if limited && splits == max_splits
      else
        pos += 1
      end
    end
    # remainder of the receiver after the last delimiter
    fields << self.__at(field_start, (eos - field_start)) unless limited && splits > limit
    if suppress_trailing_empty
      fields.pop while (tail = fields.last) && tail.empty?
    end
    fields
  end
  # Answers true when char (an Integer character code) is one of the ASCII
  # whitespace characters: space, \t, \n, \v, \f or \r.
  # Form feed (\f, 12) was missing from the original list although C isspace(),
  # which MRI's whitespace split relies on, treats it as whitespace.
  def __is_whitespace(char)
    char.eql?( ?\ .ord ) ||
      char.eql?( ?\t.ord) ||
      char.eql?( ?\n.ord) ||
      char.eql?( ?\v.ord) ||
      char.eql?( ?\f.ord) ||
      char.eql?( ?\r.ord)
  end
  # Starting at index, steps over a run of whitespace and answers the index of
  # the first non-whitespace character. When the rest of the receiver is all
  # whitespace the receiver's size (one past the last index) is answered.
  def __skip_contiguous_whitespace(index)
    eos = self.__size
    while index < eos
      code = self.__ordAt(index)
      # \t \n etc. are less than space which is 32, so test the range first
      break unless code <= 32 && __is_whitespace(code)
      index += 1
    end
    index
  end
  # Splits the receiver on runs of whitespace, ignoring leading whitespace.
  #   limit / limited         - when limited, splitting stops after limit - 1
  #                             fields and the remainder becomes the last field
  #   suppress_trailing_empty - do not append an empty final field
  # Returns an Array of substrings.
  def __split_on_contiguous_whitespace(limit, limited, suppress_trailing_empty)
    fields = []
    eos = self.__size
    splits = 0
    field_start = pos = __skip_contiguous_whitespace(0)
    max_splits = limited ? limit - 1 : 0
    while pos < eos
      code = self.__ordAt(pos)
      unless code <= 32 && __is_whitespace(code)
        pos += 1
        next
      end
      # hit whitespace: close the current field and jump to the next one
      fields << self.__at(field_start, (pos - field_start))
      splits += 1
      field_start = pos = __skip_contiguous_whitespace(pos)
      break if limited && splits == max_splits
    end
    tail = self.__at(field_start, (eos - field_start))
    fields << tail unless tail.empty? && suppress_trailing_empty
    fields
  end
  # Split on each character, honoring the limits.
  #   limit / limited         - when limited, at most limit fields are returned
  #                             and the last one holds the rest of the receiver
  #   suppress_trailing_empty - when false (and not limited) an empty trailing
  #                             field is appended
  # Returns an Array of substrings.
  def __split_chars(limit, limited, suppress_trailing_empty)
    result = []
    # lim will be the number of single characters in the result. If we are
    # limited, then the last element will be the rest of the string:
    #   'hi!'.split('', 2)   # => ['h', 'i!']
    #   'hi!'.split('', 3)   # => ['h', 'i', '!']
    my_siz = self.__size
    lim = my_siz
    # <= (not <): when limit equals the size, the final character must be the
    # "rest" field; otherwise an extra empty field pushes the result past limit.
    lim = (limit-1) if limited and limit <= lim
    index = 0
    while index < lim
      result << self.__at(index, 1)
      index += 1
    end
    result << self.__at(index, (my_siz - index)) if limited
    # self[0,0] returns an instance of the receiver: support for sub-classes
    result << self.__at(0,0) unless suppress_trailing_empty || limited
    result
  end
  # Splits the receiver using a Regexp (or a non-Regexp pattern that is coerced
  # with to_str and then matched literally via Regexp.quote).
  #   limit / limited         - when limited, matching stops once only one more
  #                             field may be added
  #   suppress_trailing_empty - drop empty fields at the end of the result
  # Returns an Array of substrings; for a String subclass each element is
  # re-wrapped in the receiver's class.
  # NOTE(review): match_from, pre_match_from and collapsing? are defined outside
  # this file chunk; collapsing? presumably means "matched the empty string" --
  # confirm before changing the start-position arithmetic below.
  1425. def __split_regex(pattern, limit, limited, suppress_trailing_empty)
  1426. unless pattern._isRegexp
  1427. pattern = Maglev::Type.coerce_to(pattern, String, :to_str)
  1428. pattern = Regexp.new(Regexp.quote(pattern))
  1429. end
  1430. start = 0
  1431. ret = []
  1432. last_match = nil
  1433. while match = pattern.match_from(self, start)
  # stop as soon as the remaining text has to become the final field
  1434. break if limited && limit - ret.__size <= 1
  1435. collapsed = match.collapsing?
  # a collapsing match at position 0 contributes no field
  1436. if !collapsed || !(match.begin(0)._equal?(0))
  1437. ret << match.pre_match_from(last_match ? last_match.end(0) : 0)
  # non-nil capture groups are added to the result as well
  1438. ret.push(*match.captures.compact)
  1439. end
  # advance the search position; collapsing matches move forward one step
  1440. if collapsed
  1441. start += 1
  1442. elsif last_match && last_match.collapsing?
  1443. start = match.end(0) + 1
  1444. else
  1445. start = match.end(0)
  1446. end
  1447. last_match = match
  1448. end
  1449. if ! last_match._equal?(nil)
  1450. pm = last_match.post_match
  1451. # self[0,0] returns an instance of the receiver: support for sub-classes
  1452. ret << (pm._equal?(nil) ? self.__at(0,0) : pm)
  1453. elsif ret.empty?
  # no match at all: the whole receiver is the only field
  1454. ret << self.dup
  1455. end
  1456. # Trim from end
  1457. if suppress_trailing_empty
  1458. while s = ret.last and s.empty?
  1459. ret.pop
  1460. end
  1461. end
  1462. # If we are matching the empty string, and we have matches, then
  1463. # we need to tack on the trailing empty string match.
  1464. # self[0,0] returns an instance of the receiver: support for sub-classes
  1465. ret << self.__at(0,0) if ret && limit && limit < 0 && last_match && last_match.collapsing?
  # re-wrap every field when the receiver is an instance of a String subclass
  1466. ret = ret.map { |str| self.class.__withAll(str) } if !self.instance_of?(String)
  1467. ret
  1468. end
  # squeeze / squeeze! bindings. The names ending in '*' map to the selectors
  # that take an argument ('rubySqueeze:' / 'rubySqueezeSelf:'); the
  # primitive_nobridge entries map the zero-argument forms.
  # NOTE(review): the '*' suffix presumably declares a rest-args variant -- confirm.
  1469. primitive 'squeeze*', 'rubySqueeze:'
  1470. primitive_nobridge 'squeeze', 'rubySqueeze'
  1471. primitive 'squeeze!*', 'rubySqueezeSelf:'
  1472. primitive_nobridge 'squeeze!', 'rubySqueezeSelf'
  # Answers true when the receiver starts with any of args. Each argument is
  # coerced with to_str; arguments that cannot be coerced (or otherwise raise)
  # are silently skipped. (added for 1.8.7)
  def start_with?(*args)
    count = args.__size
    n = 0
    while n < count
      begin
        prefix = Maglev::Type.coerce_to(args[n], String, :to_str)
        return true if self.__at_equals(1 , prefix)
      rescue
        # ignore elements of args not coercable
      end
      n += 1
    end
    false
  end
  # Single-argument start_with?: answers true when the receiver begins with
  # string (coerced with to_str), false otherwise -- including when the
  # argument cannot be coerced.
  def start_with?(string)
    prefix = Maglev::Type.coerce_to(string, String, :to_str)
    self.__at_equals(1 , prefix) ? true : false
  rescue
    # ignore arg not coercable
    false
  end
  # strip and strip! are bound to '_rubyStrip' and '_rubyStripInPlace'.
  1501. primitive 'strip', '_rubyStrip'
  1502. primitive 'strip!', '_rubyStripInPlace'
  1503. # Returns a copy of +str+ with the first occurrence of +pattern+ replaced
  1504. # with either +replacement+ or the value of the block. See the
  1505. # description of <tt>String#gsub</tt> for a description of the
  1506. # parameters.
  1507. # If we were to implement
  1508. # def sub(pattern, replacement=MaglevUndefined, &block) ; end
  1509. # to support fully general send or super() ,
  1510. # would still have problems in that
  1511. # we don't know number of frames up stack to find caller's $~
  # Returns a copy of the receiver with the first match of pattern replaced by
  # replacement (coerced with to_str); returns a plain copy when nothing matches.
  def sub(pattern, replacement)
    replacement = Maglev::Type.coerce_to(replacement, String, :to_str)
    # If pattern is a string, then do NOT interpret regex special characters.
    regex = self.__get_pattern(pattern, true)
    # stores into caller's $~
    match = regex.__match_vcglobals(self, 0x30)
    return dup unless match
    __replace_match_with(match, replacement)
    # r.taint if replacement.tainted? || self.tainted?
  end
  1524. def sub(pattern, &block)
  1525. # $~ and related variables will be valid in block if
  1526. # blocks's home method and caller's home method are the same
  1527. regex = self.__get_pattern(pattern, true)
  1528. if (match = regex.__match_vcglobals(self, 0x30))
  1529. res = __replace_match_with(match, block.call(match.__at(0)).to_s)

Large files files are truncated, but you can click here to view the full file