PageRenderTime 75ms CodeModel.GetById 19ms RepoModel.GetById 5ms app.codeStats 0ms

/kernel/common/string18.rb

http://github.com/rubinius/rubinius
Ruby | 879 lines | 444 code | 155 blank | 280 comment | 145 complexity | 0cbc82dc1fddbd9b266cbbe0dfe54c7b MD5 | raw file
Possible License(s): BSD-3-Clause, MPL-2.0-no-copyleft-exception, 0BSD, GPL-2.0, LGPL-2.1
  1. # -*- encoding: us-ascii -*-
  2. class String
  3. include Enumerable
  4. alias_method :bytesize, :size
  # Interprets the leading characters of <i>self</i> as hexadecimal digits
  # (an optional sign and an optional <code>0x</code> prefix are accepted)
  # and returns the resulting number. Zero is returned when nothing can be
  # parsed.
  #
  #   "0x0a".hex     #=> 10
  #   "-1234".hex    #=> -4660
  #   "0".hex        #=> 0
  #   "wombat".hex   #=> 0
  def hex
    radix = 16
    # The second argument is +false+; given the "zero on error" contract
    # above this presumably selects lenient parsing -- confirm in to_inum.
    to_inum(radix, false)
  end
  # Yields <i>self</i> and each successive value produced by
  # <code>String#succ</code> until +stop+ is reached. When +exclusive+ is
  # true, +stop+ itself is not yielded. Nothing is yielded when <i>self</i>
  # sorts after +stop+. Always returns <i>self</i>.
  def upto(stop, exclusive=false)
    stop = StringValue(stop)
    return self if self > stop

    # The sentinel is the first value that must NOT be yielded.
    limit = exclusive ? stop : stop.succ
    cursor = self

    while cursor != limit
      yield cursor
      cursor = StringValue(cursor.succ)
      # succ can grow the string past +stop+ without ever equalling the
      # sentinel; bail out so the loop terminates.
      break if cursor.size > stop.size || cursor.size == 0
    end

    self
  end
  # Reverses the bytes of <i>self</i> in place and returns <i>self</i>.
  def reverse!
    # Strings of zero or one byte are already their own reverse.
    if @num_bytes > 1
      self.modify!
      @data.reverse(0, @num_bytes)
    end
    self
  end
  # Removes the specified portion from <i>self</i> and returns the removed
  # portion. The <code>Fixnum</code> forms raise an <code>IndexError</code>
  # when the value is out of range, the <code>Range</code> form raises a
  # <code>RangeError</code>, and the <code>Regexp</code> and
  # <code>String</code> forms silently ignore the assignment.
  #
  #   string = "this is a string"
  #   string.slice!(2)        #=> 105
  #   string.slice!(3..6)     #=> " is "
  #   string.slice!(/s.*t/)   #=> "sa st"
  #   string.slice!("r")      #=> "r"
  #   string                  #=> "thing"
  def slice!(one, two=undefined)
    single_arg = two.equal?(undefined)
    by_regexp = one.kind_of? Regexp

    result = single_arg ? slice(one) : slice(one, two)

    # Remember the match produced by the lookup so that the assignment
    # below cannot leave a different $~ behind.
    saved_match = Regexp.last_match if by_regexp

    if result
      if single_arg
        self[one] = ''
      else
        self[one, two] = ''
      end
    end

    Regexp.last_match = saved_match if by_regexp

    result
  end
  # Squeezes <i>self</i> in place. Returns <i>self</i> when at least one
  # byte was removed and <code>nil</code> otherwise (including for the
  # empty string).
  def squeeze!(*strings)
    return if @num_bytes == 0
    self.modify!

    table = count_table(*strings).__data__

    write = 0          # index of the last byte kept so far
    prev = @data[0]    # value of the last byte kept so far
    read = 1

    while read < @num_bytes
      byte = @data[read]
      # Keep the byte unless it repeats the previous one AND is selected
      # by the table built from +strings+.
      if byte != prev or table[byte] != 1
        write += 1
        @data[write] = byte
        prev = byte
      end
      read += 1
    end

    new_size = write + 1
    return nil unless new_size < @num_bytes

    self.num_bytes = new_size
    self
  end
  # In-place form of <code>String#sub</code>. Returns <i>self</i> when a
  # substitution was made, or <code>nil</code> when +pattern+ did not match.
  #
  def sub!(pattern, replacement=undefined)
    # Mostly mirrors sub so that Regexp.last_match= behaves correctly.
    from_block = replacement.equal?(undefined)

    if from_block and !block_given?
      raise ArgumentError, "wrong number of arguments (1 for 2)"
    end
    raise ArgumentError, "wrong number of arguments (0 for 2)" unless pattern

    match = get_pattern(pattern, true).match_from(self, 0)

    unless match
      Regexp.last_match = nil
      return nil
    end

    out = match.pre_match
    Regexp.last_match = match

    if from_block
      replacement = yield(match[0].dup).to_s
      out.taint if replacement.tainted?
      out.append(replacement).append(match.post_match)
    else
      out.taint if replacement.tainted?
      replacement = StringValue(replacement).to_sub_replacement(out, match)
      out.append(match.post_match)
    end

    # Set it a second time (after the block / replacement ran) to match the
    # specs.
    Regexp.last_match = match
    out.taint if self.tainted?

    replace(out)
    return self
  end
  # In-place form of <code>String#succ</code>: advances the receiver to its
  # successor and returns <i>self</i>.
  def succ!
    self.modify!
    return self if @num_bytes == 0

    carry = nil       # nil until the first alphanumeric byte is seen
    last_alnum = 0    # index of the left-most alphanumeric byte that wrapped
    idx = @num_bytes - 1
    ctype = Rubinius::CType

    # Pass 1: increment alphanumeric bytes from the right, propagating a
    # carry while bytes wrap around (9 -> 0, z -> a, Z -> A).
    while idx >= 0
      byte = @data[idx]
      if ctype.isalnum(byte)
        carry = 0
        if byte == ?9
          @data[idx] = ?0
          carry = ?1
        elsif byte == ?z
          @data[idx] = carry = ?a
        elsif byte == ?Z
          @data[idx] = carry = ?A
        elsif (?0 <= byte && byte < ?9) ||
              (?a <= byte && byte < ?z) ||
              (?A <= byte && byte < ?Z)
          @data[idx] = byte + 1
        end
        break if carry == 0
        last_alnum = idx
      end
      idx -= 1
    end

    # Pass 2: there were no alphanumeric bytes at all, so treat the whole
    # string as a base-256 number and increment it.
    if carry.nil?
      idx = length - 1
      carry = ?\001
      while idx >= 0
        if @data[idx] < 255
          @data[idx] += 1
          break
        end
        @data[idx] = 0
        idx -= 1
      end
    end

    # Running off the left edge means the carry is still pending: insert it
    # in front of the left-most byte that wrapped.
    if idx < 0
      splice!(last_alnum, 1, carry.chr + @data[last_alnum].chr)
    end

    return self
  end
  176. alias_method :next, :succ
  177. alias_method :next!, :succ!
  178. ##
  179. # call-seq:
  180. # str.unpack(format) => anArray
  181. #
  182. # Decodes <i>str</i> (which may contain binary data) according to
  183. # the format string, returning an array of each value
  184. # extracted. The format string consists of a sequence of
  185. # single-character directives, summarized in the table at the end
  186. # of this entry.
  187. #
  188. # Each directive may be followed by a number, indicating the number
  189. # of times to repeat with this directive. An asterisk
  190. # (``<code>*</code>'') will use up all remaining elements. The
  191. # directives <code>sSiIlL</code> may each be followed by an
  192. # underscore (``<code>_</code>'') to use the underlying platform's
  193. # native size for the specified type; otherwise, it uses a
  194. # platform-independent consistent size. Spaces are ignored in the
  195. # format string. See also <code>Array#pack</code>.
  196. #
  197. # "abc \0\0abc \0\0".unpack('A6Z6') #=> ["abc", "abc "]
  198. # "abc \0\0".unpack('a3a3') #=> ["abc", " \000\000"]
  199. # "abc \0abc \0".unpack('Z*Z*') #=> ["abc ", "abc "]
  200. # "aa".unpack('b8B8') #=> ["10000110", "01100001"]
  201. # "aaa".unpack('h2H2c') #=> ["16", "61", 97]
  202. # "\xfe\xff\xfe\xff".unpack('sS') #=> [-2, 65534]
  203. # "now=20is".unpack('M*') #=> ["now is"]
  204. # "whole".unpack('xax2aX2aX1aX2a') #=> ["h", "e", "l", "l", "o"]
  205. #
  206. # This table summarizes the various formats and the Ruby classes
  207. # returned by each.
  208. #
  209. # Format | Returns | Function
  210. # -------+---------+-----------------------------------------
  211. # A | String | with trailing nulls and spaces removed
  212. # -------+---------+-----------------------------------------
  213. # a | String | string
  214. # -------+---------+-----------------------------------------
  215. # B | String | extract bits from each character (msb first)
  216. # -------+---------+-----------------------------------------
  217. # b | String | extract bits from each character (lsb first)
  218. # -------+---------+-----------------------------------------
  219. # C | Fixnum | extract a character as an unsigned integer
  220. # -------+---------+-----------------------------------------
  221. # c | Fixnum | extract a character as an integer
  222. # -------+---------+-----------------------------------------
  223. # d,D | Float | treat sizeof(double) characters as
  224. # | | a native double
  225. # -------+---------+-----------------------------------------
  226. # E | Float | treat sizeof(double) characters as
  227. # | | a double in little-endian byte order
  228. # -------+---------+-----------------------------------------
  229. # e | Float | treat sizeof(float) characters as
  230. # | | a float in little-endian byte order
  231. # -------+---------+-----------------------------------------
  232. # f,F | Float | treat sizeof(float) characters as
  233. # | | a native float
  234. # -------+---------+-----------------------------------------
  235. # G | Float | treat sizeof(double) characters as
  236. # | | a double in network byte order
  237. # -------+---------+-----------------------------------------
  238. # g | Float | treat sizeof(float) characters as a
  239. # | | float in network byte order
  240. # -------+---------+-----------------------------------------
  241. # H | String | extract hex nibbles from each character
  242. # | | (most significant first)
  243. # -------+---------+-----------------------------------------
  244. # h | String | extract hex nibbles from each character
  245. # | | (least significant first)
  246. # -------+---------+-----------------------------------------
  247. # I | Integer | treat sizeof(int) (modified by _)
  248. # | | successive characters as an unsigned
  249. # | | native integer
  250. # -------+---------+-----------------------------------------
  251. # i | Integer | treat sizeof(int) (modified by _)
  252. # | | successive characters as a signed
  253. # | | native integer
  254. # -------+---------+-----------------------------------------
  255. # L | Integer | treat four (modified by _) successive
  256. # | | characters as an unsigned native
  257. # | | long integer
  258. # -------+---------+-----------------------------------------
  259. # l | Integer | treat four (modified by _) successive
  260. # | | characters as a signed native
  261. # | | long integer
  262. # -------+---------+-----------------------------------------
  263. # M | String | quoted-printable
  264. # -------+---------+-----------------------------------------
  265. # m | String | base64-encoded
  266. # -------+---------+-----------------------------------------
  267. # N | Integer | treat four characters as an unsigned
  268. # | | long in network byte order
  269. # -------+---------+-----------------------------------------
  270. # n | Fixnum | treat two characters as an unsigned
  271. # | | short in network byte order
  272. # -------+---------+-----------------------------------------
  273. # P | String | treat sizeof(char *) characters as a
  274. # | | pointer, and return \emph{len} characters
  275. # | | from the referenced location
  276. # -------+---------+-----------------------------------------
  277. # p | String | treat sizeof(char *) characters as a
  278. # | | pointer to a null-terminated string
  279. # -------+---------+-----------------------------------------
  280. # Q | Integer | treat 8 characters as an unsigned
  281. # | | quad word (64 bits)
  282. # -------+---------+-----------------------------------------
  283. # q | Integer | treat 8 characters as a signed
  284. # | | quad word (64 bits)
  285. # -------+---------+-----------------------------------------
  286. # S | Fixnum | treat two (different if _ used)
  287. # | | successive characters as an unsigned
  288. # | | short in native byte order
  289. # -------+---------+-----------------------------------------
  290. # s | Fixnum | Treat two (different if _ used)
  291. # | | successive characters as a signed short
  292. # | | in native byte order
  293. # -------+---------+-----------------------------------------
  294. # U | Integer | UTF-8 characters as unsigned integers
  295. # -------+---------+-----------------------------------------
  296. # u | String | UU-encoded
  297. # -------+---------+-----------------------------------------
  298. # V | Fixnum | treat four characters as an unsigned
  299. # | | long in little-endian byte order
  300. # -------+---------+-----------------------------------------
  301. # v | Fixnum | treat two characters as an unsigned
  302. # | | short in little-endian byte order
  303. # -------+---------+-----------------------------------------
  304. # w | Integer | BER-compressed integer (see Array.pack)
  305. # -------+---------+-----------------------------------------
  306. # X | --- | skip backward one character
  307. # -------+---------+-----------------------------------------
  308. # x | --- | skip forward one character
  309. # -------+---------+-----------------------------------------
  310. # Z | String | with trailing nulls removed
  311. # | | up to first null with *
  312. # -------+---------+-----------------------------------------
  313. # @ | --- | skip to the offset given by the
  314. # | | length argument
  315. # -------+---------+-----------------------------------------
  def unpack(directives)
    Rubinius.primitive :string_unpack18

    # Reached only when the primitive above did not produce a result.
    # Coerce a non-String argument and try again; a String that still got
    # here is reported as invalid.
    return unpack(StringValue(directives)) unless directives.kind_of? String

    raise ArgumentError, "invalid directives string: #{directives}"
  end
  # Strips trailing NUL bytes and then trailing whitespace from <i>self</i>.
  # Returns <code>nil</code> when nothing was removed. See also
  # <code>String#lstrip!</code> and <code>String#strip!</code>.
  #
  #   "  hello  ".rstrip   #=> "  hello"
  #   "hello".rstrip!      #=> nil
  def rstrip!
    return if @num_bytes == 0

    last = @num_bytes - 1

    # Trailing NUL bytes go first ...
    last -= 1 while last >= 0 && @data[last] == 0

    # ... followed by any run of whitespace in front of them.
    ctype = Rubinius::CType
    last -= 1 while last >= 0 && ctype.isspace(@data[last])

    new_size = last + 1
    return if new_size == @num_bytes

    modify!
    self.num_bytes = new_size
    self
  end
  # Strips leading whitespace from <i>self</i>. Returns <code>nil</code>
  # when nothing was removed. See also <code>String#rstrip!</code> and
  # <code>String#strip!</code>.
  #
  #   "  hello  ".lstrip   #=> "hello  "
  #   "hello".lstrip!      #=> nil
  def lstrip!
    return if @num_bytes == 0

    ctype = Rubinius::CType
    lead = 0
    lead += 1 while lead < @num_bytes && ctype.isspace(@data[lead])

    return if lead == 0

    modify!
    # Shrink first, then slide the surviving bytes down to offset 0.
    self.num_bytes -= lead
    @data.move_bytes lead, @num_bytes, 0
    self
  end
  # In-place form of <code>String#chop</code>. Returns <i>self</i>, or
  # <code>nil</code> when <i>self</i> is empty. See also
  # <code>String#chomp!</code>.
  def chop!
    return if @num_bytes == 0

    self.modify!

    # A trailing "\r\n" (13, 10) is removed as a single unit.
    crlf = @num_bytes > 1 &&
           @data[@num_bytes-1] == 10 &&
           @data[@num_bytes-2] == 13

    if crlf
      self.num_bytes -= 2
    else
      self.num_bytes -= 1
    end

    self
  end
  # Modifies <i>self</i> in place as described for <code>String#chomp</code>,
  # returning <i>self</i>, or <code>nil</code> if no modifications were made.
  #
  # +sep+ selects what is removed from the end of the string:
  # * omitted, or <code>"\n"</code> -- one trailing "\n", "\r" or "\r\n"
  # * <code>nil</code>              -- nothing (always returns nil)
  # * <code>""</code>               -- every trailing "\n" / "\r\n"
  # * any other string              -- that exact suffix, if present
  #--
  # NOTE(review): an earlier note here said the frozen-string error comes
  # from String#replace rather than from chomp!. This method calls
  # Rubinius.check_frozen directly, so that note looks stale -- confirm.
  #++
  def chomp!(sep=undefined)
    # Special case for performance. No separator is by far the most common
    # usage.
    if sep.equal?(undefined)
      return if @num_bytes == 0

      Rubinius.check_frozen

      c = @data[@num_bytes-1]
      if c == 10 # ?\n
        self.num_bytes -= 1 if @num_bytes > 1 && @data[@num_bytes-2] == 13 # ?\r
      elsif c != 13 # ?\r
        return
      end

      # don't use modify! because it will dup the data when we don't need to.
      @hash_value = nil
      self.num_bytes -= 1
      return self
    end

    return if sep.nil? || @num_bytes == 0
    sep = StringValue sep

    if (sep == $/ && sep == DEFAULT_RECORD_SEPARATOR) || sep == "\n"
      c = @data[@num_bytes-1]
      return unless c == 10 || c == 13 # neither ?\n nor ?\r: nothing to do

      # Decide how much to drop BEFORE touching any state, so a frozen
      # receiver is rejected without having been partially shortened.
      drop = (c == 10 && @num_bytes > 1 && @data[@num_bytes-2] == 13) ? 2 : 1

      Rubinius.check_frozen

      # don't use modify! because it will dup the data when we don't need to.
      @hash_value = nil
      self.num_bytes -= drop
    elsif sep.size == 0
      # Paragraph mode: strip every trailing "\n" or "\r\n".
      size = @num_bytes
      while size > 0 && @data[size-1] == 10 # ?\n
        if size > 1 && @data[size-2] == 13 # ?\r
          size -= 2
        else
          size -= 1
        end
      end

      return if size == @num_bytes

      Rubinius.check_frozen

      # don't use modify! because it will dup the data when we don't need to.
      @hash_value = nil
      self.num_bytes = size
    else
      # Arbitrary separator: remove it only when it is an exact suffix.
      size = sep.size
      return if size > @num_bytes || sep.compare_substring(self, -size, size) != 0

      Rubinius.check_frozen

      # don't use modify! because it will dup the data when we don't need to.
      @hash_value = nil
      self.num_bytes -= size
    end

    return self
  end
  # Makes <i>self</i> take on the contents of <i>other</i>; taint is
  # propagated through <code>Rubinius::Type.infect</code>.
  #
  #   s = "hello"         #=> "hello"
  #   s.replace "world"   #=> "world"
  def replace(other)
    # Replacing a string with itself is a no-op.
    return self if equal?(other)

    Rubinius.check_frozen

    other = StringValue(other)

    # Any cached hash belongs to the old contents.
    @hash_value = nil

    # Both strings are flagged as shared before the byte storage of
    # +other+ is adopted.
    other.shared!
    @shared = true
    @data = other.__data__
    self.num_bytes = other.num_bytes

    Rubinius::Type.infect(self, other)
  end
  453. alias_method :initialize_copy, :replace
  454. # private :initialize_copy
  455. # Returns a new string with the characters from <i>self</i> in reverse order.
  456. #
  457. # "stressed".reverse #=> "desserts"
  # Append --- concatenates the given object onto <i>self</i>. An
  # <code>Integer</code> in the range 0..255 is first converted to the
  # corresponding character; anything else that is not a
  # <code>String</code> is coerced with <code>StringValue</code>.
  #
  #   a = "hello "
  #   a << "world"   #=> "hello world"
  #   a.concat(33)   #=> "hello world!"
  def <<(other)
    modify!

    piece =
      if other.kind_of? String
        other
      elsif other.kind_of?(Integer) && other >= 0 && other <= 255
        other.chr
      else
        StringValue(other)
      end

    Rubinius::Type.infect(self, piece)
    append(piece)
  end
  477. alias_method :concat, :<<
  478. # Splits <i>self</i> using the supplied parameter as the record separator
  479. # (<code>$/</code> by default), passing each substring in turn to the supplied
  480. # block. If a zero-length record separator is supplied, the string is split on
  481. # <code>\n</code> characters, except that multiple successive newlines are
  482. # appended together.
  483. #
  484. # print "Example one\n"
  485. # "hello\nworld".each { |s| p s }
  486. # print "Example two\n"
  487. # "hello\nworld".each('l') { |s| p s }
  488. # print "Example three\n"
  489. # "hello\n\n\nworld".each('') { |s| p s }
  490. #
  491. # <em>produces:</em>
  492. #
  493. # Example one
  494. # "hello\n"
  495. # "world"
  496. # Example two
  497. # "hel"
  498. # "l"
  499. # "o\nworl"
  500. # "d"
  501. # Example three
  502. # "hello\n\n\n"
  503. # "world"
  def lines(sep=$/)
    return to_enum(:lines, sep) unless block_given?

    # A nil separator means "no splitting": the whole string is one record.
    if sep.nil?
      yield self
      return self
    end

    sep = StringValue(sep)

    pos = 0

    # Snapshot of the size and storage, used to detect mutation by the block.
    size = @num_bytes
    orig_data = @data

    # If the separator is empty, we're actually in paragraph mode. This
    # is used so infrequently, we'll handle it completely separately from
    # normal line breaking.
    if sep.empty?
      sep = "\n\n"

      while pos < size
        nxt = find_string(sep, pos)
        break unless nxt

        # Swallow the whole run of newlines. The bounds test comes first so
        # that no byte at or beyond @num_bytes is ever read.
        while nxt < @num_bytes and @data[nxt] == 10
          nxt += 1
        end

        match_size = nxt - pos

        # string ends with \n's
        break if pos == @num_bytes

        str = byteslice(pos, match_size)
        yield str unless str.empty?

        # detect mutation within the block
        if !@data.equal?(orig_data) or @num_bytes != size
          raise RuntimeError, "string modified while iterating"
        end

        pos = nxt
      end

      # No more separators, but we need to grab the last part still.
      fin = byteslice(pos, @num_bytes - pos)
      yield fin if fin and !fin.empty?

    else

      # This is the normal case.
      pat_size = sep.size

      while pos < size
        nxt = find_string(sep, pos)
        break unless nxt

        # Each record includes its trailing separator.
        match_size = nxt - pos
        str = byteslice(pos, match_size + pat_size)
        yield str unless str.empty?

        # detect mutation within the block
        if !@data.equal?(orig_data) or @num_bytes != size
          raise RuntimeError, "string modified while iterating"
        end

        pos = nxt + pat_size
      end

      # No more separators, but we need to grab the last part still.
      fin = byteslice(pos, @num_bytes - pos)
      yield fin unless fin.empty?
    end

    self
  end
  562. alias_method :each_line, :lines
  563. alias_method :each, :lines
  564. # Returns a copy of <i>self</i> with <em>all</em> occurrences of <i>pattern</i>
  565. # replaced with either <i>replacement</i> or the value of the block. The
  566. # <i>pattern</i> will typically be a <code>Regexp</code>; if it is a
  567. # <code>String</code> then no regular expression metacharacters will be
  568. # interpreted (that is <code>/\d/</code> will match a digit, but
  569. # <code>'\d'</code> will match a backslash followed by a 'd').
  570. #
  571. # If a string is used as the replacement, special variables from the match
  572. # (such as <code>$&</code> and <code>$1</code>) cannot be substituted into it,
  573. # as substitution into the string occurs before the pattern match
  574. # starts. However, the sequences <code>\1</code>, <code>\2</code>, and so on
  575. # may be used to interpolate successive groups in the match.
  576. #
  577. # In the block form, the current match string is passed in as a parameter, and
  578. # variables such as <code>$1</code>, <code>$2</code>, <code>$`</code>,
  579. # <code>$&</code>, and <code>$'</code> will be set appropriately. The value
  580. # returned by the block will be substituted for the match on each call.
  581. #
  582. # The result inherits any tainting in the original string or any supplied
  583. # replacement string.
  584. #
  585. # "hello".gsub(/[aeiou]/, '*') #=> "h*ll*"
  586. # "hello".gsub(/([aeiou])/, '<\1>') #=> "h<e>ll<o>"
  587. # "hello".gsub(/./) { |s| s[0].to_s + ' ' } #=> "104 101 108 108 111 "
  def gsub(pattern, replacement=undefined)
    # Identity check against the sentinel; never dispatch to a user-defined
    # #== on the replacement argument.
    use_yield = replacement.equal?(undefined)

    # Neither a replacement nor a block was given: hand back an enumerator.
    if use_yield and !block_given?
      return to_enum(:gsub, pattern, replacement)
    end

    tainted = false

    unless use_yield
      # Check taint both before and after coercion.
      tainted = replacement.tainted?
      replacement = StringValue(replacement)
      tainted ||= replacement.tainted?
    end

    pattern = get_pattern(pattern, true)

    # Snapshot used to detect mutation of the receiver by the block.
    orig_len = @num_bytes
    orig_data = @data

    last_end = 0
    offset = nil
    ret = byteslice(0, 0) # Empty string and string subclass

    last_match = nil
    match = pattern.match_from self, last_end

    if match
      ma_range = match.full
      ma_start = ma_range.at(0)
      ma_end = ma_range.at(1)

      offset = ma_start
    end

    while match
      # Copy the unmatched text between the previous match and this one.
      pre_len = ma_start - last_end
      if pre_len > 0
        ret.append byteslice(last_end, pre_len)
      end

      if use_yield
        Regexp.last_match = match

        val = yield match.to_s
        val = val.to_s unless val.kind_of?(String)

        tainted ||= val.tainted?
        ret.append val

        if !@data.equal?(orig_data) or @num_bytes != orig_len
          raise RuntimeError, "string modified"
        end
      else
        replacement.to_sub_replacement(ret, match)
      end

      last_end = ma_end

      if ma_start == ma_end
        # Empty match: step forward by one character so the scan advances.
        if char = find_character(offset)
          offset += char.size
        else
          offset += 1
        end
      else
        offset = ma_end
      end

      last_match = match

      match = pattern.match_from self, offset
      break unless match

      ma_range = match.full
      ma_start = ma_range.at(0)
      ma_end = ma_range.at(1)

      offset = ma_start
    end

    Regexp.last_match = last_match

    # Append whatever follows the final match (exactly the remaining bytes).
    str = byteslice(last_end, @num_bytes - last_end)
    ret.append str if str

    ret.taint if tainted || self.tainted?
    return ret
  end
  # Performs the substitutions of <code>String#gsub</code> in place, returning
  # <i>self</i>, or <code>nil</code> if no substitutions were performed.
  # Without a replacement and without a block an enumerator for
  # <code>gsub!</code> is returned.
  def gsub!(pattern, replacement=undefined)
    # Because of the behavior of $~, this is duplicated from gsub because
    # if we call gsub from gsub!, the last_match can't be updated properly.

    if undefined.equal? replacement
      unless block_given?
        # The enumerator must drive the destructive method.
        return to_enum(:gsub!, pattern, replacement)
      end
      tainted = false
    else
      tainted = replacement.tainted?
      unless replacement.kind_of? String
        replacement = StringValue(replacement)
        tainted ||= replacement.tainted?
      end
    end

    pattern = get_pattern(pattern, true) unless pattern.kind_of? Regexp
    match = pattern.search_region(self, 0, @num_bytes, true)

    unless match
      # Clear $~ on a miss, as sub! and gsub do.
      Regexp.last_match = nil
      return nil
    end

    # Size snapshot used to detect mutation of the receiver by the block.
    orig_len = @num_bytes

    last_end = 0
    last_match = nil

    ret = byteslice(0, 0) # Empty string and string subclass
    offset = match.begin 0

    while match
      # Copy the unmatched text between the previous match and this one.
      if str = match.pre_match_from(last_end)
        ret.append str
      end

      if replacement.equal?(undefined)
        Regexp.last_match = match

        val = yield(match[0]).to_s
        tainted ||= val.tainted?
        ret.append val

        raise RuntimeError, "string modified" unless @num_bytes == orig_len
      else
        replacement.to_sub_replacement(ret, match)
      end

      last_end = match.end(0)

      if match.collapsing?
        # Empty match: step forward by one character so the scan advances.
        if char = find_character(offset)
          offset += char.size
        else
          offset += 1
        end
      else
        offset = match.end(0)
      end

      last_match = match

      match = pattern.match_from self, offset
      break unless match

      offset = match.begin 0
    end

    Regexp.last_match = last_match

    # Append whatever follows the final match (exactly the remaining bytes).
    str = byteslice(last_end, @num_bytes - last_end)
    ret.append str if str

    ret.taint if tainted || self.tainted?

    # At least one match was processed above, so there is always a change
    # to commit.
    replace(ret)
    return self
  end
  724. end