PageRenderTime 62ms CodeModel.GetById 24ms RepoModel.GetById 0ms app.codeStats 0ms

/kernel/common/string19.rb

http://github.com/rubinius/rubinius
Ruby | 987 lines | 533 code | 166 blank | 288 comment | 159 complexity | 8c0355a2416aba23eabd19db83099a68 MD5 | raw file
Possible License(s): BSD-3-Clause, MPL-2.0-no-copyleft-exception, 0BSD, GPL-2.0, LGPL-2.1
  1. # -*- encoding: us-ascii -*-
  2. class String
  # Attempts to convert <i>obj</i> to a String. The work is delegated to
  # <code>Rubinius::Type.try_convert</code>, which is told to use the
  # <code>:to_str</code> conversion method; whatever that helper returns is
  # returned unchanged.
  def self.try_convert(obj)
    Rubinius::Type.try_convert(obj, String, :to_str)
  end
  # Yields the result of <code>ord</code> for every character of <i>self</i>
  # and returns <i>self</i>. Without a block an enumerator for
  # <code>:codepoints</code> is returned instead.
  def codepoints
    if block_given?
      chars { |char| yield char.ord }
      self
    else
      to_enum :codepoints
    end
  end
  11. alias_method :each_codepoint, :codepoints
  # Re-tags <i>self</i> with the encoding <i>to</i> and returns <i>self</i>.
  # When <i>to</i> is omitted, <code>Encoding.default_internal</code> is used;
  # otherwise <i>to</i> is coerced with
  # <code>Rubinius::Type.coerce_to_encoding</code>.
  #
  # TODO: <i>from</i> and <i>options</i> are accepted but not used here, and
  # the bytes are not transcoded; only <code>force_encoding</code> is applied.
  def encode!(to=undefined, from=undefined, options=nil)
    Rubinius.check_frozen

    target = if to.equal?(undefined)
      Encoding.default_internal
    else
      Rubinius::Type.coerce_to_encoding(to)
    end

    force_encoding(target)
    self
  end
  # Non-destructive counterpart of <code>String#encode!</code>: duplicates
  # <i>self</i> and calls <code>encode!</code> on the copy with the same
  # arguments, returning the result of that call.
  def encode(to=undefined, from=undefined, options=nil)
    copy = dup
    copy.encode!(to, from, options)
  end
  # Sets the encoding of <i>self</i> to <i>enc</i> (coerced with
  # <code>Rubinius::Type.coerce_to_encoding</code>) and returns <i>self</i>.
  def force_encoding(enc)
    # The cached encoding-dependent flags are cleared before the new
    # encoding is stored.
    @valid_encoding = nil
    @ascii_only = nil

    @encoding = Rubinius::Type.coerce_to_encoding(enc)
    self
  end
  # Interprets <i>self</i> as a base-16 number via <code>to_inum(16, false)</code>.
  # A string whose first character is an underscore yields 0 without
  # attempting the conversion.
  def hex
    if chars.first == "_"
      0
    else
      to_inum(16, false)
    end
  end
  # Inserts <i>other</i> at the front of <i>self</i> by assigning it to the
  # empty slice at index 0, then returns <i>self</i>.
  #
  #   a = "world"
  #   a.prepend("hello ")   #=> "hello world"
  def prepend(other)
    self[0, 0] = other
    return self
  end
  # Yields successive values starting at <i>self</i> and ending at <i>stop</i>
  # (coerced with <code>StringValue</code>), then returns <i>self</i>. The
  # end value itself is skipped when <i>exclusive</i> is true. Without a
  # block an enumerator is returned. Nothing is yielded when <i>self</i>
  # compares greater than <i>stop</i> or when <i>stop</i> is shorter than
  # <i>self</i>.
  def upto(stop, exclusive=false)
    return to_enum(:upto, stop, exclusive) unless block_given?

    stop = StringValue(stop)
    return self if self > stop

    if size == 1 && stop.size == 1
      # Single-character endpoints: walk the byte values directly.
      byte = getbyte(0)
      limit = stop.getbyte(0)
      limit += 1 unless exclusive

      until byte == limit
        yield byte.chr
        byte += 1
      end
    elsif stop.size >= size
      limit = exclusive ? stop : stop.succ
      str = self

      until str == limit
        yield str
        str = StringValue(str.succ)
        # Stop once the successor has outgrown the end value (or is empty).
        break if str.size > stop.size || str.size == 0
      end
    end

    self
  end
  # Reverses the bytes of <i>self</i> in place and returns <i>self</i>.
  # Strings of zero or one byte are returned untouched.
  def reverse!
    Rubinius.check_frozen

    unless @num_bytes <= 1
      modify!
      @data.reverse(0, @num_bytes)
    end

    self
  end
  # Squeezes <i>self</i> in place, returning either <i>self</i>, or
  # <code>nil</code> if no changes were made. Runs of a repeated byte are
  # collapsed to one byte when the table built by <code>count_table</code>
  # from <i>strings</i> marks that byte with 1.
  #
  # Raises <code>ArgumentError</code> when the first argument looks like a
  # range (<code>x-y</code>) whose characters, with the dashes removed, are
  # not in sorted order.
  def squeeze!(*strings)
    first = strings.first
    if first =~ /.+\-.+/
      endpoints = first.gsub(/-/, '').split('')
      unless endpoints == endpoints.sort
        raise ArgumentError, "invalid range #{strings} in string transliteration"
      end
    end

    return if @num_bytes == 0
    modify!

    table = count_table(*strings).__data__

    # +read+ scans every byte; +write+ is the index of the last byte kept.
    previous = @data[0]
    write = 0
    read = 1
    while read < @num_bytes
      byte = @data[read]
      if byte != previous || table[byte] != 1
        write += 1
        @data[write] = byte
        previous = byte
      end
      read += 1
    end

    kept = write + 1
    return nil unless kept < @num_bytes

    self.num_bytes = kept
    self
  end
  # Performs the substitutions of <code>String#sub</code> in place,
  # returning <i>self</i>, or <code>nil</code> if no substitutions were
  # performed.
  #
  # Raises <code>ArgumentError</code> when neither a replacement nor a block
  # is given, or when <i>pattern</i> is nil or false.
  def sub!(pattern, replacement=undefined)
    # Copied mostly from sub to keep Regexp.last_match= working right; the
    # last_match assignments are therefore kept directly in this method.
    block_form = replacement.equal?(undefined)

    if block_form && !block_given?
      raise ArgumentError, "wrong number of arguments (1 for 2)"
    end
    raise ArgumentError, "wrong number of arguments (0 for 2)" unless pattern

    Rubinius.check_frozen

    match = get_pattern(pattern, true).match_from(self, 0)
    unless match
      Regexp.last_match = nil
      return nil
    end

    out = match.pre_match
    Regexp.last_match = match

    if block_form
      replacement = yield(match[0].dup).to_s
      out.taint if replacement.tainted?
      out.append(replacement).append(match.post_match)
    else
      out.taint if replacement.tainted?
      replacement = StringValue(replacement).to_sub_replacement(out, match)
      out.append(match.post_match)
    end

    # We have to reset it again to match the specs
    Regexp.last_match = match

    out.taint if self.tainted?

    replace(out)
    self
  end
  # Deletes the specified portion from <i>self</i>, and returns the portion
  # deleted (the value <code>slice</code> returns for the same arguments).
  # When <code>slice</code> finds nothing, <i>self</i> is left alone and that
  # falsy result is returned.
  #
  #   string = "this is a string"
  #   string.slice!(2)        #=> "i"
  #   string.slice!(3..6)     #=> " is "
  #   string.slice!(/s.*t/)   #=> "sa st"
  #   string.slice!("r")      #=> "r"
  #   string                  #=> "thing"
  def slice!(one, two=undefined)
    Rubinius.check_frozen

    one_arg = two.equal?(undefined)
    result = one_arg ? slice(one) : slice(one, two)

    # For a Regexp argument the match data left by the lookup above is
    # remembered and put back after the deletion.
    by_regexp = one.kind_of?(Regexp)
    lm = Regexp.last_match if by_regexp

    if result
      if one_arg
        self[one] = ''
      else
        self[one, two] = ''
      end
    end

    Regexp.last_match = lm if by_regexp

    result
  end
  # Equivalent to <code>String#succ</code>, but modifies the receiver in
  # place. Returns <i>self</i>.
  #
  # Scanning from the right, the first alphanumeric byte is incremented.
  # When it wraps ('9' to '0', 'z' to 'a', 'Z' to 'A') the scan continues
  # with the next alphanumeric byte to the left. If the string contains no
  # alphanumeric byte at all, the raw byte values are incremented instead,
  # wrapping 255 to 0. A carry that runs off the left end is spliced in at
  # the position of the leftmost alphanumeric byte that was touched.
  #
  # TODO: make encoding aware.
  def succ!
    self.modify!
    return self if @num_bytes == 0

    ctype = Rubinius::CType
    carry = nil
    last_alnum = 0
    idx = @num_bytes - 1

    while idx >= 0
      byte = @data[idx]

      if ctype.isalnum(byte)
        carry = 0

        if byte == 57                       # '9' wraps to '0', carrying '1'
          @data[idx] = 48
          carry = 49
        elsif byte == 122                   # 'z' wraps to 'a', carrying 'a'
          @data[idx] = carry = 97
        elsif byte == 90                    # 'Z' wraps to 'A', carrying 'A'
          @data[idx] = carry = 65
        elsif (48 <= byte && byte < 57) ||
              (97 <= byte && byte < 122) ||
              (65 <= byte && byte < 90)
          @data[idx] += 1
        end

        break if carry == 0
        last_alnum = idx
      end

      idx -= 1
    end

    # No alphanumeric byte was seen: increment the raw bytes instead.
    if carry.nil?
      idx = length - 1
      carry = 1

      while idx >= 0
        if @data[idx] >= 255
          @data[idx] = 0
        else
          @data[idx] += 1
          break
        end

        idx -= 1
      end
    end

    # The scan ran off the left end with a carry still pending.
    if idx < 0
      splice! last_alnum, 1, carry.chr + @data[last_alnum].chr
    end

    self
  end
  220. alias_method :next, :succ
  221. alias_method :next!, :succ!
  # Converts <i>self</i> by handing it to a <code>Complexifier</code> and
  # returning the result of its <code>convert</code> method.
  def to_c
    converter = Complexifier.new(self)
    converter.convert
  end
  # Converts <i>self</i> by handing it to a <code>Rationalizer</code> and
  # returning the result of its <code>convert</code> method.
  def to_r
    converter = Rationalizer.new(self)
    converter.convert
  end
  228. ##
  229. # call-seq:
  230. # str.unpack(format) => anArray
  231. #
  232. # Decodes <i>str</i> (which may contain binary data) according to
  233. # the format string, returning an array of each value
  234. # extracted. The format string consists of a sequence of
  235. # single-character directives, summarized in the table at the end
  236. # of this entry.
  237. #
  238. # Each directive may be followed by a number, indicating the number
  239. # of times to repeat with this directive. An asterisk
  240. # (``<code>*</code>'') will use up all remaining elements. The
  241. # directives <code>sSiIlL</code> may each be followed by an
  242. # underscore (``<code>_</code>'') to use the underlying platform's
  243. # native size for the specified type; otherwise, it uses a
  244. # platform-independent consistent size. Spaces are ignored in the
  245. # format string. See also <code>Array#pack</code>.
  246. #
  247. # "abc \0\0abc \0\0".unpack('A6Z6') #=> ["abc", "abc "]
  248. # "abc \0\0".unpack('a3a3') #=> ["abc", " \000\000"]
  249. # "abc \0abc \0".unpack('Z*Z*') #=> ["abc ", "abc "]
  250. # "aa".unpack('b8B8') #=> ["10000110", "01100001"]
  251. # "aaa".unpack('h2H2c') #=> ["16", "61", 97]
  252. # "\xfe\xff\xfe\xff".unpack('sS') #=> [-2, 65534]
  253. # "now=20is".unpack('M*') #=> ["now is"]
  254. # "whole".unpack('xax2aX2aX1aX2a') #=> ["h", "e", "l", "l", "o"]
  255. #
  256. # This table summarizes the various formats and the Ruby classes
  257. # returned by each.
  258. #
  259. # Format | Returns | Function
  260. # -------+---------+-----------------------------------------
  261. # A | String | with trailing nulls and spaces removed
  262. # -------+---------+-----------------------------------------
  263. # a | String | string
  264. # -------+---------+-----------------------------------------
  265. # B | String | extract bits from each character (msb first)
  266. # -------+---------+-----------------------------------------
  267. # b | String | extract bits from each character (lsb first)
  268. # -------+---------+-----------------------------------------
  269. # C | Fixnum | extract a character as an unsigned integer
  270. # -------+---------+-----------------------------------------
  271. # c | Fixnum | extract a character as an integer
  272. # -------+---------+-----------------------------------------
  273. # d,D | Float | treat sizeof(double) characters as
  274. # | | a native double
  275. # -------+---------+-----------------------------------------
  276. # E | Float | treat sizeof(double) characters as
  277. # | | a double in little-endian byte order
  278. # -------+---------+-----------------------------------------
  279. # e | Float | treat sizeof(float) characters as
  280. # | | a float in little-endian byte order
  281. # -------+---------+-----------------------------------------
  282. # f,F | Float | treat sizeof(float) characters as
  283. # | | a native float
  284. # -------+---------+-----------------------------------------
  285. # G | Float | treat sizeof(double) characters as
  286. # | | a double in network byte order
  287. # -------+---------+-----------------------------------------
  288. # g | Float | treat sizeof(float) characters as a
  289. # | | float in network byte order
  290. # -------+---------+-----------------------------------------
  291. # H | String | extract hex nibbles from each character
  292. # | | (most significant first)
  293. # -------+---------+-----------------------------------------
  294. # h | String | extract hex nibbles from each character
  295. # | | (least significant first)
  296. # -------+---------+-----------------------------------------
  297. # I | Integer | treat sizeof(int) (modified by _)
  298. # | | successive characters as an unsigned
  299. # | | native integer
  300. # -------+---------+-----------------------------------------
  301. # i | Integer | treat sizeof(int) (modified by _)
  302. # | | successive characters as a signed
  303. # | | native integer
  304. # -------+---------+-----------------------------------------
  305. # L | Integer | treat four (modified by _) successive
  306. # | | characters as an unsigned native
  307. # | | long integer
  308. # -------+---------+-----------------------------------------
  309. # l | Integer | treat four (modified by _) successive
  310. # | | characters as a signed native
  311. # | | long integer
  312. # -------+---------+-----------------------------------------
  313. # M | String | quoted-printable
  314. # -------+---------+-----------------------------------------
  315. # m | String | base64-encoded
  316. # -------+---------+-----------------------------------------
  317. # N | Integer | treat four characters as an unsigned
  318. # | | long in network byte order
  319. # -------+---------+-----------------------------------------
  320. # n | Fixnum | treat two characters as an unsigned
  321. # | | short in network byte order
  322. # -------+---------+-----------------------------------------
  323. # P | String | treat sizeof(char *) characters as a
  324. # | | pointer, and return \emph{len} characters
  325. # | | from the referenced location
  326. # -------+---------+-----------------------------------------
  327. # p | String | treat sizeof(char *) characters as a
  328. # | | pointer to a null-terminated string
  329. # -------+---------+-----------------------------------------
  330. # Q | Integer | treat 8 characters as an unsigned
  331. # | | quad word (64 bits)
  332. # -------+---------+-----------------------------------------
  333. # q | Integer | treat 8 characters as a signed
  334. # | | quad word (64 bits)
  335. # -------+---------+-----------------------------------------
  336. # S | Fixnum | treat two (different if _ used)
  337. # | | successive characters as an unsigned
  338. # | | short in native byte order
  339. # -------+---------+-----------------------------------------
  340. # s | Fixnum | Treat two (different if _ used)
  341. # | | successive characters as a signed short
  342. # | | in native byte order
  343. # -------+---------+-----------------------------------------
  344. # U | Integer | UTF-8 characters as unsigned integers
  345. # -------+---------+-----------------------------------------
  346. # u | String | UU-encoded
  347. # -------+---------+-----------------------------------------
  348. # V | Fixnum | treat four characters as an unsigned
  349. # | | long in little-endian byte order
  350. # -------+---------+-----------------------------------------
  351. # v | Fixnum | treat two characters as an unsigned
  352. # | | short in little-endian byte order
  353. # -------+---------+-----------------------------------------
  354. # w | Integer | BER-compressed integer (see Array.pack)
  355. # -------+---------+-----------------------------------------
  356. # X | --- | skip backward one character
  357. # -------+---------+-----------------------------------------
  358. # x | --- | skip forward one character
  359. # -------+---------+-----------------------------------------
  360. # Z | String | with trailing nulls removed
  361. # | | upto first null with *
  362. # -------+---------+-----------------------------------------
  363. # @ | --- | skip to the offset given by the
  364. # | | length argument
  365. # -------+---------+-----------------------------------------
  def unpack(directives)
    Rubinius.primitive :string_unpack19

    # NOTE(review): the Ruby code below presumably runs only when the
    # primitive above does not handle the call - confirm against the VM.
    # A String that still ends up here is reported as invalid; any other
    # object is coerced with StringValue and the call is retried.
    if directives.kind_of?(String)
      raise ArgumentError, "invalid directives string: #{directives}"
    end

    unpack(StringValue(directives))
  end
  # Removes trailing whitespace and null bytes from <i>self</i>, returning
  # <code>nil</code> if no change was made. See also
  # <code>String#lstrip!</code> and <code>String#strip!</code>.
  #
  #   "  hello  ".rstrip!   #=> "  hello"
  #   "hello".rstrip!       #=> nil
  def rstrip!
    Rubinius.check_frozen
    return if @num_bytes == 0

    ctype = Rubinius::CType

    # +keep+ is the number of leading bytes that survive the strip.
    keep = @num_bytes
    while keep > 0
      byte = @data[keep - 1]
      break unless byte == 0 || ctype.isspace(byte)
      keep -= 1
    end

    return if keep == @num_bytes

    modify!
    self.num_bytes = keep
    self
  end
  # Removes leading whitespace from <i>self</i>, returning <code>nil</code>
  # if no change was made. See also <code>String#rstrip!</code> and
  # <code>String#strip!</code>.
  #
  #   "  hello  ".lstrip!   #=> "hello  "
  #   "hello".lstrip!       #=> nil
  def lstrip!
    Rubinius.check_frozen
    return if @num_bytes == 0

    ctype = Rubinius::CType

    # Count the whitespace bytes at the front of the string.
    lead = 0
    lead += 1 while lead < @num_bytes && ctype.isspace(@data[lead])

    return if lead == 0

    modify!
    self.num_bytes -= lead
    # Shift the surviving bytes down to the start of the data.
    @data.move_bytes lead, @num_bytes, 0
    self
  end
  # Processes <i>self</i> as for <code>String#chop</code>, returning
  # <i>self</i>, or <code>nil</code> if <i>self</i> is the empty string. The
  # last byte is dropped, or the last two when the string ends in
  # <code>\r\n</code>. See also <code>String#chomp!</code>.
  def chop!
    Rubinius.check_frozen
    return if @num_bytes == 0

    modify!

    crlf = @num_bytes > 1 &&
           @data[@num_bytes - 1] == 10 &&   # ?\n
           @data[@num_bytes - 2] == 13      # ?\r

    self.num_bytes -= (crlf ? 2 : 1)
    self
  end
  # Modifies <i>self</i> in place as described for <code>String#chomp</code>,
  # returning <i>self</i>, or <code>nil</code> if no modifications were made.
  #
  # With no argument, or with a separator equal to "\n" (or to an unchanged
  # <code>$/</code>), one trailing "\n", "\r" or "\r\n" is removed. An empty
  # separator removes every trailing "\n" / "\r\n". Any other separator is
  # removed once if <i>self</i> ends with it. A <code>nil</code> separator
  # changes nothing.
  #---
  # NOTE: TypeError is raised in String#replace and not in String#chomp! when
  # self is frozen. This is intended behaviour.
  #+++
  def chomp!(sep=undefined)
    Rubinius.check_frozen

    if sep.equal?(undefined)
      # special case for performance. No separator is by far the most
      # common usage, so the separator checks are skipped entirely.
      return if @num_bytes == 0
      newline_mode = true
    else
      return if sep.nil? || @num_bytes == 0
      sep = StringValue sep
      newline_mode = (sep == $/ && sep == DEFAULT_RECORD_SEPARATOR) || sep == "\n"
    end

    if newline_mode
      last = @data[@num_bytes - 1]

      if last == 10 # ?\n
        drop = (@num_bytes > 1 && @data[@num_bytes - 2] == 13) ? 2 : 1 # ?\r
      elsif last == 13 # ?\r
        drop = 1
      else
        return
      end

      # don't use modify! because it will dup the data when we don't need to.
      @hash_value = nil
      self.num_bytes -= drop
    elsif sep.size == 0
      remaining = @num_bytes

      while remaining > 0 && @data[remaining - 1] == 10 # ?\n
        if remaining > 1 && @data[remaining - 2] == 13 # ?\r
          remaining -= 2
        else
          remaining -= 1
        end
      end

      return if remaining == @num_bytes

      # don't use modify! because it will dup the data when we don't need to.
      @hash_value = nil
      self.num_bytes = remaining
    else
      sep_size = sep.size
      return if sep_size > @num_bytes
      return if sep.compare_substring(self, -sep_size, sep_size) != 0

      # don't use modify! because it will dup the data when we don't need to.
      @hash_value = nil
      self.num_bytes -= sep_size
    end

    self
  end
  # Replaces the contents, encoding and taintedness of <i>self</i> with the
  # corresponding values in <i>other</i> (coerced with
  # <code>StringValue</code>). Replacing a string with itself is a no-op
  # that returns <i>self</i>; otherwise the value of
  # <code>Rubinius::Type.infect(self, other)</code> is returned.
  #
  #   s = "hello"         #=> "hello"
  #   s.replace "world"   #=> "world"
  def replace(other)
    Rubinius.check_frozen

    # If we're replacing with ourselves, then we have nothing to do
    return self if equal?(other)

    source = StringValue(other)

    # Both strings refer to the same data object afterwards, so each one is
    # flagged as shared.
    @shared = true
    source.shared!
    @data = source.__data__
    self.num_bytes = source.num_bytes
    @hash_value = nil

    force_encoding(source.encoding)

    Rubinius::Type.infect(self, source)
  end
  501. alias_method :initialize_copy, :replace
  502. # private :initialize_copy
  # Appends <i>other</i> to <i>self</i> and returns the result of
  # <code>append</code>. An Integer is first turned into a character in the
  # receiver's encoding; anything else is coerced with
  # <code>StringValue</code>. The receiver takes on the encoding that
  # <code>Rubinius::Type.compatible_encoding</code> picks for the pair.
  def <<(other)
    modify!

    if other.kind_of?(Integer)
      # A US-ASCII receiver cannot hold bytes 128..255, so it is re-tagged
      # as binary before the character is created.
      if encoding == Encoding::US_ASCII && other.between?(128, 255)
        force_encoding(Encoding::ASCII_8BIT)
      end

      piece = other.chr(encoding)
    else
      piece = StringValue(other)
    end

    force_encoding Rubinius::Type.compatible_encoding(self, piece)

    Rubinius::Type.infect(self, piece)
    append(piece)
  end
  518. alias_method :concat, :<<
  # Returns a one-character string taken from the beginning of the string
  # (the result of <code>substring(0, 1)</code>).
  #
  #   a = "abcde"
  #   a.chr    #=> "a"
  def chr
    substring(0, 1)
  end
  # Splits <i>self</i> using the supplied parameter as the record separator
  # (<code>$/</code> by default), passing each substring in turn to the
  # supplied block, and returns <i>self</i>. If a zero-length record
  # separator is supplied, the string is split into paragraphs: runs of two
  # or more <code>\n</code> characters end a record and are kept together at
  # the end of it. A <code>nil</code> separator yields <i>self</i> once.
  # Without a block an enumerator is returned.
  #
  # In paragraph mode a <code>RuntimeError</code> is raised if the block
  # modifies the string. In the normal mode iteration runs over a copy taken
  # before the first record is yielded.
  #
  #   print "Example one\n"
  #   "hello\nworld".each_line { |s| p s }
  #   print "Example two\n"
  #   "hello\nworld".each_line('l') { |s| p s }
  #   print "Example three\n"
  #   "hello\n\n\nworld".each_line('') { |s| p s }
  #
  # <em>produces:</em>
  #
  #   Example one
  #   "hello\n"
  #   "world"
  #   Example two
  #   "hel"
  #   "l"
  #   "o\nworl"
  #   "d"
  #   Example three
  #   "hello\n\n\n"
  #   "world"
  def lines(sep=$/)
    return to_enum(:lines, sep) unless block_given?

    # weird edge case.
    if sep.nil?
      yield self
      return self
    end

    sep = StringValue(sep)

    pos = 0

    size = @num_bytes
    orig_data = @data

    # If the separator is empty, we're actually in paragraph mode. This
    # is used so infrequently, we'll handle it completely separately from
    # normal line breaking.
    if sep.empty?
      sep = "\n\n"

      while pos < size
        nxt = find_string(sep, pos)
        break unless nxt

        # Swallow the whole run of newlines. The bounds check comes first so
        # the data is never read at or past the end of the string.
        while nxt < @num_bytes and @data[nxt] == 10
          nxt += 1
        end

        match_size = nxt - pos

        # string ends with \n's
        break if pos == @num_bytes

        str = byteslice pos, match_size
        yield str unless str.empty?

        # detect mutation within the block
        if !@data.equal?(orig_data) or @num_bytes != size
          raise RuntimeError, "string modified while iterating"
        end

        pos = nxt
      end

      # No more separators, but we need to grab the last part still.
      fin = byteslice pos, @num_bytes - pos
      yield fin if fin and !fin.empty?

    else

      # This is the normal case.
      pat_size = sep.size
      unmodified_self = clone

      while pos < size
        nxt = unmodified_self.find_string(sep, pos)
        break unless nxt

        match_size = nxt - pos
        str = unmodified_self.byteslice pos, match_size + pat_size
        yield str unless str.empty?

        pos = nxt + pat_size
      end

      # No more separators, but we need to grab the last part still. The
      # length is taken from the size recorded before iterating: the block
      # may have shrunk the receiver, and a length computed from the live
      # byte count could then go negative for the untouched copy.
      fin = unmodified_self.byteslice pos, size - pos
      yield fin unless fin.empty?
    end

    self
  end
  607. alias_method :each_line, :lines
  # Returns a copy of <i>self</i> with <em>all</em> occurrences of
  # <i>pattern</i> replaced with either <i>replacement</i> or the value of
  # the block. The <i>pattern</i> will typically be a <code>Regexp</code>; if
  # it is a <code>String</code> then no regular expression metacharacters
  # will be interpreted (that is <code>/\d/</code> will match a digit, but
  # <code>'\d'</code> will match a backslash followed by a 'd').
  #
  # If a string is used as the replacement, special variables from the match
  # (such as <code>$&</code> and <code>$1</code>) cannot be substituted into
  # it, as substitution into the string occurs before the pattern match
  # starts. However, the sequences <code>\1</code>, <code>\2</code>, and so
  # on may be used to interpolate successive groups in the match.
  #
  # If the replacement converts to a <code>Hash</code> (via
  # <code>to_hash</code>), each matched text is looked up in it and the
  # value found is used as the substitution.
  #
  # In the block form, the current match string is passed in as a parameter,
  # and variables such as <code>$1</code>, <code>$2</code>, <code>$`</code>,
  # <code>$&</code>, and <code>$'</code> will be set appropriately. The value
  # returned by the block will be substituted for the match on each call.
  # A <code>RuntimeError</code> is raised if the block (or hash lookup)
  # modifies the receiver.
  #
  # With neither a replacement nor a block an enumerator is returned.
  #
  # The result inherits any tainting and trustiness in the original string
  # or any supplied replacement string.
  #
  #   "hello".gsub(/[aeiou]/, '*')                  #=> "h*ll*"
  #   "hello".gsub(/([aeiou])/, '<\1>')             #=> "h<e>ll<o>"
  #   "hello".gsub(/./) { |s| s.ord.to_s + ' ' }    #=> "104 101 108 108 111 "
  def gsub(pattern, replacement=undefined)
    unless block_given? or replacement != undefined
      return to_enum(:gsub, pattern, replacement)
    end

    tainted = false
    untrusted = untrusted?

    if replacement.equal?(undefined)
      use_yield = true
    else
      use_yield = false
      tainted = replacement.tainted?
      untrusted ||= replacement.untrusted?

      hash = Rubinius::Type.check_convert_type(replacement, Hash, :to_hash)
      replacement = StringValue(replacement) unless hash
      tainted ||= replacement.tainted?
      untrusted ||= replacement.untrusted?
    end

    pattern = get_pattern(pattern, true)

    # Remembered so that a receiver modified from the block can be detected.
    orig_len = @num_bytes
    orig_data = @data

    last_end = 0
    last_match = nil

    ret = byteslice 0, 0 # Empty string and string subclass

    match = pattern.match_from self, last_end

    while match
      span = match.full
      match_start = span.at(0)
      match_end = span.at(1)

      # Copy the text between the previous match and this one.
      gap = match_start - last_end
      ret.append byteslice(last_end, gap) if gap > 0

      if use_yield || hash
        Regexp.last_match = match

        if use_yield
          val = yield match.to_s
        else
          val = hash[match.to_s]
        end
        untrusted = true if val.untrusted?
        val = val.to_s unless val.kind_of?(String)

        tainted ||= val.tainted?
        ret.append val

        if !@data.equal?(orig_data) or @num_bytes != orig_len
          raise RuntimeError, "string modified"
        end
      else
        replacement.to_sub_replacement(ret, match)
      end

      tainted ||= val.tainted?

      last_end = match_end

      # After an empty match the search resumes one character further on,
      # otherwise directly after the matched text.
      if match_start == match_end
        if char = find_character(match_start)
          offset = match_start + char.bytesize
        else
          offset = match_start + 1
        end
      else
        offset = match_end
      end

      last_match = match

      match = pattern.match_from self, offset
    end

    Regexp.last_match = last_match

    # Copy whatever follows the final match.
    str = byteslice last_end, @num_bytes-last_end+1
    ret.append str if str

    ret.taint if tainted || self.tainted?
    ret.untrust if untrusted

    ret
  end
  # Performs the substitutions of <code>String#gsub</code> in place,
  # returning <i>self</i>, or <code>nil</code> if <i>pattern</i> does not
  # match at all. The <i>pattern</i> will typically be a <code>Regexp</code>;
  # if it is a <code>String</code> then no regular expression metacharacters
  # will be interpreted (that is <code>/\d/</code> will match a digit, but
  # <code>'\d'</code> will match a backslash followed by a 'd').
  #
  # If a string is used as the replacement, special variables from the match
  # (such as <code>$&</code> and <code>$1</code>) cannot be substituted into
  # it, as substitution into the string occurs before the pattern match
  # starts. However, the sequences <code>\1</code>, <code>\2</code>, and so
  # on may be used to interpolate successive groups in the match.
  #
  # If the replacement converts to a <code>Hash</code> (via
  # <code>to_hash</code>), each matched text is looked up in it and the
  # value found is used as the substitution.
  #
  # In the block form, the current match string is passed in as a parameter,
  # and variables such as <code>$1</code>, <code>$2</code>, <code>$`</code>,
  # <code>$&</code>, and <code>$'</code> will be set appropriately. The value
  # returned by the block will be substituted for the match on each call.
  # A <code>RuntimeError</code> is raised if the block (or hash lookup)
  # modifies the receiver.
  #
  # With neither a replacement nor a block an enumerator for
  # <code>gsub!</code> is returned.
  #
  # The receiver inherits any tainting and trustiness in any supplied
  # replacement string.
  #
  #   "hello".gsub!(/[aeiou]/, '*')                  #=> "h*ll*"
  #   "hello".gsub!(/([aeiou])/, '<\1>')             #=> "h<e>ll<o>"
  #   "hello".gsub!(/./) { |s| s.ord.to_s + ' ' }    #=> "104 101 108 108 111 "
  #   "hello".gsub!(/x/, '*')                        #=> nil
  def gsub!(pattern, replacement=undefined)
    unless block_given? or replacement != undefined
      # The enumerator must drive gsub! itself; enumerating :gsub would
      # leave the receiver unmodified.
      return to_enum(:gsub!, pattern, replacement)
    end

    Rubinius.check_frozen

    tainted = false
    untrusted = untrusted?

    if replacement.equal?(undefined)
      use_yield = true
    else
      use_yield = false
      tainted = replacement.tainted?
      untrusted ||= replacement.untrusted?

      hash = Rubinius::Type.check_convert_type(replacement, Hash, :to_hash)
      replacement = StringValue(replacement) unless hash
      tainted ||= replacement.tainted?
      untrusted ||= replacement.untrusted?
    end

    pattern = get_pattern(pattern, true)

    # Remembered so that a receiver modified from the block can be detected.
    orig_len = @num_bytes
    orig_data = @data

    last_end = 0
    last_match = nil

    ret = byteslice 0, 0 # Empty string and string subclass

    match = pattern.match_from self, last_end

    # Nothing to substitute: report that by returning nil.
    unless match
      Regexp.last_match = nil
      return nil
    end

    while match
      span = match.full
      match_start = span.at(0)
      match_end = span.at(1)

      # Copy the text between the previous match and this one.
      gap = match_start - last_end
      ret.append byteslice(last_end, gap) if gap > 0

      if use_yield || hash
        Regexp.last_match = match

        if use_yield
          val = yield match.to_s
        else
          val = hash[match.to_s]
        end
        untrusted = true if val.untrusted?
        val = val.to_s unless val.kind_of?(String)

        tainted ||= val.tainted?
        ret.append val

        if !@data.equal?(orig_data) or @num_bytes != orig_len
          raise RuntimeError, "string modified"
        end
      else
        replacement.to_sub_replacement(ret, match)
      end

      tainted ||= val.tainted?

      last_end = match_end

      # After an empty match the search resumes one character further on,
      # otherwise directly after the matched text.
      if match_start == match_end
        if char = find_character(match_start)
          offset = match_start + char.bytesize
        else
          offset = match_start + 1
        end
      else
        offset = match_end
      end

      last_match = match

      match = pattern.match_from self, offset
    end

    Regexp.last_match = last_match

    # Copy whatever follows the final match.
    str = byteslice last_end, @num_bytes-last_end+1
    ret.append str if str

    self.taint if tainted
    self.untrust if untrusted

    replace(ret)
    self
  end
  821. end