PageRenderTime 101ms CodeModel.GetById 17ms RepoModel.GetById 0ms app.codeStats 2ms

/src/org/jruby/RubyString.java

https://bitbucket.org/nicksieger/jruby
Java | 7473 lines | 6155 code | 957 blank | 361 comment | 1696 complexity | 2bbf20d04a1303c55399813fa5724e04 MD5 | raw file
Possible License(s): GPL-3.0, JSON
  1. /*
  2. **** BEGIN LICENSE BLOCK *****
  3. * Version: CPL 1.0/GPL 2.0/LGPL 2.1
  4. *
  5. * The contents of this file are subject to the Common Public
  6. * License Version 1.0 (the "License"); you may not use this file
  7. * except in compliance with the License. You may obtain a copy of
  8. * the License at http://www.eclipse.org/legal/cpl-v10.html
  9. *
  10. * Software distributed under the License is distributed on an "AS
  11. * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
  12. * implied. See the License for the specific language governing
  13. * rights and limitations under the License.
  14. *
  15. * Copyright (C) 2001 Alan Moore <alan_moore@gmx.net>
  16. * Copyright (C) 2001-2002 Benoit Cerrina <b.cerrina@wanadoo.fr>
  17. * Copyright (C) 2001-2004 Jan Arne Petersen <jpetersen@uni-bonn.de>
  18. * Copyright (C) 2002-2004 Anders Bengtsson <ndrsbngtssn@yahoo.se>
  19. * Copyright (C) 2002-2006 Thomas E Enebo <enebo@acm.org>
  20. * Copyright (C) 2004 Stefan Matthias Aust <sma@3plus4.de>
  21. * Copyright (C) 2004 David Corbin <dcorbin@users.sourceforge.net>
  22. * Copyright (C) 2005 Tim Azzopardi <tim@tigerfive.com>
  23. * Copyright (C) 2006 Miguel Covarrubias <mlcovarrubias@gmail.com>
  24. * Copyright (C) 2006 Ola Bini <ola@ologix.com>
  25. * Copyright (C) 2007 Nick Sieger <nicksieger@gmail.com>
  26. *
  27. * Alternatively, the contents of this file may be used under the terms of
  28. * either of the GNU General Public License Version 2 or later (the "GPL"),
  29. * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
  30. * in which case the provisions of the GPL or the LGPL are applicable instead
  31. * of those above. If you wish to allow use of your version of this file only
  32. * under the terms of either the GPL or the LGPL, and not to allow others to
  33. * use your version of this file under the terms of the CPL, indicate your
  34. * decision by deleting the provisions above and replace them with the notice
  35. * and other provisions required by the GPL or the LGPL. If you do not delete
  36. * the provisions above, a recipient may use your version of this file under
  37. * the terms of any one of the CPL, the GPL or the LGPL.
  38. ***** END LICENSE BLOCK *****/
  39. package org.jruby;
  40. import static org.jruby.RubyEnumerator.enumeratorize;
  41. import static org.jruby.anno.FrameField.BACKREF;
  42. import static org.jruby.util.StringSupport.CR_7BIT;
  43. import static org.jruby.util.StringSupport.CR_BROKEN;
  44. import static org.jruby.util.StringSupport.CR_MASK;
  45. import static org.jruby.util.StringSupport.CR_UNKNOWN;
  46. import static org.jruby.util.StringSupport.CR_VALID;
  47. import static org.jruby.util.StringSupport.codeLength;
  48. import static org.jruby.util.StringSupport.codePoint;
  49. import static org.jruby.util.StringSupport.codeRangeScan;
  50. import static org.jruby.util.StringSupport.searchNonAscii;
  51. import static org.jruby.util.StringSupport.strLengthWithCodeRange;
  52. import static org.jruby.util.StringSupport.toLower;
  53. import static org.jruby.util.StringSupport.toUpper;
  54. import static org.jruby.util.StringSupport.unpackArg;
  55. import static org.jruby.util.StringSupport.unpackResult;
  56. import java.io.UnsupportedEncodingException;
  57. import java.nio.ByteBuffer;
  58. import java.nio.charset.CharacterCodingException;
  59. import java.nio.charset.Charset;
  60. import java.nio.charset.CharsetEncoder;
  61. import java.nio.charset.CodingErrorAction;
  62. import java.util.Arrays;
  63. import java.util.Locale;
  64. import org.jcodings.Encoding;
  65. import org.jcodings.EncodingDB.Entry;
  66. import org.jcodings.ascii.AsciiTables;
  67. import org.jcodings.constants.CharacterType;
  68. import org.jcodings.specific.ASCIIEncoding;
  69. import org.jcodings.specific.USASCIIEncoding;
  70. import org.jcodings.specific.UTF8Encoding;
  71. import org.jcodings.util.IntHash;
  72. import org.joni.Matcher;
  73. import org.joni.Option;
  74. import org.joni.Regex;
  75. import org.joni.Region;
  76. import org.jruby.anno.JRubyClass;
  77. import org.jruby.anno.JRubyMethod;
  78. import org.jruby.cext.RString;
  79. import org.jruby.javasupport.util.RuntimeHelpers;
  80. import org.jruby.runtime.Arity;
  81. import org.jruby.runtime.Block;
  82. import org.jruby.runtime.ClassIndex;
  83. import org.jruby.runtime.DynamicScope;
  84. import org.jruby.runtime.ObjectAllocator;
  85. import org.jruby.runtime.ThreadContext;
  86. import static org.jruby.runtime.Visibility.*;
  87. import static org.jruby.CompatVersion.*;
  88. import org.jruby.runtime.builtin.IRubyObject;
  89. import org.jruby.runtime.encoding.EncodingCapable;
  90. import org.jruby.runtime.marshal.UnmarshalStream;
  91. import org.jruby.util.ByteList;
  92. import org.jruby.util.ConvertBytes;
  93. import org.jruby.util.Numeric;
  94. import org.jruby.util.Pack;
  95. import org.jruby.util.RegexpOptions;
  96. import org.jruby.util.Sprintf;
  97. import org.jruby.util.StringSupport;
  98. import org.jruby.util.TypeConverter;
  99. import org.jruby.util.string.JavaCrypt;
  100. /**
  101. * Implementation of Ruby String class
  102. *
  103. * Concurrency: no synchronization is required among readers, but
  104. * all users must synchronize externally with writers.
  105. *
  106. */
  107. @JRubyClass(name="String", include={"Enumerable", "Comparable"})
  108. public class RubyString extends RubyObject implements EncodingCapable {
  109. private static final ASCIIEncoding ASCII = ASCIIEncoding.INSTANCE;
  110. private static final UTF8Encoding UTF8 = UTF8Encoding.INSTANCE;
  111. private static final byte[] EMPTY_BYTE_ARRAY = new byte[0];
  112. // string doesn't share any resources
  113. private static final int SHARE_LEVEL_NONE = 0;
  114. // string has it's own ByteList, but it's pointing to a shared buffer (byte[])
  115. private static final int SHARE_LEVEL_BUFFER = 1;
  116. // string doesn't have it's own ByteList (values)
  117. private static final int SHARE_LEVEL_BYTELIST = 2;
  118. private volatile int shareLevel = SHARE_LEVEL_NONE;
  119. private ByteList value;
  120. private RString rstring;
  121. public static RubyClass createStringClass(Ruby runtime) {
  122. RubyClass stringClass = runtime.defineClass("String", runtime.getObject(), STRING_ALLOCATOR);
  123. runtime.setString(stringClass);
  124. stringClass.index = ClassIndex.STRING;
  125. stringClass.setReifiedClass(RubyString.class);
  126. stringClass.kindOf = new RubyModule.KindOf() {
  127. @Override
  128. public boolean isKindOf(IRubyObject obj, RubyModule type) {
  129. return obj instanceof RubyString;
  130. }
  131. };
  132. stringClass.includeModule(runtime.getComparable());
  133. if (!runtime.is1_9()) stringClass.includeModule(runtime.getEnumerable());
  134. stringClass.defineAnnotatedMethods(RubyString.class);
  135. return stringClass;
  136. }
  137. private static ObjectAllocator STRING_ALLOCATOR = new ObjectAllocator() {
  138. public IRubyObject allocate(Ruby runtime, RubyClass klass) {
  139. return RubyString.newEmptyString(runtime, klass);
  140. }
  141. };
  142. public Encoding getEncoding() {
  143. return value.getEncoding();
  144. }
  145. public void setEncoding(Encoding encoding) {
  146. value.setEncoding(encoding);
  147. }
  148. public void associateEncoding(Encoding enc) {
  149. if (value.getEncoding() != enc) {
  150. if (!isCodeRangeAsciiOnly() || !enc.isAsciiCompatible()) clearCodeRange();
  151. value.setEncoding(enc);
  152. }
  153. }
  154. public final void setEncodingAndCodeRange(Encoding enc, int cr) {
  155. value.setEncoding(enc);
  156. setCodeRange(cr);
  157. }
  158. public final Encoding toEncoding(Ruby runtime) {
  159. return runtime.getEncodingService().findEncoding(this);
  160. }
  161. public final int getCodeRange() {
  162. return flags & CR_MASK;
  163. }
  164. public final void setCodeRange(int codeRange) {
  165. flags |= codeRange & CR_MASK;
  166. }
  167. public final RString getRString() {
  168. return rstring;
  169. }
  170. public final void setRString(RString rstring) {
  171. this.rstring = rstring;
  172. }
  173. public final void clearCodeRange() {
  174. flags &= ~CR_MASK;
  175. }
  176. private void keepCodeRange() {
  177. if (getCodeRange() == CR_BROKEN) clearCodeRange();
  178. }
  179. // ENC_CODERANGE_ASCIIONLY
  180. public final boolean isCodeRangeAsciiOnly() {
  181. return getCodeRange() == CR_7BIT;
  182. }
  183. // rb_enc_str_asciionly_p
  184. public final boolean isAsciiOnly() {
  185. return value.getEncoding().isAsciiCompatible() && scanForCodeRange() == CR_7BIT;
  186. }
  187. public final boolean isCodeRangeValid() {
  188. return (flags & CR_VALID) != 0;
  189. }
  190. public final boolean isCodeRangeBroken() {
  191. return (flags & CR_BROKEN) != 0;
  192. }
  193. static int codeRangeAnd(int cr1, int cr2) {
  194. if (cr1 == CR_7BIT) return cr2;
  195. if (cr1 == CR_VALID) return cr2 == CR_7BIT ? CR_VALID : cr2;
  196. return CR_UNKNOWN;
  197. }
  198. private void copyCodeRangeForSubstr(RubyString from, Encoding enc) {
  199. int fromCr = from.getCodeRange();
  200. if (fromCr == CR_7BIT) {
  201. setCodeRange(fromCr);
  202. } else if (fromCr == CR_VALID) {
  203. if (!enc.isAsciiCompatible() || searchNonAscii(value) != -1) {
  204. setCodeRange(CR_VALID);
  205. } else {
  206. setCodeRange(CR_7BIT);
  207. }
  208. } else{
  209. if (value.getRealSize() == 0) {
  210. setCodeRange(!enc.isAsciiCompatible() ? CR_VALID : CR_7BIT);
  211. }
  212. }
  213. }
  214. private void copyCodeRange(RubyString from) {
  215. value.setEncoding(from.value.getEncoding());
  216. setCodeRange(from.getCodeRange());
  217. }
  218. // rb_enc_str_coderange
  219. final int scanForCodeRange() {
  220. int cr = getCodeRange();
  221. if (cr == CR_UNKNOWN) {
  222. cr = codeRangeScan(value.getEncoding(), value);
  223. setCodeRange(cr);
  224. }
  225. return cr;
  226. }
  227. final boolean singleByteOptimizable() {
  228. return getCodeRange() == CR_7BIT || value.getEncoding().isSingleByte();
  229. }
  230. final boolean singleByteOptimizable(Encoding enc) {
  231. return getCodeRange() == CR_7BIT || enc.isSingleByte();
  232. }
  233. private Encoding isCompatibleWith(RubyString other) {
  234. Encoding enc1 = value.getEncoding();
  235. Encoding enc2 = other.value.getEncoding();
  236. if (enc1 == enc2) return enc1;
  237. if (other.value.getRealSize() == 0) return enc1;
  238. if (value.getRealSize() == 0) return enc2;
  239. if (!enc1.isAsciiCompatible() || !enc2.isAsciiCompatible()) return null;
  240. return RubyEncoding.areCompatible(enc1, scanForCodeRange(), enc2, other.scanForCodeRange());
  241. }
  242. final Encoding isCompatibleWith(EncodingCapable other) {
  243. if (other instanceof RubyString) return checkEncoding((RubyString)other);
  244. Encoding enc1 = value.getEncoding();
  245. Encoding enc2 = other.getEncoding();
  246. if (enc1 == enc2) return enc1;
  247. if (value.getRealSize() == 0) return enc2;
  248. if (!enc1.isAsciiCompatible() || !enc2.isAsciiCompatible()) return null;
  249. if (enc2 instanceof USASCIIEncoding) return enc1;
  250. if (scanForCodeRange() == CR_7BIT) return enc2;
  251. return null;
  252. }
  253. final Encoding checkEncoding(RubyString other) {
  254. Encoding enc = isCompatibleWith(other);
  255. if (enc == null) throw getRuntime().newEncodingCompatibilityError("incompatible character encodings: " +
  256. value.getEncoding() + " and " + other.value.getEncoding());
  257. return enc;
  258. }
  259. final Encoding checkEncoding(EncodingCapable other) {
  260. Encoding enc = isCompatibleWith(other);
  261. if (enc == null) throw getRuntime().newEncodingCompatibilityError("incompatible character encodings: " +
  262. value.getEncoding() + " and " + other.getEncoding());
  263. return enc;
  264. }
  265. private Encoding checkDummyEncoding() {
  266. Encoding enc = value.getEncoding();
  267. if (enc.isDummy()) throw getRuntime().newEncodingCompatibilityError(
  268. "incompatible encoding with this operation: " + enc);
  269. return enc;
  270. }
  271. private boolean isComparableWith(RubyString other) {
  272. ByteList otherValue = other.value;
  273. if (value.getEncoding() == otherValue.getEncoding() ||
  274. value.getRealSize() == 0 || otherValue.getRealSize() == 0) return true;
  275. return isComparableViaCodeRangeWith(other);
  276. }
  277. private boolean isComparableViaCodeRangeWith(RubyString other) {
  278. int cr1 = scanForCodeRange();
  279. int cr2 = other.scanForCodeRange();
  280. if (cr1 == CR_7BIT && (cr2 == CR_7BIT || other.value.getEncoding().isAsciiCompatible())) return true;
  281. if (cr2 == CR_7BIT && value.getEncoding().isAsciiCompatible()) return true;
  282. return false;
  283. }
  284. private int strLength(Encoding enc) {
  285. if (singleByteOptimizable(enc)) return value.getRealSize();
  286. return strLength(value, enc);
  287. }
  288. final int strLength() {
  289. if (singleByteOptimizable()) return value.getRealSize();
  290. return strLength(value);
  291. }
  292. private int strLength(ByteList bytes) {
  293. return strLength(bytes, bytes.getEncoding());
  294. }
  295. private int strLength(ByteList bytes, Encoding enc) {
  296. if (isCodeRangeValid() && enc instanceof UTF8Encoding) return StringSupport.utf8Length(value);
  297. long lencr = strLengthWithCodeRange(bytes, enc);
  298. int cr = unpackArg(lencr);
  299. if (cr != 0) setCodeRange(cr);
  300. return unpackResult(lencr);
  301. }
  302. final int subLength(int pos) {
  303. if (singleByteOptimizable() || pos < 0) return pos;
  304. return StringSupport.strLength(value.getEncoding(), value.getUnsafeBytes(), value.getBegin(), value.getBegin() + pos);
  305. }
  306. /** short circuit for String key comparison
  307. *
  308. */
  309. @Override
  310. public final boolean eql(IRubyObject other) {
  311. Ruby runtime = getRuntime();
  312. if (getMetaClass() != runtime.getString() || getMetaClass() != other.getMetaClass()) return super.eql(other);
  313. return runtime.is1_9() ? eql19(runtime, other) : eql18(runtime, other);
  314. }
  315. private boolean eql18(Ruby runtime, IRubyObject other) {
  316. return value.equal(((RubyString)other).value);
  317. }
  318. // rb_str_hash_cmp
  319. private boolean eql19(Ruby runtime, IRubyObject other) {
  320. RubyString otherString = (RubyString)other;
  321. return isComparableWith(otherString) && value.equal(((RubyString)other).value);
  322. }
  323. public RubyString(Ruby runtime, RubyClass rubyClass) {
  324. this(runtime, rubyClass, EMPTY_BYTE_ARRAY);
  325. }
  326. public RubyString(Ruby runtime, RubyClass rubyClass, CharSequence value) {
  327. super(runtime, rubyClass);
  328. assert value != null;
  329. byte[] bytes = RubyEncoding.encodeUTF8(value);
  330. this.value = new ByteList(bytes, false);
  331. this.value.setEncoding(UTF8);
  332. }
  333. public RubyString(Ruby runtime, RubyClass rubyClass, byte[] value) {
  334. super(runtime, rubyClass);
  335. assert value != null;
  336. this.value = new ByteList(value);
  337. }
  338. public RubyString(Ruby runtime, RubyClass rubyClass, ByteList value) {
  339. super(runtime, rubyClass);
  340. assert value != null;
  341. this.value = value;
  342. }
  343. public RubyString(Ruby runtime, RubyClass rubyClass, ByteList value, boolean objectSpace) {
  344. super(runtime, rubyClass, objectSpace);
  345. assert value != null;
  346. this.value = value;
  347. }
  348. protected RubyString(Ruby runtime, RubyClass rubyClass, ByteList value, Encoding enc, int cr) {
  349. this(runtime, rubyClass, value);
  350. value.setEncoding(enc);
  351. flags |= cr;
  352. }
  353. protected RubyString(Ruby runtime, RubyClass rubyClass, ByteList value, Encoding enc) {
  354. this(runtime, rubyClass, value);
  355. value.setEncoding(enc);
  356. }
  357. protected RubyString(Ruby runtime, RubyClass rubyClass, ByteList value, int cr) {
  358. this(runtime, rubyClass, value);
  359. flags |= cr;
  360. }
  361. // Deprecated String construction routines
  362. /** Create a new String which uses the same Ruby runtime and the same
  363. * class like this String.
  364. *
  365. * This method should be used to satisfy RCR #38.
  366. * @deprecated
  367. */
  368. @Deprecated
  369. public RubyString newString(CharSequence s) {
  370. return new RubyString(getRuntime(), getType(), s);
  371. }
  372. /** Create a new String which uses the same Ruby runtime and the same
  373. * class like this String.
  374. *
  375. * This method should be used to satisfy RCR #38.
  376. * @deprecated
  377. */
  378. @Deprecated
  379. public RubyString newString(ByteList s) {
  380. return new RubyString(getRuntime(), getMetaClass(), s);
  381. }
  382. @Deprecated
  383. public static RubyString newString(Ruby runtime, RubyClass clazz, CharSequence str) {
  384. return new RubyString(runtime, clazz, str);
  385. }
  386. public static RubyString newStringLight(Ruby runtime, ByteList bytes) {
  387. return new RubyString(runtime, runtime.getString(), bytes, false);
  388. }
  389. public static RubyString newStringLight(Ruby runtime, int size) {
  390. return new RubyString(runtime, runtime.getString(), new ByteList(size), false);
  391. }
  392. public static RubyString newString(Ruby runtime, CharSequence str) {
  393. return new RubyString(runtime, runtime.getString(), str);
  394. }
  395. public static RubyString newString(Ruby runtime, String str) {
  396. return new RubyString(runtime, runtime.getString(), str);
  397. }
  398. public static RubyString newString(Ruby runtime, byte[] bytes) {
  399. return new RubyString(runtime, runtime.getString(), bytes);
  400. }
  401. public static RubyString newString(Ruby runtime, byte[] bytes, int start, int length) {
  402. byte[] copy = new byte[length];
  403. System.arraycopy(bytes, start, copy, 0, length);
  404. return new RubyString(runtime, runtime.getString(), new ByteList(copy, false));
  405. }
  406. public static RubyString newString(Ruby runtime, ByteList bytes) {
  407. return new RubyString(runtime, runtime.getString(), bytes);
  408. }
  409. public static RubyString newString(Ruby runtime, ByteList bytes, Encoding encoding) {
  410. return new RubyString(runtime, runtime.getString(), bytes, encoding);
  411. }
  412. public static RubyString newUnicodeString(Ruby runtime, String str) {
  413. return new RubyString(runtime, runtime.getString(), new ByteList(RubyEncoding.encodeUTF8(str), false));
  414. }
  415. // String construction routines by NOT byte[] buffer and making the target String shared
  416. public static RubyString newStringShared(Ruby runtime, RubyString orig) {
  417. orig.shareLevel = SHARE_LEVEL_BYTELIST;
  418. RubyString str = new RubyString(runtime, runtime.getString(), orig.value);
  419. str.shareLevel = SHARE_LEVEL_BYTELIST;
  420. return str;
  421. }
  422. public static RubyString newStringShared(Ruby runtime, ByteList bytes) {
  423. return newStringShared(runtime, runtime.getString(), bytes);
  424. }
  425. public static RubyString newStringShared(Ruby runtime, ByteList bytes, Encoding encoding) {
  426. return newStringShared(runtime, runtime.getString(), bytes, encoding);
  427. }
  428. public static RubyString newStringShared(Ruby runtime, ByteList bytes, int codeRange) {
  429. RubyString str = new RubyString(runtime, runtime.getString(), bytes, codeRange);
  430. str.shareLevel = SHARE_LEVEL_BYTELIST;
  431. return str;
  432. }
  433. public static RubyString newStringShared(Ruby runtime, RubyClass clazz, ByteList bytes) {
  434. RubyString str = new RubyString(runtime, clazz, bytes);
  435. str.shareLevel = SHARE_LEVEL_BYTELIST;
  436. return str;
  437. }
  438. public static RubyString newStringShared(Ruby runtime, RubyClass clazz, ByteList bytes, Encoding encoding) {
  439. RubyString str = new RubyString(runtime, clazz, bytes, encoding);
  440. str.shareLevel = SHARE_LEVEL_BYTELIST;
  441. return str;
  442. }
  443. public static RubyString newStringShared(Ruby runtime, byte[] bytes) {
  444. return newStringShared(runtime, new ByteList(bytes, false));
  445. }
  446. public static RubyString newStringShared(Ruby runtime, byte[] bytes, int start, int length) {
  447. return newStringShared(runtime, new ByteList(bytes, start, length, false));
  448. }
  449. public static RubyString newEmptyString(Ruby runtime) {
  450. return newEmptyString(runtime, runtime.getString());
  451. }
  452. public static RubyString newEmptyString(Ruby runtime, RubyClass metaClass) {
  453. RubyString empty = new RubyString(runtime, metaClass, ByteList.EMPTY_BYTELIST);
  454. empty.shareLevel = SHARE_LEVEL_BYTELIST;
  455. return empty;
  456. }
  457. // String construction routines by NOT byte[] buffer and NOT making the target String shared
  458. public static RubyString newStringNoCopy(Ruby runtime, ByteList bytes) {
  459. return newStringNoCopy(runtime, runtime.getString(), bytes);
  460. }
  461. public static RubyString newStringNoCopy(Ruby runtime, RubyClass clazz, ByteList bytes) {
  462. return new RubyString(runtime, clazz, bytes);
  463. }
  464. public static RubyString newStringNoCopy(Ruby runtime, byte[] bytes, int start, int length) {
  465. return newStringNoCopy(runtime, new ByteList(bytes, start, length, false));
  466. }
  467. public static RubyString newStringNoCopy(Ruby runtime, byte[] bytes) {
  468. return newStringNoCopy(runtime, new ByteList(bytes, false));
  469. }
  470. /** Encoding aware String construction routines for 1.9
  471. *
  472. */
  473. private static final class EmptyByteListHolder {
  474. final ByteList bytes;
  475. final int cr;
  476. EmptyByteListHolder(Encoding enc) {
  477. this.bytes = new ByteList(ByteList.NULL_ARRAY, enc);
  478. this.cr = bytes.getEncoding().isAsciiCompatible() ? CR_7BIT : CR_VALID;
  479. }
  480. }
  481. private static EmptyByteListHolder EMPTY_BYTELISTS[] = new EmptyByteListHolder[4];
  482. static EmptyByteListHolder getEmptyByteList(Encoding enc) {
  483. int index = enc.getIndex();
  484. EmptyByteListHolder bytes;
  485. if (index < EMPTY_BYTELISTS.length && (bytes = EMPTY_BYTELISTS[index]) != null) {
  486. return bytes;
  487. }
  488. return prepareEmptyByteList(enc);
  489. }
  490. private static EmptyByteListHolder prepareEmptyByteList(Encoding enc) {
  491. int index = enc.getIndex();
  492. if (index >= EMPTY_BYTELISTS.length) {
  493. EmptyByteListHolder tmp[] = new EmptyByteListHolder[index + 4];
  494. System.arraycopy(EMPTY_BYTELISTS,0, tmp, 0, EMPTY_BYTELISTS.length);
  495. EMPTY_BYTELISTS = tmp;
  496. }
  497. return EMPTY_BYTELISTS[index] = new EmptyByteListHolder(enc);
  498. }
  499. public static RubyString newEmptyString(Ruby runtime, RubyClass metaClass, Encoding enc) {
  500. EmptyByteListHolder holder = getEmptyByteList(enc);
  501. RubyString empty = new RubyString(runtime, metaClass, holder.bytes, holder.cr);
  502. empty.shareLevel = SHARE_LEVEL_BYTELIST;
  503. return empty;
  504. }
  505. public static RubyString newEmptyString(Ruby runtime, Encoding enc) {
  506. return newEmptyString(runtime, runtime.getString(), enc);
  507. }
  508. public static RubyString newStringNoCopy(Ruby runtime, RubyClass clazz, ByteList bytes, Encoding enc, int cr) {
  509. return new RubyString(runtime, clazz, bytes, enc, cr);
  510. }
  511. public static RubyString newStringNoCopy(Ruby runtime, ByteList bytes, Encoding enc, int cr) {
  512. return newStringNoCopy(runtime, runtime.getString(), bytes, enc, cr);
  513. }
  514. public static RubyString newUsAsciiStringNoCopy(Ruby runtime, ByteList bytes) {
  515. return newStringNoCopy(runtime, bytes, USASCIIEncoding.INSTANCE, CR_7BIT);
  516. }
  517. public static RubyString newUsAsciiStringShared(Ruby runtime, ByteList bytes) {
  518. RubyString str = newStringNoCopy(runtime, bytes, USASCIIEncoding.INSTANCE, CR_7BIT);
  519. str.shareLevel = SHARE_LEVEL_BYTELIST;
  520. return str;
  521. }
  522. public static RubyString newUsAsciiStringShared(Ruby runtime, byte[] bytes, int start, int length) {
  523. byte[] copy = new byte[length];
  524. System.arraycopy(bytes, start, copy, 0, length);
  525. return newUsAsciiStringShared(runtime, new ByteList(copy, false));
  526. }
  527. @Override
  528. public int getNativeTypeIndex() {
  529. return ClassIndex.STRING;
  530. }
  531. @Override
  532. public Class getJavaClass() {
  533. return String.class;
  534. }
  535. @Override
  536. public RubyString convertToString() {
  537. return this;
  538. }
  539. @Override
  540. public String toString() {
  541. return decodeString();
  542. }
  543. /**
  544. * Convert this Ruby string to a Java String. This version is encoding-aware.
  545. *
  546. * @return A decoded Java String, based on this Ruby string's encoding.
  547. */
  548. public String decodeString() {
  549. try {
  550. // 1.9 support for encodings
  551. // TODO: Fix charset use for JRUBY-4553
  552. if (getRuntime().is1_9()) {
  553. return new String(value.getUnsafeBytes(), value.begin(), value.length(), getEncoding().toString());
  554. }
  555. return RubyEncoding.decodeUTF8(value.getUnsafeBytes(), value.begin(), value.length());
  556. } catch (UnsupportedEncodingException uee) {
  557. return value.toString();
  558. }
  559. }
  560. /** rb_str_dup
  561. *
  562. */
  563. @Deprecated
  564. public final RubyString strDup() {
  565. return strDup(getRuntime(), getMetaClass());
  566. }
  567. public final RubyString strDup(Ruby runtime) {
  568. return strDup(runtime, getMetaClass());
  569. }
  570. @Deprecated
  571. final RubyString strDup(RubyClass clazz) {
  572. return strDup(getRuntime(), getMetaClass());
  573. }
  574. final RubyString strDup(Ruby runtime, RubyClass clazz) {
  575. shareLevel = SHARE_LEVEL_BYTELIST;
  576. RubyString dup = new RubyString(runtime, clazz, value);
  577. dup.shareLevel = SHARE_LEVEL_BYTELIST;
  578. dup.flags |= flags & (CR_MASK | TAINTED_F | UNTRUSTED_F);
  579. return dup;
  580. }
  581. /* rb_str_subseq */
  582. public final RubyString makeSharedString(Ruby runtime, int index, int len) {
  583. return makeShared(runtime, runtime.getString(), index, len);
  584. }
  585. public final RubyString makeShared(Ruby runtime, int index, int len) {
  586. return makeShared(runtime, getType(), index, len);
  587. }
  588. public final RubyString makeShared(Ruby runtime, RubyClass meta, int index, int len) {
  589. final RubyString shared;
  590. if (len == 0) {
  591. shared = newEmptyString(runtime, meta);
  592. } else if (len == 1) {
  593. shared = newStringShared(runtime, meta,
  594. RubyInteger.SINGLE_CHAR_BYTELISTS[value.getUnsafeBytes()[value.getBegin() + index] & 0xff]);
  595. } else {
  596. if (shareLevel == SHARE_LEVEL_NONE) shareLevel = SHARE_LEVEL_BUFFER;
  597. shared = new RubyString(runtime, meta, value.makeShared(index, len));
  598. shared.shareLevel = SHARE_LEVEL_BUFFER;
  599. }
  600. shared.infectBy(this);
  601. return shared;
  602. }
  603. public final RubyString makeShared19(Ruby runtime, int index, int len) {
  604. return makeShared19(runtime, value, index, len);
  605. }
  606. private RubyString makeShared19(Ruby runtime, ByteList value, int index, int len) {
  607. final RubyString shared;
  608. Encoding enc = value.getEncoding();
  609. RubyClass meta = getType();
  610. if (len == 0) {
  611. shared = newEmptyString(runtime, meta, enc);
  612. } else {
  613. if (shareLevel == SHARE_LEVEL_NONE) shareLevel = SHARE_LEVEL_BUFFER;
  614. shared = new RubyString(runtime, meta, value.makeShared(index, len));
  615. shared.shareLevel = SHARE_LEVEL_BUFFER;
  616. shared.copyCodeRangeForSubstr(this, enc); // no need to assign encoding, same bytelist shared
  617. }
  618. shared.infectBy(this);
  619. return shared;
  620. }
  621. final void modifyCheck() {
  622. frozenCheck();
  623. if (!isTaint() && getRuntime().getSafeLevel() >= 4) {
  624. throw getRuntime().newSecurityError("Insecure: can't modify string");
  625. }
  626. }
  627. private final void modifyCheck(byte[] b, int len) {
  628. if (value.getUnsafeBytes() != b || value.getRealSize() != len) throw getRuntime().newRuntimeError("string modified");
  629. }
  630. private final void modifyCheck(byte[] b, int len, Encoding enc) {
  631. if (value.getUnsafeBytes() != b || value.getRealSize() != len || value.getEncoding() != enc) throw getRuntime().newRuntimeError("string modified");
  632. }
  633. private void frozenCheck() {
  634. frozenCheck(false);
  635. }
  636. private void frozenCheck(boolean runtimeError) {
  637. if (isFrozen()) throw getRuntime().newFrozenError("string", runtimeError);
  638. }
  639. /** rb_str_modify
  640. *
  641. */
  642. public final void modify() {
  643. modifyCheck();
  644. if (shareLevel != SHARE_LEVEL_NONE) {
  645. if (shareLevel == SHARE_LEVEL_BYTELIST) {
  646. value = value.dup();
  647. } else {
  648. value.unshare();
  649. }
  650. shareLevel = SHARE_LEVEL_NONE;
  651. }
  652. value.invalidate();
  653. }
  654. public final void modify19() {
  655. modify();
  656. clearCodeRange();
  657. }
  658. private void modifyAndKeepCodeRange() {
  659. modify();
  660. keepCodeRange();
  661. }
  662. /** rb_str_modify (with length bytes ensured)
  663. *
  664. */
  665. public final void modify(int length) {
  666. modifyCheck();
  667. if (shareLevel != SHARE_LEVEL_NONE) {
  668. if (shareLevel == SHARE_LEVEL_BYTELIST) {
  669. value = value.dup(length);
  670. } else {
  671. value.unshare(length);
  672. }
  673. shareLevel = SHARE_LEVEL_NONE;
  674. } else {
  675. value.ensure(length);
  676. }
  677. value.invalidate();
  678. }
  679. public final void modify19(int length) {
  680. modify(length);
  681. clearCodeRange();
  682. }
  683. /** rb_str_resize
  684. */
  685. public final void resize(int length) {
  686. modify();
  687. if (value.getRealSize() > length) {
  688. value.setRealSize(length);
  689. } else if (value.length() < length) {
  690. value.length(length);
  691. }
  692. }
  693. final void view(ByteList bytes) {
  694. modifyCheck();
  695. value = bytes;
  696. shareLevel = SHARE_LEVEL_NONE;
  697. }
  698. private final void view(byte[]bytes) {
  699. modifyCheck();
  700. value.replace(bytes);
  701. shareLevel = SHARE_LEVEL_NONE;
  702. value.invalidate();
  703. }
  704. private final void view(int index, int len) {
  705. modifyCheck();
  706. if (shareLevel != SHARE_LEVEL_NONE) {
  707. if (shareLevel == SHARE_LEVEL_BYTELIST) {
  708. // if len == 0 then shared empty
  709. value = value.makeShared(index, len);
  710. shareLevel = SHARE_LEVEL_BUFFER;
  711. } else {
  712. value.view(index, len);
  713. }
  714. } else {
  715. value.view(index, len);
  716. // FIXME this below is temporary, but its much safer for COW (it prevents not shared Strings with begin != 0)
  717. // this allows now e.g.: ByteList#set not to be begin aware
  718. shareLevel = SHARE_LEVEL_BUFFER;
  719. }
  720. value.invalidate();
  721. }
  722. public static String bytesToString(byte[] bytes, int beg, int len) {
  723. return new String(ByteList.plain(bytes, beg, len));
  724. }
  725. public static String byteListToString(ByteList bytes) {
  726. return bytesToString(bytes.getUnsafeBytes(), bytes.begin(), bytes.length());
  727. }
  728. public static String bytesToString(byte[] bytes) {
  729. return bytesToString(bytes, 0, bytes.length);
  730. }
  731. public static byte[] stringToBytes(String string) {
  732. return ByteList.plain(string);
  733. }
  734. @Override
  735. public RubyString asString() {
  736. return this;
  737. }
  738. @Override
  739. public IRubyObject checkStringType() {
  740. return this;
  741. }
  742. @Override
  743. public IRubyObject checkStringType19() {
  744. return this;
  745. }
  746. @JRubyMethod(name = "try_convert", meta = true, compat = RUBY1_9)
  747. public static IRubyObject try_convert(ThreadContext context, IRubyObject recv, IRubyObject str) {
  748. return str.checkStringType();
  749. }
  750. @JRubyMethod(name = {"to_s", "to_str"})
  751. @Override
  752. public IRubyObject to_s() {
  753. Ruby runtime = getRuntime();
  754. if (getMetaClass().getRealClass() != runtime.getString()) {
  755. return strDup(runtime, runtime.getString());
  756. }
  757. return this;
  758. }
  759. @Override
  760. public final int compareTo(IRubyObject other) {
  761. Ruby runtime = getRuntime();
  762. if (other instanceof RubyString) {
  763. RubyString otherString = (RubyString)other;
  764. return runtime.is1_9() ? op_cmp19(otherString) : op_cmp(otherString);
  765. }
  766. return (int)op_cmpCommon(runtime.getCurrentContext(), other).convertToInteger().getLongValue();
  767. }
  768. /* rb_str_cmp_m */
  769. @JRubyMethod(name = "<=>", compat = RUBY1_8)
  770. public IRubyObject op_cmp(ThreadContext context, IRubyObject other) {
  771. if (other instanceof RubyString) {
  772. return context.getRuntime().newFixnum(op_cmp((RubyString)other));
  773. }
  774. return op_cmpCommon(context, other);
  775. }
  776. @JRubyMethod(name = "<=>", compat = RUBY1_9)
  777. public IRubyObject op_cmp19(ThreadContext context, IRubyObject other) {
  778. if (other instanceof RubyString) {
  779. return context.getRuntime().newFixnum(op_cmp19((RubyString)other));
  780. }
  781. return op_cmpCommon(context, other);
  782. }
  783. private IRubyObject op_cmpCommon(ThreadContext context, IRubyObject other) {
  784. Ruby runtime = context.getRuntime();
  785. // deal with case when "other" is not a string
  786. if (other.respondsTo("to_str") && other.respondsTo("<=>")) {
  787. IRubyObject result = other.callMethod(context, "<=>", this);
  788. if (result.isNil()) return result;
  789. if (result instanceof RubyFixnum) {
  790. return RubyFixnum.newFixnum(runtime, -((RubyFixnum)result).getLongValue());
  791. } else {
  792. return RubyFixnum.zero(runtime).callMethod(context, "-", result);
  793. }
  794. }
  795. return runtime.getNil();
  796. }
  797. /** rb_str_equal
  798. *
  799. */
  800. @JRubyMethod(name = "==", compat = RUBY1_8)
  801. @Override
  802. public IRubyObject op_equal(ThreadContext context, IRubyObject other) {
  803. Ruby runtime = context.getRuntime();
  804. if (this == other) return runtime.getTrue();
  805. if (other instanceof RubyString) {
  806. return value.equal(((RubyString)other).value) ? runtime.getTrue() : runtime.getFalse();
  807. }
  808. return op_equalCommon(context, other);
  809. }
  810. @JRubyMethod(name = "==", compat = RUBY1_9)
  811. public IRubyObject op_equal19(ThreadContext context, IRubyObject other) {
  812. Ruby runtime = context.getRuntime();
  813. if (this == other) return runtime.getTrue();
  814. if (other instanceof RubyString) {
  815. RubyString otherString = (RubyString)other;
  816. return isComparableWith(otherString) && value.equal(otherString.value) ? runtime.getTrue() : runtime.getFalse();
  817. }
  818. return op_equalCommon(context, other);
  819. }
  820. private IRubyObject op_equalCommon(ThreadContext context, IRubyObject other) {
  821. Ruby runtime = context.getRuntime();
  822. if (!other.respondsTo("to_str")) return runtime.getFalse();
  823. return other.callMethod(context, "==", this).isTrue() ? runtime.getTrue() : runtime.getFalse();
  824. }
  825. @JRubyMethod(name = "+", required = 1, compat = RUBY1_8, argTypes = RubyString.class)
  826. public IRubyObject op_plus(ThreadContext context, RubyString str) {
  827. RubyString resultStr = newString(context.getRuntime(), addByteLists(value, str.value));
  828. resultStr.infectBy(flags | str.flags);
  829. return resultStr;
  830. }
  831. public IRubyObject op_plus(ThreadContext context, IRubyObject other) {
  832. return op_plus(context, other.convertToString());
  833. }
  834. @JRubyMethod(name = "+", required = 1, compat = RUBY1_9)
  835. public IRubyObject op_plus19(ThreadContext context, RubyString str) {
  836. Encoding enc = checkEncoding(str);
  837. RubyString resultStr = newStringNoCopy(context.getRuntime(), addByteLists(value, str.value),
  838. enc, codeRangeAnd(getCodeRange(), str.getCodeRange()));
  839. resultStr.infectBy(flags | str.flags);
  840. return resultStr;
  841. }
  842. public IRubyObject op_plus19(ThreadContext context, IRubyObject other) {
  843. return op_plus19(context, other.convertToString());
  844. }
  845. private ByteList addByteLists(ByteList value1, ByteList value2) {
  846. ByteList result = new ByteList(value1.getRealSize() + value2.getRealSize());
  847. result.setRealSize(value1.getRealSize() + value2.getRealSize());
  848. System.arraycopy(value1.getUnsafeBytes(), value1.getBegin(), result.getUnsafeBytes(), 0, value1.getRealSize());
  849. System.arraycopy(value2.getUnsafeBytes(), value2.getBegin(), result.getUnsafeBytes(), value1.getRealSize(), value2.getRealSize());
  850. return result;
  851. }
  852. @JRubyMethod(name = "*", required = 1, compat = RUBY1_8)
  853. public IRubyObject op_mul(ThreadContext context, IRubyObject other) {
  854. return multiplyByteList(context, other);
  855. }
  856. @JRubyMethod(name = "*", required = 1, compat = RUBY1_9)
  857. public IRubyObject op_mul19(ThreadContext context, IRubyObject other) {
  858. RubyString result = multiplyByteList(context, other);
  859. result.value.setEncoding(value.getEncoding());
  860. result.copyCodeRange(this);
  861. return result;
  862. }
  863. private RubyString multiplyByteList(ThreadContext context, IRubyObject arg) {
  864. int len = RubyNumeric.num2int(arg);
  865. if (len < 0) throw context.getRuntime().newArgumentError("negative argument");
  866. // we limit to int because ByteBuffer can only allocate int sizes
  867. if (len > 0 && Integer.MAX_VALUE / len < value.getRealSize()) {
  868. throw context.getRuntime().newArgumentError("argument too big");
  869. }
  870. ByteList bytes = new ByteList(len *= value.getRealSize());
  871. if (len > 0) {
  872. bytes.setRealSize(len);
  873. int n = value.getRealSize();
  874. System.arraycopy(value.getUnsafeBytes(), value.getBegin(), bytes.getUnsafeBytes(), 0, n);
  875. while (n <= len >> 1) {
  876. System.arraycopy(bytes.getUnsafeBytes(), 0, bytes.getUnsafeBytes(), n, n);
  877. n <<= 1;
  878. }
  879. System.arraycopy(bytes.getUnsafeBytes(), 0, bytes.getUnsafeBytes(), n, len - n);
  880. }
  881. RubyString result = new RubyString(context.getRuntime(), getMetaClass(), bytes);
  882. result.infectBy(this);
  883. return result;
  884. }
  885. @JRubyMethod(name = "%", required = 1)
  886. public IRubyObject op_format(ThreadContext context, IRubyObject arg) {
  887. return opFormatCommon(context, arg, context.getRuntime().getInstanceConfig().getCompatVersion());
  888. }
  889. private IRubyObject opFormatCommon(ThreadContext context, IRubyObject arg, CompatVersion compat) {
  890. IRubyObject tmp = arg.checkArrayType();
  891. if (tmp.isNil()) tmp = arg;
  892. // FIXME: Should we make this work with platform's locale,
  893. // or continue hardcoding US?
  894. ByteList out = new ByteList(value.getRealSize());
  895. boolean tainted;
  896. switch (compat) {
  897. case RUBY1_8:
  898. tainted = Sprintf.sprintf(out, Locale.US, value, tmp);
  899. break;
  900. case RUBY1_9:
  901. tainted = Sprintf.sprintf1_9(out, Locale.US, value, tmp);
  902. break;
  903. default:
  904. throw new RuntimeException("invalid compat version for sprintf: " + compat);
  905. }
  906. RubyString str = newString(context.getRuntime(), out);
  907. str.setTaint(tainted || isTaint());
  908. return str;
  909. }
  910. @JRubyMethod(name = "hash")
  911. @Override
  912. public RubyFixnum hash() {
  913. Ruby runtime = getRuntime();
  914. return RubyFixnum.newFixnum(runtime, strHashCode(runtime));
  915. }
  916. @Override
  917. public int hashCode() {
  918. return strHashCode(getRuntime());
  919. }
  920. private int strHashCode(Ruby runtime) {
  921. if (runtime.is1_9()) {
  922. return value.hashCode() ^ (value.getEncoding().isAsciiCompatible() && scanForCodeRange() == CR_7BIT ? 0 : value.getEncoding().getIndex());
  923. } else {
  924. return value.hashCode();
  925. }
  926. }
  927. @Override
  928. public boolean equals(Object other) {
  929. if (this == other) return true;
  930. if (other instanceof RubyString) {
  931. if (((RubyString) other).value.equal(value)) return true;
  932. }
  933. return false;
  934. }
  935. /** rb_obj_as_string
  936. *
  937. */
  938. public static RubyString objAsString(ThreadContext context, IRubyObject obj) {
  939. if (obj instanceof RubyString) return (RubyString) obj;
  940. IRubyObject str = obj.callMethod(context, "to_s");
  941. if (!(str instanceof RubyString)) return (RubyString) obj.anyToString();
  942. if (obj.isTaint()) str.setTaint(true);
  943. return (RubyString) str;
  944. }
  945. /** rb_str_cmp
  946. *
  947. */
  948. public final int op_cmp(RubyString other) {
  949. return value.cmp(other.value);
  950. }
  951. public final int op_cmp19(RubyString other) {
  952. int ret = value.cmp(other.value);
  953. if (ret == 0 && !isComparableWith(other)) {
  954. return value.getEncoding().getIndex() > other.value.getEncoding().getIndex() ? 1 : -1;
  955. }
  956. return ret;
  957. }
  958. /** rb_to_id
  959. *
  960. */
  961. @Override
  962. public String asJavaString() {
  963. return toString();
  964. }
  965. public IRubyObject doClone(){
  966. return newString(getRuntime(), value.dup());
  967. }
  968. public final RubyString cat(byte[] str) {
  969. modify(value.getRealSize() + str.length);
  970. System.arraycopy(str, 0, value.getUnsafeBytes(), value.getBegin() + value.getRealSize(), str.length);
  971. value.setRealSize(value.getRealSize() + str.length);
  972. return this;
  973. }
  974. public final RubyString cat(byte[] str, int beg, int len) {
  975. modify(value.getRealSize() + len);
  976. System.arraycopy(str, beg, value.getUnsafeBytes(), value.getBegin() + value.getRealSize(), len);
  977. value.setRealSize(value.getRealSize() + len);
  978. return this;
  979. }
  980. // // rb_str_buf_append
  981. public final RubyString cat19(RubyString str) {
  982. ByteList strValue = str.value;
  983. int strCr = str.getCodeRange();
  984. strCr = cat(strValue.getUnsafeBytes(), strValue.getBegin(), strValue.getRealSize(), strValue.getEncoding(), strCr, strCr);
  985. infectBy(str);
  986. str.setCodeRange(strCr);
  987. return this;
  988. }
  989. public final RubyString cat(ByteList str) {
  990. modify(value.getRealSize() + str.getRealSize());
  991. System.arraycopy(str.getUnsafeBytes(), str.getBegin(), value.getUnsafeBytes(), value.getBegin() + value.getRealSize(), str.getRealSize());
  992. value.setRealSize(value.getRealSize() + str.getRealSize());
  993. return this;
  994. }
  995. public final RubyString cat(byte ch) {
  996. modify(value.getRealSize() + 1);
  997. value.getUnsafeBytes()[value.getBegin() + value.getRealSize()] = ch;
  998. value.setRealSize(value.getRealSize() + 1);
  999. return this;
  1000. }
  1001. public final RubyString cat(int ch) {
  1002. return cat((byte)ch);
  1003. }
  1004. public final RubyString cat(int code, Encoding enc) {
  1005. int n = codeLength(getRuntime(), enc, code);
  1006. modify(value.getRealSize() + n);
  1007. enc.codeToMbc(code, value.getUnsafeBytes(), value.getBegin() + value.getRealSize());
  1008. value.setRealSize(value.getRealSize() + n);
  1009. return this;
  1010. }
  /**
   * Encoding-aware append: copies {@code len} bytes of {@code bytes} starting
   * at {@code p} behind the current content and settles the resulting
   * encoding and code range of this string.
   *
   * @param bytes source array
   * @param p     index of the first byte to append
   * @param len   number of bytes to append
   * @param enc   encoding of the appended bytes
   * @param cr    code range of the appended bytes; {@code CR_UNKNOWN} lets this
   *              method scan for it where the logic needs it
   * @param cr2   when non-zero, the (possibly updated) {@code cr} is returned on
   *              the normal path; when zero, zero is returned
   * @return {@code cr2} unchanged on the two early-return paths; otherwise
   *         {@code cr} when {@code cr2} was non-zero, else {@code cr2}
   * @throws RaiseException encoding compatibility error for incompatible
   *         encodings, argument error for a negative {@code len}
   */
  public final int cat(byte[]bytes, int p, int len, Encoding enc, int cr, int cr2) {
      modify(value.getRealSize() + len);
      int toCr = getCodeRange();
      Encoding toEnc = value.getEncoding();
      if (toEnc == enc) {
          // same encoding: the appended code range only matters when the
          // receiver's own code range is usable
          if (toCr == CR_UNKNOWN || (toEnc == ASCIIEncoding.INSTANCE && toCr != CR_7BIT)) {
              cr = CR_UNKNOWN;
          } else if (cr == CR_UNKNOWN) {
              cr = codeRangeScan(enc, bytes, p, len);
          }
      } else {
          if (!toEnc.isAsciiCompatible() || !enc.isAsciiCompatible()) {
              // at least one side is not ASCII compatible: only trivial cases are allowed
              if (len == 0) return cr2;
              if (value.getRealSize() == 0) {
                  // receiver is empty: take over the appended bytes and their encoding
                  System.arraycopy(bytes, p, value.getUnsafeBytes(), value.getBegin() + value.getRealSize(), len);
                  value.setRealSize(value.getRealSize() + len);
                  setEncodingAndCodeRange(enc, cr);
                  return cr2;
              }
              throw getRuntime().newEncodingCompatibilityError("incompatible character encodings: " + toEnc + " and " + enc);
          }
          if (cr == CR_UNKNOWN) cr = codeRangeScan(enc, bytes, p, len);
          if (toCr == CR_UNKNOWN) {
              if (toEnc == ASCIIEncoding.INSTANCE || cr != CR_7BIT) toCr = scanForCodeRange();
          }
      }
      // report the appended data's code range back only when the caller asked for it
      if (cr2 != 0) cr2 = cr;
      // differing encodings can only be mixed when at least one side is 7-bit
      if (toEnc != enc && toCr != CR_7BIT && cr != CR_7BIT) {
          throw getRuntime().newEncodingCompatibilityError("incompatible character encodings: " + toEnc + " and " + enc);
      }
      // decide the encoding and code range of the combined string
      final int resCr;
      final Encoding resEnc;
      if (toCr == CR_UNKNOWN) {
          resEnc = toEnc;
          resCr = CR_UNKNOWN;
      } else if (toCr == CR_7BIT) {
          if (cr == CR_7BIT) {
              resEnc = toEnc == ASCIIEncoding.INSTANCE ? toEnc : enc;
              resCr = CR_7BIT;
          } else {
              resEnc = enc;
              resCr = cr;
          }
      } else if (toCr == CR_VALID) {
          resEnc = toEnc;
          resCr = toCr;
      } else {
          // any other receiver code range: appending data makes the result unknown
          resEnc = toEnc;
          resCr = len > 0 ? CR_UNKNOWN : toCr;
      }
      if (len < 0) throw getRuntime().newArgumentError("negative string size (or size too big)");
      System.arraycopy(bytes, p, value.getUnsafeBytes(), value.getBegin() + value.getRealSize(), len);
      value.setRealSize(value.getRealSize() + len);
      setEncodingAndCodeRange(resEnc, resCr);
      return cr2;
  }
  1067. public final int cat(byte[]bytes, int p, int len, Encoding enc) {
  1068. return cat(bytes, p, len, enc, CR_UNKNOWN, 0);
  1069. }
  1070. public final RubyString catAscii(byte[]bytes, int p, int len) {
  1071. Encoding enc = value.getEncoding();
  1072. if (enc.isAsciiCompatible()) {
  1073. cat(bytes, p, len, enc, CR_7BIT, 0);
  1074. } else {
  1075. byte buf[] = new byte[enc.maxLength()];
  1076. int end = p + len;
  1077. while (p < end) {
  1078. int c = bytes[p];
  1079. int cl = codeLength(getRuntime(), enc, c);
  1080. enc.codeToMbc(c, buf, 0);
  1081. cat(buf, 0, cl, enc, CR_VALID, 0);
  1082. p++;
  1083. }
  1084. }
  1085. return this;
  1086. }
  1087. /** rb_str_replace_m
  1088. *
  1089. */
  1090. @JRubyMethod(name = {"replace", "initialize_copy"}, required = 1, compat = RUBY1_8)
  1091. public IRubyObject replace(IRubyObject other) {
  1092. if (this == other) return this;
  1093. replaceCommon(other);
  1094. return this;
  1095. }
  1096. @JRubyMethod(name = {"replace", "initialize_copy"}, required = 1, compat = RUBY1_9)
  1097. public RubyString replace19(IRubyObject other) {
  1098. modifyCheck();
  1099. if (this == other) return this;
  1100. setCodeRange(replaceCommon(other).getCodeRange()); // encoding doesn't have to be copied.
  1101. return this;
  1102. }
  1103. private RubyString replaceCommon(IRubyObject other) {
  1104. modifyCheck();
  1105. RubyString otherStr = other.convertToString();
  1106. otherStr.shareLevel = shareLevel = SHARE_LEVEL_BYTELIST;
  1107. value = otherStr.value;
  1108. infectBy(otherStr);
  1109. return otherStr;
  1110. }
  1111. @JRubyMethod(name = "clear", compat = RUBY1_9)
  1112. public RubyString clear() {
  1113. modifyCheck();
  1114. Encoding enc = value.getEncoding();
  1115. EmptyByteListHolder holder = getEmptyByteList(enc);
  1116. value = holder.bytes;
  1117. shareLevel = SHARE_LEVEL_BYTELIST;
  1118. setCodeRange(holder.cr);
  1119. return this;
  1120. }
  1121. @JRubyMethod(name = "reverse", compat = RUBY1_8)
  1122. public IRubyObject reverse(ThreadContext context) {
  1123. Ruby runtime = context.getRuntime();
  1124. if (value.getRealSize() <= 1) return strDup(context.getRuntime());
  1125. byte[]bytes = value.getUnsafeBytes();
  1126. int p = value.getBegin();
  1127. int len = value.getRealSize();
  1128. byte[]obytes = new byte[len];
  1129. for (int i = 0; i <= len >> 1; i++) {
  1130. obytes[i] = bytes[p + len - i - 1];
  1131. obytes[len - i - 1] = bytes[p + i];
  1132. }
  1133. return new RubyString(runtime, getMetaClass(), new ByteList(obytes, false)).infectBy(this);
  1134. }
  1135. @JRubyMethod(name = "reverse", compat = RUBY1_9)
  1136. public IRubyObject reverse19(ThreadContext context) {
  1137. Ruby runtime = context.getRuntime();
  1138. if (value.getRealSize() <= 1) return strDup(context.getRuntime());
  1139. byte[]bytes = value.getUnsafeBytes();
  1140. int p = value.getBegin();
  1141. int len = value.getRealSize();
  1142. byte[]obytes = new byte[len];
  1143. boolean single = true;
  1144. Encoding enc = value.getEncoding();
  1145. // this really needs to be inlined here
  1146. if (singleByteOptimizable(enc)) {
  1147. for (int i = 0; i <= len >> 1; i++) {
  1148. obytes[i] = bytes[p + len - i - 1];
  1149. obytes[len - i - 1] = bytes[p + i];
  1150. }
  1151. } else {
  1152. int end = p + len;
  1153. int op = len;
  1154. while (p < end) {
  1155. int cl = StringSupport.length(enc, bytes, p, end);
  1156. if (cl > 1 || (bytes[p] & 0x80) != 0) {
  1157. single = false;
  1158. op -= cl;
  1159. System.arraycopy(bytes, p, obytes, op, cl);
  1160. p += cl;
  1161. } else {
  1162. obytes[--op] = bytes[p++];
  1163. }
  1164. }
  1165. }
  1166. RubyString result = new RubyString(runtime, getMetaClass(), new ByteList(obytes, false));
  1167. if (getCodeRange() == CR_UNKNOWN) setCodeRange(single ? CR_7BIT : CR_VALID);
  1168. Encoding encoding = value.getEncoding();
  1169. result.value.setEncoding(encoding);
  1170. result.copyCodeRangeForSubstr(this, encoding);
  1171. return result.infectBy(this);
  1172. }
  1173. @JRubyMethod(name = "reverse!", compat = RUBY1_8)
  1174. public RubyString reverse_bang(ThreadContext context) {
  1175. if (value.getRealSize() > 1) {
  1176. modify();
  1177. byte[]bytes = value.getUnsafeBytes();
  1178. int p = value.getBegin();
  1179. int len = value.getRealSize();
  1180. for (int i = 0; i < len >> 1; i++) {
  1181. byte b = bytes[p + i];
  1182. bytes[p + i] = bytes[p + len - i - 1];
  1183. bytes[p + len - i - 1] = b;
  1184. }
  1185. }
  1186. return this;
  1187. }
  1188. @JRubyMethod(name = "reverse!", compat = RUBY1_9)
  1189. public RubyString reverse_bang19(ThreadContext context) {
  1190. modifyCheck();
  1191. if (value.getRealSize() > 1) {
  1192. modifyAndKeepCodeRange();
  1193. byte[]bytes = value.getUnsafeBytes();
  1194. int p = value.getBegin();
  1195. int len = value.getRealSize();
  1196. Encoding enc = value.getEncoding();
  1197. // this really needs to be inlined here
  1198. if (singleByteOptimizable(enc)) {
  1199. for (int i = 0; i < len >> 1; i++) {
  1200. byte b = bytes[p + i];
  1201. bytes[p + i] = bytes[p + len - i - 1];
  1202. bytes[p + len - i - 1] = b;
  1203. }
  1204. } else {
  1205. int end = p + len;
  1206. int op = len;
  1207. byte[]obytes = new byte[len];
  1208. boolean single = true;
  1209. while (p < end) {
  1210. int cl = StringSupport.length(enc, bytes, p, end);
  1211. if (cl > 1 || (bytes[p] & 0x80) != 0) {
  1212. single = false;
  1213. op -= cl;
  1214. System.arraycopy(bytes, p, obytes, op, cl);
  1215. p += cl;
  1216. } else {
  1217. obytes[--op] = bytes[p++];
  1218. }
  1219. }
  1220. value.setUnsafeBytes(obytes);
  1221. if (getCodeRange() == CR_UNKNOWN) setCodeRange(single ? CR_7BIT : CR_VALID);
  1222. }
  1223. }
  1224. return this;
  1225. }
  1226. /** rb_str_s_new
  1227. *
  1228. */
  1229. public static RubyString newInstance(IRubyObject recv, IRubyObject[] args, Block block) {
  1230. RubyString newString = newStringShared(recv.getRuntime(), ByteList.EMPTY_BYTELIST);
  1231. newString.setMetaClass((RubyClass) recv);
  1232. newString.callInit(args, block);
  1233. return newString;
  1234. }
  1235. @JRubyMethod(visibility = PRIVATE, compat = RUBY1_8)
  1236. @Override
  1237. public IRubyObject initialize() {
  1238. return this;
  1239. }
  1240. @JRubyMethod(visibility = PRIVATE, compat = RUBY1_8)
  1241. public IRubyObject initialize(IRubyObject arg0) {
  1242. replace(arg0);
  1243. return this;
  1244. }
  1245. @JRubyMethod(name = "initialize", visibility = PRIVATE, compat = RUBY1_9)
  1246. public IRubyObject initialize19() {
  1247. return this;
  1248. }
  1249. @JRubyMethod(name = "initialize", visibility = PRIVATE, compat = RUBY1_9)
  1250. public IRubyObject initialize19(IRubyObject arg0) {
  1251. replace19(arg0);
  1252. return this;
  1253. }
  1254. @JRubyMethod(compat = RUBY1_8)
  1255. public IRubyObject casecmp(ThreadContext context, IRubyObject other) {
  1256. return RubyFixnum.newFixnum(context.getRuntime(), value.caseInsensitiveCmp(other.convertToString().value));
  1257. }
  1258. @JRubyMethod(name = "casecmp", compat = RUBY1_9)
  1259. public IRubyObject casecmp19(ThreadContext context, IRubyObject other) {
  1260. Ruby runtime = context.getRuntime();
  1261. RubyString otherStr = other.convertToString();
  1262. Encoding enc = isCompatibleWith(otherStr);
  1263. if (enc == null) return runtime.getNil();
  1264. if (singleByteOptimizable() && otherStr.singleByteOptimizable()) {
  1265. return RubyFixnum.newFixnum(runtime, value.caseInsensitiveCmp(otherStr.value));
  1266. } else {
  1267. return multiByteCasecmp(runtime, enc, value, otherStr.value);
  1268. }
  1269. }
  1270. private IRubyObject multiByteCasecmp(Ruby runtime, Encoding enc, ByteList value, ByteList otherValue) {
  1271. byte[]bytes = value.getUnsafeBytes();
  1272. int p = value.getBegin();
  1273. int end = p + value.getRealSize();
  1274. byte[]obytes = otherValue.getUnsafeBytes();
  1275. int op = otherValue.getBegin();
  1276. int oend = op + otherValue.getRealSize();
  1277. while (p < end && op < oend) {
  1278. final int c, oc;
  1279. if (enc.isAsciiCompatible()) {
  1280. c = bytes[p] & 0xff;
  1281. oc = obytes[op] & 0xff;
  1282. } else {
  1283. c = StringSupport.preciseCodePoint(enc, bytes, p, end);
  1284. oc = StringSupport.preciseCodePoint(enc, obytes, op, oend);
  1285. }
  1286. int cl, ocl;
  1287. if (Encoding.isAscii(c) && Encoding.isAscii(oc)) {
  1288. byte uc = AsciiTables.ToUpperCaseTable[c];
  1289. byte uoc = AsciiTables.ToUpperCaseTable[oc];
  1290. if (uc != uoc) {
  1291. return uc < uoc ? RubyFixnum.minus_one(runtime) : RubyFixnum.one(runtime);
  1292. }
  1293. cl = ocl = 1;
  1294. } else {
  1295. cl = StringSupport.length(enc, bytes, p, end);
  1296. ocl = StringSupport.length(enc, obytes, op, oend);
  1297. // TODO: opt for 2 and 3 ?
  1298. int ret = StringSupport.caseCmp(bytes, p, obytes, op, cl < ocl ? cl : ocl);
  1299. if (ret != 0) return ret < 0 ? RubyFixnum.minus_one(runtime) : RubyFixnum.one(runtime);
  1300. if (cl != ocl) return cl < ocl ? RubyFixnum.minus_one(runtime) : RubyFixnum.one(runtime);
  1301. }
  1302. p += cl;
  1303. op += ocl;
  1304. }
  1305. if (end - p == oend - op) return RubyFixnum.zero(runtime);
  1306. return end - p > oend - op ? RubyFixnum.one(runtime) : RubyFixnum.minus_one(runtime);
  1307. }
  1308. /** rb_str_match
  1309. *
  1310. */
  1311. @JRubyMethod(name = "=~", compat = RUBY1_8, writes = BACKREF)
  1312. @Override
  1313. public IRubyObject op_match(ThreadContext context, IRubyObject other) {
  1314. if (other instanceof RubyRegexp) return ((RubyRegexp) other).op_match(context, this);
  1315. if (other instanceof RubyString) throw context.getRuntime().newTypeError("type mismatch: String given");
  1316. return other.callMethod(context, "=~", this);
  1317. }
  1318. @JRubyMethod(name = "=~", compat = RUBY1_9, writes = BACKREF)
  1319. public IRubyObject op_match19(ThreadContext context, IRubyObject other) {
  1320. if (other instanceof RubyRegexp) return ((RubyRegexp) other).op_match19(context, this);
  1321. if (other instanceof RubyString) throw context.getRuntime().newTypeError("type mismatch: String given");
  1322. return other.callMethod(context, "=~", this);
  1323. }
  1324. /**
  1325. * String#match(pattern)
  1326. *
  1327. * rb_str_match_m
  1328. *
  1329. * @param pattern Regexp or String
  1330. */
  1331. @JRubyMethod(compat = RUBY1_8, reads = BACKREF)
  1332. public IRubyObject match(ThreadContext context, IRubyObject pattern) {
  1333. return getPattern(pattern).callMethod(context, "match", this);
  1334. }
  1335. @JRubyMethod(name = "match", compat = RUBY1_9, reads = BACKREF)
  1336. public IRubyObject match19(ThreadContext context, IRubyObject pattern, Block block) {
  1337. IRubyObject result = getPattern(pattern).callMethod(context, "match", this);
  1338. return block.isGiven() && !result.isNil() ? block.yield(context, result) : result;
  1339. }
  1340. @JRubyMethod(name = "match", required = 2, rest = true, compat = RUBY1_9, reads = BACKREF)
  1341. public IRubyObject match19(ThreadContext context, IRubyObject[]args, Block block) {
  1342. RubyRegexp pattern = getPattern(args[0]);
  1343. args[0] = this;
  1344. IRubyObject result = pattern.callMethod(context, "match", args);
  1345. return block.isGiven() && !result.isNil() ? block.yield(context, result) : result;
  1346. }
  1347. /** rb_str_capitalize / rb_str_capitalize_bang
  1348. *
  1349. */
  1350. @JRubyMethod(name = "capitalize", compat = RUBY1_8)
  1351. public IRubyObject capitalize(ThreadContext context) {
  1352. RubyString str = strDup(context.getRuntime());
  1353. str.capitalize_bang(context);
  1354. return str;
  1355. }
  1356. @JRubyMethod(name = "capitalize!", compat = RUBY1_8)
  1357. public IRubyObject capitalize_bang(ThreadContext context) {
  1358. Ruby runtime = context.getRuntime();
  1359. if (value.getRealSize() == 0) {
  1360. modifyCheck();
  1361. return runtime.getNil();
  1362. }
  1363. modify();
  1364. int s = value.getBegin();
  1365. int end = s + value.getRealSize();
  1366. byte[]bytes = value.getUnsafeBytes();
  1367. boolean modify = false;
  1368. int c = bytes[s] & 0xff;
  1369. if (ASCII.isLower(c)) {
  1370. bytes[s] = AsciiTables.ToUpperCaseTable[c];
  1371. modify = true;
  1372. }
  1373. while (++s < end) {
  1374. c = bytes[s] & 0xff;
  1375. if (ASCII.isUpper(c)) {
  1376. bytes[s] = AsciiTables.ToLowerCaseTable[c];
  1377. modify = true;
  1378. }
  1379. }
  1380. return modify ? this : runtime.getNil();
  1381. }
  1382. @JRubyMethod(name = "capitalize", compat = RUBY1_9)
  1383. public IRubyObject capitalize19(ThreadContext context) {
  1384. RubyString str = strDup(context.getRuntime());
  1385. str.capitalize_bang19(context);
  1386. return str;
  1387. }
  1388. @JRubyMethod(name = "capitalize!", compat = RUBY1_9)
  1389. public IRubyObject capitalize_bang19(ThreadContext context) {
  1390. Ruby runtime = context.getRuntime();
  1391. Encoding enc = checkDummyEncoding();
  1392. if (value.getRealSize() == 0) {
  1393. modifyCheck();
  1394. return runtime.getNil();
  1395. }
  1396. modifyAndKeepCodeRange();
  1397. int s = value.getBegin();
  1398. int end = s + value.getRealSize();
  1399. byte[]bytes = value.getUnsafeBytes();
  1400. boolean modify = false;
  1401. int c = codePoint(runtime, enc, bytes, s, end);
  1402. if (enc.isLower(c)) {
  1403. enc.codeToMbc(toUpper(enc, c), bytes, s);
  1404. modify = true;
  1405. }
  1406. s += codeLength(runtime, enc, c);
  1407. while (s < end) {
  1408. c = codePoint(runtime, enc, bytes, s, end);
  1409. if (enc.isUpper(c)) {
  1410. enc.codeToMbc(toLower(enc, c), bytes, s);
  1411. modify = true;
  1412. }
  1413. s += codeLength(runtime, enc, c);
  1414. }
  1415. return modify ? this : runtime.getNil();
  1416. }
  1417. @JRubyMethod(name = ">=", compat = RUBY1_8)
  1418. public IRubyObject op_ge(ThreadContext context, IRubyObject other) {
  1419. if (other instanceof RubyString) return context.getRuntime().newBoolean(op_cmp((RubyString) other) >= 0);
  1420. return RubyComparable.op_ge(context, this, other);
  1421. }
  1422. @JRubyMethod(name = ">=", compat = RUBY1_9)
  1423. public IRubyObject op_ge19(ThreadContext context, IRubyObject other) {
  1424. if (other instanceof RubyString) return context.getRuntime().newBoolean(op_cmp19((RubyString) other) >= 0);
  1425. return RubyComparable.op_ge(context, this, other);
  1426. }
  1427. @JRubyMethod(name = ">", compat = RUBY1_8)
  1428. public IRubyObject op_gt(ThreadContext context, IRubyObject other) {
  1429. if (other instanceof RubyString) return context.getRuntime().newBoolean(op_cmp((RubyString) other) > 0);
  1430. return RubyComparable.op_gt(context, this, other);
  1431. }
  1432. @JRubyMethod(name = ">", compat = RUBY1_9)
  1433. public IRubyObject op_gt19(ThreadContext context, IRubyObject other) {
  1434. if (other instanceof RubyString) return context.getRuntime().newBoolean(op_cmp19((RubyString) other) > 0);
  1435. return RubyComparable.op_gt(context, this, other);
  1436. }
  1437. @JRubyMethod(name = "<=", compat = RUBY1_8)
  1438. public IRubyObject op_le(ThreadContext context, IRubyObject other) {
  1439. if (other instanceof RubyString) return context.getRuntime().newBoolean(op_cmp((RubyString) other) <= 0);
  1440. return RubyComparable.op_le(context, this, other);
  1441. }
  1442. @JRubyMethod(name = "<=", compat = RUBY1_9)
  1443. public IRubyObject op_le19(ThreadContext context, IRubyObject other) {
  1444. if (other instanceof RubyString) return context.getRuntime().newBoolean(op_cmp19((RubyString) other) <= 0);
  1445. return RubyComparable.op_le(context, this, other);
  1446. }
  1447. @JRubyMethod(name = "<", compat = RUBY1_8)
  1448. public IRubyObject op_lt(ThreadContext context, IRubyObject other) {
  1449. if (other instanceof RubyString) return context.getRuntime().newBoolean(op_cmp((RubyString) other) < 0);
  1450. return RubyComparable.op_lt(context, this, other);
  1451. }
  1452. @JRubyMethod(name = "<", compat = RUBY1_9)
  1453. public IRubyObject op_lt19(ThreadContext context, IRubyObject other) {
  1454. if (other instanceof RubyString) return context.getRuntime().newBoolean(op_cmp19((RubyString) other) < 0);
  1455. return RubyComparable.op_lt(context, this, other);
  1456. }
  1457. @JRubyMethod(name = "eql?", compat = RUBY1_8)
  1458. public IRubyObject str_eql_p(ThreadContext context, IRubyObject other) {
  1459. Ruby runtime = context.getRuntime();
  1460. if (other instanceof RubyString && value.equal(((RubyString)other).value)) return runtime.getTrue();
  1461. return runtime.getFalse();
  1462. }
  1463. @JRubyMethod(name = "eql?", compat = RUBY1_9)
  1464. public IRubyObject str_eql_p19(ThreadContext context, IRubyObject other) {
  1465. Ruby runtime = context.getRuntime();
  1466. if (other instanceof RubyString) {
  1467. RubyString otherString = (RubyString)other;
  1468. if (isComparableWith(otherString) && value.equal(otherString.value)) return runtime.getTrue();
  1469. }
  1470. return runtime.getFalse();
  1471. }
  1472. /** rb_str_upcase / rb_str_upcase_bang
  1473. *
  1474. */
  1475. @JRubyMethod(name = "upcase", compat = RUBY1_8)
  1476. public RubyString upcase(ThreadContext context) {
  1477. RubyString str = strDup(context.getRuntime());
  1478. str.upcase_bang(context);
  1479. return str;
  1480. }
  1481. @JRubyMethod(name = "upcase!", compat = RUBY1_8)
  1482. public IRubyObject upcase_bang(ThreadContext context) {
  1483. Ruby runtime = context.getRuntime();
  1484. if (value.getRealSize() == 0) {
  1485. modifyCheck();
  1486. return runtime.getNil();
  1487. }
  1488. modify();
  1489. return singleByteUpcase(runtime, value.getUnsafeBytes(), value.getBegin(), value.getBegin() + value.getRealSize());
  1490. }
  1491. @JRubyMethod(name = "upcase", compat = RUBY1_9)
  1492. public RubyString upcase19(ThreadContext context) {
  1493. RubyString str = strDup(context.getRuntime());
  1494. str.upcase_bang19(context);
  1495. return str;
  1496. }
  1497. @JRubyMethod(name = "upcase!", compat = RUBY1_9)
  1498. public IRubyObject upcase_bang19(ThreadContext context) {
  1499. Ruby runtime = context.getRuntime();
  1500. Encoding enc = checkDummyEncoding();
  1501. if (value.getRealSize() == 0) {
  1502. modifyCheck();
  1503. return runtime.getNil();
  1504. }
  1505. modifyAndKeepCodeRange();
  1506. int s = value.getBegin();
  1507. int end = s + value.getRealSize();
  1508. byte[]bytes = value.getUnsafeBytes();
  1509. if (singleByteOptimizable(enc)) {
  1510. return singleByteUpcase(runtime, bytes, s, end);
  1511. } else {
  1512. return multiByteUpcase(runtime, enc, bytes, s, end);
  1513. }
  1514. }
  1515. private IRubyObject singleByteUpcase(Ruby runtime, byte[]bytes, int s, int end) {
  1516. boolean modify = false;
  1517. while (s < end) {
  1518. int c = bytes[s] & 0xff;
  1519. if (ASCII.isLower(c)) {
  1520. bytes[s] = AsciiTables.ToUpperCaseTable[c];
  1521. modify = true;
  1522. }
  1523. s++;
  1524. }
  1525. return modify ? this : runtime.getNil();
  1526. }
  1527. private IRubyObject multiByteUpcase(Ruby runtime, Encoding enc, byte[]bytes, int s, int end) {
  1528. boolean modify = false;
  1529. int c;
  1530. while (s < end) {
  1531. if (enc.isAsciiCompatible() && Encoding.isAscii(c = bytes[s] & 0xff)) {
  1532. if (ASCII.isLower(c)) {
  1533. bytes[s] = AsciiTables.ToUpperCaseTable[c];
  1534. modify = true;
  1535. }
  1536. s++;
  1537. } else {
  1538. c = codePoint(runtime, enc, bytes, s, end);
  1539. if (enc.isLower(c)) {
  1540. enc.codeToMbc(toUpper(enc, c), bytes, s);
  1541. modify = true;
  1542. }
  1543. s += codeLength(runtime, enc, c);
  1544. }
  1545. }
  1546. return modify ? this : runtime.getNil();
  1547. }
  1548. /** rb_str_downcase / rb_str_downcase_bang
  1549. *
  1550. */
  1551. @JRubyMethod(name = "downcase", compat = RUBY1_8)
  1552. public RubyString downcase(ThreadContext context) {
  1553. RubyString str = strDup(context.getRuntime());
  1554. str.downcase_bang(context);
  1555. return str;
  1556. }
  1557. @JRubyMethod(name = "downcase!", compat = RUBY1_8)
  1558. public IRubyObject downcase_bang(ThreadContext context) {
  1559. Ruby runtime = context.getRuntime();
  1560. if (value.getRealSize() == 0) {
  1561. modifyCheck();
  1562. return runtime.getNil();
  1563. }
  1564. modify();
  1565. return singleByteDowncase(runtime, value.getUnsafeBytes(), value.getBegin(), value.getBegin() + value.getRealSize());
  1566. }
  1567. @JRubyMethod(name = "downcase", compat = RUBY1_9)
  1568. public RubyString downcase19(ThreadContext context) {
  1569. RubyString str = strDup(context.getRuntime());
  1570. str.downcase_bang19(context);
  1571. return str;
  1572. }
  1573. @JRubyMethod(name = "downcase!", compat = RUBY1_9)
  1574. public IRubyObject downcase_bang19(ThreadContext context) {
  1575. Ruby runtime = context.getRuntime();
  1576. Encoding enc = checkDummyEncoding();
  1577. if (value.getRealSize() == 0) {
  1578. modifyCheck();
  1579. return runtime.getNil();
  1580. }
  1581. modifyAndKeepCodeRange();
  1582. int s = value.getBegin();
  1583. int end = s + value.getRealSize();
  1584. byte[]bytes = value.getUnsafeBytes();
  1585. if (singleByteOptimizable(enc)) {
  1586. return singleByteDowncase(runtime, bytes, s, end);
  1587. } else {
  1588. return multiByteDowncase(runtime, enc, bytes, s, end);
  1589. }
  1590. }
  1591. private IRubyObject singleByteDowncase(Ruby runtime, byte[]bytes, int s, int end) {
  1592. boolean modify = false;
  1593. while (s < end) {
  1594. int c = bytes[s] & 0xff;
  1595. if (ASCII.isUpper(c)) {
  1596. bytes[s] = AsciiTables.ToLowerCaseTable[c];
  1597. modify = true;
  1598. }
  1599. s++;
  1600. }
  1601. return modify ? this : runtime.getNil();
  1602. }
  1603. private IRubyObject multiByteDowncase(Ruby runtime, Encoding enc, byte[]bytes, int s, int end) {
  1604. boolean modify = false;
  1605. int c;
  1606. while (s < end) {
  1607. if (enc.isAsciiCompatible() && Encoding.isAscii(c = bytes[s] & 0xff)) {
  1608. if (ASCII.isUpper(c)) {
  1609. bytes[s] = AsciiTables.ToLowerCaseTable[c];
  1610. modify = true;
  1611. }
  1612. s++;
  1613. } else {
  1614. c = codePoint(runtime, enc, bytes, s, end);
  1615. if (enc.isUpper(c)) {
  1616. enc.codeToMbc(toLower(enc, c), bytes, s);
  1617. modify = true;
  1618. }
  1619. s += codeLength(runtime, enc, c);
  1620. }
  1621. }
  1622. return modify ? this : runtime.getNil();
  1623. }
  1624. /** rb_str_swapcase / rb_str_swapcase_bang
  1625. *
  1626. */
  1627. @JRubyMethod(name = "swapcase", compat = RUBY1_8)
  1628. public RubyString swapcase(ThreadContext context) {
  1629. RubyString str = strDup(context.getRuntime());
  1630. str.swapcase_bang(context);
  1631. return str;
  1632. }
  1633. @JRubyMethod(name = "swapcase!", compat = RUBY1_8)
  1634. public IRubyObject swapcase_bang(ThreadContext context) {
  1635. Ruby runtime = context.getRuntime();
  1636. if (value.getRealSize() == 0) {
  1637. modifyCheck();
  1638. return runtime.getNil();
  1639. }
  1640. modify();
  1641. return singleByteSwapcase(runtime, value.getUnsafeBytes(), value.getBegin(), value.getBegin() + value.getRealSize());
  1642. }
  1643. @JRubyMethod(name = "swapcase", compat = RUBY1_9)
  1644. public RubyString swapcase19(ThreadContext context) {
  1645. RubyString str = strDup(context.getRuntime());
  1646. str.swapcase_bang19(context);
  1647. return str;
  1648. }
  1649. @JRubyMethod(name = "swapcase!", compat = RUBY1_9)
  1650. public IRubyObject swapcase_bang19(ThreadContext context) {
  1651. Ruby runtime = context.getRuntime();
  1652. Encoding enc = checkDummyEncoding();
  1653. if (value.getRealSize() == 0) {
  1654. modifyCheck();
  1655. return runtime.getNil();
  1656. }
  1657. modifyAndKeepCodeRange();
  1658. int s = value.getBegin();
  1659. int end = s + value.getRealSize();
  1660. byte[]bytes = value.getUnsafeBytes();
  1661. if (singleByteOptimizable(enc)) {
  1662. return singleByteSwapcase(runtime, bytes, s, end);
  1663. } else {
  1664. return multiByteSwapcase(runtime, enc, bytes, s, end);
  1665. }
  1666. }
  1667. private IRubyObject singleByteSwapcase(Ruby runtime, byte[]bytes, int s, int end) {
  1668. boolean modify = false;
  1669. while (s < end) {
  1670. int c = bytes[s] & 0xff;
  1671. if (ASCII.isUpper(c)) {
  1672. bytes[s] = AsciiTables.ToLowerCaseTable[c];
  1673. modify = true;
  1674. } else if (ASCII.isLower(c)) {
  1675. bytes[s] = AsciiTables.ToUpperCaseTable[c];
  1676. modify = true;
  1677. }
  1678. s++;
  1679. }
  1680. return modify ? this : runtime.getNil();
  1681. }
  1682. private IRubyObject multiByteSwapcase(Ruby runtime, Encoding enc, byte[]bytes, int s, int end) {
  1683. boolean modify = false;
  1684. while (s < end) {
  1685. int c = codePoint(runtime, enc, bytes, s, end);
  1686. if (enc.isUpper(c)) {
  1687. enc.codeToMbc(toLower(enc, c), bytes, s);
  1688. modify = true;
  1689. } else if (enc.isLower(c)) {
  1690. enc.codeToMbc(toUpper(enc, c), bytes, s);
  1691. modify = true;
  1692. }
  1693. s += codeLength(runtime, enc, c);
  1694. }
  1695. return modify ? this : runtime.getNil();
  1696. }
  1697. /** rb_str_dump
  1698. *
  1699. */
  1700. @JRubyMethod(name = "dump", compat = RUBY1_8)
  1701. public IRubyObject dump() {
  1702. return dumpCommon(false);
  1703. }
  1704. @JRubyMethod(name = "dump", compat = RUBY1_9)
  1705. public IRubyObject dump19() {
  1706. return dumpCommon(true);
  1707. }
    /**
     * Shared implementation of {@code String#dump} for both compatibility modes.
     * <p>
     * Works in two passes over the receiver's bytes: the first pass computes the exact
     * length of the escaped text, the second pass writes that text into a buffer
     * allocated with exactly that capacity. The two passes must stay in agreement; the
     * assertion near the end checks that the buffer was never reallocated.
     *
     * @param is1_9 {@code true} for the 1.9 output form (hex / {@code \}{@code u{...}} escapes,
     *              optional {@code .force_encoding(...)} suffix), {@code false} for the
     *              1.8 form (octal escapes)
     * @return a new string of the receiver's metaclass holding the quoted, escaped text,
     *         infected by the receiver
     */
    private IRubyObject dumpCommon(boolean is1_9) {
        Ruby runtime = getRuntime();
        // Scratch buffer, created lazily, used only to measure the hex digits of a code point.
        ByteList buf = null;
        Encoding enc = value.getEncoding();
        int p = value.getBegin();
        int end = p + value.getRealSize();
        byte[]bytes = value.getUnsafeBytes();
        // Pass 1: compute the output length. Starts at 2 for the surrounding double quotes.
        int len = 2;
        while (p < end) {
            int c = bytes[p++] & 0xff;
            switch (c) {
            // Characters written as a backslash plus one character.
            case '"':case '\\':case '\n':case '\r':case '\t':case '\f':
            case '\013': case '\010': case '\007': case '\033':
                len += 2;
                break;
            case '#':
                // '#' is escaped only when followed by '$', '@' or '{'.
                len += isEVStr(bytes, p, end) ? 2 : 1;
                break;
            default:
                if (ASCII.isPrint(c)) {
                    len++;
                } else {
                    if (is1_9 && enc instanceof UTF8Encoding) {
                        // n = number of bytes in this character beyond the first one.
                        int n = StringSupport.preciseLength(enc, bytes, p - 1, end) - 1;
                        if (n > 0) {
                            if (buf == null) buf = new ByteList();
                            int cc = codePoint(runtime, enc, bytes, p - 1, end);
                            Sprintf.sprintf(runtime, buf, "%x", cc);
                            // Hex digits plus the four characters of backslash, 'u', '{' and '}'.
                            len += buf.getRealSize() + 4;
                            buf.setRealSize(0);
                            p += n;
                            break;
                        }
                    }
                    // Backslash plus three characters ("xHH" in 1.9, three octal digits in 1.8).
                    len += 4;
                }
                break;
            }
        }
        if (is1_9 && !enc.isAsciiCompatible()) {
            // Room for the suffix appended after the closing quote (see the end of the method).
            len += ".force_encoding(\"".length() + enc.getName().length + "\")".length();
        }
        // Pass 2: emit the escaped text into a buffer of the computed capacity.
        ByteList outBytes = new ByteList(len);
        byte out[] = outBytes.getUnsafeBytes();
        int q = 0;
        p = value.getBegin();
        end = p + value.getRealSize();
        out[q++] = '"';
        while (p < end) {
            int c = bytes[p++] & 0xff;
            if (c == '"' || c == '\\') {
                out[q++] = '\\';
                out[q++] = (byte)c;
            } else if (c == '#') {
                if (isEVStr(bytes, p, end)) out[q++] = '\\';
                out[q++] = '#';
            } else if (!is1_9 && ASCII.isPrint(c)) {
                out[q++] = (byte)c;
            } else if (c == '\n') {
                out[q++] = '\\';
                out[q++] = 'n';
            } else if (c == '\r') {
                out[q++] = '\\';
                out[q++] = 'r';
            } else if (c == '\t') {
                out[q++] = '\\';
                out[q++] = 't';
            } else if (c == '\f') {
                out[q++] = '\\';
                out[q++] = 'f';
            } else if (c == '\013') {
                out[q++] = '\\';
                out[q++] = 'v';
            } else if (c == '\010') {
                out[q++] = '\\';
                out[q++] = 'b';
            } else if (c == '\007') {
                out[q++] = '\\';
                out[q++] = 'a';
            } else if (c == '\033') {
                out[q++] = '\\';
                out[q++] = 'e';
            } else if (is1_9 && ASCII.isPrint(c)) {
                out[q++] = (byte)c;
            } else {
                out[q++] = '\\';
                if (is1_9) {
                    if (enc instanceof UTF8Encoding) {
                        int n = StringSupport.preciseLength(enc, bytes, p - 1, end) - 1;
                        if (n > 0) {
                            int cc = codePoint(runtime, enc, bytes, p - 1, end);
                            p += n;
                            // Sprintf appends at the ByteList's real size, so publish q
                            // before the call and read it back afterwards.
                            outBytes.setRealSize(q);
                            Sprintf.sprintf(runtime, outBytes, "u{%x}", cc);
                            q = outBytes.getRealSize();
                            continue;
                        }
                    }
                    outBytes.setRealSize(q);
                    Sprintf.sprintf(runtime, outBytes, "x%02X", c);
                    q = outBytes.getRealSize();
                } else {
                    outBytes.setRealSize(q);
                    Sprintf.sprintf(runtime, outBytes, "%03o", c);
                    q = outBytes.getRealSize();
                }
            }
        }
        out[q++] = '"';
        outBytes.setRealSize(q);
        // 'out' was captured before the Sprintf calls; if the length computed in pass 1
        // were too small the ByteList would have grown and 'out' would be stale.
        assert out == outBytes.getUnsafeBytes(); // must not reallocate
        final RubyString result = new RubyString(runtime, getMetaClass(), outBytes);
        if (is1_9) {
            if (!enc.isAsciiCompatible()) {
                result.cat(".force_encoding(\"".getBytes());
                result.cat(enc.getName());
                result.cat((byte)'"').cat((byte)')');
                // The dumped text is tagged with ASCII instead of the original encoding.
                enc = ASCII;
            }
            result.associateEncoding(enc);
            result.setCodeRange(CR_7BIT);
        }
        return result.infectBy(this);
    }
  1832. @JRubyMethod(name = "insert", compat = RUBY1_8)
  1833. public IRubyObject insert(ThreadContext context, IRubyObject indexArg, IRubyObject stringArg) {
  1834. assert !context.getRuntime().is1_9();
  1835. RubyString str = stringArg.convertToString();
  1836. int index = RubyNumeric.num2int(indexArg);
  1837. if (index == -1) return append(stringArg);
  1838. if (index < 0) index++;
  1839. replaceInternal(checkIndex(index, value.getRealSize()), 0, str);
  1840. return this;
  1841. }
  1842. @JRubyMethod(name = "insert", compat = RUBY1_9)
  1843. public IRubyObject insert19(ThreadContext context, IRubyObject indexArg, IRubyObject stringArg) {
  1844. RubyString str = stringArg.convertToString();
  1845. int index = RubyNumeric.num2int(indexArg);
  1846. if (index == -1) return append19(stringArg);
  1847. if (index < 0) index++;
  1848. replaceInternal19(checkIndex(index, strLength()), 0, str);
  1849. return this;
  1850. }
  1851. private int checkIndex(int beg, int len) {
  1852. if (beg > len) raiseIndexOutOfString(beg);
  1853. if (beg < 0) {
  1854. if (-beg > len) raiseIndexOutOfString(beg);
  1855. beg += len;
  1856. }
  1857. return beg;
  1858. }
  1859. private int checkIndexForRef(int beg, int len) {
  1860. if (beg >= len) raiseIndexOutOfString(beg);
  1861. if (beg < 0) {
  1862. if (-beg > len) raiseIndexOutOfString(beg);
  1863. beg += len;
  1864. }
  1865. return beg;
  1866. }
  1867. private int checkLength(int len) {
  1868. if (len < 0) throw getRuntime().newIndexError("negative length " + len);
  1869. return len;
  1870. }
  1871. private void raiseIndexOutOfString(int index) {
  1872. throw getRuntime().newIndexError("index " + index + " out of string");
  1873. }
  1874. /** rb_str_inspect
  1875. *
  1876. */
  1877. @JRubyMethod(name = "inspect", compat = RUBY1_8)
  1878. @Override
  1879. public IRubyObject inspect() {
  1880. int start = value.getBegin();
  1881. int len = value.getRealSize();
  1882. byte[] bytes = value.getUnsafeBytes();
  1883. try {
  1884. return inspectCommon(false);
  1885. } catch (ArrayIndexOutOfBoundsException x) {
  1886. System.out.println("" + start + ", " + len + ", " + Arrays.toString(bytes));
  1887. throw x;
  1888. }
  1889. }
  1890. @JRubyMethod(name = "inspect", compat = RUBY1_9)
  1891. public IRubyObject inspect19() {
  1892. return inspectCommon(true);
  1893. }
  1894. private void prefixEscapeCat(int c) {
  1895. cat('\\');
  1896. cat(c);
  1897. }
  1898. private void escapeCodePointCat(Ruby runtime, byte[]bytes, int p, int n) {
  1899. for (int q = p - n; q < p; q++) {
  1900. Sprintf.sprintf(runtime, value, "\\x%02X", bytes[q] & 0377);
  1901. }
  1902. }
  1903. final IRubyObject inspectCommon(final boolean is1_9) {
  1904. Ruby runtime = getRuntime();
  1905. byte bytes[] = value.getUnsafeBytes();
  1906. int p = value.getBegin();
  1907. int end = p + value.getRealSize();
  1908. RubyString result = new RubyString(runtime, runtime.getString(), new ByteList(end - p));
  1909. Encoding enc;
  1910. if (is1_9) {
  1911. enc = value.getEncoding();
  1912. if (enc != runtime.getKCode().getEncoding()) {
  1913. enc = runtime.getKCode().getEncoding();
  1914. }
  1915. if (!enc.isAsciiCompatible()) {
  1916. enc = USASCIIEncoding.INSTANCE;
  1917. }
  1918. result.associateEncoding(enc);
  1919. } else {
  1920. enc = runtime.getKCode().getEncoding();
  1921. }
  1922. result.cat('"');
  1923. while (p < end) {
  1924. int c, n;
  1925. if (is1_9) {
  1926. n = StringSupport.preciseLength(enc, bytes, p, end);
  1927. if (n <= 0) { // Illegal combination
  1928. p++;
  1929. n = 1;
  1930. result.escapeCodePointCat(runtime, bytes, p, n);
  1931. continue;
  1932. }
  1933. c = codePoint(runtime, enc, bytes, p, end);
  1934. n = codeLength(runtime, enc, c);
  1935. p += n;
  1936. } else {
  1937. c = bytes[p++] & 0xff;
  1938. n = enc.length((byte)c);
  1939. }
  1940. if (!is1_9 && n > 1 && p - 1 <= end - n) {
  1941. try {
  1942. result.cat(bytes, p - 1, n);
  1943. } catch (ArrayIndexOutOfBoundsException x) {
  1944. System.out.println("begin = " + (p - 1));
  1945. System.out.println("len = " + n);
  1946. System.out.println("bytes = " + Arrays.toString(bytes));
  1947. throw x;
  1948. }
  1949. p += n - 1;
  1950. continue;
  1951. } else if (c == '"'|| c == '\\') {
  1952. result.prefixEscapeCat(c);
  1953. continue;
  1954. } else if (c == '#') {
  1955. if (is1_9) {
  1956. int cc;
  1957. if (p < end && StringSupport.preciseLength(enc, bytes, p, end) > 0 &&
  1958. isEVStr(cc = codePoint(runtime, enc, bytes, p, end))) {
  1959. if ("$@{".indexOf(cc) != -1) {
  1960. cc = '#';
  1961. }
  1962. result.prefixEscapeCat(cc);
  1963. continue;
  1964. }
  1965. } else {
  1966. if (isEVStr(bytes, p, end)) {
  1967. result.prefixEscapeCat(c);
  1968. continue;
  1969. }
  1970. }
  1971. }
  1972. if (!is1_9 && ASCII.isPrint(c)) {
  1973. result.cat(c);
  1974. } else if (c == '\n') {
  1975. result.prefixEscapeCat('n');
  1976. } else if (c == '\r') {
  1977. result.prefixEscapeCat('r');
  1978. } else if (c == '\t') {
  1979. result.prefixEscapeCat('t');
  1980. } else if (c == '\f') {
  1981. result.prefixEscapeCat('f');
  1982. } else if (c == '\013') {
  1983. result.prefixEscapeCat('v');
  1984. } else if (c == '\010') {
  1985. result.prefixEscapeCat('b');
  1986. } else if (c == '\007') {
  1987. result.prefixEscapeCat('a');
  1988. } else if (c == '\033') {
  1989. result.prefixEscapeCat('e');
  1990. } else if (is1_9 && enc.isPrint(c)) {
  1991. result.cat(bytes, p - n, n, enc);
  1992. } else {
  1993. if (!is1_9) {
  1994. Sprintf.sprintf(runtime, result.value, "\\%03o", c & 0377);
  1995. } else {
  1996. result.escapeCodePointCat(runtime, bytes, p, n);
  1997. }
  1998. }
  1999. }
  2000. result.cat('"');
  2001. return result.infectBy(this);
  2002. }
  2003. private boolean isEVStr(byte[]bytes, int p, int end) {
  2004. return p < end ? isEVStr(bytes[p] & 0xff) : false;
  2005. }
  2006. public boolean isEVStr(int c) {
  2007. return c == '$' || c == '@' || c == '{';
  2008. }
  2009. /** rb_str_length
  2010. *
  2011. */
  2012. @JRubyMethod(name = {"length", "size"}, compat = RUBY1_8)
  2013. public RubyFixnum length() {
  2014. return getRuntime().newFixnum(value.getRealSize());
  2015. }
  2016. @JRubyMethod(name = {"length", "size"}, compat = RUBY1_9)
  2017. public RubyFixnum length19() {
  2018. return getRuntime().newFixnum(strLength());
  2019. }
  2020. @JRubyMethod(name = "bytesize")
  2021. public RubyFixnum bytesize() {
  2022. return length(); // use 1.8 impl
  2023. }
  2024. /** rb_str_empty
  2025. *
  2026. */
  2027. @JRubyMethod(name = "empty?")
  2028. public RubyBoolean empty_p(ThreadContext context) {
  2029. return isEmpty() ? context.getRuntime().getTrue() : context.getRuntime().getFalse();
  2030. }
  2031. public boolean isEmpty() {
  2032. return value.length() == 0;
  2033. }
  2034. /** rb_str_append
  2035. *
  2036. */
  2037. public RubyString append(IRubyObject other) {
  2038. RubyString otherStr = other.convertToString();
  2039. infectBy(otherStr);
  2040. return cat(otherStr.value);
  2041. }
  2042. public RubyString append19(IRubyObject other) {
  2043. return cat19(other.convertToString());
  2044. }
  2045. /** rb_str_concat
  2046. *
  2047. */
  2048. @JRubyMethod(name = {"concat", "<<"}, compat = RUBY1_8)
  2049. public RubyString concat(IRubyObject other) {
  2050. if (other instanceof RubyFixnum) {
  2051. long longValue = ((RubyFixnum) other).getLongValue();
  2052. if (longValue >= 0 && longValue < 256) return cat((byte) longValue);
  2053. }
  2054. return append(other);
  2055. }
  2056. @JRubyMethod(name = {"concat", "<<"}, backtrace = true, compat = RUBY1_9)
  2057. public RubyString concat19(ThreadContext context, IRubyObject other) {
  2058. Ruby runtime = context.getRuntime();
  2059. if (other instanceof RubyFixnum) {
  2060. int c = RubyNumeric.num2int(other);
  2061. if (c < 0) {
  2062. throw runtime.newRangeError("negative string size (or size too big)");
  2063. }
  2064. return concatNumeric(runtime, c);
  2065. } else if (other instanceof RubyBignum) {
  2066. if (((RubyBignum) other).getBigIntegerValue().signum() < 0) {
  2067. throw runtime.newRangeError("negative string size (or size too big)");
  2068. }
  2069. long c = ((RubyBignum) other).getLongValue();
  2070. return concatNumeric(runtime, (int) c);
  2071. }
  2072. return append19(other);
  2073. }
  2074. private RubyString concatNumeric(Ruby runtime, int c) {
  2075. Encoding enc = value.getEncoding();
  2076. int cl = codeLength(runtime, enc, c);
  2077. modify19(value.getRealSize() + cl);
  2078. enc.codeToMbc(c, value.getUnsafeBytes(), value.getBegin() + value.getRealSize());
  2079. value.setRealSize(value.getRealSize() + cl);
  2080. return this;
  2081. }
  2082. /** rb_str_crypt
  2083. *
  2084. */
  2085. @JRubyMethod(name = "crypt")
  2086. public RubyString crypt(ThreadContext context, IRubyObject other) {
  2087. RubyString otherStr = other.convertToString();
  2088. ByteList salt = otherStr.getByteList();
  2089. if (salt.getRealSize() < 2) {
  2090. throw context.getRuntime().newArgumentError("salt too short(need >=2 bytes)");
  2091. }
  2092. salt = salt.makeShared(0, 2);
  2093. RubyString result = RubyString.newStringShared(context.getRuntime(), JavaCrypt.crypt(salt, this.getByteList()));
  2094. result.infectBy(this);
  2095. result.infectBy(otherStr);
  2096. return result;
  2097. }
  2098. /* RubyString aka rb_string_value */
  2099. public static RubyString stringValue(IRubyObject object) {
  2100. return (RubyString) (object instanceof RubyString ? object :
  2101. object.convertToString());
  2102. }
  2103. /** rb_str_sub / rb_str_sub_bang
  2104. *
  2105. */
  2106. @JRubyMethod(reads = BACKREF, writes = BACKREF, compat = RUBY1_8)
  2107. public IRubyObject sub(ThreadContext context, IRubyObject arg0, Block block) {
  2108. RubyString str = strDup(context.getRuntime());
  2109. str.sub_bang(context, arg0, block);
  2110. return str;
  2111. }
  2112. @JRubyMethod(reads = BACKREF, writes = BACKREF, compat = RUBY1_8)
  2113. public IRubyObject sub(ThreadContext context, IRubyObject arg0, IRubyObject arg1, Block block) {
  2114. RubyString str = strDup(context.getRuntime());
  2115. str.sub_bang(context, arg0, arg1, block);
  2116. return str;
  2117. }
  2118. @JRubyMethod(name = "sub!", reads = BACKREF, writes = BACKREF, compat = RUBY1_8)
  2119. public IRubyObject sub_bang(ThreadContext context, IRubyObject arg0, Block block) {
  2120. if (block.isGiven()) return subBangIter(context, getQuotedPattern(arg0), block);
  2121. throw context.getRuntime().newArgumentError(1, 2);
  2122. }
  2123. @JRubyMethod(name = "sub!", reads = BACKREF, writes = BACKREF, compat = RUBY1_8)
  2124. public IRubyObject sub_bang(ThreadContext context, IRubyObject arg0, IRubyObject arg1, Block block) {
  2125. return subBangNoIter(context, getQuotedPattern(arg0), arg1.convertToString());
  2126. }
  2127. private IRubyObject subBangIter(ThreadContext context, Regex pattern, Block block) {
  2128. int range = value.getBegin() + value.getRealSize();
  2129. Matcher matcher = pattern.matcher(value.getUnsafeBytes(), value.getBegin(), range);
  2130. DynamicScope scope = context.getCurrentScope();
  2131. if (matcher.search(value.getBegin(), range, Option.NONE) >= 0) {
  2132. frozenCheck(true);
  2133. byte[] bytes = value.getUnsafeBytes();
  2134. int size = value.getRealSize();
  2135. RubyMatchData match = RubyRegexp.updateBackRef(context, this, scope, matcher, pattern);
  2136. RubyString repl = objAsString(context, block.yield(context,
  2137. makeShared(context.getRuntime(), matcher.getBegin(), matcher.getEnd() - matcher.getBegin())));
  2138. modifyCheck(bytes, size);
  2139. frozenCheck(true);
  2140. scope.setBackRef(match);
  2141. return subBangCommon(context, pattern, matcher, repl, repl.flags);
  2142. } else {
  2143. return scope.setBackRef(context.getRuntime().getNil());
  2144. }
  2145. }
  2146. private IRubyObject subBangNoIter(ThreadContext context, Regex pattern, RubyString repl) {
  2147. int tuFlags = repl.flags;
  2148. int range = value.getBegin() + value.getRealSize();
  2149. Matcher matcher = pattern.matcher(value.getUnsafeBytes(), value.getBegin(), range);
  2150. DynamicScope scope = context.getCurrentScope();
  2151. if (matcher.search(value.getBegin(), range, Option.NONE) >= 0) {
  2152. repl = RubyRegexp.regsub(repl, this, matcher, context.getRuntime().getKCode().getEncoding());
  2153. RubyRegexp.updateBackRef(context, this, scope, matcher, pattern);
  2154. return subBangCommon(context, pattern, matcher, repl, tuFlags);
  2155. } else {
  2156. return scope.setBackRef(context.getRuntime().getNil());
  2157. }
  2158. }
  2159. private IRubyObject subBangCommon(ThreadContext context, Regex pattern, Matcher matcher, RubyString repl, int tuFlags) {
  2160. final int beg = matcher.getBegin();
  2161. final int plen = matcher.getEnd() - beg;
  2162. ByteList replValue = repl.value;
  2163. if (replValue.getRealSize() > plen) {
  2164. modify(value.getRealSize() + replValue.getRealSize() - plen);
  2165. } else {
  2166. modify();
  2167. }
  2168. if (replValue.getRealSize() != plen) {
  2169. int src = value.getBegin() + beg + plen;
  2170. int dst = value.getBegin() + beg + replValue.getRealSize();
  2171. int length = value.getRealSize() - beg - plen;
  2172. System.arraycopy(value.getUnsafeBytes(), src, value.getUnsafeBytes(), dst, length);
  2173. }
  2174. System.arraycopy(replValue.getUnsafeBytes(), replValue.getBegin(), value.getUnsafeBytes(), value.getBegin() + beg, replValue.getRealSize());
  2175. value.setRealSize(value.getRealSize() + replValue.getRealSize() - plen);
  2176. infectBy(tuFlags);
  2177. return this;
  2178. }
  2179. @JRubyMethod(name = "sub", reads = BACKREF, writes = BACKREF, compat = RUBY1_9)
  2180. public IRubyObject sub19(ThreadContext context, IRubyObject arg0, Block block) {
  2181. RubyString str = strDup(context.getRuntime());
  2182. str.sub_bang19(context, arg0, block);
  2183. return str;
  2184. }
  2185. @JRubyMethod(name = "sub", reads = BACKREF, writes = BACKREF, compat = RUBY1_9)
  2186. public IRubyObject sub19(ThreadContext context, IRubyObject arg0, IRubyObject arg1, Block block) {
  2187. RubyString str = strDup(context.getRuntime());
  2188. str.sub_bang19(context, arg0, arg1, block);
  2189. return str;
  2190. }
  2191. @JRubyMethod(name = "sub!", reads = BACKREF, writes = BACKREF, compat = RUBY1_9)
  2192. public IRubyObject sub_bang19(ThreadContext context, IRubyObject arg0, Block block) {
  2193. Ruby runtime = context.getRuntime();
  2194. frozenCheck();
  2195. final Regex pattern, prepared;
  2196. final RubyRegexp regexp;
  2197. if (arg0 instanceof RubyRegexp) {
  2198. regexp = (RubyRegexp)arg0;
  2199. pattern = regexp.getPattern();
  2200. prepared = regexp.preparePattern(this);
  2201. } else {
  2202. regexp = null;
  2203. pattern = getStringPattern19(runtime, arg0);
  2204. prepared = RubyRegexp.preparePattern(runtime, pattern, this);
  2205. }
  2206. if (block.isGiven()) return subBangIter19(runtime, context, pattern, prepared, null, block, regexp);
  2207. throw context.getRuntime().newArgumentError(1, 2);
  2208. }
  2209. @JRubyMethod(name = "sub!", reads = BACKREF, writes = BACKREF, compat = RUBY1_9)
  2210. public IRubyObject sub_bang19(ThreadContext context, IRubyObject arg0, IRubyObject arg1, Block block) {
  2211. Ruby runtime = context.getRuntime();
  2212. IRubyObject hash = TypeConverter.convertToTypeWithCheck(arg1, runtime.getHash(), "to_hash");
  2213. frozenCheck();
  2214. final Regex pattern, prepared;
  2215. final RubyRegexp regexp;
  2216. if (arg0 instanceof RubyRegexp) {
  2217. regexp = (RubyRegexp)arg0;
  2218. pattern = regexp.getPattern();
  2219. prepared = regexp.preparePattern(this);
  2220. } else {
  2221. regexp = null;
  2222. pattern = getStringPattern19(runtime, arg0);
  2223. prepared = RubyRegexp.preparePattern(runtime, pattern, this);
  2224. }
  2225. if (hash.isNil()) {
  2226. return subBangNoIter19(runtime, context, pattern, prepared, arg1.convertToString(), regexp);
  2227. } else {
  2228. return subBangIter19(runtime, context, pattern, prepared, (RubyHash)hash, block, regexp);
  2229. }
  2230. }
  2231. private IRubyObject subBangIter19(Ruby runtime, ThreadContext context, Regex pattern, Regex prepared, RubyHash hash, Block block, RubyRegexp regexp) {
  2232. int begin = value.getBegin();
  2233. int len = value.getRealSize();
  2234. int range = begin + len;
  2235. byte[]bytes = value.getUnsafeBytes();
  2236. Encoding enc = value.getEncoding();
  2237. final Matcher matcher = prepared.matcher(bytes, begin, range);
  2238. DynamicScope scope = context.getCurrentScope();
  2239. if (matcher.search(begin, range, Option.NONE) >= 0) {
  2240. RubyMatchData match = RubyRegexp.updateBackRef19(context, this, scope, matcher, pattern);
  2241. match.regexp = regexp;
  2242. final RubyString repl;
  2243. final int tuFlags;
  2244. IRubyObject subStr = makeShared19(runtime, matcher.getBegin(), matcher.getEnd() - matcher.getBegin());
  2245. if (hash == null) {
  2246. tuFlags = 0;
  2247. repl = objAsString(context, block.yield(context, subStr));
  2248. } else {
  2249. tuFlags = hash.flags;
  2250. repl = objAsString(context, hash.op_aref(context, subStr));
  2251. }
  2252. modifyCheck(bytes, len, enc);
  2253. frozenCheck();
  2254. scope.setBackRef(match);
  2255. return subBangCommon19(context, pattern, matcher, repl, tuFlags | repl.flags);
  2256. } else {
  2257. return scope.setBackRef(runtime.getNil());
  2258. }
  2259. }
  2260. private IRubyObject subBangNoIter19(Ruby runtime, ThreadContext context, Regex pattern, Regex prepared, RubyString repl, RubyRegexp regexp) {
  2261. int begin = value.getBegin();
  2262. int range = begin + value.getRealSize();
  2263. final Matcher matcher = prepared.matcher(value.getUnsafeBytes(), begin, range);
  2264. DynamicScope scope = context.getCurrentScope();
  2265. if (matcher.search(begin, range, Option.NONE) >= 0) {
  2266. repl = RubyRegexp.regsub19(repl, this, matcher, pattern);
  2267. RubyMatchData match = RubyRegexp.updateBackRef19(context, this, scope, matcher, pattern);
  2268. match.regexp = regexp;
  2269. return subBangCommon19(context, pattern, matcher, repl, repl.flags);
  2270. } else {
  2271. return scope.setBackRef(runtime.getNil());
  2272. }
  2273. }
  2274. private IRubyObject subBangCommon19(ThreadContext context, Regex pattern, Matcher matcher, RubyString repl, int tuFlags) {
  2275. final int beg = matcher.getBegin();
  2276. final int end = matcher.getEnd();
  2277. Encoding enc = isCompatibleWith(repl);
  2278. if (enc == null) enc = subBangVerifyEncoding(context, repl, beg, end);
  2279. final int plen = end - beg;
  2280. ByteList replValue = repl.value;
  2281. if (replValue.getRealSize() > plen) {
  2282. modify19(value.getRealSize() + replValue.getRealSize() - plen);
  2283. } else {
  2284. modify19();
  2285. }
  2286. associateEncoding(enc);
  2287. int cr = getCodeRange();
  2288. if (cr > CR_UNKNOWN && cr < CR_BROKEN) {
  2289. int cr2 = repl.getCodeRange();
  2290. if (cr2 == CR_BROKEN || (cr == CR_VALID && cr2 == CR_7BIT)) {
  2291. cr = CR_UNKNOWN;
  2292. } else {
  2293. cr = cr2;
  2294. }
  2295. }
  2296. if (replValue.getRealSize() != plen) {
  2297. int src = value.getBegin() + beg + plen;
  2298. int dst = value.getBegin() + beg + replValue.getRealSize();
  2299. int length = value.getRealSize() - beg - plen;
  2300. System.arraycopy(value.getUnsafeBytes(), src, value.getUnsafeBytes(), dst, length);
  2301. }
  2302. System.arraycopy(replValue.getUnsafeBytes(), replValue.getBegin(), value.getUnsafeBytes(), value.getBegin() + beg, replValue.getRealSize());
  2303. value.setRealSize(value.getRealSize() + replValue.getRealSize() - plen);
  2304. setCodeRange(cr);
  2305. return infectBy(tuFlags);
  2306. }
  2307. private Encoding subBangVerifyEncoding(ThreadContext context, RubyString repl, int beg, int end) {
  2308. byte[]bytes = value.getUnsafeBytes();
  2309. int p = value.getBegin();
  2310. int len = value.getRealSize();
  2311. Encoding strEnc = value.getEncoding();
  2312. if (codeRangeScan(strEnc, bytes, p, beg) != CR_7BIT ||
  2313. codeRangeScan(strEnc, bytes, p + end, len - end) != CR_7BIT) {
  2314. throw context.getRuntime().newArgumentError(
  2315. "incompatible character encodings " + strEnc + " and " + repl.value.getEncoding());
  2316. }
  2317. return repl.value.getEncoding();
  2318. }
  2319. /** rb_str_gsub / rb_str_gsub_bang
  2320. *
  2321. */
  2322. @JRubyMethod(reads = BACKREF, writes = BACKREF, compat = RUBY1_8)
  2323. public IRubyObject gsub(ThreadContext context, IRubyObject arg0, Block block) {
  2324. return gsub(context, arg0, block, false);
  2325. }
  2326. @JRubyMethod(reads = BACKREF, writes = BACKREF, compat = RUBY1_8)
  2327. public IRubyObject gsub(ThreadContext context, IRubyObject arg0, IRubyObject arg1, Block block) {
  2328. return gsub(context, arg0, arg1, block, false);
  2329. }
  2330. @JRubyMethod(name = "gsub!", reads = BACKREF, writes = BACKREF, compat = RUBY1_8)
  2331. public IRubyObject gsub_bang(ThreadContext context, IRubyObject arg0, Block block) {
  2332. return gsub(context, arg0, block, true);
  2333. }
  2334. @JRubyMethod(name = "gsub!", reads = BACKREF, writes = BACKREF, compat = RUBY1_8)
  2335. public IRubyObject gsub_bang(ThreadContext context, IRubyObject arg0, IRubyObject arg1, Block block) {
  2336. return gsub(context, arg0, arg1, block, true);
  2337. }
  2338. private final IRubyObject gsub(ThreadContext context, IRubyObject arg0, Block block, final boolean bang) {
  2339. if (block.isGiven()) {
  2340. return gsubCommon(context, bang, arg0, block, null, 0);
  2341. } else {
  2342. String method = "gsub";
  2343. if (bang) {
  2344. method += "!";
  2345. }
  2346. return enumeratorize(context.getRuntime(), this, method, arg0);
  2347. }
  2348. }
  2349. private final IRubyObject gsub(ThreadContext context, IRubyObject arg0, IRubyObject arg1, Block block, final boolean bang) {
  2350. RubyString repl = arg1.convertToString();
  2351. return gsubCommon(context, bang, arg0, block, repl, repl.flags);
  2352. }
  2353. private IRubyObject gsubCommon(ThreadContext context, final boolean bang, IRubyObject arg, Block block, RubyString repl, int tuFlags) {
  2354. Ruby runtime = context.getRuntime();
  2355. DynamicScope scope = context.getCurrentScope();
  2356. Regex pattern = getQuotedPattern(arg);
  2357. int begin = value.getBegin();
  2358. int slen = value.getRealSize();
  2359. int range = begin + slen;
  2360. byte[]bytes = value.getUnsafeBytes();
  2361. Matcher matcher = pattern.matcher(bytes, begin, range);
  2362. int beg = matcher.search(begin, range, Option.NONE);
  2363. if (beg < 0) {
  2364. scope.setBackRef(runtime.getNil());
  2365. return bang ? runtime.getNil() : strDup(runtime); /* bang: true, no match, no substitution */
  2366. } else if (repl == null && bang && isFrozen()) {
  2367. throw getRuntime().newRuntimeError("can't modify frozen string");
  2368. }
  2369. int blen = slen + 30; /* len + margin */
  2370. ByteList dest = new ByteList(blen);
  2371. dest.setRealSize(blen);
  2372. int offset = 0, buf = 0, bp = 0, cp = begin;
  2373. Encoding enc = getEncodingForKCodeDefault(runtime, pattern, arg);
  2374. RubyMatchData match = null;
  2375. while (beg >= 0) {
  2376. final RubyString val;
  2377. final int begz = matcher.getBegin();
  2378. final int endz = matcher.getEnd();
  2379. if (repl == null) { // block given
  2380. match = RubyRegexp.updateBackRef(context, this, scope, matcher, pattern);
  2381. val = objAsString(context, block.yield(context, substr(runtime, begz, endz - begz)));
  2382. modifyCheck(bytes, slen);
  2383. if (bang) frozenCheck();
  2384. } else {
  2385. val = RubyRegexp.regsub(repl, this, matcher, enc);
  2386. }
  2387. tuFlags |= val.flags;
  2388. ByteList vbuf = val.value;
  2389. int len = (bp - buf) + (beg - offset) + vbuf.getRealSize() + 3;
  2390. if (blen < len) {
  2391. while (blen < len) blen <<= 1;
  2392. len = bp - buf;
  2393. dest.realloc(blen);
  2394. dest.setRealSize(blen);
  2395. bp = buf + len;
  2396. }
  2397. len = beg - offset; /* copy pre-match substr */
  2398. System.arraycopy(bytes, cp, dest.getUnsafeBytes(), bp, len);
  2399. bp += len;
  2400. System.arraycopy(vbuf.getUnsafeBytes(), vbuf.getBegin(), dest.getUnsafeBytes(), bp, vbuf.getRealSize());
  2401. bp += vbuf.getRealSize();
  2402. offset = endz;
  2403. if (begz == endz) {
  2404. if (slen <= endz) break;
  2405. len = enc.length(bytes, begin + endz, range);
  2406. System.arraycopy(bytes, begin + endz, dest.getUnsafeBytes(), bp, len);
  2407. bp += len;
  2408. offset = endz + len;
  2409. }
  2410. cp = begin + offset;
  2411. if (offset > slen) break;
  2412. beg = matcher.search(cp, range, Option.NONE);
  2413. }
  2414. if (repl == null) { // block given
  2415. scope.setBackRef(match);
  2416. } else {
  2417. RubyRegexp.updateBackRef(context, this, scope, matcher, pattern);
  2418. }
  2419. if (slen > offset) {
  2420. int len = bp - buf;
  2421. if (blen - len < slen - offset) {
  2422. blen = len + slen - offset;
  2423. dest.realloc(blen);
  2424. bp = buf + len;
  2425. }
  2426. System.arraycopy(bytes, cp, dest.getUnsafeBytes(), bp, slen - offset);
  2427. bp += slen - offset;
  2428. }
  2429. dest.setRealSize(bp - buf);
  2430. if (bang) {
  2431. view(dest);
  2432. return infectBy(tuFlags);
  2433. } else {
  2434. return new RubyString(runtime, getMetaClass(), dest).infectBy(tuFlags | flags);
  2435. }
  2436. }
  2437. @JRubyMethod(name = "gsub", reads = BACKREF, writes = BACKREF, compat = RUBY1_9)
  2438. public IRubyObject gsub19(ThreadContext context, IRubyObject arg0, Block block) {
  2439. return block.isGiven() ? gsubCommon19(context, block, null, null, arg0, false, 0) : enumeratorize(context.getRuntime(), this, "gsub", arg0);
  2440. }
  2441. @JRubyMethod(name = "gsub", reads = BACKREF, writes = BACKREF, compat = RUBY1_9)
  2442. public IRubyObject gsub19(ThreadContext context, IRubyObject arg0, IRubyObject arg1, Block block) {
  2443. return gsub19(context, arg0, arg1, block, false);
  2444. }
  2445. @JRubyMethod(name = "gsub!", reads = BACKREF, writes = BACKREF, compat = RUBY1_9)
  2446. public IRubyObject gsub_bang19(ThreadContext context, IRubyObject arg0, Block block) {
  2447. checkFrozen();
  2448. return block.isGiven() ? gsubCommon19(context, block, null, null, arg0, true, 0) : enumeratorize(context.getRuntime(), this, "gsub!", arg0);
  2449. }
  2450. @JRubyMethod(name = "gsub!", reads = BACKREF, writes = BACKREF, compat = RUBY1_9)
  2451. public IRubyObject gsub_bang19(ThreadContext context, IRubyObject arg0, IRubyObject arg1, Block block) {
  2452. checkFrozen();
  2453. return gsub19(context, arg0, arg1, block, true);
  2454. }
  2455. private IRubyObject gsub19(ThreadContext context, IRubyObject arg0, IRubyObject arg1, Block block, final boolean bang) {
  2456. Ruby runtime = context.getRuntime();
  2457. IRubyObject tryHash = TypeConverter.convertToTypeWithCheck(arg1, runtime.getHash(), "to_hash");
  2458. final RubyHash hash;
  2459. final RubyString str;
  2460. final int tuFlags;
  2461. if (tryHash.isNil()) {
  2462. hash = null;
  2463. str = arg1.convertToString();
  2464. tuFlags = str.flags;
  2465. } else {
  2466. hash = (RubyHash)tryHash;
  2467. str = null;
  2468. tuFlags = hash.flags & TAINTED_F;
  2469. }
  2470. return gsubCommon19(context, block, str, hash, arg0, bang, tuFlags);
  2471. }
  2472. private IRubyObject gsubCommon19(ThreadContext context, Block block, RubyString repl, RubyHash hash, IRubyObject arg0, final boolean bang, int tuFlags) {
  2473. Ruby runtime = context.getRuntime();
  2474. final Regex pattern, prepared;
  2475. final RubyRegexp regexp;
  2476. if (arg0 instanceof RubyRegexp) {
  2477. regexp = (RubyRegexp)arg0;
  2478. pattern = regexp.getPattern();
  2479. prepared = regexp.preparePattern(this);
  2480. } else {
  2481. regexp = null;
  2482. pattern = getStringPattern19(runtime, arg0);
  2483. prepared = RubyRegexp.preparePattern(runtime, pattern, this);
  2484. }
  2485. final int begin = value.getBegin();
  2486. int slen = value.getRealSize();
  2487. final int range = begin + slen;
  2488. byte[]bytes = value.getUnsafeBytes();
  2489. final Matcher matcher = prepared.matcher(bytes, begin, range);
  2490. final DynamicScope scope = context.getCurrentScope();
  2491. int beg = matcher.search(begin, range, Option.NONE);
  2492. if (beg < 0) {
  2493. scope.setBackRef(runtime.getNil());
  2494. return bang ? runtime.getNil() : strDup(runtime); /* bang: true, no match, no substitution */
  2495. }
  2496. RubyString dest = new RubyString(runtime, getMetaClass(), new ByteList(slen + 30));
  2497. int offset = 0, cp = begin;
  2498. Encoding enc = value.getEncoding();
  2499. RubyMatchData match = null;
  2500. do {
  2501. final RubyString val;
  2502. int begz = matcher.getBegin();
  2503. int endz = matcher.getEnd();
  2504. if (repl != null) { // string given
  2505. val = RubyRegexp.regsub19(repl, this, matcher, pattern);
  2506. } else {
  2507. final RubyString substr = makeShared19(runtime, begz, endz - begz);
  2508. if (hash != null) { // hash given
  2509. val = objAsString(context, hash.op_aref(context, substr));
  2510. } else { // block given
  2511. match = RubyRegexp.updateBackRef19(context, this, scope, matcher, pattern);
  2512. match.regexp = regexp;
  2513. val = objAsString(context, block.yield(context, substr));
  2514. }
  2515. modifyCheck(bytes, slen, enc);
  2516. if (bang) frozenCheck();
  2517. }
  2518. tuFlags |= val.flags;
  2519. int len = beg - offset;
  2520. if (len != 0) dest.cat(bytes, cp, len, enc);
  2521. dest.cat19(val);
  2522. offset = endz;
  2523. if (begz == endz) {
  2524. if (slen <= endz) break;
  2525. len = StringSupport.length(enc, bytes, begin + endz, range);
  2526. dest.cat(bytes, begin + endz, len, enc);
  2527. offset = endz + len;
  2528. }
  2529. cp = begin + offset;
  2530. if (offset > slen) break;
  2531. beg = matcher.search(cp, range, Option.NONE);
  2532. } while (beg >= 0);
  2533. if (slen > offset) dest.cat(bytes, cp, slen - offset, enc);
  2534. if (match != null) { // block given
  2535. scope.setBackRef(match);
  2536. } else {
  2537. match = RubyRegexp.updateBackRef19(context, this, scope, matcher, pattern);
  2538. match.regexp = regexp;
  2539. }
  2540. if (bang) {
  2541. view(dest.value);
  2542. setCodeRange(dest.getCodeRange());
  2543. return infectBy(tuFlags);
  2544. } else {
  2545. return dest.infectBy(tuFlags | flags);
  2546. }
  2547. }
  2548. /** rb_str_index_m
  2549. *
  2550. */
  2551. @JRubyMethod(name = "index", reads = BACKREF, writes = BACKREF, compat = RUBY1_8)
  2552. public IRubyObject index(ThreadContext context, IRubyObject arg0) {
  2553. return indexCommon(context.getRuntime(), context, arg0, 0);
  2554. }
  2555. @JRubyMethod(name = "index", reads = BACKREF, writes = BACKREF, compat = RUBY1_8)
  2556. public IRubyObject index(ThreadContext context, IRubyObject arg0, IRubyObject arg1) {
  2557. int pos = RubyNumeric.num2int(arg1);
  2558. Ruby runtime = context.getRuntime();
  2559. if (pos < 0) {
  2560. pos += value.getRealSize();
  2561. if (pos < 0) {
  2562. if (arg0 instanceof RubyRegexp) {
  2563. context.getCurrentScope().setBackRef(runtime.getNil());
  2564. }
  2565. return runtime.getNil();
  2566. }
  2567. }
  2568. return indexCommon(runtime, context, arg0, pos);
  2569. }
  2570. private IRubyObject indexCommon(Ruby runtime, ThreadContext context, IRubyObject sub, int pos) {
  2571. if (sub instanceof RubyRegexp) {
  2572. RubyRegexp regSub = (RubyRegexp) sub;
  2573. pos = regSub.adjustStartPos(this, pos, false);
  2574. pos = regSub.search(context, this, pos, false);
  2575. } else if (sub instanceof RubyFixnum) {
  2576. int c_int = RubyNumeric.fix2int((RubyFixnum)sub);
  2577. if (c_int < 0x00 || c_int > 0xFF) {
  2578. // out of byte range
  2579. // there will be no match for sure
  2580. return runtime.getNil();
  2581. }
  2582. byte c = (byte) c_int;
  2583. byte[] bytes = value.getUnsafeBytes();
  2584. int end = value.getBegin() + value.getRealSize();
  2585. pos += value.getBegin();
  2586. for (; pos < end; pos++) {
  2587. if (bytes[pos] == c) return RubyFixnum.newFixnum(runtime, pos - value.getBegin());
  2588. }
  2589. return runtime.getNil();
  2590. } else if (sub instanceof RubyString) {
  2591. pos = strIndex((RubyString) sub, pos);
  2592. } else {
  2593. IRubyObject tmp = sub.checkStringType();
  2594. if (tmp.isNil()) throw runtime.newTypeError("type mismatch: " + sub.getMetaClass().getName() + " given");
  2595. pos = strIndex((RubyString) tmp, pos);
  2596. }
  2597. return pos == -1 ? runtime.getNil() : RubyFixnum.newFixnum(runtime, pos);
  2598. }
  2599. private int strIndex(RubyString sub, int offset) {
  2600. ByteList byteList = value;
  2601. if (offset < 0) {
  2602. offset += byteList.getRealSize();
  2603. if (offset < 0) return -1;
  2604. }
  2605. ByteList other = sub.value;
  2606. if (sizeIsSmaller(byteList, offset, other)) return -1;
  2607. if (other.getRealSize() == 0) return offset;
  2608. return byteList.indexOf(other, offset);
  2609. }
  2610. private static boolean sizeIsSmaller(ByteList byteList, int offset, ByteList other) {
  2611. return byteList.getRealSize() - offset < other.getRealSize();
  2612. }
  2613. @JRubyMethod(name = "index", reads = BACKREF, writes = BACKREF, compat = RUBY1_9)
  2614. public IRubyObject index19(ThreadContext context, IRubyObject arg0) {
  2615. return indexCommon19(context.getRuntime(), context, arg0, 0);
  2616. }
  2617. @JRubyMethod(name = "index", reads = BACKREF, writes = BACKREF, compat = RUBY1_9)
  2618. public IRubyObject index19(ThreadContext context, IRubyObject arg0, IRubyObject arg1) {
  2619. int pos = RubyNumeric.num2int(arg1);
  2620. Ruby runtime = context.getRuntime();
  2621. if (pos < 0) {
  2622. pos += strLength();
  2623. if (pos < 0) {
  2624. if (arg0 instanceof RubyRegexp) context.getCurrentScope().setBackRef(runtime.getNil());
  2625. return runtime.getNil();
  2626. }
  2627. }
  2628. return indexCommon19(runtime, context, arg0, pos);
  2629. }
  2630. private IRubyObject indexCommon19(Ruby runtime, ThreadContext context, IRubyObject sub, int pos) {
  2631. if (sub instanceof RubyRegexp) {
  2632. RubyRegexp regSub = (RubyRegexp) sub;
  2633. pos = singleByteOptimizable() ? pos :
  2634. StringSupport.nth(checkEncoding(regSub), value.getUnsafeBytes(), value.getBegin(),
  2635. value.getBegin() + value.getRealSize(),
  2636. pos);
  2637. pos = regSub.adjustStartPos19(this, pos, false);
  2638. pos = regSub.search19(context, this, pos, false);
  2639. pos = subLength(pos);
  2640. } else if (sub instanceof RubyString) {
  2641. pos = strIndex19((RubyString) sub, pos);
  2642. pos = subLength(pos);
  2643. } else {
  2644. IRubyObject tmp = sub.checkStringType();
  2645. if (tmp.isNil()) throw runtime.newTypeError("type mismatch: " + sub.getMetaClass().getName() + " given");
  2646. pos = strIndex19((RubyString) tmp, pos);
  2647. pos = subLength(pos);
  2648. }
  2649. return pos == -1 ? runtime.getNil() : RubyFixnum.newFixnum(runtime, pos);
  2650. }
  2651. private int strIndex19(RubyString sub, int offset) {
  2652. Encoding enc = checkEncoding(sub);
  2653. if (sub.scanForCodeRange() == CR_BROKEN) return -1;
  2654. int len = strLength(enc);
  2655. int slen = sub.strLength(enc);
  2656. if (offset < 0) {
  2657. offset += len;
  2658. if (offset < 0) return -1;
  2659. }
  2660. if (len - offset < slen) return -1;
  2661. byte[]bytes = value.getUnsafeBytes();
  2662. int p = value.getBegin();
  2663. int end = p + value.getRealSize();
  2664. if (offset != 0) {
  2665. offset = singleByteOptimizable() ? offset : StringSupport.offset(enc, bytes, p, end, offset);
  2666. p += offset;
  2667. }
  2668. if (slen == 0) return offset;
  2669. while (true) {
  2670. int pos = value.indexOf(sub.value, p - value.getBegin());
  2671. if (pos < 0) return pos;
  2672. pos -= (p - value.getBegin());
  2673. int t = enc.rightAdjustCharHead(bytes, p, p + pos, end);
  2674. if (t == p + pos) return pos + offset;
  2675. if ((len -= t - p) <= 0) return -1;
  2676. offset += t - p;
  2677. p = t;
  2678. }
  2679. }
  2680. /** rb_str_rindex_m
  2681. *
  2682. */
  2683. @JRubyMethod(name = "rindex", reads = BACKREF, writes = BACKREF, compat = RUBY1_8)
  2684. public IRubyObject rindex(ThreadContext context, IRubyObject arg0) {
  2685. return rindexCommon(context.getRuntime(), context, arg0, value.getRealSize());
  2686. }
  2687. @JRubyMethod(name = "rindex", reads = BACKREF, writes = BACKREF, compat = RUBY1_8)
  2688. public IRubyObject rindex(ThreadContext context, IRubyObject arg0, IRubyObject arg1) {
  2689. int pos = RubyNumeric.num2int(arg1);
  2690. Ruby runtime = context.getRuntime();
  2691. if (pos < 0) {
  2692. pos += value.getRealSize();
  2693. if (pos < 0) {
  2694. if (arg0 instanceof RubyRegexp) context.getCurrentScope().setBackRef(runtime.getNil());
  2695. return runtime.getNil();
  2696. }
  2697. }
  2698. if (pos > value.getRealSize()) pos = value.getRealSize();
  2699. return rindexCommon(runtime, context, arg0, pos);
  2700. }
  2701. private IRubyObject rindexCommon(Ruby runtime, ThreadContext context, final IRubyObject sub, int pos) {
  2702. if (sub instanceof RubyRegexp) {
  2703. RubyRegexp regSub = (RubyRegexp) sub;
  2704. if (regSub.length() > 0) {
  2705. pos = regSub.adjustStartPos(this, pos, true);
  2706. pos = regSub.search(context, this, pos, true);
  2707. }
  2708. } else if (sub instanceof RubyString) {
  2709. pos = strRindex((RubyString) sub, pos);
  2710. } else if (sub instanceof RubyFixnum) {
  2711. int c_int = RubyNumeric.fix2int((RubyFixnum)sub);
  2712. if (c_int < 0x00 || c_int > 0xFF) {
  2713. // out of byte range
  2714. // there will be no match for sure
  2715. return runtime.getNil();
  2716. }
  2717. byte c = (byte) c_int;
  2718. byte[] bytes = value.getUnsafeBytes();
  2719. int pbeg = value.getBegin();
  2720. int p = pbeg + pos;
  2721. if (pos == value.getRealSize()) {
  2722. if (pos == 0) return runtime.getNil();
  2723. --p;
  2724. }
  2725. while (pbeg <= p) {
  2726. if (bytes[p] == c) return RubyFixnum.newFixnum(runtime, p - value.getBegin());
  2727. p--;
  2728. }
  2729. return runtime.getNil();
  2730. } else {
  2731. IRubyObject tmp = sub.checkStringType();
  2732. if (tmp.isNil()) throw runtime.newTypeError("type mismatch: " + sub.getMetaClass().getName() + " given");
  2733. pos = strRindex((RubyString) tmp, pos);
  2734. }
  2735. if (pos >= 0) return RubyFixnum.newFixnum(runtime, pos);
  2736. return runtime.getNil();
  2737. }
  2738. private int strRindex(RubyString sub, int pos) {
  2739. int subLength = sub.value.getRealSize();
  2740. /* substring longer than string */
  2741. if (value.getRealSize() < subLength) return -1;
  2742. if (value.getRealSize() - pos < subLength) pos = value.getRealSize() - subLength;
  2743. return value.lastIndexOf(sub.value, pos);
  2744. }
  2745. @JRubyMethod(name = "rindex", reads = BACKREF, writes = BACKREF, compat = RUBY1_9)
  2746. public IRubyObject rindex19(ThreadContext context, IRubyObject arg0) {
  2747. return rindexCommon19(context.getRuntime(), context, arg0, strLength());
  2748. }
  2749. @JRubyMethod(name = "rindex", reads = BACKREF, writes = BACKREF, compat = RUBY1_9)
  2750. public IRubyObject rindex19(ThreadContext context, IRubyObject arg0, IRubyObject arg1) {
  2751. int pos = RubyNumeric.num2int(arg1);
  2752. Ruby runtime = context.getRuntime();
  2753. int length = strLength();
  2754. if (pos < 0) {
  2755. pos += length;
  2756. if (pos < 0) {
  2757. if (arg0 instanceof RubyRegexp) context.getCurrentScope().setBackRef(runtime.getNil());
  2758. return runtime.getNil();
  2759. }
  2760. }
  2761. if (pos > length) pos = length;
  2762. return rindexCommon19(runtime, context, arg0, pos);
  2763. }
  2764. private IRubyObject rindexCommon19(Ruby runtime, ThreadContext context, final IRubyObject sub, int pos) {
  2765. if (sub instanceof RubyRegexp) {
  2766. RubyRegexp regSub = (RubyRegexp) sub;
  2767. pos = singleByteOptimizable() ? pos :
  2768. StringSupport.nth(value.getEncoding(), value.getUnsafeBytes(), value.getBegin(),
  2769. value.getBegin() + value.getRealSize(),
  2770. pos);
  2771. if (regSub.length() > 0) {
  2772. pos = regSub.adjustStartPos19(this, pos, true);
  2773. pos = regSub.search19(context, this, pos, true);
  2774. pos = subLength(pos);
  2775. }
  2776. } else if (sub instanceof RubyString) {
  2777. pos = strRindex19((RubyString) sub, pos);
  2778. } else {
  2779. IRubyObject tmp = sub.checkStringType();
  2780. if (tmp.isNil()) throw runtime.newTypeError("type mismatch: " + sub.getMetaClass().getName() + " given");
  2781. pos = strRindex19((RubyString) tmp, pos);
  2782. }
  2783. if (pos >= 0) return RubyFixnum.newFixnum(runtime, pos);
  2784. return runtime.getNil();
  2785. }
  2786. private int strRindex19(RubyString sub, int pos) {
  2787. Encoding enc = checkEncoding(sub);
  2788. if (sub.scanForCodeRange() == CR_BROKEN) return -1;
  2789. int len = strLength(enc);
  2790. int slen = sub.strLength(enc);
  2791. if (len < slen) return -1;
  2792. if (len - pos < slen) pos = len - slen;
  2793. if (len == 0) return pos;
  2794. byte[]bytes = value.getUnsafeBytes();
  2795. int p = value.getBegin();
  2796. int end = p + value.getRealSize();
  2797. byte[]sbytes = sub.value.getUnsafeBytes();
  2798. int sp = sub.value.getBegin();
  2799. slen = sub.value.getRealSize();
  2800. boolean singlebyte = singleByteOptimizable();
  2801. while (true) {
  2802. int s = singlebyte ? p + pos : StringSupport.nth(enc, bytes, p, end, pos);
  2803. if (s == -1) return -1;
  2804. if (ByteList.memcmp(bytes, s, sbytes, sp, slen) == 0) return pos;
  2805. if (pos == 0) return -1;
  2806. pos--;
  2807. }
  2808. }
  2809. @Deprecated
  2810. public final IRubyObject substr(int beg, int len) {
  2811. return substr(getRuntime(), beg, len);
  2812. }
  2813. /* rb_str_substr */
  2814. public final IRubyObject substr(Ruby runtime, int beg, int len) {
  2815. int length = value.length();
  2816. if (len < 0 || beg > length) return runtime.getNil();
  2817. if (beg < 0) {
  2818. beg += length;
  2819. if (beg < 0) return runtime.getNil();
  2820. }
  2821. int end = Math.min(length, beg + len);
  2822. return makeShared(runtime, beg, end - beg);
  2823. }
  2824. public final IRubyObject substr19(Ruby runtime, int beg, int len) {
  2825. if (len < 0) return runtime.getNil();
  2826. int length = value.getRealSize();
  2827. if (length == 0) len = 0;
  2828. Encoding enc = value.getEncoding();
  2829. if (singleByteOptimizable(enc)) {
  2830. if (beg > length) return runtime.getNil();
  2831. if (beg < 0) {
  2832. beg += length;
  2833. if (beg < 0) return runtime.getNil();
  2834. }
  2835. if (beg + len > length) len = length - beg;
  2836. if (len <= 0) len = beg = 0;
  2837. return makeShared19(runtime, beg, len);
  2838. } else {
  2839. return multibyteSubstr19(runtime, enc, len, beg, length);
  2840. }
  2841. }
  2842. private final IRubyObject multibyteSubstr19(Ruby runtime, Encoding enc, int len, int beg, int length) {
  2843. int p;
  2844. int s = value.getBegin();
  2845. int end = s + length;
  2846. byte[]bytes = value.getUnsafeBytes();
  2847. if (beg < 0) {
  2848. if (len > -beg) len = -beg;
  2849. if (-beg * enc.maxLength() < length >>> 3) {
  2850. beg = -beg;
  2851. int e = end;
  2852. while (beg-- > len && (e = enc.prevCharHead(bytes, s, e, e)) != -1) {} // nothing
  2853. p = e;
  2854. if (p == -1) return runtime.getNil();
  2855. while (len-- > 0 && (p = enc.prevCharHead(bytes, s, p, e)) != -1) {} // nothing
  2856. if (p == -1) return runtime.getNil();
  2857. return makeShared19(runtime, p - s, e - p);
  2858. } else {
  2859. beg += strLength(enc);
  2860. if (beg < 0) return runtime.getNil();
  2861. }
  2862. } else if (beg > 0 && beg > strLength(enc)) {
  2863. return runtime.getNil();
  2864. }
  2865. if (len == 0) {
  2866. p = 0;
  2867. } else if (isCodeRangeValid() && enc instanceof UTF8Encoding) {
  2868. p = StringSupport.utf8Nth(bytes, s, end, beg);
  2869. len = StringSupport.utf8Offset(bytes, p, end, len);
  2870. } else if (enc.isFixedWidth()) {
  2871. int w = enc.maxLength();
  2872. p = s + beg * w;
  2873. if (p > end) {
  2874. p = end;
  2875. len = 0;
  2876. } else if (len * w > end - p) {
  2877. len = end - p;
  2878. } else {
  2879. len *= w;
  2880. }
  2881. } else if ((p = StringSupport.nth(enc, bytes, s, end, beg)) == end) {
  2882. len = 0;
  2883. } else {
  2884. len = StringSupport.offset(enc, bytes, p, end, len);
  2885. }
  2886. return makeShared19(runtime, p - s, len);
  2887. }
  2888. /* rb_str_splice */
  2889. private IRubyObject replaceInternal(int beg, int len, RubyString repl) {
  2890. int oldLength = value.getRealSize();
  2891. if (beg + len >= oldLength) len = oldLength - beg;
  2892. ByteList replBytes = repl.value;
  2893. int replLength = replBytes.getRealSize();
  2894. int newLength = oldLength + replLength - len;
  2895. byte[]oldBytes = value.getUnsafeBytes();
  2896. int oldBegin = value.getBegin();
  2897. modify(newLength);
  2898. if (replLength != len) {
  2899. System.arraycopy(oldBytes, oldBegin + beg + len, value.getUnsafeBytes(), beg + replLength, oldLength - (beg + len));
  2900. }
  2901. if (replLength > 0) System.arraycopy(replBytes.getUnsafeBytes(), replBytes.getBegin(), value.getUnsafeBytes(), beg, replLength);
  2902. value.setRealSize(newLength);
  2903. return infectBy(repl);
  2904. }
  2905. private void replaceInternal19(int beg, int len, RubyString repl) {
  2906. Encoding enc = checkEncoding(repl);
  2907. int p = value.getBegin();
  2908. int e;
  2909. if (singleByteOptimizable()) {
  2910. p += beg;
  2911. e = p + len;
  2912. } else {
  2913. int end = p + value.getRealSize();
  2914. byte[]bytes = value.getUnsafeBytes();
  2915. p = StringSupport.nth(enc, bytes, p, end, beg);
  2916. if (p == -1) p = end;
  2917. e = StringSupport.nth(enc, bytes, p, end, len);
  2918. if (e == -1) e = end;
  2919. }
  2920. int cr = getCodeRange();
  2921. if (cr == CR_BROKEN) clearCodeRange();
  2922. replaceInternal(p - value.getBegin(), e - p, repl);
  2923. associateEncoding(enc);
  2924. cr = codeRangeAnd(cr, repl.getCodeRange());
  2925. if (cr != CR_BROKEN) setCodeRange(cr);
  2926. }
  2927. /** rb_str_aref, rb_str_aref_m
  2928. *
  2929. */
  2930. @JRubyMethod(name = {"[]", "slice"}, reads = BACKREF, writes = BACKREF, compat = RUBY1_8)
  2931. public IRubyObject op_aref(ThreadContext context, IRubyObject arg1, IRubyObject arg2) {
  2932. Ruby runtime = context.getRuntime();
  2933. if (arg1 instanceof RubyRegexp) return subpat(runtime, context, (RubyRegexp)arg1, RubyNumeric.num2int(arg2));
  2934. return substr(runtime, RubyNumeric.num2int(arg1), RubyNumeric.num2int(arg2));
  2935. }
  2936. @JRubyMethod(name = {"[]", "slice"}, reads = BACKREF, writes = BACKREF, compat = RUBY1_8)
  2937. public IRubyObject op_aref(ThreadContext context, IRubyObject arg) {
  2938. Ruby runtime = context.getRuntime();
  2939. if (arg instanceof RubyFixnum) {
  2940. return op_aref(runtime, RubyFixnum.fix2int((RubyFixnum)arg));
  2941. } if (arg instanceof RubyRegexp) {
  2942. return subpat(runtime, context, (RubyRegexp)arg, 0);
  2943. } else if (arg instanceof RubyString) {
  2944. RubyString str = (RubyString)arg;
  2945. return value.indexOf(str.value) != -1 ? str.strDup(runtime) : runtime.getNil();
  2946. } else if (arg instanceof RubyRange) {
  2947. int[] begLen = ((RubyRange) arg).begLenInt(value.length(), 0);
  2948. return begLen == null ? runtime.getNil() : substr(runtime, begLen[0], begLen[1]);
  2949. }
  2950. return op_aref(runtime, RubyFixnum.num2int(arg));
  2951. }
  2952. private IRubyObject op_aref(Ruby runtime, int idx) {
  2953. if (idx < 0) idx += value.getRealSize();
  2954. return idx < 0 || idx >= value.getRealSize() ? runtime.getNil() : runtime.newFixnum(value.get(idx) & 0xff);
  2955. }
  2956. @JRubyMethod(name = {"[]", "slice"}, reads = BACKREF, writes = BACKREF, compat = RUBY1_9)
  2957. public IRubyObject op_aref19(ThreadContext context, IRubyObject arg1, IRubyObject arg2) {
  2958. Ruby runtime = context.getRuntime();
  2959. if (arg1 instanceof RubyRegexp) return subpat19(runtime, context, (RubyRegexp)arg1, arg2);
  2960. return substr19(runtime, RubyNumeric.num2int(arg1), RubyNumeric.num2int(arg2));
  2961. }
  2962. @JRubyMethod(name = {"[]", "slice"}, reads = BACKREF, writes = BACKREF, compat = RUBY1_9)
  2963. public IRubyObject op_aref19(ThreadContext context, IRubyObject arg) {
  2964. Ruby runtime = context.getRuntime();
  2965. if (arg instanceof RubyFixnum) {
  2966. return op_aref19(runtime, RubyNumeric.fix2int((RubyFixnum)arg));
  2967. } else if (arg instanceof RubyRegexp) {
  2968. return subpat19(runtime, context, (RubyRegexp)arg);
  2969. } else if (arg instanceof RubyString) {
  2970. RubyString str = (RubyString)arg;
  2971. return strIndex19(str, 0) != -1 ? str.strDup(runtime) : runtime.getNil();
  2972. } else if (arg instanceof RubyRange) {
  2973. int len = strLength();
  2974. int[] begLen = ((RubyRange) arg).begLenInt(len, 0);
  2975. return begLen == null ? runtime.getNil() : substr19(runtime, begLen[0], begLen[1]);
  2976. }
  2977. return op_aref19(runtime, RubyNumeric.num2int(arg));
  2978. }
  2979. private IRubyObject op_aref19(Ruby runtime, int idx) {
  2980. IRubyObject str = substr19(runtime, idx, 1);
  2981. return !str.isNil() && ((RubyString) str).value.getRealSize() == 0 ? runtime.getNil() : str;
  2982. }
  2983. /**
  2984. * rb_str_subpat_set
  2985. *
  2986. */
  2987. private void subpatSet(ThreadContext context, RubyRegexp regexp, int nth, IRubyObject repl) {
  2988. Ruby runtime = context.getRuntime();
  2989. if (regexp.search(context, this, 0, false) < 0) throw runtime.newIndexError("regexp not matched");
  2990. RubyMatchData match = (RubyMatchData)context.getCurrentScope().getBackRef(runtime);
  2991. nth = subpatSetCheck(runtime, nth, match.regs);
  2992. final int start, end;
  2993. if (match.regs == null) {
  2994. start = match.begin;
  2995. end = match.end;
  2996. } else {
  2997. start = match.regs.beg[nth];
  2998. end = match.regs.end[nth];
  2999. }
  3000. if (start == -1) throw runtime.newIndexError("regexp group " + nth + " not matched");
  3001. replaceInternal(start, end - start, repl.convertToString());
  3002. }
  3003. private int subpatSetCheck(Ruby runtime, int nth, Region regs) {
  3004. int numRegs = regs == null ? 1 : regs.numRegs;
  3005. if (nth < numRegs) {
  3006. if (nth < 0) {
  3007. if (-nth < numRegs) return nth + numRegs;
  3008. } else {
  3009. return nth;
  3010. }
  3011. }
  3012. throw runtime.newIndexError("index " + nth + " out of regexp");
  3013. }
  3014. private IRubyObject subpat(Ruby runtime, ThreadContext context, RubyRegexp regex, int nth) {
  3015. if (regex.search(context, this, 0, false) >= 0) {
  3016. return RubyRegexp.nth_match(nth, context.getCurrentScope().getBackRef(runtime));
  3017. }
  3018. return runtime.getNil();
  3019. }
  3020. private void subpatSet19(ThreadContext context, RubyRegexp regexp, IRubyObject backref, IRubyObject repl) {
  3021. Ruby runtime = context.getRuntime();
  3022. if (regexp.search19(context, this, 0, false) < 0) throw runtime.newIndexError("regexp not matched");
  3023. RubyMatchData match = (RubyMatchData)context.getCurrentScope().getBackRef(runtime);
  3024. int nth = backref == null ? 0 : subpatSetCheck(runtime, match.backrefNumber(backref), match.regs);
  3025. final int start, end;
  3026. if (match.regs == null) {
  3027. start = match.begin;
  3028. end = match.end;
  3029. } else {
  3030. start = match.regs.beg[nth];
  3031. end = match.regs.end[nth];
  3032. }
  3033. if (start == -1) throw runtime.newIndexError("regexp group " + nth + " not matched");
  3034. RubyString replStr = repl.convertToString();
  3035. Encoding enc = checkEncoding(replStr);
  3036. // TODO: keep cr
  3037. replaceInternal(start, end - start, replStr); // TODO: rb_str_splice_0
  3038. associateEncoding(enc);
  3039. }
  3040. private IRubyObject subpat19(Ruby runtime, ThreadContext context, RubyRegexp regex, IRubyObject backref) {
  3041. if (regex.search19(context, this, 0, false) >= 0) {
  3042. RubyMatchData match = (RubyMatchData)context.getCurrentScope().getBackRef(runtime);
  3043. return RubyRegexp.nth_match(match.backrefNumber(backref), context.getCurrentScope().getBackRef(runtime));
  3044. }
  3045. return runtime.getNil();
  3046. }
  3047. private IRubyObject subpat19(Ruby runtime, ThreadContext context, RubyRegexp regex) {
  3048. if (regex.search19(context, this, 0, false) >= 0) {
  3049. return RubyRegexp.nth_match(0, context.getCurrentScope().getBackRef(runtime));
  3050. }
  3051. return runtime.getNil();
  3052. }
  3053. /** rb_str_aset, rb_str_aset_m
  3054. *
  3055. */
  3056. @JRubyMethod(name = "[]=", reads = BACKREF, compat = RUBY1_8)
  3057. public IRubyObject op_aset(ThreadContext context, IRubyObject arg0, IRubyObject arg1) {
  3058. if (arg0 instanceof RubyFixnum) {
  3059. return op_aset(context, RubyNumeric.fix2int((RubyFixnum)arg0), arg1);
  3060. } else if (arg0 instanceof RubyRegexp) {
  3061. subpatSet(context, (RubyRegexp)arg0, 0, arg1.convertToString());
  3062. return arg1;
  3063. } else if (arg0 instanceof RubyString) {
  3064. RubyString orig = (RubyString)arg0;
  3065. int beg = value.indexOf(orig.value);
  3066. if (beg < 0) throw context.getRuntime().newIndexError("string not matched");
  3067. replaceInternal(beg, orig.value.getRealSize(), arg1.convertToString());
  3068. return arg1;
  3069. } else if (arg0 instanceof RubyRange) {
  3070. int[] begLen = ((RubyRange) arg0).begLenInt(value.getRealSize(), 2);
  3071. replaceInternal(begLen[0], begLen[1], arg1.convertToString());
  3072. return arg1;
  3073. }
  3074. return op_aset(context, RubyNumeric.num2int(arg0), arg1);
  3075. }
  3076. private IRubyObject op_aset(ThreadContext context, int idx, IRubyObject arg1) {
  3077. idx = checkIndexForRef(idx, value.getRealSize());
  3078. if (arg1 instanceof RubyFixnum) {
  3079. modify();
  3080. value.set(idx, RubyNumeric.fix2int((RubyFixnum)arg1));
  3081. } else {
  3082. replaceInternal(idx, 1, arg1.convertToString());
  3083. }
  3084. return arg1;
  3085. }
  3086. @JRubyMethod(name = "[]=", reads = BACKREF, compat = RUBY1_8)
  3087. public IRubyObject op_aset(ThreadContext context, IRubyObject arg0, IRubyObject arg1, IRubyObject arg2) {
  3088. if (arg0 instanceof RubyRegexp) {
  3089. subpatSet(context, (RubyRegexp)arg0, RubyNumeric.num2int(arg1), arg2);
  3090. } else {
  3091. int beg = RubyNumeric.num2int(arg0);
  3092. int len = RubyNumeric.num2int(arg1);
  3093. checkLength(len);
  3094. RubyString repl = arg2.convertToString();
  3095. replaceInternal(checkIndex(beg, value.getRealSize()), len, repl);
  3096. }
  3097. return arg2;
  3098. }
  3099. @JRubyMethod(name = "[]=", reads = BACKREF, compat = RUBY1_9)
  3100. public IRubyObject op_aset19(ThreadContext context, IRubyObject arg0, IRubyObject arg1) {
  3101. if (arg0 instanceof RubyFixnum) {
  3102. return op_aset19(context, RubyNumeric.fix2int((RubyFixnum)arg0), arg1);
  3103. } else if (arg0 instanceof RubyRegexp) {
  3104. subpatSet19(context, (RubyRegexp)arg0, null, arg1);
  3105. return arg1;
  3106. } else if (arg0 instanceof RubyString) {
  3107. RubyString orig = (RubyString)arg0;
  3108. int beg = strIndex19(orig, 0);
  3109. if (beg < 0) throw context.getRuntime().newIndexError("string not matched");
  3110. beg = subLength(beg);
  3111. replaceInternal19(beg, orig.strLength(), arg1.convertToString());
  3112. return arg1;
  3113. } else if (arg0 instanceof RubyRange) {
  3114. int[] begLen = ((RubyRange) arg0).begLenInt(strLength(), 2);
  3115. replaceInternal19(begLen[0], begLen[1], arg1.convertToString());
  3116. return arg1;
  3117. }
  3118. return op_aset19(context, RubyNumeric.num2int(arg0), arg1);
  3119. }
  3120. private IRubyObject op_aset19(ThreadContext context, int idx, IRubyObject arg1) {
  3121. replaceInternal19(checkIndex(idx, strLength()), 1, arg1.convertToString());
  3122. return arg1;
  3123. }
  3124. @JRubyMethod(name = "[]=", reads = BACKREF, compat = RUBY1_9)
  3125. public IRubyObject op_aset19(ThreadContext context, IRubyObject arg0, IRubyObject arg1, IRubyObject arg2) {
  3126. if (arg0 instanceof RubyRegexp) {
  3127. subpatSet19(context, (RubyRegexp)arg0, arg1, arg2);
  3128. } else {
  3129. int beg = RubyNumeric.num2int(arg0);
  3130. int len = RubyNumeric.num2int(arg1);
  3131. checkLength(len);
  3132. RubyString repl = arg2.convertToString();
  3133. replaceInternal19(checkIndex(beg, strLength()), len, repl);
  3134. }
  3135. return arg2;
  3136. }
  3137. private final boolean isHeadSlice(int beg, int len) {
  3138. return beg == 0 && len > 0 && len <= value.getRealSize();
  3139. }
  3140. private final boolean isTailSlice(int beg, int len) {
  3141. return beg >= 0 && len > 0 && (beg + len) == value.getRealSize();
  3142. }
  3143. /**
  3144. * Excises (removes) a slice of the string that starts at index zero
  3145. *
  3146. * @param len The number of bytes to remove.
  3147. */
  3148. private final void exciseHead(int len) {
  3149. // just adjust the view start
  3150. view(len, value.getRealSize() - len);
  3151. }
  3152. /**
  3153. * Excises (removes) a slice of the string that ends at the last byte in the string
  3154. *
  3155. * @param len The number of bytes to remove.
  3156. */
  3157. private final void exciseTail(int len) {
  3158. // just adjust the view length
  3159. view(0, value.getRealSize() - len);
  3160. }
  3161. /** rb_str_slice_bang
  3162. *
  3163. */
  3164. @JRubyMethod(name = "slice!", reads = BACKREF, writes = BACKREF, compat = RUBY1_8)
  3165. public IRubyObject slice_bang(ThreadContext context, IRubyObject arg0) {
  3166. IRubyObject result = op_aref(context, arg0);
  3167. if (!result.isNil()) {
  3168. // Optimize slice!(0), slice!(0..len), and slice!(pos..-1)
  3169. int beg = -1;
  3170. int len = 1;
  3171. if (arg0 instanceof RubyFixnum) {
  3172. beg = RubyNumeric.num2int(arg0);
  3173. } else if (arg0 instanceof RubyRange) {
  3174. int[] begLen = ((RubyRange) arg0).begLenInt(value.getRealSize(), 2);
  3175. beg = begLen[0];
  3176. len = begLen[1];
  3177. }
  3178. if (isHeadSlice(beg, len)) {
  3179. exciseHead(len);
  3180. } else if (isTailSlice(beg, len)) {
  3181. exciseTail(len);
  3182. } else {
  3183. op_aset(context, arg0, RubyString.newEmptyString(context.getRuntime()));
  3184. }
  3185. }
  3186. return result;
  3187. }
  3188. @JRubyMethod(name = "slice!", reads = BACKREF, writes = BACKREF, compat = RUBY1_8)
  3189. public IRubyObject slice_bang(ThreadContext context, IRubyObject arg0, IRubyObject arg1) {
  3190. IRubyObject result = op_aref(context, arg0, arg1);
  3191. if (!result.isNil()) {
  3192. // Optimize slice!(0, len) and slice!(str.length - len, len)
  3193. int beg = -1;
  3194. int len = 0;
  3195. if (arg0 instanceof RubyFixnum && arg1 instanceof RubyFixnum) {
  3196. beg = RubyNumeric.num2int(arg0);
  3197. len = RubyNumeric.num2int(arg1);
  3198. }
  3199. if (isHeadSlice(beg, len)) {
  3200. exciseHead(len);
  3201. } else if (isTailSlice(beg, len)) {
  3202. exciseTail(len);
  3203. } else {
  3204. op_aset(context, arg0, arg1, RubyString.newEmptyString(context.getRuntime()));
  3205. }
  3206. }
  3207. return result;
  3208. }
  3209. @JRubyMethod(name = "slice!", reads = BACKREF, writes = BACKREF, compat = RUBY1_9)
  3210. public IRubyObject slice_bang19(ThreadContext context, IRubyObject arg0) {
  3211. IRubyObject result = op_aref19(context, arg0);
  3212. if (result.isNil()) {
  3213. modifyCheck(); // keep cr ?
  3214. } else {
  3215. op_aset19(context, arg0, RubyString.newEmptyString(context.getRuntime()));
  3216. }
  3217. return result;
  3218. }
  3219. @JRubyMethod(name = "slice!", reads = BACKREF, writes = BACKREF, compat = RUBY1_9)
  3220. public IRubyObject slice_bang19(ThreadContext context, IRubyObject arg0, IRubyObject arg1) {
  3221. IRubyObject result = op_aref19(context, arg0, arg1);
  3222. if (result.isNil()) {
  3223. modifyCheck(); // keep cr ?
  3224. } else {
  3225. op_aset19(context, arg0, arg1, RubyString.newEmptyString(context.getRuntime()));
  3226. }
  3227. return result;
  3228. }
  3229. @JRubyMethod(name = {"succ", "next"}, compat = RUBY1_8)
  3230. public IRubyObject succ(ThreadContext context) {
  3231. RubyString str = strDup(context.getRuntime());
  3232. str.succ_bang();
  3233. return str;
  3234. }
    /**
     * In-place successor for 1.8 mode (Ruby names succ! and next!).
     *
     * Scans the bytes from right to left. ASCII alphanumerics are incremented within
     * their own class (digit, lowercase, uppercase); a character already at the top of
     * its class wraps to the bottom and the scan carries on to the next alphanumeric on
     * the left. If the string contains no alphanumeric at all, the raw bytes are
     * incremented instead, wrapping 0xff to 0. When the leftmost affected position
     * wrapped, one extra byte is inserted in front of it.
     *
     * @return this string (an empty string is returned unchanged after the modification check)
     */
    @JRubyMethod(name = {"succ!", "next!"}, compat = RUBY1_8)
    public IRubyObject succ_bang() {
        if (value.getRealSize() == 0) {
            modifyCheck();
            return this;
        }
        modify();
        boolean alnumSeen = false;
        // pos: position of the last wrapped byte still awaiting a carry (-1 = no carry pending)
        // n:   byte to insert in front of pos if the carry runs off the left end
        int pos = -1, n = 0;
        int p = value.getBegin();
        int end = p + value.getRealSize();
        byte[]bytes = value.getUnsafeBytes();
        for (int i = end - 1; i >= p; i--) {
            int c = bytes[i] & 0xff;
            if (ASCII.isAlnum(c)) {
                alnumSeen = true;
                // not at the top of its class: plain increment, carry is resolved
                if ((ASCII.isDigit(c) && c < '9') || (ASCII.isLower(c) && c < 'z') || (ASCII.isUpper(c) && c < 'Z')) {
                    bytes[i] = (byte)(c + 1);
                    pos = -1;
                    break;
                }
                // '9', 'z' or 'Z': wrap to the bottom of the class and keep carrying
                pos = i;
                n = ASCII.isDigit(c) ? '1' : (ASCII.isLower(c) ? 'a' : 'A');
                bytes[i] = ASCII.isDigit(c) ? (byte)'0' : ASCII.isLower(c) ? (byte)'a' : (byte)'A';
            }
        }
        if (!alnumSeen) {
            // no alphanumerics anywhere: increment the raw byte values instead
            for (int i = end - 1; i >= p; i--) {
                int c = bytes[i] & 0xff;
                if (c < 0xff) {
                    bytes[i] = (byte)(c + 1);
                    pos = -1;
                    break;
                }
                pos = i;
                n = '\u0001';
                bytes[i] = 0;
            }
        }
        if (pos > -1) {
            // The carry ran past the leftmost incremented position, so a new leading
            // byte is needed: '1' for digits (999 -> 1000, not 0000), 'a'/'A' for
            // letters (zzz -> aaaa), and 0x01 for raw bytes ("\377" -> "\001\000").
            // NOTE(review): pos is an offset into the backing array (it starts from
            // value.getBegin()); confirm ByteList.insert expects that rather than an
            // index relative to begin.
            value.insert(pos, (byte) n);
        }
        return this;
    }
    /**
     * Outcome of stepping a character to its neighbor (see succChar, predChar and
     * succAlnumChar): NOT_CHAR is returned by succAlnumChar when the character is
     * neither a digit nor a letter or has no usable range, FOUND when a valid neighbor
     * was written in place, and WRAPPED when the character rolled over and a carry is needed.
     */
    private static enum NeighborChar {NOT_CHAR, FOUND, WRAPPED}
  3284. private static NeighborChar succChar(Encoding enc, byte[]bytes, int p, int len) {
  3285. while (true) {
  3286. int i = len - 1;
  3287. for (; i >= 0 && bytes[p + i] == (byte)0xff; i--) bytes[p + i] = 0;
  3288. if (i < 0) return NeighborChar.WRAPPED;
  3289. bytes[p + i] = (byte)((bytes[p + i] & 0xff) + 1);
  3290. int cl = StringSupport.preciseLength(enc, bytes, p, p + len);
  3291. if (cl > 0) {
  3292. if (cl == len) {
  3293. return NeighborChar.FOUND;
  3294. } else {
  3295. for (int j = p + cl; j < p + len - cl; j++) bytes[j] = (byte)0xff;
  3296. }
  3297. }
  3298. if (cl == -1 && i < len - 1) {
  3299. int len2 = len - 1;
  3300. for (; len2 > 0; len2--) {
  3301. if (StringSupport.preciseLength(enc, bytes, p, p + len2) != -1) break;
  3302. }
  3303. for (int j = p + len2 + 1; j < p + len - (len2 + 1); j++) bytes[j] = (byte)0xff;
  3304. }
  3305. }
  3306. }
  3307. private static NeighborChar predChar(Encoding enc, byte[]bytes, int p, int len) {
  3308. while (true) {
  3309. int i = len - 1;
  3310. for (; i >= 0 && bytes[p + i] == 0; i--) bytes[p + i] = (byte)0xff;
  3311. if (i < 0) return NeighborChar.WRAPPED;
  3312. bytes[p + i] = (byte)((bytes[p + i] & 0xff) - 1);
  3313. int cl = StringSupport.preciseLength(enc, bytes, p, p + len);
  3314. if (cl > 0) {
  3315. if (cl == len) {
  3316. return NeighborChar.FOUND;
  3317. } else {
  3318. for (int j = p + cl; j < p + len - cl; j++) bytes[j] = 0;
  3319. }
  3320. }
  3321. if (cl == -1 && i < len - 1) {
  3322. int len2 = len - 1;
  3323. for (; len2 > 0; len2--) {
  3324. if (StringSupport.preciseLength(enc, bytes, p, p + len2) != -1) break;
  3325. }
  3326. for (int j = p + len2 + 1; j < p + len - (len2 + 1); j++) bytes[j] = 0;
  3327. }
  3328. }
  3329. }
    /**
     * Increments one digit or letter character in place, staying within its character
     * class (digit or alpha as reported by {@code enc}).
     *
     * @param enc encoding of the character
     * @param bytes buffer holding the character; modified in place
     * @param p offset of the character's first byte
     * @param len length of the character in bytes
     * @param carry buffer that receives the carry character when the increment wraps
     * @param carryP offset in {@code carry} at which the carry character is written
     * @return NOT_CHAR when the character is neither digit nor alpha (or no other
     *         character of its class precedes it), FOUND when the successor is in the
     *         same class, WRAPPED when the character was reset to the first of its
     *         run and {@code carry} was filled in
     */
    private static NeighborChar succAlnumChar(Encoding enc, byte[]bytes, int p, int len, byte[]carry, int carryP) {
        byte save[] = new byte[org.jcodings.Config.ENC_CODE_TO_MBC_MAXLEN];
        int c = enc.mbcToCode(bytes, p, p + len);
        final int cType;
        if (enc.isDigit(c)) {
            cType = CharacterType.DIGIT;
        } else if (enc.isAlpha(c)) {
            cType = CharacterType.ALPHA;
        } else {
            return NeighborChar.NOT_CHAR;
        }
        // try the plain successor first; keep it only if it stays in the same class
        System.arraycopy(bytes, p, save, 0, len);
        NeighborChar ret = succChar(enc, bytes, p, len);
        if (ret == NeighborChar.FOUND) {
            c = enc.mbcToCode(bytes, p, p + len);
            if (enc.isCodeCType(c, cType)) return NeighborChar.FOUND;
        }
        // successor left the class: restore, then walk backwards with predChar while
        // the predecessor is still in the class, ending on the first character of the run
        System.arraycopy(save, 0, bytes, p, len);
        int range = 1;
        while (true) {
            System.arraycopy(bytes, p, save, 0, len);
            ret = predChar(enc, bytes, p, len);
            if (ret == NeighborChar.FOUND) {
                c = enc.mbcToCode(bytes, p, p + len);
                if (!enc.isCodeCType(c, cType)) {
                    // stepped out of the class: undo this last step and stop
                    System.arraycopy(save, 0, bytes, p, len);
                    break;
                }
            } else {
                System.arraycopy(save, 0, bytes, p, len);
                break;
            }
            range++;
        }
        // range == 1 means no backward step stayed in the class
        if (range == 1) return NeighborChar.NOT_CHAR;
        if (cType != CharacterType.DIGIT) {
            // letters: the carry is the first character of the run itself
            System.arraycopy(bytes, p, carry, carryP, len);
            return NeighborChar.WRAPPED;
        }
        // digits: the carry is the successor of the first digit of the run
        System.arraycopy(bytes, p, carry, carryP, len);
        succChar(enc, carry, carryP, len);
        return NeighborChar.WRAPPED;
    }
  3373. @JRubyMethod(name = {"succ", "next"}, compat = RUBY1_9)
  3374. public IRubyObject succ19(ThreadContext context) {
  3375. Ruby runtime = context.getRuntime();
  3376. final RubyString str;
  3377. if (value.getRealSize() > 0) {
  3378. str = new RubyString(runtime, getMetaClass(), succCommon19(value));
  3379. // TODO: rescan code range ?
  3380. } else {
  3381. str = newEmptyString(runtime, getType(), value.getEncoding());
  3382. }
  3383. return str.infectBy(this);
  3384. }
  3385. @JRubyMethod(name = {"succ!", "next!"}, compat = RUBY1_9)
  3386. public IRubyObject succ_bang19() {
  3387. modifyCheck();
  3388. if (value.getRealSize() > 0) {
  3389. value = succCommon19(value);
  3390. shareLevel = SHARE_LEVEL_NONE;
  3391. // TODO: rescan code range ?
  3392. }
  3393. return this;
  3394. }
    /**
     * Computes the 1.9 successor of {@code original} on a copy and returns that copy;
     * {@code original} itself is not written to.
     *
     * First pass: walk the characters from the end and increment the rightmost
     * digit/letter via succAlnumChar, carrying leftwards while characters wrap.
     * Second pass (only when no digit/letter took part): increment characters with
     * succChar instead. If a carry is still pending after the loops, the carry
     * character is inserted at the recorded position.
     *
     * @param original bytes to compute the successor of
     * @return a new ByteList holding the successor
     */
    private ByteList succCommon19(ByteList original) {
        // default carry is the single byte 0x01, used unless a loop records another one
        byte carry[] = new byte[org.jcodings.Config.ENC_CODE_TO_MBC_MAXLEN];
        int carryP = 0;
        carry[0] = 1;
        int carryLen = 1;
        ByteList valueCopy = new ByteList(original);
        valueCopy.setEncoding(original.getEncoding());
        Encoding enc = original.getEncoding();
        int p = valueCopy.getBegin();
        int end = p + valueCopy.getRealSize();
        int s = end;
        byte[]bytes = valueCopy.getUnsafeBytes();
        NeighborChar neighbor = NeighborChar.FOUND;
        int lastAlnum = -1;
        boolean alnumSeen = false;
        while ((s = enc.prevCharHead(bytes, p, s, end)) != -1) {
            // After skipping non-alnum characters, stop carrying if the alnum class
            // flips (letter vs digit) relative to the last wrapped character; the
            // carry is then inserted at that last wrapped character.
            if (neighbor == NeighborChar.NOT_CHAR && lastAlnum != -1) {
                if (ASCII.isAlpha(bytes[lastAlnum] & 0xff) ?
                        ASCII.isDigit(bytes[s] & 0xff) :
                        ASCII.isDigit(bytes[lastAlnum] & 0xff) ?
                        ASCII.isAlpha(bytes[s] & 0xff) : false) {
                    s = lastAlnum;
                    break;
                }
            }
            int cl = StringSupport.preciseLength(enc, bytes, s, end);
            if (cl <= 0) continue;
            switch (neighbor = succAlnumChar(enc, bytes, s, cl, carry, 0)) {
            case NOT_CHAR: continue;
            case FOUND: return valueCopy;
            case WRAPPED: lastAlnum = s;
            }
            // reached only for WRAPPED: remember where a carry would have to go
            alnumSeen = true;
            carryP = s - p;
            carryLen = cl;
        }
        if (!alnumSeen) {
            // no digit/letter was incremented: step the characters themselves
            s = end;
            while ((s = enc.prevCharHead(bytes, p, s, end)) != -1) {
                int cl = StringSupport.preciseLength(enc, bytes, s, end);
                if (cl <= 0) continue;
                neighbor = succChar(enc, bytes, s, cl);
                if (neighbor == NeighborChar.FOUND) return valueCopy;
                // NOTE(review): the end bound here is s + 1 while the character is cl
                // bytes long; confirm s + cl was not intended.
                if (StringSupport.preciseLength(enc, bytes, s, s + 1) != cl) succChar(enc, bytes, s, cl); /* wrapped to \0...\0. search next valid char. */
                if (!enc.isAsciiCompatible()) {
                    System.arraycopy(bytes, s, carry, 0, cl);
                    carryLen = cl;
                }
                carryP = s - p;
            }
        }
        // Carry still pending: make room and insert the carry character at carryP.
        // The backing array is re-fetched after ensure() rather than reusing 'bytes'.
        valueCopy.ensure(valueCopy.getBegin() + valueCopy.getRealSize() + carryLen);
        s = valueCopy.getBegin() + carryP;
        System.arraycopy(valueCopy.getUnsafeBytes(), s, valueCopy.getUnsafeBytes(), s + carryLen, valueCopy.getRealSize() - carryP);
        System.arraycopy(carry, 0, valueCopy.getUnsafeBytes(), s, carryLen);
        valueCopy.setRealSize(valueCopy.getRealSize() + carryLen);
        return valueCopy;
    }
  3453. /** rb_str_upto_m
  3454. *
  3455. */
  3456. @JRubyMethod(name = "upto", compat = RUBY1_8)
  3457. public IRubyObject upto18(ThreadContext context, IRubyObject end, Block block) {
  3458. return uptoCommon18(context, end, false, block);
  3459. }
  3460. @JRubyMethod(name = "upto", compat = RUBY1_8)
  3461. public IRubyObject upto18(ThreadContext context, IRubyObject end, IRubyObject excl, Block block) {
  3462. return uptoCommon18(context, end, excl.isTrue(), block);
  3463. }
    /**
     * Shared implementation of upto for 1.8 mode: yields this string and its
     * successors (obtained by calling "succ") until {@code arg} is reached.
     *
     * @param arg end value; converted to a string and encoding-checked
     * @param excl when true the end value itself is not yielded
     * @param block receives each value
     * @return this string
     */
    final IRubyObject uptoCommon18(ThreadContext context, IRubyObject arg, boolean excl, Block block) {
        RubyString end = arg.convertToString();
        checkEncoding(end);
        int n = op_cmp19(end);
        // nothing to iterate when this string sorts after end (or equals it in exclusive mode)
        if (n > 0 || (excl && n == 0)) return this;
        IRubyObject afterEnd = end.callMethod(context, "succ");
        RubyString current = this;
        while (!current.op_equal19(context, afterEnd).isTrue()) {
            block.yield(context, current);
            if (!excl && current.op_equal19(context, end).isTrue()) break;
            current = current.callMethod(context, "succ").convertToString();
            if (excl && current.op_equal19(context, end).isTrue()) break;
            // stop once the value has grown longer than end, or is empty
            if (current.value.getRealSize() > end.value.getRealSize() || current.value.getRealSize() == 0) break;
        }
        return this;
    }
  3480. @JRubyMethod(name = "upto", compat = RUBY1_9)
  3481. public IRubyObject upto19(ThreadContext context, IRubyObject end, Block block) {
  3482. Ruby runtime = context.getRuntime();
  3483. return block.isGiven() ? uptoCommon19(context, end, false, block) : enumeratorize(runtime, this, "upto", end);
  3484. }
  3485. @JRubyMethod(name = "upto", compat = RUBY1_9)
  3486. public IRubyObject upto19(ThreadContext context, IRubyObject end, IRubyObject excl, Block block) {
  3487. return block.isGiven() ? uptoCommon19(context, end, excl.isTrue(), block) :
  3488. enumeratorize(context.getRuntime(), this, "upto", new IRubyObject[]{end, excl});
  3489. }
  3490. final IRubyObject uptoCommon19(ThreadContext context, IRubyObject arg, boolean excl, Block block) {
  3491. return uptoCommon19(context, arg, excl, block, false);
  3492. }
    /**
     * Shared implementation of upto for 1.9 mode.
     *
     * Two paths: when both this string and the end value are a single 7-bit byte,
     * the byte value is simply counted upwards; otherwise successive values are
     * obtained by calling "succ" until the end value is reached.
     *
     * @param arg end value; a Symbol is rejected with a TypeError, anything else is
     *            converted to a string and encoding-checked
     * @param excl when true the end value itself is not yielded
     * @param block receives each value
     * @param asASymbol when true each value is yielded as a symbol instead of a string
     * @return this string
     */
    final IRubyObject uptoCommon19(ThreadContext context, IRubyObject arg, boolean excl, Block block, boolean asASymbol) {
        Ruby runtime = context.getRuntime();
        if (arg instanceof RubySymbol) {
            throw runtime.newTypeError("can't convert Symbol into String");
        }
        RubyString end = arg.convertToString();
        Encoding enc = checkEncoding(end);
        if (value.getRealSize() == 1 && end.value.getRealSize() == 1 &&
                scanForCodeRange() == CR_7BIT && end.scanForCodeRange() == CR_7BIT) {
            // fast path: single 7-bit byte on both sides, iterate over byte values
            byte c = value.getUnsafeBytes()[value.getBegin()];
            byte e = end.value.getUnsafeBytes()[end.value.getBegin()];
            if (c > e || (excl && c == e)) return this;
            while (true) {
                // each yielded string wraps an entry of the preallocated single-character table
                RubyString s = new RubyString(runtime, runtime.getString(), RubyInteger.SINGLE_CHAR_BYTELISTS[c & 0xff],
                        enc, CR_7BIT);
                s.shareLevel = SHARE_LEVEL_BYTELIST;
                IRubyObject argument = s;
                if (asASymbol) {
                    argument = runtime.newSymbol(s.toString());
                }
                block.yield(context, argument);
                if (!excl && c == e) break;
                c++;
                if (excl && c == e) break;
            }
        } else {
            int n = op_cmp19(end);
            // nothing to iterate when this string sorts after end (or equals it in exclusive mode)
            if (n > 0 || (excl && n == 0)) return this;
            IRubyObject afterEnd = end.callMethod(context, "succ");
            RubyString current = this;
            while (!current.op_equal19(context, afterEnd).isTrue()) {
                // stop once the value has grown longer than end, or is empty
                if (current.value.getRealSize() > end.value.getRealSize() || current.value.getRealSize() == 0) break;
                IRubyObject argument = current;
                if (asASymbol) {
                    argument = runtime.newSymbol(current.toString());
                }
                block.yield(context, argument);
                if (!excl && current.op_equal19(context, end).isTrue()) break;
                current = current.callMethod(context, "succ").convertToString();
                if (excl && current.op_equal19(context, end).isTrue()) break;
            }
        }
        return this;
    }
  3537. /** rb_str_include
  3538. *
  3539. */
  3540. @JRubyMethod(name = "include?", compat = RUBY1_8)
  3541. public RubyBoolean include_p(ThreadContext context, IRubyObject obj) {
  3542. Ruby runtime = context.getRuntime();
  3543. if (obj instanceof RubyFixnum) {
  3544. int c = RubyNumeric.fix2int((RubyFixnum)obj);
  3545. for (int i = 0; i < value.getRealSize(); i++) {
  3546. if (value.get(i) == (byte)c) return runtime.getTrue();
  3547. }
  3548. return runtime.getFalse();
  3549. }
  3550. return value.indexOf(obj.convertToString().value) == -1 ? runtime.getFalse() : runtime.getTrue();
  3551. }
  3552. @JRubyMethod(name = "include?", compat = RUBY1_9)
  3553. public RubyBoolean include_p19(ThreadContext context, IRubyObject obj) {
  3554. Ruby runtime = context.getRuntime();
  3555. return strIndex19(obj.convertToString(), 0) == -1 ? runtime.getFalse() : runtime.getTrue();
  3556. }
  3557. @JRubyMethod(name = "chr", compat = RUBY1_9)
  3558. public IRubyObject chr(ThreadContext context) {
  3559. return substr19(context.getRuntime(), 0, 1);
  3560. }
  3561. @JRubyMethod(name = "getbyte", compat = RUBY1_9)
  3562. public IRubyObject getbyte(ThreadContext context, IRubyObject index) {
  3563. Ruby runtime = context.getRuntime();
  3564. int i = RubyNumeric.num2int(index);
  3565. if (i < 0) i += value.getRealSize();
  3566. if (i < 0 || i >= value.getRealSize()) return runtime.getNil();
  3567. return RubyFixnum.newFixnum(runtime, value.getUnsafeBytes()[value.getBegin() + i] & 0xff);
  3568. }
  3569. @JRubyMethod(name = "setbyte", compat = RUBY1_9)
  3570. public IRubyObject setbyte(ThreadContext context, IRubyObject index, IRubyObject val) {
  3571. modifyCheck();
  3572. int i = RubyNumeric.num2int(index);
  3573. int b = RubyNumeric.num2int(val);
  3574. value.getUnsafeBytes()[checkIndexForRef(i, value.getRealSize())] = (byte)b;
  3575. return val;
  3576. }
  3577. /** rb_str_to_i
  3578. *
  3579. */
  3580. @JRubyMethod(name = "to_i", compat = RUBY1_8)
  3581. public IRubyObject to_i() {
  3582. return stringToInum(10, false);
  3583. }
  3584. /** rb_str_to_i
  3585. *
  3586. */
  3587. @JRubyMethod(name = "to_i", compat = RUBY1_8)
  3588. public IRubyObject to_i(IRubyObject arg0) {
  3589. long base = checkBase(arg0);
  3590. return stringToInum((int)base, false);
  3591. }
  3592. @JRubyMethod(name = "to_i", compat = RUBY1_9)
  3593. public IRubyObject to_i19() {
  3594. return stringToInum19(10, false);
  3595. }
  3596. @JRubyMethod(name = "to_i", compat = RUBY1_9)
  3597. public IRubyObject to_i19(IRubyObject arg0) {
  3598. long base = checkBase(arg0);
  3599. return stringToInum19((int)base, false);
  3600. }
  3601. private long checkBase(IRubyObject arg0) {
  3602. long base = arg0.convertToInteger().getLongValue();
  3603. if(base < 0) {
  3604. throw getRuntime().newArgumentError("illegal radix " + base);
  3605. }
  3606. return base;
  3607. }
  3608. /** rb_str_to_inum
  3609. *
  3610. */
  3611. public IRubyObject stringToInum(int base, boolean badcheck) {
  3612. ByteList s = this.value;
  3613. return ConvertBytes.byteListToInum(getRuntime(), s, base, badcheck);
  3614. }
  3615. public IRubyObject stringToInum19(int base, boolean badcheck) {
  3616. ByteList s = this.value;
  3617. return ConvertBytes.byteListToInum19(getRuntime(), s, base, badcheck);
  3618. }
  3619. /** rb_str_oct
  3620. *
  3621. */
  3622. @JRubyMethod(name = "oct", compat = RUBY1_8)
  3623. public IRubyObject oct(ThreadContext context) {
  3624. return stringToInum(-8, false);
  3625. }
  3626. @JRubyMethod(name = "oct", compat = RUBY1_9)
  3627. public IRubyObject oct19(ThreadContext context) {
  3628. if (!value.getEncoding().isAsciiCompatible()) {
  3629. throw context.getRuntime().newEncodingCompatibilityError("ASCII incompatible encoding: " + value.getEncoding());
  3630. }
  3631. return oct(context);
  3632. }
  3633. /** rb_str_hex
  3634. *
  3635. */
  3636. @JRubyMethod(name = "hex", compat = RUBY1_8)
  3637. public IRubyObject hex(ThreadContext context) {
  3638. return stringToInum(16, false);
  3639. }
  3640. @JRubyMethod(name = "hex", compat = RUBY1_9)
  3641. public IRubyObject hex19(ThreadContext context) {
  3642. if (!value.getEncoding().isAsciiCompatible()) {
  3643. throw context.getRuntime().newEncodingCompatibilityError("ASCII incompatible encoding: " + value.getEncoding());
  3644. }
  3645. return stringToInum19(16, false);
  3646. }
  3647. /** rb_str_to_f
  3648. *
  3649. */
  3650. @JRubyMethod(name = "to_f", compat = RUBY1_8)
  3651. public IRubyObject to_f() {
  3652. return RubyNumeric.str2fnum(getRuntime(), this);
  3653. }
  3654. @JRubyMethod(name = "to_f", compat = RUBY1_9)
  3655. public IRubyObject to_f19() {
  3656. return RubyNumeric.str2fnum19(getRuntime(), this, false);
  3657. }
  3658. /** rb_str_split_m
  3659. *
  3660. */
  3661. @JRubyMethod(name = "split", writes = BACKREF, compat = RUBY1_8)
  3662. public RubyArray split(ThreadContext context) {
  3663. return split(context, context.getRuntime().getNil());
  3664. }
  3665. @JRubyMethod(name = "split", writes = BACKREF, compat = RUBY1_8)
  3666. public RubyArray split(ThreadContext context, IRubyObject arg0) {
  3667. return splitCommon(arg0, false, 0, 0, context);
  3668. }
  3669. @JRubyMethod(name = "split", writes = BACKREF, compat = RUBY1_8)
  3670. public RubyArray split(ThreadContext context, IRubyObject arg0, IRubyObject arg1) {
  3671. final int lim = RubyNumeric.num2int(arg1);
  3672. if (lim <= 0) {
  3673. return splitCommon(arg0, false, lim, 1, context);
  3674. } else {
  3675. if (lim == 1) return value.getRealSize() == 0 ? context.getRuntime().newArray() : context.getRuntime().newArray(this);
  3676. return splitCommon(arg0, true, lim, 1, context);
  3677. }
  3678. }
  3679. private RubyArray splitCommon(IRubyObject spat, final boolean limit, final int lim, final int i, ThreadContext context) {
  3680. final RubyArray result;
  3681. if (spat.isNil() && (spat = context.getRuntime().getGlobalVariables().get("$;")).isNil()) {
  3682. result = awkSplit(limit, lim, i);
  3683. } else {
  3684. if (spat instanceof RubyString && ((RubyString) spat).value.getRealSize() == 1) {
  3685. RubyString strSpat = (RubyString) spat;
  3686. if (strSpat.value.getUnsafeBytes()[strSpat.value.getBegin()] == (byte) ' ') {
  3687. result = awkSplit(limit, lim, i);
  3688. } else {
  3689. result = regexSplit(context, spat, limit, lim, i);
  3690. }
  3691. } else {
  3692. result = regexSplit(context, spat, limit, lim, i);
  3693. }
  3694. }
  3695. if (!limit && lim == 0) {
  3696. while (result.size() > 0 && ((RubyString) result.eltInternal(result.size() - 1)).value.getRealSize() == 0) {
  3697. result.pop(context);
  3698. }
  3699. }
  3700. return result;
  3701. }
    /**
     * Splits this string on matches of a pattern (1.8 mode).
     *
     * @param pat separator; turned into a Regex by getQuotedPattern
     * @param limit whether the number of fields is capped
     * @param lim the cap, checked against the running counter {@code i}
     * @param i field counter, incremented once per appended field
     * @return the fields, each created with makeShared, plus any capture groups of
     *         the separator when the pattern has captures
     */
    private RubyArray regexSplit(ThreadContext context, IRubyObject pat, boolean limit, int lim, int i) {
        Ruby runtime = context.getRuntime();
        final Regex pattern = getQuotedPattern(pat);
        int begin = value.getBegin();
        int len = value.getRealSize();
        int range = begin + len;
        byte[]bytes = value.getUnsafeBytes();
        final Matcher matcher = pattern.matcher(bytes, begin, range);
        RubyArray result = runtime.newArray();
        Encoding enc = getEncodingForKCodeDefault(runtime, pattern, pat);
        boolean captures = pattern.numberOfCaptures() != 0;
        // beg: start of the pending field, relative to 'begin'
        // start: position in the backing array where the next search begins
        int end, beg = 0;
        boolean lastNull = false;
        int start = begin;
        while ((end = matcher.search(start, range, Option.NONE)) >= 0) {
            // empty match located exactly at the search start
            if (start == end + begin && matcher.getBegin() == matcher.getEnd()) {
                if (len == 0) {
                    result.append(newEmptyString(runtime, getMetaClass()).infectBy(this));
                    break;
                } else if (lastNull) {
                    // second empty match in a row: emit one character as a field
                    result.append(makeShared(runtime, beg, enc.length(bytes, begin + beg, range)));
                    beg = start - begin;
                } else {
                    // first empty match: step over one character and search again
                    start += start == range ? 1 : enc.length(bytes, start, range);
                    lastNull = true;
                    continue;
                }
            } else {
                // regular match: the field runs from beg up to the match start
                result.append(makeShared(runtime, beg, end - beg));
                beg = matcher.getEnd();
                start = begin + beg;
            }
            lastNull = false;
            if (captures) populateCapturesForSplit(runtime, result, matcher, false);
            if (limit && lim <= ++i) break;
        }
        // only this case affects backrefs
        context.getCurrentScope().setBackRef(runtime.getNil());
        // append the remainder after the last separator, when there is one to append
        if (len > 0 && (limit || len > beg || lim < 0)) result.append(makeShared(runtime, beg, len - beg));
        return result;
    }
  3743. private Encoding getEncodingForKCodeDefault(Ruby runtime, Regex pattern, IRubyObject pat) {
  3744. Encoding enc = pattern.getEncoding();
  3745. if (enc != runtime.getKCode().getEncoding() && pat instanceof RubyRegexp) {
  3746. RubyRegexp regexp = (RubyRegexp) pat;
  3747. if (regexp.isKCodeDefault()) {
  3748. enc = runtime.getKCode().getEncoding();
  3749. }
  3750. }
  3751. return enc;
  3752. }
  3753. private void populateCapturesForSplit(Ruby runtime, RubyArray result, Matcher matcher, boolean is19) {
  3754. Region region = matcher.getRegion();
  3755. for (int i = 1; i < region.numRegs; i++) {
  3756. int beg = region.beg[i];
  3757. if (beg == -1) continue;
  3758. result.append(is19 ? makeShared19(runtime, beg, region.end[i] - beg) : makeShared(runtime, beg, region.end[i] - beg));
  3759. }
  3760. }
    /**
     * Splits this string on runs of ASCII whitespace (1.8 mode), ignoring leading
     * whitespace.
     *
     * @param limit whether the number of fields is capped
     * @param lim the cap, checked against the running counter {@code i}
     * @param i field counter, incremented per emitted field when {@code limit} is set
     * @return the fields, each created with makeShared
     */
    private RubyArray awkSplit(boolean limit, int lim, int i) {
        Ruby runtime = getRuntime();
        RubyArray result = runtime.newArray();
        byte[]bytes = value.getUnsafeBytes();
        int p = value.getBegin();
        int len = value.getRealSize();
        int end = p + len;
        // skip: currently inside a whitespace run
        // b / e: start and end of the current field, relative to the string start
        boolean skip = true;
        int e = 0, b = 0;
        while (p < end) {
            int c = bytes[p++] & 0xff;
            if (skip) {
                if (ASCII.isSpace(c)) {
                    b++;
                } else {
                    // first byte of a new field
                    e = b + 1;
                    skip = false;
                    // cap reached: stop scanning, the rest is appended after the loop
                    if (limit && lim <= i) break;
                }
            } else {
                if (ASCII.isSpace(c)) {
                    // field ended: emit it and start skipping whitespace again
                    result.append(makeShared(runtime, b, e - b));
                    skip = true;
                    b = e + 1;
                    if (limit) i++;
                } else {
                    e++;
                }
            }
        }
        // append whatever follows the last emitted field, when there is something to append
        if (len > 0 && (limit || len > b || lim < 0)) result.append(makeShared(runtime, b, len - b));
        return result;
    }
  3794. @JRubyMethod(name = "split", writes = BACKREF, compat = RUBY1_9)
  3795. public RubyArray split19(ThreadContext context) {
  3796. return split19(context, context.getRuntime().getNil());
  3797. }
  /** 1.9 {@code split(pattern)}: calls {@code splitCommon19} with no limit ({@code limit=false, lim=0, i=0}). */
  @JRubyMethod(name = "split", writes = BACKREF, compat = RUBY1_9)
  public RubyArray split19(ThreadContext context, IRubyObject arg0) {
    return splitCommon19(arg0, false, 0, 0, context);
  }
  3802. @JRubyMethod(name = "split", writes = BACKREF, compat = RUBY1_9)
  3803. public RubyArray split19(ThreadContext context, IRubyObject arg0, IRubyObject arg1) {
  3804. final int lim = RubyNumeric.num2int(arg1);
  3805. if (lim <= 0) {
  3806. return splitCommon19(arg0, false, lim, 1, context);
  3807. } else {
  3808. if (lim == 1) return value.getRealSize() == 0 ? context.getRuntime().newArray() : context.getRuntime().newArray(this);
  3809. return splitCommon19(arg0, true, lim, 1, context);
  3810. }
  3811. }
  3812. private RubyArray splitCommon19(IRubyObject spat, final boolean limit, final int lim, final int i, ThreadContext context) {
  3813. final RubyArray result;
  3814. if (spat.isNil() && (spat = context.getRuntime().getGlobalVariables().get("$;")).isNil()) {
  3815. result = awkSplit19(limit, lim, i);
  3816. } else {
  3817. if (spat instanceof RubyString) {
  3818. ByteList spatValue = ((RubyString)spat).value;
  3819. int len = spatValue.getRealSize();
  3820. Encoding spatEnc = spatValue.getEncoding();
  3821. if (len == 0) {
  3822. Regex pattern = RubyRegexp.getRegexpFromCache(context.getRuntime(), spatValue, spatEnc, new RegexpOptions());
  3823. result = regexSplit19(context, pattern, pattern, limit, lim, i);
  3824. } else {
  3825. final int c;
  3826. byte[]bytes = spatValue.getUnsafeBytes();
  3827. int p = spatValue.getBegin();
  3828. if (spatEnc.isAsciiCompatible()) {
  3829. c = len == 1 ? bytes[p] & 0xff : -1;
  3830. } else {
  3831. c = len == StringSupport.preciseLength(spatEnc, bytes, p, p + len) ? spatEnc.mbcToCode(bytes, p, p + len) : -1;
  3832. }
  3833. result = c == ' ' ? awkSplit19(limit, lim, i) : stringSplit19(context, (RubyString)spat, limit, lim, i);
  3834. }
  3835. } else {
  3836. final Regex pattern, prepared;
  3837. final RubyRegexp regexp;
  3838. Ruby runtime = context.getRuntime();
  3839. if (spat instanceof RubyRegexp) {
  3840. regexp = (RubyRegexp)spat;
  3841. pattern = regexp.getPattern();
  3842. prepared = regexp.preparePattern(this);
  3843. } else {
  3844. regexp = null;
  3845. pattern = getStringPattern19(runtime, spat);
  3846. prepared = RubyRegexp.preparePattern(runtime, pattern, this);
  3847. }
  3848. result = regexSplit19(context, pattern, prepared, limit, lim, i);
  3849. }
  3850. }
  3851. if (!limit && lim == 0) {
  3852. while (result.size() > 0 && ((RubyString) result.eltInternal(result.size() - 1)).value.getRealSize() == 0) {
  3853. result.pop(context);
  3854. }
  3855. }
  3856. return result;
  3857. }
  /**
   * Splits this string around matches of a regular expression (1.9 semantics).
   *
   * @param pattern  regex consulted only for its number of capture groups
   * @param prepared regex actually used to build the matcher over this string's bytes
   * @param limit    whether a field limit is in effect
   * @param lim      the requested limit
   * @param i        number of fields already counted against the limit
   * @return a new array of shared substrings, with capture groups interleaved when the pattern has any
   */
  private RubyArray regexSplit19(ThreadContext context, Regex pattern, Regex prepared, boolean limit, int lim, int i) {
    Ruby runtime = context.getRuntime();
    int begin = value.getBegin();
    int len = value.getRealSize();
    int range = begin + len;
    byte[]bytes = value.getUnsafeBytes();
    final Matcher matcher = prepared.matcher(bytes, begin, range);
    RubyArray result = runtime.newArray();
    Encoding enc = value.getEncoding();
    boolean captures = pattern.numberOfCaptures() != 0;
    // beg: start of the pending field, relative to begin; start: absolute position where the next search begins
    int end, beg = 0;
    boolean lastNull = false;
    int start = begin;
    while ((end = matcher.search(start, range, Option.NONE)) >= 0) {
      // NOTE(review): the comparison below treats the search result as relative to begin - confirm against Matcher.search
      if (start == end + begin && matcher.getBegin() == matcher.getEnd()) {
        // zero-width match right at the search position
        if (len == 0) {
          result.append(newEmptyString(runtime, getMetaClass()).infectBy(this));
          break;
        } else if (lastNull) {
          // second consecutive null match: emit the single character starting at beg
          result.append(makeShared19(runtime, beg, StringSupport.length(enc, bytes, begin + beg, range)));
          beg = start - begin;
        } else {
          // first null match: step over one character (one byte at the very end) and search again
          start += start == range ? 1 : StringSupport.length(enc, bytes, start, range);
          lastNull = true;
          continue;
        }
      } else {
        // ordinary match: emit the text before it and resume after the match
        result.append(makeShared19(runtime, beg, end - beg));
        beg = matcher.getEnd();
        start = begin + beg;
      }
      lastNull = false;
      if (captures) populateCapturesForSplit(runtime, result, matcher, true);
      if (limit && lim <= ++i) break;
    }
    // only this case affects backrefs
    context.getCurrentScope().setBackRef(runtime.getNil());
    // append whatever remains after the last emitted field
    if (len > 0 && (limit || len > beg || lim < 0)) result.append(makeShared19(runtime, beg, len - beg));
    return result;
  }
  3898. private RubyArray awkSplit19(boolean limit, int lim, int i) {
  3899. Ruby runtime = getRuntime();
  3900. RubyArray result = runtime.newArray();
  3901. byte[]bytes = value.getUnsafeBytes();
  3902. int p = value.getBegin();
  3903. int ptr = p;
  3904. int len = value.getRealSize();
  3905. int end = p + len;
  3906. Encoding enc = value.getEncoding();
  3907. boolean skip = true;
  3908. int e = 0, b = 0;
  3909. boolean singlebyte = singleByteOptimizable(enc);
  3910. while (p < end) {
  3911. final int c;
  3912. if (singlebyte) {
  3913. c = bytes[p++] & 0xff;
  3914. } else {
  3915. c = StringSupport.codePoint(runtime, enc, bytes, p, end);
  3916. p += StringSupport.length(enc, bytes, p, end);
  3917. }
  3918. if (skip) {
  3919. if (enc.isSpace(c)) {
  3920. b = p - ptr;
  3921. } else {
  3922. e = p - ptr;
  3923. skip = false;
  3924. if (limit && lim <= i) break;
  3925. }
  3926. } else {
  3927. if (enc.isSpace(c)) {
  3928. result.append(makeShared19(runtime, b, e - b));
  3929. skip = true;
  3930. b = p - ptr;
  3931. if (limit) i++;
  3932. } else {
  3933. e = p - ptr;
  3934. }
  3935. }
  3936. }
  3937. if (len > 0 && (limit || len > b || lim < 0)) result.append(makeShared19(runtime, b, len - b));
  3938. return result;
  3939. }
  3940. private RubyArray stringSplit19(ThreadContext context, RubyString spat, boolean limit, int lim, int i) {
  3941. Ruby runtime = context.getRuntime();
  3942. if (scanForCodeRange() == CR_BROKEN) throw runtime.newArgumentError("invalid byte sequence in " + value.getEncoding());
  3943. if (spat.scanForCodeRange() == CR_BROKEN) throw runtime.newArgumentError("invalid byte sequence in " + spat.value.getEncoding());
  3944. RubyArray result = runtime.newArray();
  3945. Encoding enc = checkEncoding(spat);
  3946. ByteList pattern = spat.value;
  3947. int e, p = 0;
  3948. while (p < value.getRealSize() && (e = value.indexOf(pattern, p)) >= 0) {
  3949. int t = enc.rightAdjustCharHead(value.getUnsafeBytes(), p + value.getBegin(), e, p + value.getRealSize());
  3950. if (t != e) {
  3951. p = t;
  3952. continue;
  3953. }
  3954. result.append(makeShared19(runtime, p, e - p));
  3955. p = e + pattern.getRealSize();
  3956. if (limit && lim <= ++i) break;
  3957. }
  3958. if (value.getRealSize() > 0 && (limit || value.getRealSize() > p || lim < 0)) {
  3959. result.append(makeShared19(runtime, p, value.getRealSize() - p));
  3960. }
  3961. return result;
  3962. }
  3963. private RubyString getStringForPattern(IRubyObject obj) {
  3964. if (obj instanceof RubyString) return (RubyString)obj;
  3965. IRubyObject val = obj.checkStringType();
  3966. if (val.isNil()) throw getRuntime().newTypeError("wrong argument type " + obj.getMetaClass() + " (expected Regexp)");
  3967. return (RubyString)val;
  3968. }
  3969. /** get_pat (used by match/match19)
  3970. *
  3971. */
  3972. private RubyRegexp getPattern(IRubyObject obj) {
  3973. if (obj instanceof RubyRegexp) return (RubyRegexp)obj;
  3974. return RubyRegexp.newRegexp(getRuntime(), getStringForPattern(obj).value);
  3975. }
  3976. private Regex getQuotedPattern(IRubyObject obj) {
  3977. if (obj instanceof RubyRegexp) return ((RubyRegexp)obj).getPattern();
  3978. Ruby runtime = getRuntime();
  3979. return RubyRegexp.getQuotedRegexpFromCache(runtime, getStringForPattern(obj).value, runtime.getKCode().getEncoding(), new RegexpOptions());
  3980. }
  3981. private Regex getStringPattern(Ruby runtime, Encoding enc, IRubyObject obj) {
  3982. return RubyRegexp.getQuotedRegexpFromCache(runtime, getStringForPattern(obj).value, enc, new RegexpOptions());
  3983. }
  3984. private Regex getStringPattern19(Ruby runtime, IRubyObject obj) {
  3985. RubyString str = getStringForPattern(obj);
  3986. if (str.scanForCodeRange() == CR_BROKEN) {
  3987. throw runtime.newRegexpError("invalid multybyte character: " +
  3988. RubyRegexp.regexpDescription19(runtime, str.value, new RegexpOptions(), str.value.getEncoding()).toString());
  3989. }
  3990. if (str.value.getEncoding().isDummy()) {
  3991. throw runtime.newArgumentError("can't make regexp with dummy encoding");
  3992. }
  3993. return RubyRegexp.getQuotedRegexpFromCache19(runtime, str.value, new RegexpOptions(), str.isAsciiOnly());
  3994. }
  3995. /** rb_str_scan
  3996. *
  3997. */
  3998. @JRubyMethod(reads = BACKREF, writes = BACKREF, compat = RUBY1_8)
  3999. public IRubyObject scan(ThreadContext context, IRubyObject arg, Block block) {
  4000. Ruby runtime = context.getRuntime();
  4001. Encoding enc = runtime.getKCode().getEncoding();
  4002. final Regex pattern;
  4003. final int tuFlags;
  4004. if (arg instanceof RubyRegexp) {
  4005. RubyRegexp regex = (RubyRegexp)arg;
  4006. pattern = regex.getPattern();
  4007. tuFlags = regex.flags;
  4008. } else {
  4009. pattern = getStringPattern(runtime, enc, arg);
  4010. tuFlags = 0;
  4011. }
  4012. int begin = value.getBegin();
  4013. int range = begin + value.getRealSize();
  4014. final Matcher matcher = pattern.matcher(value.getUnsafeBytes(), begin, range);
  4015. if (block.isGiven()) {
  4016. return scanIter(context, pattern, matcher, enc, block, begin, range, tuFlags);
  4017. } else {
  4018. return scanNoIter(context, pattern, matcher, enc, begin, range, tuFlags);
  4019. }
  4020. }
  4021. private IRubyObject scanIter(ThreadContext context, Regex pattern, Matcher matcher, Encoding enc, Block block, int begin, int range, int tuFlags) {
  4022. Ruby runtime = context.getRuntime();
  4023. byte[]bytes = value.getUnsafeBytes();
  4024. int size = value.getRealSize();
  4025. RubyMatchData match = null;
  4026. DynamicScope scope = context.getCurrentScope();
  4027. int end = 0;
  4028. if (pattern.numberOfCaptures() == 0) {
  4029. while (matcher.search(begin + end, range, Option.NONE) >= 0) {
  4030. end = positionEnd(matcher, enc, begin, range);
  4031. match = RubyRegexp.updateBackRef(context, this, scope, matcher, pattern);
  4032. RubyString substr = makeShared(runtime, matcher.getBegin(), matcher.getEnd() - matcher.getBegin());
  4033. substr.infectBy(tuFlags);
  4034. match.infectBy(tuFlags);
  4035. block.yield(context, substr);
  4036. modifyCheck(bytes, size);
  4037. }
  4038. } else {
  4039. while (matcher.search(begin + end, range, Option.NONE) >= 0) {
  4040. end = positionEnd(matcher, enc, begin, range);
  4041. match = RubyRegexp.updateBackRef(context, this, scope, matcher, pattern);
  4042. match.infectBy(tuFlags);
  4043. block.yield(context, populateCapturesForScan(runtime, matcher, range, tuFlags, false));
  4044. modifyCheck(bytes, size);
  4045. }
  4046. }
  4047. scope.setBackRef(match == null ? runtime.getNil() : match);
  4048. return this;
  4049. }
  4050. private IRubyObject scanNoIter(ThreadContext context, Regex pattern, Matcher matcher, Encoding enc, int begin, int range, int tuFlags) {
  4051. Ruby runtime = context.getRuntime();
  4052. RubyArray ary = runtime.newArray();
  4053. int end = 0;
  4054. if (pattern.numberOfCaptures() == 0) {
  4055. while (matcher.search(begin + end, range, Option.NONE) >= 0) {
  4056. end = positionEnd(matcher, enc, begin, range);
  4057. RubyString substr = makeShared(runtime, matcher.getBegin(), matcher.getEnd() - matcher.getBegin());
  4058. substr.infectBy(tuFlags);
  4059. ary.append(substr);
  4060. }
  4061. } else {
  4062. while (matcher.search(begin + end, range, Option.NONE) >= 0) {
  4063. end = positionEnd(matcher, enc, begin, range);
  4064. ary.append(populateCapturesForScan(runtime, matcher, range, tuFlags, false));
  4065. }
  4066. }
  4067. DynamicScope scope = context.getCurrentScope();
  4068. if (ary.size() > 0) {
  4069. RubyMatchData match = RubyRegexp.updateBackRef(context, this, scope, matcher, pattern);
  4070. match.infectBy(tuFlags);
  4071. } else {
  4072. scope.setBackRef(runtime.getNil());
  4073. }
  4074. return ary;
  4075. }
  4076. private int positionEnd(Matcher matcher, Encoding enc, int begin, int range) {
  4077. int end = matcher.getEnd();
  4078. if (matcher.getBegin() == end) {
  4079. if (value.getRealSize() > end) {
  4080. return end + enc.length(value.getUnsafeBytes(), begin + end, range);
  4081. } else {
  4082. return end + 1;
  4083. }
  4084. } else {
  4085. return end;
  4086. }
  4087. }
  4088. private IRubyObject populateCapturesForScan(Ruby runtime, Matcher matcher, int range, int tuFlags, boolean is19) {
  4089. Region region = matcher.getRegion();
  4090. RubyArray result = getRuntime().newArray(region.numRegs);
  4091. for (int i=1; i<region.numRegs; i++) {
  4092. int beg = region.beg[i];
  4093. if (beg == -1) {
  4094. result.append(runtime.getNil());
  4095. } else {
  4096. RubyString substr = is19 ? makeShared19(runtime, beg, region.end[i] - beg) : makeShared(runtime, beg, region.end[i] - beg);
  4097. substr.infectBy(tuFlags);
  4098. result.append(substr);
  4099. }
  4100. }
  4101. return result;
  4102. }
  4103. @JRubyMethod(name = "scan", reads = BACKREF, writes = BACKREF, compat = RUBY1_9)
  4104. public IRubyObject scan19(ThreadContext context, IRubyObject arg, Block block) {
  4105. Ruby runtime = context.getRuntime();
  4106. Encoding enc = value.getEncoding();
  4107. final Regex pattern, prepared;
  4108. final RubyRegexp regexp;
  4109. final int tuFlags;
  4110. if (arg instanceof RubyRegexp) {
  4111. regexp = (RubyRegexp)arg;
  4112. tuFlags = regexp.flags;
  4113. pattern = regexp.getPattern();
  4114. prepared = regexp.preparePattern(this);
  4115. } else {
  4116. regexp = null;
  4117. tuFlags = 0;
  4118. pattern = getStringPattern19(runtime, arg);
  4119. prepared = RubyRegexp.preparePattern(runtime, pattern, this);
  4120. }
  4121. if (block.isGiven()) {
  4122. return scanIter19(context, pattern, prepared, enc, block, regexp, tuFlags);
  4123. } else {
  4124. return scanNoIter19(context, pattern, prepared, enc, regexp, tuFlags);
  4125. }
  4126. }
  4127. private IRubyObject scanIter19(ThreadContext context, Regex pattern, Regex prepared, Encoding enc, Block block, RubyRegexp regexp, int tuFlags) {
  4128. Ruby runtime = context.getRuntime();
  4129. byte[]bytes = value.getUnsafeBytes();
  4130. int begin = value.getBegin();
  4131. int len = value.getRealSize();
  4132. int range = begin + len;
  4133. final Matcher matcher = prepared.matcher(bytes, begin, range);
  4134. DynamicScope scope = context.getCurrentScope();
  4135. int end = 0;
  4136. RubyMatchData match = null;
  4137. if (pattern.numberOfCaptures() == 0) {
  4138. while (matcher.search(begin + end, range, Option.NONE) >= 0) {
  4139. end = positionEnd(matcher, enc, begin, range);
  4140. match = RubyRegexp.updateBackRef19(context, this, scope, matcher, pattern);
  4141. match.regexp = regexp;
  4142. RubyString substr = makeShared19(runtime, matcher.getBegin(), matcher.getEnd() - matcher.getBegin());
  4143. substr.infectBy(tuFlags);
  4144. match.infectBy(tuFlags);
  4145. block.yield(context, substr);
  4146. modifyCheck(bytes, len, enc);
  4147. }
  4148. } else {
  4149. while (matcher.search(begin + end, range, Option.NONE) >= 0) {
  4150. end = positionEnd(matcher, enc, begin, range);
  4151. match = RubyRegexp.updateBackRef19(context, this, scope, matcher, pattern);
  4152. match.regexp = regexp;
  4153. match.infectBy(tuFlags);
  4154. block.yield(context, populateCapturesForScan(runtime, matcher, range, tuFlags, true));
  4155. modifyCheck(bytes, len, enc);
  4156. }
  4157. }
  4158. scope.setBackRef(match == null ? runtime.getNil() : match);
  4159. return this;
  4160. }
  4161. private IRubyObject scanNoIter19(ThreadContext context, Regex pattern, Regex prepared, Encoding enc, RubyRegexp regexp, int tuFlags) {
  4162. Ruby runtime = context.getRuntime();
  4163. byte[]bytes = value.getUnsafeBytes();
  4164. int begin = value.getBegin();
  4165. int range = begin + value.getRealSize();
  4166. final Matcher matcher = prepared.matcher(bytes, begin, range);
  4167. RubyArray ary = runtime.newArray();
  4168. int end = 0;
  4169. if (pattern.numberOfCaptures() == 0) {
  4170. while (matcher.search(begin + end, range, Option.NONE) >= 0) {
  4171. end = positionEnd(matcher, enc, begin, range);
  4172. RubyString substr = makeShared19(runtime, matcher.getBegin(), matcher.getEnd() - matcher.getBegin());
  4173. substr.infectBy(tuFlags);
  4174. ary.append(substr);
  4175. }
  4176. } else {
  4177. while (matcher.search(begin + end, range, Option.NONE) >= 0) {
  4178. end = positionEnd(matcher, enc, begin, range);
  4179. ary.append(populateCapturesForScan(runtime, matcher, range, tuFlags, true));
  4180. }
  4181. }
  4182. DynamicScope scope = context.getCurrentScope();
  4183. if (ary.size() > 0) {
  4184. RubyMatchData match = RubyRegexp.updateBackRef19(context, this, scope, matcher, pattern);
  4185. match.regexp = regexp;
  4186. match.infectBy(tuFlags);
  4187. } else {
  4188. scope.setBackRef(runtime.getNil());
  4189. }
  4190. return ary;
  4191. }
  /** {@code start_with?} with no arguments: always returns the runtime's false object. */
  @JRubyMethod(name = "start_with?")
  public IRubyObject start_with_p(ThreadContext context) {
    return context.getRuntime().getFalse();
  }
  4196. @JRubyMethod(name = "start_with?")
  4197. public IRubyObject start_with_p(ThreadContext context, IRubyObject arg) {
  4198. return start_with_pCommon(arg) ? context.getRuntime().getTrue() : context.getRuntime().getFalse();
  4199. }
  4200. @JRubyMethod(name = "start_with?", rest = true)
  4201. public IRubyObject start_with_p(ThreadContext context, IRubyObject[]args) {
  4202. for (int i = 0; i < args.length; i++) {
  4203. if (start_with_pCommon(args[i])) return context.getRuntime().getTrue();
  4204. }
  4205. return context.getRuntime().getFalse();
  4206. }
  4207. private boolean start_with_pCommon(IRubyObject arg) {
  4208. IRubyObject tmp = arg.checkStringType();
  4209. if (tmp.isNil()) return false;
  4210. RubyString otherString = (RubyString)tmp;
  4211. checkEncoding(otherString);
  4212. if (value.getRealSize() < otherString.value.getRealSize()) return false;
  4213. return value.startsWith(otherString.value);
  4214. }
  /** {@code end_with?} with no arguments: always returns the runtime's false object. */
  @JRubyMethod(name = "end_with?")
  public IRubyObject end_with_p(ThreadContext context) {
    return context.getRuntime().getFalse();
  }
  4219. @JRubyMethod(name = "end_with?")
  4220. public IRubyObject end_with_p(ThreadContext context, IRubyObject arg) {
  4221. return end_with_pCommon(arg) ? context.getRuntime().getTrue() : context.getRuntime().getFalse();
  4222. }
  4223. @JRubyMethod(name = "end_with?", rest = true)
  4224. public IRubyObject end_with_p(ThreadContext context, IRubyObject[]args) {
  4225. for (int i = 0; i < args.length; i++) {
  4226. if (end_with_pCommon(args[i])) return context.getRuntime().getTrue();
  4227. }
  4228. return context.getRuntime().getFalse();
  4229. }
  4230. private boolean end_with_pCommon(IRubyObject arg) {
  4231. IRubyObject tmp = arg.checkStringType();
  4232. if (tmp.isNil()) return false;
  4233. RubyString otherString = (RubyString)tmp;
  4234. Encoding enc = checkEncoding(otherString);
  4235. if (value.getRealSize() < otherString.value.getRealSize()) return false;
  4236. int p = value.getBegin();
  4237. int end = p + value.getRealSize();
  4238. int s = end - otherString.value.getRealSize();
  4239. if (enc.leftAdjustCharHead(value.getUnsafeBytes(), p, s, end) != s) return false;
  4240. return value.endsWith(otherString.value);
  4241. }
  /** Single-space pad ({@code " "}) passed to the justify helpers when the caller supplies no pad string. */
  private static final ByteList SPACE_BYTELIST = new ByteList(ByteList.plain(" "));
  4243. private IRubyObject justify(IRubyObject arg0, int jflag) {
  4244. Ruby runtime = getRuntime();
  4245. return justifyCommon(runtime, SPACE_BYTELIST, RubyFixnum.num2int(arg0), jflag);
  4246. }
  4247. private IRubyObject justify(IRubyObject arg0, IRubyObject arg1, int jflag) {
  4248. Ruby runtime = getRuntime();
  4249. RubyString padStr = arg1.convertToString();
  4250. ByteList pad = padStr.value;
  4251. if (pad.getRealSize() == 0) throw runtime.newArgumentError("zero width padding");
  4252. int width = RubyFixnum.num2int(arg0);
  4253. RubyString result = justifyCommon(runtime, pad, width, jflag);
  4254. if (value.getRealSize() < width) result.infectBy(padStr);
  4255. return result;
  4256. }
  4257. private RubyString justifyCommon(Ruby runtime, ByteList pad, int width, int jflag) {
  4258. if (width < 0 || value.getRealSize() >= width) return strDup(runtime);
  4259. ByteList res = new ByteList(width);
  4260. res.setRealSize(width);
  4261. int padP = pad.getBegin();
  4262. int padLen = pad.getRealSize();
  4263. byte padBytes[] = pad.getUnsafeBytes();
  4264. int p = res.getBegin();
  4265. byte bytes[] = res.getUnsafeBytes();
  4266. if (jflag != 'l') {
  4267. int n = width - value.getRealSize();
  4268. int end = p + ((jflag == 'r') ? n : n / 2);
  4269. if (padLen <= 1) {
  4270. while (p < end) {
  4271. bytes[p++] = padBytes[padP];
  4272. }
  4273. } else {
  4274. int q = padP;
  4275. while (p + padLen <= end) {
  4276. System.arraycopy(padBytes, padP, bytes, p, padLen);
  4277. p += padLen;
  4278. }
  4279. while (p < end) {
  4280. bytes[p++] = padBytes[q++];
  4281. }
  4282. }
  4283. }
  4284. System.arraycopy(value.getUnsafeBytes(), value.getBegin(), bytes, p, value.getRealSize());
  4285. if (jflag != 'r') {
  4286. p += value.getRealSize();
  4287. int end = res.getBegin() + width;
  4288. if (padLen <= 1) {
  4289. while (p < end) {
  4290. bytes[p++] = padBytes[padP];
  4291. }
  4292. } else {
  4293. while (p + padLen <= end) {
  4294. System.arraycopy(padBytes, padP, bytes, p, padLen);
  4295. p += padLen;
  4296. }
  4297. while (p < end) {
  4298. bytes[p++] = padBytes[padP++];
  4299. }
  4300. }
  4301. }
  4302. RubyString result = new RubyString(runtime, getMetaClass(), res);
  4303. if ((!runtime.is1_9()) && (RubyFixnum.num2int(result.length()) > RubyFixnum.num2int(length())) ||
  4304. (runtime.is1_9() && (RubyFixnum.num2int(result.length19()) > RubyFixnum.num2int(length19())))) {
  4305. result.infectBy(this);
  4306. }
  4307. return result;
  4308. }
  4309. private IRubyObject justify19(IRubyObject arg0, int jflag) {
  4310. Ruby runtime = getRuntime();
  4311. RubyString result = justifyCommon(runtime, SPACE_BYTELIST,
  4312. 1,
  4313. true, value.getEncoding(), RubyFixnum.num2int(arg0), jflag);
  4314. if (getCodeRange() != CR_BROKEN) result.setCodeRange(getCodeRange());
  4315. return result;
  4316. }
  4317. private IRubyObject justify19(IRubyObject arg0, IRubyObject arg1, int jflag) {
  4318. Ruby runtime = getRuntime();
  4319. RubyString padStr = arg1.convertToString();
  4320. ByteList pad = padStr.value;
  4321. Encoding enc = checkEncoding(padStr);
  4322. int padCharLen = padStr.strLength(enc);
  4323. if (pad.getRealSize() == 0 || padCharLen == 0) throw runtime.newArgumentError("zero width padding");
  4324. int width = RubyFixnum.num2int(arg0);
  4325. RubyString result = justifyCommon(runtime, pad,
  4326. padCharLen,
  4327. padStr.singleByteOptimizable(),
  4328. enc, width, jflag);
  4329. if (RubyFixnum.num2int(result.length19()) > RubyFixnum.num2int(length19())) result.infectBy(padStr);
  4330. int cr = codeRangeAnd(getCodeRange(), padStr.getCodeRange());
  4331. if (cr != CR_BROKEN) result.setCodeRange(cr);
  4332. return result;
  4333. }
  4334. private RubyString justifyCommon(Ruby runtime, ByteList pad, int padCharLen, boolean padSinglebyte, Encoding enc, int width, int jflag) {
  4335. int len = strLength(enc);
  4336. if (width < 0 || len >= width) return strDup(runtime);
  4337. int n = width - len;
  4338. int llen = (jflag == 'l') ? 0 : ((jflag == 'r') ? n : n / 2);
  4339. int rlen = n - llen;
  4340. int padP = pad.getBegin();
  4341. int padLen = pad.getRealSize();
  4342. byte padBytes[] = pad.getUnsafeBytes();
  4343. ByteList res = new ByteList(value.getRealSize() + n * padLen / padCharLen + 2);
  4344. int p = res.getBegin();
  4345. byte bytes[] = res.getUnsafeBytes();
  4346. while (llen > 0) {
  4347. if (padLen <= 1) {
  4348. bytes[p++] = padBytes[padP];
  4349. llen--;
  4350. } else if (llen > padCharLen) {
  4351. System.arraycopy(padBytes, padP, bytes, p, padLen);
  4352. p += padLen;
  4353. llen -= padCharLen;
  4354. } else {
  4355. int padPP = padSinglebyte ? padP + llen : StringSupport.nth(enc, padBytes, padP, padP + padLen, llen);
  4356. n = padPP - padP;
  4357. System.arraycopy(padBytes, padP, bytes, p, n);
  4358. p += n;
  4359. break;
  4360. }
  4361. }
  4362. System.arraycopy(value.getUnsafeBytes(), value.getBegin(), bytes, p, value.getRealSize());
  4363. p += value.getRealSize();
  4364. while (rlen > 0) {
  4365. if (padLen <= 1) {
  4366. bytes[p++] = padBytes[padP];
  4367. rlen--;
  4368. } else if (rlen > padCharLen) {
  4369. System.arraycopy(padBytes, padP, bytes, p, padLen);
  4370. p += padLen;
  4371. rlen -= padCharLen;
  4372. } else {
  4373. int padPP = padSinglebyte ? padP + rlen : StringSupport.nth(enc, padBytes, padP, padP + padLen, rlen);
  4374. n = padPP - padP;
  4375. System.arraycopy(padBytes, padP, bytes, p, n);
  4376. p += n;
  4377. break;
  4378. }
  4379. }
  4380. res.setRealSize(p);
  4381. RubyString result = new RubyString(runtime, getMetaClass(), res);
  4382. if ((!runtime.is1_9()) && (RubyFixnum.num2int(result.length()) > RubyFixnum.num2int(length())) ||
  4383. (runtime.is1_9() && (RubyFixnum.num2int(result.length19()) > RubyFixnum.num2int(length19())))) {
  4384. result.infectBy(this);
  4385. }
  4386. result.associateEncoding(enc);
  4387. return result;
  4388. }
  /** rb_str_ljust
   *
   * 1.8 {@code ljust(width)}: delegates to {@code justify} with the 'l' flag, which uses the
   * default single-space pad.
   */
  @JRubyMethod(name = "ljust", compat = RUBY1_8)
  public IRubyObject ljust(IRubyObject arg0) {
    return justify(arg0, 'l');
  }
  /** 1.8 {@code ljust(width, pad)}: delegates to {@code justify} with the 'l' flag and the given pad string. */
  @JRubyMethod(name = "ljust", compat = RUBY1_8)
  public IRubyObject ljust(IRubyObject arg0, IRubyObject arg1) {
    return justify(arg0, arg1, 'l');
  }
  /** 1.9 {@code ljust(width)}: delegates to {@code justify19} with the 'l' flag. */
  @JRubyMethod(name = "ljust", compat = RUBY1_9)
  public IRubyObject ljust19(IRubyObject arg0) {
    return justify19(arg0, 'l');
  }
  /** 1.9 {@code ljust(width, pad)}: delegates to {@code justify19} with the 'l' flag and the given pad string. */
  @JRubyMethod(name = "ljust", compat = RUBY1_9)
  public IRubyObject ljust19(IRubyObject arg0, IRubyObject arg1) {
    return justify19(arg0, arg1, 'l');
  }
  /** rb_str_rjust
   *
   * 1.8 {@code rjust(width)}: delegates to {@code justify} with the 'r' flag, which uses the
   * default single-space pad.
   */
  @JRubyMethod(name = "rjust", compat = RUBY1_8)
  public IRubyObject rjust(IRubyObject arg0) {
    return justify(arg0, 'r');
  }
  /** 1.8 {@code rjust(width, pad)}: delegates to {@code justify} with the 'r' flag and the given pad string. */
  @JRubyMethod(name = "rjust", compat = RUBY1_8)
  public IRubyObject rjust(IRubyObject arg0, IRubyObject arg1) {
    return justify(arg0, arg1, 'r');
  }
  /** 1.9 {@code rjust(width)}: delegates to {@code justify19} with the 'r' flag. */
  @JRubyMethod(name = "rjust", compat = RUBY1_9)
  public IRubyObject rjust19(IRubyObject arg0) {
    return justify19(arg0, 'r');
  }
  /** 1.9 {@code rjust(width, pad)}: delegates to {@code justify19} with the 'r' flag and the given pad string. */
  @JRubyMethod(name = "rjust", compat = RUBY1_9)
  public IRubyObject rjust19(IRubyObject arg0, IRubyObject arg1) {
    return justify19(arg0, arg1, 'r');
  }
  /** rb_str_center
   *
   * 1.8 {@code center(width)}: delegates to {@code justify} with the 'c' flag, which uses the
   * default single-space pad.
   */
  @JRubyMethod(compat = RUBY1_8)
  public IRubyObject center(IRubyObject arg0) {
    return justify(arg0, 'c');
  }
  /** 1.8 {@code center(width, pad)}: delegates to {@code justify} with the 'c' flag and the given pad string. */
  @JRubyMethod(compat = RUBY1_8)
  public IRubyObject center(IRubyObject arg0, IRubyObject arg1) {
    return justify(arg0, arg1, 'c');
  }
  /** 1.9 {@code center(width)}: delegates to {@code justify19} with the 'c' flag. */
  @JRubyMethod(name = "center", compat = RUBY1_9)
  public IRubyObject center19(IRubyObject arg0) {
    return justify19(arg0, 'c');
  }
  /** 1.9 {@code center(width, pad)}: delegates to {@code justify19} with the 'c' flag and the given pad string. */
  @JRubyMethod(name = "center", compat = RUBY1_9)
  public IRubyObject center19(IRubyObject arg0, IRubyObject arg1) {
    return justify19(arg0, arg1, 'c');
  }
  /** {@code partition} with only a block: forwards to {@code RubyEnumerable.partition} with this string as receiver. */
  @JRubyMethod
  public IRubyObject partition(ThreadContext context, Block block) {
    return RubyEnumerable.partition(context, this, block);
  }
  4450. @JRubyMethod
  4451. public IRubyObject partition(ThreadContext context, IRubyObject arg, Block block) {
  4452. Ruby runtime = context.getRuntime();
  4453. final int pos;
  4454. final RubyString sep;
  4455. if (arg instanceof RubyRegexp) {
  4456. RubyRegexp regex = (RubyRegexp)arg;
  4457. pos = regex.search19(context, this, 0, false);
  4458. if (pos < 0) return partitionMismatch(runtime);
  4459. sep = (RubyString)subpat19(runtime, context, regex);
  4460. if (pos == 0 && sep.value.getRealSize() == 0) return partitionMismatch(runtime);
  4461. } else {
  4462. IRubyObject tmp = arg.checkStringType();
  4463. if (tmp.isNil()) throw runtime.newTypeError("type mismatch: " + arg.getMetaClass().getName() + " given");
  4464. sep = (RubyString)tmp;
  4465. pos = strIndex19(sep, 0);
  4466. if (pos < 0) return partitionMismatch(runtime);
  4467. }
  4468. return RubyArray.newArray(runtime, new IRubyObject[]{
  4469. makeShared19(runtime, 0, pos),
  4470. sep,
  4471. makeShared19(runtime, pos + sep.value.getRealSize(), value.getRealSize() - pos - sep.value.getRealSize())});
  4472. }
  4473. private IRubyObject partitionMismatch(Ruby runtime) {
  4474. return RubyArray.newArray(runtime, new IRubyObject[]{this, newEmptyString(runtime), newEmptyString(runtime)});
  4475. }
  4476. @JRubyMethod(name = "rpartition")
  4477. public IRubyObject rpartition(ThreadContext context, IRubyObject arg) {
  4478. Ruby runtime = context.getRuntime();
  4479. final int pos;
  4480. final RubyString sep;
  4481. if (arg instanceof RubyRegexp) {
  4482. RubyRegexp regex = (RubyRegexp)arg;
  4483. pos = regex.search19(context, this, value.getRealSize(), true);
  4484. if (pos < 0) return rpartitionMismatch(runtime);
  4485. sep = (RubyString)RubyRegexp.nth_match(0, context.getCurrentScope().getBackRef(runtime));
  4486. } else {
  4487. IRubyObject tmp = arg.checkStringType();
  4488. if (tmp.isNil()) throw runtime.newTypeError("type mismatch: " + arg.getMetaClass().getName() + " given");
  4489. sep = (RubyString)tmp;
  4490. pos = strRindex19(sep, subLength(value.getRealSize()));
  4491. if (pos < 0) return rpartitionMismatch(runtime);
  4492. }
  4493. return RubyArray.newArray(runtime, new IRubyObject[]{
  4494. substr19(runtime, 0, pos),
  4495. sep,
  4496. substr19(runtime, pos + sep.strLength(), value.getRealSize())});
  4497. }
  4498. private IRubyObject rpartitionMismatch(Ruby runtime) {
  4499. return RubyArray.newArray(runtime, new IRubyObject[]{newEmptyString(runtime), newEmptyString(runtime), this});
  4500. }
  4501. /** rb_str_chop / rb_str_chop_bang
  4502. *
  4503. */
  4504. @JRubyMethod(name = "chop", compat = RUBY1_8)
  4505. public IRubyObject chop(ThreadContext context) {
  4506. if (value.getRealSize() == 0) return newEmptyString(context.getRuntime(), getMetaClass()).infectBy(this);
  4507. return makeShared(context.getRuntime(), 0, choppedLength());
  4508. }
  4509. @JRubyMethod(name = "chop!", compat = RUBY1_8)
  4510. public IRubyObject chop_bang(ThreadContext context) {
  4511. if (value.getRealSize() == 0) return context.getRuntime().getNil();
  4512. view(0, choppedLength());
  4513. return this;
  4514. }
  4515. private int choppedLength() {
  4516. int end = value.getRealSize() - 1;
  4517. if ((value.getUnsafeBytes()[value.getBegin() + end]) == '\n') {
  4518. if (end > 0 && (value.getUnsafeBytes()[value.getBegin() + end - 1]) == '\r') end--;
  4519. }
  4520. return end;
  4521. }
  4522. @JRubyMethod(name = "chop", compat = RUBY1_9)
  4523. public IRubyObject chop19(ThreadContext context) {
  4524. Ruby runtime = context.getRuntime();
  4525. if (value.getRealSize() == 0) return newEmptyString(runtime, getMetaClass(), value.getEncoding()).infectBy(this);
  4526. return makeShared19(runtime, 0, choppedLength19(runtime));
  4527. }
  4528. @JRubyMethod(name = "chop!", compat = RUBY1_9)
  4529. public IRubyObject chop_bang19(ThreadContext context) {
  4530. modifyCheck();
  4531. Ruby runtime = context.getRuntime();
  4532. if (value.getRealSize() == 0) return runtime.getNil();
  4533. keepCodeRange();
  4534. view(0, choppedLength19(runtime));
  4535. return this;
  4536. }
  4537. private int choppedLength19(Ruby runtime) {
  4538. int p = value.getBegin();
  4539. int end = p + value.getRealSize();
  4540. if (p > end) return 0;
  4541. byte bytes[] = value.getUnsafeBytes();
  4542. Encoding enc = value.getEncoding();
  4543. int s = enc.prevCharHead(bytes, p, end, end);
  4544. if (s == -1) return 0;
  4545. if (s > p && codePoint(runtime, enc, bytes, s, end) == '\n') {
  4546. int s2 = enc.prevCharHead(bytes, p, s, end);
  4547. if (s2 != -1 && codePoint(runtime, enc, bytes, s2, end) == '\r') s = s2;
  4548. }
  4549. return s - p;
  4550. }
  4551. /** rb_str_chop
  4552. *
  4553. */
  4554. @JRubyMethod(name = "chomp", compat = RUBY1_8)
  4555. public RubyString chomp(ThreadContext context) {
  4556. RubyString str = strDup(context.getRuntime());
  4557. str.chomp_bang(context);
  4558. return str;
  4559. }
  4560. @JRubyMethod(name = "chomp", compat = RUBY1_8)
  4561. public RubyString chomp(ThreadContext context, IRubyObject arg0) {
  4562. RubyString str = strDup(context.getRuntime());
  4563. str.chomp_bang(context, arg0);
  4564. return str;
  4565. }
  4566. /**
  4567. * rb_str_chomp_bang
  4568. *
  4569. * In the common case, removes CR and LF characters in various ways depending on the value of
  4570. * the optional args[0].
  4571. * If args.length==0 removes one instance of CR, CRLF or LF from the end of the string.
  4572. * If args.length>0 and args[0] is "\n" then same behaviour as args.length==0 .
  4573. * If args.length>0 and args[0] is "" then removes trailing multiple LF or CRLF (but no CRs at
  4574. * all(!)).
  4575. */
  4576. @JRubyMethod(name = "chomp!", compat = RUBY1_8)
  4577. public IRubyObject chomp_bang(ThreadContext context) {
  4578. Ruby runtime = context.getRuntime();
  4579. if (value.getRealSize() == 0) return runtime.getNil();
  4580. IRubyObject rsObj = runtime.getGlobalVariables().get("$/");
  4581. if (rsObj == runtime.getGlobalVariables().getDefaultSeparator()) return smartChopBangCommon(runtime);
  4582. return chompBangCommon(runtime, rsObj);
  4583. }
  4584. @JRubyMethod(name = "chomp!", compat = RUBY1_8)
  4585. public IRubyObject chomp_bang(ThreadContext context, IRubyObject arg0) {
  4586. Ruby runtime = context.getRuntime();
  4587. if (value.getRealSize() == 0) return runtime.getNil();
  4588. return chompBangCommon(runtime, arg0);
  4589. }
  4590. private IRubyObject chompBangCommon(Ruby runtime, IRubyObject rsObj) {
  4591. if (rsObj.isNil()) return rsObj;
  4592. RubyString rs = rsObj.convertToString();
  4593. int p = value.getBegin();
  4594. int len = value.getRealSize();
  4595. byte[] bytes = value.getUnsafeBytes();
  4596. int rslen = rs.value.getRealSize();
  4597. if (rslen == 0) {
  4598. while (len > 0 && bytes[p + len - 1] == (byte)'\n') {
  4599. len--;
  4600. if (len > 0 && bytes[p + len - 1] == (byte)'\r') len--;
  4601. }
  4602. if (len < value.getRealSize()) {
  4603. view(0, len);
  4604. return this;
  4605. }
  4606. return runtime.getNil();
  4607. }
  4608. if (rslen > len) return runtime.getNil();
  4609. byte newline = rs.value.getUnsafeBytes()[rslen - 1];
  4610. if (rslen == 1 && newline == (byte)'\n') return smartChopBangCommon(runtime);
  4611. if (bytes[p + len - 1] == newline && rslen <= 1 || value.endsWith(rs.value)) {
  4612. view(0, value.getRealSize() - rslen);
  4613. return this;
  4614. }
  4615. return runtime.getNil();
  4616. }
  4617. private IRubyObject smartChopBangCommon(Ruby runtime) {
  4618. int len = value.getRealSize();
  4619. int p = value.getBegin();
  4620. byte[]bytes = value.getUnsafeBytes();
  4621. if (bytes[p + len - 1] == (byte)'\n') {
  4622. len--;
  4623. if (len > 0 && bytes[p + len - 1] == (byte)'\r') len--;
  4624. view(0, len);
  4625. } else if (bytes[p + len - 1] == (byte)'\r') {
  4626. len--;
  4627. view(0, len);
  4628. } else {
  4629. modifyCheck();
  4630. return runtime.getNil();
  4631. }
  4632. return this;
  4633. }
  4634. @JRubyMethod(name = "chomp", compat = RUBY1_9)
  4635. public RubyString chomp19(ThreadContext context) {
  4636. RubyString str = strDup(context.getRuntime());
  4637. str.chomp_bang19(context);
  4638. return str;
  4639. }
  4640. @JRubyMethod(name = "chomp", compat = RUBY1_9)
  4641. public RubyString chomp19(ThreadContext context, IRubyObject arg0) {
  4642. RubyString str = strDup(context.getRuntime());
  4643. str.chomp_bang19(context, arg0);
  4644. return str;
  4645. }
  4646. @JRubyMethod(name = "chomp!", compat = RUBY1_9)
  4647. public IRubyObject chomp_bang19(ThreadContext context) {
  4648. Ruby runtime = context.getRuntime();
  4649. if (value.getRealSize() == 0) return runtime.getNil();
  4650. IRubyObject rsObj = runtime.getGlobalVariables().get("$/");
  4651. if (rsObj == runtime.getGlobalVariables().getDefaultSeparator()) return smartChopBangCommon19(runtime);
  4652. return chompBangCommon19(runtime, rsObj);
  4653. }
  4654. @JRubyMethod(name = "chomp!", compat = RUBY1_9)
  4655. public IRubyObject chomp_bang19(ThreadContext context, IRubyObject arg0) {
  4656. modifyCheck();
  4657. Ruby runtime = context.getRuntime();
  4658. if (value.getRealSize() == 0) return runtime.getNil();
  4659. return chompBangCommon19(runtime, arg0);
  4660. }
  4661. private IRubyObject chompBangCommon19(Ruby runtime, IRubyObject rsObj) {
  4662. if (rsObj.isNil()) return rsObj;
  4663. RubyString rs = rsObj.convertToString();
  4664. int p = value.getBegin();
  4665. int len = value.getRealSize();
  4666. int end = p + len;
  4667. byte[] bytes = value.getUnsafeBytes();
  4668. int rslen = rs.value.getRealSize();
  4669. if (rslen == 0) {
  4670. while (len > 0 && bytes[p + len - 1] == (byte)'\n') {
  4671. len--;
  4672. if (len > 0 && bytes[p + len - 1] == (byte)'\r') len--;
  4673. }
  4674. if (len < value.getRealSize()) {
  4675. keepCodeRange();
  4676. view(0, len);
  4677. return this;
  4678. }
  4679. return runtime.getNil();
  4680. }
  4681. if (rslen > len) return runtime.getNil();
  4682. byte newline = rs.value.getUnsafeBytes()[rslen - 1];
  4683. if (rslen == 1 && newline == (byte)'\n') return smartChopBangCommon19(runtime);
  4684. Encoding enc = checkEncoding(rs);
  4685. if (rs.scanForCodeRange() == CR_BROKEN) return runtime.getNil();
  4686. int pp = end - rslen;
  4687. if (bytes[p + len - 1] == newline && rslen <= 1 || value.endsWith(rs.value)) {
  4688. if (enc.leftAdjustCharHead(bytes, p, pp, end) != pp) return runtime.getNil();
  4689. if (getCodeRange() != CR_7BIT) clearCodeRange();
  4690. view(0, value.getRealSize() - rslen);
  4691. return this;
  4692. }
  4693. return runtime.getNil();
  4694. }
  4695. private IRubyObject smartChopBangCommon19(Ruby runtime) {
  4696. final int p = value.getBegin();
  4697. int len = value.getRealSize();
  4698. int end = p + len;
  4699. byte bytes[] = value.getUnsafeBytes();
  4700. Encoding enc = value.getEncoding();
  4701. keepCodeRange();
  4702. if (enc.minLength() > 1) {
  4703. int pp = enc.leftAdjustCharHead(bytes, p, end - enc.minLength(), end);
  4704. if (enc.isNewLine(bytes, pp, end)) end = pp;
  4705. pp = end - enc.minLength();
  4706. if (pp >= p) {
  4707. pp = enc.leftAdjustCharHead(bytes, p, pp, end);
  4708. if (StringSupport.preciseLength(enc, bytes, pp, end) > 0 &&
  4709. enc.mbcToCode(bytes, pp, end) == '\r') end = pp;
  4710. }
  4711. if (end == p + value.getRealSize()) {
  4712. modifyCheck();
  4713. return runtime.getNil();
  4714. }
  4715. len = end - p;
  4716. view(0, len);
  4717. } else {
  4718. if (bytes[p + len - 1] == (byte)'\n') {
  4719. len--;
  4720. if (len > 0 && bytes[p + len - 1] == (byte)'\r') len--;
  4721. view(0, len);
  4722. } else if (bytes[p + len - 1] == (byte)'\r') {
  4723. len--;
  4724. view(0, len);
  4725. } else {
  4726. modifyCheck();
  4727. return runtime.getNil();
  4728. }
  4729. }
  4730. return this;
  4731. }
  4732. /** rb_str_lstrip / rb_str_lstrip_bang
  4733. *
  4734. */
  4735. @JRubyMethod(name = "lstrip", compat = RUBY1_8)
  4736. public IRubyObject lstrip(ThreadContext context) {
  4737. RubyString str = strDup(context.getRuntime());
  4738. str.lstrip_bang(context);
  4739. return str;
  4740. }
  4741. @JRubyMethod(name = "lstrip!", compat = RUBY1_8)
  4742. public IRubyObject lstrip_bang(ThreadContext context) {
  4743. Ruby runtime = context.getRuntime();
  4744. if (value.getRealSize() == 0) return runtime.getNil();
  4745. return singleByteLStrip(runtime, ASCII, value.getUnsafeBytes(), value.getBegin(), value.getBegin() + value.getRealSize());
  4746. }
  4747. @JRubyMethod(name = "lstrip", compat = RUBY1_9)
  4748. public IRubyObject lstrip19(ThreadContext context) {
  4749. RubyString str = strDup(context.getRuntime());
  4750. str.lstrip_bang19(context);
  4751. return str;
  4752. }
  4753. @JRubyMethod(name = "lstrip!", compat = RUBY1_9)
  4754. public IRubyObject lstrip_bang19(ThreadContext context) {
  4755. modifyCheck();
  4756. Ruby runtime = context.getRuntime();
  4757. if (value.getRealSize() == 0) {
  4758. modifyCheck();
  4759. return runtime.getNil();
  4760. }
  4761. Encoding enc = value.getEncoding();
  4762. int s = value.getBegin();
  4763. int end = s + value.getRealSize();
  4764. byte[]bytes = value.getUnsafeBytes();
  4765. final IRubyObject result;
  4766. if (singleByteOptimizable(enc)) {
  4767. result = singleByteLStrip(runtime, enc, bytes, s, end);
  4768. } else {
  4769. result = multiByteLStrip(runtime, enc, bytes, s, end);
  4770. }
  4771. keepCodeRange();
  4772. return result;
  4773. }
  4774. private IRubyObject singleByteLStrip(Ruby runtime, Encoding enc, byte[]bytes, int s, int end) {
  4775. int p = s;
  4776. while (p < end && enc.isSpace(bytes[p] & 0xff)) p++;
  4777. if (p > s) {
  4778. view(p - s, end - p);
  4779. return this;
  4780. }
  4781. return runtime.getNil();
  4782. }
  4783. private IRubyObject multiByteLStrip(Ruby runtime, Encoding enc, byte[]bytes, int s, int end) {
  4784. int p = s;
  4785. int c;
  4786. while (p < end && enc.isSpace(c = codePoint(runtime, enc, bytes, p, end))) p += codeLength(runtime, enc, c);
  4787. if (p > s) {
  4788. view(p - s, end - p);
  4789. return this;
  4790. }
  4791. return runtime.getNil();
  4792. }
  4793. /** rb_str_rstrip / rb_str_rstrip_bang
  4794. *
  4795. */
  4796. @JRubyMethod(name = "rstrip", compat = RUBY1_8)
  4797. public IRubyObject rstrip(ThreadContext context) {
  4798. RubyString str = strDup(context.getRuntime());
  4799. str.rstrip_bang(context);
  4800. return str;
  4801. }
  4802. @JRubyMethod(name = "rstrip!", compat = RUBY1_8)
  4803. public IRubyObject rstrip_bang(ThreadContext context) {
  4804. Ruby runtime = context.getRuntime();
  4805. if (value.getRealSize() == 0) return runtime.getNil();
  4806. return singleByteRStrip(runtime, ASCII, value.getUnsafeBytes(), value.getBegin(), value.getBegin() + value.getRealSize());
  4807. }
  4808. @JRubyMethod(name = "rstrip", compat = RUBY1_9)
  4809. public IRubyObject rstrip19(ThreadContext context) {
  4810. RubyString str = strDup(context.getRuntime());
  4811. str.rstrip_bang19(context);
  4812. return str;
  4813. }
  4814. @JRubyMethod(name = "rstrip!", compat = RUBY1_9)
  4815. public IRubyObject rstrip_bang19(ThreadContext context) {
  4816. modifyCheck();
  4817. Ruby runtime = context.getRuntime();
  4818. if (value.getRealSize() == 0) {
  4819. modifyCheck();
  4820. return runtime.getNil();
  4821. }
  4822. Encoding enc = value.getEncoding();
  4823. int s = value.getBegin();
  4824. int end = s + value.getRealSize();
  4825. byte[]bytes = value.getUnsafeBytes();
  4826. final IRubyObject result;
  4827. if (singleByteOptimizable(enc)) {
  4828. result = singleByteRStrip(runtime, enc, bytes, s, end);
  4829. } else {
  4830. result = multiByteRStrip(runtime, enc, bytes, s, end);
  4831. }
  4832. keepCodeRange();
  4833. return result;
  4834. }
  4835. private IRubyObject singleByteRStrip2(Ruby runtime, Encoding enc, byte[]bytes, int s, int end) {
  4836. int endp = end;
  4837. while (endp - 1 >= s && bytes[endp - 1] == 0) endp--;
  4838. while (endp - 1 >= s && enc.isSpace(bytes[endp - 1] & 0xff)) endp--;
  4839. if (endp < end) {
  4840. view(0, endp - s);
  4841. return this;
  4842. }
  4843. return runtime.getNil();
  4844. }
  4845. private IRubyObject singleByteRStrip(Ruby runtime, Encoding enc, byte[]bytes, int s, int end) {
  4846. int endp = end - 1;
  4847. while (endp >= s && bytes[endp] == 0) endp--;
  4848. while (endp >= s && enc.isSpace(bytes[endp] & 0xff)) endp--;
  4849. if (endp < end - 1) {
  4850. view(0, endp - s + 1);
  4851. return this;
  4852. }
  4853. return runtime.getNil();
  4854. }
  4855. private IRubyObject multiByteRStrip(Ruby runtime, Encoding enc, byte[]bytes, int s, int end) {
  4856. int endp = end;
  4857. int prev;
  4858. while ((prev = enc.prevCharHead(bytes, s, endp, end)) != -1) {
  4859. if (!enc.isSpace(codePoint(runtime, enc, bytes, prev, end))) break;
  4860. endp = prev;
  4861. }
  4862. if (prev < end) {
  4863. view(0, prev - s + 1);
  4864. return this;
  4865. }
  4866. return runtime.getNil();
  4867. }
  4868. /** rb_str_strip / rb_str_strip_bang
  4869. *
  4870. */
  4871. @JRubyMethod(name = "strip", compat = RUBY1_8)
  4872. public IRubyObject strip(ThreadContext context) {
  4873. RubyString str = strDup(context.getRuntime());
  4874. str.strip_bang(context);
  4875. return str;
  4876. }
  4877. @JRubyMethod(name = "strip!", compat = RUBY1_8)
  4878. public IRubyObject strip_bang(ThreadContext context) {
  4879. Ruby runtime = context.getRuntime();
  4880. if (value.getRealSize() == 0) return runtime.getNil();
  4881. return singleByteStrip(runtime, ASCII, value.getUnsafeBytes(), value.getBegin(), value.getBegin() + value.getRealSize());
  4882. }
  4883. @JRubyMethod(name = "strip", compat = RUBY1_9)
  4884. public IRubyObject strip19(ThreadContext context) {
  4885. RubyString str = strDup(context.getRuntime());
  4886. str.strip_bang19(context);
  4887. return str;
  4888. }
  4889. @JRubyMethod(name = "strip!", compat = RUBY1_9)
  4890. public IRubyObject strip_bang19(ThreadContext context) {
  4891. Ruby runtime = context.getRuntime();
  4892. modifyCheck();
  4893. Encoding enc = value.getEncoding();
  4894. int s = value.getBegin();
  4895. int end = s + value.getRealSize();
  4896. byte[]bytes = value.getUnsafeBytes();
  4897. final IRubyObject result;
  4898. if (singleByteOptimizable(enc)) {
  4899. result = singleByteStrip(runtime, enc, bytes, s, end);
  4900. } else {
  4901. result = multiByteStrip(runtime, enc, bytes, s, end);
  4902. }
  4903. keepCodeRange();
  4904. return result;
  4905. }
  4906. private IRubyObject singleByteStrip(Ruby runtime, Encoding enc, byte[]bytes, int s, int end) {
  4907. int p = s;
  4908. while (p < end && enc.isSpace(bytes[p] & 0xff)) p++;
  4909. int endp = end - 1;
  4910. while (endp >= p && bytes[endp] == 0) endp--;
  4911. while (endp >= p && enc.isSpace(bytes[endp] & 0xff)) endp--;
  4912. if (p > s || endp < end - 1) {
  4913. view(p - s, endp - p + 1);
  4914. return this;
  4915. }
  4916. return runtime.getNil();
  4917. }
  4918. private IRubyObject multiByteStrip(Ruby runtime, Encoding enc, byte[]bytes, int s, int end) {
  4919. int p = s;
  4920. int c;
  4921. while (p < end && enc.isSpace(c = codePoint(runtime, enc, bytes, p, end))) p += codeLength(runtime, enc, c);
  4922. int endp = end;
  4923. int prev;
  4924. while ((prev = enc.prevCharHead(bytes, s, endp, end)) != -1) {
  4925. if (!enc.isSpace(codePoint(runtime, enc, bytes, prev, end))) break;
  4926. endp = prev;
  4927. }
  4928. if (p > s || prev < end) {
  4929. view(p - s, endp - p);
  4930. return this;
  4931. }
  4932. return runtime.getNil();
  4933. }
  4934. /** rb_str_count
  4935. *
  4936. */
  4937. @JRubyMethod(name = "count", compat = RUBY1_8)
  4938. public IRubyObject count(ThreadContext context) {
  4939. throw context.getRuntime().newArgumentError("wrong number of arguments");
  4940. }
  4941. @JRubyMethod(name = "count", compat = RUBY1_8)
  4942. public IRubyObject count(ThreadContext context, IRubyObject arg) {
  4943. final boolean[]table = new boolean[TRANS_SIZE];
  4944. arg.convertToString().trSetupTable(table, true);
  4945. return countCommon(context.getRuntime(), table);
  4946. }
  4947. @JRubyMethod(name = "count", required = 1, rest = true, compat = RUBY1_8)
  4948. public IRubyObject count(ThreadContext context, IRubyObject[] args) {
  4949. Ruby runtime = context.getRuntime();
  4950. if (value.getRealSize() == 0) return RubyFixnum.zero(runtime);
  4951. final boolean[]table = new boolean[TRANS_SIZE];
  4952. args[0].convertToString().trSetupTable(table, true);
  4953. for (int i = 1; i<args.length; i++) {
  4954. args[i].convertToString().trSetupTable(table, false);
  4955. }
  4956. return countCommon(runtime, table);
  4957. }
  4958. private IRubyObject countCommon(Ruby runtime, boolean[]table) {
  4959. int i = 0;
  4960. byte[]bytes = value.getUnsafeBytes();
  4961. int p = value.getBegin();
  4962. int end = p + value.getRealSize();
  4963. while (p < end) if (table[bytes[p++] & 0xff]) i++;
  4964. return runtime.newFixnum(i);
  4965. }
  4966. @JRubyMethod(name = "count", compat = RUBY1_9)
  4967. public IRubyObject count19(ThreadContext context) {
  4968. throw context.getRuntime().newArgumentError("wrong number of arguments");
  4969. }
  4970. @JRubyMethod(name = "count", compat = RUBY1_9)
  4971. public IRubyObject count19(ThreadContext context, IRubyObject arg) {
  4972. Ruby runtime = context.getRuntime();
  4973. if (value.getRealSize() == 0) return RubyFixnum.zero(runtime);
  4974. RubyString otherStr = arg.convertToString();
  4975. Encoding enc = checkEncoding(otherStr);
  4976. final boolean[]table = new boolean[TRANS_SIZE];
  4977. TrTables tables = otherStr.trSetupTable(context.getRuntime(), table, null, true, enc);
  4978. return countCommon19(runtime, table, tables, enc);
  4979. }
  4980. @JRubyMethod(name = "count", required = 1, rest = true, compat = RUBY1_9)
  4981. public IRubyObject count19(ThreadContext context, IRubyObject[] args) {
  4982. Ruby runtime = context.getRuntime();
  4983. if (value.getRealSize() == 0) return RubyFixnum.zero(runtime);
  4984. RubyString otherStr = args[0].convertToString();
  4985. Encoding enc = checkEncoding(otherStr);
  4986. final boolean[]table = new boolean[TRANS_SIZE];
  4987. TrTables tables = otherStr.trSetupTable(runtime, table, null, true, enc);
  4988. for (int i = 1; i<args.length; i++) {
  4989. otherStr = args[i].convertToString();
  4990. enc = checkEncoding(otherStr);
  4991. tables = otherStr.trSetupTable(runtime, table, tables, false, enc);
  4992. }
  4993. return countCommon19(runtime, table, tables, enc);
  4994. }
  4995. private IRubyObject countCommon19(Ruby runtime, boolean[]table, TrTables tables, Encoding enc) {
  4996. int i = 0;
  4997. byte[]bytes = value.getUnsafeBytes();
  4998. int p = value.getBegin();
  4999. int end = p + value.getRealSize();
  5000. int c;
  5001. while (p < end) {
  5002. if (enc.isAsciiCompatible() && Encoding.isAscii(c = bytes[p] & 0xff)) {
  5003. if (table[c]) i++;
  5004. p++;
  5005. } else {
  5006. c = codePoint(runtime, enc, bytes, p, end);
  5007. int cl = codeLength(runtime, enc, c);
  5008. if (trFind(c, table, tables)) i++;
  5009. p += cl;
  5010. }
  5011. }
  5012. return runtime.newFixnum(i);
  5013. }
  5014. /** rb_str_delete / rb_str_delete_bang
  5015. *
  5016. */
  5017. @JRubyMethod(name = "delete", compat = RUBY1_8)
  5018. public IRubyObject delete(ThreadContext context) {
  5019. throw context.getRuntime().newArgumentError("wrong number of arguments");
  5020. }
  5021. @JRubyMethod(name = "delete", compat = RUBY1_8)
  5022. public IRubyObject delete(ThreadContext context, IRubyObject arg) {
  5023. RubyString str = strDup(context.getRuntime());
  5024. str.delete_bang(context, arg);
  5025. return str;
  5026. }
  5027. @JRubyMethod(name = "delete", required = 1, rest = true, compat = RUBY1_8)
  5028. public IRubyObject delete(ThreadContext context, IRubyObject[] args) {
  5029. RubyString str = strDup(context.getRuntime());
  5030. str.delete_bang(context, args);
  5031. return str;
  5032. }
  5033. @JRubyMethod(name = "delete!", compat = RUBY1_8)
  5034. public IRubyObject delete_bang(ThreadContext context) {
  5035. throw context.getRuntime().newArgumentError("wrong number of arguments");
  5036. }
  5037. @JRubyMethod(name = "delete!", compat = RUBY1_8)
  5038. public IRubyObject delete_bang(ThreadContext context, IRubyObject arg) {
  5039. Ruby runtime = context.getRuntime();
  5040. if (value.getRealSize() == 0) return runtime.getNil();
  5041. final boolean[]squeeze = new boolean[TRANS_SIZE];
  5042. arg.convertToString().trSetupTable(squeeze, true);
  5043. return delete_bangCommon(runtime, squeeze);
  5044. }
  5045. @JRubyMethod(name = "delete!", required = 1, rest = true, compat = RUBY1_8)
  5046. public IRubyObject delete_bang(ThreadContext context, IRubyObject[] args) {
  5047. Ruby runtime = context.getRuntime();
  5048. if (value.getRealSize() == 0) return runtime.getNil();
  5049. boolean[]squeeze = new boolean[TRANS_SIZE];
  5050. args[0].convertToString().trSetupTable(squeeze, true);
  5051. for (int i=1; i<args.length; i++) {
  5052. args[i].convertToString().trSetupTable(squeeze, false);
  5053. }
  5054. return delete_bangCommon(runtime, squeeze);
  5055. }
  5056. private IRubyObject delete_bangCommon(Ruby runtime, boolean[]squeeze) {
  5057. modify();
  5058. int s = value.getBegin();
  5059. int t = s;
  5060. int send = s + value.getRealSize();
  5061. byte[]bytes = value.getUnsafeBytes();
  5062. boolean modify = false;
  5063. while (s < send) {
  5064. if (squeeze[bytes[s] & 0xff]) {
  5065. modify = true;
  5066. } else {
  5067. bytes[t++] = bytes[s];
  5068. }
  5069. s++;
  5070. }
  5071. value.setRealSize(t - value.getBegin());
  5072. return modify ? this : runtime.getNil();
  5073. }
  5074. @JRubyMethod(name = "delete", compat = RUBY1_9)
  5075. public IRubyObject delete19(ThreadContext context) {
  5076. throw context.getRuntime().newArgumentError("wrong number of arguments");
  5077. }
  5078. @JRubyMethod(name = "delete", compat = RUBY1_9)
  5079. public IRubyObject delete19(ThreadContext context, IRubyObject arg) {
  5080. RubyString str = strDup(context.getRuntime());
  5081. str.delete_bang19(context, arg);
  5082. return str;
  5083. }
  5084. @JRubyMethod(name = "delete", required = 1, rest = true, compat = RUBY1_9)
  5085. public IRubyObject delete19(ThreadContext context, IRubyObject[] args) {
  5086. RubyString str = strDup(context.getRuntime());
  5087. str.delete_bang19(context, args);
  5088. return str;
  5089. }
  5090. @JRubyMethod(name = "delete!", compat = RUBY1_9)
  5091. public IRubyObject delete_bang19(ThreadContext context) {
  5092. throw context.getRuntime().newArgumentError("wrong number of arguments");
  5093. }
  5094. @JRubyMethod(name = "delete!", compat = RUBY1_9)
  5095. public IRubyObject delete_bang19(ThreadContext context, IRubyObject arg) {
  5096. Ruby runtime = context.getRuntime();
  5097. if (value.getRealSize() == 0) return runtime.getNil();
  5098. RubyString otherStr = arg.convertToString();
  5099. Encoding enc = checkEncoding(otherStr);
  5100. final boolean[]squeeze = new boolean[TRANS_SIZE];
  5101. TrTables tables = otherStr.trSetupTable(runtime, squeeze, null, true, enc);
  5102. return delete_bangCommon19(runtime, squeeze, tables, enc);
  5103. }
  5104. @JRubyMethod(name = "delete!", required = 1, rest = true, compat = RUBY1_9)
  5105. public IRubyObject delete_bang19(ThreadContext context, IRubyObject[] args) {
  5106. Ruby runtime = context.getRuntime();
  5107. if (value.getRealSize() == 0) return runtime.getNil();
  5108. RubyString otherStr = args[0].convertToString();
  5109. Encoding enc = checkEncoding(otherStr);
  5110. boolean[]squeeze = new boolean[TRANS_SIZE];
  5111. TrTables tables = otherStr.trSetupTable(runtime, squeeze, null, true, enc);
  5112. for (int i=1; i<args.length; i++) {
  5113. otherStr = args[i].convertToString();
  5114. enc = checkEncoding(otherStr);
  5115. tables = otherStr.trSetupTable(runtime, squeeze, tables, false, enc);
  5116. }
  5117. return delete_bangCommon19(runtime, squeeze, tables, enc);
  5118. }
  5119. private IRubyObject delete_bangCommon19(Ruby runtime, boolean[]squeeze, TrTables tables, Encoding enc) {
  5120. modifyAndKeepCodeRange();
  5121. int s = value.getBegin();
  5122. int t = s;
  5123. int send = s + value.getRealSize();
  5124. byte[]bytes = value.getUnsafeBytes();
  5125. boolean modify = false;
  5126. boolean asciiCompatible = enc.isAsciiCompatible();
  5127. int cr = asciiCompatible ? CR_7BIT : CR_VALID;
  5128. while (s < send) {
  5129. int c;
  5130. if (asciiCompatible && Encoding.isAscii(c = bytes[s] & 0xff)) {
  5131. if (squeeze[c]) {
  5132. modify = true;
  5133. } else {
  5134. if (t != s) bytes[t] = (byte)c;
  5135. t++;
  5136. }
  5137. s++;
  5138. } else {
  5139. c = codePoint(runtime, enc, bytes, s, send);
  5140. int cl = codeLength(runtime, enc, c);
  5141. if (trFind(c, squeeze, tables)) {
  5142. modify = true;
  5143. } else {
  5144. if (t != s) enc.codeToMbc(c, bytes, t);
  5145. t += cl;
  5146. if (cr == CR_7BIT) cr = CR_VALID;
  5147. }
  5148. s += cl;
  5149. }
  5150. }
  5151. value.setRealSize(t - value.getBegin());
  5152. setCodeRange(cr);
  5153. return modify ? this : runtime.getNil();
  5154. }
  5155. /** rb_str_squeeze / rb_str_squeeze_bang
  5156. *
  5157. */
  5158. @JRubyMethod(name = "squeeze", compat = RUBY1_8)
  5159. public IRubyObject squeeze(ThreadContext context) {
  5160. RubyString str = strDup(context.getRuntime());
  5161. str.squeeze_bang(context);
  5162. return str;
  5163. }
  5164. @JRubyMethod(name = "squeeze", compat = RUBY1_8)
  5165. public IRubyObject squeeze(ThreadContext context, IRubyObject arg) {
  5166. RubyString str = strDup(context.getRuntime());
  5167. str.squeeze_bang(context, arg);
  5168. return str;
  5169. }
  5170. @JRubyMethod(name = "squeeze", rest = true, compat = RUBY1_8)
  5171. public IRubyObject squeeze(ThreadContext context, IRubyObject[] args) {
  5172. RubyString str = strDup(context.getRuntime());
  5173. str.squeeze_bang(context, args);
  5174. return str;
  5175. }
  5176. @JRubyMethod(name = "squeeze!", compat = RUBY1_8)
  5177. public IRubyObject squeeze_bang(ThreadContext context) {
  5178. Ruby runtime = context.getRuntime();
  5179. if (value.getRealSize() == 0) {
  5180. modifyCheck();
  5181. return runtime.getNil();
  5182. }
  5183. final boolean squeeze[] = new boolean[TRANS_SIZE];
  5184. for (int i=0; i<TRANS_SIZE; i++) squeeze[i] = true;
  5185. modify();
  5186. return squeezeCommon(runtime, squeeze);
  5187. }
  5188. @JRubyMethod(name = "squeeze!", compat = RUBY1_8)
  5189. public IRubyObject squeeze_bang(ThreadContext context, IRubyObject arg) {
  5190. Ruby runtime = context.getRuntime();
  5191. if (value.getRealSize() == 0) {
  5192. modifyCheck();
  5193. return runtime.getNil();
  5194. }
  5195. final boolean squeeze[] = new boolean[TRANS_SIZE];
  5196. arg.convertToString().trSetupTable(squeeze, true);
  5197. modify();
  5198. return squeezeCommon(runtime, squeeze);
  5199. }
  5200. @JRubyMethod(name = "squeeze!", rest = true, compat = RUBY1_8)
  5201. public IRubyObject squeeze_bang(ThreadContext context, IRubyObject[] args) {
  5202. Ruby runtime = context.getRuntime();
  5203. if (value.getRealSize() == 0) {
  5204. modifyCheck();
  5205. return runtime.getNil();
  5206. }
  5207. final boolean squeeze[] = new boolean[TRANS_SIZE];
  5208. args[0].convertToString().trSetupTable(squeeze, true);
  5209. for (int i=1; i<args.length; i++) {
  5210. args[i].convertToString().trSetupTable(squeeze, false);
  5211. }
  5212. modify();
  5213. return squeezeCommon(runtime, squeeze);
  5214. }
  5215. private IRubyObject squeezeCommon(Ruby runtime, boolean squeeze[]) {
  5216. int s = value.getBegin();
  5217. int t = s;
  5218. int send = s + value.getRealSize();
  5219. byte[]bytes = value.getUnsafeBytes();
  5220. int save = -1;
  5221. while (s < send) {
  5222. int c = bytes[s++] & 0xff;
  5223. if (c != save || !squeeze[c]) bytes[t++] = (byte)(save = c);
  5224. }
  5225. if (t - value.getBegin() != value.getRealSize()) { // modified
  5226. value.setRealSize(t - value.getBegin());
  5227. return this;
  5228. }
  5229. return runtime.getNil();
  5230. }
  5231. @JRubyMethod(name = "squeeze", compat = RUBY1_9)
  5232. public IRubyObject squeeze19(ThreadContext context) {
  5233. RubyString str = strDup(context.getRuntime());
  5234. str.squeeze_bang19(context);
  5235. return str;
  5236. }
  5237. @JRubyMethod(name = "squeeze", compat = RUBY1_9)
  5238. public IRubyObject squeeze19(ThreadContext context, IRubyObject arg) {
  5239. RubyString str = strDup(context.getRuntime());
  5240. str.squeeze_bang19(context, arg);
  5241. return str;
  5242. }
  5243. @JRubyMethod(name = "squeeze", rest = true, compat = RUBY1_9)
  5244. public IRubyObject squeeze19(ThreadContext context, IRubyObject[] args) {
  5245. RubyString str = strDup(context.getRuntime());
  5246. str.squeeze_bang19(context, args);
  5247. return str;
  5248. }
  5249. @JRubyMethod(name = "squeeze!", compat = RUBY1_9)
  5250. public IRubyObject squeeze_bang19(ThreadContext context) {
  5251. Ruby runtime = context.getRuntime();
  5252. if (value.getRealSize() == 0) {
  5253. modifyCheck();
  5254. return runtime.getNil();
  5255. }
  5256. final boolean squeeze[] = new boolean[TRANS_SIZE];
  5257. for (int i=0; i<TRANS_SIZE; i++) squeeze[i] = true;
  5258. modifyAndKeepCodeRange();
  5259. if (singleByteOptimizable()) {
  5260. return squeezeCommon(runtime, squeeze); // 1.8
  5261. } else {
  5262. return squeezeCommon19(runtime, squeeze, null, value.getEncoding(), false);
  5263. }
  5264. }
  5265. @JRubyMethod(name = "squeeze!", compat = RUBY1_9)
  5266. public IRubyObject squeeze_bang19(ThreadContext context, IRubyObject arg) {
  5267. Ruby runtime = context.getRuntime();
  5268. if (value.getRealSize() == 0) {
  5269. modifyCheck();
  5270. return runtime.getNil();
  5271. }
  5272. RubyString otherStr = arg.convertToString();
  5273. final boolean squeeze[] = new boolean[TRANS_SIZE];
  5274. TrTables tables = otherStr.trSetupTable(runtime, squeeze, null, true, checkEncoding(otherStr));
  5275. modifyAndKeepCodeRange();
  5276. if (singleByteOptimizable() && otherStr.singleByteOptimizable()) {
  5277. return squeezeCommon(runtime, squeeze); // 1.8
  5278. } else {
  5279. return squeezeCommon19(runtime, squeeze, tables, value.getEncoding(), true);
  5280. }
  5281. }
  5282. @JRubyMethod(name = "squeeze!", rest = true, compat = RUBY1_9)
  5283. public IRubyObject squeeze_bang19(ThreadContext context, IRubyObject[] args) {
  5284. Ruby runtime = context.getRuntime();
  5285. if (value.getRealSize() == 0) {
  5286. modifyCheck();
  5287. return runtime.getNil();
  5288. }
  5289. RubyString otherStr = args[0].convertToString();
  5290. Encoding enc = checkEncoding(otherStr);
  5291. final boolean squeeze[] = new boolean[TRANS_SIZE];
  5292. TrTables tables = otherStr.trSetupTable(runtime, squeeze, null, true, enc);
  5293. boolean singlebyte = singleByteOptimizable() && otherStr.singleByteOptimizable();
  5294. for (int i=1; i<args.length; i++) {
  5295. otherStr = args[i].convertToString();
  5296. enc = checkEncoding(otherStr);
  5297. singlebyte = singlebyte && otherStr.singleByteOptimizable();
  5298. tables = otherStr.trSetupTable(runtime, squeeze, tables, false, enc);
  5299. }
  5300. modifyAndKeepCodeRange();
  5301. if (singlebyte) {
  5302. return squeezeCommon(runtime, squeeze); // 1.8
  5303. } else {
  5304. return squeezeCommon19(runtime, squeeze, tables, enc, true);
  5305. }
  5306. }
    /**
     * Encoding-aware squeeze: compacts this string's bytes in place. A character is
     * dropped when it equals the previously kept character, unless {@code isArg} is
     * set and the character is not selected by {@code squeeze}/{@code tables}.
     *
     * @param runtime the current runtime (code point decoding and the nil result)
     * @param squeeze per-value membership table, indexed directly for ASCII bytes
     * @param tables  additional membership tables consulted through {@code trFind}; may be null
     * @param enc     encoding used to decode and re-encode characters
     * @param isArg   when false, every repeated character is squeezed regardless of the tables
     * @return this string if its length changed, otherwise nil
     */
    private IRubyObject squeezeCommon19(Ruby runtime, boolean squeeze[], TrTables tables, Encoding enc, boolean isArg) {
        int s = value.getBegin(); // read cursor
        int t = s;                // write cursor
        int send = s + value.getRealSize();
        byte[]bytes = value.getUnsafeBytes();
        int save = -1;            // last character kept; -1 before the first one
        int c;

        while (s < send) {
            // Fast path: a single ASCII byte in an ASCII-compatible encoding.
            if (enc.isAsciiCompatible() && Encoding.isAscii(c = bytes[s] & 0xff)) {
                if (c != save || (isArg && !squeeze[c])) bytes[t++] = (byte)(save = c);
                s++;
            } else {
                c = codePoint(runtime, enc, bytes, s, send);
                int cl = codeLength(runtime, enc, c);
                if (c != save || (isArg && !trFind(c, squeeze, tables))) {
                    // Re-encode only when something was dropped earlier (cursors differ).
                    if (t != s) enc.codeToMbc(c, bytes, t);
                    save = c;
                    t += cl;
                }
                s += cl;
            }
        }

        if (t - value.getBegin() != value.getRealSize()) { // modified
            value.setRealSize(t - value.getBegin());
            return this;
        }
        return runtime.getNil();
    }
  5335. /** rb_str_tr / rb_str_tr_bang
  5336. *
  5337. */
  5338. @JRubyMethod(name = "tr", compat = RUBY1_8)
  5339. public IRubyObject tr(ThreadContext context, IRubyObject src, IRubyObject repl) {
  5340. RubyString str = strDup(context.getRuntime());
  5341. str.trTrans(context, src, repl, false);
  5342. return str;
  5343. }
  5344. @JRubyMethod(name = "tr!", compat = RUBY1_8)
  5345. public IRubyObject tr_bang(ThreadContext context, IRubyObject src, IRubyObject repl) {
  5346. return trTrans(context, src, repl, false);
  5347. }
  5348. @JRubyMethod(name = "tr", compat = RUBY1_9)
  5349. public IRubyObject tr19(ThreadContext context, IRubyObject src, IRubyObject repl) {
  5350. RubyString str = strDup(context.getRuntime());
  5351. str.trTrans19(context, src, repl, false);
  5352. return str;
  5353. }
  5354. @JRubyMethod(name = "tr!")
  5355. public IRubyObject tr_bang19(ThreadContext context, IRubyObject src, IRubyObject repl) {
  5356. return trTrans19(context, src, repl, false);
  5357. }
  5358. private static final class TR {
  5359. TR(ByteList bytes) {
  5360. p = bytes.getBegin();
  5361. pend = bytes.getRealSize() + p;
  5362. buf = bytes.getUnsafeBytes();
  5363. now = max = 0;
  5364. gen = false;
  5365. }
  5366. int p, pend, now, max;
  5367. boolean gen;
  5368. byte[]buf;
  5369. }
    /** Length of the lookup tables used by the tr/squeeze helpers (one slot per value 0..255). */
    private static final int TRANS_SIZE = 256;
  5371. /** tr_setup_table
  5372. *
  5373. */
  5374. private void trSetupTable(boolean[]table, boolean init) {
  5375. final TR tr = new TR(value);
  5376. boolean cflag = false;
  5377. if (value.getRealSize() > 1 && value.getUnsafeBytes()[value.getBegin()] == '^') {
  5378. cflag = true;
  5379. tr.p++;
  5380. }
  5381. if (init) for (int i=0; i<TRANS_SIZE; i++) table[i] = true;
  5382. final boolean[]buf = new boolean[TRANS_SIZE];
  5383. for (int i=0; i<TRANS_SIZE; i++) buf[i] = cflag;
  5384. int c;
  5385. while ((c = trNext(tr)) >= 0) buf[c & 0xff] = !cflag;
  5386. for (int i=0; i<TRANS_SIZE; i++) table[i] = table[i] && buf[i];
  5387. }
  5388. private static final class TrTables {
  5389. private IntHash<IRubyObject> del, noDel;
  5390. }
    /**
     * Encoding-aware variant of the table setup. Treats this string as a tr-style
     * character set and intersects it into {@code table} (values below TRANS_SIZE)
     * and into {@code tables} (larger code points).
     *
     * @param runtime the current runtime, passed through to {@code trNext}
     * @param table   membership table for values below TRANS_SIZE
     * @param tables  tables accumulated from earlier sets; may be null, in which case
     *                one is created only if a large code point is encountered
     * @param init    when true {@code table} is first reset to all-true
     * @param enc     encoding used to decode this set
     * @return {@code tables}, possibly newly created; may still be null
     */
    private TrTables trSetupTable(Ruby runtime, boolean[]table, TrTables tables, boolean init, Encoding enc) {
        final TR tr = new TR(value);
        boolean cflag = false;
        // A leading '^' on a set longer than one byte negates the set.
        if (value.getRealSize() > 1) {
            if (enc.isAsciiCompatible()) {
                if ((value.getUnsafeBytes()[value.getBegin()] & 0xff) == '^') {
                    cflag = true;
                    tr.p++;
                }
            } else {
                int l = StringSupport.preciseLength(enc, tr.buf, tr.p, tr.pend);
                if (enc.mbcToCode(tr.buf, tr.p, tr.pend) == '^') {
                    cflag = true;
                    tr.p += l;
                }
            }
        }

        if (init) for (int i=0; i<TRANS_SIZE; i++) table[i] = true;

        final boolean[]buf = new boolean[TRANS_SIZE];
        for (int i=0; i<TRANS_SIZE; i++) buf[i] = cflag;

        int c;
        IntHash<IRubyObject> hash = null, phash = null;
        while ((c = trNext(tr, runtime, enc)) >= 0) {
            if (c < TRANS_SIZE) {
                buf[c & 0xff] = !cflag;
            } else {
                // First large code point of this set: install a fresh hash in the slot
                // matching the set's polarity and remember the hash it replaces.
                if (hash == null) {
                    hash = new IntHash<IRubyObject>();
                    if (tables == null) tables = new TrTables();
                    if (cflag) {
                        phash = tables.noDel;
                        tables.noDel = hash;
                    } else {
                        phash = tables.del;
                        tables.del = hash;
                    }
                }
                // Keep the code point only if there was no previous hash or it was in it.
                if (phash == null || phash.get(c) != null) hash.put(c, NEVER);
            }
        }

        for (int i=0; i<TRANS_SIZE; i++) table[i] = table[i] && buf[i];
        return tables;
    }
  5434. private boolean trFind(int c, boolean[]table, TrTables tables) {
  5435. return c < TRANS_SIZE ? table[c] : tables != null &&
  5436. ((tables.del != null && tables.del.get(c) != null) &&
  5437. (tables.noDel == null || tables.noDel.get(c) == null));
  5438. }
    /** tr_trans
     *
     * Byte-wise (Ruby 1.8) translation of this string, performed in place.
     *
     * @param context the current thread context
     * @param src     characters to translate; a leading '^' on a set of two or more bytes negates it
     * @param repl    replacement characters; when empty the call is delegated to {@code delete_bang}
     * @param sflag   when true, consecutive translated bytes that map to the same value are collapsed
     * @return this string if anything was modified, otherwise nil
     */
    private IRubyObject trTrans(ThreadContext context, IRubyObject src, IRubyObject repl, boolean sflag) {
        Ruby runtime = context.getRuntime();
        if (value.getRealSize() == 0) return runtime.getNil();

        ByteList replList = repl.convertToString().value;
        if (replList.getRealSize() == 0) return delete_bang(context, src);

        ByteList srcList = src.convertToString().value;
        final TR trSrc = new TR(srcList);
        boolean cflag = false;
        if (srcList.getRealSize() >= 2 && srcList.getUnsafeBytes()[srcList.getBegin()] == '^') {
            cflag = true;
            trSrc.p++;
        }

        int c;
        // trans[b] is the replacement for byte b, or -1 when b is left untouched.
        final int[]trans = new int[TRANS_SIZE];
        final TR trRepl = new TR(replList);
        if (cflag) {
            // Negated set: every byte NOT listed in src maps to the last replacement value.
            for (int i=0; i<TRANS_SIZE; i++) trans[i] = 1;
            while ((c = trNext(trSrc)) >= 0) trans[c & 0xff] = -1;
            while ((c = trNext(trRepl)) >= 0) {} // drain repl so trRepl.now holds its last value
            for (int i=0; i<TRANS_SIZE; i++) {
                if (trans[i] >= 0) trans[i] = trRepl.now;
            }
        } else {
            for (int i=0; i<TRANS_SIZE; i++) trans[i] = -1;
            while ((c = trNext(trSrc)) >= 0) {
                int r = trNext(trRepl);
                // repl exhausted: keep using its last value.
                if (r == -1) r = trRepl.now;
                trans[c & 0xff] = r;
            }
        }

        modify();

        int s = value.getBegin();
        int send = s + value.getRealSize();
        byte sbytes[] = value.getUnsafeBytes();
        boolean modify = false;
        if (sflag) {
            int t = s;     // write cursor
            int last = -1; // last translated value written; reset by an untranslated byte
            while (s < send) {
                int c0 = sbytes[s++];
                if ((c = trans[c0 & 0xff]) >= 0) {
                    if (last == c) continue;
                    last = c;
                    sbytes[t++] = (byte)(c & 0xff);
                    modify = true;
                } else {
                    last = -1;
                    sbytes[t++] = (byte)c0;
                }
            }

            if (value.getRealSize() > (t - value.getBegin())) {
                value.setRealSize(t - value.getBegin());
                modify = true;
            }
        } else {
            while (s < send) {
                if ((c = trans[sbytes[s] & 0xff]) >= 0) {
                    sbytes[s] = (byte)(c & 0xff);
                    modify = true;
                }
                s++;
            }
        }

        return modify ? this : runtime.getNil();
    }
  5507. private IRubyObject trTrans19(ThreadContext context, IRubyObject src, IRubyObject repl, boolean sflag) {
  5508. Ruby runtime = context.getRuntime();
  5509. if (value.getRealSize() == 0) return runtime.getNil();
  5510. RubyString replStr = repl.convertToString();
  5511. ByteList replList = replStr.value;
  5512. if (replList.getRealSize() == 0) return delete_bang19(context, src);
  5513. RubyString srcStr = src.convertToString();
  5514. ByteList srcList = srcStr.value;
  5515. Encoding e1 = checkEncoding(srcStr);
  5516. Encoding e2 = checkEncoding(replStr);
  5517. Encoding enc = e1 == e2 ? e1 : srcStr.checkEncoding(replStr);
  5518. int cr = getCodeRange();
  5519. final TR trSrc = new TR(srcList);
  5520. boolean cflag = false;
  5521. if (value.getRealSize() > 1) {
  5522. if (enc.isAsciiCompatible()) {
  5523. if (trSrc.buf.length > 0 && (trSrc.buf[trSrc.p] & 0xff) == '^' && trSrc.p + 1 < trSrc.pend) {
  5524. cflag = true;
  5525. trSrc.p++;
  5526. }
  5527. } else {
  5528. int cl = StringSupport.preciseLength(enc, trSrc.buf, trSrc.p, trSrc.pend);
  5529. if (enc.mbcToCode(trSrc.buf, trSrc.p, trSrc.pend) == '^' && trSrc.p + cl < trSrc.pend) {
  5530. cflag = true;
  5531. trSrc.p += cl;
  5532. }
  5533. }
  5534. }
  5535. boolean singlebyte = true;
  5536. int c;
  5537. final int[]trans = new int[TRANS_SIZE];
  5538. IntHash<Integer> hash = null;
  5539. final TR trRepl = new TR(replList);
  5540. if (cflag) {
  5541. for (int i=0; i<TRANS_SIZE; i++) trans[i] = 1;
  5542. while ((c = trNext(trSrc, runtime, enc)) >= 0) {
  5543. if (c < TRANS_SIZE) {
  5544. trans[c & 0xff] = -1;
  5545. } else {
  5546. if (hash == null) hash = new IntHash<Integer>();
  5547. hash.put(c, 1); // QTRUE
  5548. }
  5549. }
  5550. while ((c = trNext(trRepl, runtime, enc)) >= 0) {} /* retrieve last replacer */
  5551. int last = trRepl.now;
  5552. for (int i=0; i<TRANS_SIZE; i++) {
  5553. if (trans[i] >= 0) trans[i] = last;
  5554. }
  5555. } else {
  5556. for (int i=0; i<TRANS_SIZE; i++) trans[i] = -1;
  5557. while ((c = trNext(trSrc, runtime, enc)) >= 0) {
  5558. int r = trNext(trRepl, runtime, enc);
  5559. if (r == -1) r = trRepl.now;
  5560. if (c < TRANS_SIZE) {
  5561. trans[c & 0xff] = r;
  5562. if (r > TRANS_SIZE - 1) singlebyte = false;
  5563. } else {
  5564. if (hash == null) hash = new IntHash<Integer>();
  5565. hash.put(c, r);
  5566. }
  5567. }
  5568. }
  5569. if (cr == CR_VALID) cr = CR_7BIT;
  5570. modifyAndKeepCodeRange();
  5571. int s = value.getBegin();
  5572. int send = s + value.getRealSize();
  5573. byte sbytes[] = value.getUnsafeBytes();
  5574. int max = value.getRealSize();
  5575. boolean modify = false;
  5576. int last = -1;
  5577. int clen, tlen, c0;
  5578. if (sflag) {
  5579. int save = -1;
  5580. byte[]buf = new byte[max];
  5581. int t = 0;
  5582. while (s < send) {
  5583. boolean mayModify = false;
  5584. c0 = c = codePoint(runtime, e1, sbytes, s, send);
  5585. clen = codeLength(runtime, e1, c);
  5586. tlen = enc == e1 ? clen : codeLength(runtime, enc, c);
  5587. s += clen;
  5588. c = trCode(c, trans, hash, cflag, last);
  5589. if (c != -1) {
  5590. if (save == c) {
  5591. if (cr == CR_7BIT && !Encoding.isAscii(c)) cr = CR_VALID;
  5592. continue;
  5593. }
  5594. save = c;
  5595. tlen = codeLength(runtime, enc, c);
  5596. modify = true;
  5597. } else {
  5598. save = -1;
  5599. c = c0;
  5600. if (enc != e1) mayModify = true;
  5601. }
  5602. while (t + tlen >= max) {
  5603. max <<= 1;
  5604. byte[]tbuf = new byte[max];
  5605. System.arraycopy(buf, 0, tbuf, 0, buf.length);
  5606. buf = tbuf;
  5607. }
  5608. enc.codeToMbc(c, buf, t);
  5609. if (mayModify && (tlen == 1 ? sbytes[s] != buf[t] : ByteList.memcmp(sbytes, s, buf, t, tlen) != 0)) modify = true;
  5610. if (cr == CR_7BIT && !Encoding.isAscii(c)) cr = CR_VALID;
  5611. t += tlen;
  5612. }
  5613. value.setUnsafeBytes(buf);
  5614. value.setRealSize(t);
  5615. } else if (enc.isSingleByte() || (singlebyte && hash == null)) {
  5616. while (s < send) {
  5617. c = sbytes[s] & 0xff;
  5618. if (trans[c] != -1) {
  5619. if (!cflag) {
  5620. c = trans[c];
  5621. sbytes[s] = (byte)c;
  5622. } else {
  5623. sbytes[s] = (byte)last;
  5624. }
  5625. modify = true;
  5626. }
  5627. if (cr == CR_7BIT && !Encoding.isAscii(c)) cr = CR_VALID;
  5628. s++;
  5629. }
  5630. } else {
  5631. max += max >> 1;
  5632. byte[]buf = new byte[max];
  5633. int t = 0;
  5634. while (s < send) {
  5635. boolean mayModify = false;
  5636. c0 = c = codePoint(runtime, e1, sbytes, s, send);
  5637. clen = codeLength(runtime, e1, c);
  5638. tlen = enc == e1 ? clen : codeLength(runtime, enc, c);
  5639. c = trCode(c, trans, hash, cflag, last);
  5640. if (c != -1) {
  5641. tlen = codeLength(runtime, enc, c);
  5642. modify = true;
  5643. } else {
  5644. c = c0;
  5645. if (enc != e1) mayModify = true;
  5646. }
  5647. while (t + tlen >= max) {
  5648. max <<= 1;
  5649. byte[]tbuf = new byte[max];
  5650. System.arraycopy(buf, 0, tbuf, 0, buf.length);
  5651. buf = tbuf;
  5652. }
  5653. enc.codeToMbc(c, buf, t);
  5654. if (mayModify && (tlen == 1 ? sbytes[s] != buf[t] : ByteList.memcmp(sbytes, s, buf, t, tlen) != 0)) modify = true;
  5655. if (cr == CR_7BIT && !Encoding.isAscii(c)) cr = CR_VALID;
  5656. s += clen;
  5657. t += tlen;
  5658. }
  5659. value.setUnsafeBytes(buf);
  5660. value.setRealSize(t);
  5661. }
  5662. if (modify) {
  5663. if (cr != CR_BROKEN) setCodeRange(cr);
  5664. associateEncoding(enc);
  5665. return this;
  5666. }
  5667. return runtime.getNil();
  5668. }
  5669. private int trCode(int c, int[]trans, IntHash<Integer> hash, boolean cflag, int last) {
  5670. if (c < TRANS_SIZE) {
  5671. return trans[c];
  5672. } else if (hash != null) {
  5673. Integer tmp = hash.get(c);
  5674. if (tmp == null) {
  5675. return cflag ? last : -1;
  5676. } else {
  5677. return cflag ? -1 : tmp;
  5678. }
  5679. } else {
  5680. return -1;
  5681. }
  5682. }
    /** trnext
     *
     * Byte-wise variant. Returns the next value denoted by a tr specification,
     * expanding "a-z" style ranges one value per call, or -1 once the specification
     * is exhausted.
     */
    private int trNext(TR t) {
        byte[]buf = t.buf;

        for (;;) {
            if (!t.gen) {
                if (t.p == t.pend) return -1;
                // A backslash that is not the last byte escapes the byte after it.
                if (t.p < t.pend -1 && buf[t.p] == '\\') t.p++;
                t.now = buf[t.p++] & 0xff;
                // A '-' with at least one byte after it introduces a range now..max.
                if (t.p < t.pend - 1 && buf[t.p] == '-') {
                    t.p++;
                    if (t.p < t.pend) {
                        // Descending range: discard it entirely and continue with what follows.
                        if (t.now > (buf[t.p] & 0xff)) {
                            t.p++;
                            continue;
                        }
                        t.gen = true;
                        t.max = buf[t.p++] & 0xff;
                    }
                }
                return t.now;
            } else if (++t.now < t.max) {
                // Still inside the range being expanded.
                return t.now;
            } else {
                // Upper bound reached: leave range mode and emit the bound itself.
                t.gen = false;
                return t.max;
            }
        }
    }
    /**
     * Encoding-aware variant of {@code trNext}. Returns the next code point denoted by
     * a tr specification, expanding ranges one code point per call, or -1 once the
     * specification is exhausted.
     *
     * @param t       cursor over the specification
     * @param runtime the current runtime (decoding and error creation)
     * @param enc     encoding used to decode the specification
     * @throws RaiseException (ArgumentError) when a range's start is greater than its end
     */
    private int trNext(TR t, Ruby runtime, Encoding enc) {
        byte[]buf = t.buf;

        for (;;) {
            if (!t.gen) {
                if (t.p == t.pend) return -1;
                // A backslash that is not the last byte escapes the character after it.
                if (t.p < t.pend -1 && buf[t.p] == '\\') t.p++;
                t.now = codePoint(runtime, enc, buf, t.p, t.pend);
                t.p += codeLength(runtime, enc, t.now);
                // A '-' with at least one byte after it introduces a range now..max.
                if (t.p < t.pend - 1 && buf[t.p] == '-') {
                    t.p++;
                    if (t.p < t.pend) {
                        int c = codePoint(runtime, enc, buf, t.p, t.pend);
                        t.p += codeLength(runtime, enc, c);
                        if (t.now > c) {
                            // The offending range is only spelled out when both ends are ASCII.
                            if (t.now < 0x80 && c < 0x80) {
                                throw runtime.newArgumentError("invalid range \""
                                        + (char) t.now + "-" + (char) c + "\" in string transliteration");
                            }

                            throw runtime.newArgumentError("invalid range in string transliteration");
                        }
                        t.gen = true;
                        t.max = c;
                    }
                }
                return t.now;
            } else if (++t.now < t.max) {
                // Still inside the range being expanded.
                return t.now;
            } else {
                // Upper bound reached: leave range mode and emit the bound itself.
                t.gen = false;
                return t.max;
            }
        }
    }
  5746. /** rb_str_tr_s / rb_str_tr_s_bang
  5747. *
  5748. */
  5749. @JRubyMethod(name ="tr_s", compat = RUBY1_8)
  5750. public IRubyObject tr_s(ThreadContext context, IRubyObject src, IRubyObject repl) {
  5751. RubyString str = strDup(context.getRuntime());
  5752. str.trTrans(context, src, repl, true);
  5753. return str;
  5754. }
  5755. @JRubyMethod(name = "tr_s!", compat = RUBY1_8)
  5756. public IRubyObject tr_s_bang(ThreadContext context, IRubyObject src, IRubyObject repl) {
  5757. return trTrans(context, src, repl, true);
  5758. }
  5759. @JRubyMethod(name ="tr_s", compat = RUBY1_9)
  5760. public IRubyObject tr_s19(ThreadContext context, IRubyObject src, IRubyObject repl) {
  5761. RubyString str = strDup(context.getRuntime());
  5762. str.trTrans19(context, src, repl, true);
  5763. return str;
  5764. }
  5765. @JRubyMethod(name = "tr_s!", compat = RUBY1_9)
  5766. public IRubyObject tr_s_bang19(ThreadContext context, IRubyObject src, IRubyObject repl) {
  5767. return trTrans19(context, src, repl, true);
  5768. }
  5769. /** rb_str_each_line
  5770. *
  5771. */
  5772. public IRubyObject each_line(ThreadContext context, Block block) {
  5773. return each_lineCommon(context, context.getRuntime().getGlobalVariables().get("$/"), block);
  5774. }
  5775. public IRubyObject each_line(ThreadContext context, IRubyObject arg, Block block) {
  5776. return each_lineCommon(context, arg, block);
  5777. }
    /**
     * Byte-wise (Ruby 1.8) line iteration: yields successive substrings of this
     * string, each ending with the separator, followed by any unterminated rest.
     *
     * @param context the current thread context
     * @param sep     nil (the whole string is yielded once), or the separator; an empty
     *                separator makes runs of two or more "\n" bytes act as the separator
     * @param block   receives each line
     * @return this string
     */
    public IRubyObject each_lineCommon(ThreadContext context, IRubyObject sep, Block block) {
        Ruby runtime = context.getRuntime();
        if (sep.isNil()) {
            block.yield(context, this);
            return this;
        }

        RubyString sepStr = sep.convertToString();
        ByteList sepValue = sepStr.value;
        int rslen = sepValue.getRealSize();

        // Last byte of the separator; used as a cheap pre-check before the full compare.
        final byte newline;
        if (rslen == 0) {
            newline = '\n';
        } else {
            newline = sepValue.getUnsafeBytes()[sepValue.getBegin() + rslen - 1];
        }

        int p = value.getBegin();
        int end = p + value.getRealSize();
        int ptr = p, s = p; // ptr: start of the string; s: start of the current line
        int len = value.getRealSize();
        byte[] bytes = value.getUnsafeBytes();

        // A separator cannot end before rslen bytes have been seen.
        p += rslen;

        for (; p < end; p++) {
            if (rslen == 0 && bytes[p] == '\n') {
                // A lone "\n" does not end a paragraph; two or more do, and the whole run is consumed.
                if (++p == end || bytes[p] != '\n') continue;
                while(p < end && bytes[p] == '\n') p++;
            }
            // p is one past a candidate separator end.
            if (ptr < p && bytes[p - 1] == newline &&
               (rslen <= 1 ||
                ByteList.memcmp(sepValue.getUnsafeBytes(), sepValue.getBegin(), rslen, bytes, p - rslen, rslen) == 0)) {
                block.yield(context, makeShared(runtime, s - ptr, p - s).infectBy(this));
                // Re-validate against the bytes/length captured before yielding.
                modifyCheck(bytes, len);
                s = p;
            }
        }

        // Yield whatever follows the last separator.
        if (s != end) {
            if (p > end) p = end;
            block.yield(context, makeShared(runtime, s - ptr, p - s).infectBy(this));
        }

        return this;
    }
  5818. @JRubyMethod(name = "each", compat = RUBY1_8)
  5819. public IRubyObject each18(ThreadContext context, Block block) {
  5820. return block.isGiven() ? each_line(context, block) :
  5821. enumeratorize(context.getRuntime(), this, "each");
  5822. }
  5823. @JRubyMethod(name = "each", compat = RUBY1_8)
  5824. public IRubyObject each18(ThreadContext context, IRubyObject arg, Block block) {
  5825. return block.isGiven() ? each_lineCommon(context, arg, block) :
  5826. enumeratorize(context.getRuntime(), this, "each", arg);
  5827. }
  5828. @JRubyMethod(name = "each_line", compat = RUBY1_8)
  5829. public IRubyObject each_line18(ThreadContext context, Block block) {
  5830. return block.isGiven() ? each_line(context, block) :
  5831. enumeratorize(context.getRuntime(), this, "each_line");
  5832. }
  5833. @JRubyMethod(name = "each_line", compat = RUBY1_8)
  5834. public IRubyObject each_line18(ThreadContext context, IRubyObject arg, Block block) {
  5835. return block.isGiven() ? each_lineCommon(context, arg, block) :
  5836. enumeratorize(context.getRuntime(), this, "each_line", arg);
  5837. }
  5838. @JRubyMethod(name = "lines", compat = RUBY1_8)
  5839. public IRubyObject lines18(ThreadContext context, Block block) {
  5840. return block.isGiven() ? each_line(context, block) :
  5841. enumeratorize(context.getRuntime(), this, "lines");
  5842. }
  5843. @JRubyMethod(name = "lines", compat = RUBY1_8)
  5844. public IRubyObject lines18(ThreadContext context, IRubyObject arg, Block block) {
  5845. return block.isGiven() ? each_lineCommon(context, arg, block) :
  5846. enumeratorize(context.getRuntime(), this, "lines", arg);
  5847. }
  5848. @JRubyMethod(name = "each_line", compat = RUBY1_9)
  5849. public IRubyObject each_line19(ThreadContext context, Block block) {
  5850. return block.isGiven() ? each_lineCommon19(context, block) :
  5851. enumeratorize(context.getRuntime(), this, "each_line");
  5852. }
  5853. @JRubyMethod(name = "each_line", compat = RUBY1_9)
  5854. public IRubyObject each_line19(ThreadContext context, IRubyObject arg, Block block) {
  5855. return block.isGiven() ? each_lineCommon19(context, arg, block) :
  5856. enumeratorize(context.getRuntime(), this, "each_line", arg);
  5857. }
  5858. @JRubyMethod(compat = RUBY1_9)
  5859. public IRubyObject lines(ThreadContext context, Block block) {
  5860. return block.isGiven() ? each_lineCommon19(context, block) :
  5861. enumeratorize(context.getRuntime(), this, "lines");
  5862. }
  5863. @JRubyMethod(compat = RUBY1_9)
  5864. public IRubyObject lines(ThreadContext context, IRubyObject arg, Block block) {
  5865. return block.isGiven() ? each_lineCommon19(context, arg, block) :
  5866. enumeratorize(context.getRuntime(), this, "lines", arg);
  5867. }
  5868. private IRubyObject each_lineCommon19(ThreadContext context, Block block) {
  5869. return each_lineCommon19(context, context.getRuntime().getGlobalVariables().get("$/"), block);
  5870. }
  5871. private IRubyObject each_lineCommon19(ThreadContext context, IRubyObject sep, Block block) {
  5872. Ruby runtime = context.getRuntime();
  5873. if (sep.isNil()) {
  5874. block.yield(context, this);
  5875. return this;
  5876. }
  5877. if (! sep.respondsTo("to_str")) {
  5878. throw runtime.newTypeError("can't convert " + sep.getMetaClass() + " into String");
  5879. }
  5880. ByteList val = value.shallowDup();
  5881. int p = val.getBegin();
  5882. int s = p;
  5883. int len = val.getRealSize();
  5884. int end = p + len;
  5885. byte[]bytes = val.getUnsafeBytes();
  5886. final Encoding enc;
  5887. RubyString sepStr = sep.convertToString();
  5888. if (sepStr == runtime.getGlobalVariables().getDefaultSeparator()) {
  5889. enc = val.getEncoding();
  5890. while (p < end) {
  5891. if (bytes[p] == (byte)'\n') {
  5892. int p0 = enc.leftAdjustCharHead(bytes, s, p, end);
  5893. if (enc.isNewLine(bytes, p0, end)) {
  5894. p = p0 + StringSupport.length(enc, bytes, p0, end);
  5895. block.yield(context, makeShared19(runtime, val, s, p - s).infectBy(this));
  5896. s = p++;
  5897. }
  5898. }
  5899. p++;
  5900. }
  5901. } else {
  5902. enc = checkEncoding(sepStr);
  5903. ByteList sepValue = sepStr.value;
  5904. final int newLine;
  5905. int rslen = sepValue.getRealSize();
  5906. if (rslen == 0) {
  5907. newLine = '\n';
  5908. } else {
  5909. newLine = codePoint(runtime, enc, sepValue.getUnsafeBytes(), sepValue.getBegin(), sepValue.getBegin() + sepValue.getRealSize());
  5910. }
  5911. while (p < end) {
  5912. int c = codePoint(runtime, enc, bytes, p, end);
  5913. again: do {
  5914. int n = codeLength(runtime, enc, c);
  5915. if (rslen == 0 && c == newLine) {
  5916. p += n;
  5917. if (p < end && (c = codePoint(runtime, enc, bytes, p, end)) != newLine) continue again;
  5918. while (p < end && codePoint(runtime, enc, bytes, p, end) == newLine) p += n;
  5919. p -= n;
  5920. }
  5921. if (c == newLine && (rslen <= 1 ||
  5922. ByteList.memcmp(sepValue.getUnsafeBytes(), sepValue.getBegin(), rslen, bytes, p, rslen) == 0)) {
  5923. block.yield(context, makeShared19(runtime, val, s, p - s + (rslen != 0 ? rslen : n)).infectBy(this));
  5924. s = p + (rslen != 0 ? rslen : n);
  5925. }
  5926. p += n;
  5927. } while (false);
  5928. }
  5929. }
  5930. if (s != end) {
  5931. block.yield(context, makeShared19(runtime, val, s, end - s).infectBy(this));
  5932. }
  5933. return this;
  5934. }
  5935. /**
  5936. * rb_str_each_byte
  5937. */
  5938. public RubyString each_byte(ThreadContext context, Block block) {
  5939. Ruby runtime = context.getRuntime();
  5940. // Check the length every iteration, since
  5941. // the block can modify this string.
  5942. for (int i = 0; i < value.length(); i++) {
  5943. block.yield(context, runtime.newFixnum(value.get(i) & 0xFF));
  5944. }
  5945. return this;
  5946. }
  5947. @JRubyMethod(name = "each_byte")
  5948. public IRubyObject each_byte19(ThreadContext context, Block block) {
  5949. return block.isGiven() ? each_byte(context, block) : enumeratorize(context.getRuntime(), this, "each_byte");
  5950. }
  5951. @JRubyMethod
  5952. public IRubyObject bytes(ThreadContext context, Block block) {
  5953. return block.isGiven() ? each_byte(context, block) : enumeratorize(context.getRuntime(), this, "bytes");
  5954. }
  5955. /** rb_str_each_char
  5956. *
  5957. */
  5958. @JRubyMethod(name = "each_char", compat = RUBY1_8)
  5959. public IRubyObject each_char18(ThreadContext context, Block block) {
  5960. return block.isGiven() ? each_charCommon18(context, block) : enumeratorize(context.getRuntime(), this, "each_char");
  5961. }
  5962. @JRubyMethod(name = "chars", compat = RUBY1_8)
  5963. public IRubyObject chars18(ThreadContext context, Block block) {
  5964. return block.isGiven() ? each_charCommon18(context, block) : enumeratorize(context.getRuntime(), this, "chars");
  5965. }
  5966. private IRubyObject each_charCommon18(ThreadContext context, Block block) {
  5967. byte bytes[] = value.getUnsafeBytes();
  5968. int p = value.getBegin();
  5969. int end = p + value.getRealSize();
  5970. Ruby runtime = context.getRuntime();
  5971. Encoding enc = runtime.getKCode().getEncoding();
  5972. ByteList val = value.shallowDup();
  5973. while (p < end) {
  5974. int n = StringSupport.length(enc, bytes, p, end);
  5975. block.yield(context, makeShared19(runtime, val, p-val.getBegin(), n));
  5976. p += n;
  5977. }
  5978. return this;
  5979. }
  5980. @JRubyMethod(name = "each_char", compat = RUBY1_9)
  5981. public IRubyObject each_char19(ThreadContext context, Block block) {
  5982. return block.isGiven() ? each_charCommon19(context, block) : enumeratorize(context.getRuntime(), this, "each_char");
  5983. }
  5984. @JRubyMethod(name = "chars", compat = RUBY1_9)
  5985. public IRubyObject chars19(ThreadContext context, Block block) {
  5986. return block.isGiven() ? each_charCommon19(context, block) : enumeratorize(context.getRuntime(), this, "chars");
  5987. }
  5988. private IRubyObject each_charCommon19(ThreadContext context, Block block) {
  5989. byte bytes[] = value.getUnsafeBytes();
  5990. int p = value.getBegin();
  5991. int end = p + value.getRealSize();
  5992. Encoding enc = value.getEncoding();
  5993. Ruby runtime = context.getRuntime();
  5994. ByteList val = value.shallowDup();
  5995. while (p < end) {
  5996. int n = StringSupport.length(enc, bytes, p, end);
  5997. block.yield(context, makeShared19(runtime, val, p-value.getBegin(), n));
  5998. p += n;
  5999. }
  6000. return this;
  6001. }
  6002. /** rb_str_each_codepoint
  6003. *
  6004. */
  6005. @JRubyMethod(compat = RUBY1_9)
  6006. public IRubyObject each_codepoint(ThreadContext context, Block block) {
  6007. if (!block.isGiven()) return enumeratorize(context.getRuntime(), this, "each_codepoint");
  6008. return singleByteOptimizable() ? each_byte(context, block) : each_codepointCommon(context, block);
  6009. }
  6010. @JRubyMethod(compat = RUBY1_9)
  6011. public IRubyObject codepoints(ThreadContext context, Block block) {
  6012. if (!block.isGiven()) return enumeratorize(context.getRuntime(), this, "codepoints");
  6013. return singleByteOptimizable() ? each_byte(context, block) : each_codepointCommon(context, block);
  6014. }
  6015. private IRubyObject each_codepointCommon(ThreadContext context, Block block) {
  6016. Ruby runtime = context.getRuntime();
  6017. byte bytes[] = value.getUnsafeBytes();
  6018. int p = value.getBegin();
  6019. int end = p + value.getRealSize();
  6020. Encoding enc = value.getEncoding();
  6021. while (p < end) {
  6022. int c = codePoint(runtime, enc, bytes, p, end);
  6023. int n = codeLength(runtime, enc, c);
  6024. block.yield(context, runtime.newFixnum(c));
  6025. p += n;
  6026. }
  6027. return this;
  6028. }
  6029. /** rb_str_intern
  6030. *
  6031. */
  6032. private RubySymbol to_sym() {
  6033. RubySymbol symbol = getRuntime().getSymbolTable().getSymbol(value);
  6034. if (symbol.getBytes() == value) shareLevel = SHARE_LEVEL_BYTELIST;
  6035. return symbol;
  6036. }
  6037. @JRubyMethod(name = {"to_sym", "intern"}, compat = RUBY1_8)
  6038. public RubySymbol intern() {
  6039. if (value.getRealSize() == 0) throw getRuntime().newArgumentError("interning empty string");
  6040. for (int i = 0; i < value.getRealSize(); i++) {
  6041. if (value.getUnsafeBytes()[value.getBegin() + i] == 0) throw getRuntime().newArgumentError("symbol string may not contain '\\0'");
  6042. }
  6043. return to_sym();
  6044. }
  6045. @JRubyMethod(name = {"to_sym", "intern"}, compat = RUBY1_9)
  6046. public RubySymbol intern19() {
  6047. return to_sym();
  6048. }
  6049. @JRubyMethod(name = "ord", compat = RUBY1_9)
  6050. public IRubyObject ord(ThreadContext context) {
  6051. Ruby runtime = context.getRuntime();
  6052. return RubyFixnum.newFixnum(runtime, codePoint(runtime, value.getEncoding(), value.getUnsafeBytes(), value.getBegin(),
  6053. value.getBegin() + value.getRealSize()));
  6054. }
  6055. @JRubyMethod(name = "sum")
  6056. public IRubyObject sum(ThreadContext context) {
  6057. return sumCommon(context, 16);
  6058. }
  6059. @JRubyMethod(name = "sum")
  6060. public IRubyObject sum(ThreadContext context, IRubyObject arg) {
  6061. return sumCommon(context, RubyNumeric.num2long(arg));
  6062. }
  6063. public IRubyObject sumCommon(ThreadContext context, long bits) {
  6064. Ruby runtime = context.getRuntime();
  6065. byte[]bytes = value.getUnsafeBytes();
  6066. int p = value.getBegin();
  6067. int len = value.getRealSize();
  6068. int end = p + len;
  6069. if (bits >= 8 * 8) { // long size * bits in byte
  6070. IRubyObject one = RubyFixnum.one(runtime);
  6071. IRubyObject sum = RubyFixnum.zero(runtime);
  6072. while (p < end) {
  6073. modifyCheck(bytes, len);
  6074. sum = sum.callMethod(context, "+", RubyFixnum.newFixnum(runtime, bytes[p++] & 0xff));
  6075. }
  6076. if (bits != 0) {
  6077. IRubyObject mod = one.callMethod(context, "<<", RubyFixnum.newFixnum(runtime, bits));
  6078. sum = sum.callMethod(context, "&", mod.callMethod(context, "-", one));
  6079. }
  6080. return sum;
  6081. } else {
  6082. long sum = 0;
  6083. while (p < end) {
  6084. modifyCheck(bytes, len);
  6085. sum += bytes[p++] & 0xff;
  6086. }
  6087. return RubyFixnum.newFixnum(runtime, bits == 0 ? sum : sum & (1L << bits) - 1L);
  6088. }
  6089. }
  6090. /** string_to_c
  6091. *
  6092. */
  6093. @JRubyMethod(name = "to_c", reads = BACKREF, writes = BACKREF, compat = RUBY1_9)
  6094. public IRubyObject to_c(ThreadContext context) {
  6095. Ruby runtime = context.getRuntime();
  6096. DynamicScope scope = context.getCurrentScope();
  6097. IRubyObject backref = scope.getBackRef(runtime);
  6098. if (backref instanceof RubyMatchData) ((RubyMatchData)backref).use();
  6099. IRubyObject s = RuntimeHelpers.invoke(
  6100. context, this, "gsub",
  6101. RubyRegexp.newDummyRegexp(runtime, Numeric.ComplexPatterns.underscores_pat),
  6102. runtime.newString(new ByteList(new byte[]{'_'})));
  6103. RubyArray a = RubyComplex.str_to_c_internal(context, s);
  6104. scope.setBackRef(backref);
  6105. if (!a.eltInternal(0).isNil()) {
  6106. return a.eltInternal(0);
  6107. } else {
  6108. return RubyComplex.newComplexCanonicalize(context, RubyFixnum.zero(runtime));
  6109. }
  6110. }
  6111. /** string_to_r
  6112. *
  6113. */
  6114. @JRubyMethod(name = "to_r", reads = BACKREF, writes = BACKREF, compat = RUBY1_9)
  6115. public IRubyObject to_r(ThreadContext context) {
  6116. Ruby runtime = context.getRuntime();
  6117. DynamicScope scope = context.getCurrentScope();
  6118. IRubyObject backref = scope.getBackRef(runtime);
  6119. if (backref instanceof RubyMatchData) ((RubyMatchData)backref).use();
  6120. IRubyObject s = RuntimeHelpers.invoke(
  6121. context, this, "gsub",
  6122. RubyRegexp.newDummyRegexp(runtime, Numeric.ComplexPatterns.underscores_pat),
  6123. runtime.newString(new ByteList(new byte[]{'_'})));
  6124. RubyArray a = RubyRational.str_to_r_internal(context, s);
  6125. scope.setBackRef(backref);
  6126. if (!a.eltInternal(0).isNil()) {
  6127. return a.eltInternal(0);
  6128. } else {
  6129. return RubyRational.newRationalCanonicalize(context, RubyFixnum.zero(runtime));
  6130. }
  6131. }
  6132. public static RubyString unmarshalFrom(UnmarshalStream input) throws java.io.IOException {
  6133. RubyString result = newString(input.getRuntime(), input.unmarshalString());
  6134. input.registerLinkTarget(result);
  6135. return result;
  6136. }
  6137. /**
  6138. * @see org.jruby.util.Pack#unpack
  6139. */
  6140. @JRubyMethod(name = "unpack")
  6141. public RubyArray unpack(IRubyObject obj) {
  6142. return Pack.unpack(getRuntime(), this.value, stringValue(obj).value);
  6143. }
  6144. public void empty() {
  6145. value = ByteList.EMPTY_BYTELIST;
  6146. shareLevel = SHARE_LEVEL_BYTELIST;
  6147. }
  6148. @JRubyMethod(name = "encoding", compat = RUBY1_9)
  6149. public IRubyObject encoding(ThreadContext context) {
  6150. return context.getRuntime().getEncodingService().getEncoding(value.getEncoding());
  6151. }
  6152. @JRubyMethod(name = "encode!", compat = RUBY1_9)
  6153. public IRubyObject encode_bang(ThreadContext context) {
  6154. modify19();
  6155. IRubyObject defaultInternal = context.getRuntime().getEncodingService().getDefaultInternal();
  6156. if (!defaultInternal.isNil()) {
  6157. encode_bang(context, defaultInternal);
  6158. }
  6159. return this;
  6160. }
  6161. @JRubyMethod(name = "encode!", compat = RUBY1_9)
  6162. public IRubyObject encode_bang(ThreadContext context, IRubyObject enc) {
  6163. modify19();
  6164. Ruby runtime = context.getRuntime();
  6165. this.value = encodeCommon(context, runtime, this.value, enc, runtime.getNil(),
  6166. runtime.getNil());
  6167. return this;
  6168. }
  6169. @JRubyMethod(name = "encode!", compat = RUBY1_9)
  6170. public IRubyObject encode_bang(ThreadContext context, IRubyObject enc, IRubyObject arg) {
  6171. modify19();
  6172. Ruby runtime = context.getRuntime();
  6173. IRubyObject fromEnc = arg;
  6174. IRubyObject opts = runtime.getNil();
  6175. if (arg instanceof RubyHash) {
  6176. fromEnc = runtime.getNil();
  6177. opts = arg;
  6178. }
  6179. this.value = encodeCommon(context, runtime, this.value, enc, fromEnc, opts);
  6180. return this;
  6181. }
  6182. @JRubyMethod(name = "encode!", compat = RUBY1_9)
  6183. public IRubyObject encode_bang(ThreadContext context, IRubyObject enc, IRubyObject fromEnc, IRubyObject opts) {
  6184. modify19();
  6185. this.value = encodeCommon(context, context.getRuntime(), this.value, enc, fromEnc, opts);
  6186. return this;
  6187. }
  6188. @JRubyMethod(name = "encode", compat = RUBY1_9)
  6189. public IRubyObject encode(ThreadContext context) {
  6190. Ruby runtime = context.getRuntime();
  6191. IRubyObject defaultInternal = runtime.getEncodingService().getDefaultInternal();
  6192. if (!defaultInternal.isNil()) {
  6193. ByteList encoded = encodeCommon(context, runtime, value, defaultInternal,
  6194. runtime.getNil(), runtime.getNil());
  6195. return runtime.newString(encoded);
  6196. } else {
  6197. return dup();
  6198. }
  6199. }
  6200. @JRubyMethod(name = "encode", compat = RUBY1_9)
  6201. public IRubyObject encode(ThreadContext context, IRubyObject enc) {
  6202. Ruby runtime = context.getRuntime();
  6203. ByteList encoded = encodeCommon(context, runtime, value, enc, runtime.getNil(),
  6204. runtime.getNil());
  6205. return runtime.newString(encoded);
  6206. }
  6207. @JRubyMethod(name = "encode", compat = RUBY1_9)
  6208. public IRubyObject encode(ThreadContext context, IRubyObject enc, IRubyObject arg) {
  6209. Ruby runtime = context.getRuntime();
  6210. IRubyObject fromEnc = arg;
  6211. IRubyObject opts = runtime.getNil();
  6212. if (arg instanceof RubyHash) {
  6213. fromEnc = runtime.getNil();
  6214. opts = arg;
  6215. }
  6216. ByteList encoded = encodeCommon(context, runtime, value, enc, fromEnc, opts);
  6217. return runtime.newString(encoded);
  6218. }
  6219. @JRubyMethod(name = "encode", compat = RUBY1_9)
  6220. public IRubyObject encode(ThreadContext context, IRubyObject enc, IRubyObject fromEnc, IRubyObject opts) {
  6221. Ruby runtime = context.getRuntime();
  6222. ByteList encoded = encodeCommon(context, runtime, value, enc, fromEnc, opts);
  6223. return runtime.newString(encoded);
  6224. }
  6225. private static ByteList encodeCommon(ThreadContext context, Ruby runtime, ByteList value,
  6226. IRubyObject toEnc, IRubyObject fromEnc, IRubyObject opts) {
  6227. Charset from = fromEnc.isNil() ? getCharset(runtime, value.getEncoding()) : getCharset(runtime, fromEnc);
  6228. Encoding encoding = getEncoding(runtime, toEnc);
  6229. Charset to = getCharset(runtime, encoding);
  6230. CharsetEncoder encoder = getEncoder(context, runtime, to, opts);
  6231. // decode from "from" and encode to "to"
  6232. ByteBuffer fromBytes = ByteBuffer.wrap(value.getUnsafeBytes(), value.begin(), value.length());
  6233. ByteBuffer toBytes;
  6234. try {
  6235. toBytes = encoder.encode(from.decode(fromBytes));
  6236. } catch (CharacterCodingException e) {
  6237. throw runtime.newInvalidByteSequenceError("");
  6238. }
  6239. // CharsetEncoder#encode guarantees a newly-allocated buffer, so
  6240. // it's safe for us to take ownership of it without copying
  6241. ByteList result = new ByteList(toBytes.array(), toBytes.arrayOffset(),
  6242. toBytes.limit() - toBytes.arrayOffset(), false);
  6243. result.setEncoding(encoding);
  6244. return result;
  6245. }
  6246. private static CharsetEncoder getEncoder(ThreadContext context, Ruby runtime, Charset charset, IRubyObject opts) {
  6247. CharsetEncoder encoder = charset.newEncoder();
  6248. if (!opts.isNil()) {
  6249. RubyHash hash = (RubyHash) opts;
  6250. CodingErrorAction action = CodingErrorAction.REPLACE;
  6251. IRubyObject replace = hash.fastARef(runtime.newSymbol("replace"));
  6252. if (replace != null && !replace.isNil()) {
  6253. String replaceWith = replace.toString();
  6254. if (replaceWith.length() > 0) {
  6255. encoder.replaceWith(replaceWith.getBytes());
  6256. } else {
  6257. action = CodingErrorAction.IGNORE;
  6258. }
  6259. }
  6260. IRubyObject invalid = hash.fastARef(runtime.newSymbol("invalid"));
  6261. if (invalid != null && invalid.op_equal(context, runtime.newSymbol("replace")).isTrue()) {
  6262. encoder.onMalformedInput(action);
  6263. }
  6264. IRubyObject undef = hash.fastARef(runtime.newSymbol("undef"));
  6265. if (undef != null && undef.op_equal(context, runtime.newSymbol("replace")).isTrue()) {
  6266. encoder.onUnmappableCharacter(action);
  6267. }
  6268. // FIXME: Parse the option :xml
  6269. // The value must be +:text+ or +:attr+. If the
  6270. // value is +:text+ +#encode+ replaces undefined
  6271. // characters with their (upper-case hexadecimal)
  6272. // numeric character references. '&', '<', and
  6273. // '>' are converted to "&amp;", "&lt;", and
  6274. // "&gt;", respectively. If the value is +:attr+,
  6275. // +#encode+ also quotes the replacement result
  6276. // (using '"'), and replaces '"' with "&quot;".
  6277. }
  6278. return encoder;
  6279. }
  6280. private static Encoding getEncoding(Ruby runtime, IRubyObject toEnc) {
  6281. try {
  6282. return runtime.getEncodingService().getEncodingFromObject(toEnc);
  6283. } catch (Exception e) {
  6284. throw runtime.newConverterNotFoundError("code converter not found (" + toEnc.toString() + ")");
  6285. }
  6286. }
  6287. private static Charset getCharset(Ruby runtime, IRubyObject toEnc) {
  6288. try {
  6289. Encoding encoding = runtime.getEncodingService().getEncodingFromObject(toEnc);
  6290. return getCharset(runtime, encoding);
  6291. } catch (Exception e) {
  6292. throw runtime.newConverterNotFoundError("code converter not found (" + toEnc.toString() + ")");
  6293. }
  6294. }
  6295. private static Charset getCharset(Ruby runtime, Encoding encoding) {
  6296. try {
  6297. // special-casing ASCII* to ASCII
  6298. return encoding.toString().startsWith("ASCII") ?
  6299. Charset.forName("ASCII") :
  6300. Charset.forName(encoding.toString());
  6301. } catch (Exception e) {
  6302. throw runtime.newConverterNotFoundError("code converter not found (" + encoding.toString() + ")");
  6303. }
  6304. }
  6305. @JRubyMethod(name = "force_encoding", compat = RUBY1_9)
  6306. public IRubyObject force_encoding(ThreadContext context, IRubyObject enc) {
  6307. modify19();
  6308. Encoding encoding = context.runtime.getEncodingService().getEncodingFromObject(enc);
  6309. associateEncoding(encoding);
  6310. return this;
  6311. }
  6312. @JRubyMethod(name = "valid_encoding?", compat = RUBY1_9)
  6313. public IRubyObject valid_encoding_p(ThreadContext context) {
  6314. Ruby runtime = context.getRuntime();
  6315. return scanForCodeRange() == CR_BROKEN ? runtime.getFalse() : runtime.getTrue();
  6316. }
  6317. @JRubyMethod(name = "ascii_only?", compat = RUBY1_9)
  6318. public IRubyObject ascii_only_p(ThreadContext context) {
  6319. Ruby runtime = context.getRuntime();
  6320. return scanForCodeRange() == CR_7BIT ? runtime.getTrue() : runtime.getFalse();
  6321. }
  6322. /**
  6323. * Mutator for internal string representation.
  6324. *
  6325. * @param value The new java.lang.String this RubyString should encapsulate
  6326. * @deprecated
  6327. */
  6328. public void setValue(CharSequence value) {
  6329. view(ByteList.plain(value));
  6330. }
  6331. public void setValue(ByteList value) {
  6332. view(value);
  6333. }
  6334. public CharSequence getValue() {
  6335. return toString();
  6336. }
  6337. public byte[] getBytes() {
  6338. return value.bytes();
  6339. }
  6340. public ByteList getByteList() {
  6341. return value;
  6342. }
  6343. /** used by ar-jdbc
  6344. *
  6345. */
  6346. public String getUnicodeValue() {
  6347. return RubyEncoding.decodeUTF8(value.getUnsafeBytes(), value.getBegin(), value.getRealSize());
  6348. }
  6349. @Override
  6350. public Object toJava(Class target) {
  6351. if (target.isAssignableFrom(String.class)) {
  6352. return decodeString();
  6353. } else if (target.isAssignableFrom(ByteList.class)) {
  6354. return value;
  6355. } else {
  6356. return super.toJava(target);
  6357. }
  6358. }
  6359. /**
  6360. * Variable-arity versions for compatibility. Not bound to Ruby.
  6361. * @deprecated Use the versions with zero or one arguments
  6362. */
  6363. @Deprecated
  6364. public IRubyObject initialize(IRubyObject[] args, Block unusedBlock) {
  6365. switch (args.length) {
  6366. case 0: return this;
  6367. case 1: return initialize(args[0]);
  6368. default:Arity.raiseArgumentError(getRuntime(), args.length, 0, 1); return null; // not reached
  6369. }
  6370. }
  6371. @Deprecated
  6372. public IRubyObject sub(ThreadContext context, IRubyObject[] args, Block block) {
  6373. RubyString str = strDup(context.getRuntime());
  6374. str.sub_bang(context, args, block);
  6375. return str;
  6376. }
  6377. @Deprecated
  6378. public IRubyObject sub_bang(ThreadContext context, IRubyObject[] args, Block block) {
  6379. switch (args.length) {
  6380. case 1: return sub_bang(context, args[0], block);
  6381. case 2: return sub_bang(context, args[0], args[1], block);
  6382. default:Arity.raiseArgumentError(context.getRuntime(), args.length, 1, 2); return null; // not reached
  6383. }
  6384. }
  6385. @Deprecated
  6386. public IRubyObject gsub(ThreadContext context, IRubyObject[] args, Block block) {
  6387. switch (args.length) {
  6388. case 1: return gsub(context, args[0], block);
  6389. case 2: return gsub(context, args[0], args[1], block);
  6390. default:Arity.raiseArgumentError(context.getRuntime(), args.length, 1, 2); return null; // not reached
  6391. }
  6392. }
  6393. @Deprecated
  6394. public IRubyObject gsub_bang(ThreadContext context, IRubyObject[] args, Block block) {
  6395. switch (args.length) {
  6396. case 1: return gsub_bang(context, args[0], block);
  6397. case 2: return gsub_bang(context, args[0], args[1], block);
  6398. default:Arity.raiseArgumentError(context.getRuntime(), args.length, 1, 2); return null; // not reached
  6399. }
  6400. }
  6401. @Deprecated
  6402. public IRubyObject index(ThreadContext context, IRubyObject[] args) {
  6403. switch (args.length) {
  6404. case 1: return index(context, args[0]);
  6405. case 2: return index(context, args[0], args[1]);
  6406. default:Arity.raiseArgumentError(context.getRuntime(), args.length, 1, 2); return null; // not reached
  6407. }
  6408. }
  6409. @Deprecated
  6410. public IRubyObject rindex(ThreadContext context, IRubyObject[] args) {
  6411. switch (args.length) {
  6412. case 1: return rindex(context, args[0]);
  6413. case 2: return rindex(context, args[0], args[1]);
  6414. default:Arity.raiseArgumentError(context.getRuntime(), args.length, 1, 2); return null; // not reached
  6415. }
  6416. }
  6417. @Deprecated
  6418. public IRubyObject op_aref(ThreadContext context, IRubyObject[] args) {
  6419. switch (args.length) {
  6420. case 1: return op_aref(context, args[0]);
  6421. case 2: return op_aref(context, args[0], args[1]);
  6422. default:Arity.raiseArgumentError(context.getRuntime(), args.length, 1, 2); return null; // not reached
  6423. }
  6424. }
  6425. @Deprecated
  6426. public IRubyObject op_aset(ThreadContext context, IRubyObject[] args) {
  6427. switch (args.length) {
  6428. case 2: return op_aset(context, args[0], args[1]);
  6429. case 3: return op_aset(context, args[0], args[1], args[2]);
  6430. default:Arity.raiseArgumentError(context.getRuntime(), args.length, 2, 3); return null; // not reached
  6431. }
  6432. }
  6433. @Deprecated
  6434. public IRubyObject slice_bang(ThreadContext context, IRubyObject[] args) {
  6435. switch (args.length) {
  6436. case 1: return slice_bang(context, args[0]);
  6437. case 2: return slice_bang(context, args[0], args[1]);
  6438. default:Arity.raiseArgumentError(context.getRuntime(), args.length, 1, 2); return null; // not reached
  6439. }
  6440. }
  6441. @Deprecated
  6442. public IRubyObject to_i(IRubyObject[] args) {
  6443. switch (args.length) {
  6444. case 0: return to_i();
  6445. case 1: return to_i(args[0]);
  6446. default:Arity.raiseArgumentError(getRuntime(), args.length, 0, 1); return null; // not reached
  6447. }
  6448. }
  6449. @Deprecated
  6450. public RubyArray split(ThreadContext context, IRubyObject[] args) {
  6451. switch (args.length) {
  6452. case 0: return split(context);
  6453. case 1: return split(context, args[0]);
  6454. case 2: return split(context, args[0], args[1]);
  6455. default:Arity.raiseArgumentError(context.getRuntime(), args.length, 0, 2); return null; // not reached
  6456. }
  6457. }
  6458. @Deprecated
  6459. public IRubyObject ljust(IRubyObject [] args) {
  6460. switch (args.length) {
  6461. case 1: return ljust(args[0]);
  6462. case 2: return ljust(args[0], args[1]);
  6463. default: Arity.raiseArgumentError(getRuntime(), args.length, 1, 2); return null; // not reached
  6464. }
  6465. }
  6466. @Deprecated
  6467. public IRubyObject rjust(IRubyObject [] args) {
  6468. switch (args.length) {
  6469. case 1: return rjust(args[0]);
  6470. case 2: return rjust(args[0], args[1]);
  6471. default:Arity.raiseArgumentError(getRuntime(), args.length, 1, 2); return null; // not reached
  6472. }
  6473. }
  6474. @Deprecated
  6475. public IRubyObject center(IRubyObject [] args) {
  6476. switch (args.length) {
  6477. case 1: return center(args[0]);
  6478. case 2: return center(args[0], args[1]);
  6479. default:Arity.raiseArgumentError(getRuntime(), args.length, 1, 2); return null; // not reached
  6480. }
  6481. }
  6482. @Deprecated
  6483. public RubyString chomp(IRubyObject[] args) {
  6484. switch (args.length) {
  6485. case 0:return chomp(getRuntime().getCurrentContext());
  6486. case 1:return chomp(getRuntime().getCurrentContext(), args[0]);
  6487. default:Arity.raiseArgumentError(getRuntime(), args.length, 0, 1); return null; // not reached
  6488. }
  6489. }
  6490. @Deprecated
  6491. public IRubyObject chomp_bang(IRubyObject[] args) {
  6492. switch (args.length) {
  6493. case 0: return chomp_bang(getRuntime().getCurrentContext());
  6494. case 1: return chomp_bang(getRuntime().getCurrentContext(), args[0]);
  6495. default:Arity.raiseArgumentError(getRuntime(), args.length, 0, 1); return null; // not reached
  6496. }
  6497. }
  6498. }