PageRenderTime 155ms CodeModel.GetById 28ms RepoModel.GetById 2ms app.codeStats 4ms

/src/org/jruby/RubyString.java

https://bitbucket.org/nicksieger/jruby
Java | 7473 lines | 6155 code | 957 blank | 361 comment | 1696 complexity | 2bbf20d04a1303c55399813fa5724e04 MD5 | raw file
Possible License(s): GPL-3.0, JSON

Large files are truncated, but you can click here to view the full file

  1. /*
  2. **** BEGIN LICENSE BLOCK *****
  3. * Version: CPL 1.0/GPL 2.0/LGPL 2.1
  4. *
  5. * The contents of this file are subject to the Common Public
  6. * License Version 1.0 (the "License"); you may not use this file
  7. * except in compliance with the License. You may obtain a copy of
  8. * the License at http://www.eclipse.org/legal/cpl-v10.html
  9. *
  10. * Software distributed under the License is distributed on an "AS
  11. * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
  12. * implied. See the License for the specific language governing
  13. * rights and limitations under the License.
  14. *
  15. * Copyright (C) 2001 Alan Moore <alan_moore@gmx.net>
  16. * Copyright (C) 2001-2002 Benoit Cerrina <b.cerrina@wanadoo.fr>
  17. * Copyright (C) 2001-2004 Jan Arne Petersen <jpetersen@uni-bonn.de>
  18. * Copyright (C) 2002-2004 Anders Bengtsson <ndrsbngtssn@yahoo.se>
  19. * Copyright (C) 2002-2006 Thomas E Enebo <enebo@acm.org>
  20. * Copyright (C) 2004 Stefan Matthias Aust <sma@3plus4.de>
  21. * Copyright (C) 2004 David Corbin <dcorbin@users.sourceforge.net>
  22. * Copyright (C) 2005 Tim Azzopardi <tim@tigerfive.com>
  23. * Copyright (C) 2006 Miguel Covarrubias <mlcovarrubias@gmail.com>
  24. * Copyright (C) 2006 Ola Bini <ola@ologix.com>
  25. * Copyright (C) 2007 Nick Sieger <nicksieger@gmail.com>
  26. *
  27. * Alternatively, the contents of this file may be used under the terms of
  28. * either of the GNU General Public License Version 2 or later (the "GPL"),
  29. * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
  30. * in which case the provisions of the GPL or the LGPL are applicable instead
  31. * of those above. If you wish to allow use of your version of this file only
  32. * under the terms of either the GPL or the LGPL, and not to allow others to
  33. * use your version of this file under the terms of the CPL, indicate your
  34. * decision by deleting the provisions above and replace them with the notice
  35. * and other provisions required by the GPL or the LGPL. If you do not delete
  36. * the provisions above, a recipient may use your version of this file under
  37. * the terms of any one of the CPL, the GPL or the LGPL.
  38. ***** END LICENSE BLOCK *****/
  39. package org.jruby;
  40. import static org.jruby.RubyEnumerator.enumeratorize;
  41. import static org.jruby.anno.FrameField.BACKREF;
  42. import static org.jruby.util.StringSupport.CR_7BIT;
  43. import static org.jruby.util.StringSupport.CR_BROKEN;
  44. import static org.jruby.util.StringSupport.CR_MASK;
  45. import static org.jruby.util.StringSupport.CR_UNKNOWN;
  46. import static org.jruby.util.StringSupport.CR_VALID;
  47. import static org.jruby.util.StringSupport.codeLength;
  48. import static org.jruby.util.StringSupport.codePoint;
  49. import static org.jruby.util.StringSupport.codeRangeScan;
  50. import static org.jruby.util.StringSupport.searchNonAscii;
  51. import static org.jruby.util.StringSupport.strLengthWithCodeRange;
  52. import static org.jruby.util.StringSupport.toLower;
  53. import static org.jruby.util.StringSupport.toUpper;
  54. import static org.jruby.util.StringSupport.unpackArg;
  55. import static org.jruby.util.StringSupport.unpackResult;
  56. import java.io.UnsupportedEncodingException;
  57. import java.nio.ByteBuffer;
  58. import java.nio.charset.CharacterCodingException;
  59. import java.nio.charset.Charset;
  60. import java.nio.charset.CharsetEncoder;
  61. import java.nio.charset.CodingErrorAction;
  62. import java.util.Arrays;
  63. import java.util.Locale;
  64. import org.jcodings.Encoding;
  65. import org.jcodings.EncodingDB.Entry;
  66. import org.jcodings.ascii.AsciiTables;
  67. import org.jcodings.constants.CharacterType;
  68. import org.jcodings.specific.ASCIIEncoding;
  69. import org.jcodings.specific.USASCIIEncoding;
  70. import org.jcodings.specific.UTF8Encoding;
  71. import org.jcodings.util.IntHash;
  72. import org.joni.Matcher;
  73. import org.joni.Option;
  74. import org.joni.Regex;
  75. import org.joni.Region;
  76. import org.jruby.anno.JRubyClass;
  77. import org.jruby.anno.JRubyMethod;
  78. import org.jruby.cext.RString;
  79. import org.jruby.javasupport.util.RuntimeHelpers;
  80. import org.jruby.runtime.Arity;
  81. import org.jruby.runtime.Block;
  82. import org.jruby.runtime.ClassIndex;
  83. import org.jruby.runtime.DynamicScope;
  84. import org.jruby.runtime.ObjectAllocator;
  85. import org.jruby.runtime.ThreadContext;
  86. import static org.jruby.runtime.Visibility.*;
  87. import static org.jruby.CompatVersion.*;
  88. import org.jruby.runtime.builtin.IRubyObject;
  89. import org.jruby.runtime.encoding.EncodingCapable;
  90. import org.jruby.runtime.marshal.UnmarshalStream;
  91. import org.jruby.util.ByteList;
  92. import org.jruby.util.ConvertBytes;
  93. import org.jruby.util.Numeric;
  94. import org.jruby.util.Pack;
  95. import org.jruby.util.RegexpOptions;
  96. import org.jruby.util.Sprintf;
  97. import org.jruby.util.StringSupport;
  98. import org.jruby.util.TypeConverter;
  99. import org.jruby.util.string.JavaCrypt;
  100. /**
  101. * Implementation of Ruby String class
  102. *
  103. * Concurrency: no synchronization is required among readers, but
  104. * all users must synchronize externally with writers.
  105. *
  106. */
  107. @JRubyClass(name="String", include={"Enumerable", "Comparable"})
  108. public class RubyString extends RubyObject implements EncodingCapable {
  109. private static final ASCIIEncoding ASCII = ASCIIEncoding.INSTANCE;
  110. private static final UTF8Encoding UTF8 = UTF8Encoding.INSTANCE;
  111. private static final byte[] EMPTY_BYTE_ARRAY = new byte[0];
  112. // string doesn't share any resources
  113. private static final int SHARE_LEVEL_NONE = 0;
  114. // string has its own ByteList, but it's pointing to a shared buffer (byte[])
  115. private static final int SHARE_LEVEL_BUFFER = 1;
  116. // string doesn't have its own ByteList (values)
  117. private static final int SHARE_LEVEL_BYTELIST = 2;
  118. private volatile int shareLevel = SHARE_LEVEL_NONE;
  119. private ByteList value;
  120. private RString rstring;
  121. public static RubyClass createStringClass(Ruby runtime) {
  122. RubyClass stringClass = runtime.defineClass("String", runtime.getObject(), STRING_ALLOCATOR);
  123. runtime.setString(stringClass);
  124. stringClass.index = ClassIndex.STRING;
  125. stringClass.setReifiedClass(RubyString.class);
  126. stringClass.kindOf = new RubyModule.KindOf() {
  127. @Override
  128. public boolean isKindOf(IRubyObject obj, RubyModule type) {
  129. return obj instanceof RubyString;
  130. }
  131. };
  132. stringClass.includeModule(runtime.getComparable());
  133. if (!runtime.is1_9()) stringClass.includeModule(runtime.getEnumerable());
  134. stringClass.defineAnnotatedMethods(RubyString.class);
  135. return stringClass;
  136. }
  137. private static ObjectAllocator STRING_ALLOCATOR = new ObjectAllocator() {
  138. public IRubyObject allocate(Ruby runtime, RubyClass klass) {
  139. return RubyString.newEmptyString(runtime, klass);
  140. }
  141. };
  142. public Encoding getEncoding() {
  143. return value.getEncoding();
  144. }
  145. public void setEncoding(Encoding encoding) {
  146. value.setEncoding(encoding);
  147. }
  148. public void associateEncoding(Encoding enc) {
  149. if (value.getEncoding() != enc) {
  150. if (!isCodeRangeAsciiOnly() || !enc.isAsciiCompatible()) clearCodeRange();
  151. value.setEncoding(enc);
  152. }
  153. }
  154. public final void setEncodingAndCodeRange(Encoding enc, int cr) {
  155. value.setEncoding(enc);
  156. setCodeRange(cr);
  157. }
  158. public final Encoding toEncoding(Ruby runtime) {
  159. return runtime.getEncodingService().findEncoding(this);
  160. }
  161. public final int getCodeRange() {
  162. return flags & CR_MASK;
  163. }
  164. public final void setCodeRange(int codeRange) {
  165. flags |= codeRange & CR_MASK;
  166. }
  167. public final RString getRString() {
  168. return rstring;
  169. }
  170. public final void setRString(RString rstring) {
  171. this.rstring = rstring;
  172. }
  173. public final void clearCodeRange() {
  174. flags &= ~CR_MASK;
  175. }
  176. private void keepCodeRange() {
  177. if (getCodeRange() == CR_BROKEN) clearCodeRange();
  178. }
  179. // ENC_CODERANGE_ASCIIONLY
  180. public final boolean isCodeRangeAsciiOnly() {
  181. return getCodeRange() == CR_7BIT;
  182. }
  183. // rb_enc_str_asciionly_p
  184. public final boolean isAsciiOnly() {
  185. return value.getEncoding().isAsciiCompatible() && scanForCodeRange() == CR_7BIT;
  186. }
  187. public final boolean isCodeRangeValid() {
  188. return (flags & CR_VALID) != 0;
  189. }
  190. public final boolean isCodeRangeBroken() {
  191. return (flags & CR_BROKEN) != 0;
  192. }
  193. static int codeRangeAnd(int cr1, int cr2) {
  194. if (cr1 == CR_7BIT) return cr2;
  195. if (cr1 == CR_VALID) return cr2 == CR_7BIT ? CR_VALID : cr2;
  196. return CR_UNKNOWN;
  197. }
  198. private void copyCodeRangeForSubstr(RubyString from, Encoding enc) {
  199. int fromCr = from.getCodeRange();
  200. if (fromCr == CR_7BIT) {
  201. setCodeRange(fromCr);
  202. } else if (fromCr == CR_VALID) {
  203. if (!enc.isAsciiCompatible() || searchNonAscii(value) != -1) {
  204. setCodeRange(CR_VALID);
  205. } else {
  206. setCodeRange(CR_7BIT);
  207. }
  208. } else{
  209. if (value.getRealSize() == 0) {
  210. setCodeRange(!enc.isAsciiCompatible() ? CR_VALID : CR_7BIT);
  211. }
  212. }
  213. }
  214. private void copyCodeRange(RubyString from) {
  215. value.setEncoding(from.value.getEncoding());
  216. setCodeRange(from.getCodeRange());
  217. }
  218. // rb_enc_str_coderange
  219. final int scanForCodeRange() {
  220. int cr = getCodeRange();
  221. if (cr == CR_UNKNOWN) {
  222. cr = codeRangeScan(value.getEncoding(), value);
  223. setCodeRange(cr);
  224. }
  225. return cr;
  226. }
  227. final boolean singleByteOptimizable() {
  228. return getCodeRange() == CR_7BIT || value.getEncoding().isSingleByte();
  229. }
  230. final boolean singleByteOptimizable(Encoding enc) {
  231. return getCodeRange() == CR_7BIT || enc.isSingleByte();
  232. }
  233. private Encoding isCompatibleWith(RubyString other) {
  234. Encoding enc1 = value.getEncoding();
  235. Encoding enc2 = other.value.getEncoding();
  236. if (enc1 == enc2) return enc1;
  237. if (other.value.getRealSize() == 0) return enc1;
  238. if (value.getRealSize() == 0) return enc2;
  239. if (!enc1.isAsciiCompatible() || !enc2.isAsciiCompatible()) return null;
  240. return RubyEncoding.areCompatible(enc1, scanForCodeRange(), enc2, other.scanForCodeRange());
  241. }
  242. final Encoding isCompatibleWith(EncodingCapable other) {
  243. if (other instanceof RubyString) return checkEncoding((RubyString)other);
  244. Encoding enc1 = value.getEncoding();
  245. Encoding enc2 = other.getEncoding();
  246. if (enc1 == enc2) return enc1;
  247. if (value.getRealSize() == 0) return enc2;
  248. if (!enc1.isAsciiCompatible() || !enc2.isAsciiCompatible()) return null;
  249. if (enc2 instanceof USASCIIEncoding) return enc1;
  250. if (scanForCodeRange() == CR_7BIT) return enc2;
  251. return null;
  252. }
  253. final Encoding checkEncoding(RubyString other) {
  254. Encoding enc = isCompatibleWith(other);
  255. if (enc == null) throw getRuntime().newEncodingCompatibilityError("incompatible character encodings: " +
  256. value.getEncoding() + " and " + other.value.getEncoding());
  257. return enc;
  258. }
  259. final Encoding checkEncoding(EncodingCapable other) {
  260. Encoding enc = isCompatibleWith(other);
  261. if (enc == null) throw getRuntime().newEncodingCompatibilityError("incompatible character encodings: " +
  262. value.getEncoding() + " and " + other.getEncoding());
  263. return enc;
  264. }
  265. private Encoding checkDummyEncoding() {
  266. Encoding enc = value.getEncoding();
  267. if (enc.isDummy()) throw getRuntime().newEncodingCompatibilityError(
  268. "incompatible encoding with this operation: " + enc);
  269. return enc;
  270. }
  271. private boolean isComparableWith(RubyString other) {
  272. ByteList otherValue = other.value;
  273. if (value.getEncoding() == otherValue.getEncoding() ||
  274. value.getRealSize() == 0 || otherValue.getRealSize() == 0) return true;
  275. return isComparableViaCodeRangeWith(other);
  276. }
  277. private boolean isComparableViaCodeRangeWith(RubyString other) {
  278. int cr1 = scanForCodeRange();
  279. int cr2 = other.scanForCodeRange();
  280. if (cr1 == CR_7BIT && (cr2 == CR_7BIT || other.value.getEncoding().isAsciiCompatible())) return true;
  281. if (cr2 == CR_7BIT && value.getEncoding().isAsciiCompatible()) return true;
  282. return false;
  283. }
  284. private int strLength(Encoding enc) {
  285. if (singleByteOptimizable(enc)) return value.getRealSize();
  286. return strLength(value, enc);
  287. }
  288. final int strLength() {
  289. if (singleByteOptimizable()) return value.getRealSize();
  290. return strLength(value);
  291. }
  292. private int strLength(ByteList bytes) {
  293. return strLength(bytes, bytes.getEncoding());
  294. }
  295. private int strLength(ByteList bytes, Encoding enc) {
  296. if (isCodeRangeValid() && enc instanceof UTF8Encoding) return StringSupport.utf8Length(value);
  297. long lencr = strLengthWithCodeRange(bytes, enc);
  298. int cr = unpackArg(lencr);
  299. if (cr != 0) setCodeRange(cr);
  300. return unpackResult(lencr);
  301. }
  302. final int subLength(int pos) {
  303. if (singleByteOptimizable() || pos < 0) return pos;
  304. return StringSupport.strLength(value.getEncoding(), value.getUnsafeBytes(), value.getBegin(), value.getBegin() + pos);
  305. }
  306. /** short circuit for String key comparison
  307. *
  308. */
  309. @Override
  310. public final boolean eql(IRubyObject other) {
  311. Ruby runtime = getRuntime();
  312. if (getMetaClass() != runtime.getString() || getMetaClass() != other.getMetaClass()) return super.eql(other);
  313. return runtime.is1_9() ? eql19(runtime, other) : eql18(runtime, other);
  314. }
  315. private boolean eql18(Ruby runtime, IRubyObject other) {
  316. return value.equal(((RubyString)other).value);
  317. }
  318. // rb_str_hash_cmp
  319. private boolean eql19(Ruby runtime, IRubyObject other) {
  320. RubyString otherString = (RubyString)other;
  321. return isComparableWith(otherString) && value.equal(((RubyString)other).value);
  322. }
  323. public RubyString(Ruby runtime, RubyClass rubyClass) {
  324. this(runtime, rubyClass, EMPTY_BYTE_ARRAY);
  325. }
  326. public RubyString(Ruby runtime, RubyClass rubyClass, CharSequence value) {
  327. super(runtime, rubyClass);
  328. assert value != null;
  329. byte[] bytes = RubyEncoding.encodeUTF8(value);
  330. this.value = new ByteList(bytes, false);
  331. this.value.setEncoding(UTF8);
  332. }
  333. public RubyString(Ruby runtime, RubyClass rubyClass, byte[] value) {
  334. super(runtime, rubyClass);
  335. assert value != null;
  336. this.value = new ByteList(value);
  337. }
  338. public RubyString(Ruby runtime, RubyClass rubyClass, ByteList value) {
  339. super(runtime, rubyClass);
  340. assert value != null;
  341. this.value = value;
  342. }
  343. public RubyString(Ruby runtime, RubyClass rubyClass, ByteList value, boolean objectSpace) {
  344. super(runtime, rubyClass, objectSpace);
  345. assert value != null;
  346. this.value = value;
  347. }
  348. protected RubyString(Ruby runtime, RubyClass rubyClass, ByteList value, Encoding enc, int cr) {
  349. this(runtime, rubyClass, value);
  350. value.setEncoding(enc);
  351. flags |= cr;
  352. }
  353. protected RubyString(Ruby runtime, RubyClass rubyClass, ByteList value, Encoding enc) {
  354. this(runtime, rubyClass, value);
  355. value.setEncoding(enc);
  356. }
  357. protected RubyString(Ruby runtime, RubyClass rubyClass, ByteList value, int cr) {
  358. this(runtime, rubyClass, value);
  359. flags |= cr;
  360. }
  361. // Deprecated String construction routines
  362. /** Create a new String which uses the same Ruby runtime and the same
  363. * class like this String.
  364. *
  365. * This method should be used to satisfy RCR #38.
  366. * @deprecated
  367. */
  368. @Deprecated
  369. public RubyString newString(CharSequence s) {
  370. return new RubyString(getRuntime(), getType(), s);
  371. }
  372. /** Create a new String which uses the same Ruby runtime and the same
  373. * class like this String.
  374. *
  375. * This method should be used to satisfy RCR #38.
  376. * @deprecated
  377. */
  378. @Deprecated
  379. public RubyString newString(ByteList s) {
  380. return new RubyString(getRuntime(), getMetaClass(), s);
  381. }
  382. @Deprecated
  383. public static RubyString newString(Ruby runtime, RubyClass clazz, CharSequence str) {
  384. return new RubyString(runtime, clazz, str);
  385. }
  386. public static RubyString newStringLight(Ruby runtime, ByteList bytes) {
  387. return new RubyString(runtime, runtime.getString(), bytes, false);
  388. }
  389. public static RubyString newStringLight(Ruby runtime, int size) {
  390. return new RubyString(runtime, runtime.getString(), new ByteList(size), false);
  391. }
  392. public static RubyString newString(Ruby runtime, CharSequence str) {
  393. return new RubyString(runtime, runtime.getString(), str);
  394. }
  395. public static RubyString newString(Ruby runtime, String str) {
  396. return new RubyString(runtime, runtime.getString(), str);
  397. }
  398. public static RubyString newString(Ruby runtime, byte[] bytes) {
  399. return new RubyString(runtime, runtime.getString(), bytes);
  400. }
  401. public static RubyString newString(Ruby runtime, byte[] bytes, int start, int length) {
  402. byte[] copy = new byte[length];
  403. System.arraycopy(bytes, start, copy, 0, length);
  404. return new RubyString(runtime, runtime.getString(), new ByteList(copy, false));
  405. }
  406. public static RubyString newString(Ruby runtime, ByteList bytes) {
  407. return new RubyString(runtime, runtime.getString(), bytes);
  408. }
  409. public static RubyString newString(Ruby runtime, ByteList bytes, Encoding encoding) {
  410. return new RubyString(runtime, runtime.getString(), bytes, encoding);
  411. }
  412. public static RubyString newUnicodeString(Ruby runtime, String str) {
  413. return new RubyString(runtime, runtime.getString(), new ByteList(RubyEncoding.encodeUTF8(str), false));
  414. }
  415. // String construction routines by NOT byte[] buffer and making the target String shared
  416. public static RubyString newStringShared(Ruby runtime, RubyString orig) {
  417. orig.shareLevel = SHARE_LEVEL_BYTELIST;
  418. RubyString str = new RubyString(runtime, runtime.getString(), orig.value);
  419. str.shareLevel = SHARE_LEVEL_BYTELIST;
  420. return str;
  421. }
  422. public static RubyString newStringShared(Ruby runtime, ByteList bytes) {
  423. return newStringShared(runtime, runtime.getString(), bytes);
  424. }
  425. public static RubyString newStringShared(Ruby runtime, ByteList bytes, Encoding encoding) {
  426. return newStringShared(runtime, runtime.getString(), bytes, encoding);
  427. }
  428. public static RubyString newStringShared(Ruby runtime, ByteList bytes, int codeRange) {
  429. RubyString str = new RubyString(runtime, runtime.getString(), bytes, codeRange);
  430. str.shareLevel = SHARE_LEVEL_BYTELIST;
  431. return str;
  432. }
  433. public static RubyString newStringShared(Ruby runtime, RubyClass clazz, ByteList bytes) {
  434. RubyString str = new RubyString(runtime, clazz, bytes);
  435. str.shareLevel = SHARE_LEVEL_BYTELIST;
  436. return str;
  437. }
  438. public static RubyString newStringShared(Ruby runtime, RubyClass clazz, ByteList bytes, Encoding encoding) {
  439. RubyString str = new RubyString(runtime, clazz, bytes, encoding);
  440. str.shareLevel = SHARE_LEVEL_BYTELIST;
  441. return str;
  442. }
  443. public static RubyString newStringShared(Ruby runtime, byte[] bytes) {
  444. return newStringShared(runtime, new ByteList(bytes, false));
  445. }
  446. public static RubyString newStringShared(Ruby runtime, byte[] bytes, int start, int length) {
  447. return newStringShared(runtime, new ByteList(bytes, start, length, false));
  448. }
  449. public static RubyString newEmptyString(Ruby runtime) {
  450. return newEmptyString(runtime, runtime.getString());
  451. }
  452. public static RubyString newEmptyString(Ruby runtime, RubyClass metaClass) {
  453. RubyString empty = new RubyString(runtime, metaClass, ByteList.EMPTY_BYTELIST);
  454. empty.shareLevel = SHARE_LEVEL_BYTELIST;
  455. return empty;
  456. }
  457. // String construction routines by NOT byte[] buffer and NOT making the target String shared
  458. public static RubyString newStringNoCopy(Ruby runtime, ByteList bytes) {
  459. return newStringNoCopy(runtime, runtime.getString(), bytes);
  460. }
  461. public static RubyString newStringNoCopy(Ruby runtime, RubyClass clazz, ByteList bytes) {
  462. return new RubyString(runtime, clazz, bytes);
  463. }
  464. public static RubyString newStringNoCopy(Ruby runtime, byte[] bytes, int start, int length) {
  465. return newStringNoCopy(runtime, new ByteList(bytes, start, length, false));
  466. }
  467. public static RubyString newStringNoCopy(Ruby runtime, byte[] bytes) {
  468. return newStringNoCopy(runtime, new ByteList(bytes, false));
  469. }
  470. /** Encoding aware String construction routines for 1.9
  471. *
  472. */
  473. private static final class EmptyByteListHolder {
  474. final ByteList bytes;
  475. final int cr;
  476. EmptyByteListHolder(Encoding enc) {
  477. this.bytes = new ByteList(ByteList.NULL_ARRAY, enc);
  478. this.cr = bytes.getEncoding().isAsciiCompatible() ? CR_7BIT : CR_VALID;
  479. }
  480. }
  481. private static EmptyByteListHolder EMPTY_BYTELISTS[] = new EmptyByteListHolder[4];
  482. static EmptyByteListHolder getEmptyByteList(Encoding enc) {
  483. int index = enc.getIndex();
  484. EmptyByteListHolder bytes;
  485. if (index < EMPTY_BYTELISTS.length && (bytes = EMPTY_BYTELISTS[index]) != null) {
  486. return bytes;
  487. }
  488. return prepareEmptyByteList(enc);
  489. }
  490. private static EmptyByteListHolder prepareEmptyByteList(Encoding enc) {
  491. int index = enc.getIndex();
  492. if (index >= EMPTY_BYTELISTS.length) {
  493. EmptyByteListHolder tmp[] = new EmptyByteListHolder[index + 4];
  494. System.arraycopy(EMPTY_BYTELISTS,0, tmp, 0, EMPTY_BYTELISTS.length);
  495. EMPTY_BYTELISTS = tmp;
  496. }
  497. return EMPTY_BYTELISTS[index] = new EmptyByteListHolder(enc);
  498. }
  499. public static RubyString newEmptyString(Ruby runtime, RubyClass metaClass, Encoding enc) {
  500. EmptyByteListHolder holder = getEmptyByteList(enc);
  501. RubyString empty = new RubyString(runtime, metaClass, holder.bytes, holder.cr);
  502. empty.shareLevel = SHARE_LEVEL_BYTELIST;
  503. return empty;
  504. }
  505. public static RubyString newEmptyString(Ruby runtime, Encoding enc) {
  506. return newEmptyString(runtime, runtime.getString(), enc);
  507. }
  508. public static RubyString newStringNoCopy(Ruby runtime, RubyClass clazz, ByteList bytes, Encoding enc, int cr) {
  509. return new RubyString(runtime, clazz, bytes, enc, cr);
  510. }
  511. public static RubyString newStringNoCopy(Ruby runtime, ByteList bytes, Encoding enc, int cr) {
  512. return newStringNoCopy(runtime, runtime.getString(), bytes, enc, cr);
  513. }
  514. public static RubyString newUsAsciiStringNoCopy(Ruby runtime, ByteList bytes) {
  515. return newStringNoCopy(runtime, bytes, USASCIIEncoding.INSTANCE, CR_7BIT);
  516. }
  517. public static RubyString newUsAsciiStringShared(Ruby runtime, ByteList bytes) {
  518. RubyString str = newStringNoCopy(runtime, bytes, USASCIIEncoding.INSTANCE, CR_7BIT);
  519. str.shareLevel = SHARE_LEVEL_BYTELIST;
  520. return str;
  521. }
  522. public static RubyString newUsAsciiStringShared(Ruby runtime, byte[] bytes, int start, int length) {
  523. byte[] copy = new byte[length];
  524. System.arraycopy(bytes, start, copy, 0, length);
  525. return newUsAsciiStringShared(runtime, new ByteList(copy, false));
  526. }
  527. @Override
  528. public int getNativeTypeIndex() {
  529. return ClassIndex.STRING;
  530. }
  531. @Override
  532. public Class getJavaClass() {
  533. return String.class;
  534. }
  535. @Override
  536. public RubyString convertToString() {
  537. return this;
  538. }
  539. @Override
  540. public String toString() {
  541. return decodeString();
  542. }
  543. /**
  544. * Convert this Ruby string to a Java String. This version is encoding-aware.
  545. *
  546. * @return A decoded Java String, based on this Ruby string's encoding.
  547. */
  548. public String decodeString() {
  549. try {
  550. // 1.9 support for encodings
  551. // TODO: Fix charset use for JRUBY-4553
  552. if (getRuntime().is1_9()) {
  553. return new String(value.getUnsafeBytes(), value.begin(), value.length(), getEncoding().toString());
  554. }
  555. return RubyEncoding.decodeUTF8(value.getUnsafeBytes(), value.begin(), value.length());
  556. } catch (UnsupportedEncodingException uee) {
  557. return value.toString();
  558. }
  559. }
  560. /** rb_str_dup
  561. *
  562. */
  563. @Deprecated
  564. public final RubyString strDup() {
  565. return strDup(getRuntime(), getMetaClass());
  566. }
  567. public final RubyString strDup(Ruby runtime) {
  568. return strDup(runtime, getMetaClass());
  569. }
  570. @Deprecated
  571. final RubyString strDup(RubyClass clazz) {
  572. return strDup(getRuntime(), getMetaClass());
  573. }
  574. final RubyString strDup(Ruby runtime, RubyClass clazz) {
  575. shareLevel = SHARE_LEVEL_BYTELIST;
  576. RubyString dup = new RubyString(runtime, clazz, value);
  577. dup.shareLevel = SHARE_LEVEL_BYTELIST;
  578. dup.flags |= flags & (CR_MASK | TAINTED_F | UNTRUSTED_F);
  579. return dup;
  580. }
  581. /* rb_str_subseq */
  582. public final RubyString makeSharedString(Ruby runtime, int index, int len) {
  583. return makeShared(runtime, runtime.getString(), index, len);
  584. }
  585. public final RubyString makeShared(Ruby runtime, int index, int len) {
  586. return makeShared(runtime, getType(), index, len);
  587. }
  588. public final RubyString makeShared(Ruby runtime, RubyClass meta, int index, int len) {
  589. final RubyString shared;
  590. if (len == 0) {
  591. shared = newEmptyString(runtime, meta);
  592. } else if (len == 1) {
  593. shared = newStringShared(runtime, meta,
  594. RubyInteger.SINGLE_CHAR_BYTELISTS[value.getUnsafeBytes()[value.getBegin() + index] & 0xff]);
  595. } else {
  596. if (shareLevel == SHARE_LEVEL_NONE) shareLevel = SHARE_LEVEL_BUFFER;
  597. shared = new RubyString(runtime, meta, value.makeShared(index, len));
  598. shared.shareLevel = SHARE_LEVEL_BUFFER;
  599. }
  600. shared.infectBy(this);
  601. return shared;
  602. }
  603. public final RubyString makeShared19(Ruby runtime, int index, int len) {
  604. return makeShared19(runtime, value, index, len);
  605. }
  606. private RubyString makeShared19(Ruby runtime, ByteList value, int index, int len) {
  607. final RubyString shared;
  608. Encoding enc = value.getEncoding();
  609. RubyClass meta = getType();
  610. if (len == 0) {
  611. shared = newEmptyString(runtime, meta, enc);
  612. } else {
  613. if (shareLevel == SHARE_LEVEL_NONE) shareLevel = SHARE_LEVEL_BUFFER;
  614. shared = new RubyString(runtime, meta, value.makeShared(index, len));
  615. shared.shareLevel = SHARE_LEVEL_BUFFER;
  616. shared.copyCodeRangeForSubstr(this, enc); // no need to assign encoding, same bytelist shared
  617. }
  618. shared.infectBy(this);
  619. return shared;
  620. }
  621. final void modifyCheck() {
  622. frozenCheck();
  623. if (!isTaint() && getRuntime().getSafeLevel() >= 4) {
  624. throw getRuntime().newSecurityError("Insecure: can't modify string");
  625. }
  626. }
  627. private final void modifyCheck(byte[] b, int len) {
  628. if (value.getUnsafeBytes() != b || value.getRealSize() != len) throw getRuntime().newRuntimeError("string modified");
  629. }
  630. private final void modifyCheck(byte[] b, int len, Encoding enc) {
  631. if (value.getUnsafeBytes() != b || value.getRealSize() != len || value.getEncoding() != enc) throw getRuntime().newRuntimeError("string modified");
  632. }
  633. private void frozenCheck() {
  634. frozenCheck(false);
  635. }
  636. private void frozenCheck(boolean runtimeError) {
  637. if (isFrozen()) throw getRuntime().newFrozenError("string", runtimeError);
  638. }
  639. /** rb_str_modify
  640. *
  641. */
  642. public final void modify() {
  643. modifyCheck();
  644. if (shareLevel != SHARE_LEVEL_NONE) {
  645. if (shareLevel == SHARE_LEVEL_BYTELIST) {
  646. value = value.dup();
  647. } else {
  648. value.unshare();
  649. }
  650. shareLevel = SHARE_LEVEL_NONE;
  651. }
  652. value.invalidate();
  653. }
  654. public final void modify19() {
  655. modify();
  656. clearCodeRange();
  657. }
  658. private void modifyAndKeepCodeRange() {
  659. modify();
  660. keepCodeRange();
  661. }
  662. /** rb_str_modify (with length bytes ensured)
  663. *
  664. */
  665. public final void modify(int length) {
  666. modifyCheck();
  667. if (shareLevel != SHARE_LEVEL_NONE) {
  668. if (shareLevel == SHARE_LEVEL_BYTELIST) {
  669. value = value.dup(length);
  670. } else {
  671. value.unshare(length);
  672. }
  673. shareLevel = SHARE_LEVEL_NONE;
  674. } else {
  675. value.ensure(length);
  676. }
  677. value.invalidate();
  678. }
  679. public final void modify19(int length) {
  680. modify(length);
  681. clearCodeRange();
  682. }
  683. /** rb_str_resize
  684. */
  685. public final void resize(int length) {
  686. modify();
  687. if (value.getRealSize() > length) {
  688. value.setRealSize(length);
  689. } else if (value.length() < length) {
  690. value.length(length);
  691. }
  692. }
  693. final void view(ByteList bytes) {
  694. modifyCheck();
  695. value = bytes;
  696. shareLevel = SHARE_LEVEL_NONE;
  697. }
  698. private final void view(byte[]bytes) {
  699. modifyCheck();
  700. value.replace(bytes);
  701. shareLevel = SHARE_LEVEL_NONE;
  702. value.invalidate();
  703. }
  704. private final void view(int index, int len) {
  705. modifyCheck();
  706. if (shareLevel != SHARE_LEVEL_NONE) {
  707. if (shareLevel == SHARE_LEVEL_BYTELIST) {
  708. // if len == 0 then shared empty
  709. value = value.makeShared(index, len);
  710. shareLevel = SHARE_LEVEL_BUFFER;
  711. } else {
  712. value.view(index, len);
  713. }
  714. } else {
  715. value.view(index, len);
  716. // FIXME this below is temporary, but its much safer for COW (it prevents not shared Strings with begin != 0)
  717. // this allows now e.g.: ByteList#set not to be begin aware
  718. shareLevel = SHARE_LEVEL_BUFFER;
  719. }
  720. value.invalidate();
  721. }
  722. public static String bytesToString(byte[] bytes, int beg, int len) {
  723. return new String(ByteList.plain(bytes, beg, len));
  724. }
  725. public static String byteListToString(ByteList bytes) {
  726. return bytesToString(bytes.getUnsafeBytes(), bytes.begin(), bytes.length());
  727. }
  728. public static String bytesToString(byte[] bytes) {
  729. return bytesToString(bytes, 0, bytes.length);
  730. }
  731. public static byte[] stringToBytes(String string) {
  732. return ByteList.plain(string);
  733. }
  734. @Override
  735. public RubyString asString() {
  736. return this;
  737. }
  738. @Override
  739. public IRubyObject checkStringType() {
  740. return this;
  741. }
  742. @Override
  743. public IRubyObject checkStringType19() {
  744. return this;
  745. }
  746. @JRubyMethod(name = "try_convert", meta = true, compat = RUBY1_9)
  747. public static IRubyObject try_convert(ThreadContext context, IRubyObject recv, IRubyObject str) {
  748. return str.checkStringType();
  749. }
  750. @JRubyMethod(name = {"to_s", "to_str"})
  751. @Override
  752. public IRubyObject to_s() {
  753. Ruby runtime = getRuntime();
  754. if (getMetaClass().getRealClass() != runtime.getString()) {
  755. return strDup(runtime, runtime.getString());
  756. }
  757. return this;
  758. }
  759. @Override
  760. public final int compareTo(IRubyObject other) {
  761. Ruby runtime = getRuntime();
  762. if (other instanceof RubyString) {
  763. RubyString otherString = (RubyString)other;
  764. return runtime.is1_9() ? op_cmp19(otherString) : op_cmp(otherString);
  765. }
  766. return (int)op_cmpCommon(runtime.getCurrentContext(), other).convertToInteger().getLongValue();
  767. }
  768. /* rb_str_cmp_m */
  769. @JRubyMethod(name = "<=>", compat = RUBY1_8)
  770. public IRubyObject op_cmp(ThreadContext context, IRubyObject other) {
  771. if (other instanceof RubyString) {
  772. return context.getRuntime().newFixnum(op_cmp((RubyString)other));
  773. }
  774. return op_cmpCommon(context, other);
  775. }
  776. @JRubyMethod(name = "<=>", compat = RUBY1_9)
  777. public IRubyObject op_cmp19(ThreadContext context, IRubyObject other) {
  778. if (other instanceof RubyString) {
  779. return context.getRuntime().newFixnum(op_cmp19((RubyString)other));
  780. }
  781. return op_cmpCommon(context, other);
  782. }
  783. private IRubyObject op_cmpCommon(ThreadContext context, IRubyObject other) {
  784. Ruby runtime = context.getRuntime();
  785. // deal with case when "other" is not a string
  786. if (other.respondsTo("to_str") && other.respondsTo("<=>")) {
  787. IRubyObject result = other.callMethod(context, "<=>", this);
  788. if (result.isNil()) return result;
  789. if (result instanceof RubyFixnum) {
  790. return RubyFixnum.newFixnum(runtime, -((RubyFixnum)result).getLongValue());
  791. } else {
  792. return RubyFixnum.zero(runtime).callMethod(context, "-", result);
  793. }
  794. }
  795. return runtime.getNil();
  796. }
  797. /** rb_str_equal
  798. *
  799. */
  800. @JRubyMethod(name = "==", compat = RUBY1_8)
  801. @Override
  802. public IRubyObject op_equal(ThreadContext context, IRubyObject other) {
  803. Ruby runtime = context.getRuntime();
  804. if (this == other) return runtime.getTrue();
  805. if (other instanceof RubyString) {
  806. return value.equal(((RubyString)other).value) ? runtime.getTrue() : runtime.getFalse();
  807. }
  808. return op_equalCommon(context, other);
  809. }
  810. @JRubyMethod(name = "==", compat = RUBY1_9)
  811. public IRubyObject op_equal19(ThreadContext context, IRubyObject other) {
  812. Ruby runtime = context.getRuntime();
  813. if (this == other) return runtime.getTrue();
  814. if (other instanceof RubyString) {
  815. RubyString otherString = (RubyString)other;
  816. return isComparableWith(otherString) && value.equal(otherString.value) ? runtime.getTrue() : runtime.getFalse();
  817. }
  818. return op_equalCommon(context, other);
  819. }
  820. private IRubyObject op_equalCommon(ThreadContext context, IRubyObject other) {
  821. Ruby runtime = context.getRuntime();
  822. if (!other.respondsTo("to_str")) return runtime.getFalse();
  823. return other.callMethod(context, "==", this).isTrue() ? runtime.getTrue() : runtime.getFalse();
  824. }
  825. @JRubyMethod(name = "+", required = 1, compat = RUBY1_8, argTypes = RubyString.class)
  826. public IRubyObject op_plus(ThreadContext context, RubyString str) {
  827. RubyString resultStr = newString(context.getRuntime(), addByteLists(value, str.value));
  828. resultStr.infectBy(flags | str.flags);
  829. return resultStr;
  830. }
  831. public IRubyObject op_plus(ThreadContext context, IRubyObject other) {
  832. return op_plus(context, other.convertToString());
  833. }
  834. @JRubyMethod(name = "+", required = 1, compat = RUBY1_9)
  835. public IRubyObject op_plus19(ThreadContext context, RubyString str) {
  836. Encoding enc = checkEncoding(str);
  837. RubyString resultStr = newStringNoCopy(context.getRuntime(), addByteLists(value, str.value),
  838. enc, codeRangeAnd(getCodeRange(), str.getCodeRange()));
  839. resultStr.infectBy(flags | str.flags);
  840. return resultStr;
  841. }
  842. public IRubyObject op_plus19(ThreadContext context, IRubyObject other) {
  843. return op_plus19(context, other.convertToString());
  844. }
  845. private ByteList addByteLists(ByteList value1, ByteList value2) {
  846. ByteList result = new ByteList(value1.getRealSize() + value2.getRealSize());
  847. result.setRealSize(value1.getRealSize() + value2.getRealSize());
  848. System.arraycopy(value1.getUnsafeBytes(), value1.getBegin(), result.getUnsafeBytes(), 0, value1.getRealSize());
  849. System.arraycopy(value2.getUnsafeBytes(), value2.getBegin(), result.getUnsafeBytes(), value1.getRealSize(), value2.getRealSize());
  850. return result;
  851. }
  852. @JRubyMethod(name = "*", required = 1, compat = RUBY1_8)
  853. public IRubyObject op_mul(ThreadContext context, IRubyObject other) {
  854. return multiplyByteList(context, other);
  855. }
  856. @JRubyMethod(name = "*", required = 1, compat = RUBY1_9)
  857. public IRubyObject op_mul19(ThreadContext context, IRubyObject other) {
  858. RubyString result = multiplyByteList(context, other);
  859. result.value.setEncoding(value.getEncoding());
  860. result.copyCodeRange(this);
  861. return result;
  862. }
  863. private RubyString multiplyByteList(ThreadContext context, IRubyObject arg) {
  864. int len = RubyNumeric.num2int(arg);
  865. if (len < 0) throw context.getRuntime().newArgumentError("negative argument");
  866. // we limit to int because ByteBuffer can only allocate int sizes
  867. if (len > 0 && Integer.MAX_VALUE / len < value.getRealSize()) {
  868. throw context.getRuntime().newArgumentError("argument too big");
  869. }
  870. ByteList bytes = new ByteList(len *= value.getRealSize());
  871. if (len > 0) {
  872. bytes.setRealSize(len);
  873. int n = value.getRealSize();
  874. System.arraycopy(value.getUnsafeBytes(), value.getBegin(), bytes.getUnsafeBytes(), 0, n);
  875. while (n <= len >> 1) {
  876. System.arraycopy(bytes.getUnsafeBytes(), 0, bytes.getUnsafeBytes(), n, n);
  877. n <<= 1;
  878. }
  879. System.arraycopy(bytes.getUnsafeBytes(), 0, bytes.getUnsafeBytes(), n, len - n);
  880. }
  881. RubyString result = new RubyString(context.getRuntime(), getMetaClass(), bytes);
  882. result.infectBy(this);
  883. return result;
  884. }
  885. @JRubyMethod(name = "%", required = 1)
  886. public IRubyObject op_format(ThreadContext context, IRubyObject arg) {
  887. return opFormatCommon(context, arg, context.getRuntime().getInstanceConfig().getCompatVersion());
  888. }
  889. private IRubyObject opFormatCommon(ThreadContext context, IRubyObject arg, CompatVersion compat) {
  890. IRubyObject tmp = arg.checkArrayType();
  891. if (tmp.isNil()) tmp = arg;
  892. // FIXME: Should we make this work with platform's locale,
  893. // or continue hardcoding US?
  894. ByteList out = new ByteList(value.getRealSize());
  895. boolean tainted;
  896. switch (compat) {
  897. case RUBY1_8:
  898. tainted = Sprintf.sprintf(out, Locale.US, value, tmp);
  899. break;
  900. case RUBY1_9:
  901. tainted = Sprintf.sprintf1_9(out, Locale.US, value, tmp);
  902. break;
  903. default:
  904. throw new RuntimeException("invalid compat version for sprintf: " + compat);
  905. }
  906. RubyString str = newString(context.getRuntime(), out);
  907. str.setTaint(tainted || isTaint());
  908. return str;
  909. }
  910. @JRubyMethod(name = "hash")
  911. @Override
  912. public RubyFixnum hash() {
  913. Ruby runtime = getRuntime();
  914. return RubyFixnum.newFixnum(runtime, strHashCode(runtime));
  915. }
  916. @Override
  917. public int hashCode() {
  918. return strHashCode(getRuntime());
  919. }
  920. private int strHashCode(Ruby runtime) {
  921. if (runtime.is1_9()) {
  922. return value.hashCode() ^ (value.getEncoding().isAsciiCompatible() && scanForCodeRange() == CR_7BIT ? 0 : value.getEncoding().getIndex());
  923. } else {
  924. return value.hashCode();
  925. }
  926. }
  927. @Override
  928. public boolean equals(Object other) {
  929. if (this == other) return true;
  930. if (other instanceof RubyString) {
  931. if (((RubyString) other).value.equal(value)) return true;
  932. }
  933. return false;
  934. }
  935. /** rb_obj_as_string
  936. *
  937. */
  938. public static RubyString objAsString(ThreadContext context, IRubyObject obj) {
  939. if (obj instanceof RubyString) return (RubyString) obj;
  940. IRubyObject str = obj.callMethod(context, "to_s");
  941. if (!(str instanceof RubyString)) return (RubyString) obj.anyToString();
  942. if (obj.isTaint()) str.setTaint(true);
  943. return (RubyString) str;
  944. }
  945. /** rb_str_cmp
  946. *
  947. */
  948. public final int op_cmp(RubyString other) {
  949. return value.cmp(other.value);
  950. }
  951. public final int op_cmp19(RubyString other) {
  952. int ret = value.cmp(other.value);
  953. if (ret == 0 && !isComparableWith(other)) {
  954. return value.getEncoding().getIndex() > other.value.getEncoding().getIndex() ? 1 : -1;
  955. }
  956. return ret;
  957. }
  958. /** rb_to_id
  959. *
  960. */
  961. @Override
  962. public String asJavaString() {
  963. return toString();
  964. }
  965. public IRubyObject doClone(){
  966. return newString(getRuntime(), value.dup());
  967. }
  968. public final RubyString cat(byte[] str) {
  969. modify(value.getRealSize() + str.length);
  970. System.arraycopy(str, 0, value.getUnsafeBytes(), value.getBegin() + value.getRealSize(), str.length);
  971. value.setRealSize(value.getRealSize() + str.length);
  972. return this;
  973. }
  974. public final RubyString cat(byte[] str, int beg, int len) {
  975. modify(value.getRealSize() + len);
  976. System.arraycopy(str, beg, value.getUnsafeBytes(), value.getBegin() + value.getRealSize(), len);
  977. value.setRealSize(value.getRealSize() + len);
  978. return this;
  979. }
  980. // // rb_str_buf_append
  981. public final RubyString cat19(RubyString str) {
  982. ByteList strValue = str.value;
  983. int strCr = str.getCodeRange();
  984. strCr = cat(strValue.getUnsafeBytes(), strValue.getBegin(), strValue.getRealSize(), strValue.getEncoding(), strCr, strCr);
  985. infectBy(str);
  986. str.setCodeRange(strCr);
  987. return this;
  988. }
  989. public final RubyString cat(ByteList str) {
  990. modify(value.getRealSize() + str.getRealSize());
  991. System.arraycopy(str.getUnsafeBytes(), str.getBegin(), value.getUnsafeBytes(), value.getBegin() + value.getRealSize(), str.getRealSize());
  992. value.setRealSize(value.getRealSize() + str.getRealSize());
  993. return this;
  994. }
  995. public final RubyString cat(byte ch) {
  996. modify(value.getRealSize() + 1);
  997. value.getUnsafeBytes()[value.getBegin() + value.getRealSize()] = ch;
  998. value.setRealSize(value.getRealSize() + 1);
  999. return this;
  1000. }
  1001. public final RubyString cat(int ch) {
  1002. return cat((byte)ch);
  1003. }
  1004. public final RubyString cat(int code, Encoding enc) {
  1005. int n = codeLength(getRuntime(), enc, code);
  1006. modify(value.getRealSize() + n);
  1007. enc.codeToMbc(code, value.getUnsafeBytes(), value.getBegin() + value.getRealSize());
  1008. value.setRealSize(value.getRealSize() + n);
  1009. return this;
  1010. }
  /**
   * Encoding-aware append: copies {@code len} bytes of {@code bytes} starting
   * at {@code p} onto the end of this string and then records the encoding
   * and code range chosen for the combined contents with
   * {@code setEncodingAndCodeRange}.
   *
   * @param bytes source array
   * @param p offset of the first byte to copy
   * @param len number of bytes to copy
   * @param enc encoding of the appended bytes
   * @param cr code range of the appended bytes; CR_UNKNOWN causes it to be
   *        scanned with {@code codeRangeScan} on the paths below that need it
   * @param cr2 acts as a flag: when non-zero, the value returned on the normal
   *        path is the (possibly scanned or reset) {@code cr}; when zero, zero
   *        is returned. The two early returns hand {@code cr2} back unchanged.
   * @return see {@code cr2}
   * @throws the runtime's encoding compatibility error when the encodings
   *         cannot be combined, and the runtime's ArgumentError when
   *         {@code len} is negative
   */
  public final int cat(byte[]bytes, int p, int len, Encoding enc, int cr, int cr2) {
      // Make room for the appended bytes before anything else.
      modify(value.getRealSize() + len);
      int toCr = getCodeRange();
      Encoding toEnc = value.getEncoding();
      if (toEnc == enc) {
          // Same encoding on both sides.
          if (toCr == CR_UNKNOWN || (toEnc == ASCIIEncoding.INSTANCE && toCr != CR_7BIT)) {
              // The appended code range is not needed in this case; forget it.
              cr = CR_UNKNOWN;
          } else if (cr == CR_UNKNOWN) {
              cr = codeRangeScan(enc, bytes, p, len);
          }
      } else {
          if (!toEnc.isAsciiCompatible() || !enc.isAsciiCompatible()) {
              // At least one side is not ASCII compatible: only trivial appends are allowed.
              if (len == 0) return cr2;
              if (value.getRealSize() == 0) {
                  // Receiver is empty: take over the appended bytes together with their encoding.
                  System.arraycopy(bytes, p, value.getUnsafeBytes(), value.getBegin() + value.getRealSize(), len);
                  value.setRealSize(value.getRealSize() + len);
                  setEncodingAndCodeRange(enc, cr);
                  return cr2;
              }
              throw getRuntime().newEncodingCompatibilityError("incompatible character encodings: " + toEnc + " and " + enc);
          }
          // Both encodings are ASCII compatible but different: code ranges decide compatibility.
          if (cr == CR_UNKNOWN) cr = codeRangeScan(enc, bytes, p, len);
          if (toCr == CR_UNKNOWN) {
              if (toEnc == ASCIIEncoding.INSTANCE || cr != CR_7BIT) toCr = scanForCodeRange();
          }
      }
      // cr2 is used as an "out requested" flag: non-zero means report cr back to the caller.
      if (cr2 != 0) cr2 = cr;
      // Different encodings can only be mixed when at least one side is pure 7-bit.
      if (toEnc != enc && toCr != CR_7BIT && cr != CR_7BIT) {
          throw getRuntime().newEncodingCompatibilityError("incompatible character encodings: " + toEnc + " and " + enc);
      }
      // Decide the encoding and code range of the combined string.
      final int resCr;
      final Encoding resEnc;
      if (toCr == CR_UNKNOWN) {
          resEnc = toEnc;
          resCr = CR_UNKNOWN;
      } else if (toCr == CR_7BIT) {
          if (cr == CR_7BIT) {
              // NOTE(review): when both sides are 7-bit this keeps the receiver's encoding only if it is
              // ASCIIEncoding and otherwise switches to enc -- confirm against MRI's rb_enc_cr_str_buf_cat.
              resEnc = toEnc == ASCIIEncoding.INSTANCE ? toEnc : enc;
              resCr = CR_7BIT;
          } else {
              // Receiver is 7-bit, appended bytes are not: the appended side wins.
              resEnc = enc;
              resCr = cr;
          }
      } else if (toCr == CR_VALID) {
          resEnc = toEnc;
          resCr = toCr;
      } else {
          // Any remaining receiver code range: appending at least one byte resets it to unknown.
          resEnc = toEnc;
          resCr = len > 0 ? CR_UNKNOWN : toCr;
      }
      // Note that this check runs after modify() above has already been called with len.
      if (len < 0) throw getRuntime().newArgumentError("negative string size (or size too big)");
      System.arraycopy(bytes, p, value.getUnsafeBytes(), value.getBegin() + value.getRealSize(), len);
      value.setRealSize(value.getRealSize() + len);
      setEncodingAndCodeRange(resEnc, resCr);
      return cr2;
  }
  1067. public final int cat(byte[]bytes, int p, int len, Encoding enc) {
  1068. return cat(bytes, p, len, enc, CR_UNKNOWN, 0);
  1069. }
  1070. public final RubyString catAscii(byte[]bytes, int p, int len) {
  1071. Encoding enc = value.getEncoding();
  1072. if (enc.isAsciiCompatible()) {
  1073. cat(bytes, p, len, enc, CR_7BIT, 0);
  1074. } else {
  1075. byte buf[] = new byte[enc.maxLength()];
  1076. int end = p + len;
  1077. while (p < end) {
  1078. int c = bytes[p];
  1079. int cl = codeLength(getRuntime(), enc, c);
  1080. enc.codeToMbc(c, buf, 0);
  1081. cat(buf, 0, cl, enc, CR_VALID, 0);
  1082. p++;
  1083. }
  1084. }
  1085. return this;
  1086. }
  1087. /** rb_str_replace_m
  1088. *
  1089. */
  1090. @JRubyMethod(name = {"replace", "initialize_copy"}, required = 1, compat = RUBY1_8)
  1091. public IRubyObject replace(IRubyObject other) {
  1092. if (this == other) return this;
  1093. replaceCommon(other);
  1094. return this;
  1095. }
  1096. @JRubyMethod(name = {"replace", "initialize_copy"}, required = 1, compat = RUBY1_9)
  1097. public RubyString replace19(IRubyObject other) {
  1098. modifyCheck();
  1099. if (this == other) return this;
  1100. setCodeRange(replaceCommon(other).getCodeRange()); // encoding doesn't have to be copied.
  1101. return this;
  1102. }
  1103. private RubyString replaceCommon(IRubyObject other) {
  1104. modifyCheck();
  1105. RubyString otherStr = other.convertToString();
  1106. otherStr.shareLevel = shareLevel = SHARE_LEVEL_BYTELIST;
  1107. value = otherStr.value;
  1108. infectBy(otherStr);
  1109. return otherStr;
  1110. }
  1111. @JRubyMethod(name = "clear", compat = RUBY1_9)
  1112. public RubyString clear() {
  1113. modifyCheck();
  1114. Encoding enc = value.getEncoding();
  1115. EmptyByteListHolder holder = getEmptyByteList(enc);
  1116. value = holder.bytes;
  1117. shareLevel = SHARE_LEVEL_BYTELIST;
  1118. setCodeRange(holder.cr);
  1119. return this;
  1120. }
  1121. @JRubyMethod(name = "reverse", compat = RUBY1_8)
  1122. public IRubyObject reverse(ThreadContext context) {
  1123. Ruby runtime = context.getRuntime();
  1124. if (value.getRealSize() <= 1) return strDup(context.getRuntime());
  1125. byte[]bytes = value.getUnsafeBytes();
  1126. int p = value.getBegin();
  1127. int len = value.getRealSize();
  1128. byte[]obytes = new byte[len];
  1129. for (int i = 0; i <= len >> 1; i++) {
  1130. obytes[i] = bytes[p + len - i - 1];
  1131. obytes[len - i - 1] = bytes[p + i];
  1132. }
  1133. return new RubyString(runtime, getMetaClass(), new ByteList(obytes, false)).infectBy(this);
  1134. }
  /**
   * 1.9 {@code reverse}: returns a new string, of the receiver's meta class
   * and infected by the receiver, holding the receiver's characters in reverse
   * order. Strings of at most one byte are simply duplicated with
   * {@code strDup}.
   *
   * Side effect on the receiver: if its code range is CR_UNKNOWN when the
   * copy is finished, it is set to CR_7BIT or CR_VALID depending on what the
   * copy loop observed.
   *
   * @param context current thread context
   * @return the reversed copy, carrying the receiver's encoding
   */
  @JRubyMethod(name = "reverse", compat = RUBY1_9)
  public IRubyObject reverse19(ThreadContext context) {
      Ruby runtime = context.getRuntime();
      if (value.getRealSize() <= 1) return strDup(context.getRuntime());
      byte[]bytes = value.getUnsafeBytes();
      int p = value.getBegin();
      int len = value.getRealSize();
      byte[]obytes = new byte[len];
      // Stays true unless the multi-byte loop below meets a character that is
      // longer than one byte or has its high bit set.
      boolean single = true;
      Encoding enc = value.getEncoding();
      // this really needs to be inlined here
      if (singleByteOptimizable(enc)) {
          // One byte per character: mirror the bytes from both ends.
          for (int i = 0; i <= len >> 1; i++) {
              obytes[i] = bytes[p + len - i - 1];
              obytes[len - i - 1] = bytes[p + i];
          }
      } else {
          // Walk the source front to back one character at a time and fill the
          // destination back to front, keeping each character's bytes in order.
          int end = p + len;
          int op = len;
          while (p < end) {
              int cl = StringSupport.length(enc, bytes, p, end);
              if (cl > 1 || (bytes[p] & 0x80) != 0) {
                  single = false;
                  op -= cl;
                  System.arraycopy(bytes, p, obytes, op, cl);
                  p += cl;
              } else {
                  obytes[--op] = bytes[p++];
              }
          }
      }
      RubyString result = new RubyString(runtime, getMetaClass(), new ByteList(obytes, false));
      // Updates the receiver (not the result) with what was learned during the copy.
      // NOTE(review): on the singleByteOptimizable path `single` is never cleared, so an unknown
      // code range is recorded as CR_7BIT here -- confirm that singleByteOptimizable guarantees this.
      if (getCodeRange() == CR_UNKNOWN) setCodeRange(single ? CR_7BIT : CR_VALID);
      Encoding encoding = value.getEncoding();
      result.value.setEncoding(encoding);
      result.copyCodeRangeForSubstr(this, encoding);
      return result.infectBy(this);
  }
  1173. @JRubyMethod(name = "reverse!", compat = RUBY1_8)
  1174. public RubyString reverse_bang(ThreadContext context)

Large files are truncated, but you can click here to view the full file