PageRenderTime 67ms CodeModel.GetById 20ms RepoModel.GetById 0ms app.codeStats 1ms

/Languages/IronPython/IronPython/Runtime/Operations/StringOps.cs

http://github.com/IronLanguages/main
C# | 2751 lines | 2393 code | 267 blank | 91 comment | 426 complexity | 95b482bb7d0d13067ccac8e62c9c8e63 MD5 | raw file
Possible License(s): CPL-1.0, BSD-3-Clause, ISC, GPL-2.0, MPL-2.0-no-copyleft-exception
  1. /* ****************************************************************************
  2. *
  3. * Copyright (c) Microsoft Corporation.
  4. *
  5. * This source code is subject to terms and conditions of the Apache License, Version 2.0. A
  6. * copy of the license can be found in the License.html file at the root of this distribution. If
  7. * you cannot locate the Apache License, Version 2.0, please send an email to
  8. * dlr@microsoft.com. By using this source code in any fashion, you are agreeing to be bound
  9. * by the terms of the Apache License, Version 2.0.
  10. *
  11. * You must not remove this notice, or any other, from this software.
  12. *
  13. *
  14. * ***************************************************************************/
  15. using System;
  16. using System.Collections;
  17. using System.Collections.Generic;
  18. using System.Diagnostics;
  19. using System.Globalization;
  20. using System.Reflection;
  21. using System.Runtime.InteropServices;
  22. using System.Text;
  23. using IronPython.Runtime.Exceptions;
  24. using IronPython.Runtime.Types;
  25. using Microsoft.Scripting;
  26. using Microsoft.Scripting.Runtime;
  27. using Microsoft.Scripting.Utils;
  28. #if FEATURE_NUMERICS
  29. using System.Numerics;
  30. #else
  31. using Microsoft.Scripting.Math;
  32. #endif
  33. using SpecialNameAttribute = System.Runtime.CompilerServices.SpecialNameAttribute;
  34. namespace IronPython.Runtime.Operations {
  35. /// <summary>
  36. /// ExtensibleString is the base class that is used for types the user defines
  37. /// that derive from string. It carries along with it the string's value and
  38. /// our converter recognizes it as a string.
  39. /// </summary>
  40. public class ExtensibleString : Extensible<string>, ICodeFormattable, IStructuralEquatable
  41. #if CLR2
  42. , IValueEquality
  43. #endif
  44. {
  45. public ExtensibleString() : base(String.Empty) { }
  46. public ExtensibleString(string self) : base(self) { }
  47. public override string ToString() {
  48. return Value;
  49. }
  50. #region ICodeFormattable Members
  51. public virtual string/*!*/ __repr__(CodeContext/*!*/ context) {
  52. return StringOps.Quote(Value);
  53. }
  54. #endregion
  55. [return: MaybeNotImplemented]
  56. public object __eq__(object other) {
  57. if (other is string || other is ExtensibleString || other is Bytes) {
  58. return ScriptingRuntimeHelpers.BooleanToObject(EqualsWorker(other));
  59. }
  60. return NotImplementedType.Value;
  61. }
  62. [return: MaybeNotImplemented]
  63. public object __ne__(object other) {
  64. if (other is string || other is ExtensibleString || other is Bytes) {
  65. return ScriptingRuntimeHelpers.BooleanToObject(!EqualsWorker(other));
  66. }
  67. return NotImplementedType.Value;
  68. }
  69. #region IValueEquality members
  70. #if CLR2
  71. int IValueEquality.GetValueHashCode() {
  72. return GetHashCode();
  73. }
  74. bool IValueEquality.ValueEquals(object other) {
  75. return EqualsWorker(other);
  76. }
  77. #endif
  78. #endregion
  79. #region IStructuralEquatable Members
  80. int IStructuralEquatable.GetHashCode(IEqualityComparer comparer) {
  81. if (comparer is PythonContext.PythonEqualityComparer) {
  82. return GetHashCode();
  83. }
  84. return ((IStructuralEquatable)PythonTuple.MakeTuple(Value.ToCharArray())).GetHashCode(comparer);
  85. }
  86. bool IStructuralEquatable.Equals(object other, IEqualityComparer comparer) {
  87. if (comparer is PythonContext.PythonEqualityComparer) {
  88. return EqualsWorker(other);
  89. }
  90. ExtensibleString es = other as ExtensibleString;
  91. if (es != null) return EqualsWorker(es.Value, comparer);
  92. string os = other as string;
  93. if (os != null) return EqualsWorker(os, comparer);
  94. Bytes tempBytes = other as Bytes;
  95. if (tempBytes != null) return EqualsWorker(tempBytes.ToString(), comparer);
  96. return false;
  97. }
  98. private bool EqualsWorker(object other) {
  99. if (other == null) return false;
  100. ExtensibleString es = other as ExtensibleString;
  101. if (es != null) return Value == es.Value;
  102. string os = other as string;
  103. if (os != null) return Value == os;
  104. Bytes tempBytes = other as Bytes;
  105. if (tempBytes != null) return Value == tempBytes.ToString();
  106. return false;
  107. }
  108. private bool EqualsWorker(string/*!*/ other, IEqualityComparer comparer) {
  109. Debug.Assert(other != null);
  110. if (Value.Length != other.Length) {
  111. return false;
  112. } else if (Value.Length == 0) {
  113. // 2 empty strings are equal
  114. return true;
  115. }
  116. for (int i = 0; i < Value.Length; i++) {
  117. if (!comparer.Equals(Value[i], other[i])) {
  118. return false;
  119. }
  120. }
  121. return true;
  122. }
  123. #endregion
  124. #region ISequence Members
  125. public virtual object this[int index] {
  126. get { return ScriptingRuntimeHelpers.CharToString(Value[index]); }
  127. }
  128. public object this[Slice slice] {
  129. get { return StringOps.GetItem(Value, slice); }
  130. }
  131. public object __getslice__(int start, int stop) {
  132. return StringOps.__getslice__(Value, start, stop);
  133. }
  134. #endregion
  135. #region IPythonContainer Members
  136. public virtual int __len__() {
  137. return Value.Length;
  138. }
  139. public virtual bool __contains__(object value) {
  140. if (value is string) return Value.Contains((string)value);
  141. else if (value is ExtensibleString) return Value.Contains(((ExtensibleString)value).Value);
  142. else if (value is Bytes) return Value.Contains(value.ToString());
  143. throw PythonOps.TypeErrorForBadInstance("expected string, got {0}", value);
  144. }
  145. #endregion
  146. }
  147. /// <summary>
  148. /// StringOps is the static class that contains the methods defined on strings, i.e. 'abc'
  149. ///
  150. /// Here we define all of the methods that a Python user would see when doing dir('abc').
  151. /// If the user is running in a CLS aware context they will also see all of the methods
  152. /// defined in the CLS System.String type.
  153. /// </summary>
  154. public static class StringOps {
  155. internal const int LowestUnicodeValue = 0x7f;
  156. internal static object FastNew(CodeContext/*!*/ context, object x) {
  157. if (x == null) {
  158. return "None";
  159. }
  160. string xstr = (x as string);
  161. if (xstr != null) {
  162. return xstr;
  163. }
  164. // we don't invoke PythonOps.StringRepr here because we want to return the
  165. // Extensible<string> directly back if that's what we received from __str__.
  166. object value = PythonContext.InvokeUnaryOperator(context, UnaryOperators.String, x);
  167. if (value is string || value is Extensible<string>) {
  168. return value;
  169. }
  170. throw PythonOps.TypeError("expected str, got {0} from __str__", DynamicHelpers.GetPythonType(value).Name);
  171. }
  172. internal static string FastNewUnicode(CodeContext context, object value, object encoding, object errors) {
  173. string strErrors = errors as string;
  174. if (strErrors == null) {
  175. throw PythonOps.TypeError("unicode() argument 3 must be string, not {0}", PythonTypeOps.GetName(errors));
  176. }
  177. if (value != null) {
  178. string strValue = value as string;
  179. if (strValue != null) {
  180. return StringOps.RawDecode(context, strValue, encoding, strErrors);
  181. }
  182. Extensible<string> es = value as Extensible<string>;
  183. if (es != null) {
  184. return StringOps.RawDecode(context, es.Value, encoding, strErrors);
  185. }
  186. Bytes bytes = value as Bytes;
  187. if (bytes != null) {
  188. return StringOps.RawDecode(context, bytes.ToString(), encoding, strErrors);
  189. }
  190. PythonBuffer buffer = value as PythonBuffer;
  191. if (buffer != null) {
  192. return StringOps.RawDecode(context, buffer.ToString(), encoding, strErrors);
  193. }
  194. }
  195. throw PythonOps.TypeError("coercing to Unicode: need string or buffer, {0} found", PythonTypeOps.GetName(value));
  196. }
  197. internal static object FastNewUnicode(CodeContext context, object value, object encoding) {
  198. return FastNewUnicode(context, value, encoding, "strict");
  199. }
  200. internal static object FastNewUnicode(CodeContext context, object value) {
  201. if (value == null) {
  202. return "None";
  203. } else if (value is string) {
  204. return value;
  205. }
  206. object res;
  207. OldInstance oi = value as OldInstance;
  208. if (oi != null &&
  209. (oi.TryGetBoundCustomMember(context, "__unicode__", out res) || oi.TryGetBoundCustomMember(context, "__str__", out res))) {
  210. res = context.LanguageContext.Call(context, res);
  211. if (res is string || res is Extensible<string>) {
  212. return res;
  213. }
  214. throw PythonOps.TypeError("coercing to Unicode: expected string, got {0}", PythonTypeOps.GetName(value));
  215. }
  216. if (PythonTypeOps.TryInvokeUnaryOperator(context, value, "__unicode__", out res) ||
  217. PythonTypeOps.TryInvokeUnaryOperator(context, value, "__str__", out res)) {
  218. if (res is string || res is Extensible<string>) {
  219. return res;
  220. }
  221. throw PythonOps.TypeError("coercing to Unicode: expected string, got {0}", PythonTypeOps.GetName(value));
  222. }
  223. return FastNewUnicode(context, value, context.LanguageContext.DefaultEncoding.WebName, "strict");
  224. }
  225. #region Python Constructors
  226. [StaticExtensionMethod]
  227. public static object __new__(CodeContext/*!*/ context, PythonType cls) {
  228. if (cls == TypeCache.String) {
  229. return "";
  230. } else {
  231. return cls.CreateInstance(context);
  232. }
  233. }
  234. [StaticExtensionMethod]
  235. public static object __new__(CodeContext/*!*/ context, PythonType cls, object @object) {
  236. if (cls == TypeCache.String) {
  237. return FastNew(context, @object);
  238. } else {
  239. return cls.CreateInstance(context, __new__(context, TypeCache.String, @object));
  240. }
  241. }
  242. [StaticExtensionMethod]
  243. public static object __new__(CodeContext/*!*/ context, PythonType cls, [NotNull]string @object) {
  244. if (cls == TypeCache.String) {
  245. return @object;
  246. } else {
  247. return cls.CreateInstance(context, @object);
  248. }
  249. }
  250. [StaticExtensionMethod]
  251. public static object __new__(CodeContext/*!*/ context, PythonType cls, [NotNull]ExtensibleString @object) {
  252. if (cls == TypeCache.String) {
  253. return FastNew(context, @object);
  254. } else {
  255. return cls.CreateInstance(context, __new__(context, TypeCache.String, @object));
  256. }
  257. }
  258. [StaticExtensionMethod]
  259. public static object __new__(CodeContext/*!*/ context, PythonType cls, char @object) {
  260. if (cls == TypeCache.String) {
  261. return ScriptingRuntimeHelpers.CharToString(@object);
  262. } else {
  263. return cls.CreateInstance(context, __new__(context, TypeCache.String, @object));
  264. }
  265. }
  266. [StaticExtensionMethod]
  267. public static object __new__(CodeContext/*!*/ context, PythonType cls, [NotNull]BigInteger @object) {
  268. if (cls == TypeCache.String) {
  269. return @object.ToString();
  270. } else {
  271. return cls.CreateInstance(context, __new__(context, TypeCache.String, @object));
  272. }
  273. }
  274. [StaticExtensionMethod]
  275. public static object __new__(CodeContext/*!*/ context, PythonType cls, [NotNull]Extensible<BigInteger> @object) {
  276. if (cls == TypeCache.String) {
  277. return FastNew(context, @object);
  278. } else {
  279. return cls.CreateInstance(context, __new__(context, TypeCache.String, @object));
  280. }
  281. }
  282. [StaticExtensionMethod]
  283. public static object __new__(CodeContext/*!*/ context, PythonType cls, int @object) {
  284. if (cls == TypeCache.String) {
  285. return @object.ToString();
  286. } else {
  287. return cls.CreateInstance(context, __new__(context, TypeCache.String, @object));
  288. }
  289. }
  290. [StaticExtensionMethod]
  291. public static object __new__(CodeContext/*!*/ context, PythonType cls, bool @object) {
  292. if (cls == TypeCache.String) {
  293. return @object.ToString();
  294. } else {
  295. return cls.CreateInstance(context, __new__(context, TypeCache.String, @object));
  296. }
  297. }
  298. [StaticExtensionMethod]
  299. public static object __new__(CodeContext/*!*/ context, PythonType cls, double @object) {
  300. if (cls == TypeCache.String) {
  301. return DoubleOps.__str__(context, @object);
  302. } else {
  303. return cls.CreateInstance(context, __new__(context, TypeCache.String, @object));
  304. }
  305. }
  306. [StaticExtensionMethod]
  307. public static object __new__(CodeContext/*!*/ context, PythonType cls, Extensible<double> @object) {
  308. if (cls == TypeCache.String) {
  309. return FastNew(context, @object);
  310. } else {
  311. return cls.CreateInstance(context, __new__(context, TypeCache.String, @object));
  312. }
  313. }
  314. [StaticExtensionMethod]
  315. public static object __new__(CodeContext/*!*/ context, PythonType cls, float @object) {
  316. if (cls == TypeCache.String) {
  317. return SingleOps.__str__(context, @object);
  318. } else {
  319. return cls.CreateInstance(context, __new__(context, TypeCache.String, @object));
  320. }
  321. }
  322. [StaticExtensionMethod]
  323. public static object __new__(CodeContext/*!*/ context, PythonType cls,
  324. object @string,
  325. [DefaultParameterValue(null)] string encoding,
  326. [DefaultParameterValue("strict")] string errors) {
  327. string str = @string as string;
  328. if (str == null) throw PythonOps.TypeError("converting to unicode: need string, got {0}", DynamicHelpers.GetPythonType(@string).Name);
  329. if (cls == TypeCache.String) {
  330. return decode(context, str, encoding ?? PythonContext.GetContext(context).GetDefaultEncodingName(), errors);
  331. } else {
  332. return cls.CreateInstance(context, __new__(context, TypeCache.String, str, encoding, errors));
  333. }
  334. }
  335. #endregion
  336. #region Python __ methods
  337. public static bool __contains__(string s, [BytesConversion]string item) {
  338. return s.Contains(item);
  339. }
  340. public static bool __contains__(string s, char item) {
  341. return s.IndexOf(item) != -1;
  342. }
  343. public static string __format__(CodeContext/*!*/ context, string self, [BytesConversion]string formatSpec) {
  344. return ObjectOps.__format__(context, self, formatSpec);
  345. }
  346. public static int __len__(string s) {
  347. return s.Length;
  348. }
  349. [SpecialName]
  350. public static string GetItem(string s, int index) {
  351. return ScriptingRuntimeHelpers.CharToString(s[PythonOps.FixIndex(index, s.Length)]);
  352. }
  353. [SpecialName]
  354. public static string GetItem(string s, object index) {
  355. return GetItem(s, Converter.ConvertToIndex(index));
  356. }
  357. [SpecialName]
  358. public static string GetItem(string s, Slice slice) {
  359. if (slice == null) throw PythonOps.TypeError("string indices must be slices or integers");
  360. int start, stop, step;
  361. slice.indices(s.Length, out start, out stop, out step);
  362. if (step == 1) {
  363. return stop > start ? s.Substring(start, stop - start) : String.Empty;
  364. } else {
  365. int index = 0;
  366. char[] newData;
  367. if (step > 0) {
  368. if (start > stop) return String.Empty;
  369. int icnt = (stop - start + step - 1) / step;
  370. newData = new char[icnt];
  371. for (int i = start; i < stop; i += step) {
  372. newData[index++] = s[i];
  373. }
  374. } else {
  375. if (start < stop) return String.Empty;
  376. int icnt = (stop - start + step + 1) / step;
  377. newData = new char[icnt];
  378. for (int i = start; i > stop; i += step) {
  379. newData[index++] = s[i];
  380. }
  381. }
  382. return new string(newData);
  383. }
  384. }
  385. public static string __getslice__(string self, int x, int y) {
  386. Slice.FixSliceArguments(self.Length, ref x, ref y);
  387. if (x >= y) return String.Empty;
  388. return self.Substring(x, y - x);
  389. }
  390. #endregion
  391. #region Public Python methods
  392. /// <summary>
  393. /// Returns a copy of this string converted to uppercase
  394. /// </summary>
  395. public static string capitalize(this string self) {
  396. if (self.Length == 0) return self;
  397. return Char.ToUpperInvariant(self[0]) + self.Substring(1).ToLowerInvariant();
  398. }
  399. // default fillchar (padding char) is a space
  400. public static string center(this string self, int width) {
  401. return center(self, width, ' ');
  402. }
  403. public static string center(this string self, int width, char fillchar) {
  404. int spaces = width - self.Length;
  405. if (spaces <= 0) return self;
  406. StringBuilder ret = new StringBuilder(width);
  407. ret.Append(fillchar, spaces / 2);
  408. ret.Append(self);
  409. ret.Append(fillchar, (spaces + 1) / 2);
  410. return ret.ToString();
  411. }
  412. public static int count(this string self, [BytesConversion]string sub) {
  413. return count(self, sub, 0, self.Length);
  414. }
  415. public static int count(this string self, [BytesConversion]string sub, int start) {
  416. return count(self, sub, start, self.Length);
  417. }
  418. public static int count(this string self, [BytesConversion]string ssub, int start, int end) {
  419. if (ssub == null) throw PythonOps.TypeError("expected string for 'sub' argument, got NoneType");
  420. if (start > self.Length) {
  421. return 0;
  422. }
  423. start = PythonOps.FixSliceIndex(start, self.Length);
  424. end = PythonOps.FixSliceIndex(end, self.Length);
  425. if (ssub.Length == 0) {
  426. return Math.Max((end - start) + 1, 0);
  427. }
  428. int count = 0;
  429. CompareInfo c = CultureInfo.InvariantCulture.CompareInfo;
  430. while (true) {
  431. if (end <= start) break;
  432. int index = c.IndexOf(self, ssub, start, end - start, CompareOptions.Ordinal);
  433. if (index == -1) break;
  434. count++;
  435. start = index + ssub.Length;
  436. }
  437. return count;
  438. }
  439. public static string decode(CodeContext/*!*/ context, string s) {
  440. return decode(context, s, Missing.Value, "strict");
  441. }
  442. public static string decode(CodeContext/*!*/ context, string s, [Optional]object encoding, [DefaultParameterValue("strict")]string errors) {
  443. return RawDecode(context, s, encoding, errors);
  444. }
  445. public static string encode(CodeContext/*!*/ context, string s, [Optional]object encoding, [DefaultParameterValue("strict")]string errors) {
  446. return RawEncode(context, s, encoding, errors);
  447. }
  448. private static string CastString(object o) {
  449. string res = o as string;
  450. if (res != null) {
  451. return res;
  452. }
  453. return ((Extensible<string>)o).Value;
  454. }
  455. internal static string AsString(object o) {
  456. string res = o as string;
  457. if (res != null) {
  458. return res;
  459. }
  460. Extensible<string> es = o as Extensible<string>;
  461. if (es != null) {
  462. return es.Value;
  463. }
  464. return null;
  465. }
  466. public static bool endswith(this string self, object suffix) {
  467. TryStringOrTuple(suffix);
  468. if (suffix is PythonTuple)
  469. return endswith(self, (PythonTuple)suffix);
  470. else
  471. return endswith(self, CastString(suffix));
  472. }
  473. public static bool endswith(this string self, object suffix, int start) {
  474. TryStringOrTuple(suffix);
  475. if (suffix is PythonTuple)
  476. return endswith(self, (PythonTuple)suffix, start);
  477. else
  478. return endswith(self, CastString(suffix), start);
  479. }
  480. public static bool endswith(this string self, object suffix, int start, int end) {
  481. TryStringOrTuple(suffix);
  482. if (suffix is PythonTuple)
  483. return endswith(self, (PythonTuple)suffix, start, end);
  484. else
  485. return endswith(self, CastString(suffix), start, end);
  486. }
  487. public static string expandtabs(string self) {
  488. return expandtabs(self, 8);
  489. }
  490. public static string expandtabs(this string self, int tabsize) {
  491. StringBuilder ret = new StringBuilder(self.Length * 2);
  492. string v = self;
  493. int col = 0;
  494. for (int i = 0; i < v.Length; i++) {
  495. char ch = v[i];
  496. switch (ch) {
  497. case '\n':
  498. case '\r': col = 0; ret.Append(ch); break;
  499. case '\t':
  500. if (tabsize > 0) {
  501. int tabs = tabsize - (col % tabsize);
  502. int existingSize = ret.Capacity;
  503. ret.Capacity = checked(existingSize + tabs);
  504. ret.Append(' ', tabs);
  505. col = 0;
  506. }
  507. break;
  508. default:
  509. col++;
  510. ret.Append(ch);
  511. break;
  512. }
  513. }
  514. return ret.ToString();
  515. }
  516. public static int find(this string self, [BytesConversion]string sub) {
  517. if (sub == null) throw PythonOps.TypeError("expected string, got NoneType");
  518. if (sub.Length == 1) return self.IndexOf(sub[0]);
  519. CompareInfo c = CultureInfo.InvariantCulture.CompareInfo;
  520. return c.IndexOf(self, sub, CompareOptions.Ordinal);
  521. }
  522. public static int find(this string self, [BytesConversion]string sub, int start) {
  523. if (sub == null) throw PythonOps.TypeError("expected string, got NoneType");
  524. if (start > self.Length) return -1;
  525. start = PythonOps.FixSliceIndex(start, self.Length);
  526. CompareInfo c = CultureInfo.InvariantCulture.CompareInfo;
  527. return c.IndexOf(self, sub, start, CompareOptions.Ordinal);
  528. }
  529. public static int find(this string self, [BytesConversion]string sub, BigInteger start) {
  530. if (sub == null) throw PythonOps.TypeError("expected string, got NoneType");
  531. if (start > self.Length) return -1;
  532. return find(self, sub, (int)start);
  533. }
  534. public static int find(this string self, [BytesConversion]string sub, int start, int end) {
  535. if (sub == null) throw PythonOps.TypeError("expected string, got NoneType");
  536. if (start > self.Length) return -1;
  537. start = PythonOps.FixSliceIndex(start, self.Length);
  538. end = PythonOps.FixSliceIndex(end, self.Length);
  539. if (end < start) return -1;
  540. CompareInfo c = CultureInfo.InvariantCulture.CompareInfo;
  541. return c.IndexOf(self, sub, start, end - start, CompareOptions.Ordinal);
  542. }
  543. public static int find(this string self, [BytesConversion]string sub, BigInteger start, BigInteger end) {
  544. if (sub == null) throw PythonOps.TypeError("expected string, got NoneType");
  545. if (start > self.Length) return -1;
  546. return find(self, sub, (int)start, (int)end);
  547. }
  548. public static int find(this string self, [BytesConversion]string sub, object start, [DefaultParameterValue(null)]object end) {
  549. return find(self, sub, CheckIndex(start, 0), CheckIndex(end, self.Length));
  550. }
  551. public static int index(this string self, [BytesConversion]string sub) {
  552. if (sub == null) throw PythonOps.TypeError("expected string, got NoneType");
  553. return index(self, sub, 0, self.Length);
  554. }
  555. public static int index(this string self, [BytesConversion]string sub, int start) {
  556. if (sub == null) throw PythonOps.TypeError("expected string, got NoneType");
  557. return index(self, sub, start, self.Length);
  558. }
  559. public static int index(this string self, [BytesConversion]string sub, int start, int end) {
  560. if (sub == null) throw PythonOps.TypeError("expected string, got NoneType");
  561. int ret = find(self, sub, start, end);
  562. if (ret == -1) throw PythonOps.ValueError("substring {0} not found in {1}", sub, self);
  563. return ret;
  564. }
  565. public static int index(this string self, [BytesConversion]string sub, object start, [DefaultParameterValue(null)]object end) {
  566. return index(self, sub, CheckIndex(start, 0), CheckIndex(end, self.Length));
  567. }
  568. public static bool isalnum(this string self) {
  569. if (self.Length == 0) return false;
  570. string v = self;
  571. for (int i = v.Length - 1; i >= 0; i--) {
  572. if (!Char.IsLetterOrDigit(v, i)) return false;
  573. }
  574. return true;
  575. }
  576. public static bool isalpha(this string self) {
  577. if (self.Length == 0) return false;
  578. string v = self;
  579. for (int i = v.Length - 1; i >= 0; i--) {
  580. if (!Char.IsLetter(v, i)) return false;
  581. }
  582. return true;
  583. }
  584. public static bool isdigit(this string self) {
  585. if (self.Length == 0) return false;
  586. string v = self;
  587. for (int i = v.Length - 1; i >= 0; i--) {
  588. // CPython considers the circled digits to be digits
  589. if (!Char.IsDigit(v, i) && (v[i] < '\u2460' || v[i] > '\u2468')) return false;
  590. }
  591. return true;
  592. }
  593. public static bool isspace(this string self) {
  594. if (self.Length == 0) return false;
  595. string v = self;
  596. for (int i = v.Length - 1; i >= 0; i--) {
  597. if (!Char.IsWhiteSpace(v, i)) return false;
  598. }
  599. return true;
  600. }
  601. public static bool isdecimal(this string self) {
  602. return isnumeric(self);
  603. }
  604. public static bool isnumeric(this string self) {
  605. if (String.IsNullOrEmpty(self)) return false;
  606. foreach (char c in self) {
  607. if (!Char.IsDigit(c)) return false;
  608. }
  609. return true;
  610. }
  611. public static bool islower(this string self) {
  612. if (self.Length == 0) return false;
  613. string v = self;
  614. bool hasLower = false;
  615. for (int i = v.Length - 1; i >= 0; i--) {
  616. if (!hasLower && Char.IsLower(v, i)) hasLower = true;
  617. if (Char.IsUpper(v, i)) return false;
  618. }
  619. return hasLower;
  620. }
  621. public static bool isupper(this string self) {
  622. if (self.Length == 0) return false;
  623. string v = self;
  624. bool hasUpper = false;
  625. for (int i = v.Length - 1; i >= 0; i--) {
  626. if (!hasUpper && Char.IsUpper(v, i)) hasUpper = true;
  627. if (Char.IsLower(v, i)) return false;
  628. }
  629. return hasUpper;
  630. }
  631. /// <summary>
  632. /// return true if self is a titlecased string and there is at least one
  633. /// character in self; also, uppercase characters may only follow uncased
  634. /// characters (e.g. whitespace) and lowercase characters only cased ones.
  635. /// return false otherwise.
  636. /// </summary>
  637. public static bool istitle(this string self) {
  638. if (self == null || self.Length == 0) return false;
  639. string v = self;
  640. bool prevCharCased = false, currCharCased = false, containsUpper = false;
  641. for (int i = 0; i < v.Length; i++) {
  642. if (Char.IsUpper(v, i) || CharUnicodeInfo.GetUnicodeCategory(v, i) == UnicodeCategory.TitlecaseLetter) {
  643. containsUpper = true;
  644. if (prevCharCased)
  645. return false;
  646. else
  647. currCharCased = true;
  648. } else if (Char.IsLower(v, i))
  649. if (!prevCharCased)
  650. return false;
  651. else
  652. currCharCased = true;
  653. else
  654. currCharCased = false;
  655. prevCharCased = currCharCased;
  656. }
  657. // if we've gone through the whole string and haven't encountered any rule
  658. // violations but also haven't seen an Uppercased char, then this is not a
  659. // title e.g. '\n', all whitespace etc.
  660. return containsUpper;
  661. }
  662. public static bool isunicode(this string self) {
  663. foreach (char c in self) {
  664. if (c >= LowestUnicodeValue) return true;
  665. }
  666. return false;
  667. }
  668. /// <summary>
  669. /// Return a string which is the concatenation of the strings
  670. /// in the sequence seq. The separator between elements is the
  671. /// string providing this method
  672. /// </summary>
  673. public static string join(this string self, object sequence) {
  674. IEnumerator seq = PythonOps.GetEnumerator(sequence);
  675. if (!seq.MoveNext()) return "";
  676. // check if we have just a sequence of just one value - if so just
  677. // return that value.
  678. object curVal = seq.Current;
  679. if (!seq.MoveNext()) return Converter.ConvertToString(curVal);
  680. StringBuilder ret = new StringBuilder();
  681. AppendJoin(curVal, 0, ret);
  682. int index = 1;
  683. do {
  684. ret.Append(self);
  685. AppendJoin(seq.Current, index, ret);
  686. index++;
  687. } while (seq.MoveNext());
  688. return ret.ToString();
  689. }
  690. public static string join(this string/*!*/ self, [NotNull]List/*!*/ sequence) {
  691. if (sequence.__len__() == 0) return String.Empty;
  692. lock (sequence) {
  693. if (sequence.__len__() == 1) {
  694. return Converter.ConvertToString(sequence[0]);
  695. }
  696. StringBuilder ret = new StringBuilder();
  697. AppendJoin(sequence._data[0], 0, ret);
  698. for (int i = 1; i < sequence._size; i++) {
  699. if (!String.IsNullOrEmpty(self)) {
  700. ret.Append(self);
  701. }
  702. AppendJoin(sequence._data[i], i, ret);
  703. }
  704. return ret.ToString();
  705. }
  706. }
  707. public static string ljust(this string self, int width) {
  708. return ljust(self, width, ' ');
  709. }
  710. public static string ljust(this string self, int width, char fillchar) {
  711. if (width < 0) return self;
  712. int spaces = width - self.Length;
  713. if (spaces <= 0) return self;
  714. StringBuilder ret = new StringBuilder(width);
  715. ret.Append(self);
  716. ret.Append(fillchar, spaces);
  717. return ret.ToString();
  718. }
  719. // required for better match with cpython upper/lower
  720. private static CultureInfo CasingCultureInfo = new CultureInfo("en");
  721. public static string lower(this string self) {
  722. return CasingCultureInfo.TextInfo.ToLower(self);
  723. }
  724. internal static string ToLowerAsciiTriggered(this string self) {
  725. for (int i = 0; i < self.Length; i++) {
  726. if (self[i] >= 'A' && self[i] <= 'Z') {
  727. return self.ToLowerInvariant();
  728. }
  729. }
  730. return self;
  731. }
  732. public static string lstrip(this string self) {
  733. return self.TrimStart();
  734. }
  735. public static string lstrip(this string self, [BytesConversion]string chars) {
  736. if (chars == null) return lstrip(self);
  737. return self.TrimStart(chars.ToCharArray());
  738. }
  739. [return: SequenceTypeInfo(typeof(string))]
  740. public static PythonTuple partition(this string self, [BytesConversion]string sep) {
  741. if (sep == null)
  742. throw PythonOps.TypeError("expected string, got NoneType");
  743. if (sep.Length == 0)
  744. throw PythonOps.ValueError("empty separator");
  745. object[] obj = new object[3] { "", "", "" };
  746. if (self.Length != 0) {
  747. int index = find(self, sep);
  748. if (index == -1) {
  749. obj[0] = self;
  750. } else {
  751. obj[0] = self.Substring(0, index);
  752. obj[1] = sep;
  753. obj[2] = self.Substring(index + sep.Length, self.Length - index - sep.Length);
  754. }
  755. }
  756. return new PythonTuple(obj);
  757. }
  /// <summary>
  /// Returns a copy of the string with occurrences of <paramref name="old"/> replaced by
  /// <paramref name="new"/>. A negative <paramref name="count"/> replaces every occurrence;
  /// otherwise at most <paramref name="count"/> replacements are made, left to right.
  /// Raises TypeError when either <paramref name="old"/> or <paramref name="new"/> is null.
  /// </summary>
  public static string replace(this string self, [BytesConversion]string old, [BytesConversion]string @new,
      [DefaultParameterValue(-1)]int count) {
      // Reject a null replacement up front too; it would otherwise surface later as a
      // NullReferenceException when its length is read.
      if (old == null || @new == null) {
          throw PythonOps.TypeError("expected a character buffer object"); // cpython message
      }
      if (old.Length == 0) return ReplaceEmpty(self, @new, count);

      // Pre-compute the final size so the builder never has to grow.
      int occurrences = StringOps.count(self, old);
      int replacements = (count < 0 || count > occurrences) ? occurrences : count;
      int newLength = checked(self.Length - replacements * old.Length + replacements * @new.Length);

      StringBuilder ret = new StringBuilder(newLength);
      int index;
      int start = 0;
      while (count != 0 && (index = self.IndexOf(old, start, StringComparison.Ordinal)) != -1) {
          ret.Append(self, start, index - start);
          ret.Append(@new);
          start = index + old.Length;
          count--;
      }
      // Copy the tail straight from the source instead of allocating a temporary substring.
      ret.Append(self, start, self.Length - start);
      return ret.ToString();
  }
  /// <summary>
  /// Returns the highest index at which <paramref name="sub"/> occurs, or -1 if it does not occur.
  /// </summary>
  public static int rfind(this string self, [BytesConversion]string sub) {
      if (sub != null) {
          return rfind(self, sub, 0, self.Length);
      }
      throw PythonOps.TypeError("expected string, got NoneType");
  }
  /// <summary>
  /// Returns the highest index at or after <paramref name="start"/> at which
  /// <paramref name="sub"/> occurs, or -1 if there is none.
  /// </summary>
  public static int rfind(this string self, [BytesConversion]string sub, int start) {
      if (sub == null) {
          throw PythonOps.TypeError("expected string, got NoneType");
      }
      int length = self.Length;
      return start > length ? -1 : rfind(self, sub, start, length);
  }
  /// <summary>
  /// BigInteger overload of rfind: returns the highest index at or after
  /// <paramref name="start"/> at which <paramref name="sub"/> occurs, or -1.
  /// </summary>
  public static int rfind(this string self, [BytesConversion]string sub, BigInteger start) {
      if (sub == null) throw PythonOps.TypeError("expected string, got NoneType");
      if (start > self.Length) return -1;
      // A start at or below -Length behaves like 0 under slice rules; clamp before casting
      // so a huge negative value cannot overflow the conversion to int.
      int clampedStart = start < -self.Length ? 0 : (int)start;
      return rfind(self, sub, clampedStart, self.Length);
  }
  /// <summary>
  /// Core rfind: searches self[start:end] (slice-style bounds, negatives allowed) for the
  /// last ordinal occurrence of <paramref name="sub"/>. Returns its index in self, or -1.
  /// </summary>
  public static int rfind(this string self, [BytesConversion]string sub, int start, int end) {
      if (sub == null) {
          throw PythonOps.TypeError("expected string, got NoneType");
      }

      int length = self.Length;
      if (start > length) {
          return -1;
      }

      int lo = PythonOps.FixSliceIndex(start, length);
      int hi = PythonOps.FixSliceIndex(end, length);

      if (lo > hi) {
          // empty window: nothing can match, not even an empty string
          return -1;
      }
      if (sub.Length == 0) {
          // the empty string matches at the end of the window
          return hi;
      }
      if (hi == 0) {
          // window ends before the first character
          return -1;
      }

      CompareInfo comparer = CultureInfo.InvariantCulture.CompareInfo;
      return comparer.LastIndexOf(self, sub, hi - 1, hi - lo, CompareOptions.Ordinal);
  }
  /// <summary>
  /// BigInteger overload of rfind over self[start:end]. Bounds outside the int range are
  /// clamped to the string's extent before delegating to the int overload.
  /// </summary>
  public static int rfind(this string self, [BytesConversion]string sub, BigInteger start, BigInteger end) {
      if (sub == null) throw PythonOps.TypeError("expected string, got NoneType");
      if (start > self.Length) return -1;
      int length = self.Length;
      // Slice rules map anything <= -length to 0 and anything >= length to length, so clamping
      // first is result-preserving and keeps the int casts from overflowing.
      int clampedStart = start < -length ? 0 : (int)start;
      int clampedEnd = end > length ? length : (end < -length ? 0 : (int)end);
      return rfind(self, sub, clampedStart, clampedEnd);
  }
  /// <summary>
  /// rfind overload taking arbitrary index objects; a null bound means "start of string"
  /// for <paramref name="start"/> and "end of string" for <paramref name="end"/>.
  /// </summary>
  public static int rfind(this string self, [BytesConversion]string sub, object start, [DefaultParameterValue(null)]object end) {
      int from = CheckIndex(start, 0);
      int to = CheckIndex(end, self.Length);
      return rfind(self, sub, from, to);
  }
  /// <summary>
  /// Like rfind over the whole string, but raises ValueError when <paramref name="sub"/> is absent.
  /// </summary>
  public static int rindex(this string self, [BytesConversion]string sub) {
      int wholeLength = self.Length;
      return rindex(self, sub, 0, wholeLength);
  }
  /// <summary>
  /// Like rfind from <paramref name="start"/> to the end, but raises ValueError when nothing is found.
  /// </summary>
  public static int rindex(this string self, [BytesConversion]string sub, int start) {
      int wholeLength = self.Length;
      return rindex(self, sub, start, wholeLength);
  }
  /// <summary>
  /// Like rfind over self[start:end], but raises ValueError instead of returning -1.
  /// </summary>
  public static int rindex(this string self, [BytesConversion]string sub, int start, int end) {
      int position = rfind(self, sub, start, end);
      if (position != -1) {
          return position;
      }
      throw PythonOps.ValueError("substring {0} not found in {1}", sub, self);
  }
  /// <summary>
  /// rindex overload taking arbitrary index objects; null bounds default to 0 and the string length.
  /// </summary>
  public static int rindex(this string self, [BytesConversion]string sub, object start, [DefaultParameterValue(null)]object end) {
      int from = CheckIndex(start, 0);
      int to = CheckIndex(end, self.Length);
      return rindex(self, sub, from, to);
  }
  /// <summary>
  /// Right-justifies the string in a field of <paramref name="width"/> characters, padding with spaces.
  /// </summary>
  public static string rjust(this string self, int width) {
      const char pad = ' ';
      return rjust(self, width, pad);
  }
  /// <summary>
  /// Right-justifies the string in a field of <paramref name="width"/> characters, padding on
  /// the left with <paramref name="fillchar"/>. Returns the original string when it is
  /// already at least <paramref name="width"/> long (including zero/negative widths).
  /// </summary>
  public static string rjust(this string self, int width, char fillchar) {
      // Compare directly instead of testing (width - self.Length): that subtraction wraps
      // around for widths near int.MinValue and would be taken for a huge pad count.
      if (width <= self.Length) return self;
      return self.PadLeft(width, fillchar);
  }
  /// <summary>
  /// Splits the string at the last occurrence of <paramref name="sep"/> and returns the
  /// 3-tuple (head, sep, tail). When the separator is absent the result is ("", "", self).
  /// Raises TypeError for a null separator and ValueError for an empty one.
  /// </summary>
  [return: SequenceTypeInfo(typeof(string))]
  public static PythonTuple rpartition(this string self, [BytesConversion]string sep) {
      if (sep == null) {
          throw PythonOps.TypeError("expected string, got NoneType");
      }
      if (sep.Length == 0) {
          throw PythonOps.ValueError("empty separator");
      }

      string head = "", middle = "", tail = "";
      if (self.Length != 0) {
          int at = rfind(self, sep);
          if (at == -1) {
              tail = self;
          } else {
              int afterSep = at + sep.Length;
              head = self.Substring(0, at);
              middle = sep;
              tail = self.Substring(afterSep, self.Length - afterSep);
          }
      }
      return new PythonTuple(new object[] { head, middle, tail });
  }
  /// <summary>
  /// Splits on white-space. With no maxsplit limit the direction of the split does not
  /// matter, so this simply uses the same internal routine as split().
  /// </summary>
  public static List rsplit(this string self) {
      char[] whitespaceSeparators = null;
      return SplitInternal(self, whitespaceSeparators, -1);
  }
  /// <summary>
  /// Splits on <paramref name="sep"/> with no limit on the number of splits.
  /// </summary>
  public static List rsplit(this string self, [BytesConversion]string sep) {
      const int unlimited = -1;
      return rsplit(self, sep, unlimited);
  }
  /// <summary>
  /// Splits from the right, performing at most <paramref name="maxsplit"/> splits.
  /// Implemented by mirroring: reverse the string (and separator), run the ordinary
  /// left-to-right split, then reverse both the list order and every element.
  /// </summary>
  public static List rsplit(this string self, [BytesConversion]string sep, int maxsplit) {
      string mirrored = Reverse(self);
      string mirroredSep = sep == null ? null : Reverse(sep);

      List pieces = split(mirrored, mirroredSep, maxsplit);
      pieces.reverse();

      int pieceCount = pieces.__len__();
      if (pieceCount == 0) {
          return pieces;
      }

      List result = new List(pieceCount);
      foreach (string piece in pieces) {
          result.AddNoLock(Reverse(piece));
      }
      return result;
  }
  /// <summary>
  /// Returns the string with trailing white-space removed.
  /// </summary>
  public static string rstrip(this string self) {
      string trimmed = self.TrimEnd();
      return trimmed;
  }
  /// <summary>
  /// Returns the string with every trailing character that occurs in <paramref name="chars"/> removed.
  /// A null <paramref name="chars"/> strips white-space; an empty one strips nothing.
  /// </summary>
  public static string rstrip(this string self, [BytesConversion]string chars) {
      if (chars == null) return rstrip(self);
      // String.TrimEnd treats an empty array like null and would strip white-space,
      // whereas Python's rstrip('') leaves the string unchanged.
      if (chars.Length == 0) return self;
      return self.TrimEnd(chars.ToCharArray());
  }
  /// <summary>
  /// Splits the string on white-space with no limit on the number of splits.
  /// </summary>
  public static List split(this string self) {
      char[] whitespaceSeparators = null;
      return SplitInternal(self, whitespaceSeparators, -1);
  }
  /// <summary>
  /// Splits the string on <paramref name="sep"/> with no limit on the number of splits.
  /// </summary>
  public static List split(this string self, [BytesConversion]string sep) {
      const int unlimited = -1;
      return split(self, sep, unlimited);
  }
  /// <summary>
  /// Splits the string on <paramref name="sep"/>, performing at most
  /// <paramref name="maxsplit"/> splits. A null separator means "split on white-space".
  /// Raises ValueError for an empty separator.
  /// </summary>
  public static List split(this string self, [BytesConversion]string sep, int maxsplit) {
      if (sep == null) {
          if (maxsplit != 0) {
              return SplitInternal(self, (char[])null, maxsplit);
          }
          // Corner case for CPython compatibility: zero splits on white-space still
          // drops the leading white-space and yields a single element.
          List single = PythonOps.MakeEmptyList(1);
          single.AddNoLock(self.TrimStart());
          return single;
      }

      switch (sep.Length) {
          case 0:
              throw PythonOps.ValueError("empty separator");
          case 1:
              return SplitInternal(self, new char[] { sep[0] }, maxsplit);
          default:
              return SplitInternal(self, sep, maxsplit);
      }
  }
  /// <summary>
  /// Splits the string at line boundaries, dropping the line terminators.
  /// </summary>
  public static List splitlines(this string self) {
      const bool keepTerminators = false;
      return splitlines(self, keepTerminators);
  }
  /// <summary>
  /// Splits the string at line boundaries ('\n', '\r', "\r\n" and U+2028). Terminators are
  /// kept on each line only when <paramref name="keepends"/> is true. A trailing fragment
  /// without a terminator is returned as the last line; an empty trailing fragment is not.
  /// </summary>
  public static List splitlines(this string self, bool keepends) {
      List lines = new List();
      int length = self.Length;
      int lineStart = 0;
      int pos = 0;

      while (pos < length) {
          char ch = self[pos];
          if (ch != '\n' && ch != '\r' && ch != '\x2028') {
              pos++;
              continue;
          }

          // "\r\n" is a single two-character terminator; everything else is one character
          bool crlf = ch == '\r' && pos + 1 < length && self[pos + 1] == '\n';
          int terminatorLength = crlf ? 2 : 1;

          int contentLength = pos - lineStart;
          int taken = keepends ? contentLength + terminatorLength : contentLength;
          lines.AddNoLock(self.Substring(lineStart, taken));

          pos += terminatorLength;
          lineStart = pos;
      }

      // the last line needs to be accounted for if it is not empty
      if (lineStart != length) {
          lines.AddNoLock(self.Substring(lineStart, length - lineStart));
      }
      return lines;
  }
  /// <summary>
  /// Tests whether the string starts with <paramref name="prefix"/>, which may be a single
  /// string or a tuple of candidate strings. TryStringOrTuple validates the argument first.
  /// </summary>
  public static bool startswith(this string self, object prefix) {
      TryStringOrTuple(prefix);
      PythonTuple candidates = prefix as PythonTuple;
      return candidates != null
          ? startswith(self, candidates)
          : startswith(self, CastString(prefix));
  }
  /// <summary>
  /// Tests whether the string, from <paramref name="start"/> onward, starts with
  /// <paramref name="prefix"/> (a string or a tuple of candidate strings).
  /// </summary>
  public static bool startswith(this string self, object prefix, int start) {
      TryStringOrTuple(prefix);
      PythonTuple candidates = prefix as PythonTuple;
      return candidates != null
          ? startswith(self, candidates, start)
          : startswith(self, CastString(prefix), start);
  }
  /// <summary>
  /// Tests whether self[start:end] starts with <paramref name="prefix"/> (a string or a
  /// tuple of candidate strings).
  /// </summary>
  public static bool startswith(this string self, object prefix, int start, int end) {
      TryStringOrTuple(prefix);
      PythonTuple candidates = prefix as PythonTuple;
      return candidates != null
          ? startswith(self, candidates, start, end)
          : startswith(self, CastString(prefix), start, end);
  }
  /// <summary>
  /// Returns the string with leading and trailing white-space removed.
  /// </summary>
  public static string strip(this string self) {
      string trimmed = self.Trim();
      return trimmed;
  }
  /// <summary>
  /// Returns the string with every leading and trailing character that occurs in
  /// <paramref name="chars"/> removed. A null <paramref name="chars"/> strips white-space;
  /// an empty one strips nothing.
  /// </summary>
  public static string strip(this string self, [BytesConversion]string chars) {
      if (chars == null) return strip(self);
      // String.Trim treats an empty array like null and would strip white-space,
      // whereas Python's strip('') leaves the string unchanged.
      if (chars.Length == 0) return self;
      return self.Trim(chars.ToCharArray());
  }
  /// <summary>
  /// Returns a copy of the string with upper-case characters lowered and lower-case
  /// characters raised (invariant culture); all other characters are left as they are.
  /// </summary>
  public static string swapcase(this string self) {
      char[] chars = self.ToCharArray();
      for (int pos = 0; pos < chars.Length; pos++) {
          char ch = chars[pos];
          if (Char.IsUpper(ch)) {
              chars[pos] = Char.ToLowerInvariant(ch);
          } else if (Char.IsLower(ch)) {
              chars[pos] = Char.ToUpperInvariant(ch);
          }
      }
      return new string(chars);
  }
  /// <summary>
  /// Returns a title-cased copy: the first cased character of every run of cased characters
  /// is upper-cased and the rest of the run is lower-cased. Null or empty input is
  /// returned unchanged.
  /// </summary>
  public static string title(this string self) {
      if (string.IsNullOrEmpty(self)) {
          return self;
      }

      char[] chars = self.ToCharArray();
      bool insideWord = false;
      for (int pos = 0; pos < chars.Length; pos++) {
          char ch = chars[pos];
          bool cased = Char.IsUpper(ch) || Char.IsLower(ch);
          if (cased) {
              chars[pos] = insideWord ? Char.ToLowerInvariant(ch) : Char.ToUpperInvariant(ch);
          }
          // any uncased character ends the current word
          insideWord = cased;
      }
      return new string(chars);
  }
  /// <summary>
  /// Unicode-style translate: <paramref name="table"/> maps character ordinals (int keys) to
  /// a replacement. A value of None deletes the character, an int replaces it with the
  /// character of that ordinal, a string replaces it with that string; characters without
  /// an entry are copied through. Raises TypeError for any other value type or for an
  /// int outside 0..0xFFFF.
  /// </summary>
  public static string translate(this string self, [NotNull]PythonDictionary table) {
      if (table == null || self.Length == 0) {
          return self;
      }
      StringBuilder ret = new StringBuilder(self.Length);
      for (int i = 0; i < self.Length; i++) {
          int ordinal = (int)self[i];
          if (!table.__contains__(ordinal)) {
              ret.Append(self[i]);
              continue;
          }

          object mapped = table[ordinal];
          if (mapped == null) {
              // mapped to None: drop the character
              continue;
          }
          if (mapped is int) {
              int code = (int)mapped;
              // Reject negatives as well: casting them to char would silently wrap around.
              if (code < 0 || code > 0xFFFF) {
                  throw PythonOps.TypeError("character mapping must be in range(0x10000)");
              }
              ret.Append((char)code);
          } else if (mapped is String) {
              ret.Append((string)mapped);
          } else {
              throw PythonOps.TypeError("character mapping must return integer, None or unicode");
          }
      }
      return ret.ToString();
  }
  /// <summary>
  /// Byte-string style translate with a 256-character <paramref name="table"/> and no deletions.
  /// </summary>
  public static string translate(this string self, [BytesConversion]string table) {
      string noDeletions = null;
      return translate(self, table, noDeletions);
  }
  /// <summary>
  /// Byte-string style translate. Characters occurring in <paramref name="deletechars"/> are
  /// removed; the remaining ones are mapped through <paramref name="table"/>, which must be
  /// exactly 256 characters long (ValueError otherwise). With a null table the remaining
  /// characters are copied unchanged. With a table, characters whose ordinal is 256 or more
  /// have no table entry and are dropped.
  /// </summary>
  public static string translate(this string self, [BytesConversion]string table, [BytesConversion]string deletechars) {
      if (table != null && table.Length != 256) {
          throw PythonOps.ValueError("translation table must be 256 characters long");
      } else if (self.Length == 0) {
          return self;
      }

      // The output can never be longer than the input, so a single presized buffer avoids
      // both list growth and the final copy-to-array.
      char[] buffer = new char[self.Length];
      int written = 0;
      for (int i = 0; i < self.Length; i++) {
          char ch = self[i];
          // IndexOf(char) is an ordinal search and avoids allocating a one-character string per test
          if (deletechars != null && deletechars.IndexOf(ch) >= 0) {
              continue;
          }
          if (table == null) {
              buffer[written++] = ch;
          } else if (ch < 256) {
              buffer[written++] = table[ch];
          }
      }
      return new String(buffer, 0, written);
  }
  /// <summary>
  /// Returns an uppercase copy of the string, using the casing rules of CasingCultureInfo.
  /// </summary>
  public static string upper(this string self) {
      TextInfo casing = CasingCultureInfo.TextInfo;
      return casing.ToUpper(self);
  }
  /// <summary>
  /// Pads the string on the left with '0' up to <paramref name="width"/> characters. A leading
  /// '+' or '-' stays in front of the padding. Returns the original string when it is
  /// already at least <paramref name="width"/> long.
  /// </summary>
  public static string zfill(this string self, int width) {
      // Compare directly instead of testing (width - self.Length): that subtraction wraps
      // around for widths near int.MinValue and would be taken for a huge pad count.
      if (width <= self.Length) return self;

      int zeros = width - self.Length;
      StringBuilder ret = new StringBuilder(width);
      if (self.Length > 0 && IsSign(self[0])) {
          ret.Append(self[0]);
          ret.Append('0', zeros);
          ret.Append(self, 1, self.Length - 1);
      } else {
          ret.Append('0', zeros);
          ret.Append(self);
      }
      return ret.ToString();
  }
  /// <summary>
  /// Expands every replacement field of <paramref name="format_string"/> using the
  /// positional arguments only (an empty keyword dictionary is supplied).
  ///
  /// replacement_field = "{" field_name ["!" conversion] [":" format_spec] "}"
  /// field_name = (identifier | integer) ("." identifier | "[" element_index "]")*
  ///
  /// format_spec: [[fill]align][sign][#][0][width][,][.precision][type]
  ///
  /// Conversion can be 'r' for repr or 's' for string.
  /// </summary>
  public static string/*!*/ format(CodeContext/*!*/ context, string format_string, [NotNull]params object[] args) {
      PythonContext pythonContext = PythonContext.GetContext(context);
      PythonTuple positional = PythonTuple.MakeTuple(args);
      return NewStringFormatter.FormatString(pythonContext, format_string, positional, new PythonDictionary());
  }
  /// <summary>
  /// Expands every replacement field of the format string using both positional and
  /// keyword arguments.
  ///
  /// replacement_field = "{" field_name ["!" conversion] [":" format_spec] "}"
  /// field_name = (identifier | integer) ("." identifier | "[" element_index "]")*
  ///
  /// format_spec: [[fill]align][sign][#][0][width][,][.precision][type]
  ///
  /// Conversion can be 'r' for repr or 's' for string.
  /// </summary>
  public static string/*!*/ format(CodeContext/*!*/ context, string format_string\u00F8, [ParamDictionary]IDictionary<object, object> kwargs\u00F8, params object[] args\u00F8) {
      PythonContext pythonContext = PythonContext.GetContext(context);
      PythonTuple positional = PythonTuple.MakeTuple(args\u00F8);
      return NewStringFormatter.FormatString(pythonContext, format_string\u00F8, positional, kwargs\u00F8);
  }
  /// <summary>
  /// Returns the tuples NewStringFormatter.GetFormatInfo produces for this format string.
  /// </summary>
  public static IEnumerable<PythonTuple>/*!*/ _formatter_parser(this string/*!*/ self) {
      IEnumerable<PythonTuple> parsed = NewStringFormatter.GetFormatInfo(self);
      return parsed;
  }
  /// <summary>
  /// Returns the tuple NewStringFormatter.GetFieldNameInfo produces for this field name.
  /// </summary>
  public static PythonTuple/*!*/ _formatter_field_name_split(this string/*!*/ self) {
      PythonTuple fieldInfo = NewStringFormatter.GetFieldNameInfo(self);
      return fieldInfo;
  }
  1123. #endregion
  1124. #region operators
  /// <summary>
  /// String concatenation operator: self followed by other.
  /// </summary>
  [SpecialName]
  public static string Add([NotNull]string self, [NotNull]string other) {
      return string.Concat(self, other);
  }
  /// <summary>
  /// Concatenation operator for a string followed by a single character.
  /// </summary>
  [SpecialName]
  public static string Add([NotNull]string self, char other) {
      return string.Concat(self, other.ToString());
  }
  /// <summary>
  /// Concatenation operator for a single character followed by a string.
  /// </summary>
  [SpecialName]
  public static string Add(char self, [NotNull]string other) {
      return string.Concat(self.ToString(), other);
  }
  /// <summary>
  /// The % operator: formats <paramref name="self"/> as a template with <paramref name="other"/>
  /// as its argument(s), via StringFormatter.
  /// </summary>
  [SpecialName]
  public static string Mod(CodeContext/*!*/ context, string self, object other) {
      StringFormatter formatter = new StringFormatter(context, self, other);
      return formatter.Format();
  }
  /// <summary>
  /// Reflected % operator: <paramref name="other"/> is the template (a string or an
  /// Extensible&lt;string&gt;) and <paramref name="self"/> supplies the arguments. Returns
  /// NotImplemented when <paramref name="other"/> is neither.
  /// </summary>
  [SpecialName]
  [return: MaybeNotImplemented]
  public static object Mod(CodeContext/*!*/ context, object other, string self) {
      string template = other as string;
      if (template == null) {
          Extensible<string> wrapped = other as Extensible<string>;
          if (wrapped == null) {
              return NotImplementedType.Value;
          }
          template = wrapped.Value;
      }
      return new StringFormatter(context, template, self).Format();
  }
  /// <summary>
  /// Sequence repetition: returns <paramref name="s"/> repeated <paramref name="count"/> times.
  /// A count of zero or less yields the empty string. Raises OverflowError when the result
  /// would exceed Int32.MaxValue characters.
  /// </summary>
  [SpecialName]
  public static string Multiply(string s, int count) {
      if (count <= 0) {
          return String.Empty;
      }
      if (count == 1) {
          return s;
      }

      int unit = s.Length;
      long total = (long)unit * (long)count;
      if (total > Int32.MaxValue) {
          throw PythonOps.OverflowError("repeated string is too long");
      }
      if (unit == 1) {
          return new string(s[0], count);
      }

      // StringBuilder.Insert with a repeat count is MUCH faster than appending in a loop
      StringBuilder repeated = new StringBuilder(unit * count);
      repeated.Insert(0, s, count);
      return repeated.ToString();
  }
  /// <summary>
  /// Reflected repetition (count * string); forwards to Multiply(string, int).
  /// </summary>
  [SpecialName]
  public static string Multiply(int other, string self) {
      string repeated = Multiply(self, other);
      return repeated;
  }
  /// <summary>
  /// Repetition with an Index count on the right-hand side; delegates to
  /// PythonOps.MultiplySequence with the string/int Multiply as the worker.
  /// </summary>
  [SpecialName]
  public static object Multiply(string self, [NotNull]Index count) {
      object product = PythonOps.MultiplySequence<string>(Multiply, self, count, true);
      return product;
  }
  /// <summary>
  /// Repetition with an Index count on the left-hand side; delegates to
  /// PythonOps.MultiplySequence with the string/int Multiply as the worker.
  /// </summary>
  [SpecialName]
  public static object Multiply([NotNull]Index count, string self) {
      object product = PythonOps.MultiplySequence<string>(Multiply, self, count, false);
      return product;
  }
  /// <summary>
  /// Repetition with an arbitrary count object; the count must be convertible to an index,
  /// otherwise a TypeError for an un-indexable object is raised.
  /// </summary>
  [SpecialName]
  public static object Multiply(string self, object count) {
      int repeat;
      if (!Converter.TryConvertToIndex(count, out repeat)) {
          throw PythonOps.TypeErrorForUnIndexableObject(count);
      }
      return Multiply(self, repeat);
  }
  /// <summary>
  /// Reflected repetition with an arbitrary count object; the count must be convertible to
  /// an index, otherwise a TypeError for an un-indexable object is raised.
  /// </summary>
  [SpecialName]
  public static object Multiply(object count, string self) {
      int repeat;
      if (!Converter.TryConvertToIndex(count, out repeat)) {
          throw PythonOps.TypeErrorForUnIndexableObject(count);
      }
      return Multiply(repeat, self);
  }
  /// <summary>
  /// Ordinal (code-unit) comparison: true when x sorts after y.
  /// </summary>
  [SpecialName]
  public static bool GreaterThan(string x, string y) {
      int order = string.CompareOrdinal(x, y);
      return order > 0;
  }
  /// <summary>
  /// Ordinal (code-unit) comparison: true when x sorts before y.
  /// </summary>
  [SpecialName]
  public static bool LessThan(string x, string y) {
      int order = string.CompareOrdinal(x, y);
      return order < 0;
  }
  /// <summary>
  /// Ordinal (code-unit) comparison: true when x sorts before y or equals it.
  /// </summary>
  [SpecialName]
  public static bool LessThanOrEqual(string x, string y) {
      int order = string.CompareOrdinal(x, y);
      return order <= 0;
  }
  /// <summary>
  /// Ordinal (code-unit) comparison: true when x sorts after y or equals it.
  /// </summary>
  [SpecialName]
  public static bool GreaterThanOrEqual(string x, string y) {
      int order = string.CompareOrdinal(x, y);
      return order >= 0;
  }
  /// <summary>
  /// Value equality of two strings (either may be null).
  /// </summary>
  [SpecialName]
  public static bool Equals(string x, string y) {
      return x == y;
  }
  /// <summary>
  /// Value inequality of two strings (either may be null).
  /// </summary>
  [SpecialName]
  public static bool NotEquals(string x, string y) {
      return x != y;
  }
  1220. #endregion
  /// <summary>
  /// Implicit conversion from a char to a one-character string.
  /// </summary>
  [SpecialName, ImplicitConversionMethod]
  public static string ConvertFromChar(char c) {
      string asString = ScriptingRuntimeHelpers.CharToString(c);
      return asString;
  }
  /// <summary>
  /// Explicit conversion from a string to a char; only strings of length one convert,
  /// anything else raises a type-mismatch TypeError.
  /// </summary>
  [SpecialName, ExplicitConversionMethod]
  public static char ConvertToChar(string s) {
      if (s.Length != 1) {
          throw PythonOps.TypeErrorForTypeMismatch("char", s);
      }
      return s[0];
  }
  /// <summary>
  /// Implicit conversion to IEnumerable. The string is wrapped in a PythonStringEnumerable
  /// so that enumeration produces strings instead of chars.
  /// </summary>
  [SpecialName, ImplicitConversionMethod]
  public static IEnumerable ConvertToIEnumerable(string s) {
      IEnumerable wrapper = new PythonStringEnumerable(s);
      return wrapper;
  }
  /// <summary>
  /// Three-way ordinal comparison normalized to exactly -1, 0 or +1.
  /// </summary>
  internal static int Compare(string self, string obj) {
      return Math.Sign(string.CompareOrdinal(self, obj));
  }
  /// <summary>
  /// Returns the one-element argument tuple that __new__ needs to rebuild this string.
  /// Raises a bad-instance TypeError when <paramref name="self"/> is null.
  /// </summary>
  public static object __getnewargs__(CodeContext/*!*/ context, string self) {
      if (Object.ReferenceEquals(self, null)) {
          throw PythonOps.TypeErrorForBadInstance("__getnewargs__ requires a 'str' object but received a '{0}'", self);
      }
      // Cast self to object to avoid exception caused by trying to access SystemState on DefaultContext
      object rebuilt = StringOps.__new__(context, TypeCache.String, (object)self);
      return PythonTuple.MakeTuple(rebuilt);
  }
  /// <summary>
  /// str() of a string is the string itself.
  /// </summary>
  public static string __str__(string self) {
      string same = self;
      return same;
  }
  /// <summary>
  /// str() of a user-defined string subclass instance returns that same instance.
  /// </summary>
  public static Extensible<string> __str__(ExtensibleString self) {
      Extensible<string> same = self;
      return same;
  }
  1252. #region Internal implementation details
  /// <summary>
  /// Produces the quoted repr of a string. Single quotes are used unless the text contains
  /// a single quote and no double quote, in which case double quotes are used. The
  /// result is prefixed with "u" when ReprEncode reports the text as unicode.
  /// </summary>
  internal static string Quote(string s) {
      bool isUnicode = false;
      bool preferDoubleQuotes = s.IndexOf('\'') != -1 && s.IndexOf('\"') == -1;
      char quote = preferDoubleQuotes ? '\"' : '\'';

      string encoded = ReprEncode(s, quote, ref isUnicode);

      StringBuilder b = new StringBuilder(s.Length + 5);
      b.Append(quote).Append(encoded).Append(quote);

      string quoted = b.ToString();
      return isUnicode ? "u" + quoted : quoted;
  }
  /// <summary>
  /// Repr-escapes a string without escaping any quote character (quote is passed as 0).
  /// </summary>
  internal static string ReprEncode(string s, ref bool isUnicode) {
      const char noQuote = (char)0;
      return ReprEncode(s, noQuote, ref isUnicode);
  }
  /// <summary>
  /// Looks up an encoding by (normalized) name. With FEATURE_ENCODING the codec registry in
  /// CodecsInfo.Codecs is consulted and a clone of the registered encoding is returned;
  /// otherwise only a small fixed set of names is recognized. Returns false and a null
  /// encoding when the name is unknown.
  /// </summary>
  internal static bool TryGetEncoding(string name, out Encoding encoding) {
#if FEATURE_ENCODING
      string normalized = NormalizeEncodingName(name);
      EncodingInfoWrapper registered;
      if (CodecsInfo.Codecs.TryGetValue(normalized, out registered)) {
          encoding = (Encoding)registered.GetEncoding().Clone();
          return true;
      }
#else
      switch (NormalizeEncodingName(name)) {
          case "us_ascii":
          case "ascii":
              encoding = PythonAsciiEncoding.Instance;
              return true;
          case "utf_8":
              encoding = (Encoding)new EncodingWrapper(Encoding.UTF8, new byte[0]).Clone();
              return true;
          case "utf_16_le":
              encoding = (Encoding)new EncodingWrapper(Encoding.Unicode, new byte[0]).Clone();
              return true;
          case "utf_16_be":
              encoding = (Encoding)new EncodingWrapper(Encoding.BigEndianUnicode, new byte[0]).Clone();
              return true;
          case "utf_8_sig":
              encoding = Encoding.UTF8;
              return true;
      }
#endif
      encoding = null;
      return false;
  }
  /// <summary>
  /// Encodes a string as raw-unicode-escape: every character above 0xff becomes a
  /// backslash-u escape with four lower-case hex digits; everything else is copied as is.
  /// Returns the original instance when nothing needed escaping.
  /// </summary>
  internal static string RawUnicodeEscapeEncode(string s) {
      // The builder is created lazily (by ReprInit) on the first character that needs escaping.
      StringBuilder escaped = null;
      for (int pos = 0; pos < s.Length; pos++) {
          char ch = s[pos];
          if (ch <= 0xff) {
              if (escaped != null) {
                  escaped.Append(ch);
              }
              continue;
          }
          ReprInit(ref escaped, s, pos);
          escaped.AppendFormat("\\u{0:x4}", (int)ch);
      }
      return escaped == null ? s : escaped.ToString();
  }
  1306. #endregion
  1307. #region Private implementation details
  /// <summary>
  /// Converts an optional slice bound to an int: null yields <paramref name="defaultValue"/>,
  /// anything index-convertible yields its value, everything else raises TypeError.
  /// </summary>
  private static int CheckIndex(object index, int defaultValue) {
      if (index == null) {
          return defaultValue;
      }
      int converted;
      if (Converter.TryConvertToIndex(index, out converted)) {
          return converted;
      }
      throw PythonOps.TypeError("slice indices must be integers or None or have an __index__ method");
  }
  /// <summary>
  /// Appends one join() item to <paramref name="sb"/>. The item must be a string or
  /// convertible to a non-null string; otherwise a TypeError naming the item's position
  /// (<paramref name="index"/>) and Python type is raised.
  /// </summary>
  private static void AppendJoin(object value, int index, StringBuilder sb) {
      string text = value as string;
      if (text == null) {
          bool converted = Converter.TryConvertToString(value, out text) && text != null;
          if (!converted) {
              throw PythonOps.TypeError("sequence item {0}: expected string, {1} found", index.ToString(), PythonOps.GetPythonTypeName(value));
          }
      }
      sb.Append(text);
  }
  /// <summary>
  /// Implements replace() for an empty search string: <paramref name="new"/> is inserted
  /// before each character and once after the last one, up to <paramref name="count"/>
  /// insertions (a negative count means "all Length + 1 positions").
  /// </summary>
  private static string ReplaceEmpty(string self, string @new, int count) {
      if (count == 0) {
          return self;
      }

      // there are Length + 1 gaps into which the replacement can go
      int gaps = checked(self.Length + 1);
      if (count < 0 || count > gaps) {
          count = gaps;
      }

      int capacity = checked(self.Length + @new.Length * count);
      int interleaved = Math.Min(self.Length, count);

      StringBuilder ret = new StringBuilder(capacity);
      for (int pos = 0; pos < interleaved; pos++) {
          ret.Append(@new);
          ret.Append(self[pos]);
      }

      if (count > interleaved) {
          // every gap is used, including the one after the last character
          ret.Append(@new);
      } else {
          // insertion budget exhausted: copy the untouched remainder
          ret.Append(self, interleaved, self.Length - interleaved);
      }
      return ret.ToString();
  }
  /// <summary>
  /// Returns the string with its UTF-16 code units in reverse order. Strings of length
  /// zero or one are returned as the same instance.
  /// </summary>
  private static string Reverse(string s) {
      if (s.Length < 2) {
          return s;
      }
      char[] chars = s.ToCharArray();
      Array.Reverse(chars);
      return new string(chars);
  }
  /// <summary>
  /// Escapes a string for repr(). Backslash, tab, newline and carriage return get their
  /// two-character escapes; the <paramref name="quote"/> character (when non-zero) is
  /// backslash-escaped; control characters and 0x7f..0xff become \xNN; characters above
  /// 0xff become \uNNNN. <paramref name="isUnicode"/> is set to true as soon as a character
  /// at or above LowestUnicodeValue is seen. Returns the original instance when nothing
  /// needed escaping.
  /// </summary>
  internal static string ReprEncode(string s, char quote, ref bool isUnicode) {
      // The builder is created lazily (by ReprInit) on the first character that needs escaping.
      StringBuilder b = null;
      for (int i = 0; i < s.Length; i++) {
          char ch = s[i];
          if (ch >= LowestUnicodeValue) {
              isUnicode = true;
          }

          // fixed two-character escapes take priority over every other rule
          string simpleEscape = null;
          if (ch == '\\') {
              simpleEscape = "\\\\";
          } else if (ch == '\t') {
              simpleEscape = "\\t";
          } else if (ch == '\n') {
              simpleEscape = "\\n";
          } else if (ch == '\r') {
              simpleEscape = "\\r";
          }

          if (simpleEscape != null) {
              ReprInit(ref b, s, i);
              b.Append(simpleEscape);
          } else if (quote != 0 && ch == quote) {
              ReprInit(ref b, s, i);
              b.Append('\\');
              b.Append(ch);
          } else if (ch < ' ' || (ch >= 0x7f && ch <= 0xff)) {
              ReprInit(ref b, s, i);
              b.AppendFormat("\\x{0:x2}", (int)ch);
          } else if (ch > 0xff) {
              ReprInit(ref b, s, i);
              b.AppendFormat("\\u{0:x4}", (int)ch);
          } else if (b != null) {
              b.Append(ch);
          }
      }
      return b == null ? s : b.ToString();
  }
  /// <summary>
  /// Lazily creates the escape buffer: on first use <paramref name="sb"/> is initialized with
  /// the first <paramref name="c"/> characters of <paramref name="s"/> and a capacity of the
  /// full string length. Does nothing when the buffer already exists.
  /// </summary>
  private static void ReprInit(ref StringBuilder sb, string s, int c) {
      if (sb == null) {
          sb = new StringBuilder(s, 0, c, s.Length);
      }
  }
  /// <summary>
  /// True for the numeric sign characters '+' and '-'.
  /// </summary>
  private static bool IsSign(char ch) {
      switch (ch) {
          case '+':
          case '-':
              return true;
          default:
              return false;
      }
  }
  /// <summary>
  /// Derives a normalized name for an encoding. With FEATURE_ENCODING and a non-zero code
  /// page the preference order is: web name (browser-display encodings), header name
  /// (mail/news-display encodings), then "cp" + code page; with a zero code page the
  /// human readable EncodingName is used. Without FEATURE_ENCODING only the web name is
  /// available. The result is passed through NormalizeEncodingName.
  /// </summary>
  internal static string GetEncodingName(Encoding encoding) {
#if FEATURE_ENCODING
      string name = null;

      // if we have a valid code page try and get a reasonable name. The
      // web names / mail displays tend to match CPython's terse names
      if (encoding.CodePage != 0) {
#if !NETSTANDARD
          if (encoding.IsBrowserDisplay) {
              name = encoding.WebName;
          }

          if (name == null && encoding.IsMailNewsDisplay) {
              name = encoding.HeaderName;
          }
#endif
          // otherwise use a code page number which also matches CPython
          name = name ?? ("cp" + encoding.CodePage);
      }

      // otherwise just finally fall back to the human readable name
      name = name ?? encoding.EncodingName;
#else
      // Silverlight only has web names
      string name = encoding.WebName;
#endif
      return NormalizeEncodingName(name);
  }
  /// <summary>
  /// Canonicalizes an encoding name: lower-cases it (invariant culture) and replaces
  /// hyphens and spaces with underscores. Null is passed through.
  /// </summary>
  internal static string NormalizeEncodingName(string name) {
      if (name == null) {
          return null;
      }
      string lowered = name.ToLowerInvariant();
      return lowered.Replace('-', '_').Replace(' ', '_');
  }
  /// <summary>
  /// Decodes <paramref name="s"/>. <paramref name="encodingType"/> may be an encoding name, an
  /// Encoding instance, or Missing.Value (use the context's default encoding name); any
  /// other value raises TypeError. The escape pseudo-codecs are handled by LiteralParser,
  /// known encodings go through DoDecode, and finally user-registered codecs are tried.
  /// Raises LookupError for an unknown encoding.
  /// </summary>
  private static string RawDecode(CodeContext/*!*/ context, string s, object encodingType, string errors) {
      PythonContext pc = PythonContext.GetContext(context);

      string encoding = encodingType as string;
      Encoding e = encoding == null ? encodingType as Encoding : null;
      if (encoding == null && e == null) {
          if (encodingType != Missing.Value) {
              throw PythonOps.TypeError("decode() expected string, got '{0}'", DynamicHelpers.GetPythonType(encodingType).Name);
          }
          encoding = pc.GetDefaultEncodingName();
      }

      if (e == null) {
          // escape pseudo-codecs are not real encodings; the literal parser handles them
          switch (NormalizeEncodingName(encoding)) {
              case "raw_unicode_escape":
                  return LiteralParser.ParseString(s, true, true);
              case "unicode_escape":
                  return LiteralParser.ParseString(s, false, true);
              case "string_escape":
                  return LiteralParser.ParseString(s, false, false);
          }
      }

      if (e != null || TryGetEncoding(encoding, out e)) {
          return DoDecode(context, s, errors, encoding, e);
      }

      // look for user-registered codecs
      PythonTuple codecTuple = PythonOps.LookupEncoding(context, encoding);
      if (codecTuple == null) {
          throw PythonOps.LookupError("unknown encoding: {0}", encoding);
      }
      return UserDecodeOrEncode(codecTuple[/*Modules.PythonCodecs.DecoderIndex*/1], s);
  }
  1461. #if FEATURE_ENCODING
  /// <summary>
  /// Installs <paramref name="decoderFallback"/> on the encoding. Under NETSTANDARD the
  /// non-public "decoderFallback" field is written through reflection; elsewhere the
  /// DecoderFallback property is assigned directly.
  /// </summary>
  internal static void SetDecoderFallback(Encoding e, DecoderFallback decoderFallback) {
#if NETSTANDARD
      FieldInfo fallbackField = typeof(Encoding).GetField("decoderFallback", BindingFlags.Instance | BindingFlags.NonPublic);
      fallbackField.SetValue(e, decoderFallback);
      Debug.Assert(e.DecoderFallback == decoderFallback);
#else
      e.DecoderFallback = decoderFallback;
#endif
  }
  /// <summary>
  /// Installs <paramref name="encoderFallback"/> on the encoding. Under NETSTANDARD the
  /// non-public "encoderFallback" field is written through reflection; elsewhere the
  /// EncoderFallback property is assigned directly.
  /// </summary>
  internal static void SetEncoderFallback(Encoding e, EncoderFallback encoderFallback) {
#if NETSTANDARD
      FieldInfo fallbackField = typeof(Encoding).GetField("encoderFallback", BindingFlags.Instance | BindingFlags.NonPublic);
      fallbackField.SetValue(e, encoderFallback);
      Debug.Assert(e.EncoderFallback == encoderFallback);
#else
      e.EncoderFallback = encoderFallback;
#endif
  }
  1478. #endif
  1479. #if FEATURE_ENCODING
  // Shared decoder fallback that substitutes U+FFFD for undecodable input ("replace" error mode).
  // readonly: the instance is shared by every decode call and must never be swapped out.
  private static readonly DecoderFallback ReplacementFallback = new DecoderReplacementFallback("\ufffd");
  1481. #endif
  /// <summary>
  /// Decodes <paramref name="s"/> (treated as a byte string) with encoding <paramref name="e"/>.
  /// With FEATURE_ENCODING the encoding is cloned and given a decoder fallback chosen from
  /// <paramref name="errors"/>; unrecognized error modes are resolved through
  /// PythonOps.LookupEncodingError. A preamble at the start of the data is skipped.
  /// </summary>
  internal static string DoDecode(CodeContext context, string s, string errors, string encoding, Encoding e) {
#if FEATURE_ENCODING
      // CLR's encoder exceptions have a 1-1 mapping w/ Python's encoder exceptions
      // so we just clone the encoding & set the fallback to throw in strict mode.
      e = (Encoding)e.Clone();

      DecoderFallback fallback;
      switch (errors) {
          case "backslashreplace":
          case "xmlcharrefreplace":
          case "strict":
              fallback = DecoderFallback.ExceptionFallback;
              break;
          case "replace":
              fallback = ReplacementFallback;
              break;
          case "ignore":
              fallback = new PythonDecoderFallback(encoding, s, null);
              break;
          default:
              fallback = new PythonDecoderFallback(encoding, s, LightExceptions.CheckAndThrow(PythonOps.LookupEncodingError(context, errors)));
              break;
      }
      SetDecoderFallback(e, fallback);
#endif
      byte[] bytes = s.MakeByteArray();
      int start = GetStartingOffset(e, bytes);
      int remaining = bytes.Length - start;
#if FEATURE_ENCODING && NETSTANDARD
      try
      {
          return e.GetString(bytes, start, remaining);
      }
      catch (NullReferenceException)
      {
          // bug in netstandard1.6, instead of failing try with a DecoderReplacementFallback
          SetDecoderFallback(e, ReplacementFallback);
      }
#endif
      return e.GetString(bytes, start, remaining);
  }
  /// <summary>
  /// Returns the number of leading bytes to skip before decoding: the length of the
  /// encoding's preamble when <paramref name="bytes"/> begins with it, otherwise 0.
  /// </summary>
  private static int GetStartingOffset(Encoding e, byte[] bytes) {
      byte[] preamble = e.GetPreamble();
      if (bytes.Length < preamble.Length) {
          return 0;
      }
      for (int pos = 0; pos < preamble.Length; pos++) {
          if (bytes[pos] != preamble[pos]) {
              return 0;
          }
      }
      return preamble.Length;
  }
  /// <summary>
  /// Encodes <paramref name="s"/>. <paramref name="encodingType"/> may be an encoding name, an
  /// Encoding instance, or Missing.Value (use the context's default encoding name); any
  /// other value raises TypeError. The escape pseudo-codecs are handled locally, known
  /// encodings go through DoEncode, and finally user-registered codecs are tried.
  /// Raises LookupError for an unknown encoding.
  /// </summary>
  private static string RawEncode(CodeContext/*!*/ context, string s, object encodingType, string errors) {
      string encoding = encodingType as string;
      Encoding e = encoding == null ? encodingType as Encoding : null;
      if (encoding == null && e == null) {
          if (encodingType != Missing.Value) {
              throw PythonOps.TypeError("encode() expected string, got '{0}'", DynamicHelpers.GetPythonType(encodingType).Name);
          }
          encoding = PythonContext.GetContext(context).GetDefaultEncodingName();
      }

      if (e == null) {
          // escape pseudo-codecs are not real encodings
          switch (NormalizeEncodingName(encoding)) {
              case "raw_unicode_escape":
                  return RawUnicodeEscapeEncode(s);
              case "unicode_escape":
              case "string_escape":
                  bool dummy = false;
                  return ReprEncode(s, '\'', ref dummy);
          }
      }

      if (e != null || TryGetEncoding(encoding, out e)) {
          return DoEncode(context, s, errors, encoding, e);
      }

      // look for user-registered codecs
      PythonTuple codecTuple = PythonOps.LookupEncoding(context, encoding);
      if (codecTuple == null) {
          throw PythonOps.LookupError("unknown encoding: {0}", encoding);
      }
      return UserDecodeOrEncode(codecTuple[/*Modules.PythonCodecs.EncoderIndex*/0], s);
  }
  /// <summary>
  /// Encodes <paramref name="s"/> with <paramref name="e"/> and returns the encoding's preamble
  /// followed by the encoded bytes, packaged by PythonOps.MakeString.
  /// </summary>
  /// <param name="context">Calling context; used to look up a custom error handler by name.</param>
  /// <param name="s">The text to encode.</param>
  /// <param name="errors">Error-handler name: "strict", "replace", "backslashreplace",
  /// "xmlcharrefreplace", "ignore", or the name of a registered handler.</param>
  /// <param name="encoding">Encoding name handed to the Python-level fallback objects.</param>
  /// <param name="e">The encoding to use; when FEATURE_ENCODING is defined it is cloned, not modified.</param>
  internal static string DoEncode(CodeContext context, string s, string errors, string encoding, Encoding e) {
  #if FEATURE_ENCODING
      // CLR encoder exceptions map 1-1 onto Python's, so work on a private clone of the
      // encoding and install the fallback that corresponds to the requested handler.
      e = (Encoding)e.Clone();

      EncoderFallback fallback;
      switch (errors) {
          case "strict":
              fallback = EncoderFallback.ExceptionFallback;
              break;
          case "replace":
              fallback = EncoderFallback.ReplacementFallback;
              break;
          case "backslashreplace":
              fallback = new BackslashEncoderReplaceFallback();
              break;
          case "xmlcharrefreplace":
              fallback = new XmlCharRefEncoderReplaceFallback();
              break;
          case "ignore":
              // A Python fallback without a callable produces no replacement text.
              fallback = new PythonEncoderFallback(encoding, s, null);
              break;
          default:
              fallback = new PythonEncoderFallback(encoding, s, LightExceptions.CheckAndThrow(PythonOps.LookupEncodingError(context, errors)));
              break;
      }
      SetEncoderFallback(e, fallback);
  #endif
      byte[] preamble = e.GetPreamble();
      byte[] payload = e.GetBytes(s);
      return PythonOps.MakeString(preamble, payload);
  }
  /// <summary>
  /// Calls a user-supplied codec function with <paramref name="data"/> and extracts the
  /// resulting string.
  /// </summary>
  /// <param name="function">The Python callable to invoke.</param>
  /// <param name="data">The single argument passed to the callable.</param>
  /// <returns>
  /// The result itself when it is string-like; otherwise the first element of the returned
  /// tuple converted to a string. A TypeError is raised for any other result.
  /// </returns>
  private static string UserDecodeOrEncode(object function, string data) {
      object outcome = PythonCalls.Call(function, data);

      string text = AsString(outcome);
      if (text != null) {
          return text;
      }

      // Codec functions return (string, bytes used); only the string part is wanted here.
      PythonTuple pair = outcome as PythonTuple;
      if (pair == null) {
          throw PythonOps.TypeErrorForBadInstance("expected tuple, but found {0}", outcome);
      }
      return Converter.ConvertToString(pair[0]);
  }
  1591. #if FEATURE_ENCODING
  /// <summary>
  /// Holds the table that maps codec names and their aliases to encoding wrappers.
  /// </summary>
  static class CodecsInfo {
      public static readonly Dictionary<string, EncodingInfoWrapper> Codecs = MakeCodecsDict();

  #if NETSTANDARD
      // Fixed list of encodings used for this build flavour.
      private static IEnumerable<EncodingInfo> GetEncodings() {
          yield return new EncodingInfo(1200, "utf-16", "Unicode");
          yield return new EncodingInfo(1201, "unicodeFFFE", "Unicode (Big endian)");
          yield return new EncodingInfo(1252, "windows-1252", "Western European (Windows)");
          yield return new EncodingInfo(20127, "us-ascii", "US-ASCII");
          yield return new EncodingInfo(28591, "iso-8859-1", "Western European (ISO)");
          yield return new EncodingInfo(28605, "iso-8859-15", "Latin 9 (ISO)");
          yield return new EncodingInfo(65000, "utf-7", "Unicode (UTF-7)");
          yield return new EncodingInfo(65001, "utf-8", "Unicode (UTF-8)");
      }
  #endif

      // Stores one wrapper under every listed name; a later registration of the same name wins.
      private static void Publish(Dictionary<string, EncodingInfoWrapper> table, EncodingInfoWrapper codec, params string[] names) {
          foreach (string name in names) {
              table[name] = codec;
          }
      }

      // Builds the name -> wrapper table from the available encodings plus the escape codecs.
      private static Dictionary<string, EncodingInfoWrapper> MakeCodecsDict() {
          Dictionary<string, EncodingInfoWrapper> table = new Dictionary<string, EncodingInfoWrapper>();

  #if NETSTANDARD
          Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);
          IEnumerable<EncodingInfo> available = GetEncodings();
  #else
          EncodingInfo[] available = Encoding.GetEncodings();
  #endif

          foreach (EncodingInfo info in available) {
              string normalizedName = NormalizeEncodingName(info.Name);
              string codePage = info.CodePage.ToString();

              // Well-known encodings get extra aliases. The cases that 'continue' are fully
              // registered here and skip the generic registrations below.
              switch (normalizedName) {
                  case "us_ascii":
                      Publish(table, new AsciiEncodingInfoWrapper(),
                          "cp" + codePage, normalizedName, "us", "ascii", "646", "us_ascii");
                      continue;
                  case "iso_8859_1":
                      Publish(table, new EncodingInfoWrapper(info),
                          "8859", "latin_1", "latin1", "iso 8859_1", "iso8859_1", "cp819", "819", "latin", "l1");
                      break;
                  case "utf_7":
                      Publish(table, new EncodingInfoWrapper(info), "u7", "unicode-1-1-utf-7");
                      break;
                  case "utf_8":
                      Publish(table, new EncodingInfoWrapper(info), "utf_8_sig");
                      // the plain utf-8 names are registered with an empty preamble
                      Publish(table, new EncodingInfoWrapper(info, new byte[0]),
                          "cp" + codePage, "utf_8", "utf8", "u8");
                      continue;
                  case "utf_16":
                      Publish(table, new EncodingInfoWrapper(info, new byte[0]), "utf_16_le", "utf_16le");
                      Publish(table, new EncodingInfoWrapper(info, info.GetEncoding().GetPreamble()), "utf16");
                      break;
                  case "unicodefffe": // big endian unicode
                      // strip off the pre-amble, CPython doesn't include it.
                      Publish(table, new EncodingInfoWrapper(info, new byte[0]), "utf_16_be", "utf_16be");
                      break;
              }

              // publish under normalized name (all lower cases, -s replaced with _s)
              Publish(table, new EncodingInfoWrapper(info), normalizedName);

  #if !NETSTANDARD
              // publish under Windows code page as well...
              Publish(table, new EncodingInfoWrapper(info),
                  "windows-" + info.GetEncoding().WindowsCodePage.ToString());
  #endif

              // publish under code page number as well...
              Publish(table, new EncodingInfoWrapper(info), "cp" + codePage, codePage);
          }

          table["raw_unicode_escape"] = new EncodingInfoWrapper(new UnicodeEscapeEncoding(true));
          table["unicode_escape"] = new EncodingInfoWrapper(new UnicodeEscapeEncoding(false));

  #if DEBUG
          // all codecs should be stored in lowercase because we only look up from lowercase strings
          foreach (KeyValuePair<string, EncodingInfoWrapper> entry in table) {
              Debug.Assert(entry.Key.ToLowerInvariant() == entry.Key);
          }
  #endif
          return table;
      }
  }
  /// <summary>
  /// Describes how to obtain an Encoding: either a ready-made instance, or an EncodingInfo
  /// that is resolved on demand, optionally combined with a replacement preamble.
  /// </summary>
  class EncodingInfoWrapper {
      private readonly EncodingInfo _info;
      private readonly Encoding _encoding;
      private readonly byte[] _preamble;

      /// <summary>Wraps an already-constructed encoding.</summary>
      public EncodingInfoWrapper(Encoding enc) {
          _encoding = enc;
      }

      /// <summary>Wraps an encoding description; the encoding's own preamble is kept.</summary>
      public EncodingInfoWrapper(EncodingInfo info)
          : this(info, null) {
      }

      /// <summary>Wraps an encoding description and overrides the preamble it reports.</summary>
      public EncodingInfoWrapper(EncodingInfo info, byte[] preamble) {
          _info = info;
          _preamble = preamble;
      }

      /// <summary>
      /// Returns the wrapped encoding. When a preamble override was supplied, the resolved
      /// encoding is wrapped in a new EncodingWrapper on every call.
      /// </summary>
      public virtual Encoding GetEncoding() {
          if (_encoding != null) {
              return _encoding;
          }

          Encoding resolved = _info.GetEncoding();
          return (_preamble == null) ? resolved : new EncodingWrapper(resolved, _preamble);
      }

      /// <summary>Lets an EncodingInfo be stored directly where a wrapper is expected.</summary>
      public static implicit operator EncodingInfoWrapper(EncodingInfo info) {
          return new EncodingInfoWrapper(info);
      }
  }
  /// <summary>
  /// Wrapper that carries no EncodingInfo and always resolves to the shared
  /// PythonAsciiEncoding instance.
  /// </summary>
  class AsciiEncodingInfoWrapper : EncodingInfoWrapper {
      public AsciiEncodingInfoWrapper()
          : base((EncodingInfo)null) {
          // No EncodingInfo is needed: GetEncoding is overridden below.
      }

      /// <summary>Returns PythonAsciiEncoding.Instance.</summary>
      public override Encoding GetEncoding() {
          Encoding ascii = PythonAsciiEncoding.Instance;
          return ascii;
      }
  }
  1694. #endif
  /// <summary>
  /// Encoding that forwards all work to an inner encoding while reporting a caller-chosen
  /// preamble. Before each forwarded call the wrapper's current fallback is copied onto the
  /// inner encoding (when FEATURE_ENCODING is defined).
  /// </summary>
  class EncodingWrapper : Encoding {
      private byte[] _preamble;
      private Encoding _encoding;

      public EncodingWrapper(Encoding encoding, byte[] preamable) {
          _encoding = encoding;
          _preamble = preamable;
      }

      // Returns the inner encoding after giving it this wrapper's encoder fallback.
      private Encoding InnerForEncoding() {
  #if FEATURE_ENCODING
          StringOps.SetEncoderFallback(_encoding, EncoderFallback);
  #endif
          return _encoding;
      }

      // Returns the inner encoding after giving it this wrapper's decoder fallback.
      private Encoding InnerForDecoding() {
  #if FEATURE_ENCODING
          StringOps.SetDecoderFallback(_encoding, DecoderFallback);
  #endif
          return _encoding;
      }

      public override int GetByteCount(char[] chars, int index, int count) {
          return InnerForEncoding().GetByteCount(chars, index, count);
      }

      public override int GetBytes(char[] chars, int charIndex, int charCount, byte[] bytes, int byteIndex) {
          return InnerForEncoding().GetBytes(chars, charIndex, charCount, bytes, byteIndex);
      }

      public override int GetCharCount(byte[] bytes, int index, int count) {
          return InnerForDecoding().GetCharCount(bytes, index, count);
      }

      public override int GetChars(byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex) {
          return InnerForDecoding().GetChars(bytes, byteIndex, byteCount, chars, charIndex);
      }

      public override int GetMaxByteCount(int charCount) {
          return InnerForEncoding().GetMaxByteCount(charCount);
      }

      public override int GetMaxCharCount(int byteCount) {
          return InnerForDecoding().GetMaxCharCount(byteCount);
      }

      /// <summary>Returns the preamble supplied at construction, not the inner encoding's.</summary>
      public override byte[] GetPreamble() {
          return _preamble;
      }

      public override Encoder GetEncoder() {
          return InnerForEncoding().GetEncoder();
      }

      public override Decoder GetDecoder() {
          return InnerForDecoding().GetDecoder();
      }

  #if FEATURE_ENCODING
      public override object Clone() {
          // need to call base.Clone to be marked as read/write
          EncodingWrapper copy = (EncodingWrapper)base.Clone();
          // the copy gets its own clone of the inner encoding
          copy._encoding = (Encoding)_encoding.Clone();
          return copy;
      }
  #endif
  }
  /// <summary>
  /// Produces the result of splitting an empty string.
  /// </summary>
  /// <param name="separators">True when an explicit separator was supplied.</param>
  /// <returns>
  /// A list holding a single empty string when <paramref name="separators"/> is true;
  /// otherwise an empty list.
  /// </returns>
  private static List SplitEmptyString(bool separators) {
      List result = PythonOps.MakeEmptyList(1);
      if (!separators) {
          return result;
      }
      result.AddNoLock(String.Empty);
      return result;
  }
  /// <summary>
  /// Splits <paramref name="self"/> on any of the characters in <paramref name="seps"/>.
  /// </summary>
  /// <param name="self">The string to split; null or empty is handled by SplitEmptyString.</param>
  /// <param name="seps">
  /// Separator characters. When null, empty entries are dropped from the result
  /// (StringSplitOptions.RemoveEmptyEntries); otherwise they are kept.
  /// </param>
  /// <param name="maxsplit">Maximum number of splits; a negative value means no limit.</param>
  /// <returns>A list of the resulting pieces.</returns>
  private static List SplitInternal(string self, char[] seps, int maxsplit) {
      if (String.IsNullOrEmpty(self)) {
          return SplitEmptyString(seps != null);
      }

      // N splits produce at most N + 1 pieces. Int32.MaxValue + 1 would wrap around to a
      // negative count, so the largest maxsplit is treated the same as "no limit".
      int maxPieces = (maxsplit < 0 || maxsplit == Int32.MaxValue) ? Int32.MaxValue : maxsplit + 1;

      // If the optional second argument sep is absent or None, the words are separated
      // by arbitrary strings of whitespace characters (space, tab, newline, return, formfeed);
      StringSplitOptions options = (seps == null) ? StringSplitOptions.RemoveEmptyEntries : StringSplitOptions.None;

      string[] pieces = StringUtils.Split(self, seps, maxPieces, options);

      List ret = PythonOps.MakeEmptyList(pieces.Length);
      foreach (string piece in pieces) {
          ret.AddNoLock(piece);
      }
      return ret;
  }
  /// <summary>
  /// Splits <paramref name="self"/> on the string <paramref name="separator"/>, keeping
  /// empty entries.
  /// </summary>
  /// <param name="self">The string to split; null or empty is handled by SplitEmptyString.</param>
  /// <param name="separator">The separator string.</param>
  /// <param name="maxsplit">Maximum number of splits; a negative value means no limit.</param>
  /// <returns>A list of the resulting pieces.</returns>
  private static List SplitInternal(string self, string separator, int maxsplit) {
      if (String.IsNullOrEmpty(self)) {
          return SplitEmptyString(separator != null);
      }

      // N splits produce at most N + 1 pieces. Int32.MaxValue + 1 would wrap around to a
      // negative count, so the largest maxsplit is treated the same as "no limit".
      int maxPieces = (maxsplit < 0 || maxsplit == Int32.MaxValue) ? Int32.MaxValue : maxsplit + 1;

      string[] pieces = StringUtils.Split(self, separator, maxPieces, StringSplitOptions.None);

      List ret = PythonOps.MakeEmptyList(pieces.Length);
      foreach (string piece in pieces) {
          ret.AddNoLock(piece);
      }
      return ret;
  }
  /// <summary>
  /// Validates that <paramref name="prefix"/> is a string, an Extensible&lt;string&gt; or a
  /// PythonTuple, raising a TypeError otherwise (including for null).
  /// </summary>
  private static void TryStringOrTuple(object prefix) {
      if (prefix == null) {
          throw PythonOps.TypeError("expected string or Tuple, got NoneType");
      }

      bool acceptable = prefix is string || prefix is PythonTuple || prefix is Extensible<string>;
      if (!acceptable) {
          throw PythonOps.TypeError("expected string or Tuple, got {0} Type", prefix.GetType());
      }
  }
  /// <summary>
  /// Converts <paramref name="obj"/> to a string via AsString, raising a TypeError that names
  /// the object's Python type when AsString yields null.
  /// </summary>
  private static string GetString(object obj) {
      string text = AsString(obj);
      if (text != null) {
          return text;
      }
      throw PythonOps.TypeError("expected string, got {0}", DynamicHelpers.GetPythonType(obj).Name);
  }
  /// <summary>
  /// Returns true when <paramref name="self"/> ends with <paramref name="suffix"/>.
  /// </summary>
  public static bool endswith(string self, [BytesConversion]string suffix) {
      // Compare code unit by code unit. The single-argument String.EndsWith overload uses the
      // current culture, which can ignore characters and therefore report false matches.
      return self.EndsWith(suffix, StringComparison.Ordinal);
  }
  // Indexing is 0-based. Negative indices count backwards from the end of the sequence:
  //   +---+---+---+---+---+
  //   | a | b | c | d | e |
  //   +---+---+---+---+---+
  //     0   1   2   3   4
  //    -5  -4  -3  -2  -1
  /// <summary>
  /// Returns true when the part of <paramref name="self"/> beginning at
  /// <paramref name="start"/> ends with <paramref name="suffix"/>.
  /// </summary>
  /// <param name="self">The string being tested.</param>
  /// <param name="suffix">The suffix to look for.</param>
  /// <param name="start">Start index; negative values count from the end and are clamped to 0.
  /// A start beyond the end of the string never matches.</param>
  public static bool endswith(string self, [BytesConversion]string suffix, int start) {
      int len = self.Length;
      if (start > len) return false;

      // map the negative index to its positive counterpart
      if (start < 0) {
          start += len;
          if (start < 0) start = 0;
      }

      // Ordinal: the default String.EndsWith overload is culture-sensitive.
      return self.Substring(start).EndsWith(suffix, StringComparison.Ordinal);
  }
  // With optional start, test beginning at that position (the char at that index is
  // included in the test). With optional end, stop comparing at that position (the
  // char at that index is not included in the test)
  /// <summary>
  /// Returns true when the slice self[start:end] ends with <paramref name="suffix"/>.
  /// </summary>
  /// <param name="self">The string being tested.</param>
  /// <param name="suffix">The suffix to look for.</param>
  /// <param name="start">Start index; negative values count from the end and are clamped to 0.</param>
  /// <param name="end">End index (exclusive); values past the end are clamped to the length,
  /// negative values count from the end and are clamped to 0.</param>
  public static bool endswith(string self, [BytesConversion]string suffix, int start, int end) {
      int len = self.Length;
      if (start > len) return false;

      // map the negative indices to their positive counterparts
      if (start < 0) {
          start += len;
          if (start < 0) start = 0;
      }

      if (end >= len) {
          end = len;
      } else if (end < 0) {
          end += len;
          // Clamp like a Python slice does: an end before the beginning of the string
          // selects an empty slice, which still ends with the empty suffix.
          if (end < 0) end = 0;
      }

      if (end < start) return false;

      // Ordinal: the default String.EndsWith overload is culture-sensitive.
      return self.Substring(start, end - start).EndsWith(suffix, StringComparison.Ordinal);
  }
  /// <summary>
  /// Returns true when <paramref name="self"/> ends with any element of
  /// <paramref name="suffix"/>. Each element is converted with GetString, which raises a
  /// TypeError for non-string elements.
  /// </summary>
  private static bool endswith(string self, PythonTuple suffix) {
      foreach (object candidate in suffix) {
          // Ordinal: the default String.EndsWith overload is culture-sensitive.
          if (self.EndsWith(GetString(candidate), StringComparison.Ordinal)) {
              return true;
          }
      }
      return false;
  }
  /// <summary>
  /// Returns true when the part of <paramref name="self"/> beginning at
  /// <paramref name="start"/> ends with any element of <paramref name="suffix"/>.
  /// </summary>
  private static bool endswith(string self, PythonTuple suffix, int start) {
      bool matched = false;
      foreach (object candidate in suffix) {
          // Stop at the first hit; later elements are not converted or tested.
          matched = endswith(self, GetString(candidate), start);
          if (matched) break;
      }
      return matched;
  }
  /// <summary>
  /// Returns true when the slice self[start:end] ends with any element of
  /// <paramref name="suffix"/>.
  /// </summary>
  private static bool endswith(string self, PythonTuple suffix, int start, int end) {
      bool matched = false;
      foreach (object candidate in suffix) {
          // Stop at the first hit; later elements are not converted or tested.
          matched = endswith(self, GetString(candidate), start, end);
          if (matched) break;
      }
      return matched;
  }
  /// <summary>
  /// Returns true when <paramref name="self"/> starts with <paramref name="prefix"/>.
  /// </summary>
  public static bool startswith(string self, [BytesConversion]string prefix) {
      // Compare code unit by code unit. The single-argument String.StartsWith overload uses
      // the current culture, which can ignore characters and therefore report false matches.
      return self.StartsWith(prefix, StringComparison.Ordinal);
  }
  /// <summary>
  /// Returns true when the part of <paramref name="self"/> beginning at
  /// <paramref name="start"/> starts with <paramref name="prefix"/>.
  /// </summary>
  /// <param name="self">The string being tested.</param>
  /// <param name="prefix">The prefix to look for.</param>
  /// <param name="start">Start index; negative values count from the end and are clamped to 0.
  /// A start beyond the end of the string never matches.</param>
  public static bool startswith(string self, [BytesConversion]string prefix, int start) {
      int len = self.Length;
      if (start > len) return false;

      // map the negative index to its positive counterpart
      if (start < 0) {
          start += len;
          if (start < 0) start = 0;
      }

      // Ordinal: the default String.StartsWith overload is culture-sensitive.
      return self.Substring(start).StartsWith(prefix, StringComparison.Ordinal);
  }
  /// <summary>
  /// Returns true when the slice self[start:end] starts with <paramref name="prefix"/>.
  /// </summary>
  /// <param name="self">The string being tested.</param>
  /// <param name="prefix">The prefix to look for.</param>
  /// <param name="start">Start index; negative values count from the end and are clamped to 0.</param>
  /// <param name="end">End index (exclusive); values past the end are clamped to the length,
  /// negative values count from the end and are clamped to 0.</param>
  public static bool startswith(string self, [BytesConversion]string prefix, int start, int end) {
      int len = self.Length;
      if (start > len) return false;

      // map the negative indices to their positive counterparts
      if (start < 0) {
          start += len;
          if (start < 0) start = 0;
      }

      if (end >= len) {
          end = len;
      } else if (end < 0) {
          end += len;
          // Clamp like a Python slice does: an end before the beginning of the string
          // selects an empty slice, which still starts with the empty prefix.
          if (end < 0) end = 0;
      }

      if (end < start) return false;

      // Ordinal: the default String.StartsWith overload is culture-sensitive.
      return self.Substring(start, end - start).StartsWith(prefix, StringComparison.Ordinal);
  }
  /// <summary>
  /// Returns true when <paramref name="self"/> starts with any element of
  /// <paramref name="prefix"/>. Each element is converted with GetString, which raises a
  /// TypeError for non-string elements.
  /// </summary>
  private static bool startswith(string self, PythonTuple prefix) {
      foreach (object candidate in prefix) {
          // Ordinal: the default String.StartsWith overload is culture-sensitive.
          if (self.StartsWith(GetString(candidate), StringComparison.Ordinal)) {
              return true;
          }
      }
      return false;
  }
  /// <summary>
  /// Returns true when the part of <paramref name="self"/> beginning at
  /// <paramref name="start"/> starts with any element of <paramref name="prefix"/>.
  /// </summary>
  private static bool startswith(string self, PythonTuple prefix, int start) {
      bool matched = false;
      foreach (object candidate in prefix) {
          // Stop at the first hit; later elements are not converted or tested.
          matched = startswith(self, GetString(candidate), start);
          if (matched) break;
      }
      return matched;
  }
  /// <summary>
  /// Returns true when the slice self[start:end] starts with any element of
  /// <paramref name="prefix"/>.
  /// </summary>
  private static bool startswith(string self, PythonTuple prefix, int start, int end) {
      bool matched = false;
      foreach (object candidate in prefix) {
          // Stop at the first hit; later elements are not converted or tested.
          matched = startswith(self, GetString(candidate), start, end);
          if (matched) break;
      }
      return matched;
  }
  // note: any changes in how this iterator works should also be applied in the
  // optimized overloads of Builtins.map()
  /// <summary>
  /// Iterates over a string one character at a time, yielding each character as a
  /// one-character string. The object acts as its own enumerator.
  /// </summary>
  [PythonType("str_iterator")]
  private class PythonStringEnumerable : IEnumerable, IEnumerator<string> {
      private readonly string/*!*/ _s;
      private int _index;   // -1 before the first MoveNext; _s.Length once exhausted

      public PythonStringEnumerable(string s) {
          Assert.NotNull(s);
          _s = s;
          _index = -1;
      }

      #region IEnumerable Members

      public IEnumerator GetEnumerator() {
          // The same instance is handed out every time; no new enumerator is created.
          return this;
      }

      #endregion

      #region IEnumerator<string> Members

      public string Current {
          get {
              if (_index < 0) {
                  throw PythonOps.SystemError("Enumeration has not started. Call MoveNext.");
              }
              if (_index >= _s.Length) {
                  throw PythonOps.SystemError("Enumeration already finished.");
              }
              return ScriptingRuntimeHelpers.CharToString(_s[_index]);
          }
      }

      #endregion

      #region IDisposable Members

      public void Dispose() { }

      #endregion

      #region IEnumerator Members

      object IEnumerator.Current {
          get { return Current; }
      }

      public bool MoveNext() {
          if (_index < _s.Length) {
              // Advance; landing exactly on _s.Length means the last character was consumed.
              _index++;
              return _index < _s.Length;
          }
          return false;
      }

      public void Reset() {
          _index = -1;
      }

      #endregion
  }
  /// <summary>
  /// Creates an enumerable that yields each character of <paramref name="str"/> as a
  /// one-character string.
  /// </summary>
  internal static IEnumerable StringEnumerable(string str) {
      PythonStringEnumerable characters = new PythonStringEnumerable(str);
      return characters;
  }
  /// <summary>
  /// Creates an enumerator that yields each character of <paramref name="str"/> as a
  /// one-character string.
  /// </summary>
  internal static IEnumerator<string> StringEnumerator(string str) {
      PythonStringEnumerable characters = new PythonStringEnumerable(str);
      return characters;
  }
  1972. #endregion
  1973. #region Unicode Encode/Decode Fallback Support
  1974. #if FEATURE_ENCODING
  /// CPython supports several behaviors when an error occurs while encoding or decoding a
  /// string, and additionally lets users register their own. The built-in throwing and
  /// replacing behaviors are supported by both CPython and the CLR. For custom behaviors a
  /// single fallback is defined for encoding and one for decoding; each calls the Python
  /// function to obtain the replacement.
  ///
  /// The handler is called with a UnicodeEncodeError or UnicodeDecodeError object which
  /// contains:
  ///     encoding    (string, the encoding the user requested)
  ///     end         (the end of the invalid characters)
  ///     object      (the original string being processed)
  ///     reason      (the error, e.g. 'unexpected byte code', not sure of others)
  ///     start       (the start of the invalid sequence)
  ///
  /// The handler returns a tuple of (unicode, int) where unicode is the replacement string
  /// and int is an index where processing should continue.
  private class PythonEncoderFallbackBuffer : EncoderFallbackBuffer {
      private object _function;
      private string _encoding, _strData;
      private string _buffer;      // replacement text from the most recent handler call
      private int _bufferIndex;    // next position in _buffer to hand out

      public PythonEncoderFallbackBuffer(string encoding, string str, object callable) {
          this._encoding = encoding;
          _strData = str;
          _function = callable;
      }

      public override bool Fallback(char charUnknown, int index) {
          return DoPythonFallback(index, 1);
      }

      public override bool Fallback(char charUnknownHigh, char charUnknownLow, int index) {
          // a surrogate pair covers two UTF-16 code units
          return DoPythonFallback(index, 2);
      }

      public override char GetNextChar() {
          bool exhausted = _buffer == null || _bufferIndex >= _buffer.Length;
          if (exhausted) {
              return Char.MinValue;
          }
          char next = _buffer[_bufferIndex];
          _bufferIndex++;
          return next;
      }

      public override bool MovePrevious() {
          if (_bufferIndex <= 0) {
              return false;
          }
          _bufferIndex--;
          return true;
      }

      public override int Remaining {
          get { return (_buffer == null) ? 0 : _buffer.Length - _bufferIndex; }
      }

      public override void Reset() {
          _buffer = null;
          _bufferIndex = 0;
          base.Reset();
      }

      // Asks the Python handler for replacement text covering [index, index + length).
      // Without a handler nothing is substituted and false is returned.
      private bool DoPythonFallback(int index, int length) {
          if (_function == null) {
              return false;
          }

          // create the exception object to hand to the user-function...
          PythonExceptions._UnicodeEncodeError exObj = new PythonExceptions._UnicodeEncodeError();
          exObj.__init__(_encoding, _strData, index, index + length, "unexpected code byte");

          // call the user function...
          object res = PythonCalls.Call(_function, exObj);

          // finally process the user's request.
          _buffer = PythonDecoderFallbackBuffer.CheckReplacementTuple(res, "encoding");
          _bufferIndex = 0;
          return true;
      }
  }
  /// <summary>
  /// Encoder fallback whose buffers delegate to a Python error-handler callable. The
  /// callable may be null, in which case the buffers supply no replacement text.
  /// </summary>
  class PythonEncoderFallback : EncoderFallback {
      private object _function;
      private string _str;
      private string _enc;

      /// <param name="encoding">Name of the encoding in use; passed to the handler's error object.</param>
      /// <param name="data">The string being encoded.</param>
      /// <param name="callable">The Python handler, or null.</param>
      public PythonEncoderFallback(string encoding, string data, object callable) {
          _enc = encoding;
          _str = data;
          _function = callable;
      }

      public override EncoderFallbackBuffer CreateFallbackBuffer() {
          EncoderFallbackBuffer buffer = new PythonEncoderFallbackBuffer(_enc, _str, _function);
          return buffer;
      }

      public override int MaxCharCount {
          get {
              // the handler may return text of any length
              return Int32.MaxValue;
          }
      }
  }
  /// <summary>
  /// Decoder fallback buffer that obtains replacement text from a Python error-handler
  /// callable. When no callable was supplied, Fallback reports that nothing was substituted.
  /// </summary>
  private class PythonDecoderFallbackBuffer : DecoderFallbackBuffer {
      private object _function;
      private string _encoding, _strData;
      private string _buffer;      // replacement text from the most recent handler call
      private int _bufferIndex;    // next position in _buffer to hand out

      public PythonDecoderFallbackBuffer(string encoding, string str, object callable) {
          this._function = callable;
          this._strData = str;
          this._encoding = encoding;
      }

      public override int Remaining {
          get { return (_buffer == null) ? 0 : _buffer.Length - _bufferIndex; }
      }

      public override char GetNextChar() {
          bool exhausted = _buffer == null || _bufferIndex >= _buffer.Length;
          if (exhausted) {
              return Char.MinValue;
          }
          char next = _buffer[_bufferIndex];
          _bufferIndex++;
          return next;
      }

      public override bool MovePrevious() {
          if (_bufferIndex <= 0) {
              return false;
          }
          _bufferIndex--;
          return true;
      }

      public override void Reset() {
          _buffer = null;
          _bufferIndex = 0;
          base.Reset();
      }

      public override bool Fallback(byte[] bytesUnknown, int index) {
          if (_function == null) {
              return false;
          }

          // create the exception object to hand to the user-function...
          PythonExceptions._UnicodeDecodeError exObj = new PythonExceptions._UnicodeDecodeError();
          exObj.__init__(_encoding, _strData, index, index + bytesUnknown.Length, "unexpected code byte");

          // call the user function...
          object res = PythonCalls.Call(_function, exObj);

          // finally process the user's request.
          _buffer = CheckReplacementTuple(res, "decoding");
          _bufferIndex = 0;
          return true;
      }

      /// <summary>
      /// Verifies that an error handler returned a two-element tuple whose first item converts
      /// to a string and whose second converts to an int, and returns the string.
      /// </summary>
      /// <param name="res">The value returned by the handler.</param>
      /// <param name="encodeOrDecode">Word placed at the start of the TypeError message.</param>
      internal static string CheckReplacementTuple(object res, string encodeOrDecode) {
          string replacement = null;
          int resumeIndex;   // only validated for convertibility; the value itself is not used

          PythonTuple pair = res as PythonTuple;
          bool wellFormed = pair != null
              && pair.__len__() == 2
              && Converter.TryConvertToString(pair[0], out replacement)
              && Converter.TryConvertToInt32(pair[1], out resumeIndex);

          if (!wellFormed) {
              throw PythonOps.TypeError("{1} error handler must return tuple containing (str, int), got {0}", PythonOps.GetPythonTypeName(res), encodeOrDecode);
          }
          return replacement;
      }
  }
  /// <summary>
  /// Decoder fallback whose buffers delegate to a Python error-handler callable. The
  /// callable may be null, in which case the buffers supply no replacement text.
  /// </summary>
  class PythonDecoderFallback : DecoderFallback {
      private object _function;
      private string _str;
      private string _enc;

      /// <param name="encoding">Name of the encoding in use; passed to the handler's error object.</param>
      /// <param name="data">The data being decoded, as a string.</param>
      /// <param name="callable">The Python handler, or null.</param>
      public PythonDecoderFallback(string encoding, string data, object callable) {
          _enc = encoding;
          _str = data;
          _function = callable;
      }

      public override DecoderFallbackBuffer CreateFallbackBuffer() {
          DecoderFallbackBuffer buffer = new PythonDecoderFallbackBuffer(_enc, _str, _function);
          return buffer;
      }

      public override int MaxCharCount {
          // not supported: reading this property always throws
          get { throw new NotImplementedException(); }
      }
  }
  /// <summary>
  /// Encoder fallback implementing the "backslashreplace" error handler: every character the
  /// target encoding cannot represent is replaced by a backslash escape using lowercase hex
  /// digits: \xNN for values up to 0xFF, \uNNNN for other UTF-16 code units, and \UNNNNNNNN
  /// for a surrogate pair.
  /// </summary>
  class BackslashEncoderReplaceFallback : EncoderFallback {
      class BackslashReplaceFallbackBuffer : EncoderFallbackBuffer {
          private List<char> _buffer = new List<char>();   // all replacement text produced so far
          private int _index;                              // next position in _buffer to hand out

          public override bool Fallback(char charUnknownHigh, char charUnknownLow, int index) {
              // Only a well-formed pair can be combined into a single code point.
              if (!Char.IsHighSurrogate(charUnknownHigh) || !Char.IsLowSurrogate(charUnknownLow)) {
                  return false;
              }

              int codePoint = Char.ConvertToUtf32(charUnknownHigh, charUnknownLow);
              _buffer.Add('\\');
              _buffer.Add('U');
              AddCharacter(codePoint >> 24);
              AddCharacter(codePoint >> 16);
              AddCharacter(codePoint >> 8);
              AddCharacter(codePoint);
              return true;
          }

          public override bool Fallback(char charUnknown, int index) {
              _buffer.Add('\\');
              int val = (int)charUnknown;
              if (val > 0xFF) {
                  _buffer.Add('u');
                  AddCharacter(val >> 8);
                  AddCharacter(val & 0xFF);
              } else {
                  _buffer.Add('x');
                  AddCharacter(charUnknown);
              }
              return true;
          }

          // Appends the low byte of val as two hex digits.
          private void AddCharacter(int val) {
              AddOneDigit(((val) & 0xF0) >> 4);
              AddOneDigit(val & 0x0F);
          }

          // Appends a single lowercase hex digit for a value in 0..15.
          private void AddOneDigit(int val) {
              if (val > 9) {
                  _buffer.Add((char)('a' + val - 0x0A));
              } else {
                  _buffer.Add((char)('0' + val));
              }
          }

          public override char GetNextChar() {
              if (_index == _buffer.Count) return Char.MinValue;
              return _buffer[_index++];
          }

          public override bool MovePrevious() {
              if (_index > 0) {
                  _index--;
                  return true;
              }
              return false;
          }

          public override int Remaining {
              get { return _buffer.Count - _index; }
          }
      }

      public override EncoderFallbackBuffer CreateFallbackBuffer() {
          return new BackslashReplaceFallbackBuffer();
      }

      public override int MaxCharCount {
          // longest replacement is "\UNNNNNNNN": ten characters
          get { return 10; }
      }
  }
  /// <summary>
  /// Encoder fallback implementing the "xmlcharrefreplace" error handler: every character
  /// the target encoding cannot represent is replaced by a decimal XML character reference
  /// of the form &amp;#NNN;. A surrogate pair produces one reference for the combined
  /// code point.
  /// </summary>
  class XmlCharRefEncoderReplaceFallback : EncoderFallback {
      class XmlCharRefEncoderReplaceFallbackBuffer : EncoderFallbackBuffer {
          private List<char> _buffer = new List<char>();   // all replacement text produced so far
          private int _index;                              // next position in _buffer to hand out

          public override bool Fallback(char charUnknownHigh, char charUnknownLow, int index) {
              // Only a well-formed pair can be combined into a single code point.
              if (!Char.IsHighSurrogate(charUnknownHigh) || !Char.IsLowSurrogate(charUnknownLow)) {
                  return false;
              }
              AppendReference(Char.ConvertToUtf32(charUnknownHigh, charUnknownLow));
              return true;
          }

          public override bool Fallback(char charUnknown, int index) {
              AppendReference((int)charUnknown);
              return true;
          }

          // Appends "&#<decimal code point>;" to the pending replacement text.
          private void AppendReference(int codePoint) {
              _buffer.Add('&');
              _buffer.Add('#');
              // invariant culture keeps the digits independent of the current culture
              foreach (char c in codePoint.ToString(CultureInfo.InvariantCulture)) {
                  _buffer.Add(c);
              }
              _buffer.Add(';');
          }

          public override char GetNextChar() {
              if (_index == _buffer.Count) return Char.MinValue;
              return _buffer[_index++];
          }

          public override bool MovePrevious() {
              if (_index > 0) {
                  _index--;
                  return true;
              }
              return false;
          }

          public override int Remaining {
              get { return _buffer.Count - _index; }
          }
      }

      public override EncoderFallbackBuffer CreateFallbackBuffer() {
          return new XmlCharRefEncoderReplaceFallbackBuffer();
      }

      public override int MaxCharCount {
          // longest replacement is "&#1114111;" (code point 0x10FFFF): ten characters
          get { return 10; }
      }
  }
  /// <summary>
  /// Encoding for the escape codecs. Encoding runs the text through RawUnicodeEscapeEncode
  /// (raw mode) or ReprEncode (non-raw mode) and emits one byte per resulting character;
  /// decoding widens each byte to a char and parses the result with LiteralParser.ParseString.
  /// </summary>
  class UnicodeEscapeEncoding : Encoding {
      private bool _raw;

      /// <param name="raw">True to use the raw escape functions; also forwarded to
      /// LiteralParser.ParseString when decoding.</param>
      public UnicodeEscapeEncoding(bool raw) {
          _raw = raw;
      }

      public override int GetByteCount(char[] chars, int index, int count) {
          return EscapeEncode(chars, index, count).Length;
      }

      // Produces the escaped text for the given slice of chars.
      private string EscapeEncode(char[] chars, int index, int count) {
          if (_raw) {
              return RawUnicodeEscapeEncode(new string(chars, index, count));
          }

          bool dummy = false;
          return ReprEncode(new string(chars, index, count), ref dummy);
      }

      // Widens the given byte slice to chars and parses the escapes it contains.
      private string EscapeDecode(byte[] bytes, int index, int count) {
          char[] tmpChars = new char[count];
          for (int i = 0; i < count; i++) {
              tmpChars[i] = (char)bytes[i + index];
          }
          return LiteralParser.ParseString(tmpChars, 0, tmpChars.Length, _raw, true, false);
      }

      public override int GetBytes(char[] chars, int charIndex, int charCount, byte[] bytes, int byteIndex) {
          // Always write the escaped form so that the number of bytes produced matches what
          // GetByteCount reports for the same input, in raw and non-raw mode alike.
          string res = EscapeEncode(chars, charIndex, charCount);
          for (int i = 0; i < res.Length; i++) {
              bytes[i + byteIndex] = (byte)res[i];
          }
          return res.Length;
      }

      public override int GetCharCount(byte[] bytes, int index, int count) {
          return EscapeDecode(bytes, index, count).Length;
      }

      public override int GetChars(byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex) {
          // The input is staged in a scratch buffer inside EscapeDecode; only the parsed
          // result is written to the caller's array, starting at charIndex.
          string res = EscapeDecode(bytes, byteIndex, byteCount);
          for (int i = 0; i < res.Length; i++) {
              chars[i + charIndex] = (char)res[i];
          }
          return res.Length;
      }

      public override int GetMaxByteCount(int charCount) {
          // NOTE(review): assumes the longest escape emitted per UTF-16 code unit is the
          // six-byte \uXXXX form - confirm against ReprEncode / RawUnicodeEscapeEncode.
          return charCount * 6;
      }

      public override int GetMaxCharCount(int byteCount) {
          return byteCount;
      }
  }
  2288. #endif
  2289. #endregion
  /// <summary>
  /// Returns the repr of <paramref name="self"/>, as produced by StringOps.Quote.
  /// </summary>
  public static string/*!*/ __repr__(string/*!*/ self) {
      string quoted = StringOps.Quote(self);
      return quoted;
  }
  2293. }
  2294. }