PageRenderTime 62ms CodeModel.GetById 20ms RepoModel.GetById 1ms app.codeStats 0ms

/DICK.B1/IronPython/Runtime/Operations/StringOps.cs

https://bitbucket.org/williamybs/uidipythontool
C# | 2678 lines | 2322 code | 266 blank | 90 comment | 402 complexity | 13d29dd77800c801907fbcb1c1b4590f MD5 | raw file
  1. /* ****************************************************************************
  2. *
  3. * Copyright (c) Microsoft Corporation.
  4. *
  5. * This source code is subject to terms and conditions of the Microsoft Public License. A
  6. * copy of the license can be found in the License.html file at the root of this distribution. If
  7. * you cannot locate the Microsoft Public License, please send an email to
  8. * dlr@microsoft.com. By using this source code in any fashion, you are agreeing to be bound
  9. * by the terms of the Microsoft Public License.
  10. *
  11. * You must not remove this notice, or any other, from this software.
  12. *
  13. *
  14. * ***************************************************************************/
  15. using System;
  16. using System.Collections;
  17. using System.Collections.Generic;
  18. using System.Diagnostics;
  19. using System.Globalization;
  20. using System.Reflection;
  21. using System.Runtime.InteropServices;
  22. using System.Text;
  23. using IronPython.Runtime.Exceptions;
  24. using IronPython.Runtime.Types;
  25. using Microsoft.Scripting;
  26. using Microsoft.Scripting.Runtime;
  27. using Microsoft.Scripting.Utils;
  28. #if CLR2
  29. using Microsoft.Scripting.Math;
  30. #else
  31. using System.Numerics;
  32. #endif
  33. using SpecialNameAttribute = System.Runtime.CompilerServices.SpecialNameAttribute;
  34. namespace IronPython.Runtime.Operations {
  /// <summary>
  /// Base class for user-defined types that derive from string. The instance
  /// carries the underlying string in Value, and our converter recognizes it
  /// as a string. Equality, indexing and containment below all operate on
  /// that wrapped value.
  /// </summary>
  public class ExtensibleString : Extensible<string>, ICodeFormattable, IStructuralEquatable
#if CLR2
      , IValueEquality
#endif
  {
      /// <summary>Creates an instance wrapping the empty string.</summary>
      public ExtensibleString()
          : base(String.Empty) {
      }

      /// <summary>Creates an instance wrapping <paramref name="self"/>.</summary>
      public ExtensibleString(string self)
          : base(self) {
      }

      /// <summary>Returns the wrapped string.</summary>
      public override string ToString() {
          return Value;
      }

      #region ICodeFormattable Members

      /// <summary>Returns StringOps.Quote applied to the wrapped string.</summary>
      public virtual string/*!*/ __repr__(CodeContext/*!*/ context) {
          return StringOps.Quote(Value);
      }

      #endregion

      /// <summary>
      /// Equality against string, ExtensibleString or Bytes operands; any other
      /// operand type yields NotImplemented.
      /// </summary>
      [return: MaybeNotImplemented]
      public object __eq__(object other) {
          if (!(other is string) && !(other is ExtensibleString) && !(other is Bytes)) {
              return NotImplementedType.Value;
          }

          bool same = EqualsWorker(other);
          return ScriptingRuntimeHelpers.BooleanToObject(same);
      }

      /// <summary>
      /// Inequality against string, ExtensibleString or Bytes operands; any other
      /// operand type yields NotImplemented.
      /// </summary>
      [return: MaybeNotImplemented]
      public object __ne__(object other) {
          if (!(other is string) && !(other is ExtensibleString) && !(other is Bytes)) {
              return NotImplementedType.Value;
          }

          bool same = EqualsWorker(other);
          return ScriptingRuntimeHelpers.BooleanToObject(!same);
      }

      #region IValueEquality members
#if CLR2
      int IValueEquality.GetValueHashCode() {
          return GetHashCode();
      }

      bool IValueEquality.ValueEquals(object other) {
          return EqualsWorker(other);
      }
#endif
      #endregion

      #region IStructuralEquatable Members

      int IStructuralEquatable.GetHashCode(IEqualityComparer comparer) {
          // The Python comparer gets the regular hash; any other comparer
          // hashes a tuple built from the individual characters.
          if (!(comparer is PythonContext.PythonEqualityComparer)) {
              IStructuralEquatable chars = (IStructuralEquatable)PythonTuple.MakeTuple(Value.ToCharArray());
              return chars.GetHashCode(comparer);
          }

          return GetHashCode();
      }

      bool IStructuralEquatable.Equals(object other, IEqualityComparer comparer) {
          if (comparer is PythonContext.PythonEqualityComparer) {
              return EqualsWorker(other);
          }

          // Reduce the operand to a plain string (ExtensibleString, then string,
          // then Bytes) and compare character by character with the comparer.
          ExtensibleString wrapped = other as ExtensibleString;
          if (wrapped != null) {
              return EqualsWorker(wrapped.Value, comparer);
          }

          string plain = other as string;
          if (plain != null) {
              return EqualsWorker(plain, comparer);
          }

          Bytes raw = other as Bytes;
          return raw != null && EqualsWorker(raw.ToString(), comparer);
      }

      // Value comparison against an ExtensibleString, string or Bytes operand;
      // null and every other type compare unequal.
      private bool EqualsWorker(object other) {
          if (other == null) {
              return false;
          }

          ExtensibleString wrapped = other as ExtensibleString;
          if (wrapped != null) {
              return Value == wrapped.Value;
          }

          string plain = other as string;
          if (plain != null) {
              return Value == plain;
          }

          Bytes raw = other as Bytes;
          return raw != null && Value == raw.ToString();
      }

      // Character-wise comparison delegated to the supplied comparer.
      private bool EqualsWorker(string/*!*/ other, IEqualityComparer comparer) {
          Debug.Assert(other != null);

          int length = Value.Length;
          if (length != other.Length) {
              return false;
          }

          // Two empty strings never enter the loop and compare equal.
          for (int position = 0; position < length; position++) {
              if (!comparer.Equals(Value[position], other[position])) {
                  return false;
              }
          }

          return true;
      }

      #endregion

      #region ISequence Members

      public virtual object this[int index] {
          get {
              char selected = Value[index];
              return ScriptingRuntimeHelpers.CharToString(selected);
          }
      }

      public object this[Slice slice] {
          get {
              return StringOps.GetItem(Value, slice);
          }
      }

      public object __getslice__(int start, int stop) {
          return StringOps.__getslice__(Value, start, stop);
      }

      #endregion

      #region IPythonContainer Members

      /// <summary>Returns the length of the wrapped string.</summary>
      public virtual int __len__() {
          return Value.Length;
      }

      /// <summary>
      /// Substring test for string and ExtensibleString operands; any other
      /// operand raises a TypeError.
      /// </summary>
      public virtual bool __contains__(object value) {
          string needle = value as string;
          if (needle != null) {
              return Value.Contains(needle);
          }

          ExtensibleString wrapped = value as ExtensibleString;
          if (wrapped != null) {
              return Value.Contains(wrapped.Value);
          }

          throw PythonOps.TypeErrorForBadInstance("expected string, got {0}", value);
      }

      #endregion
  }
  146. /// <summary>
  147. /// StringOps is the static class that contains the methods defined on strings, i.e. 'abc'
  148. ///
  149. /// Here we define all of the methods that a Python user would see when doing dir('abc').
  150. /// If the user is running in a CLS aware context they will also see all of the methods
  151. /// defined in the CLS System.String type.
  152. /// </summary>
  153. public static class StringOps {
  // Threshold used by isunicode: a character at or above this value counts as unicode.
  internal const int LowestUnicodeValue = 0x7f;

  // Characters removed by the argument-less lstrip overload.
  private static readonly char[] Whitespace = { ' ', '\t', '\n', '\r', '\f' };
  /// <summary>
  /// Converts <paramref name="x"/> to a string: null becomes "None", a string goes
  /// through CheckAsciiString, anything else is converted with the unary String operator.
  /// </summary>
  internal static object FastNew(CodeContext/*!*/ context, object x) {
      if (x == null) {
          return "None";
      }

      string text = x as string;
      if (text != null) {
          // check ascii
          return CheckAsciiString(context, text);
      }

      // PythonOps.StringRepr is deliberately not used: an Extensible<string>
      // returned from __str__ has to be handed back as-is.
      object value = PythonContext.InvokeUnaryOperator(context, UnaryOperators.String, x);
      if (!(value is string) && !(value is Extensible<string>)) {
          throw PythonOps.TypeError("expected str, got {0} from __str__", DynamicHelpers.GetPythonType(value).Name);
      }

      return value;
  }
  /// <summary>
  /// Decodes a string, Extensible&lt;string&gt;, Bytes or PythonBuffer value through
  /// RawDecode. Raises a TypeError when <paramref name="errors"/> is not a string or
  /// when the value is null or of any other type.
  /// </summary>
  internal static string FastNewUnicode(CodeContext context, object value, object encoding, object errors) {
      string strErrors = errors as string;
      if (strErrors == null) {
          throw PythonOps.TypeError("unicode() argument 3 must be string, not {0}", PythonTypeOps.GetName(errors));
      }

      // Every "as" cast yields null for a null value, so null falls through
      // to the TypeError at the bottom.
      string plain = value as string;
      if (plain != null) {
          return StringOps.RawDecode(context, plain, encoding, strErrors);
      }

      Extensible<string> wrapped = value as Extensible<string>;
      if (wrapped != null) {
          return StringOps.RawDecode(context, wrapped.Value, encoding, strErrors);
      }

      Bytes raw = value as Bytes;
      if (raw != null) {
          return StringOps.RawDecode(context, raw.ToString(), encoding, strErrors);
      }

      PythonBuffer view = value as PythonBuffer;
      if (view != null) {
          return StringOps.RawDecode(context, view.ToString(), encoding, strErrors);
      }

      throw PythonOps.TypeError("coercing to Unicode: need string or buffer, {0} found", PythonTypeOps.GetName(value));
  }
  /// <summary>Same as the four-argument overload with errors fixed to "strict".</summary>
  internal static object FastNewUnicode(CodeContext context, object value, object encoding) {
      object decoded = FastNewUnicode(context, value, encoding, "strict");
      return decoded;
  }
  /// <summary>
  /// Converts <paramref name="value"/> to unicode. null becomes "None" and a string is
  /// returned unchanged. Otherwise __unicode__ and then __str__ are tried (through the
  /// old-style instance lookup for OldInstance objects), and their result must be a
  /// string or Extensible&lt;string&gt;. When neither method is available the value is
  /// decoded with the language context's default encoding and "strict" errors.
  /// </summary>
  /// <remarks>
  /// The TypeError raised for a non-string conversion result names the type of that
  /// result, matching what FastNew reports for a bad __str__ result.
  /// </remarks>
  internal static object FastNewUnicode(CodeContext context, object value) {
      if (value == null) {
          return "None";
      } else if (value is string) {
          return value;
      }

      object res;
      OldInstance oi = value as OldInstance;
      if (oi != null &&
          (oi.TryGetBoundCustomMember(context, "__unicode__", out res) || oi.TryGetBoundCustomMember(context, "__str__", out res))) {
          // res holds the bound method at this point; invoke it to get the converted value.
          res = context.LanguageContext.Call(context, res);
          if (res is string || res is Extensible<string>) {
              return res;
          }
          // Report the type that was actually returned, not the type of the input object.
          throw PythonOps.TypeError("coercing to Unicode: expected string, got {0}", PythonTypeOps.GetName(res));
      }

      if (PythonTypeOps.TryInvokeUnaryOperator(context, value, "__unicode__", out res) ||
          PythonTypeOps.TryInvokeUnaryOperator(context, value, "__str__", out res)) {
          if (res is string || res is Extensible<string>) {
              return res;
          }
          // Report the type that was actually returned, not the type of the input object.
          throw PythonOps.TypeError("coercing to Unicode: expected string, got {0}", PythonTypeOps.GetName(res));
      }

      return FastNewUnicode(context, value, context.LanguageContext.DefaultEncoding.WebName, "strict");
  }
  /// <summary>
  /// Returns <paramref name="s"/> unchanged unless it contains a character greater than
  /// '\x80'; in that case the string is passed to the decoding __new__ overload with a
  /// null encoding and "strict" errors.
  /// </summary>
  private static object CheckAsciiString(CodeContext context, string s) {
      foreach (char current in s) {
          if (current > '\x80') {
              // The first offending character decides; the whole string is re-created.
              return StringOps.__new__(
                  context,
                  (PythonType)DynamicHelpers.GetPythonTypeFromType(typeof(String)),
                  s,
                  null,
                  "strict"
              );
          }
      }

      return s;
  }
  238. #region Python Constructors
  /// <summary>
  /// Argument-less constructor: the empty string for the built-in string type,
  /// otherwise a new instance of <paramref name="cls"/>.
  /// </summary>
  [StaticExtensionMethod]
  public static object __new__(CodeContext/*!*/ context, PythonType cls) {
      if (cls != TypeCache.String) {
          return cls.CreateInstance(context);
      }

      return "";
  }
  /// <summary>
  /// Constructor for an arbitrary object: converted through FastNew for the built-in
  /// string type; for other types the converted string is passed to CreateInstance.
  /// </summary>
  [StaticExtensionMethod]
  public static object __new__(CodeContext/*!*/ context, PythonType cls, object @object) {
      if (cls != TypeCache.String) {
          return cls.CreateInstance(context, __new__(context, TypeCache.String, @object));
      }

      return FastNew(context, @object);
  }
  /// <summary>
  /// Constructor for a string argument: checked through CheckAsciiString for the
  /// built-in string type, otherwise passed directly to CreateInstance.
  /// </summary>
  [StaticExtensionMethod]
  public static object __new__(CodeContext/*!*/ context, PythonType cls, [NotNull]string @object) {
      if (cls != TypeCache.String) {
          return cls.CreateInstance(context, @object);
      }

      return CheckAsciiString(context, @object);
  }
  /// <summary>
  /// Constructor for an ExtensibleString argument: converted through FastNew for the
  /// built-in string type; for other types the converted value is passed to CreateInstance.
  /// </summary>
  [StaticExtensionMethod]
  public static object __new__(CodeContext/*!*/ context, PythonType cls, [NotNull]ExtensibleString @object) {
      if (cls != TypeCache.String) {
          return cls.CreateInstance(context, __new__(context, TypeCache.String, @object));
      }

      return FastNew(context, @object);
  }
  /// <summary>
  /// Constructor for a char argument: the one-character string is checked through
  /// CheckAsciiString for the built-in string type; other types receive the converted value.
  /// </summary>
  [StaticExtensionMethod]
  public static object __new__(CodeContext/*!*/ context, PythonType cls, char @object) {
      if (cls != TypeCache.String) {
          return cls.CreateInstance(context, __new__(context, TypeCache.String, @object));
      }

      string single = ScriptingRuntimeHelpers.CharToString(@object);
      return CheckAsciiString(context, single);
  }
  /// <summary>
  /// Constructor for a BigInteger argument: its ToString() result for the built-in
  /// string type; other types receive the converted value through CreateInstance.
  /// </summary>
  [StaticExtensionMethod]
  public static object __new__(CodeContext/*!*/ context, PythonType cls, [NotNull]BigInteger @object) {
      if (cls != TypeCache.String) {
          return cls.CreateInstance(context, __new__(context, TypeCache.String, @object));
      }

      return @object.ToString();
  }
  /// <summary>
  /// Constructor for an Extensible&lt;BigInteger&gt; argument: converted through FastNew
  /// for the built-in string type; other types receive the converted value.
  /// </summary>
  [StaticExtensionMethod]
  public static object __new__(CodeContext/*!*/ context, PythonType cls, [NotNull]Extensible<BigInteger> @object) {
      if (cls != TypeCache.String) {
          return cls.CreateInstance(context, __new__(context, TypeCache.String, @object));
      }

      return FastNew(context, @object);
  }
  /// <summary>
  /// Constructor for an int argument: its ToString() result for the built-in string
  /// type; other types receive the converted value through CreateInstance.
  /// </summary>
  [StaticExtensionMethod]
  public static object __new__(CodeContext/*!*/ context, PythonType cls, int @object) {
      if (cls != TypeCache.String) {
          return cls.CreateInstance(context, __new__(context, TypeCache.String, @object));
      }

      return @object.ToString();
  }
  /// <summary>
  /// Constructor for a bool argument: its ToString() result for the built-in string
  /// type; other types receive the converted value through CreateInstance.
  /// </summary>
  [StaticExtensionMethod]
  public static object __new__(CodeContext/*!*/ context, PythonType cls, bool @object) {
      if (cls != TypeCache.String) {
          return cls.CreateInstance(context, __new__(context, TypeCache.String, @object));
      }

      return @object.ToString();
  }
  /// <summary>
  /// Constructor for a double argument: formatted by DoubleOps.__str__ for the built-in
  /// string type; other types receive the converted value through CreateInstance.
  /// </summary>
  [StaticExtensionMethod]
  public static object __new__(CodeContext/*!*/ context, PythonType cls, double @object) {
      if (cls != TypeCache.String) {
          return cls.CreateInstance(context, __new__(context, TypeCache.String, @object));
      }

      return DoubleOps.__str__(context, @object);
  }
  /// <summary>
  /// Constructor for an Extensible&lt;double&gt; argument: converted through FastNew for
  /// the built-in string type; other types receive the converted value.
  /// </summary>
  [StaticExtensionMethod]
  public static object __new__(CodeContext/*!*/ context, PythonType cls, Extensible<double> @object) {
      if (cls != TypeCache.String) {
          return cls.CreateInstance(context, __new__(context, TypeCache.String, @object));
      }

      return FastNew(context, @object);
  }
  /// <summary>
  /// Constructor for a float argument: formatted by SingleOps.__str__ for the built-in
  /// string type; other types receive the converted value through CreateInstance.
  /// </summary>
  [StaticExtensionMethod]
  public static object __new__(CodeContext/*!*/ context, PythonType cls, float @object) {
      if (cls != TypeCache.String) {
          return cls.CreateInstance(context, __new__(context, TypeCache.String, @object));
      }

      return SingleOps.__str__(context, @object);
  }
  /// <summary>
  /// Decoding constructor. <paramref name="string"/> must be a string, otherwise a
  /// TypeError is raised. For the built-in string type the text is decoded with the
  /// given encoding (the context's default encoding name when null) and error mode;
  /// other types receive the decoded value through CreateInstance.
  /// </summary>
  [StaticExtensionMethod]
  public static object __new__(CodeContext/*!*/ context, PythonType cls,
      object @string,
      [DefaultParameterValue(null)] string encoding,
      [DefaultParameterValue("strict")] string errors) {
      string str = @string as string;
      if (str == null) {
          throw PythonOps.TypeError("converting to unicode: need string, got {0}", DynamicHelpers.GetPythonType(@string).Name);
      }

      if (cls != TypeCache.String) {
          return cls.CreateInstance(context, __new__(context, TypeCache.String, str, encoding, errors));
      }

      // The default encoding name is only looked up when no encoding was supplied.
      string encodingName = encoding ?? PythonContext.GetContext(context).GetDefaultEncodingName();
      return decode(context, str, encodingName, errors);
  }
  348. #endregion
  349. #region Python __ methods
  /// <summary>Returns true when <paramref name="item"/> occurs in <paramref name="s"/> (ordinal search).</summary>
  public static bool __contains__(string s, string item) {
      int position = s.IndexOf(item, StringComparison.Ordinal);
      return position >= 0;
  }
  /// <summary>Returns true when the character <paramref name="item"/> occurs in <paramref name="s"/>.</summary>
  public static bool __contains__(string s, char item) {
      int position = s.IndexOf(item);
      return position >= 0;
  }
  /// <summary>Formats <paramref name="self"/> by delegating to ObjectOps.__format__.</summary>
  public static string __format__(CodeContext/*!*/ context, string self, string formatSpec) {
      string formatted = ObjectOps.__format__(context, self, formatSpec);
      return formatted;
  }
  /// <summary>Returns the number of characters in <paramref name="s"/>.</summary>
  public static int __len__(string s) {
      int length = s.Length;
      return length;
  }
  /// <summary>
  /// Returns the one-character string at <paramref name="index"/>, after the index has
  /// been adjusted by PythonOps.FixIndex against the string length.
  /// </summary>
  [SpecialName]
  public static string GetItem(string s, int index) {
      int position = PythonOps.FixIndex(index, s.Length);
      return ScriptingRuntimeHelpers.CharToString(s[position]);
  }
  /// <summary>
  /// Indexer for an arbitrary index object: converts it with Converter.ConvertToIndex
  /// and forwards to the integer overload.
  /// </summary>
  [SpecialName]
  public static string GetItem(string s, object index) {
      int position = Converter.ConvertToIndex(index);
      return GetItem(s, position);
  }
  /// <summary>
  /// Returns the characters of <paramref name="s"/> selected by <paramref name="slice"/>.
  /// A null slice raises a TypeError.
  /// </summary>
  [SpecialName]
  public static string GetItem(string s, Slice slice) {
      if (slice == null) throw PythonOps.TypeError("string indices must be slices or integers");

      int start, stop, step;
      slice.indices(s.Length, out start, out stop, out step);

      // Contiguous forward slices map directly onto Substring.
      if (step == 1) {
          return stop > start ? s.Substring(start, stop - start) : String.Empty;
      }

      bool forward = step > 0;
      if (forward ? start > stop : start < stop) {
          return String.Empty;
      }

      // Element count for the stride, rounding toward the direction of travel.
      int length = forward
          ? (stop - start + step - 1) / step
          : (stop - start + step + 1) / step;

      char[] picked = new char[length];
      int filled = 0;
      for (int position = start; forward ? position < stop : position > stop; position += step) {
          picked[filled++] = s[position];
      }

      return new string(picked);
  }
  /// <summary>
  /// Returns the substring between <paramref name="x"/> and <paramref name="y"/> after
  /// both bounds are adjusted by Slice.FixSliceArguments; an empty range gives the empty string.
  /// </summary>
  public static string __getslice__(string self, int x, int y) {
      Slice.FixSliceArguments(self.Length, ref x, ref y);
      return x < y ? self.Substring(x, y - x) : String.Empty;
  }
  403. #endregion
  404. #region Public Python methods
  /// <summary>
  /// Returns a copy of this string with its first character uppercased and all
  /// remaining characters lowercased (invariant culture). The empty string is
  /// returned unchanged.
  /// </summary>
  public static string capitalize(this string self) {
      if (self.Length == 0) {
          return self;
      }

      string head = Char.ToUpper(self[0], CultureInfo.InvariantCulture).ToString();
      string tail = self.Substring(1).ToLower(CultureInfo.InvariantCulture);
      return head + tail;
  }
  /// <summary>Centers the string in <paramref name="width"/> columns using a space as the fill character.</summary>
  public static string center(this string self, int width) {
      const char defaultFill = ' ';
      return center(self, width, defaultFill);
  }
  /// <summary>
  /// Returns the string centered in a field of <paramref name="width"/> characters,
  /// padded on both sides with <paramref name="fillchar"/>. The string is returned
  /// unchanged when it is already at least <paramref name="width"/> long.
  /// </summary>
  /// <remarks>
  /// When the padding cannot be split evenly the extra fill character is placed the
  /// way CPython places it: on the left when <paramref name="width"/> is odd, on the
  /// right when it is even ('ab'.center(5) is '  ab ', 'a'.center(4) is ' a  ').
  /// </remarks>
  public static string center(this string self, int width, char fillchar) {
      int spaces = width - self.Length;
      if (spaces <= 0) return self;

      // Mirrors CPython's "marg / 2 + (marg & width & 1)".
      int left = spaces / 2 + (spaces & width & 1);
      int right = spaces - left;

      StringBuilder ret = new StringBuilder(width);
      ret.Append(fillchar, left);
      ret.Append(self);
      ret.Append(fillchar, right);
      return ret.ToString();
  }
  /// <summary>Counts occurrences of <paramref name="sub"/> over the whole string.</summary>
  public static int count(this string self, string sub) {
      int end = self.Length;
      return count(self, sub, 0, end);
  }
  /// <summary>Counts occurrences of <paramref name="sub"/> from <paramref name="start"/> to the end of the string.</summary>
  public static int count(this string self, string sub, int start) {
      int end = self.Length;
      return count(self, sub, start, end);
  }
  /// <summary>
  /// Counts non-overlapping ordinal occurrences of <paramref name="ssub"/> between
  /// <paramref name="start"/> and <paramref name="end"/>. A null substring raises a
  /// TypeError; a start beyond the string length gives 0; an empty substring gives
  /// the number of positions in the range plus one (never less than 0).
  /// </summary>
  public static int count(this string self, string ssub, int start, int end) {
      if (ssub == null) throw PythonOps.TypeError("expected string for 'sub' argument, got NoneType");

      int length = self.Length;
      if (start > length) {
          return 0;
      }

      start = PythonOps.FixSliceIndex(start, length);
      end = PythonOps.FixSliceIndex(end, length);

      if (ssub.Length == 0) {
          return Math.Max((end - start) + 1, 0);
      }

      CompareInfo comparer = CultureInfo.InvariantCulture.CompareInfo;
      int total = 0;
      while (start < end) {
          int hit = comparer.IndexOf(self, ssub, start, end - start, CompareOptions.Ordinal);
          if (hit == -1) {
              break;
          }

          total++;
          // Continue after the match so occurrences never overlap.
          start = hit + ssub.Length;
      }

      return total;
  }
  /// <summary>Decodes <paramref name="s"/> with no explicit encoding (Missing.Value) and "strict" errors.</summary>
  public static string decode(CodeContext/*!*/ context, string s) {
      const string errorMode = "strict";
      return decode(context, s, Missing.Value, errorMode);
  }
  /// <summary>Decodes <paramref name="s"/> by delegating to RawDecode with the given encoding and error mode.</summary>
  public static string decode(CodeContext/*!*/ context, string s, [Optional]object encoding, [DefaultParameterValue("strict")]string errors) {
      string decoded = RawDecode(context, s, encoding, errors);
      return decoded;
  }
  /// <summary>Encodes <paramref name="s"/> by delegating to RawEncode with the given encoding and error mode.</summary>
  public static string encode(CodeContext/*!*/ context, string s, [Optional]object encoding, [DefaultParameterValue("strict")]string errors) {
      string encoded = RawEncode(context, s, encoding, errors);
      return encoded;
  }
  // Returns o itself when it is a string; otherwise casts it to Extensible<string>
  // (throwing if the cast is invalid) and returns the wrapped value.
  private static string CastString(object o) {
      return (o as string) ?? ((Extensible<string>)o).Value;
  }
  /// <summary>
  /// Returns the string represented by <paramref name="o"/> when it is a string or an
  /// Extensible&lt;string&gt;; returns null for anything else.
  /// </summary>
  internal static string AsString(object o) {
      string plain = o as string;
      if (plain != null) {
          return plain;
      }

      Extensible<string> wrapped = o as Extensible<string>;
      return wrapped != null ? wrapped.Value : null;
  }
  /// <summary>
  /// Suffix test for a suffix given as an object: after validation by TryStringOrTuple
  /// it is dispatched to the tuple overload or the string overload.
  /// </summary>
  public static bool endswith(this string self, object suffix) {
      TryStringOrTuple(suffix);

      PythonTuple candidates = suffix as PythonTuple;
      if (candidates != null) {
          return endswith(self, candidates);
      }

      return endswith(self, CastString(suffix));
  }
  /// <summary>
  /// Suffix test from <paramref name="start"/> for a suffix given as an object: after
  /// validation by TryStringOrTuple it is dispatched to the tuple or string overload.
  /// </summary>
  public static bool endswith(this string self, object suffix, int start) {
      TryStringOrTuple(suffix);

      PythonTuple candidates = suffix as PythonTuple;
      if (candidates != null) {
          return endswith(self, candidates, start);
      }

      return endswith(self, CastString(suffix), start);
  }
  /// <summary>
  /// Suffix test between <paramref name="start"/> and <paramref name="end"/> for a suffix
  /// given as an object: after validation by TryStringOrTuple it is dispatched to the
  /// tuple or string overload.
  /// </summary>
  public static bool endswith(this string self, object suffix, int start, int end) {
      TryStringOrTuple(suffix);

      PythonTuple candidates = suffix as PythonTuple;
      if (candidates != null) {
          return endswith(self, candidates, start, end);
      }

      return endswith(self, CastString(suffix), start, end);
  }
  /// <summary>Expands tabs using a tab size of 8.</summary>
  public static string expandtabs(string self) {
      const int defaultTabSize = 8;
      return expandtabs(self, defaultTabSize);
  }
  /// <summary>
  /// Returns a copy of the string in which each tab is replaced by the number of spaces
  /// needed to reach the next multiple of <paramref name="tabsize"/>. The column counter
  /// restarts after '\n' and '\r'. With a tab size of zero or less, tabs are dropped.
  /// </summary>
  public static string expandtabs(this string self, int tabsize) {
      StringBuilder output = new StringBuilder(self.Length * 2);
      int column = 0;

      foreach (char current in self) {
          if (current == '\t') {
              if (tabsize > 0) {
                  int padding = tabsize - (column % tabsize);
                  // Grow the buffer explicitly; checked() surfaces arithmetic overflow.
                  output.Capacity = checked(output.Capacity + padding);
                  output.Append(' ', padding);
                  // The column now sits on a tab stop, equivalent to 0 modulo tabsize.
                  column = 0;
              }
          } else if (current == '\n' || current == '\r') {
              column = 0;
              output.Append(current);
          } else {
              column++;
              output.Append(current);
          }
      }

      return output.ToString();
  }
  /// <summary>
  /// Returns the index of the first ordinal occurrence of <paramref name="sub"/>, or -1.
  /// A null substring raises a TypeError.
  /// </summary>
  public static int find(this string self, string sub) {
      if (sub == null) throw PythonOps.TypeError("expected string, got NoneType");

      // Single characters take the cheaper char search.
      if (sub.Length == 1) {
          return self.IndexOf(sub[0]);
      }

      return CultureInfo.InvariantCulture.CompareInfo.IndexOf(self, sub, CompareOptions.Ordinal);
  }
  /// <summary>
  /// Returns the index of the first ordinal occurrence of <paramref name="sub"/> at or
  /// after <paramref name="start"/>, or -1. A null substring raises a TypeError; a start
  /// beyond the string length gives -1.
  /// </summary>
  public static int find(this string self, string sub, int start) {
      if (sub == null) throw PythonOps.TypeError("expected string, got NoneType");

      int length = self.Length;
      if (start > length) {
          return -1;
      }

      int from = PythonOps.FixSliceIndex(start, length);
      return CultureInfo.InvariantCulture.CompareInfo.IndexOf(self, sub, from, CompareOptions.Ordinal);
  }
  /// <summary>
  /// Returns the index of the first ordinal occurrence of <paramref name="sub"/> between
  /// <paramref name="start"/> and <paramref name="end"/>, or -1. A null substring raises
  /// a TypeError; a start beyond the string length or an inverted range gives -1.
  /// </summary>
  public static int find(this string self, string sub, int start, int end) {
      if (sub == null) throw PythonOps.TypeError("expected string, got NoneType");

      int length = self.Length;
      if (start > length) {
          return -1;
      }

      int from = PythonOps.FixSliceIndex(start, length);
      int to = PythonOps.FixSliceIndex(end, length);
      if (to < from) {
          return -1;
      }

      return CultureInfo.InvariantCulture.CompareInfo.IndexOf(self, sub, from, to - from, CompareOptions.Ordinal);
  }
  /// <summary>
  /// find overload taking object bounds: each bound is converted by CheckIndex, with
  /// 0 and the string length passed as the respective defaults.
  /// </summary>
  public static int find(this string self, string sub, object start, [DefaultParameterValue(null)]object end) {
      int from = CheckIndex(start, 0);
      int to = CheckIndex(end, self.Length);
      return find(self, sub, from, to);
  }
  /// <summary>Like find over the whole string, but a missing substring raises a ValueError.</summary>
  public static int index(this string self, string sub) {
      if (sub == null) {
          throw PythonOps.TypeError("expected string, got NoneType");
      }

      return index(self, sub, 0, self.Length);
  }
  /// <summary>Like find from <paramref name="start"/>, but a missing substring raises a ValueError.</summary>
  public static int index(this string self, string sub, int start) {
      if (sub == null) {
          throw PythonOps.TypeError("expected string, got NoneType");
      }

      return index(self, sub, start, self.Length);
  }
  /// <summary>
  /// Like find between <paramref name="start"/> and <paramref name="end"/>, but raises a
  /// ValueError instead of returning -1. A null substring raises a TypeError.
  /// </summary>
  public static int index(this string self, string sub, int start, int end) {
      if (sub == null) throw PythonOps.TypeError("expected string, got NoneType");

      int position = find(self, sub, start, end);
      if (position != -1) {
          return position;
      }

      throw PythonOps.ValueError("substring {0} not found in {1}", sub, self);
  }
  /// <summary>
  /// index overload taking object bounds: each bound is converted by CheckIndex, with
  /// 0 and the string length passed as the respective defaults.
  /// </summary>
  public static int index(this string self, string sub, object start, [DefaultParameterValue(null)]object end) {
      int from = CheckIndex(start, 0);
      int to = CheckIndex(end, self.Length);
      return index(self, sub, from, to);
  }
  /// <summary>Returns true when the string is non-empty and every character is a letter or digit.</summary>
  public static bool isalnum(this string self) {
      int length = self.Length;
      if (length == 0) {
          return false;
      }

      for (int position = 0; position < length; position++) {
          if (!Char.IsLetterOrDigit(self, position)) {
              return false;
          }
      }

      return true;
  }
  /// <summary>Returns true when the string is non-empty and every character is a letter.</summary>
  public static bool isalpha(this string self) {
      int length = self.Length;
      if (length == 0) {
          return false;
      }

      for (int position = 0; position < length; position++) {
          if (!Char.IsLetter(self, position)) {
              return false;
          }
      }

      return true;
  }
  /// <summary>Returns true when the string is non-empty and every character is a digit.</summary>
  public static bool isdigit(this string self) {
      int length = self.Length;
      if (length == 0) {
          return false;
      }

      for (int position = 0; position < length; position++) {
          if (!Char.IsDigit(self, position)) {
              return false;
          }
      }

      return true;
  }
  /// <summary>Returns true when the string is non-empty and every character is white space.</summary>
  public static bool isspace(this string self) {
      int length = self.Length;
      if (length == 0) {
          return false;
      }

      for (int position = 0; position < length; position++) {
          if (!Char.IsWhiteSpace(self, position)) {
              return false;
          }
      }

      return true;
  }
  /// <summary>Returns the same answer as isnumeric for this string.</summary>
  public static bool isdecimal(this string self) {
      bool allDigits = isnumeric(self);
      return allDigits;
  }
  /// <summary>
  /// Returns true when the string is neither null nor empty and every character
  /// satisfies Char.IsDigit.
  /// </summary>
  public static bool isnumeric(this string self) {
      if (String.IsNullOrEmpty(self)) {
          return false;
      }

      for (int position = 0; position < self.Length; position++) {
          if (!Char.IsDigit(self[position])) {
              return false;
          }
      }

      return true;
  }
  /// <summary>
  /// Returns true when the string contains at least one lowercase character and no
  /// uppercase character.
  /// </summary>
  public static bool islower(this string self) {
      int length = self.Length;
      if (length == 0) {
          return false;
      }

      bool sawLower = false;
      for (int position = 0; position < length; position++) {
          if (Char.IsUpper(self, position)) {
              return false;
          }

          if (!sawLower && Char.IsLower(self, position)) {
              sawLower = true;
          }
      }

      return sawLower;
  }
  /// <summary>
  /// Returns true when the string contains at least one uppercase character and no
  /// lowercase character.
  /// </summary>
  public static bool isupper(this string self) {
      int length = self.Length;
      if (length == 0) {
          return false;
      }

      bool sawUpper = false;
      for (int position = 0; position < length; position++) {
          if (Char.IsLower(self, position)) {
              return false;
          }

          if (!sawUpper && Char.IsUpper(self, position)) {
              sawUpper = true;
          }
      }

      return sawUpper;
  }
  /// <summary>
  /// Returns true when the string is titlecased: it contains at least one uppercase
  /// or titlecase character, every such character follows an uncased character
  /// (or starts the string), and every lowercase character follows a cased one.
  /// Returns false otherwise, including for null and empty strings.
  /// </summary>
  public static bool istitle(this string self) {
      if (self == null || self.Length == 0) {
          return false;
      }

      bool previousCased = false;
      bool sawUpper = false;

      for (int position = 0; position < self.Length; position++) {
          bool startsWord = Char.IsUpper(self, position)
              || Char.GetUnicodeCategory(self, position) == UnicodeCategory.TitlecaseLetter;

          if (startsWord) {
              sawUpper = true;
              if (previousCased) {
                  // An uppercase character in the middle of a word.
                  return false;
              }
              previousCased = true;
          } else if (Char.IsLower(self, position)) {
              if (!previousCased) {
                  // A word that begins with a lowercase character.
                  return false;
              }
              previousCased = true;
          } else {
              previousCased = false;
          }
      }

      // No rule was violated, but without any uppercase character (e.g. '\n' or
      // all whitespace) the string is still not a title.
      return sawUpper;
  }
  /// <summary>Returns true when any character is at or above LowestUnicodeValue.</summary>
  public static bool isunicode(this string self) {
      for (int position = 0; position < self.Length; position++) {
          if (self[position] >= LowestUnicodeValue) {
              return true;
          }
      }

      return false;
  }
  /// <summary>
  /// Returns the concatenation of the items of <paramref name="sequence"/>, with this
  /// string inserted between consecutive items. An empty sequence gives the empty
  /// string; a single item is converted with Converter.ConvertToString and returned.
  /// </summary>
  public static string join(this string self, object sequence) {
      IEnumerator items = PythonOps.GetEnumerator(sequence);
      if (!items.MoveNext()) {
          return "";
      }

      // A sequence holding exactly one value is returned as that value, with no builder.
      object first = items.Current;
      if (!items.MoveNext()) {
          return Converter.ConvertToString(first);
      }

      StringBuilder joined = new StringBuilder();
      AppendJoin(first, 0, joined);

      // The enumerator already sits on the second item when the loop starts.
      for (int position = 1; ; position++) {
          joined.Append(self);
          AppendJoin(items.Current, position, joined);
          if (!items.MoveNext()) {
              break;
          }
      }

      return joined.ToString();
  }
  /// <summary>
  /// join specialised for List: works on the list's backing data while holding a lock
  /// on the list. An empty list gives the empty string; a single item is converted
  /// with Converter.ConvertToString.
  /// </summary>
  public static string join(this string/*!*/ self, [NotNull]List/*!*/ sequence) {
      if (sequence.__len__() == 0) {
          return String.Empty;
      }

      lock (sequence) {
          if (sequence.__len__() == 1) {
              return Converter.ConvertToString(sequence[0]);
          }

          // An empty (or null) separator contributes nothing, so the append is skipped.
          bool hasSeparator = !String.IsNullOrEmpty(self);

          StringBuilder joined = new StringBuilder();
          AppendJoin(sequence._data[0], 0, joined);

          int position = 1;
          while (position < sequence._size) {
              if (hasSeparator) {
                  joined.Append(self);
              }
              AppendJoin(sequence._data[position], position, joined);
              position++;
          }

          return joined.ToString();
      }
  }
  /// <summary>Left-justifies the string in <paramref name="width"/> columns using a space as the fill character.</summary>
  public static string ljust(this string self, int width) {
      const char defaultFill = ' ';
      return ljust(self, width, defaultFill);
  }
  /// <summary>
  /// Returns the string padded on the right with <paramref name="fillchar"/> up to
  /// <paramref name="width"/> characters. The string itself is returned when the
  /// width is negative or does not exceed its length.
  /// </summary>
  public static string ljust(this string self, int width, char fillchar) {
      // Covers negative widths too, since the length is never negative.
      if (width <= self.Length) {
          return self;
      }

      return self.PadRight(width, fillchar);
  }
  /// <summary>Returns a copy of the string lowercased with the invariant culture's casing rules.</summary>
  public static string lower(this string self) {
      return self.ToLowerInvariant();
  }
  /// <summary>Removes leading characters that appear in the Whitespace set.</summary>
  public static string lstrip(this string self) {
      string trimmed = self.TrimStart(Whitespace);
      return trimmed;
  }
  /// <summary>
  /// Removes leading characters contained in <paramref name="chars"/>; a null
  /// <paramref name="chars"/> behaves like the argument-less overload.
  /// </summary>
  public static string lstrip(this string self, string chars) {
      return chars == null
          ? lstrip(self)
          : self.TrimStart(chars.ToCharArray());
  }
  /// <summary>
  /// Splits the string at the first occurrence of <paramref name="sep"/> and returns a
  /// 3-tuple (head, separator, tail). When the separator is not found the tuple is
  /// (self, "", ""); an empty string gives ("", "", ""). A null separator raises a
  /// TypeError and an empty separator a ValueError.
  /// </summary>
  public static PythonTuple partition(this string self, string sep) {
      if (sep == null) {
          throw PythonOps.TypeError("expected string, got NoneType");
      }
      if (sep.Length == 0) {
          throw PythonOps.ValueError("empty separator");
      }

      string head = "";
      string middle = "";
      string tail = "";

      if (self.Length != 0) {
          int cut = find(self, sep);
          if (cut == -1) {
              head = self;
          } else {
              head = self.Substring(0, cut);
              middle = sep;
              tail = self.Substring(cut + sep.Length);
          }
      }

      return new PythonTuple(new object[3] { head, middle, tail });
  }
  // Returns input itself when it is a string, or ToString() of a PythonBuffer;
  // any other input raises a TypeError.
  private static string StringOrBuffer(object input) {
      string text = input as string;
      if (text == null) {
          PythonBuffer view = input as PythonBuffer;
          if (view == null) {
              throw PythonOps.TypeError("expected a character buffer object");
          }

          return view.ToString();
      }

      return text;
  }
  /// <summary>
  /// Returns a copy of the string in which occurrences of <paramref name="old"/> are
  /// replaced by <paramref name="new_"/>. Both arguments must be strings or buffers
  /// (StringOrBuffer raises a TypeError otherwise). A negative
  /// <paramref name="maxsplit"/> replaces every occurrence; otherwise at most that
  /// many replacements are made. An empty <paramref name="old"/> is handled by ReplaceEmpty.
  /// </summary>
  /// <remarks>
  /// Matches are located with an ordinal search, the same comparison count uses to
  /// pre-size the result, so the two can never disagree about where a match occurs.
  /// </remarks>
  public static string replace(this string self, object old, object new_, [DefaultParameterValue(-1)]int maxsplit) {
      string oldString = StringOrBuffer(old);
      string newString = StringOrBuffer(new_);
      if (oldString.Length == 0) return ReplaceEmpty(self, newString, maxsplit);

      string v = self;

      // Number of replacements that will actually be made, used only to size the builder.
      int replacements = count(v, oldString);
      replacements = (maxsplit < 0 || maxsplit > replacements) ? replacements : maxsplit;

      int newLength = v.Length;
      newLength -= replacements * oldString.Length;
      newLength = checked(newLength + replacements * newString.Length);

      StringBuilder ret = new StringBuilder(newLength);
      int index;
      int start = 0;

      // A negative maxsplit never reaches zero, so every match is replaced.
      while (maxsplit != 0 && (index = v.IndexOf(oldString, start, StringComparison.Ordinal)) != -1) {
          ret.Append(v, start, index - start);
          ret.Append(newString);
          start = index + oldString.Length;
          maxsplit--;
      }

      // Copy the remainder straight from the source without an intermediate substring.
      ret.Append(v, start, v.Length - start);
      return ret.ToString();
  }
  /// <summary>Returns the index of the last occurrence of <paramref name="sub"/> in the whole string, or -1.</summary>
  public static int rfind(this string self, string sub) {
      if (sub == null) {
          throw PythonOps.TypeError("expected string, got NoneType");
      }

      return rfind(self, sub, 0, self.Length);
  }
  786. public static int rfind(this string self, string sub, int start) {
  787. if (sub == null) throw PythonOps.TypeError("expected string, got NoneType");
  788. if (start > self.Length) return -1;
  789. return rfind(self, sub, start, self.Length);
  790. }
  791. public static int rfind(this string self, string sub, int start, int end) {
  792. if (sub == null) throw PythonOps.TypeError("expected string, got NoneType");
  793. if (start > self.Length) return -1;
  794. start = PythonOps.FixSliceIndex(start, self.Length);
  795. end = PythonOps.FixSliceIndex(end, self.Length);
  796. if (start > end) return -1; // can't possibly match anything, not even an empty string
  797. if (sub.Length == 0) return end; // match at the end
  798. if (end == 0) return -1; // can't possibly find anything
  799. CompareInfo c = CultureInfo.InvariantCulture.CompareInfo;
  800. return c.LastIndexOf(self, sub, end - 1, end - start, CompareOptions.Ordinal);
  801. }
  802. public static int rfind(this string self, string sub, object start, [DefaultParameterValue(null)]object end) {
  803. return rfind(self, sub, CheckIndex(start, 0), CheckIndex(end, self.Length));
  804. }
  805. public static int rindex(this string self, string sub) {
  806. return rindex(self, sub, 0, self.Length);
  807. }
  808. public static int rindex(this string self, string sub, int start) {
  809. return rindex(self, sub, start, self.Length);
  810. }
  811. public static int rindex(this string self, string sub, int start, int end) {
  812. int ret = rfind(self, sub, start, end);
  813. if (ret == -1) throw PythonOps.ValueError("substring {0} not found in {1}", sub, self);
  814. return ret;
  815. }
  816. public static int rindex(this string self, string sub, object start, [DefaultParameterValue(null)]object end) {
  817. return rindex(self, sub, CheckIndex(start, 0), CheckIndex(end, self.Length));
  818. }
  819. public static string rjust(this string self, int width) {
  820. return rjust(self, width, ' ');
  821. }
  822. public static string rjust(this string self, int width, char fillchar) {
  823. int spaces = width - self.Length;
  824. if (spaces <= 0) return self;
  825. StringBuilder ret = new StringBuilder(width);
  826. ret.Append(fillchar, spaces);
  827. ret.Append(self);
  828. return ret.ToString();
  829. }
  830. public static PythonTuple rpartition(this string self, string sep) {
  831. if (sep == null)
  832. throw PythonOps.TypeError("expected string, got NoneType");
  833. if (sep.Length == 0)
  834. throw PythonOps.ValueError("empty separator");
  835. object[] obj = new object[3] { "", "", "" };
  836. if (self.Length != 0) {
  837. int index = rfind(self, sep);
  838. if (index == -1) {
  839. obj[2] = self;
  840. } else {
  841. obj[0] = self.Substring(0, index);
  842. obj[1] = sep;
  843. obj[2] = self.Substring(index + sep.Length, self.Length - index - sep.Length);
  844. }
  845. }
  846. return new PythonTuple(obj);
  847. }
  848. // when no maxsplit arg is given then just use split
  849. public static List rsplit(this string self) {
  850. return SplitInternal(self, (char[])null, -1);
  851. }
  852. public static List rsplit(this string self, string sep) {
  853. return rsplit(self, sep, -1);
  854. }
  855. public static List rsplit(this string self, string sep, int maxsplit) {
  856. // rsplit works like split but needs to split from the right;
  857. // reverse the original string (and the sep), split, reverse
  858. // the split list and finally reverse each element of the list
  859. string reversed = Reverse(self);
  860. if (sep != null) sep = Reverse(sep);
  861. List temp = null, ret = null;
  862. temp = split(reversed, sep, maxsplit);
  863. temp.reverse();
  864. int resultlen = temp.__len__();
  865. if (resultlen != 0) {
  866. ret = new List(resultlen);
  867. foreach (string s in temp)
  868. ret.AddNoLock(Reverse(s));
  869. } else {
  870. ret = temp;
  871. }
  872. return ret;
  873. }
  874. public static string rstrip(this string self) {
  875. return self.TrimEnd(Whitespace);
  876. }
  877. public static string rstrip(this string self, string chars) {
  878. if (chars == null) return rstrip(self);
  879. return self.TrimEnd(chars.ToCharArray());
  880. }
  881. public static List split(this string self) {
  882. return SplitInternal(self, (char[])null, -1);
  883. }
  884. public static List split(this string self, string sep) {
  885. return split(self, sep, -1);
  886. }
  887. public static List split(this string self, string sep, int maxsplit) {
  888. if (sep == null) {
  889. if (maxsplit == 0) {
  890. // Corner case for CPython compatibility
  891. List result = PythonOps.MakeEmptyList(1);
  892. result.AddNoLock(self.TrimStart());
  893. return result;
  894. } else {
  895. return SplitInternal(self, (char[])null, maxsplit);
  896. }
  897. }
  898. if (sep.Length == 0) {
  899. throw PythonOps.ValueError("empty separator");
  900. } else if (sep.Length == 1) {
  901. return SplitInternal(self, new char[] { sep[0] }, maxsplit);
  902. } else {
  903. return SplitInternal(self, sep, maxsplit);
  904. }
  905. }
  906. public static List splitlines(this string self) {
  907. return splitlines(self, false);
  908. }
  909. public static List splitlines(this string self, bool keepends) {
  910. List ret = new List();
  911. int i, linestart;
  912. for (i = 0, linestart = 0; i < self.Length; i++) {
  913. if (self[i] == '\n' || self[i] == '\r' || self[i] == '\x2028') {
  914. // special case of "\r\n" as end of line marker
  915. if (i < self.Length - 1 && self[i] == '\r' && self[i + 1] == '\n') {
  916. if (keepends)
  917. ret.AddNoLock(self.Substring(linestart, i - linestart + 2));
  918. else
  919. ret.AddNoLock(self.Substring(linestart, i - linestart));
  920. linestart = i + 2;
  921. i++;
  922. } else { //'\r', '\n', or unicode new line as end of line marker
  923. if (keepends)
  924. ret.AddNoLock(self.Substring(linestart, i - linestart + 1));
  925. else
  926. ret.AddNoLock(self.Substring(linestart, i - linestart));
  927. linestart = i + 1;
  928. }
  929. }
  930. }
  931. // the last line needs to be accounted for if it is not empty
  932. if (i - linestart != 0)
  933. ret.AddNoLock(self.Substring(linestart, i - linestart));
  934. return ret;
  935. }
  936. public static bool startswith(this string self, object prefix) {
  937. TryStringOrTuple(prefix);
  938. if (prefix is PythonTuple)
  939. return startswith(self, (PythonTuple)prefix);
  940. else
  941. return startswith(self, CastString(prefix));
  942. }
  943. public static bool startswith(this string self, object prefix, int start) {
  944. TryStringOrTuple(prefix);
  945. if (prefix is PythonTuple)
  946. return startswith(self, (PythonTuple)prefix, start);
  947. else
  948. return startswith(self, CastString(prefix), start);
  949. }
  950. public static bool startswith(this string self, object prefix, int start, int end) {
  951. TryStringOrTuple(prefix);
  952. if (prefix is PythonTuple)
  953. return startswith(self, (PythonTuple)prefix, start, end);
  954. else
  955. return startswith(self, CastString(prefix), start, end);
  956. }
  957. public static string strip(this string self) {
  958. return self.Trim();
  959. }
  960. public static string strip(this string self, string chars) {
  961. if (chars == null) return strip(self);
  962. return self.Trim(chars.ToCharArray());
  963. }
  964. public static string swapcase(this string self) {
  965. StringBuilder ret = new StringBuilder(self);
  966. for (int i = 0; i < ret.Length; i++) {
  967. char ch = ret[i];
  968. if (Char.IsUpper(ch)) ret[i] = Char.ToLower(ch, CultureInfo.InvariantCulture);
  969. else if (Char.IsLower(ch)) ret[i] = Char.ToUpper(ch, CultureInfo.InvariantCulture);
  970. }
  971. return ret.ToString();
  972. }
  973. public static string title(this string self) {
  974. if (self == null || self.Length == 0) return self;
  975. char[] retchars = self.ToCharArray();
  976. bool prevCharCased = false;
  977. bool currCharCased = false;
  978. int i = 0;
  979. do {
  980. if (Char.IsUpper(retchars[i]) || Char.IsLower(retchars[i])) {
  981. if (!prevCharCased)
  982. retchars[i] = Char.ToUpper(retchars[i], CultureInfo.InvariantCulture);
  983. else
  984. retchars[i] = Char.ToLower(retchars[i], CultureInfo.InvariantCulture);
  985. currCharCased = true;
  986. } else {
  987. currCharCased = false;
  988. }
  989. i++;
  990. prevCharCased = currCharCased;
  991. }
  992. while (i < retchars.Length);
  993. return new string(retchars);
  994. }
  995. //translate on a unicode string differs from that on an ascii
  996. //for unicode, the table argument is actually a dictionary with
  997. //character ordinals as keys and the replacement strings as values
  998. public static string translate(this string self, [NotNull]PythonDictionary table) {
  999. if (table == null || self.Length == 0) {
  1000. return self;
  1001. }
  1002. StringBuilder ret = new StringBuilder();
  1003. for (int i = 0, idx = 0; i < self.Length; i++) {
  1004. idx = (int)self[i];
  1005. if (table.__contains__(idx))
  1006. ret.Append((string)table[idx]);
  1007. else
  1008. ret.Append(self[i]);
  1009. }
  1010. return ret.ToString();
  1011. }
  1012. public static string translate(this string self, string table) {
  1013. return translate(self, table, (string)null);
  1014. }
  1015. public static string translate(this string self, string table, string deletechars) {
  1016. if (table != null && table.Length != 256) {
  1017. throw PythonOps.ValueError("translation table must be 256 characters long");
  1018. } else if (self.Length == 0) {
  1019. return self;
  1020. }
  1021. // List<char> is about 2/3rds as expensive as StringBuilder appending individual
  1022. // char's so we use that instead of a StringBuilder
  1023. List<char> res = new List<char>();
  1024. for (int i = 0; i < self.Length; i++) {
  1025. if (deletechars == null || !deletechars.Contains(Char.ToString(self[i]))) {
  1026. if (table != null) {
  1027. int idx = (int)self[i];
  1028. if (idx >= 0 && idx < 256) {
  1029. res.Add(table[idx]);
  1030. }
  1031. } else {
  1032. res.Add(self[i]);
  1033. }
  1034. }
  1035. }
  1036. return new String(res.ToArray());
  1037. }
  1038. public static string upper(this string self) {
  1039. return self.ToUpper(CultureInfo.InvariantCulture);
  1040. }
  1041. public static string zfill(this string self, int width) {
  1042. int spaces = width - self.Length;
  1043. if (spaces <= 0) return self;
  1044. StringBuilder ret = new StringBuilder(width);
  1045. if (self.Length > 0 && IsSign(self[0])) {
  1046. ret.Append(self[0]);
  1047. ret.Append('0', spaces);
  1048. ret.Append(self.Substring(1));
  1049. } else {
  1050. ret.Append('0', spaces);
  1051. ret.Append(self);
  1052. }
  1053. return ret.ToString();
  1054. }
  1055. /// <summary>
  1056. /// Replaces each replacement field in the string with the provided arguments.
  1057. ///
  1058. /// replacement_field = "{" field_name ["!" conversion] [":" format_spec] "}"
  1059. /// field_name = (identifier | integer) ("." identifier | "[" element_index "]")*
  1060. ///
  1061. /// format_spec: [[fill]align][sign][#][0][width][.precision][type]
  1062. ///
  1063. /// Conversion can be 'r' for repr or 's' for string.
  1064. /// </summary>
  1065. public static string/*!*/ format(CodeContext/*!*/ context, string format_string, [NotNull]params object[] args) {
  1066. return NewStringFormatter.FormatString(
  1067. PythonContext.GetContext(context),
  1068. format_string,
  1069. PythonTuple.MakeTuple(args),
  1070. new PythonDictionary()
  1071. );
  1072. }
  1073. /// <summary>
  1074. /// Replaces each replacement field in the string with the provided arguments.
  1075. ///
  1076. /// replacement_field = "{" field_name ["!" conversion] [":" format_spec] "}"
  1077. /// field_name = (identifier | integer) ("." identifier | "[" element_index "]")*
  1078. ///
  1079. /// format_spec: [[fill]align][sign][#][0][width][.precision][type]
  1080. ///
  1081. /// Conversion can be 'r' for repr or 's' for string.
  1082. /// </summary>
  1083. public static string/*!*/ format(CodeContext/*!*/ context, string format_string, [ParamDictionary]IDictionary<object, object> kwargs, params object[] args) {
  1084. return NewStringFormatter.FormatString(
  1085. PythonContext.GetContext(context),
  1086. format_string,
  1087. PythonTuple.MakeTuple(args),
  1088. kwargs
  1089. );
  1090. }
  1091. public static IEnumerable<PythonTuple>/*!*/ _formatter_parser(this string/*!*/ self) {
  1092. return NewStringFormatter.GetFormatInfo(self);
  1093. }
  1094. public static PythonTuple/*!*/ _formatter_field_name_split(this string/*!*/ self) {
  1095. return NewStringFormatter.GetFieldNameInfo(self);
  1096. }
  1097. #endregion
  1098. #region operators
  1099. [SpecialName]
  1100. public static string Add([NotNull]string self, [NotNull]string other) {
  1101. return self + other;
  1102. }
  1103. [SpecialName]
  1104. public static string Add([NotNull]string self, char other) {
  1105. return self + other;
  1106. }
  1107. [SpecialName]
  1108. public static string Add(char self, [NotNull]string other) {
  1109. return self + other;
  1110. }
  1111. [SpecialName]
  1112. public static string Mod(CodeContext/*!*/ context, string self, object other) {
  1113. return new StringFormatter(context, self, other).Format();
  1114. }
  1115. [SpecialName]
  1116. [return: MaybeNotImplemented]
  1117. public static object Mod(CodeContext/*!*/ context, object other, string self) {
  1118. string str = other as string;
  1119. if (str != null) {
  1120. return new StringFormatter(context, str, self).Format();
  1121. }
  1122. Extensible<string> es = other as Extensible<string>;
  1123. if (es != null) {
  1124. return new StringFormatter(context, es.Value, self).Format();
  1125. }
  1126. return NotImplementedType.Value;
  1127. }
  1128. [SpecialName]
  1129. public static string Multiply(string s, int count) {
  1130. if (count <= 0) return String.Empty;
  1131. if (count == 1) return s;
  1132. long size = (long)s.Length * (long)count;
  1133. if (size > Int32.MaxValue) throw PythonOps.OverflowError("repeated string is too long");
  1134. int sz = s.Length;
  1135. if (sz == 1) return new string(s[0], count);
  1136. StringBuilder ret = new StringBuilder(sz * count);
  1137. ret.Insert(0, s, count);
  1138. // the above code is MUCH faster than the simple loop
  1139. //for (int i=0; i < count; i++) ret.Append(s);
  1140. return ret.ToString();
  1141. }
  1142. [SpecialName]
  1143. public static string Multiply(int other, string self) {
  1144. return Multiply(self, other);
  1145. }
  1146. [SpecialName]
  1147. public static object Multiply(string self, [NotNull]Index count) {
  1148. return PythonOps.MultiplySequence<string>(Multiply, self, count, true);
  1149. }
  1150. [SpecialName]
  1151. public static object Multiply([NotNull]Index count, string self) {
  1152. return PythonOps.MultiplySequence<string>(Multiply, self, count, false);
  1153. }
  1154. [SpecialName]
  1155. public static object Multiply(string self, object count) {
  1156. int index;
  1157. if (Converter.TryConvertToIndex(count, out index)) {
  1158. return Multiply(self, index);
  1159. }
  1160. throw PythonOps.TypeErrorForUnIndexableObject(count);
  1161. }
  1162. [SpecialName]
  1163. public static object Multiply(object count, string self) {
  1164. int index;
  1165. if (Converter.TryConvertToIndex(count, out index)) {
  1166. return Multiply(index, self);
  1167. }
  1168. throw PythonOps.TypeErrorForUnIndexableObject(count);
  1169. }
  1170. [SpecialName]
  1171. public static bool GreaterThan(string x, string y) {
  1172. return string.CompareOrdinal(x, y) > 0;
  1173. }
  1174. [SpecialName]
  1175. public static bool LessThan(string x, string y) {
  1176. return string.CompareOrdinal(x, y) < 0;
  1177. }
  1178. [SpecialName]
  1179. public static bool LessThanOrEqual(string x, string y) {
  1180. return string.CompareOrdinal(x, y) <= 0;
  1181. }
  1182. [SpecialName]
  1183. public static bool GreaterThanOrEqual(string x, string y) {
  1184. return string.CompareOrdinal(x, y) >= 0;
  1185. }
  1186. [SpecialName]
  1187. public static bool Equals(string x, string y) {
  1188. return string.Equals(x, y);
  1189. }
  1190. [SpecialName]
  1191. public static bool NotEquals(string x, string y) {
  1192. return !string.Equals(x, y);
  1193. }
  1194. #endregion
  1195. [SpecialName, ImplicitConversionMethod]
  1196. public static string ConvertFromChar(char c) {
  1197. return ScriptingRuntimeHelpers.CharToString(c);
  1198. }
  1199. [SpecialName, ExplicitConversionMethod]
  1200. public static char ConvertToChar(string s) {
  1201. if (s.Length == 1) return s[0];
  1202. throw PythonOps.TypeErrorForTypeMismatch("char", s);
  1203. }
  1204. [SpecialName, ImplicitConversionMethod]
  1205. public static IEnumerable ConvertToIEnumerable(string s) {
  1206. // make an enumerator that produces strings instead of chars
  1207. return new PythonStringEnumerable(s);
  1208. }
  1209. internal static int Compare(string self, string obj) {
  1210. int ret = string.CompareOrdinal(self, obj);
  1211. return ret == 0 ? 0 : (ret < 0 ? -1 : +1);
  1212. }
  1213. public static object __getnewargs__(CodeContext/*!*/ context, string self) {
  1214. if (!Object.ReferenceEquals(self, null)) {
  1215. // Cast self to object to avoid exception caused by trying to access SystemState on DefaultContext
  1216. return PythonTuple.MakeTuple(StringOps.__new__(context, TypeCache.String, (object)self));
  1217. }
  1218. throw PythonOps.TypeErrorForBadInstance("__getnewargs__ requires a 'str' object but received a '{0}'", self);
  1219. }
  1220. public static string __str__(string self) {
  1221. return self;
  1222. }
  1223. public static Extensible<string> __str__(ExtensibleString self) {
  1224. return self;
  1225. }
  1226. #region Internal implementation details
  1227. internal static string Quote(string s) {
  1228. bool isUnicode = false;
  1229. StringBuilder b = new StringBuilder(s.Length + 5);
  1230. char quote = '\'';
  1231. if (s.IndexOf('\'') != -1 && s.IndexOf('\"') == -1) {
  1232. quote = '\"';
  1233. }
  1234. b.Append(quote);
  1235. b.Append(ReprEncode(s, quote, ref isUnicode));
  1236. b.Append(quote);
  1237. if (isUnicode) return "u" + b.ToString();
  1238. return b.ToString();
  1239. }
  1240. internal static string ReprEncode(string s, ref bool isUnicode) {
  1241. return ReprEncode(s, (char)0, ref isUnicode);
  1242. }
  1243. internal static bool TryGetEncoding(string name, out Encoding encoding) {
  1244. #if SILVERLIGHT // EncodingInfo
  1245. switch (NormalizeEncodingName(name)) {
  1246. case "us_ascii":
  1247. case "ascii": encoding = PythonAsciiEncoding.Instance; return true;
  1248. case "utf_8": encoding = (Encoding)new EncodingWrapper(Encoding.UTF8, new byte[0]).Clone(); return true;
  1249. case "utf_16_le": encoding = (Encoding)new EncodingWrapper(Encoding.Unicode, new byte[0]).Clone(); return true;
  1250. case "utf_16_be": encoding = (Encoding)new EncodingWrapper(Encoding.BigEndianUnicode, new byte[0]).Clone(); return true;
  1251. case "utf_8_sig": encoding = Encoding.UTF8; return true;
  1252. }
  1253. #else
  1254. name = NormalizeEncodingName(name);
  1255. EncodingInfoWrapper encInfo;
  1256. if (CodecsInfo.Codecs.TryGetValue(name, out encInfo)) {
  1257. encoding = (Encoding)encInfo.GetEncoding().Clone();
  1258. return true;
  1259. }
  1260. #endif
  1261. encoding = null;
  1262. return false;
  1263. }
  1264. internal static string RawUnicodeEscapeEncode(string s) {
  1265. // in the common case we don't need to encode anything, so we
  1266. // lazily create the StringBuilder only if necessary.
  1267. StringBuilder b = null;
  1268. for (int i = 0; i < s.Length; i++) {
  1269. char ch = s[i];
  1270. if (ch > 0xff) {
  1271. ReprInit(ref b, s, i);
  1272. b.AppendFormat("\\u{0:x4}", (int)ch);
  1273. } else if (b != null) {
  1274. b.Append(ch);
  1275. }
  1276. }
  1277. if (b == null) return s;
  1278. return b.ToString();
  1279. }
  1280. #endregion
  1281. #region Private implementation details
  1282. private static int CheckIndex(object index, int defaultValue) {
  1283. int res;
  1284. if (index == null) {
  1285. res = defaultValue;
  1286. } else if (!Converter.TryConvertToIndex(index, out res)) {
  1287. throw PythonOps.TypeError("slice indices must be integers or None or have an __index__ method");
  1288. }
  1289. return res;
  1290. }
  1291. private static void AppendJoin(object value, int index, StringBuilder sb) {
  1292. string strVal;
  1293. if ((strVal = value as string) != null) {
  1294. sb.Append(strVal);
  1295. } else if (Converter.TryConvertToString(value, out strVal) && strVal != null) {
  1296. sb.Append(strVal);
  1297. } else {
  1298. throw PythonOps.TypeError("sequence item {0}: expected string, {1} found", index.ToString(), PythonOps.GetPythonTypeName(value));
  1299. }
  1300. }
  1301. private static string ReplaceEmpty(string self, string new_, int maxsplit) {
  1302. string v = self;
  1303. if (maxsplit == 0) return v;
  1304. else if (maxsplit < 0) maxsplit = v.Length + 1;
  1305. else if (maxsplit > v.Length + 1) maxsplit = checked(v.Length + 1);
  1306. int newLength = checked(v.Length + new_.Length * maxsplit);
  1307. int max = Math.Min(v.Length, maxsplit);
  1308. StringBuilder ret = new StringBuilder(newLength);
  1309. for (int i = 0; i < max; i++) {
  1310. ret.Append(new_);
  1311. ret.Append(v[i]);
  1312. }
  1313. if (maxsplit > max) {
  1314. ret.Append(new_);
  1315. } else {
  1316. ret.Append(v, max, v.Length - max);
  1317. }
  1318. return ret.ToString();
  1319. }
  1320. private static string Reverse(string s) {
  1321. if (s.Length == 0 || s.Length == 1) return s;
  1322. char[] rchars = new char[s.Length];
  1323. for (int i = s.Length - 1, j = 0; i >= 0; i--, j++) {
  1324. rchars[j] = s[i];
  1325. }
  1326. return new string(rchars);
  1327. }
  1328. internal static string ReprEncode(string s, char quote, ref bool isUnicode) {
  1329. // in the common case we don't need to encode anything, so we
  1330. // lazily create the StringBuilder only if necessary.
  1331. StringBuilder b = null;
  1332. for (int i = 0; i < s.Length; i++) {
  1333. char ch = s[i];
  1334. if (ch >= LowestUnicodeValue) isUnicode = true;
  1335. switch (ch) {
  1336. case '\\': ReprInit(ref b, s, i); b.Append("\\\\"); break;
  1337. case '\t': ReprInit(ref b, s, i); b.Append("\\t"); break;
  1338. case '\n': ReprInit(ref b, s, i); b.Append("\\n"); break;
  1339. case '\r': ReprInit(ref b, s, i); b.Append("\\r"); break;
  1340. default:
  1341. if (quote != 0 && ch == quote) {
  1342. ReprInit(ref b, s, i);
  1343. b.Append('\\'); b.Append(ch);
  1344. } else if (ch < ' ' || (ch >= 0x7f && ch <= 0xff)) {
  1345. ReprInit(ref b, s, i);
  1346. b.AppendFormat("\\x{0:x2}", (int)ch);
  1347. } else if (ch > 0xff) {
  1348. ReprInit(ref b, s, i);
  1349. b.AppendFormat("\\u{0:x4}", (int)ch);
  1350. } else if (b != null) {
  1351. b.Append(ch);
  1352. }
  1353. break;
  1354. }
  1355. }
  1356. if (b == null) return s;
  1357. return b.ToString();
  1358. }
  1359. private static void ReprInit(ref StringBuilder sb, string s, int c) {
  1360. if (sb != null) return;
  1361. sb = new StringBuilder(s, 0, c, s.Length);
  1362. }
  1363. private static bool IsSign(char ch) {
  1364. return ch == '+' || ch == '-';
  1365. }
  1366. internal static string GetEncodingName(Encoding encoding) {
  1367. #if !SILVERLIGHT
  1368. string name = null;
  1369. // if we have a valid code page try and get a reasonable name. The
  1370. // web names / mail displays match tend to CPython's terse names
  1371. if (encoding.CodePage != 0) {
  1372. if (encoding.IsBrowserDisplay) {
  1373. name = encoding.WebName;
  1374. }
  1375. if (name == null && encoding.IsMailNewsDisplay) {
  1376. name = encoding.HeaderName;
  1377. }
  1378. // otherwise use a code page number which also matches CPython
  1379. if (name == null) {
  1380. name = "cp" + encoding.CodePage;
  1381. }
  1382. }
  1383. if (name == null) {
  1384. // otherwise just finally fall back to the human readable name
  1385. name = encoding.EncodingName;
  1386. }
  1387. #else
  1388. // Silverlight only has web names
  1389. string name = encoding.WebName;
  1390. #endif
  1391. return NormalizeEncodingName(name);
  1392. }
  1393. internal static string NormalizeEncodingName(string name) {
  1394. if (name == null) {
  1395. return null;
  1396. }
  1397. return name.ToLower(CultureInfo.InvariantCulture).Replace('-', '_').Replace(' ', '_');
  1398. }
  1399. private static string RawDecode(CodeContext/*!*/ context, string s, object encodingType, string errors) {
  1400. PythonContext pc = PythonContext.GetContext(context);
  1401. Encoding e = null;
  1402. string encoding = encodingType as string;
  1403. if (encoding == null) {
  1404. e = encodingType as Encoding;
  1405. if (e == null) {
  1406. if (encodingType == Missing.Value) {
  1407. encoding = pc.GetDefaultEncodingName();
  1408. } else {
  1409. throw PythonOps.TypeError("decode() expected string, got '{0}'", DynamicHelpers.GetPythonType(encodingType).Name);
  1410. }
  1411. }
  1412. }
  1413. if (e == null) {
  1414. string normalizedName = NormalizeEncodingName(encoding);
  1415. if ("raw_unicode_escape" == normalizedName) {
  1416. return LiteralParser.ParseString(s, true, true);
  1417. } else if ("unicode_escape" == normalizedName) {
  1418. return LiteralParser.ParseString(s, false, true);
  1419. } else if ("string_escape" == normalizedName) {
  1420. return LiteralParser.ParseString(s, false, false);
  1421. }
  1422. }
  1423. if (e != null || TryGetEncoding(encoding, out e)) {
  1424. return DoDecode(context, s, errors, encoding, e);
  1425. }
  1426. // look for user-registered codecs
  1427. PythonTuple codecTuple = PythonOps.LookupEncoding(context, encoding);
  1428. if (codecTuple != null) {
  1429. return UserDecodeOrEncode(codecTuple[/*Modules.PythonCodecs.DecoderIndex*/1], s);
  1430. }
  1431. throw PythonOps.LookupError("unknown encoding: {0}", encoding);
  1432. }
  1433. internal static string DoDecode(CodeContext context, string s, string errors, string encoding, Encoding e) {
  1434. #if !SILVERLIGHT // DecoderFallback
  1435. // CLR's encoder exceptions have a 1-1 mapping w/ Python's encoder exceptions
  1436. // so we just clone the encoding & set the fallback to throw in strict mode.
  1437. e = (Encoding)e.Clone();
  1438. switch (errors) {
  1439. case "backslashreplace":
  1440. case "xmlcharrefreplace":
  1441. case "strict": e.DecoderFallback = DecoderFallback.ExceptionFallback; break;
  1442. case "replace": e.DecoderFallback = DecoderFallback.ReplacementFallback; break;
  1443. case "ignore":
  1444. e.DecoderFallback = new PythonDecoderFallback(encoding,
  1445. s,
  1446. null);
  1447. break;
  1448. default:
  1449. e.DecoderFallback = new PythonDecoderFallback(encoding,
  1450. s,
  1451. PythonOps.LookupEncodingError(context, errors));
  1452. break;
  1453. }
  1454. #endif
  1455. byte[] bytes = s.MakeByteArray();
  1456. int start = GetStartingOffset(e, bytes);
  1457. return e.GetString(bytes, start, bytes.Length - start);
  1458. }
  1459. /// <summary>
  1460. /// Gets the starting offset checking to see if the incoming bytes already include a preamble.
  1461. /// </summary>
  1462. private static int GetStartingOffset(Encoding e, byte[] bytes) {
  1463. byte[] preamble = e.GetPreamble();
  1464. int start = 0;
  1465. if (bytes.Length >= preamble.Length) {
  1466. bool differ = false;
  1467. for (int i = 0; i < preamble.Length; i++) {
  1468. if (bytes[i] != preamble[i]) {
  1469. differ = true;
  1470. }
  1471. }
  1472. if (!differ) {
  1473. start = preamble.Length;
  1474. }
  1475. }
  1476. return start;
  1477. }
  1478. private static string RawEncode(CodeContext/*!*/ context, string s, object encodingType, string errors) {
  1479. string encoding = encodingType as string;
  1480. Encoding e = null;
  1481. if (encoding == null) {
  1482. e = encodingType as Encoding;
  1483. if (e == null) {
  1484. if (encodingType == Missing.Value) {
  1485. encoding = PythonContext.GetContext(context).GetDefaultEncodingName();
  1486. } else {
  1487. throw PythonOps.TypeError("encode() expected string, got '{0}'", DynamicHelpers.GetPythonType(encodingType).Name);
  1488. }
  1489. }
  1490. }
  1491. if (e == null) {
  1492. string normalizedName = NormalizeEncodingName(encoding);
  1493. if ("raw_unicode_escape" == normalizedName) {
  1494. return RawUnicodeEscapeEncode(s);
  1495. } else if ("unicode_escape" == normalizedName || "string_escape" == normalizedName) {
  1496. bool dummy = false;
  1497. return ReprEncode(s, '\'', ref dummy);
  1498. }
  1499. }
  1500. if (e != null || TryGetEncoding(encoding, out e)) {
  1501. return DoEncode(context, s, errors, encoding, e);
  1502. }
  1503. // look for user-registered codecs
  1504. PythonTuple codecTuple = PythonOps.LookupEncoding(context, encoding);
  1505. if (codecTuple != null) {
  1506. return UserDecodeOrEncode(codecTuple[/*Modules.PythonCodecs.EncoderIndex*/0], s);
  1507. }
  1508. throw PythonOps.LookupError("unknown encoding: {0}", encoding);
  1509. }
  1510. internal static string DoEncode(CodeContext context, string s, string errors, string encoding, Encoding e) {
  1511. #if !SILVERLIGHT
  1512. // CLR's encoder exceptions have a 1-1 mapping w/ Python's encoder exceptions
  1513. // so we just clone the encoding & set the fallback to throw in strict mode
  1514. e = (Encoding)e.Clone();
  1515. switch (errors) {
  1516. case "strict": e.EncoderFallback = EncoderFallback.ExceptionFallback; break;
  1517. case "replace": e.EncoderFallback = EncoderFallback.ReplacementFallback; break;
  1518. case "backslashreplace": e.EncoderFallback = new BackslashEncoderReplaceFallback(); break;
  1519. case "xmlcharrefreplace": e.EncoderFallback = new XmlCharRefEncoderReplaceFallback(); break;
  1520. case "ignore":
  1521. e.EncoderFallback = new PythonEncoderFallback(encoding,
  1522. s,
  1523. null);
  1524. break;
  1525. default:
  1526. e.EncoderFallback = new PythonEncoderFallback(encoding,
  1527. s,
  1528. PythonOps.LookupEncodingError(context, errors));
  1529. break;
  1530. }
  1531. #endif
  1532. return PythonOps.MakeString(e.GetPreamble(), e.GetBytes(s));
  1533. }
  1534. private static string UserDecodeOrEncode(object function, string data) {
  1535. object res = PythonCalls.Call(function, data);
  1536. string strRes = AsString(res);
  1537. if (strRes != null) return strRes;
  1538. // tuple is string, bytes used, we just want the string...
  1539. PythonTuple t = res as PythonTuple;
  1540. if (t == null) throw PythonOps.TypeErrorForBadInstance("expected tuple, but found {0}", res);
  1541. return Converter.ConvertToString(t[0]);
  1542. }
  1543. #if !SILVERLIGHT
/// <summary>
/// Holds the table that maps normalized encoding names and aliases to encoding wrappers.
/// The table is built once, from Encoding.GetEncodings(), when the Codecs field is initialized.
/// </summary>
static class CodecsInfo {
    /// <summary>Encoding wrappers keyed by name or alias; consulted by TryGetEncoding.</summary>
    public static readonly Dictionary<string, EncodingInfoWrapper> Codecs = MakeCodecsDict();
    /// <summary>
    /// Builds the codec table. Every encoding reported by Encoding.GetEncodings() is
    /// published under its normalized name, a "windows-" code page key and its code page
    /// number; a few well-known encodings also get extra aliases or replacement wrappers.
    /// </summary>
    private static Dictionary<string, EncodingInfoWrapper> MakeCodecsDict() {
        Dictionary<string, EncodingInfoWrapper> d = new Dictionary<string, EncodingInfoWrapper>();
        EncodingInfo[] encs = Encoding.GetEncodings();
        for (int i = 0; i < encs.Length; i++) {
            string normalizedName = NormalizeEncodingName(encs[i].Name);
            // setup well-known mappings, for everything
            // else we'll store as lower case w/ _
            // Cases ending in 'continue' skip the generic publishing below; cases ending
            // in 'break' add aliases and then fall through to it.
            switch (normalizedName) {
                case "us_ascii":
                    // ASCII uses a dedicated wrapper under all of its names, including its code page key
                    d["cp" + encs[i].CodePage.ToString()] = d[normalizedName] = d["us"] = d["ascii"] = d["646"] = d["us_ascii"] = new AsciiEncodingInfoWrapper();
                    continue;
                case "iso_8859_1":
                    // NOTE(review): "iso 8859_1" contains a space, which NormalizeEncodingName turns
                    // into '_', so that key looks unreachable via TryGetEncoding; "latin1" is also
                    // assigned twice in this chain - confirm and clean up.
                    d["8859"] = d["latin_1"] = d["latin1"] = d["iso 8859_1"] = d["iso8859_1"] = d["cp819"] = d["819"] = d["latin"] = d["latin1"] = d["l1"] = encs[i];
                    break;
                case "utf_7":
                    // NOTE(review): "unicode-1-1-utf-7" contains '-', which NormalizeEncodingName turns
                    // into '_', so that key looks unreachable via TryGetEncoding - confirm.
                    d["u7"] = d["unicode-1-1-utf-7"] = encs[i];
                    break;
                case "utf_8":
                    // "utf_8_sig" keeps the stock encoding; the plain utf-8 names get a wrapper
                    // constructed with an empty preamble array
                    d["utf_8_sig"] = encs[i];
                    d["utf_8"] = d["utf8"] = d["u8"] = new EncodingInfoWrapper(encs[i], new byte[0]);
                    continue;
                case "utf_16":
                    // the explicit little-endian names get an empty preamble array; "utf16" is given
                    // the encoding's own preamble
                    d["utf_16_le"] = d["utf_16le"] = new EncodingInfoWrapper(encs[i], new byte[0]);
                    d["utf16"] = new EncodingInfoWrapper(encs[i], encs[i].GetEncoding().GetPreamble());
                    break;
                case "unicodefffe": // big endian unicode
                    // strip off the pre-amble, CPython doesn't include it.
                    d["utf_16_be"] = d["utf_16be"] = new EncodingInfoWrapper(encs[i], new byte[0]);
                    break;
            }
            // publish under normalized name (all lower cases, -s replaced with _s)
            d[normalizedName] = encs[i];
            // publish under Windows code page as well...
            // NOTE(review): this key keeps its '-', while TryGetEncoding looks names up after
            // replacing '-' with '_'; several encodings can also share one WindowsCodePage, in
            // which case later ones overwrite earlier ones - confirm the intent before changing.
            d["windows-" + encs[i].GetEncoding().WindowsCodePage.ToString()] = encs[i];
            // publish under code page number as well...
            d["cp" + encs[i].CodePage.ToString()] = d[encs[i].CodePage.ToString()] = encs[i];
        }
        // the escape codecs are backed by UnicodeEscapeEncoding instead of a system encoding
        d["raw_unicode_escape"] = new EncodingInfoWrapper(new UnicodeEscapeEncoding(true));
        d["unicode_escape"] = new EncodingInfoWrapper(new UnicodeEscapeEncoding(false));
#if DEBUG
        // all codecs should be stored in lowercase because we only look up from lowercase strings
        foreach (KeyValuePair<string, EncodingInfoWrapper> kvp in d) {
            Debug.Assert(kvp.Key.ToLower(CultureInfo.InvariantCulture) == kvp.Key);
        }
#endif
        return d;
    }
}
  /// <summary>
  /// Wraps either a concrete Encoding or an EncodingInfo (optionally paired with a
  /// replacement preamble) and resolves it to an Encoding on request.
  /// </summary>
  class EncodingInfoWrapper {
      private EncodingInfo _info;
      private Encoding _encoding;
      private byte[] _preamble;

      /// <summary>Wraps an already constructed encoding.</summary>
      public EncodingInfoWrapper(Encoding enc) {
          _encoding = enc;
      }

      /// <summary>Wraps an encoding description; the encoding keeps its own preamble.</summary>
      public EncodingInfoWrapper(EncodingInfo info) {
          _info = info;
      }

      /// <summary>Wraps an encoding description whose preamble is replaced by <paramref name="preamble"/>.</summary>
      public EncodingInfoWrapper(EncodingInfo info, byte[] preamble) {
          _info = info;
          _preamble = preamble;
      }

      /// <summary>
      /// Returns the wrapped encoding when one was supplied; otherwise resolves the
      /// EncodingInfo, wrapping the result in an EncodingWrapper when a preamble override exists.
      /// </summary>
      public virtual Encoding GetEncoding() {
          if (_encoding != null) {
              return _encoding;
          }

          if (_preamble != null) {
              return new EncodingWrapper(_info.GetEncoding(), _preamble);
          }

          return _info.GetEncoding();
      }

      public static implicit operator EncodingInfoWrapper(EncodingInfo info) {
          EncodingInfoWrapper wrapper = new EncodingInfoWrapper(info);
          return wrapper;
      }
  }
  /// <summary>
  /// Wrapper that carries no EncodingInfo and always resolves to PythonAsciiEncoding.Instance.
  /// </summary>
  class AsciiEncodingInfoWrapper : EncodingInfoWrapper {
      public AsciiEncodingInfoWrapper() : base((EncodingInfo)null) { }

      /// <summary>Returns the shared PythonAsciiEncoding instance.</summary>
      public override Encoding GetEncoding() {
          Encoding ascii = PythonAsciiEncoding.Instance;
          return ascii;
      }
  }
  1627. #endif
  /// <summary>
  /// Encoding that forwards all work to an inner encoding but reports a caller supplied
  /// preamble instead of the inner encoding's own.
  ///
  /// Before each forwarded call the wrapper's current encoder/decoder fallback is pushed
  /// onto the inner encoding so that fallbacks assigned to the wrapper take effect.
  /// </summary>
  class EncodingWrapper : Encoding {
      private byte[] _preamble;
      private Encoding _encoding;

      /// <param name="encoding">The encoding that performs the actual conversion.</param>
      /// <param name="preamable">The bytes returned from <see cref="GetPreamble"/>.</param>
      public EncodingWrapper(Encoding encoding, byte[] preamable) {
          _preamble = preamable;
          _encoding = encoding;
      }

      /// <summary>
      /// Copies this wrapper's encoder fallback to the inner encoding.  A read-only inner
      /// encoding (such as the shared instances handed out by the framework) rejects the
      /// assignment with InvalidOperationException, so it is left untouched; Clone()
      /// produces a wrapper whose inner encoding is writable.
      /// </summary>
      private void SetEncoderFallback() {
  #if !SILVERLIGHT
          if (!_encoding.IsReadOnly) {
              _encoding.EncoderFallback = EncoderFallback;
          }
  #endif
      }

      /// <summary>
      /// Copies this wrapper's decoder fallback to the inner encoding, skipping
      /// read-only inner encodings for the same reason as SetEncoderFallback.
      /// </summary>
      private void SetDecoderFallback() {
  #if !SILVERLIGHT
          if (!_encoding.IsReadOnly) {
              _encoding.DecoderFallback = DecoderFallback;
          }
  #endif
      }

      public override int GetByteCount(char[] chars, int index, int count) {
          SetEncoderFallback();
          return _encoding.GetByteCount(chars, index, count);
      }

      public override int GetBytes(char[] chars, int charIndex, int charCount, byte[] bytes, int byteIndex) {
          SetEncoderFallback();
          return _encoding.GetBytes(chars, charIndex, charCount, bytes, byteIndex);
      }

      public override int GetCharCount(byte[] bytes, int index, int count) {
          SetDecoderFallback();
          return _encoding.GetCharCount(bytes, index, count);
      }

      public override int GetChars(byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex) {
          SetDecoderFallback();
          return _encoding.GetChars(bytes, byteIndex, byteCount, chars, charIndex);
      }

      public override int GetMaxByteCount(int charCount) {
          SetEncoderFallback();
          return _encoding.GetMaxByteCount(charCount);
      }

      public override int GetMaxCharCount(int byteCount) {
          SetDecoderFallback();
          return _encoding.GetMaxCharCount(byteCount);
      }

      /// <summary>Returns the preamble supplied at construction time.</summary>
      public override byte[] GetPreamble() {
          return _preamble;
      }

      public override Encoder GetEncoder() {
          SetEncoderFallback();
          return _encoding.GetEncoder();
      }

      public override Decoder GetDecoder() {
          SetDecoderFallback();
          return _encoding.GetDecoder();
      }

      /// <summary>
      /// Creates a writable copy of the wrapper together with a copy of the inner encoding.
      /// </summary>
      public override object Clone() {
          // need to call base.Clone to be marked as read/write
          EncodingWrapper res = (EncodingWrapper)base.Clone();
          res._encoding = (Encoding)_encoding.Clone();
          return res;
      }
  }
  /// <summary>
  /// Produces the result of splitting an empty string: a list holding one empty string
  /// when <paramref name="separators"/> is true, otherwise an empty list.
  /// </summary>
  private static List SplitEmptyString(bool separators) {
      List result = PythonOps.MakeEmptyList(1);
      if (!separators) {
          return result;
      }

      result.AddNoLock(String.Empty);
      return result;
  }
  /// <summary>
  /// Splits <paramref name="self"/> on any of the characters in <paramref name="seps"/>.
  /// When <paramref name="seps"/> is null empty entries are removed from the result.
  /// A negative <paramref name="maxsplit"/> means no limit on the number of splits.
  /// </summary>
  private static List SplitInternal(string self, char[] seps, int maxsplit) {
      bool noSeparators = (seps == null);
      if (String.IsNullOrEmpty(self)) {
          return SplitEmptyString(!noSeparators);
      }

      // If the optional second argument sep is absent or None, the words are separated
      // by arbitrary strings of whitespace characters (space, tab, newline, return, formfeed);
      int limit = (maxsplit >= 0) ? maxsplit + 1 : Int32.MaxValue;
      StringSplitOptions options = noSeparators ? StringSplitOptions.RemoveEmptyEntries : StringSplitOptions.None;
      string[] pieces = StringUtils.Split(self, seps, limit, options);

      List result = PythonOps.MakeEmptyList(pieces.Length);
      for (int i = 0; i < pieces.Length; i++) {
          result.AddNoLock(pieces[i]);
      }
      return result;
  }
  /// <summary>
  /// Splits <paramref name="self"/> on the string <paramref name="separator"/>, keeping
  /// empty entries.  A negative <paramref name="maxsplit"/> means no limit on the number of splits.
  /// </summary>
  private static List SplitInternal(string self, string separator, int maxsplit) {
      if (String.IsNullOrEmpty(self)) {
          return SplitEmptyString(separator != null);
      }

      int limit = (maxsplit >= 0) ? maxsplit + 1 : Int32.MaxValue;
      string[] pieces = StringUtils.Split(self, separator, limit, StringSplitOptions.None);

      List result = PythonOps.MakeEmptyList(pieces.Length);
      for (int i = 0; i < pieces.Length; i++) {
          result.AddNoLock(pieces[i]);
      }
      return result;
  }
  /// <summary>
  /// Validates that <paramref name="prefix"/> is a string, a string subclass instance or
  /// a tuple; raises a Python TypeError otherwise.
  /// </summary>
  private static void TryStringOrTuple(object prefix) {
      if (prefix == null) {
          throw PythonOps.TypeError("expected string or Tuple, got NoneType");
      }

      bool acceptable = prefix is string || prefix is PythonTuple || prefix is Extensible<string>;
      if (acceptable) {
          return;
      }

      throw PythonOps.TypeError("expected string or Tuple, got {0} Type", prefix.GetType());
  }
  /// <summary>
  /// Returns the string value of <paramref name="obj"/> as determined by AsString, or
  /// raises a Python TypeError naming the object's Python type when there is none.
  /// </summary>
  private static string GetString(object obj) {
      string text = AsString(obj);
      if (text != null) {
          return text;
      }

      throw PythonOps.TypeError("expected string, got {0}", DynamicHelpers.GetPythonType(obj).Name);
  }
  /// <summary>
  /// Tests whether <paramref name="self"/> ends with <paramref name="suffix"/>.
  /// </summary>
  private static bool endswith(string self, string suffix) {
      // Ordinal: the default culture-sensitive comparison skips ignorable characters
      // (e.g. U+00AD) and would report matches that are not there character-for-character.
      return self.EndsWith(suffix, StringComparison.Ordinal);
  }
  // Indexing is 0-based. Negative indices count backwards from the end of the sequence:
  //   +---+---+---+---+---+
  //   | a | b | c | d | e |
  //   +---+---+---+---+---+
  //     0   1   2   3   4
  //    -5  -4  -3  -2  -1
  /// <summary>
  /// Tests whether the part of <paramref name="self"/> beginning at <paramref name="start"/>
  /// ends with <paramref name="suffix"/>.  A start beyond the end of the string never matches;
  /// a negative start is counted from the end and clamped to 0.
  /// </summary>
  private static bool endswith(string self, string suffix, int start) {
      int len = self.Length;
      if (start > len) return false;

      // map the negative index to its positive counterpart
      if (start < 0) {
          start += len;
          if (start < 0) start = 0;
      }

      // Ordinal: culture-sensitive matching skips ignorable characters such as U+00AD
      return self.Substring(start).EndsWith(suffix, StringComparison.Ordinal);
  }
  /// <summary>
  /// Tests whether the slice of <paramref name="self"/> from <paramref name="start"/>
  /// (inclusive) to <paramref name="end"/> (exclusive) ends with <paramref name="suffix"/>.
  /// Negative indices are counted from the end of the string.
  /// </summary>
  private static bool endswith(string self, string suffix, int start, int end) {
      int len = self.Length;
      if (start > len) return false;

      // map the negative indices to their positive counterparts
      if (start < 0) {
          start += len;
          if (start < 0) start = 0;
      }

      // Ordinal throughout: culture-sensitive matching skips ignorable characters such as U+00AD
      if (end >= len) {
          return self.Substring(start).EndsWith(suffix, StringComparison.Ordinal);
      }
      if (end < 0) {
          end += len;
          if (end < 0) return false;
      }

      if (end < start) return false;
      return self.Substring(start, end - start).EndsWith(suffix, StringComparison.Ordinal);
  }
  /// <summary>
  /// Tests whether <paramref name="self"/> ends with any of the strings in
  /// <paramref name="suffix"/>.  GetString raises a Python TypeError for an element
  /// that is not a string.
  /// </summary>
  private static bool endswith(string self, PythonTuple suffix) {
      foreach (object obj in suffix) {
          // Ordinal: culture-sensitive matching skips ignorable characters such as U+00AD
          if (self.EndsWith(GetString(obj), StringComparison.Ordinal)) {
              return true;
          }
      }
      return false;
  }
  /// <summary>
  /// Tests whether the part of <paramref name="self"/> beginning at <paramref name="start"/>
  /// ends with any of the strings in <paramref name="suffix"/>.
  /// </summary>
  private static bool endswith(string self, PythonTuple suffix, int start) {
      foreach (object candidate in suffix) {
          string text = GetString(candidate);
          if (!endswith(self, text, start)) {
              continue;
          }
          return true;
      }
      return false;
  }
  /// <summary>
  /// Tests whether the slice of <paramref name="self"/> from <paramref name="start"/> to
  /// <paramref name="end"/> ends with any of the strings in <paramref name="suffix"/>.
  /// </summary>
  private static bool endswith(string self, PythonTuple suffix, int start, int end) {
      foreach (object candidate in suffix) {
          string text = GetString(candidate);
          if (!endswith(self, text, start, end)) {
              continue;
          }
          return true;
      }
      return false;
  }
  /// <summary>
  /// Tests whether <paramref name="self"/> starts with <paramref name="prefix"/>.
  /// </summary>
  private static bool startswith(string self, string prefix) {
      // Ordinal: the default culture-sensitive comparison skips ignorable characters
      // (e.g. U+00AD) and would report matches that are not there character-for-character.
      return self.StartsWith(prefix, StringComparison.Ordinal);
  }
  /// <summary>
  /// Tests whether the part of <paramref name="self"/> beginning at <paramref name="start"/>
  /// starts with <paramref name="prefix"/>.  A start beyond the end of the string never
  /// matches; a negative start is counted from the end and clamped to 0.
  /// </summary>
  private static bool startswith(string self, string prefix, int start) {
      int len = self.Length;
      if (start > len) return false;

      // map the negative index to its positive counterpart
      if (start < 0) {
          start += len;
          if (start < 0) start = 0;
      }

      // Ordinal: culture-sensitive matching skips ignorable characters such as U+00AD
      return self.Substring(start).StartsWith(prefix, StringComparison.Ordinal);
  }
  /// <summary>
  /// Tests whether the slice of <paramref name="self"/> from <paramref name="start"/>
  /// (inclusive) to <paramref name="end"/> (exclusive) starts with <paramref name="prefix"/>.
  /// Negative indices are counted from the end of the string.
  /// </summary>
  private static bool startswith(string self, string prefix, int start, int end) {
      int len = self.Length;
      if (start > len) return false;

      // map the negative indices to their positive counterparts
      if (start < 0) {
          start += len;
          if (start < 0) start = 0;
      }

      // Ordinal throughout: culture-sensitive matching skips ignorable characters such as U+00AD
      if (end >= len) {
          return self.Substring(start).StartsWith(prefix, StringComparison.Ordinal);
      }
      if (end < 0) {
          end += len;
          if (end < 0) return false;
      }

      if (end < start) return false;
      return self.Substring(start, end - start).StartsWith(prefix, StringComparison.Ordinal);
  }
  /// <summary>
  /// Tests whether <paramref name="self"/> starts with any of the strings in
  /// <paramref name="prefix"/>.  GetString raises a Python TypeError for an element
  /// that is not a string.
  /// </summary>
  private static bool startswith(string self, PythonTuple prefix) {
      foreach (object obj in prefix) {
          // Ordinal: culture-sensitive matching skips ignorable characters such as U+00AD
          if (self.StartsWith(GetString(obj), StringComparison.Ordinal)) {
              return true;
          }
      }
      return false;
  }
  /// <summary>
  /// Tests whether the part of <paramref name="self"/> beginning at <paramref name="start"/>
  /// starts with any of the strings in <paramref name="prefix"/>.
  /// </summary>
  private static bool startswith(string self, PythonTuple prefix, int start) {
      foreach (object candidate in prefix) {
          string text = GetString(candidate);
          if (!startswith(self, text, start)) {
              continue;
          }
          return true;
      }
      return false;
  }
  /// <summary>
  /// Tests whether the slice of <paramref name="self"/> from <paramref name="start"/> to
  /// <paramref name="end"/> starts with any of the strings in <paramref name="prefix"/>.
  /// </summary>
  private static bool startswith(string self, PythonTuple prefix, int start, int end) {
      foreach (object candidate in prefix) {
          string text = GetString(candidate);
          if (!startswith(self, text, start, end)) {
              continue;
          }
          return true;
      }
      return false;
  }
  // note: any changes in how this iterator works should also be applied in the
  // optimized overloads of Builtins.map()
  /// <summary>
  /// Iterates over a string one character at a time, yielding each character as a
  /// one-character string.  The object acts as both the enumerable and its enumerator.
  /// </summary>
  [PythonType("str_iterator")]
  private class PythonStringEnumerable : IEnumerable, IEnumerator<string> {
      private readonly string/*!*/ _s;
      private int _index;

      public PythonStringEnumerable(string s) {
          Assert.NotNull(s);

          _s = s;
          _index = -1;    // positioned before the first character
      }

      #region IEnumerable Members

      public IEnumerator GetEnumerator() {
          return this;
      }

      #endregion

      #region IEnumerator<string> Members

      public string Current {
          get {
              if (_index < 0) {
                  throw PythonOps.SystemError("Enumeration has not started. Call MoveNext.");
              }
              if (_index >= _s.Length) {
                  throw PythonOps.SystemError("Enumeration already finished.");
              }

              char current = _s[_index];
              return ScriptingRuntimeHelpers.CharToString(current);
          }
      }

      #endregion

      #region IDisposable Members

      public void Dispose() { }

      #endregion

      #region IEnumerator Members

      object IEnumerator.Current {
          get {
              return Current;
          }
      }

      public bool MoveNext() {
          int length = _s.Length;
          if (_index < length) {
              _index++;
              // landing exactly on the length means the last character was already consumed
              return _index < length;
          }
          return false;
      }

      public void Reset() {
          _index = -1;
      }

      #endregion
  }
  /// <summary>Creates an enumerable that yields each character of <paramref name="str"/> as a string.</summary>
  internal static IEnumerable StringEnumerable(string str) {
      PythonStringEnumerable characters = new PythonStringEnumerable(str);
      return characters;
  }
  /// <summary>Creates an enumerator that yields each character of <paramref name="str"/> as a string.</summary>
  internal static IEnumerator<string> StringEnumerator(string str) {
      PythonStringEnumerable characters = new PythonStringEnumerable(str);
      return characters;
  }
  1903. #endregion
  1904. #region Unicode Encode/Decode Fallback Support
  1905. #if !SILVERLIGHT // EncoderFallbackBuffer
  /// <summary>
  /// CPython supports several behaviors when an error occurs while encoding or decoding a
  /// string, including user-extensible ones.  Throwing and replacing are covered by the
  /// default behaviors; for custom behaviors a single fallback is defined for decoding and
  /// for encoding which calls the Python function to do the replacement.
  ///
  /// The handler is called with a UnicodeEncodeError or UnicodeDecodeError object which is
  /// initialized with:
  ///     encoding    (string, the encoding the user requested)
  ///     object      (the original string being processed)
  ///     start       (the start of the invalid sequence)
  ///     end         (the end of the invalid characters)
  ///     reason      (the error, currently always 'unexpected code byte')
  ///
  /// The handler returns a tuple of (unicode, int) where unicode is the replacement string
  /// and int is an index where processing should continue.
  ///
  /// This class is the encoding side: it queues the replacement string supplied by the
  /// handler.  When no handler was supplied the fallback reports that nothing was produced.
  /// </summary>
  private class PythonEncoderFallbackBuffer : EncoderFallbackBuffer {
      private object _function;
      private string _encoding, _strData;
      private string _buffer;
      private int _bufferIndex;

      public PythonEncoderFallbackBuffer(string encoding, string str, object callable) {
          this._encoding = encoding;
          _strData = str;
          _function = callable;
      }

      public override bool Fallback(char charUnknown, int index) {
          // a single unencodable character
          return DoPythonFallback(index, 1);
      }

      public override bool Fallback(char charUnknownHigh, char charUnknownLow, int index) {
          // an unencodable surrogate pair spans two characters
          return DoPythonFallback(index, 2);
      }

      public override char GetNextChar() {
          bool exhausted = _buffer == null || _bufferIndex >= _buffer.Length;
          if (exhausted) {
              return Char.MinValue;
          }

          char next = _buffer[_bufferIndex];
          _bufferIndex++;
          return next;
      }

      public override bool MovePrevious() {
          if (_bufferIndex <= 0) {
              return false;
          }

          _bufferIndex--;
          return true;
      }

      public override int Remaining {
          get {
              return (_buffer != null) ? _buffer.Length - _bufferIndex : 0;
          }
      }

      public override void Reset() {
          _buffer = null;
          _bufferIndex = 0;
          base.Reset();
      }

      /// <summary>
      /// Asks the Python handler for a replacement of the <paramref name="length"/> characters
      /// at <paramref name="index"/> and queues it.  Returns false when there is no handler.
      /// </summary>
      private bool DoPythonFallback(int index, int length) {
          if (_function == null) {
              return false;
          }

          // create the exception object to hand to the user-function...
          PythonExceptions._UnicodeEncodeError exObj = new PythonExceptions._UnicodeEncodeError();
          exObj.__init__(_encoding, _strData, index, index + length, "unexpected code byte");

          // call the user function and validate what it handed back...
          object res = PythonCalls.Call(_function, exObj);
          string replacement = PythonDecoderFallbackBuffer.CheckReplacementTuple(res, "encoding");

          // finally process the user's request.
          _buffer = replacement;
          _bufferIndex = 0;
          return true;
      }
  }
  /// <summary>
  /// Encoder fallback that hands out PythonEncoderFallbackBuffer instances bound to the
  /// encoding name, the string being encoded and the (possibly null) Python error handler.
  /// </summary>
  class PythonEncoderFallback : EncoderFallback {
      private object _handler;
      private string _data;
      private string _encodingName;

      public PythonEncoderFallback(string encoding, string data, object callable) {
          _encodingName = encoding;
          _data = data;
          _handler = callable;
      }

      public override EncoderFallbackBuffer CreateFallbackBuffer() {
          EncoderFallbackBuffer buffer = new PythonEncoderFallbackBuffer(_encodingName, _data, _handler);
          return buffer;
      }

      /// <summary>Reports Int32.MaxValue as the upper bound for a replacement.</summary>
      public override int MaxCharCount {
          get {
              return Int32.MaxValue;
          }
      }
  }
  /// <summary>
  /// Decoder fallback buffer which asks a Python error handler for the replacement text of
  /// undecodable bytes and then serves that text character by character.  When no handler
  /// was supplied the fallback reports that nothing was produced.
  /// </summary>
  private class PythonDecoderFallbackBuffer : DecoderFallbackBuffer {
      private object _function;
      private string _encoding, _strData;
      private string _buffer;
      private int _bufferIndex;

      public PythonDecoderFallbackBuffer(string encoding, string str, object callable) {
          _function = callable;
          _strData = str;
          _encoding = encoding;
      }

      public override int Remaining {
          get {
              return (_buffer != null) ? _buffer.Length - _bufferIndex : 0;
          }
      }

      public override char GetNextChar() {
          bool exhausted = _buffer == null || _bufferIndex >= _buffer.Length;
          if (exhausted) {
              return Char.MinValue;
          }

          char next = _buffer[_bufferIndex];
          _bufferIndex++;
          return next;
      }

      public override bool MovePrevious() {
          if (_bufferIndex <= 0) {
              return false;
          }

          _bufferIndex--;
          return true;
      }

      public override void Reset() {
          _buffer = null;
          _bufferIndex = 0;
          base.Reset();
      }

      public override bool Fallback(byte[] bytesUnknown, int index) {
          if (_function == null) {
              return false;
          }

          // create the exception object to hand to the user-function...
          PythonExceptions._UnicodeDecodeError exObj = new PythonExceptions._UnicodeDecodeError();
          exObj.__init__(_encoding, _strData, index, index + bytesUnknown.Length, "unexpected code byte");

          // call the user function and validate what it handed back...
          object res = PythonCalls.Call(_function, exObj);
          string replacement = CheckReplacementTuple(res, "decoding");

          // finally process the user's request.
          _buffer = replacement;
          _bufferIndex = 0;
          return true;
      }

      /// <summary>
      /// Verifies that an error handler returned a 2-tuple whose items convert to a string
      /// and an Int32, and returns the string.  Raises a Python TypeError otherwise.  The
      /// integer is only validated here; its value is not used.
      /// </summary>
      internal static string CheckReplacementTuple(object res, string encodeOrDecode) {
          string replacement = null;
          int bytesSkipped;

          // verify the result is sane...
          PythonTuple tres = res as PythonTuple;
          bool ok = tres != null
              && tres.__len__() == 2
              && Converter.TryConvertToString(tres[0], out replacement)
              && Converter.TryConvertToInt32(tres[1], out bytesSkipped);

          if (ok) {
              return replacement;
          }

          throw PythonOps.TypeError("{1} error handler must return tuple containing (str, int), got {0}", PythonOps.GetPythonTypeName(res), encodeOrDecode);
      }
  }
  /// <summary>
  /// Decoder fallback that hands out PythonDecoderFallbackBuffer instances bound to the
  /// encoding name, the data being decoded and the (possibly null) Python error handler.
  /// </summary>
  class PythonDecoderFallback : DecoderFallback {
      private object _function;
      private string _str;
      private string _enc;

      public PythonDecoderFallback(string encoding, string data, object callable) {
          _function = callable;
          _str = data;
          _enc = encoding;
      }

      public override DecoderFallbackBuffer CreateFallbackBuffer() {
          return new PythonDecoderFallbackBuffer(_enc, _str, _function);
      }

      /// <summary>
      /// The replacement comes from user code and has no fixed upper bound, so the largest
      /// possible value is reported (matching PythonEncoderFallback) rather than throwing
      /// from a property getter.
      /// </summary>
      public override int MaxCharCount {
          get { return Int32.MaxValue; }
      }
  }
  /// <summary>
  /// Encoder fallback implementing the "backslashreplace" error handler: every character
  /// that cannot be encoded is replaced by a backslash escape (\xNN, \uNNNN or, for a
  /// surrogate pair, \UNNNNNNNN) written with lower case hex digits.
  /// </summary>
  class BackslashEncoderReplaceFallback : EncoderFallback {
      class BackslashReplaceFallbackBuffer : EncoderFallbackBuffer {
          private List<char> _buffer = new List<char>();
          private int _index;

          /// <summary>
          /// Replaces a surrogate pair with the 8-digit \U escape of the code point it encodes,
          /// instead of silently dropping the character.
          /// </summary>
          public override bool Fallback(char charUnknownHigh, char charUnknownLow, int index) {
              int codePoint = Char.ConvertToUtf32(charUnknownHigh, charUnknownLow);

              _buffer.Add('\\');
              _buffer.Add('U');
              // AddCharacter only looks at the low byte, so shifting is enough to select each byte
              AddCharacter(codePoint >> 24);
              AddCharacter(codePoint >> 16);
              AddCharacter(codePoint >> 8);
              AddCharacter(codePoint);
              return true;
          }

          /// <summary>
          /// Replaces a single character with \xNN (value up to 0xFF) or \uNNNN.
          /// </summary>
          public override bool Fallback(char charUnknown, int index) {
              _buffer.Add('\\');
              int val = (int)charUnknown;
              if (val > 0xFF) {
                  _buffer.Add('u');
                  AddCharacter(val >> 8);
                  AddCharacter(val & 0xFF);
              } else {
                  _buffer.Add('x');
                  AddCharacter(charUnknown);
              }
              return true;
          }

          /// <summary>Appends the two hex digits of the low byte of <paramref name="val"/>.</summary>
          private void AddCharacter(int val) {
              AddOneDigit(((val) & 0xF0) >> 4);
              AddOneDigit(val & 0x0F);
          }

          /// <summary>Appends one lower case hex digit for a value in the range 0-15.</summary>
          private void AddOneDigit(int val) {
              if (val > 9) {
                  _buffer.Add((char)('a' + val - 0x0A));
              } else {
                  _buffer.Add((char)('0' + val));
              }
          }

          public override char GetNextChar() {
              if (_index == _buffer.Count) return Char.MinValue;
              return _buffer[_index++];
          }

          public override bool MovePrevious() {
              if (_index > 0) {
                  _index--;
                  return true;
              }
              return false;
          }

          public override int Remaining {
              get { return _buffer.Count - _index; }
          }
      }

      public override EncoderFallbackBuffer CreateFallbackBuffer() {
          return new BackslashReplaceFallbackBuffer();
      }

      /// <summary>
      /// The longest replacement is the ten character surrogate pair escape "\UNNNNNNNN".
      /// </summary>
      public override int MaxCharCount {
          get { return 10; }
      }
  }
  /// <summary>
  /// Encoder fallback implementing the "xmlcharrefreplace" error handler: every character
  /// that cannot be encoded is replaced by the decimal XML character reference
  /// <c>&amp;#N;</c> of its code point.
  /// </summary>
  class XmlCharRefEncoderReplaceFallback : EncoderFallback {
      class XmlCharRefEncoderReplaceFallbackBuffer : EncoderFallbackBuffer {
          private List<char> _buffer = new List<char>();
          private int _index;

          /// <summary>
          /// Replaces a surrogate pair with the reference of the code point it encodes,
          /// instead of silently dropping the character.
          /// </summary>
          public override bool Fallback(char charUnknownHigh, char charUnknownLow, int index) {
              AddReference(Char.ConvertToUtf32(charUnknownHigh, charUnknownLow));
              return true;
          }

          /// <summary>Replaces a single character with its character reference.</summary>
          public override bool Fallback(char charUnknown, int index) {
              AddReference((int)charUnknown);
              return true;
          }

          /// <summary>Queues "&amp;#", the decimal digits of <paramref name="codePoint"/> and ";".</summary>
          private void AddReference(int codePoint) {
              _buffer.Add('&');
              _buffer.Add('#');
              // invariant culture keeps the digits independent of the current thread culture
              foreach (char c in codePoint.ToString(CultureInfo.InvariantCulture)) {
                  _buffer.Add(c);
              }
              _buffer.Add(';');
          }

          public override char GetNextChar() {
              if (_index == _buffer.Count) return Char.MinValue;
              return _buffer[_index++];
          }

          public override bool MovePrevious() {
              if (_index > 0) {
                  _index--;
                  return true;
              }
              return false;
          }

          public override int Remaining {
              get { return _buffer.Count - _index; }
          }
      }

      public override EncoderFallbackBuffer CreateFallbackBuffer() {
          return new XmlCharRefEncoderReplaceFallbackBuffer();
      }

      /// <summary>
      /// The longest replacement is the ten character reference of the highest code point,
      /// "&amp;#1114111;".
      /// </summary>
      public override int MaxCharCount {
          get { return 10; }
      }
  }
  /// <summary>
  /// Encoding registered for the "unicode_escape" (raw == false) and "raw_unicode_escape"
  /// (raw == true) codecs.  Encoding produces the escaped text, one byte per escaped
  /// character; decoding widens each byte to a char and parses the escapes with
  /// LiteralParser.ParseString.
  /// </summary>
  class UnicodeEscapeEncoding : Encoding {
      private bool _raw;

      public UnicodeEscapeEncoding(bool raw) {
          _raw = raw;
      }

      public override int GetByteCount(char[] chars, int index, int count) {
          return EscapeEncode(chars, index, count).Length;
      }

      /// <summary>Returns the escaped form of the given character range for the current mode.</summary>
      private string EscapeEncode(char[] chars, int index, int count) {
          if (_raw) {
              return RawUnicodeEscapeEncode(new string(chars, index, count));
          }

          bool dummy = false;
          return ReprEncode(new string(chars, index, count), ref dummy);
      }

      /// <summary>
      /// Writes the escaped form of the characters.  Both modes go through EscapeEncode so
      /// that the bytes written always match what GetByteCount reported; copying the
      /// unescaped characters would truncate them to bytes and leave the tail of a buffer
      /// sized by GetByteCount unwritten.
      /// </summary>
      public override int GetBytes(char[] chars, int charIndex, int charCount, byte[] bytes, int byteIndex) {
          string res = EscapeEncode(chars, charIndex, charCount);
          for (int i = 0; i < res.Length; i++) {
              bytes[i + byteIndex] = (byte)res[i];
          }
          return res.Length;
      }

      /// <summary>Widens each byte of the range to a char and returns the resulting string.</summary>
      private static string BytesToString(byte[] bytes, int index, int count) {
          StringBuilder builder = new StringBuilder();
          for (int i = 0; i < count; i++) {
              builder.Append((char)bytes[i + index]);
          }
          return builder.ToString();
      }

      public override int GetCharCount(byte[] bytes, int index, int count) {
          return LiteralParser.ParseString(BytesToString(bytes, index, count), _raw, true).Length;
      }

      public override int GetChars(byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex) {
          string res = LiteralParser.ParseString(BytesToString(bytes, byteIndex, byteCount), _raw, true);
          for (int i = 0; i < res.Length; i++) {
              chars[i + charIndex] = res[i];
          }
          return res.Length;
      }

      public override int GetMaxByteCount(int charCount) {
          // a single char can expand to the six byte escape \uXXXX
          return charCount * 6;
      }

      public override int GetMaxCharCount(int byteCount) {
          return byteCount;
      }
  }
  2219. #endif
  2220. #endregion
  /// <summary>Returns the result of StringOps.Quote for <paramref name="self"/>.</summary>
  public static string/*!*/ __repr__(string/*!*/ self) {
      string quoted = StringOps.Quote(self);
      return quoted;
  }
  2224. }
  2225. }