PageRenderTime 58ms CodeModel.GetById 18ms RepoModel.GetById 0ms app.codeStats 0ms

/IronPython_2_0/Src/IronPython/Runtime/Operations/StringOps.cs

#
C# | 2412 lines | 2117 code | 233 blank | 62 comment | 354 complexity | 7f5752d01ce870c21a534be4bfc5b52d MD5 | raw file
Possible License(s): GPL-2.0, MPL-2.0-no-copyleft-exception, CPL-1.0, CC-BY-SA-3.0, BSD-3-Clause, ISC, AGPL-3.0, LGPL-2.1, Apache-2.0
  1. /* ****************************************************************************
  2. *
  3. * Copyright (c) Microsoft Corporation.
  4. *
  5. * This source code is subject to terms and conditions of the Microsoft Public License. A
  6. * copy of the license can be found in the License.html file at the root of this distribution. If
  7. * you cannot locate the Microsoft Public License, please send an email to
  8. * dlr@microsoft.com. By using this source code in any fashion, you are agreeing to be bound
  9. * by the terms of the Microsoft Public License.
  10. *
  11. * You must not remove this notice, or any other, from this software.
  12. *
  13. *
  14. * ***************************************************************************/
  15. using System; using Microsoft;
  16. using System.Collections;
  17. using System.Collections.Generic;
  18. using System.Diagnostics;
  19. using System.Globalization;
  20. using System.Reflection;
  21. using System.Runtime.InteropServices;
  22. using System.Text;
  23. using IronPython.Runtime.Exceptions;
  24. using IronPython.Runtime.Types;
  25. using Microsoft.Scripting;
  26. using Microsoft.Scripting.Math;
  27. using Microsoft.Scripting.Runtime;
  28. using Microsoft.Scripting.Utils;
  29. using SpecialNameAttribute = System.Runtime.CompilerServices.SpecialNameAttribute;
  30. namespace IronPython.Runtime.Operations {
  /// <summary>
  /// Base class for user-defined types that derive from the Python string type.
  /// The instance wraps the underlying string value (exposed through Value) and
  /// is recognized by the converter as a string.
  /// </summary>
  public class ExtensibleString : Extensible<string>, ICodeFormattable, IValueEquality, ISequence {
      /// <summary>Creates an instance wrapping the empty string.</summary>
      public ExtensibleString() : base(String.Empty) { }

      /// <summary>Creates an instance wrapping the given string.</summary>
      public ExtensibleString(string self) : base(self) { }

      /// <summary>Returns the wrapped string value.</summary>
      public override string ToString() {
          return Value;
      }

      #region ICodeFormattable Members

      /// <summary>Returns the quoted representation produced by StringOps.Quote.</summary>
      public virtual string/*!*/ __repr__(CodeContext/*!*/ context) {
          return StringOps.Quote(Value);
      }

      #endregion

      /// <summary>
      /// Compares against a string or another ExtensibleString; any other operand
      /// type yields NotImplemented.
      /// </summary>
      [return: MaybeNotImplemented]
      public object __eq__(object other) {
          bool comparable = other is string || other is ExtensibleString;
          if (!comparable) {
              return NotImplementedType.Value;
          }

          bool same = ((IValueEquality)this).ValueEquals(other);
          return RuntimeHelpers.BooleanToObject(same);
      }

      /// <summary>
      /// Negation of __eq__; NotImplemented is passed through unchanged.
      /// </summary>
      [return: MaybeNotImplemented]
      public object __ne__(object other) {
          object equality = __eq__(other);
          if (equality == NotImplementedType.Value) {
              return equality;
          }
          return PythonOps.Not(equality);
      }

      #region IValueEquality members

      int IValueEquality.GetValueHashCode() {
          return GetHashCode();
      }

      bool IValueEquality.ValueEquals(object other) {
          // null fails both casts below and therefore compares unequal.
          ExtensibleString wrapped = other as ExtensibleString;
          if (wrapped != null) {
              return Value == wrapped.Value;
          }

          string plain = other as string;
          return plain != null && Value == plain;
      }

      #endregion

      #region ISequence Members

      /// <summary>Returns the character at the given position as a one-character string.</summary>
      public virtual object this[int index] {
          get {
              char ch = Value[index];
              return RuntimeHelpers.CharToString(ch);
          }
      }

      /// <summary>Returns the slice of the wrapped value, delegating to StringOps.GetItem.</summary>
      public object this[Slice slice] {
          get {
              return StringOps.GetItem(Value, slice);
          }
      }

      /// <summary>Returns Value[start:stop], delegating to StringOps.__getslice__.</summary>
      public object __getslice__(int start, int stop) {
          return StringOps.__getslice__(Value, start, stop);
      }

      #endregion

      #region IPythonContainer Members

      /// <summary>Length of the wrapped string.</summary>
      public virtual int __len__() {
          return Value.Length;
      }

      /// <summary>
      /// Substring test. Accepts a string or an ExtensibleString; anything else
      /// raises a TypeError.
      /// </summary>
      public virtual bool __contains__(object value) {
          string plain = value as string;
          if (plain != null) {
              return Value.Contains(plain);
          }

          ExtensibleString wrapped = value as ExtensibleString;
          if (wrapped != null) {
              return Value.Contains(wrapped.Value);
          }

          throw PythonOps.TypeErrorForBadInstance("expected string, got {0}", value);
      }

      #endregion
  }
  94. /// <summary>
  95. /// StringOps is the static class that contains the methods defined on strings, i.e. 'abc'
  96. ///
  97. /// Here we define all of the methods that a Python user would see when doing dir('abc').
  98. /// If the user is running in a CLS aware context they will also see all of the methods
  99. /// defined in the CLS System.String type.
  100. /// </summary>
  101. public static class StringOps {
  102. internal const int LowestUnicodeValue = 0x7f;
  103. private static readonly char[] Whitespace = new char[] { ' ', '\t', '\n', '\r', '\f' };
  /// <summary>
  /// Converts an arbitrary object to its string form for str(x).
  /// null becomes "None", a string is passed through the ASCII check, and any
  /// other object is converted by invoking the String unary operator on it.
  /// </summary>
  internal static object FastNew(CodeContext/*!*/ context, object x) {
      if (x == null) {
          return "None";
      }

      string text = x as string;
      if (text != null) {
          // check ascii
          return CheckAsciiString(context, text);
      }

      // PythonOps.StringRepr is deliberately not used here: when __str__ hands
      // back an Extensible<string>, that very object must be returned.
      object converted = PythonContext.InvokeUnaryOperator(context, UnaryOperators.String, x);
      bool isStringLike = converted is string || converted is Extensible<string>;
      if (!isStringLike) {
          throw PythonOps.TypeError("expected str, got {0} from __str__", DynamicHelpers.GetPythonType(converted).Name);
      }
      return converted;
  }
  /// <summary>
  /// Returns <paramref name="s"/> unchanged when every character is ASCII.
  /// Otherwise the string is routed through the encoding-aware __new__
  /// overload with a null encoding and "strict" error handling.
  /// </summary>
  private static object CheckAsciiString(CodeContext context, string s) {
      foreach (char ch in s) {
          // ASCII covers 0x00-0x7F, so U+0080 is already outside the range.
          if (ch > '\x7f') {
              return StringOps.__new__(
                  context,
                  (PythonType)DynamicHelpers.GetPythonTypeFromType(typeof(String)),
                  s,
                  null,
                  "strict"
              );
          }
      }
      return s;
  }
  133. #region Python Constructors
  /// <summary>str() with no argument: the empty string, or a fresh instance of a subtype.</summary>
  [StaticExtensionMethod]
  public static object __new__(CodeContext/*!*/ context, PythonType cls) {
      return cls == TypeCache.String
          ? ""
          : cls.CreateInstance(context);
  }

  /// <summary>str(x) for an arbitrary object; the plain string type goes through FastNew.</summary>
  [StaticExtensionMethod]
  public static object __new__(CodeContext/*!*/ context, PythonType cls, object @object) {
      return cls == TypeCache.String
          ? FastNew(context, @object)
          : cls.CreateInstance(context, @object);
  }

  /// <summary>str(x) for a string argument; the plain string type only runs the ASCII check.</summary>
  [StaticExtensionMethod]
  public static object __new__(CodeContext/*!*/ context, PythonType cls, [NotNull]string @object) {
      return cls == TypeCache.String
          ? CheckAsciiString(context, @object)
          : cls.CreateInstance(context, @object);
  }

  /// <summary>str(x) for an instance of a string subtype.</summary>
  [StaticExtensionMethod]
  public static object __new__(CodeContext/*!*/ context, PythonType cls, [NotNull]ExtensibleString @object) {
      return cls == TypeCache.String
          ? FastNew(context, @object)
          : cls.CreateInstance(context, @object);
  }

  /// <summary>str(x) for a single character.</summary>
  [StaticExtensionMethod]
  public static object __new__(CodeContext/*!*/ context, PythonType cls, char @object) {
      return cls == TypeCache.String
          ? CheckAsciiString(context, RuntimeHelpers.CharToString(@object))
          : cls.CreateInstance(context, @object);
  }
  /// <summary>str(x) for a BigInteger.</summary>
  [StaticExtensionMethod]
  public static object __new__(CodeContext/*!*/ context, PythonType cls, [NotNull]BigInteger @object) {
      return cls == TypeCache.String
          ? @object.ToString()
          : cls.CreateInstance(context, @object);
  }

  /// <summary>str(x) for an instance of a BigInteger subtype.</summary>
  [StaticExtensionMethod]
  public static object __new__(CodeContext/*!*/ context, PythonType cls, [NotNull]Extensible<BigInteger> @object) {
      return cls == TypeCache.String
          ? FastNew(context, @object)
          : cls.CreateInstance(context, @object);
  }

  /// <summary>str(x) for an int.</summary>
  [StaticExtensionMethod]
  public static object __new__(CodeContext/*!*/ context, PythonType cls, int @object) {
      return cls == TypeCache.String
          ? @object.ToString()
          : cls.CreateInstance(context, @object);
  }

  /// <summary>str(x) for a bool.</summary>
  [StaticExtensionMethod]
  public static object __new__(CodeContext/*!*/ context, PythonType cls, bool @object) {
      return cls == TypeCache.String
          ? @object.ToString()
          : cls.CreateInstance(context, @object);
  }

  /// <summary>str(x) for a double; formatting is delegated to DoubleOps.__str__.</summary>
  [StaticExtensionMethod]
  public static object __new__(CodeContext/*!*/ context, PythonType cls, double @object) {
      return cls == TypeCache.String
          ? DoubleOps.__str__(context, @object)
          : cls.CreateInstance(context, @object);
  }

  /// <summary>str(x) for an instance of a double subtype.</summary>
  [StaticExtensionMethod]
  public static object __new__(CodeContext/*!*/ context, PythonType cls, Extensible<double> @object) {
      return cls == TypeCache.String
          ? FastNew(context, @object)
          : cls.CreateInstance(context, @object);
  }

  /// <summary>str(x) for a float; formatting is delegated to SingleOps.__str__.</summary>
  [StaticExtensionMethod]
  public static object __new__(CodeContext/*!*/ context, PythonType cls, float @object) {
      return cls == TypeCache.String
          ? SingleOps.__str__(context, @object)
          : cls.CreateInstance(context, @object);
  }
  /// <summary>
  /// str(string, encoding, errors): decodes the given string. The first argument
  /// must be a string, otherwise a TypeError is raised. When no encoding is
  /// supplied, the context's default encoding name is used.
  /// </summary>
  [StaticExtensionMethod]
  public static object __new__(CodeContext/*!*/ context, PythonType cls,
      object @string,
      [DefaultParameterValue(null)] string encoding,
      [DefaultParameterValue("strict")] string errors) {
      string text = @string as string;
      if (text == null) {
          throw PythonOps.TypeError("converting to unicode: need string, got {0}", DynamicHelpers.GetPythonType(@string).Name);
      }

      if (cls != TypeCache.String) {
          return cls.CreateInstance(context, text, encoding, errors);
      }

      string effectiveEncoding = encoding ?? PythonContext.GetContext(context).GetDefaultEncodingName();
      return decode(context, text, effectiveEncoding, errors);
  }
  243. #endregion
  244. #region Python __ methods
  /// <summary>True when <paramref name="item"/> occurs as a substring of <paramref name="s"/>.</summary>
  public static bool __contains__(string s, string item) {
      bool found = s.Contains(item);
      return found;
  }

  /// <summary>True when the character <paramref name="item"/> occurs in <paramref name="s"/>.</summary>
  public static bool __contains__(string s, char item) {
      int position = s.IndexOf(item);
      return position != -1;
  }
  /// <summary>Number of characters in <paramref name="s"/>.</summary>
  public static int __len__(string s) {
      int length = s.Length;
      return length;
  }
  /// <summary>
  /// s[index]: returns the character at the position as a one-character string.
  /// The index is first normalized by PythonOps.FixIndex against the string length.
  /// </summary>
  [SpecialName]
  public static string GetItem(string s, int index) {
      int position = PythonOps.FixIndex(index, s.Length);
      char ch = s[position];
      return RuntimeHelpers.CharToString(ch);
  }

  /// <summary>s[index] for an arbitrary object that Converter.ConvertToIndex can turn into an index.</summary>
  [SpecialName]
  public static string GetItem(string s, object index) {
      int position = Converter.ConvertToIndex(index);
      return GetItem(s, position);
  }
  /// <summary>
  /// s[slice]: returns the characters selected by the slice. A null slice raises
  /// a TypeError. The bounds and step come from slice.indices for the string length.
  /// </summary>
  [SpecialName]
  public static string GetItem(string s, Slice slice) {
      if (slice == null) throw PythonOps.TypeError("string indicies must be slices or integers");

      int start, stop, step;
      slice.indices(s.Length, out start, out stop, out step);

      // Contiguous forward slice: a plain substring is enough.
      if (step == 1) {
          if (stop > start) {
              return s.Substring(start, stop - start);
          }
          return String.Empty;
      }

      char[] picked;
      int filled = 0;
      int cursor = start;

      if (step > 0) {
          if (start > stop) return String.Empty;

          // Number of positions visited when walking forward, rounded up.
          picked = new char[(stop - start + step - 1) / step];
          while (cursor < stop) {
              picked[filled++] = s[cursor];
              cursor += step;
          }
      } else {
          if (start < stop) return String.Empty;

          // Number of positions visited when walking backward.
          picked = new char[(stop - start + step + 1) / step];
          while (cursor > stop) {
              picked[filled++] = s[cursor];
              cursor += step;
          }
      }

      return new string(picked);
  }
  /// <summary>
  /// self[x:y]: the bounds are normalized by Slice.FixSliceArguments, and an
  /// empty string is returned when the normalized range is empty.
  /// </summary>
  public static string __getslice__(string self, int x, int y) {
      Slice.FixSliceArguments(self.Length, ref x, ref y);
      return x < y ? self.Substring(x, y - x) : String.Empty;
  }
  295. #endregion
  296. #region Public Python methods
  /// <summary>
  /// Returns a copy with the first character upper-cased and the remainder
  /// lower-cased. An empty string is returned as is.
  /// </summary>
  public static string capitalize(this string self) {
      if (self.Length == 0) {
          return self;
      }

      string head = Char.ToUpper(self[0]).ToString();
      string tail = self.Substring(1).ToLower();
      return head + tail;
  }
  /// <summary>
  /// Centers the string in a field of <paramref name="width"/> characters,
  /// padding with spaces (the default fill character).
  /// </summary>
  public static string center(this string self, int width) {
      return center(self, width, ' ');
  }

  /// <summary>
  /// Centers the string in a field of <paramref name="width"/> characters,
  /// padding with <paramref name="fillchar"/>. The string is returned unchanged
  /// when it is already at least <paramref name="width"/> characters long.
  /// </summary>
  public static string center(this string self, int width, char fillchar) {
      int spaces = width - self.Length;
      if (spaces <= 0) return self;

      // Match CPython's split of an odd amount of padding: the extra fill
      // character goes on the left when the target width is odd, on the
      // right when it is even (left = pad/2 + (pad & width & 1)).
      int left = spaces / 2 + (spaces & width & 1);
      int right = spaces - left;

      StringBuilder ret = new StringBuilder(width);
      ret.Append(fillchar, left);
      ret.Append(self);
      ret.Append(fillchar, right);
      return ret.ToString();
  }
  /// <summary>Counts non-overlapping occurrences of <paramref name="sub"/> in the whole string.</summary>
  public static int count(this string self, string sub) {
      return count(self, sub, 0, self.Length);
  }

  /// <summary>Counts non-overlapping occurrences of <paramref name="sub"/> from <paramref name="start"/> to the end.</summary>
  public static int count(this string self, string sub, int start) {
      return count(self, sub, start, self.Length);
  }

  /// <summary>
  /// Counts non-overlapping occurrences of <paramref name="ssub"/> within
  /// self[start:end]. Bounds are normalized by PythonOps.FixSliceIndex.
  /// An empty <paramref name="ssub"/> matches once per position in the range
  /// plus once at its end. A null <paramref name="ssub"/> raises a TypeError.
  /// </summary>
  public static int count(this string self, string ssub, int start, int end) {
      if (ssub == null) throw PythonOps.TypeError("expected string for 'sub' argument, got NoneType");

      if (start > self.Length) {
          return 0;
      }

      start = PythonOps.FixSliceIndex(start, self.Length);
      end = PythonOps.FixSliceIndex(end, self.Length);

      if (ssub.Length == 0) {
          return Math.Max((end - start) + 1, 0);
      }

      int occurrences = 0;
      while (end > start) {
          // Ordinal search: the default IndexOf overload is culture-sensitive,
          // which would disagree with the ordinal matching used by find().
          int index = self.IndexOf(ssub, start, end - start, StringComparison.Ordinal);
          if (index == -1) break;

          occurrences++;
          start = index + ssub.Length;
      }
      return occurrences;
  }
  /// <summary>Decodes <paramref name="s"/> with no explicit encoding and "strict" error handling.</summary>
  public static string decode(CodeContext/*!*/ context, string s) {
      return RawDecode(context, s, Missing.Value, "strict");
  }

  /// <summary>Decodes <paramref name="s"/>; the work is done by RawDecode.</summary>
  public static string decode(CodeContext/*!*/ context, string s, [Optional]object encoding, [DefaultParameterValue("strict")]string errors) {
      string decoded = RawDecode(context, s, encoding, errors);
      return decoded;
  }

  /// <summary>Encodes <paramref name="s"/>; the work is done by RawEncode.</summary>
  public static string encode(CodeContext/*!*/ context, string s, [Optional]object encoding, [DefaultParameterValue("strict")]string errors) {
      string encoded = RawEncode(context, s, encoding, errors);
      return encoded;
  }
  /// <summary>
  /// Returns the string held by <paramref name="o"/>, which must be either a
  /// string or an Extensible&lt;string&gt;; any other type fails the cast.
  /// </summary>
  private static string CastString(object o) {
      return (o as string) ?? ((Extensible<string>)o).Value;
  }
  /// <summary>
  /// Returns the string held by <paramref name="o"/> when it is a string or an
  /// Extensible&lt;string&gt;, and null for anything else.
  /// </summary>
  private static string AsString(object o) {
      string text = o as string;
      if (text == null) {
          Extensible<string> wrapper = o as Extensible<string>;
          if (wrapper != null) {
              text = wrapper.Value;
          }
      }
      return text;
  }
  /// <summary>
  /// True when the string ends with <paramref name="suffix"/>, which is checked
  /// by TryStringOrTuple and then dispatched to the tuple or string overload.
  /// </summary>
  public static bool endswith(this string self, object suffix) {
      TryStringOrTuple(suffix);

      PythonTuple candidates = suffix as PythonTuple;
      if (candidates != null) {
          return endswith(self, candidates);
      }
      return endswith(self, CastString(suffix));
  }

  /// <summary>Same as the one-argument form, with the test starting at <paramref name="start"/>.</summary>
  public static bool endswith(this string self, object suffix, int start) {
      TryStringOrTuple(suffix);

      PythonTuple candidates = suffix as PythonTuple;
      if (candidates != null) {
          return endswith(self, candidates, start);
      }
      return endswith(self, CastString(suffix), start);
  }

  /// <summary>Same as the one-argument form, restricted to the range given by <paramref name="start"/> and <paramref name="end"/>.</summary>
  public static bool endswith(this string self, object suffix, int start, int end) {
      TryStringOrTuple(suffix);

      PythonTuple candidates = suffix as PythonTuple;
      if (candidates != null) {
          return endswith(self, candidates, start, end);
      }
      return endswith(self, CastString(suffix), start, end);
  }
  /// <summary>Expands tabs using a tab size of 8.</summary>
  public static string expandtabs(string self) {
      return expandtabs(self, 8);
  }

  /// <summary>
  /// Replaces every tab with enough spaces to reach the next multiple of
  /// <paramref name="tabsize"/>. The column counter restarts after '\n' and
  /// '\r'. With a non-positive <paramref name="tabsize"/>, tabs are dropped.
  /// </summary>
  public static string expandtabs(this string self, int tabsize) {
      StringBuilder expanded = new StringBuilder(self.Length * 2);
      int column = 0;

      foreach (char ch in self) {
          if (ch == '\t') {
              if (tabsize > 0) {
                  expanded.Append(' ', tabsize - (column % tabsize));
                  // We are now sitting on a tab stop.
                  column = 0;
              }
          } else if (ch == '\n' || ch == '\r') {
              column = 0;
              expanded.Append(ch);
          } else {
              column++;
              expanded.Append(ch);
          }
      }

      return expanded.ToString();
  }
  /// <summary>
  /// Returns the index of the first occurrence of <paramref name="sub"/>, or -1
  /// when it does not occur. A null <paramref name="sub"/> raises a TypeError.
  /// </summary>
  public static int find(this string self, string sub) {
      if (sub == null) throw PythonOps.TypeError("expected string, got NoneType");
      if (sub.Length == 1) return self.IndexOf(sub[0]);

      return self.IndexOf(sub, StringComparison.Ordinal);
  }

  /// <summary>
  /// Returns the index of the first occurrence of <paramref name="sub"/> at or
  /// after <paramref name="start"/> (normalized by PythonOps.FixSliceIndex), or -1.
  /// </summary>
  public static int find(this string self, string sub, int start) {
      if (sub == null) throw PythonOps.TypeError("expected string, got NoneType");
      if (start > self.Length) return -1;

      start = PythonOps.FixSliceIndex(start, self.Length);

      // Ordinal, like the one-argument overload; the default IndexOf(string, int)
      // overload performs a culture-sensitive search.
      return self.IndexOf(sub, start, StringComparison.Ordinal);
  }

  /// <summary>
  /// Returns the index of the first occurrence of <paramref name="sub"/> within
  /// self[start:end] (bounds normalized by PythonOps.FixSliceIndex), or -1.
  /// </summary>
  public static int find(this string self, string sub, int start, int end) {
      if (sub == null) throw PythonOps.TypeError("expected string, got NoneType");
      if (start > self.Length) return -1;

      start = PythonOps.FixSliceIndex(start, self.Length);
      end = PythonOps.FixSliceIndex(end, self.Length);
      if (end < start) return -1;

      // Ordinal, like the one-argument overload.
      return self.IndexOf(sub, start, end - start, StringComparison.Ordinal);
  }
  /// <summary>Like find over the whole string, but raises a ValueError when <paramref name="sub"/> is absent.</summary>
  public static int index(this string self, string sub) {
      if (sub == null) {
          throw PythonOps.TypeError("expected string, got NoneType");
      }
      return index(self, sub, 0, self.Length);
  }

  /// <summary>Like find from <paramref name="start"/>, but raises a ValueError when <paramref name="sub"/> is absent.</summary>
  public static int index(this string self, string sub, int start) {
      if (sub == null) {
          throw PythonOps.TypeError("expected string, got NoneType");
      }
      return index(self, sub, start, self.Length);
  }

  /// <summary>
  /// Like find within the given range, but raises a ValueError when
  /// <paramref name="sub"/> is absent. A null <paramref name="sub"/> raises a TypeError.
  /// </summary>
  public static int index(this string self, string sub, int start, int end) {
      if (sub == null) {
          throw PythonOps.TypeError("expected string, got NoneType");
      }

      int position = find(self, sub, start, end);
      if (position != -1) {
          return position;
      }
      throw PythonOps.ValueError("substring {0} not found in {1}", sub, self);
  }
  /// <summary>True when the string is non-empty and every position holds a letter or a digit.</summary>
  public static bool isalnum(this string self) {
      int length = self.Length;
      if (length == 0) return false;

      for (int i = 0; i < length; i++) {
          if (!Char.IsLetterOrDigit(self, i)) {
              return false;
          }
      }
      return true;
  }
  /// <summary>True when the string is non-empty and every position holds a letter.</summary>
  public static bool isalpha(this string self) {
      int length = self.Length;
      if (length == 0) return false;

      for (int i = 0; i < length; i++) {
          if (!Char.IsLetter(self, i)) {
              return false;
          }
      }
      return true;
  }
  /// <summary>True when the string is non-empty and every position holds a decimal digit.</summary>
  public static bool isdigit(this string self) {
      int length = self.Length;
      if (length == 0) return false;

      for (int i = 0; i < length; i++) {
          if (!Char.IsDigit(self, i)) {
              return false;
          }
      }
      return true;
  }
  /// <summary>True when the string is non-empty and every position holds white space.</summary>
  public static bool isspace(this string self) {
      int length = self.Length;
      if (length == 0) return false;

      for (int i = 0; i < length; i++) {
          if (!Char.IsWhiteSpace(self, i)) {
              return false;
          }
      }
      return true;
  }
  /// <summary>Same test as isnumeric.</summary>
  public static bool isdecimal(this string self) {
      bool result = isnumeric(self);
      return result;
  }

  /// <summary>True when the string is non-null, non-empty and consists only of decimal digits.</summary>
  public static bool isnumeric(this string self) {
      if (String.IsNullOrEmpty(self)) {
          return false;
      }

      for (int i = 0; i < self.Length; i++) {
          char ch = self[i];
          if (!Char.IsDigit(ch)) {
              return false;
          }
      }
      return true;
  }
  /// <summary>
  /// True when the string contains at least one lower-case character and no
  /// upper-case character.
  /// </summary>
  public static bool islower(this string self) {
      if (self.Length == 0) return false;

      bool sawLower = false;
      for (int i = 0; i < self.Length; i++) {
          if (Char.IsUpper(self, i)) {
              return false;
          }
          if (!sawLower) {
              sawLower = Char.IsLower(self, i);
          }
      }
      return sawLower;
  }
  /// <summary>
  /// True when the string contains at least one upper-case character and no
  /// lower-case character.
  /// </summary>
  public static bool isupper(this string self) {
      if (self.Length == 0) return false;

      bool sawUpper = false;
      for (int i = 0; i < self.Length; i++) {
          if (Char.IsLower(self, i)) {
              return false;
          }
          if (!sawUpper) {
              sawUpper = Char.IsUpper(self, i);
          }
      }
      return sawUpper;
  }
  /// <summary>
  /// True when the string is title-cased: it holds at least one upper-case or
  /// title-case character, such characters only follow uncased characters
  /// (e.g. white space), and lower-case characters only follow cased ones.
  /// A null or empty string is not a title.
  /// </summary>
  public static bool istitle(this string self) {
      if (String.IsNullOrEmpty(self)) return false;

      bool previousCased = false;
      bool sawUpper = false;

      for (int i = 0; i < self.Length; i++) {
          bool startsWord = Char.IsUpper(self, i)
              || Char.GetUnicodeCategory(self, i) == UnicodeCategory.TitlecaseLetter;

          if (startsWord) {
              sawUpper = true;
              // An upper/title-case character may not follow a cased one.
              if (previousCased) return false;
              previousCased = true;
          } else if (Char.IsLower(self, i)) {
              // A lower-case character must continue a cased run.
              if (!previousCased) return false;
              previousCased = true;
          } else {
              previousCased = false;
          }
      }

      // No rule was broken, but a string without any upper-cased character
      // (e.g. '\n' or all white space) is still not a title.
      return sawUpper;
  }
  /// <summary>
  /// True when at least one character has a code at or above LowestUnicodeValue.
  /// </summary>
  public static bool isunicode(this string self) {
      for (int i = 0; i < self.Length; i++) {
          if (self[i] >= LowestUnicodeValue) {
              return true;
          }
      }
      return false;
  }
  /// <summary>
  /// Concatenates the items of <paramref name="sequence"/>, inserting this
  /// string between consecutive items. An empty sequence yields "", and a
  /// single-item sequence yields that item converted to a string.
  /// </summary>
  public static string join(this string self, object sequence) {
      IEnumerator items = PythonOps.GetEnumerator(sequence);
      if (!items.MoveNext()) {
          return "";
      }

      // With exactly one item there is nothing to join: return it directly.
      object first = items.Current;
      if (!items.MoveNext()) {
          return Converter.ConvertToString(first);
      }

      StringBuilder buffer = new StringBuilder();
      AppendJoin(first, 0, buffer);

      // The enumerator already sits on the second item at this point.
      for (int position = 1; ; position++) {
          buffer.Append(self);
          AppendJoin(items.Current, position, buffer);
          if (!items.MoveNext()) {
              break;
          }
      }

      return buffer.ToString();
  }
  /// <summary>
  /// List-specific form of join: concatenates the list items, inserting this
  /// string between them. The list is locked while its storage is read.
  /// </summary>
  public static string join(this string/*!*/ self, [NotNull]List/*!*/ sequence) {
      if (sequence.__len__() == 0) {
          return String.Empty;
      }

      lock (sequence) {
          if (sequence.__len__() == 1) {
              return Converter.ConvertToString(sequence[0]);
          }

          StringBuilder buffer = new StringBuilder();
          AppendJoin(sequence._data[0], 0, buffer);

          int i = 1;
          while (i < sequence._size) {
              buffer.Append(self);
              AppendJoin(sequence._data[i], i, buffer);
              i++;
          }

          return buffer.ToString();
      }
  }
  /// <summary>Left-justifies the string in a field of <paramref name="width"/> characters, padding with spaces.</summary>
  public static string ljust(this string self, int width) {
      return ljust(self, width, ' ');
  }

  /// <summary>
  /// Left-justifies the string in a field of <paramref name="width"/>
  /// characters, padding on the right with <paramref name="fillchar"/>. The
  /// string is returned unchanged when it is already wide enough.
  /// </summary>
  public static string ljust(this string self, int width, char fillchar) {
      if (width - self.Length <= 0) {
          return self;
      }
      return self.PadRight(width, fillchar);
  }
  /// <summary>Returns a lower-cased copy of the string, using the current culture's casing rules.</summary>
  public static string lower(this string self) {
      return self.ToLower(CultureInfo.CurrentCulture);
  }
  /// <summary>Removes leading characters found in the Whitespace set.</summary>
  public static string lstrip(this string self) {
      return self.TrimStart(Whitespace);
  }

  /// <summary>
  /// Removes leading characters that appear in <paramref name="chars"/>.
  /// A null <paramref name="chars"/> strips white space; an empty
  /// <paramref name="chars"/> strips nothing.
  /// </summary>
  public static string lstrip(this string self, string chars) {
      if (chars == null) return lstrip(self);

      // TrimStart treats an empty array as "trim white space", but an empty
      // character set must leave the string untouched.
      if (chars.Length == 0) return self;

      return self.TrimStart(chars.ToCharArray());
  }
  /// <summary>
  /// Splits the string at the first occurrence of <paramref name="sep"/> and
  /// returns the 3-tuple (head, sep, tail). When the separator is not found the
  /// result is (self, "", ""). A null separator raises a TypeError and an empty
  /// one a ValueError.
  /// </summary>
  public static PythonTuple partition(this string self, string sep) {
      if (sep == null) {
          throw PythonOps.TypeError("expected string, got NoneType");
      }
      if (sep.Length == 0) {
          throw PythonOps.ValueError("empty separator");
      }

      string head = "", middle = "", tail = "";

      if (self.Length != 0) {
          int at = find(self, sep);
          if (at == -1) {
              head = self;
          } else {
              head = self.Substring(0, at);
              middle = sep;
              tail = self.Substring(at + sep.Length);
          }
      }

      return new PythonTuple(new object[3] { head, middle, tail });
  }
  /// <summary>
  /// Returns the text of <paramref name="input"/> when it is a string or a
  /// PythonBuffer (via ToString); any other argument raises a TypeError.
  /// </summary>
  private static string StringOrBuffer(object input) {
      string text = input as string;
      if (text == null) {
          PythonBuffer buffer = input as PythonBuffer;
          if (buffer == null) {
              throw PythonOps.TypeError("expected a character buffer object");
          }
          text = buffer.ToString();
      }
      return text;
  }
  /// <summary>
  /// Replaces every occurrence of <paramref name="old"/> with
  /// <paramref name="new_"/>. Both arguments must be strings or buffers
  /// (see StringOrBuffer). An empty <paramref name="old"/> is handled by ReplaceEmpty.
  /// </summary>
  public static string replace(this string self, object old, object new_) {
      string oldString = StringOrBuffer(old);
      string newString = StringOrBuffer(new_);

      if (oldString.Length == 0) return ReplaceEmpty(self, newString, self.Length + 1);
      return self.Replace(oldString, newString);
  }

  /// <summary>
  /// Replaces at most <paramref name="maxsplit"/> occurrences of
  /// <paramref name="old"/> with <paramref name="new_"/>, scanning left to
  /// right. Any negative <paramref name="maxsplit"/> means "no limit".
  /// </summary>
  public static string replace(this string self, object old, object new_, int maxsplit) {
      // CPython treats every negative count as unlimited, not only -1.
      if (maxsplit < 0) return replace(self, old, new_);

      string oldString = StringOrBuffer(old);
      string newString = StringOrBuffer(new_);

      if (oldString.Length == 0) return ReplaceEmpty(self, newString, maxsplit);

      StringBuilder ret = new StringBuilder(self.Length);
      int index;
      int start = 0;

      // Ordinal search, to agree with String.Replace used by the unlimited
      // overload; the default IndexOf(string, int) is culture-sensitive.
      while (maxsplit > 0 && (index = self.IndexOf(oldString, start, StringComparison.Ordinal)) != -1) {
          ret.Append(self, start, index - start);
          ret.Append(newString);
          start = index + oldString.Length;
          maxsplit--;
      }

      // Copy whatever follows the last replaced occurrence.
      ret.Append(self, start, self.Length - start);
      return ret.ToString();
  }
  /// <summary>
  /// Returns the index of the last occurrence of <paramref name="sub"/>, or -1
  /// when it does not occur. A null <paramref name="sub"/> raises a TypeError.
  /// </summary>
  public static int rfind(this string self, string sub) {
      if (sub == null) throw PythonOps.TypeError("expected string, got NoneType");
      return rfind(self, sub, 0, self.Length);
  }

  /// <summary>Returns the index of the last occurrence of <paramref name="sub"/> at or after <paramref name="start"/>, or -1.</summary>
  public static int rfind(this string self, string sub, int start) {
      if (sub == null) throw PythonOps.TypeError("expected string, got NoneType");
      if (start > self.Length) return -1;
      return rfind(self, sub, start, self.Length);
  }

  /// <summary>
  /// Returns the index of the last occurrence of <paramref name="sub"/> within
  /// self[start:end] (bounds normalized by PythonOps.FixSliceIndex), or -1.
  /// An empty <paramref name="sub"/> matches at the end of the range.
  /// </summary>
  public static int rfind(this string self, string sub, int start, int end) {
      if (sub == null) throw PythonOps.TypeError("expected string, got NoneType");
      if (start > self.Length) return -1;

      start = PythonOps.FixSliceIndex(start, self.Length);
      end = PythonOps.FixSliceIndex(end, self.Length);

      if (start > end) return -1;         // can't possibly match anything, not even an empty string
      if (sub.Length == 0) return end;    // match at the end
      if (end == 0) return -1;            // can't possibly find anything

      // Search backward from the last character of the range. Ordinal is
      // requested explicitly because the default overload is culture-sensitive.
      return self.LastIndexOf(sub, end - 1, end - start, StringComparison.Ordinal);
  }
  /// <summary>Like rfind over the whole string, but raises a ValueError when <paramref name="sub"/> is absent.</summary>
  public static int rindex(this string self, string sub) {
      int wholeLength = self.Length;
      return rindex(self, sub, 0, wholeLength);
  }

  /// <summary>Like rfind from <paramref name="start"/>, but raises a ValueError when <paramref name="sub"/> is absent.</summary>
  public static int rindex(this string self, string sub, int start) {
      int wholeLength = self.Length;
      return rindex(self, sub, start, wholeLength);
  }

  /// <summary>Like rfind within the given range, but raises a ValueError when <paramref name="sub"/> is absent.</summary>
  public static int rindex(this string self, string sub, int start, int end) {
      int position = rfind(self, sub, start, end);
      if (position != -1) {
          return position;
      }
      throw PythonOps.ValueError("substring {0} not found in {1}", sub, self);
  }
  /// <summary>Right-justifies the string in a field of <paramref name="width"/> characters, padding with spaces.</summary>
  public static string rjust(this string self, int width) {
      return rjust(self, width, ' ');
  }

  /// <summary>
  /// Right-justifies the string in a field of <paramref name="width"/>
  /// characters, padding on the left with <paramref name="fillchar"/>. The
  /// string is returned unchanged when it is already wide enough.
  /// </summary>
  public static string rjust(this string self, int width, char fillchar) {
      if (width - self.Length <= 0) {
          return self;
      }
      return self.PadLeft(width, fillchar);
  }
  /// <summary>
  /// Splits the string at the last occurrence of <paramref name="sep"/> and
  /// returns the 3-tuple (head, sep, tail). When the separator is not found the
  /// result is ("", "", self). A null separator raises a TypeError and an empty
  /// one a ValueError.
  /// </summary>
  public static PythonTuple rpartition(this string self, string sep) {
      if (sep == null) {
          throw PythonOps.TypeError("expected string, got NoneType");
      }
      if (sep.Length == 0) {
          throw PythonOps.ValueError("empty separator");
      }

      string head = "", middle = "", tail = "";

      if (self.Length != 0) {
          int at = rfind(self, sep);
          if (at == -1) {
              tail = self;
          } else {
              head = self.Substring(0, at);
              middle = sep;
              tail = self.Substring(at + sep.Length);
          }
      }

      return new PythonTuple(new object[3] { head, middle, tail });
  }
  /// <summary>With no separator and no split limit, rsplit is the same call as split.</summary>
  public static List rsplit(this string self) {
      char[] noSeparators = null;
      return SplitInternal(self, noSeparators, -1);
  }

  /// <summary>Splits on <paramref name="sep"/> from the right with no limit on the number of splits.</summary>
  public static List rsplit(this string self, string sep) {
      return rsplit(self, sep, -1);
  }

  /// <summary>
  /// Splits on <paramref name="sep"/> starting from the right, performing at
  /// most <paramref name="maxsplit"/> splits.
  /// </summary>
  public static List rsplit(this string self, string sep, int maxsplit) {
      // Splitting from the right is done by mirroring: reverse the input and
      // the separator, split from the left, put the pieces back in order and
      // finally un-reverse every piece.
      string mirroredSelf = Reverse(self);
      string mirroredSep = sep != null ? Reverse(sep) : sep;

      List pieces = split(mirroredSelf, mirroredSep, maxsplit);
      pieces.reverse();

      int pieceCount = pieces.__len__();
      if (pieceCount == 0) {
          return pieces;
      }

      List result = new List(pieceCount);
      foreach (string piece in pieces) {
          result.AddNoLock(Reverse(piece));
      }
      return result;
  }
  /// <summary>Removes trailing characters found in the Whitespace set.</summary>
  public static string rstrip(this string self) {
      return self.TrimEnd(Whitespace);
  }

  /// <summary>
  /// Removes trailing characters that appear in <paramref name="chars"/>.
  /// A null <paramref name="chars"/> strips white space; an empty
  /// <paramref name="chars"/> strips nothing.
  /// </summary>
  public static string rstrip(this string self, string chars) {
      if (chars == null) return rstrip(self);

      // TrimEnd treats an empty array as "trim white space", but an empty
      // character set must leave the string untouched.
      if (chars.Length == 0) return self;

      return self.TrimEnd(chars.ToCharArray());
  }
  /// <summary>Splits on white space with no limit on the number of splits.</summary>
  public static List split(this string self) {
      char[] noSeparators = null;
      return SplitInternal(self, noSeparators, -1);
  }

  /// <summary>Splits on <paramref name="sep"/> with no limit on the number of splits.</summary>
  public static List split(this string self, string sep) {
      return split(self, sep, -1);
  }

  /// <summary>
  /// Splits on <paramref name="sep"/>, performing at most
  /// <paramref name="maxsplit"/> splits. A null separator means white space;
  /// an empty separator raises a ValueError.
  /// </summary>
  public static List split(this string self, string sep, int maxsplit) {
      if (sep != null) {
          switch (sep.Length) {
              case 0:
                  throw PythonOps.ValueError("empty separator");
              case 1:
                  return SplitInternal(self, new char[] { sep[0] }, maxsplit);
              default:
                  return SplitInternal(self, sep, maxsplit);
          }
      }

      if (maxsplit != 0) {
          return SplitInternal(self, (char[])null, maxsplit);
      }

      // Corner case for CPython compatibility: no splits allowed, so the
      // result is the single string with its leading white space removed.
      List single = PythonOps.MakeEmptyList(1);
      single.AddNoLock(self.TrimStart());
      return single;
  }
  /// <summary>Splits the string into lines, dropping the line terminators.</summary>
  public static List splitlines(this string self) {
      return splitlines(self, false);
  }

  /// <summary>
  /// Splits the string into lines at '\n', '\r', "\r\n" and U+2028. The
  /// terminators are kept on each line only when <paramref name="keepends"/>
  /// is true. A trailing piece without a terminator is included when non-empty.
  /// </summary>
  public static List splitlines(this string self, bool keepends) {
      List lines = new List();
      int lineStart = 0;
      int pos = 0;

      while (pos < self.Length) {
          char ch = self[pos];
          bool isTerminator = ch == '\n' || ch == '\r' || ch == '\x2028';
          if (!isTerminator) {
              pos++;
              continue;
          }

          // "\r\n" counts as one two-character terminator.
          bool isCrLf = ch == '\r' && pos < self.Length - 1 && self[pos + 1] == '\n';
          int terminatorLength = isCrLf ? 2 : 1;

          int contentLength = pos - lineStart;
          int takeLength = keepends ? contentLength + terminatorLength : contentLength;
          lines.AddNoLock(self.Substring(lineStart, takeLength));

          lineStart = pos + terminatorLength;
          pos = lineStart;
      }

      // the last line needs to be accounted for if it is not empty
      if (pos - lineStart != 0) {
          lines.AddNoLock(self.Substring(lineStart, pos - lineStart));
      }
      return lines;
  }
  /// <summary>
  /// Tests whether the string starts with <paramref name="prefix"/>, which must be
  /// a string or a tuple of candidate prefixes (validated by TryStringOrTuple).
  /// </summary>
  public static bool startswith(this string self, object prefix) {
      TryStringOrTuple(prefix);

      PythonTuple candidates = prefix as PythonTuple;
      if (candidates != null) {
          return startswith(self, candidates);
      }
      return startswith(self, CastString(prefix));
  }
  /// <summary>
  /// Tests whether the string, beginning at <paramref name="start"/>, starts with
  /// <paramref name="prefix"/> (a string or a tuple of candidate prefixes).
  /// </summary>
  public static bool startswith(this string self, object prefix, int start) {
      TryStringOrTuple(prefix);

      PythonTuple candidates = prefix as PythonTuple;
      if (candidates != null) {
          return startswith(self, candidates, start);
      }
      return startswith(self, CastString(prefix), start);
  }
  /// <summary>
  /// Tests whether the slice [start, end) of the string starts with
  /// <paramref name="prefix"/> (a string or a tuple of candidate prefixes).
  /// </summary>
  public static bool startswith(this string self, object prefix, int start, int end) {
      TryStringOrTuple(prefix);

      PythonTuple candidates = prefix as PythonTuple;
      if (candidates != null) {
          return startswith(self, candidates, start, end);
      }
      return startswith(self, CastString(prefix), start, end);
  }
  /// <summary>
  /// Returns the string with leading and trailing whitespace removed.
  /// </summary>
  public static string strip(this string self) {
      string trimmed = self.Trim();
      return trimmed;
  }
  /// <summary>
  /// Returns the string with any leading/trailing characters found in
  /// <paramref name="chars"/> removed; a null <paramref name="chars"/> strips whitespace.
  /// </summary>
  public static string strip(this string self, string chars) {
      return chars == null
          ? strip(self)
          : self.Trim(chars.ToCharArray());
  }
  /// <summary>
  /// Returns a copy of the string with upper-case characters lowered and
  /// lower-case characters raised; other characters are left untouched.
  /// </summary>
  public static string swapcase(this string self) {
      StringBuilder swapped = new StringBuilder(self);
      int index = 0;
      while (index < swapped.Length) {
          char current = swapped[index];
          if (Char.IsUpper(current)) {
              swapped[index] = Char.ToLower(current);
          } else if (Char.IsLower(current)) {
              swapped[index] = Char.ToUpper(current);
          }
          index++;
      }
      return swapped.ToString();
  }
  /// <summary>
  /// Returns a title-cased copy: a cased character that follows an uncased one
  /// (or starts the string) is upper-cased, every other cased character is lower-cased.
  /// Null or empty input is returned unchanged.
  /// </summary>
  public static string title(this string self) {
      if (String.IsNullOrEmpty(self)) return self;

      char[] chars = self.ToCharArray();
      bool insideWord = false;
      for (int pos = 0; pos < chars.Length; pos++) {
          char ch = chars[pos];
          bool cased = Char.IsUpper(ch) || Char.IsLower(ch);
          if (cased) {
              chars[pos] = insideWord ? Char.ToLower(ch) : Char.ToUpper(ch);
          }
          // the next character continues a word only if this one was cased
          insideWord = cased;
      }
      return new string(chars);
  }
  // translate on a unicode string differs from that on an ascii string:
  // for unicode, the table argument is a dictionary with character ordinals
  // as keys and the replacement strings as values.
  /// <summary>
  /// Replaces every character whose ordinal is a key of <paramref name="table"/>
  /// with the mapped string; characters without an entry are copied unchanged.
  /// </summary>
  public static string translate(this string self, PythonDictionary table) {
      if (table == null) throw PythonOps.TypeError("expected dictionary or string, got NoneType");
      if (self.Length == 0) return self;

      StringBuilder translated = new StringBuilder();
      foreach (char ch in self) {
          int ordinal = (int)ch;
          if (table.__contains__(ordinal)) {
              translated.Append((string)table[ordinal]);
          } else {
              translated.Append(ch);
          }
      }
      return translated.ToString();
  }
  /// <summary>
  /// Translates the string through a 256-character <paramref name="table"/>
  /// without deleting any characters.
  /// </summary>
  public static string translate(this string self, string table) {
      string noDeletions = null;
      return translate(self, table, noDeletions);
  }
  /// <summary>
  /// Translates the string through a 256-character <paramref name="table"/>,
  /// first dropping every character that occurs in <paramref name="deletechars"/>.
  /// </summary>
  /// <remarks>
  /// Raises TypeError when <paramref name="table"/> is null and ValueError when it is
  /// not exactly 256 characters long. Characters whose ordinal is 256 or above have no
  /// table entry and are omitted from the result (unchanged from the previous behavior).
  /// </remarks>
  public static string translate(this string self, string table, string deletechars) {
      if (table == null) {
          throw PythonOps.TypeError("expected string, got NoneType");
      } else if (table.Length != 256) {
          throw PythonOps.ValueError("translation table must be 256 characters long");
      } else if (self.Length == 0) {
          return self;
      }

      // List<char> is about 2/3rds as expensive as StringBuilder appending individual
      // char's so we use that instead of a StringBuilder. The result can never be
      // longer than the input, so size the list up front.
      List<char> res = new List<char>(self.Length);
      for (int i = 0; i < self.Length; i++) {
          char ch = self[i];
          // IndexOf(char) is an ordinal search, same result as Contains(string)
          // but without allocating a one-character string per iteration.
          if (deletechars != null && deletechars.IndexOf(ch) >= 0) {
              continue;
          }

          int idx = (int)ch;
          if (idx < 256) {
              res.Add(table[idx]);
          }
      }
      return new String(res.ToArray());
  }
  /// <summary>
  /// Returns an upper-cased copy of the string.
  /// </summary>
  public static string upper(this string self) {
      string result = self.ToUpper();
      return result;
  }
  /// <summary>
  /// Left-pads the string with '0' up to <paramref name="width"/> characters.
  /// A leading '+' or '-' stays in front of the inserted zeros. Strings already
  /// at least <paramref name="width"/> long are returned unchanged.
  /// </summary>
  public static string zfill(this string self, int width) {
      int padding = width - self.Length;
      if (padding <= 0) return self;

      bool hasSign = self.Length > 0 && IsSign(self[0]);
      StringBuilder padded = new StringBuilder(width);
      if (hasSign) {
          padded.Append(self[0]);
      }
      padded.Append('0', padding);
      padded.Append(hasSign ? self.Substring(1) : self);
      return padded.ToString();
  }
  919. #endregion
  920. #region operators
  /// <summary>
  /// String concatenation operator: returns <paramref name="self"/> followed by <paramref name="other"/>.
  /// </summary>
  [SpecialName]
  public static string Add(string self, string other) {
      return String.Concat(self, other);
  }
  /// <summary>
  /// Concatenation operator: appends the character <paramref name="other"/> to <paramref name="self"/>.
  /// </summary>
  [SpecialName]
  public static string Add(string self, char other) {
      return self + other.ToString();
  }
  /// <summary>
  /// Concatenation operator: prepends the character <paramref name="self"/> to <paramref name="other"/>.
  /// </summary>
  [SpecialName]
  public static string Add(char self, string other) {
      return self.ToString() + other;
  }
  /// <summary>
  /// The % operator: formats <paramref name="self"/> with <paramref name="other"/>
  /// as the argument(s) using StringFormatter.
  /// </summary>
  [SpecialName]
  public static string Mod(CodeContext/*!*/ context, string self, object other) {
      StringFormatter formatter = new StringFormatter(context, self, other);
      return formatter.Format();
  }
  /// <summary>
  /// Reversed % operator: when <paramref name="other"/> is a string (or an
  /// Extensible&lt;string&gt;) it is used as the format and <paramref name="self"/>
  /// as the argument; otherwise NotImplemented is returned.
  /// </summary>
  [SpecialName]
  [return: MaybeNotImplemented]
  public static object Mod(CodeContext/*!*/ context, object other, string self) {
      string format = other as string;
      if (format == null) {
          Extensible<string> wrapped = other as Extensible<string>;
          if (wrapped == null) {
              return NotImplementedType.Value;
          }
          format = wrapped.Value;
      }
      return new StringFormatter(context, format, self).Format();
  }
  /// <summary>
  /// Repeats <paramref name="s"/> <paramref name="count"/> times. Non-positive counts
  /// give the empty string; OverflowError is raised when the result would exceed
  /// Int32.MaxValue characters.
  /// </summary>
  [SpecialName]
  public static string Multiply(string s, int count) {
      if (count <= 0) return String.Empty;
      if (count == 1) return s;

      int unit = s.Length;
      // compute the total in 64 bits so the overflow check itself cannot overflow
      if ((long)unit * (long)count > Int32.MaxValue) {
          throw PythonOps.OverflowError("repeated string is too long");
      }
      if (unit == 1) return new string(s[0], count);

      // StringBuilder.Insert with a repeat count is MUCH faster than appending in a loop
      StringBuilder repeated = new StringBuilder(unit * count);
      repeated.Insert(0, s, count);
      return repeated.ToString();
  }
  /// <summary>
  /// Reversed repetition operator: <paramref name="other"/> * <paramref name="self"/>.
  /// </summary>
  [SpecialName]
  public static string Multiply(int other, string self) {
      string repeated = Multiply(self, other);
      return repeated;
  }
  /// <summary>
  /// Repetition by an Index value; delegates to PythonOps.MultiplySequence
  /// with the string as the left operand.
  /// </summary>
  [SpecialName]
  public static object Multiply(string self, [NotNull]Index count) {
      object product = PythonOps.MultiplySequence<string>(Multiply, self, count, true);
      return product;
  }
  /// <summary>
  /// Reversed repetition by an Index value; delegates to PythonOps.MultiplySequence
  /// with the string as the right operand.
  /// </summary>
  [SpecialName]
  public static object Multiply([NotNull]Index count, string self) {
      object product = PythonOps.MultiplySequence<string>(Multiply, self, count, false);
      return product;
  }
  /// <summary>
  /// Repetition by an arbitrary object that converts to an index; raises a
  /// TypeError for objects that cannot be converted.
  /// </summary>
  [SpecialName]
  public static object Multiply(string self, object count) {
      int repetitions;
      if (!Converter.TryConvertToIndex(count, out repetitions)) {
          throw PythonOps.TypeErrorForUnIndexableObject(count);
      }
      return Multiply(self, repetitions);
  }
  /// <summary>
  /// Reversed repetition by an arbitrary object that converts to an index; raises a
  /// TypeError for objects that cannot be converted.
  /// </summary>
  [SpecialName]
  public static object Multiply(object count, string self) {
      int repetitions;
      if (!Converter.TryConvertToIndex(count, out repetitions)) {
          throw PythonOps.TypeErrorForUnIndexableObject(count);
      }
      return Multiply(repetitions, self);
  }
  /// <summary>
  /// Ordinal "greater than" comparison of two strings.
  /// </summary>
  [SpecialName]
  public static bool GreaterThan(string x, string y) {
      int order = string.CompareOrdinal(x, y);
      return order > 0;
  }
  /// <summary>
  /// Ordinal "less than" comparison of two strings.
  /// </summary>
  [SpecialName]
  public static bool LessThan(string x, string y) {
      int order = string.CompareOrdinal(x, y);
      return order < 0;
  }
  /// <summary>
  /// Ordinal "less than or equal" comparison of two strings.
  /// </summary>
  [SpecialName]
  public static bool LessThanOrEqual(string x, string y) {
      int order = string.CompareOrdinal(x, y);
      return order <= 0;
  }
  /// <summary>
  /// Ordinal "greater than or equal" comparison of two strings.
  /// </summary>
  [SpecialName]
  public static bool GreaterThanOrEqual(string x, string y) {
      int order = string.CompareOrdinal(x, y);
      return order >= 0;
  }
  /// <summary>
  /// Value equality of two strings.
  /// </summary>
  [SpecialName]
  public static bool Equals(string x, string y) {
      return x == y;
  }
  /// <summary>
  /// Value inequality of two strings.
  /// </summary>
  [SpecialName]
  public static bool NotEquals(string x, string y) {
      return x != y;
  }
  1016. #endregion
  /// <summary>
  /// Implicit conversion from a character to a one-character string
  /// (via RuntimeHelpers.CharToString).
  /// </summary>
  [SpecialName, ImplicitConversionMethod]
  public static string ConvertFromChar(char c) {
      string asString = RuntimeHelpers.CharToString(c);
      return asString;
  }
  /// <summary>
  /// Explicit conversion from a string to a character; only strings of length
  /// one convert, anything else raises a type-mismatch TypeError.
  /// </summary>
  [SpecialName, ExplicitConversionMethod]
  public static char ConvertToChar(string s) {
      if (s.Length != 1) {
          throw PythonOps.TypeErrorForTypeMismatch("char", s);
      }
      return s[0];
  }
  /// <summary>
  /// Implicit conversion from a string to an IEnumerable, using the same
  /// enumerable that StringOps.GetEnumerable produces.
  /// </summary>
  [SpecialName, ImplicitConversionMethod]
  public static IEnumerable ConvertToIEnumerable(string s) {
      IEnumerable items = StringOps.GetEnumerable(s);
      return items;
  }
  /// <summary>
  /// Ordinal three-way comparison of two strings, normalized to -1, 0 or +1.
  /// </summary>
  public static int __cmp__(string self, string obj) {
      return Math.Sign(string.CompareOrdinal(self, obj));
  }
  /// <summary>
  /// Ordinal three-way comparison of a string with the value of an
  /// ExtensibleString, normalized to -1, 0 or +1.
  /// </summary>
  public static int __cmp__(string self, ExtensibleString obj) {
      return Math.Sign(string.CompareOrdinal(self, obj.Value));
  }
  /// <summary>
  /// Ordinal three-way comparison of a string with a single character, normalized to
  /// -1, 0 or +1 like the other __cmp__ overloads.
  /// </summary>
  /// <remarks>
  /// The character is compared as a one-character string, so the whole of
  /// <paramref name="self"/> takes part in the comparison ("ab" is greater than 'a')
  /// and an empty <paramref name="self"/> compares less than any character instead of
  /// throwing.
  /// </remarks>
  public static int __cmp__(string self, char obj) {
      int ret = string.CompareOrdinal(self, obj.ToString());
      return ret == 0 ? 0 : (ret < 0 ? -1 : +1);
  }
  /// <summary>
  /// Returns a one-element tuple holding a new string built from
  /// <paramref name="self"/>; raises TypeError when <paramref name="self"/> is null.
  /// </summary>
  public static object __getnewargs__(CodeContext/*!*/ context, string self) {
      if (Object.ReferenceEquals(self, null)) {
          throw PythonOps.TypeErrorForBadInstance("__getnewargs__ requires a 'str' object but received a '{0}'", self);
      }

      // Cast self to object to avoid exception caused by trying to access SystemState on DefaultContext
      object copy = StringOps.__new__(context, TypeCache.String, (object)self);
      return PythonTuple.MakeTuple(copy);
  }
  /// <summary>
  /// str() of a string is the string itself.
  /// </summary>
  public static string __str__(string self) {
      string same = self;
      return same;
  }
  /// <summary>
  /// str() of an ExtensibleString returns the same instance, typed as its
  /// Extensible&lt;string&gt; base.
  /// </summary>
  public static Extensible<string> __str__(ExtensibleString self) {
      Extensible<string> same = self;
      return same;
  }
  1054. #region Internal implementation details
  /// <summary>
  /// Wraps <paramref name="s"/> in a PythonStringEnumerable, an enumerable that
  /// produces strings instead of chars.
  /// </summary>
  internal static IEnumerable GetEnumerable(string s) {
      IEnumerable items = new PythonStringEnumerable(s);
      return items;
  }
  /// <summary>
  /// Produces the quoted, escaped representation of <paramref name="s"/>. Single
  /// quotes are used unless the text contains a single quote and no double quote.
  /// The result gets a "u" prefix when ReprEncode reports the text as unicode.
  /// </summary>
  internal static string Quote(string s) {
      bool isUnicode = false;
      StringBuilder quoted = new StringBuilder(s.Length + 5);

      bool preferDouble = s.IndexOf('\'') != -1 && s.IndexOf('\"') == -1;
      char quote = preferDouble ? '\"' : '\'';

      quoted.Append(quote);
      quoted.Append(ReprEncode(s, quote, ref isUnicode));
      quoted.Append(quote);

      string text = quoted.ToString();
      return isUnicode ? "u" + text : text;
  }
  /// <summary>
  /// Escapes <paramref name="s"/> without a quote character (quote value 0);
  /// <paramref name="isUnicode"/> is passed through to the three-argument overload.
  /// </summary>
  internal static string ReprEncode(string s, ref bool isUnicode) {
      const char noQuote = (char)0;
      return ReprEncode(s, noQuote, ref isUnicode);
  }
  /// <summary>
  /// Looks up an encoding by (normalized) name. Returns true and a usable
  /// encoding instance on success; otherwise sets <paramref name="encoding"/>
  /// to null and returns false.
  /// </summary>
  internal static bool TryGetEncoding(string name, out Encoding encoding) {
      string normalized = NormalizeEncodingName(name);
#if SILVERLIGHT // EncodingInfo
      // Silverlight: only a fixed set of encodings is known
      switch (normalized) {
          case "us_ascii":
          case "ascii":
              encoding = PythonAsciiEncoding.Instance;
              return true;
          case "utf_8":
              encoding = (Encoding)new EncodingWrapper(Encoding.UTF8, new byte[0]).Clone();
              return true;
          case "utf_16_le":
              encoding = (Encoding)new EncodingWrapper(Encoding.Unicode, new byte[0]).Clone();
              return true;
          case "utf_16_be":
              encoding = (Encoding)new EncodingWrapper(Encoding.BigEndianUnicode, new byte[0]).Clone();
              return true;
          case "utf_8_sig":
              encoding = Encoding.UTF8;
              return true;
      }
#else
      // desktop: consult the codec table and hand out a clone of the registered encoding
      EncodingInfoWrapper registered;
      if (CodecsInfo.Codecs.TryGetValue(normalized, out registered)) {
          encoding = (Encoding)registered.GetEncoding().Clone();
          return true;
      }
#endif
      encoding = null;
      return false;
  }
  /// <summary>
  /// Converts a string whose characters are all below 0x100 into a byte array,
  /// one byte per character.
  /// </summary>
  /// <remarks>
  /// Raises UnicodeDecodeError for the first character at or above 0x100. The message
  /// reports the offending character's value and position; previously it formatted the
  /// not-yet-written output byte, which was always 0.
  /// </remarks>
  internal static byte[] ToByteArray(string s) {
      byte[] ret = new byte[s.Length];
      for (int i = 0; i < s.Length; i++) {
          if (s[i] < 0x100) ret[i] = (byte)s[i];
          else throw PythonOps.UnicodeDecodeError("'ascii' codec can't decode byte {0:X} in position {1}: ordinal not in range", (int)s[i], i);
      }
      return ret;
  }
  /// <summary>
  /// Converts every byte of <paramref name="bytes"/> into the character with the same value.
  /// </summary>
  internal static string FromByteArray(byte[] bytes) {
      int everything = bytes.Length;
      return FromByteArray(bytes, everything);
  }
  /// <summary>
  /// Builds a string from <paramref name="preamble"/> followed by
  /// <paramref name="bytes"/>, mapping each byte to the character with the same value.
  /// </summary>
  internal static string FromByteArray(byte[]preamble, byte[] bytes) {
      char[] chars = new char[preamble.Length + bytes.Length];
      int pos = 0;
      foreach (byte b in preamble) {
          chars[pos++] = (char)b;
      }
      foreach (byte b in bytes) {
          chars[pos++] = (char)b;
      }
      return new String(chars);
  }
  /// <summary>
  /// Converts at most <paramref name="maxBytes"/> leading bytes of
  /// <paramref name="bytes"/> into characters with the same values.
  /// </summary>
  internal static string FromByteArray(byte[] bytes, int maxBytes) {
      int count = Math.Min(bytes.Length, maxBytes);
      StringBuilder text = new StringBuilder(count);
      int index = 0;
      while (index < count) {
          text.Append((char)bytes[index]);
          index++;
      }
      return text.ToString();
  }
  /// <summary>
  /// Replaces every character above 0xff with a \uXXXX escape. When nothing needs
  /// escaping the original string instance is returned.
  /// </summary>
  internal static string RawUnicodeEscapeEncode(string s) {
      // in the common case we don't need to encode anything, so the
      // StringBuilder is created lazily (by ReprInit) on the first escape.
      StringBuilder encoded = null;
      int pos = 0;
      while (pos < s.Length) {
          char ch = s[pos];
          if (ch > 0xff) {
              ReprInit(ref encoded, s, pos);
              encoded.AppendFormat("\\u{0:x4}", (int)ch);
          } else if (encoded != null) {
              encoded.Append(ch);
          }
          pos++;
      }
      return encoded == null ? s : encoded.ToString();
  }
  1141. #endregion
  1142. #region Private implementation details
  /// <summary>
  /// Appends one join item to <paramref name="sb"/>. The item must be a string or
  /// convertible to a non-null string; otherwise a TypeError naming the item
  /// <paramref name="index"/> and its Python type is raised.
  /// </summary>
  private static void AppendJoin(object value, int index, StringBuilder sb) {
      string text = value as string;
      if (text == null) {
          bool converted = Converter.TryConvertToString(value, out text) && text != null;
          if (!converted) {
              throw PythonOps.TypeError("sequence item {0}: expected string, {1} found", index.ToString(), PythonOps.GetPythonTypeName(value));
          }
      }
      sb.Append(text);
  }
  /// <summary>
  /// Implements replacing the empty string: inserts <paramref name="new_"/> before
  /// each of the first <paramref name="maxsplit"/> characters, and once more at the
  /// end when <paramref name="maxsplit"/> exceeds the string length.
  /// </summary>
  private static string ReplaceEmpty(string self, string new_, int maxsplit) {
      if (maxsplit == 0) return self;

      int insertions = Math.Min(maxsplit, self.Length);
      StringBuilder ret = new StringBuilder(self.Length * (new_.Length + 1));

      // characters that receive an insertion in front of them
      for (int i = 0; i < insertions; i++) {
          ret.Append(new_);
          ret.Append(self[i]);
      }
      // remaining characters are copied verbatim
      for (int i = insertions; i < self.Length; i++) {
          ret.Append(self[i]);
      }
      // insertion budget left over: one more copy goes at the very end
      if (maxsplit > insertions) {
          ret.Append(new_);
      }
      return ret.ToString();
  }
  /// <summary>
  /// Returns the string with its characters in reverse order. Strings of length
  /// zero or one are returned as-is.
  /// </summary>
  private static string Reverse(string s) {
      if (s.Length < 2) return s;

      char[] reversed = s.ToCharArray();
      Array.Reverse(reversed);
      return new string(reversed);
  }
  /// <summary>
  /// Escapes <paramref name="s"/> for repr-style output: backslash, tab, newline and
  /// carriage return get symbolic escapes, the <paramref name="quote"/> character
  /// (when non-zero) is backslash-escaped, control and 0x7f-0xff characters become
  /// \xNN, and characters above 0xff become \uNNNN. <paramref name="isUnicode"/> is
  /// set to true when any character is at or above LowestUnicodeValue. When nothing
  /// needs escaping the original string instance is returned.
  /// </summary>
  private static string ReprEncode(string s, char quote, ref bool isUnicode) {
      // in the common case we don't need to encode anything, so the
      // StringBuilder is created lazily (by ReprInit) on the first escape.
      StringBuilder encoded = null;
      for (int pos = 0; pos < s.Length; pos++) {
          char ch = s[pos];
          if (ch >= LowestUnicodeValue) isUnicode = true;

          // symbolic escapes take precedence over every other rule
          string symbolic = null;
          if (ch == '\\') symbolic = "\\\\";
          else if (ch == '\t') symbolic = "\\t";
          else if (ch == '\n') symbolic = "\\n";
          else if (ch == '\r') symbolic = "\\r";

          if (symbolic != null) {
              ReprInit(ref encoded, s, pos);
              encoded.Append(symbolic);
          } else if (quote != 0 && ch == quote) {
              ReprInit(ref encoded, s, pos);
              encoded.Append('\\');
              encoded.Append(ch);
          } else if (ch < ' ' || (ch >= 0x7f && ch <= 0xff)) {
              ReprInit(ref encoded, s, pos);
              encoded.AppendFormat("\\x{0:x2}", (int)ch);
          } else if (ch > 0xff) {
              ReprInit(ref encoded, s, pos);
              encoded.AppendFormat("\\u{0:x4}", (int)ch);
          } else if (encoded != null) {
              encoded.Append(ch);
          }
      }
      return encoded == null ? s : encoded.ToString();
  }
  /// <summary>
  /// Lazily creates <paramref name="sb"/>, seeding it with the first
  /// <paramref name="c"/> characters of <paramref name="s"/>. Does nothing when
  /// the builder already exists.
  /// </summary>
  private static void ReprInit(ref StringBuilder sb, string s, int c) {
      if (sb == null) {
          sb = new StringBuilder(s, 0, c, s.Length);
      }
  }
  /// <summary>
  /// True when <paramref name="ch"/> is '+' or '-'.
  /// </summary>
  private static bool IsSign(char ch) {
      switch (ch) {
          case '+':
          case '-':
              return true;
          default:
              return false;
      }
  }
  /// <summary>
  /// Derives a normalized name for <paramref name="encoding"/>. With a non-zero code
  /// page the web name, then the mail header name, then "cp" + code page are tried in
  /// that order; without one the human readable name is used. On Silverlight only the
  /// web name is available.
  /// </summary>
  internal static string GetEncodingName(Encoding encoding) {
#if !SILVERLIGHT
      string name = null;

      // if we have a valid code page try and get a reasonable name. The
      // web names / mail display names tend to match CPython's terse names
      if (encoding.CodePage != 0) {
          if (encoding.IsBrowserDisplay) {
              name = encoding.WebName;
          }
          if (name == null && encoding.IsMailNewsDisplay) {
              name = encoding.HeaderName;
          }
          // otherwise use a code page number which also matches CPython
          name = name ?? ("cp" + encoding.CodePage);
      }

      // otherwise just finally fall back to the human readable name
      name = name ?? encoding.EncodingName;
#else
      // Silverlight only has web names
      string name = encoding.WebName;
#endif
      return NormalizeEncodingName(name);
  }
  /// <summary>
  /// Normalizes an encoding name for lookup: lower-cases it and replaces '-' and ' '
  /// with '_'. Returns null for a null <paramref name="name"/>.
  /// </summary>
  /// <remarks>
  /// Lower-casing is culture-invariant. Encoding names are identifiers, so the result
  /// must not depend on the current culture (under a Turkish culture the culture-aware
  /// ToLower maps 'I' to a dotless 'ı', which would break names such as "ISO-8859-1").
  /// </remarks>
  internal static string NormalizeEncodingName(string name) {
      if (name == null) {
          return null;
      }
      return name.ToLowerInvariant().Replace('-', '_').Replace(' ', '_');
  }
  /// <summary>
  /// Decodes <paramref name="s"/> (one character per byte) using the named encoding.
  /// The escape pseudo-encodings are handled by LiteralParser, built-in encodings via
  /// TryGetEncoding with a decoder fallback chosen from <paramref name="errors"/>, and
  /// anything else through user-registered codecs. Raises TypeError when
  /// <paramref name="encodingType"/> is neither a string nor Missing.Value, and
  /// LookupError for an unknown encoding.
  /// </summary>
  private static string RawDecode(CodeContext/*!*/ context, string s, object encodingType, string errors) {
      PythonContext pc = PythonContext.GetContext(context);

      string encoding = encodingType as string;
      if (encoding == null) {
          if (encodingType != Missing.Value) {
              throw PythonOps.TypeError("decode() expected string, got '{0}'", DynamicHelpers.GetPythonType(encodingType).Name);
          }
          encoding = pc.GetDefaultEncodingName();
      }

      // escape pseudo-encodings are parsed rather than decoded
      switch (NormalizeEncodingName(encoding)) {
          case "raw_unicode_escape":
              return LiteralParser.ParseString(s, true, true);
          case "unicode_escape":
              return LiteralParser.ParseString(s, false, true);
          case "string_escape":
              return LiteralParser.ParseString(s, false, false);
      }

      Encoding e;
      if (TryGetEncoding(encoding, out e)) {
#if !SILVERLIGHT // DecoderFallback
          // CLR's encoder exceptions have a 1-1 mapping w/ Python's encoder exceptions
          // so we just clone the encoding & set the fallback to throw in strict mode.
          e = (Encoding)e.Clone();

          DecoderFallback fallback;
          switch (errors) {
              case "backslashreplace":
              case "xmlcharrefreplace":
              case "strict":
                  fallback = DecoderFallback.ExceptionFallback;
                  break;
              case "replace":
                  fallback = DecoderFallback.ReplacementFallback;
                  break;
              case "ignore":
                  fallback = new PythonDecoderFallback(encoding,
                      s,
                      null);
                  break;
              default:
                  fallback = new PythonDecoderFallback(encoding,
                      s,
                      PythonOps.LookupEncodingError(context, errors));
                  break;
          }
          e.DecoderFallback = fallback;
#endif
          byte[] raw = ToByteArray(s);
          // skip a preamble that is already present in the input
          int offset = GetStartingOffset(e, raw);
          return e.GetString(raw, offset, raw.Length - offset);
      }

      // look for user-registered codecs
      PythonTuple codecTuple = PythonOps.LookupEncoding(context, encoding);
      if (codecTuple == null) {
          throw PythonOps.LookupError("unknown encoding: {0}", encoding);
      }
      return UserDecodeOrEncode(codecTuple[/*Modules.PythonCodecs.DecoderIndex*/1], s);
  }
  /// <summary>
  /// Gets the starting offset checking to see if the incoming bytes already include a preamble.
  /// Returns the preamble length when <paramref name="bytes"/> begins with the preamble of
  /// <paramref name="e"/>, and 0 otherwise.
  /// </summary>
  private static int GetStartingOffset(Encoding e, byte[] bytes) {
      byte[] preamble = e.GetPreamble();
      if (bytes.Length < preamble.Length) {
          return 0;
      }

      for (int i = 0; i < preamble.Length; i++) {
          if (bytes[i] != preamble[i]) {
              return 0;
          }
      }
      return preamble.Length;
  }
  /// <summary>
  /// Encodes <paramref name="s"/> using the named encoding and returns the bytes as a
  /// string (one character per byte, preamble first). The escape pseudo-encodings are
  /// handled directly, built-in encodings via TryGetEncoding with an encoder fallback
  /// chosen from <paramref name="errors"/>, and anything else through user-registered
  /// codecs. Raises TypeError when <paramref name="encodingType"/> is neither a string
  /// nor Missing.Value, and LookupError for an unknown encoding.
  /// </summary>
  private static string RawEncode(CodeContext/*!*/ context, string s, object encodingType, string errors) {
      string encoding = encodingType as string;
      if (encoding == null) {
          if (encodingType != Missing.Value) {
              throw PythonOps.TypeError("encode() expected string, got '{0}'", DynamicHelpers.GetPythonType(encodingType).Name);
          }
          encoding = PythonContext.GetContext(context).GetDefaultEncodingName();
      }

      // escape pseudo-encodings never reach the CLR encoders
      switch (NormalizeEncodingName(encoding)) {
          case "raw_unicode_escape":
              return RawUnicodeEscapeEncode(s);
          case "unicode_escape":
          case "string_escape": {
              bool ignored = false;
              return ReprEncode(s, '\'', ref ignored);
          }
      }

      Encoding e;
      if (TryGetEncoding(encoding, out e)) {
#if !SILVERLIGHT
          // CLR's encoder exceptions have a 1-1 mapping w/ Python's encoder exceptions
          // so we just clone the encoding & set the fallback to throw in strict mode
          e = (Encoding)e.Clone();

          EncoderFallback fallback;
          switch (errors) {
              case "strict":
                  fallback = EncoderFallback.ExceptionFallback;
                  break;
              case "replace":
                  fallback = EncoderFallback.ReplacementFallback;
                  break;
              case "backslashreplace":
                  fallback = new BackslashEncoderReplaceFallback();
                  break;
              case "xmlcharrefreplace":
                  fallback = new XmlCharRefEncoderReplaceFallback();
                  break;
              case "ignore":
                  fallback = new PythonEncoderFallback(encoding,
                      s,
                      null);
                  break;
              default:
                  fallback = new PythonEncoderFallback(encoding,
                      s,
                      PythonOps.LookupEncodingError(context, errors));
                  break;
          }
          e.EncoderFallback = fallback;
#endif
          return FromByteArray(e.GetPreamble(), e.GetBytes(s));
      }

      // look for user-registered codecs
      PythonTuple codecTuple = PythonOps.LookupEncoding(context, encoding);
      if (codecTuple == null) {
          throw PythonOps.LookupError("unknown encoding: {0}", encoding);
      }
      return UserDecodeOrEncode(codecTuple[/*Modules.PythonCodecs.EncoderIndex*/0], s);
  }
  /// <summary>
  /// Calls a user-registered codec <paramref name="function"/> with
  /// <paramref name="data"/>. The result may be a string, or a tuple whose first item
  /// is converted to the resulting string; any other result raises TypeError.
  /// </summary>
  private static string UserDecodeOrEncode(object function, string data) {
      object outcome = PythonCalls.Call(function, data);

      string text = AsString(outcome);
      if (text != null) return text;

      // tuple is string, bytes used, we just want the string...
      PythonTuple pair = outcome as PythonTuple;
      if (pair != null) {
          return Converter.ConvertToString(pair[0]);
      }
      throw PythonOps.TypeErrorForBadInstance("expected tuple, but found {0}", outcome);
  }
  1378. #if !SILVERLIGHT
  /// <summary>
  /// Holds the table of built-in codecs, keyed by encoding name and aliases.
  /// The table is built once from Encoding.GetEncodings().
  /// </summary>
  /// <remarks>
  /// TryGetEncoding normalizes a name (lower case, '-' and ' ' replaced by '_') before
  /// looking it up here, so only keys in normalized form are reachable through it.
  /// </remarks>
  class CodecsInfo {
      public static readonly Dictionary<string, EncodingInfoWrapper> Codecs = MakeCodecsDict();

      /// <summary>
      /// Builds the codec table: well-known aliases first, then every encoding under its
      /// normalized name, its Windows code page and its code page number.
      /// </summary>
      private static Dictionary<string, EncodingInfoWrapper> MakeCodecsDict() {
          Dictionary<string, EncodingInfoWrapper> d = new Dictionary<string, EncodingInfoWrapper>();
          EncodingInfo[] encs = Encoding.GetEncodings();
          for (int i = 0; i < encs.Length; i++) {
              string normalizedName = NormalizeEncodingName(encs[i].Name);

              // setup well-known mappings, for everything
              // else we'll store as lower case w/ _
              switch (normalizedName) {
                  case "us_ascii":
                      d["cp" + encs[i].CodePage.ToString()] = d[normalizedName] = d["us"] = d["ascii"] = d["646"] = d["us_ascii"] = new AsciiEncodingInfoWrapper();
                      continue;
                  case "iso_8859_1":
                      d["8859"] = d["latin_1"] = d["latin1"] = d["iso 8859_1"] = d["iso8859_1"] = d["cp819"] = d["819"] = d["latin"] = d["latin1"] = d["l1"] = encs[i];
                      break;
                  case "utf_7":
                      // "unicode-1-1-utf-7" can never match a normalized lookup name, so the
                      // alias is also published in its normalized form; the old key is kept.
                      d["u7"] = d["unicode-1-1-utf-7"] = d["unicode_1_1_utf_7"] = encs[i];
                      break;
                  case "utf_8":
                      d["utf_8_sig"] = encs[i];
                      d["utf_8"] = d["utf8"] = d["u8"] = new EncodingInfoWrapper(encs[i], new byte[0]);
                      continue;
                  case "utf_16":
                      d["utf_16_le"] = d["utf_16le"] = new EncodingInfoWrapper(encs[i], new byte[0]);
                      break;
                  case "unicodefffe": // big endian unicode
                      // strip off the pre-amble, CPython doesn't include it.
                      d["utf_16_be"] = d["utf_16be"] = new EncodingInfoWrapper(encs[i], new byte[0]);
                      break;
              }

              // publish under normalized name (all lower cases, -s replaced with _s)
              d[normalizedName] = encs[i];
              // publish under Windows code page as well...
              d["windows-" + encs[i].GetEncoding().WindowsCodePage.ToString()] = encs[i];
              // publish under code page number as well...
              d["cp" + encs[i].CodePage.ToString()] = d[encs[i].CodePage.ToString()] = encs[i];
          }

          d["raw_unicode_escape"] = new EncodingInfoWrapper(new UnicodeEscapeEncoding(true));
          d["unicode_escape"] = new EncodingInfoWrapper(new UnicodeEscapeEncoding(false));
#if DEBUG
          // all codecs should be stored in lowercase because we only look up from lowercase strings
          foreach (KeyValuePair<string, EncodingInfoWrapper> kvp in d) {
              Debug.Assert(kvp.Key.ToLower() == kvp.Key);
          }
#endif
          return d;
      }
  }
  /// <summary>
  /// Codec table entry. Wraps either a ready Encoding instance or an EncodingInfo,
  /// optionally paired with a replacement preamble.
  /// </summary>
  class EncodingInfoWrapper {
      private EncodingInfo _info;
      private Encoding _encoding;
      private byte[] _preamble;

      /// <summary>Wraps an already constructed encoding.</summary>
      public EncodingInfoWrapper(Encoding enc) {
          _encoding = enc;
      }

      /// <summary>Wraps an EncodingInfo; the encoding is obtained from it on demand.</summary>
      public EncodingInfoWrapper(EncodingInfo info) {
          _info = info;
      }

      /// <summary>Wraps an EncodingInfo whose encoding should report <paramref name="preamble"/> as its preamble.</summary>
      public EncodingInfoWrapper(EncodingInfo info, byte[] preamble) {
          _info = info;
          _preamble = preamble;
      }

      /// <summary>
      /// Returns the wrapped encoding instance when there is one; otherwise the encoding
      /// from the EncodingInfo, wrapped in an EncodingWrapper when a preamble was given.
      /// </summary>
      public virtual Encoding GetEncoding() {
          if (_encoding != null) {
              return _encoding;
          }

          Encoding underlying = _info.GetEncoding();
          if (_preamble != null) {
              return new EncodingWrapper(underlying, _preamble);
          }
          return underlying;
      }

      /// <summary>Allows an EncodingInfo to be stored directly in the codec table.</summary>
      public static implicit operator EncodingInfoWrapper(EncodingInfo info) {
          return new EncodingInfoWrapper(info);
      }
  }
  /// <summary>
  /// Codec table entry for ASCII: always yields PythonAsciiEncoding.Instance and
  /// carries no EncodingInfo.
  /// </summary>
  class AsciiEncodingInfoWrapper : EncodingInfoWrapper {
      public AsciiEncodingInfoWrapper()
          : base((EncodingInfo)null) {
      }

      /// <summary>Returns the shared PythonAsciiEncoding instance.</summary>
      public override Encoding GetEncoding() {
          Encoding ascii = PythonAsciiEncoding.Instance;
          return ascii;
      }
  }
  1461. #endif
  /// <summary>
  /// An Encoding that forwards all work to an inner encoding but reports a
  /// caller-supplied preamble. Before each forwarded call the wrapper's current
  /// encoder/decoder fallback is copied onto the inner encoding (non-Silverlight builds).
  /// </summary>
  class EncodingWrapper : Encoding {
      private byte[] _preamble;
      private Encoding _encoding;

      public EncodingWrapper(Encoding encoding, byte[] preamable) {
          _encoding = encoding;
          _preamble = preamable;
      }

      // pushes this wrapper's encoder fallback down to the inner encoding
      private void SyncEncoderFallback() {
#if !SILVERLIGHT
          _encoding.EncoderFallback = EncoderFallback;
#endif
      }

      // pushes this wrapper's decoder fallback down to the inner encoding
      private void SyncDecoderFallback() {
#if !SILVERLIGHT
          _encoding.DecoderFallback = DecoderFallback;
#endif
      }

      public override int GetByteCount(char[] chars, int index, int count) {
          SyncEncoderFallback();
          return _encoding.GetByteCount(chars, index, count);
      }

      public override int GetBytes(char[] chars, int charIndex, int charCount, byte[] bytes, int byteIndex) {
          SyncEncoderFallback();
          return _encoding.GetBytes(chars, charIndex, charCount, bytes, byteIndex);
      }

      public override int GetCharCount(byte[] bytes, int index, int count) {
          SyncDecoderFallback();
          return _encoding.GetCharCount(bytes, index, count);
      }

      public override int GetChars(byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex) {
          SyncDecoderFallback();
          return _encoding.GetChars(bytes, byteIndex, byteCount, chars, charIndex);
      }

      public override int GetMaxByteCount(int charCount) {
          SyncEncoderFallback();
          return _encoding.GetMaxByteCount(charCount);
      }

      public override int GetMaxCharCount(int byteCount) {
          SyncDecoderFallback();
          return _encoding.GetMaxCharCount(byteCount);
      }

      /// <summary>Returns the preamble supplied at construction, not the inner encoding's.</summary>
      public override byte[] GetPreamble() {
          return _preamble;
      }

      /// <summary>Clones the wrapper together with its inner encoding.</summary>
      public override object Clone() {
          // need to call base.Clone to be marked as read/write
          EncodingWrapper copy = (EncodingWrapper)base.Clone();
          copy._encoding = (Encoding)_encoding.Clone();
          return copy;
      }
  }
  /// <summary>
  /// Result of splitting the empty string: a list holding one empty string when an
  /// explicit separator was given (<paramref name="separators"/> is true), an empty
  /// list otherwise.
  /// </summary>
  private static List SplitEmptyString(bool separators) {
      List pieces = PythonOps.MakeEmptyList(1);
      if (!separators) {
          return pieces;
      }
      pieces.AddNoLock(String.Empty);
      return pieces;
  }
  /// <summary>
  /// Splits <paramref name="self"/> on the given separator characters, performing at
  /// most <paramref name="maxsplit"/> splits (negative means unlimited). With null
  /// <paramref name="seps"/> empty entries are removed from the result.
  /// </summary>
  private static List SplitInternal(string self, char[] seps, int maxsplit) {
      if (self == String.Empty) {
          return SplitEmptyString(seps != null);
      }

      // If the optional second argument sep is absent or None, the words are separated
      // by arbitrary strings of whitespace characters (space, tab, newline, return, formfeed);
      int maxPieces = (maxsplit < 0) ? Int32.MaxValue : maxsplit + 1;
      StringSplitOptions options = (seps == null)
          ? StringSplitOptions.RemoveEmptyEntries
          : StringSplitOptions.None;
      string[] pieces = StringUtils.Split(self, seps, maxPieces, options);

      List ret = PythonOps.MakeEmptyList(pieces.Length);
      for (int i = 0; i < pieces.Length; i++) {
          ret.AddNoLock(pieces[i]);
      }
      return ret;
  }
  /// <summary>
  /// Splits <paramref name="self"/> on a string separator, performing at most
  /// <paramref name="maxsplit"/> splits (negative means unlimited). Empty entries are kept.
  /// </summary>
  private static List SplitInternal(string self, string separator, int maxsplit) {
      if (self == String.Empty) {
          return SplitEmptyString(separator != null);
      }

      int maxPieces = (maxsplit < 0) ? Int32.MaxValue : maxsplit + 1;
      string[] pieces = StringUtils.Split(self, separator, maxPieces, StringSplitOptions.None);

      List ret = PythonOps.MakeEmptyList(pieces.Length);
      for (int i = 0; i < pieces.Length; i++) {
          ret.AddNoLock(pieces[i]);
      }
      return ret;
  }
  /// <summary>
  /// Validates that <paramref name="prefix"/> is a string, an Extensible&lt;string&gt;
  /// or a PythonTuple; raises TypeError for null or any other type.
  /// </summary>
  private static void TryStringOrTuple(object prefix) {
      if (prefix == null) {
          throw PythonOps.TypeError("expected string or Tuple, got NoneType");
      }

      bool acceptable = prefix is string || prefix is PythonTuple || prefix is Extensible<string>;
      if (!acceptable) {
          throw PythonOps.TypeError("expected string or Tuple, got {0} Type", prefix.GetType());
      }
  }
  /// <summary>
  /// Returns <paramref name="obj"/> as a string (via AsString); raises TypeError
  /// naming the object's Python type when it is not a string.
  /// </summary>
  private static string GetString(object obj) {
      string text = AsString(obj);
      if (text != null) {
          return text;
      }
      throw PythonOps.TypeError("expected string, got {0}", DynamicHelpers.GetPythonType(obj).Name);
  }
  /// <summary>
  /// True when <paramref name="self"/> ends with <paramref name="suffix"/>.
  /// </summary>
  /// <remarks>
  /// The comparison is ordinal (character by character), matching the ordinal
  /// comparison operators in this file, rather than the culture-sensitive default
  /// of String.EndsWith(string).
  /// </remarks>
  private static bool endswith(string self, string suffix) {
      return self.EndsWith(suffix, StringComparison.Ordinal);
  }
  // Indexing is 0-based. Need to deal with negative indices
  // (which mean count backwards from end of sequence)
  // +---+---+---+---+---+
  // | a | b | c | d | e |
  // +---+---+---+---+---+
  //   0   1   2   3   4
  //  -5  -4  -3  -2  -1
  /// <summary>
  /// True when the part of <paramref name="self"/> beginning at
  /// <paramref name="start"/> ends with <paramref name="suffix"/>. A negative
  /// <paramref name="start"/> counts from the end (clamped to 0); a start beyond
  /// the end yields false. The comparison is ordinal.
  /// </summary>
  private static bool endswith(string self, string suffix, int start) {
      int len = self.Length;
      if (start > len) return false;
      // map the negative index to its positive counterpart
      if (start < 0) {
          start += len;
          if (start < 0) start = 0;
      }
      return self.Substring(start).EndsWith(suffix, StringComparison.Ordinal);
  }
  // With optional start, test beginning at that position (the char at that index is
  // included in the test). With optional end, stop comparing at that position (the
  // char at that index is not included in the test)
  /// <summary>
  /// True when the slice [start, end) of <paramref name="self"/> ends with
  /// <paramref name="suffix"/>. Negative indices count from the end; a start beyond
  /// the end, an end before the beginning, or an end before the start yields false.
  /// The comparison is ordinal.
  /// </summary>
  private static bool endswith(string self, string suffix, int start, int end) {
      int len = self.Length;
      if (start > len) return false;
      // map the negative indices to their positive counterparts
      else if (start < 0) {
          start += len;
          if (start < 0) start = 0;
      }
      if (end >= len) return self.Substring(start).EndsWith(suffix, StringComparison.Ordinal);
      else if (end < 0) {
          end += len;
          if (end < 0) return false;
      }
      if (end < start) return false;
      return self.Substring(start, end - start).EndsWith(suffix, StringComparison.Ordinal);
  }
  /// <summary>
  /// Returns true when <paramref name="self"/> ends with any element of
  /// <paramref name="suffix"/>. Elements are converted with GetString, which raises a
  /// TypeError for non-string items; items after the first match are not examined.
  /// </summary>
  private static bool endswith(string self, PythonTuple suffix) {
      foreach (object obj in suffix) {
          // Ordinal comparison: the default EndsWith overload is culture-sensitive.
          if (self.EndsWith(GetString(obj), StringComparison.Ordinal)) {
              return true;
          }
      }
      return false;
  }
  /// <summary>
  /// Returns true when any element of <paramref name="suffix"/> satisfies the
  /// (string, string, int) endswith overload for the given <paramref name="start"/>.
  /// </summary>
  private static bool endswith(string self, PythonTuple suffix, int start) {
      foreach (object candidate in suffix) {
          bool matched = endswith(self, GetString(candidate), start);
          if (matched) return true;
      }
      return false;
  }
  /// <summary>
  /// Returns true when any element of <paramref name="suffix"/> satisfies the
  /// (string, string, int, int) endswith overload for the given range.
  /// </summary>
  private static bool endswith(string self, PythonTuple suffix, int start, int end) {
      foreach (object candidate in suffix) {
          bool matched = endswith(self, GetString(candidate), start, end);
          if (matched) return true;
      }
      return false;
  }
  /// <summary>
  /// Returns true when <paramref name="self"/> begins with <paramref name="prefix"/>.
  /// </summary>
  private static bool startswith(string self, string prefix) {
      // Compare code units exactly: the single-argument String.StartsWith overload is
      // culture-sensitive and, for example, treats zero-width characters as ignorable.
      return self.StartsWith(prefix, StringComparison.Ordinal);
  }
  /// <summary>
  /// Returns true when the part of <paramref name="self"/> beginning at
  /// <paramref name="start"/> begins with <paramref name="prefix"/>. Negative start
  /// values count from the end; a start beyond the end always yields false.
  /// </summary>
  private static bool startswith(string self, string prefix, int start) {
      int len = self.Length;
      if (start > len) return false;

      // map the negative index to its positive counterpart, clamping at the beginning
      if (start < 0) {
          start += len;
          if (start < 0) start = 0;
      }

      // Ordinal comparison: the default StartsWith overload is culture-sensitive.
      return self.Substring(start).StartsWith(prefix, StringComparison.Ordinal);
  }
  /// <summary>
  /// Tests whether the slice of <paramref name="self"/> from <paramref name="start"/>
  /// (inclusive) to <paramref name="end"/> (exclusive) begins with <paramref name="prefix"/>.
  /// Negative indices count from the end of the string. A start beyond the end, an end
  /// that is still negative after adjustment, or an end before start yields false.
  /// </summary>
  private static bool startswith(string self, string prefix, int start, int end) {
      int len = self.Length;
      if (start > len) return false;

      // map the negative indices to their positive counterparts
      if (start < 0) {
          start += len;
          if (start < 0) start = 0;
      }

      // Ordinal comparison throughout: the default StartsWith overload is culture-sensitive.
      if (end >= len) {
          return self.Substring(start).StartsWith(prefix, StringComparison.Ordinal);
      }
      if (end < 0) {
          end += len;
          if (end < 0) return false;
      }
      if (end < start) return false;

      return self.Substring(start, end - start).StartsWith(prefix, StringComparison.Ordinal);
  }
  /// <summary>
  /// Returns true when <paramref name="self"/> begins with any element of
  /// <paramref name="prefix"/>. Elements are converted with GetString, which raises a
  /// TypeError for non-string items; items after the first match are not examined.
  /// </summary>
  private static bool startswith(string self, PythonTuple prefix) {
      foreach (object obj in prefix) {
          // Ordinal comparison: the default StartsWith overload is culture-sensitive.
          if (self.StartsWith(GetString(obj), StringComparison.Ordinal)) {
              return true;
          }
      }
      return false;
  }
  /// <summary>
  /// Returns true when any element of <paramref name="prefix"/> satisfies the
  /// (string, string, int) startswith overload for the given <paramref name="start"/>.
  /// </summary>
  private static bool startswith(string self, PythonTuple prefix, int start) {
      foreach (object candidate in prefix) {
          bool matched = startswith(self, GetString(candidate), start);
          if (matched) return true;
      }
      return false;
  }
  /// <summary>
  /// Returns true when any element of <paramref name="prefix"/> satisfies the
  /// (string, string, int, int) startswith overload for the given range.
  /// </summary>
  private static bool startswith(string self, PythonTuple prefix, int start, int end) {
      foreach (object candidate in prefix) {
          bool matched = startswith(self, GetString(candidate), start, end);
          if (matched) return true;
      }
      return false;
  }
  /// <summary>
  /// Wraps a non-null string so it can be enumerated through StringEnumerator,
  /// which yields one single-character string per character.
  /// </summary>
  private class PythonStringEnumerable : IEnumerable {
      private readonly string/*!*/ _value;

      public PythonStringEnumerable(string s) {
          Assert.NotNull(s);
          this._value = s;
      }

      /// <summary>Creates a fresh enumerator over the wrapped string.</summary>
      public IEnumerator GetEnumerator() {
          IEnumerator enumerator = StringEnumerator(_value);
          return enumerator;
      }
  }
  /// <summary>
  /// Lazily yields each character of <paramref name="str"/>, in order, converted to a
  /// string via RuntimeHelpers.CharToString.
  /// </summary>
  internal static IEnumerator<string> StringEnumerator(string str) {
      foreach (char ch in str) {
          yield return RuntimeHelpers.CharToString(ch);
      }
  }
  1688. #endregion
  1689. #region Unicode Encode/Decode Fallback Support
  1690. #if !SILVERLIGHT // EncoderFallbackBuffer
  /// <summary>
  /// When an error occurs while encoding or decoding strings, CPython supports several
  /// different behaviors, and in addition supports user-extensible behaviors. The default
  /// behaviors are covered already - both sides support throwing and replacing. For custom
  /// behaviors a single fallback is defined for decoding and one for encoding; each calls
  /// the Python function to obtain the replacement.
  ///
  /// The handler is called with a UnicodeEncodeError or UnicodeDecodeError object
  /// initialized with:
  ///     encoding    (string, the encoding the user requested)
  ///     object      (the original string being processed)
  ///     start       (the start of the invalid sequence)
  ///     end         (the end of the invalid characters)
  ///     reason      (the error text; this class always passes 'unexpected code byte')
  ///
  /// The handler returns a tuple of (unicode, int) where unicode is the replacement string
  /// and int is an index where encoding should continue. The tuple is validated by
  /// PythonDecoderFallbackBuffer.CheckReplacementTuple; only the replacement string is
  /// consumed by this buffer.
  /// </summary>
  private class PythonEncoderFallbackBuffer : EncoderFallbackBuffer {
      private object _handler;
      private string _encodingName, _source;
      private string _replacement;
      private int _position;

      public PythonEncoderFallbackBuffer(string encoding, string str, object callable) {
          _encodingName = encoding;
          _source = str;
          _handler = callable;
      }

      /// <summary>Number of replacement characters not yet handed out.</summary>
      public override int Remaining {
          get {
              return _replacement == null ? 0 : _replacement.Length - _position;
          }
      }

      /// <summary>Single unencodable character: ask the handler about one position.</summary>
      public override bool Fallback(char charUnknown, int index) {
          return DoPythonFallback(index, 1);
      }

      /// <summary>Unencodable surrogate pair: ask the handler about two positions.</summary>
      public override bool Fallback(char charUnknownHigh, char charUnknownLow, int index) {
          return DoPythonFallback(index, 2);
      }

      /// <summary>Returns the next replacement character, or Char.MinValue when exhausted.</summary>
      public override char GetNextChar() {
          bool exhausted = _replacement == null || _position >= _replacement.Length;
          if (exhausted) {
              return Char.MinValue;
          }

          char next = _replacement[_position];
          _position++;
          return next;
      }

      /// <summary>Steps back one replacement character when possible.</summary>
      public override bool MovePrevious() {
          if (_position <= 0) {
              return false;
          }

          _position--;
          return true;
      }

      /// <summary>Discards any pending replacement before delegating to the base reset.</summary>
      public override void Reset() {
          _position = 0;
          _replacement = null;
          base.Reset();
      }

      // Invokes the user handler (if any) for the range [index, index + length) and
      // stores its replacement text; returns false when no handler was supplied.
      private bool DoPythonFallback(int index, int length) {
          if (_handler == null) {
              return false;
          }

          // build the exception object handed to the user function
          PythonExceptions._UnicodeEncodeError error = new PythonExceptions._UnicodeEncodeError();
          error.__init__(_encodingName, _source, index, index + length, "unexpected code byte");

          // call the user function and validate what it returned
          object result = PythonCalls.Call(_handler, error);
          string text = PythonDecoderFallbackBuffer.CheckReplacementTuple(result, "encoding");

          // expose the replacement from its first character
          _replacement = text;
          _position = 0;
          return true;
      }
  }
  /// <summary>
  /// Encoder fallback that carries the encoding name, the source string and a Python
  /// callable, and hands them to a new PythonEncoderFallbackBuffer on request.
  /// </summary>
  class PythonEncoderFallback : EncoderFallback {
      private object _handler;
      private string _data;
      private string _encodingName;

      public PythonEncoderFallback(string encoding, string data, object callable) {
          _encodingName = encoding;
          _data = data;
          _handler = callable;
      }

      /// <summary>Reports Int32.MaxValue as the maximum replacement length.</summary>
      public override int MaxCharCount {
          get { return Int32.MaxValue; }
      }

      /// <summary>Creates a buffer bound to this fallback's encoding, data and callable.</summary>
      public override EncoderFallbackBuffer CreateFallbackBuffer() {
          EncoderFallbackBuffer buffer = new PythonEncoderFallbackBuffer(_encodingName, _data, _handler);
          return buffer;
      }
  }
  /// <summary>
  /// Decoder fallback buffer that asks a Python callable for replacement text whenever
  /// bytes cannot be decoded, then serves that text one character at a time.
  /// </summary>
  private class PythonDecoderFallbackBuffer : DecoderFallbackBuffer {
      private object _handler;
      private string _encodingName, _source;
      private string _replacement;
      private int _position;

      public PythonDecoderFallbackBuffer(string encoding, string str, object callable) {
          _handler = callable;
          _source = str;
          _encodingName = encoding;
      }

      /// <summary>
      /// Calls the handler with a UnicodeDecodeError describing the bad bytes and stores the
      /// replacement it returns; returns false when no handler was supplied.
      /// </summary>
      public override bool Fallback(byte[] bytesUnknown, int index) {
          if (_handler == null) {
              return false;
          }

          // build the exception object handed to the user function
          PythonExceptions._UnicodeDecodeError error = new PythonExceptions._UnicodeDecodeError();
          error.__init__(_encodingName, _source, index, index + bytesUnknown.Length, "unexpected code byte");

          // call the user function and validate what it returned
          object result = PythonCalls.Call(_handler, error);
          string text = CheckReplacementTuple(result, "decoding");

          // expose the replacement from its first character
          _replacement = text;
          _position = 0;
          return true;
      }

      /// <summary>Returns the next replacement character, or Char.MinValue when exhausted.</summary>
      public override char GetNextChar() {
          bool exhausted = _replacement == null || _position >= _replacement.Length;
          if (exhausted) {
              return Char.MinValue;
          }

          char next = _replacement[_position];
          _position++;
          return next;
      }

      /// <summary>Steps back one replacement character when possible.</summary>
      public override bool MovePrevious() {
          if (_position <= 0) {
              return false;
          }

          _position--;
          return true;
      }

      /// <summary>Number of replacement characters not yet handed out.</summary>
      public override int Remaining {
          get {
              return _replacement == null ? 0 : _replacement.Length - _position;
          }
      }

      /// <summary>Discards any pending replacement before delegating to the base reset.</summary>
      public override void Reset() {
          _position = 0;
          _replacement = null;
          base.Reset();
      }

      /// <summary>
      /// Validates a handler result: it must be a 2-element PythonTuple whose first item
      /// converts to a string and whose second item converts to an Int32. Returns the
      /// string; the integer is checked but not used. Raises a Python TypeError otherwise.
      /// </summary>
      internal static string CheckReplacementTuple(object res, string encodeOrDecode) {
          string replacement = null;
          int ignoredIndex;
          PythonTuple tuple = res as PythonTuple;

          // verify the result is sane: (str, int), converted in that order
          bool valid = tuple != null
              && tuple.__len__() == 2
              && Converter.TryConvertToString(tuple[0], out replacement)
              && Converter.TryConvertToInt32(tuple[1], out ignoredIndex);

          if (valid) {
              return replacement;
          }

          throw PythonOps.TypeError("{1} error handler must return tuple containing (str, int), got {0}", PythonOps.GetPythonTypeName(res), encodeOrDecode);
      }
  }
  /// <summary>
  /// Decoder fallback that carries the encoding name, the source string and a Python
  /// callable, and hands them to a new PythonDecoderFallbackBuffer on request.
  /// </summary>
  class PythonDecoderFallback : DecoderFallback {
      private readonly object _function;
      private readonly string _str;
      private readonly string _enc;

      public PythonDecoderFallback(string encoding, string data, object callable) {
          _function = callable;
          _str = data;
          _enc = encoding;
      }

      /// <summary>Creates a buffer bound to this fallback's encoding, data and callable.</summary>
      public override DecoderFallbackBuffer CreateFallbackBuffer() {
          return new PythonDecoderFallbackBuffer(_enc, _str, _function);
      }

      /// <summary>
      /// The replacement comes from a user callable, so no fixed bound exists. Report
      /// Int32.MaxValue, as PythonEncoderFallback does, instead of throwing from the getter.
      /// </summary>
      public override int MaxCharCount {
          get { return Int32.MaxValue; }
      }
  }
  /// <summary>
  /// Encoder fallback that replaces an unencodable character with a backslash escape:
  /// \xNN for values up to 0xFF and \uNNNN for larger values (lower-case hex digits).
  /// </summary>
  class BackslashEncoderReplaceFallback : EncoderFallback {
      // Longest replacement emitted for one character: '\', 'u' and four hex digits.
      private const int MaxEscapeLength = 6;

      class BackslashReplaceFallbackBuffer : EncoderFallbackBuffer {
          private const string HexDigits = "0123456789abcdef";

          private readonly List<char> _buffer = new List<char>();
          private int _index;

          public override bool Fallback(char charUnknownHigh, char charUnknownLow, int index) {
              // NOTE(review): surrogate pairs are reported as not handled; confirm whether
              // an escape should be produced for them as well.
              return false;
          }

          public override bool Fallback(char charUnknown, int index) {
              // Everything from earlier fallbacks has been consumed: start over so the
              // buffer does not grow for the lifetime of the encoder.
              if (_index == _buffer.Count) {
                  _buffer.Clear();
                  _index = 0;
              }

              _buffer.Add('\\');
              int val = (int)charUnknown;
              if (val > 0xFF) {
                  _buffer.Add('u');
                  AddCharacter(val >> 8);
                  AddCharacter(val & 0xFF);
              } else {
                  _buffer.Add('x');
                  AddCharacter(val);
              }
              return true;
          }

          // Appends the low byte of val as two lower-case hex digits.
          private void AddCharacter(int val) {
              _buffer.Add(HexDigits[(val & 0xF0) >> 4]);
              _buffer.Add(HexDigits[val & 0x0F]);
          }

          public override char GetNextChar() {
              if (_index == _buffer.Count) return Char.MinValue;
              return _buffer[_index++];
          }

          public override bool MovePrevious() {
              if (_index > 0) {
                  _index--;
                  return true;
              }
              return false;
          }

          public override int Remaining {
              get { return _buffer.Count - _index; }
          }
      }

      public override EncoderFallbackBuffer CreateFallbackBuffer() {
          return new BackslashReplaceFallbackBuffer();
      }

      /// <summary>
      /// Maximum number of characters produced for a single input character. Encodings
      /// read this when computing worst-case byte counts, so it must not throw.
      /// </summary>
      public override int MaxCharCount {
          get { return MaxEscapeLength; }
      }
  }
  /// <summary>
  /// Encoder fallback that replaces an unencodable character with an XML numeric
  /// character reference of the form &amp;#NNNN; (decimal UTF-16 code unit value).
  /// </summary>
  class XmlCharRefEncoderReplaceFallback : EncoderFallback {
      // Longest replacement emitted for one character: "&#65535;".
      private const int MaxReferenceLength = 8;

      class XmlCharRefEncoderReplaceFallbackBuffer : EncoderFallbackBuffer {
          private readonly List<char> _buffer = new List<char>();
          private int _index;

          public override bool Fallback(char charUnknownHigh, char charUnknownLow, int index) {
              // NOTE(review): surrogate pairs are reported as not handled; confirm whether
              // a character reference should be produced for them as well.
              return false;
          }

          public override bool Fallback(char charUnknown, int index) {
              // Everything from earlier fallbacks has been consumed: start over so the
              // buffer does not grow for the lifetime of the encoder.
              if (_index == _buffer.Count) {
                  _buffer.Clear();
                  _index = 0;
              }

              _buffer.Add('&');
              _buffer.Add('#');
              int val = (int)charUnknown;
              // Format independently of the current culture; the output is markup, not UI text.
              foreach (char c in val.ToString(CultureInfo.InvariantCulture)) {
                  _buffer.Add(c);
              }
              _buffer.Add(';');
              return true;
          }

          public override char GetNextChar() {
              if (_index == _buffer.Count) return Char.MinValue;
              return _buffer[_index++];
          }

          public override bool MovePrevious() {
              if (_index > 0) {
                  _index--;
                  return true;
              }
              return false;
          }

          public override int Remaining {
              get { return _buffer.Count - _index; }
          }
      }

      public override EncoderFallbackBuffer CreateFallbackBuffer() {
          return new XmlCharRefEncoderReplaceFallbackBuffer();
      }

      /// <summary>
      /// Maximum number of characters produced for a single input character. Encodings
      /// read this when computing worst-case byte counts, so it must not throw.
      /// </summary>
      public override int MaxCharCount {
          get { return MaxReferenceLength; }
      }
  }
  /// <summary>
  /// Encoding that writes text as escaped characters and reads it back through
  /// LiteralParser.ParseString. In raw mode escaping is done by RawUnicodeEscapeEncode,
  /// otherwise by ReprEncode. Each escaped character becomes one byte.
  /// </summary>
  class UnicodeEscapeEncoding : Encoding {
      private bool _raw;

      public UnicodeEscapeEncoding(bool raw) {
          _raw = raw;
      }

      public override int GetByteCount(char[] chars, int index, int count) {
          return EscapeEncode(chars, index, count).Length;
      }

      // Produces the escaped text for the requested slice in the mode chosen at construction.
      private string EscapeEncode(char[] chars, int index, int count) {
          string text = new string(chars, index, count);
          if (_raw) {
              return RawUnicodeEscapeEncode(text);
          }

          bool dummy = false;
          return ReprEncode(text, ref dummy);
      }

      public override int GetBytes(char[] chars, int charIndex, int charCount, byte[] bytes, int byteIndex) {
          // Write exactly the text GetByteCount measured. Previously the non-raw path copied
          // the unescaped characters (truncated to bytes), so the two methods disagreed.
          string res = EscapeEncode(chars, charIndex, charCount);
          for (int i = 0; i < res.Length; i++) {
              bytes[i + byteIndex] = (byte)res[i];
          }
          return res.Length;
      }

      // Widens each byte of the slice to a char so the escaped text can be parsed.
      private static string BytesToString(byte[] bytes, int index, int count) {
          StringBuilder builder = new StringBuilder();
          for (int i = 0; i < count; i++) {
              builder.Append((char)bytes[i + index]);
          }
          return builder.ToString();
      }

      public override int GetCharCount(byte[] bytes, int index, int count) {
          return LiteralParser.ParseString(BytesToString(bytes, index, count), _raw, true).Length;
      }

      public override int GetChars(byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex) {
          string res = LiteralParser.ParseString(BytesToString(bytes, byteIndex, byteCount), _raw, true);
          for (int i = 0; i < res.Length; i++) {
              chars[i + charIndex] = res[i];
          }
          return res.Length;
      }

      public override int GetMaxByteCount(int charCount) {
          // A \uXXXX escape is six bytes for one character, so five per character
          // underestimated the worst case.
          return charCount * 6;
      }

      public override int GetMaxCharCount(int byteCount) {
          return byteCount;
      }
  }
  2004. #endif
  2005. #endregion
  /// <summary>
  /// Returns the result of StringOps.Quote for <paramref name="self"/>.
  /// </summary>
  public static string/*!*/ __repr__(string/*!*/ self) {
      string quoted = StringOps.Quote(self);
      return quoted;
  }
  2009. }
  2010. }