PageRenderTime 56ms CodeModel.GetById 16ms RepoModel.GetById 1ms app.codeStats 0ms

/DICK.B1/IronPython.Modules/re.cs

https://bitbucket.org/williamybs/uidipythontool
C# | 1275 lines | 992 code | 187 blank | 96 comment | 234 complexity | c6b477601b26b26e7947ed6541170357 MD5 | raw file

Large files are truncated, but you can click here to view the full file

  1. /* ****************************************************************************
  2. *
  3. * Copyright (c) Microsoft Corporation.
  4. *
  5. * This source code is subject to terms and conditions of the Microsoft Public License. A
  6. * copy of the license can be found in the License.html file at the root of this distribution. If
  7. * you cannot locate the Microsoft Public License, please send an email to
  8. * ironpy@microsoft.com. By using this source code in any fashion, you are agreeing to be bound
  9. * by the terms of the Microsoft Public License.
  10. *
  11. * You must not remove this notice, or any other, from this software.
  12. *
  13. *
  14. * ***************************************************************************/
  15. using System;
  16. using System.Collections;
  17. using System.Collections.Generic;
  18. using System.Diagnostics;
  19. using System.Runtime.CompilerServices;
  20. using System.Runtime.InteropServices;
  21. using System.Text;
  22. using System.Text.RegularExpressions;
  23. using Microsoft.Scripting;
  24. using Microsoft.Scripting.Runtime;
  25. using Microsoft.Scripting.Utils;
  26. using IronPython.Runtime;
  27. using IronPython.Runtime.Exceptions;
  28. using IronPython.Runtime.Operations;
  29. using IronPython.Runtime.Types;
  30. // Maintain compatibility between 2.6.0 and 2.6.1 on CLR2 builds, on
  31. // Dev10 builds we can introduce breaking changes (with stronger typing
  32. // information) because we haven't shipped a stable version yet.
  33. #if CLR2
  34. using ListOrObject = System.Object;
  35. using REMatchOrObject = System.Object;
  36. using StringOrObject = System.Object;
  37. using TupleOrObject = System.Object;
  38. using DictOrObject = System.Object;
  39. using REPatternOrObject = System.Object;
  40. #else
  41. using ListOrObject = IronPython.Runtime.List;
  42. using REMatchOrObject = IronPython.Modules.PythonRegex.RE_Match;
  43. using StringOrObject = System.String;
  44. using TupleOrObject = IronPython.Runtime.PythonTuple;
  45. using DictOrObject = IronPython.Runtime.PythonDictionary;
  46. using REPatternOrObject = IronPython.Modules.PythonRegex.RE_Pattern;
  47. #endif
  48. [assembly: PythonModule("re", typeof(IronPython.Modules.PythonRegex))]
  49. namespace IronPython.Modules {
  50. /// <summary>
  51. /// Python regular expression module.
  52. /// </summary>
  53. public static class PythonRegex {
  54. private static CacheDict<PatternKey, RE_Pattern> _cachedPatterns = new CacheDict<PatternKey, RE_Pattern>(100);
  55. [SpecialName]
  56. public static void PerformModuleReload(PythonContext/*!*/ context, PythonDictionary/*!*/ dict) {
  57. context.EnsureModuleException("reerror", dict, "error", "re");
  58. PythonCopyReg.GetDispatchTable(context.SharedContext)[DynamicHelpers.GetPythonTypeFromType(typeof(RE_Pattern))] = dict["_pickle"];
  59. }
  60. private static readonly Random r = new Random(DateTime.Now.Millisecond);
  61. #region CONSTANTS
  62. // short forms
  63. public const int I = 0x02;
  64. public const int L = 0x04;
  65. public const int M = 0x08;
  66. public const int S = 0x10;
  67. public const int U = 0x20;
  68. public const int X = 0x40;
  69. // long forms
  70. public const int IGNORECASE = 0x02;
  71. public const int LOCALE = 0x04;
  72. public const int MULTILINE = 0x08;
  73. public const int DOTALL = 0x10;
  74. public const int UNICODE = 0x20;
  75. public const int VERBOSE = 0x40;
  76. #endregion
  77. #region Public API Surface
  78. public static RE_Pattern compile(CodeContext/*!*/ context, object pattern) {
  79. try {
  80. return new RE_Pattern(context, ValidatePattern(pattern), 0, true);
  81. } catch (ArgumentException e) {
  82. throw PythonExceptions.CreateThrowable(error(context), e.Message);
  83. }
  84. }
  85. public static RE_Pattern compile(CodeContext/*!*/ context, object pattern, object flags) {
  86. try {
  87. return new RE_Pattern(context, ValidatePattern(pattern), PythonContext.GetContext(context).ConvertToInt32(flags), true);
  88. } catch (ArgumentException e) {
  89. throw PythonExceptions.CreateThrowable(error(context), e.Message);
  90. }
  91. }
  92. public const string engine = "cli reg ex";
  /// <summary>
  /// Returns <paramref name="text"/> with a backslash inserted in front of every
  /// character that is not a letter or digit (as judged by Char.IsLetterOrDigit).
  /// When nothing needs escaping the original string instance is returned.
  /// </summary>
  /// <param name="text">The text to escape; null raises a Python TypeError.</param>
  public static string escape(string text) {
      if (text == null) {
          throw PythonOps.TypeError("text must not be None");
      }

      // Skip over the leading run of characters that can be copied verbatim.
      int firstSpecial = 0;
      while (firstSpecial < text.Length && Char.IsLetterOrDigit(text[firstSpecial])) {
          firstSpecial++;
      }
      if (firstSpecial == text.Length) {
          // Nothing to escape: hand back the input unchanged.
          return text;
      }

      // Seed the builder with the untouched prefix, then escape the remainder
      // one character at a time.
      StringBuilder escaped = new StringBuilder(text, 0, firstSpecial, text.Length);
      for (int pos = firstSpecial; pos < text.Length; pos++) {
          char current = text[pos];
          if (!Char.IsLetterOrDigit(current)) {
              escaped.Append('\\');
          }
          escaped.Append(current);
      }
      return escaped.ToString();
  }
  118. public static ListOrObject findall(CodeContext/*!*/ context, object pattern, string @string) {
  119. return findall(context, pattern, @string, 0);
  120. }
  121. public static ListOrObject findall(CodeContext/*!*/ context, object pattern, string @string, int flags) {
  122. RE_Pattern pat = GetPattern(context, ValidatePattern(pattern), flags);
  123. ValidateString(@string, "string");
  124. MatchCollection mc = pat.FindAllWorker(context, @string, 0, @string.Length);
  125. return FixFindAllMatch(pat, mc);
  126. }
  /// <summary>
  /// Converts a CLR MatchCollection into the list shape produced by findall:
  /// a list of tuples when the pattern has several groups, a list of the single
  /// group's text when it has exactly one, and a list of whole-match text otherwise.
  /// </summary>
  private static ListOrObject FixFindAllMatch(RE_Pattern pat, MatchCollection mc) {
      object[] results = new object[mc.Count];
      // The CLR always reports group 0 (the entire expression), so a pattern with
      // N user groups yields N + 1 group numbers.
      int groupCount = pat._re.GetGroupNumbers().Length;

      for (int idx = 0; idx < mc.Count; idx++) {
          Match current = mc[idx];

          if (groupCount > 2) {
              // Several groups: build one tuple per match, e.g.
              // findall("(\d+)|(\w+)", "x = 99y") == [('', 'x'), ('99', ''), ('', 'y')]
              // A group that did not participate contributes ''.
              List<object> items = new List<object>();
              bool skippedWholeMatch = false;
              foreach (Group grp in current.Groups) {
                  // The first group enumerated is the whole match; it is not part
                  // of the tuple.
                  if (!skippedWholeMatch) {
                      skippedWholeMatch = true;
                      continue;
                  }
                  items.Add(grp.Value);
              }
              results[idx] = PythonTuple.Make(items);
          } else if (groupCount == 2) {
              // Exactly one user group (plus the CLR's group 0): report the group
              // text rather than the whole match, e.g.
              // re.findall(r"(\w+)\s+fish\b", "green fish") == ["green"]
              results[idx] = current.Groups[1].Value;
          } else {
              // No user groups: report the whole match.
              results[idx] = current.Value;
          }
      }

      return List.FromArrayNoCopy(results);
  }
  163. public static object finditer(CodeContext/*!*/ context, object pattern, object @string) {
  164. return finditer(context, pattern, @string, 0);
  165. }
  166. public static object finditer(CodeContext/*!*/ context, object pattern, object @string, int flags) {
  167. RE_Pattern pat = GetPattern(context, ValidatePattern(pattern), flags);
  168. string str = ValidateString(@string, "string");
  169. return MatchIterator(pat.FindAllWorker(context, str, 0, str.Length), pat, str);
  170. }
  171. public static REMatchOrObject match(CodeContext/*!*/ context, object pattern, object @string) {
  172. return match(context, pattern, @string, 0);
  173. }
  174. public static REMatchOrObject match(CodeContext/*!*/ context, object pattern, object @string, int flags) {
  175. return GetPattern(context, ValidatePattern(pattern), flags).match(ValidateString(@string, "string"));
  176. }
  177. public static REMatchOrObject search(CodeContext/*!*/ context, object pattern, object @string) {
  178. return search(context, pattern, @string, 0);
  179. }
  180. public static REMatchOrObject search(CodeContext/*!*/ context, object pattern, object @string, int flags) {
  181. return GetPattern(context, ValidatePattern(pattern), flags).search(ValidateString(@string, "string"));
  182. }
  183. [return: SequenceTypeInfo(typeof(string))]
  184. public static ListOrObject split(CodeContext/*!*/ context, object pattern, object @string) {
  185. return split(context, ValidatePattern(pattern), ValidateString(@string, "string"), 0);
  186. }
  187. [return: SequenceTypeInfo(typeof(string))]
  188. public static ListOrObject split(CodeContext/*!*/ context, object pattern, object @string, int maxsplit) {
  189. return GetPattern(context, ValidatePattern(pattern), 0).split(ValidateString(@string, "string"),
  190. maxsplit);
  191. }
  192. public static StringOrObject sub(CodeContext/*!*/ context, object pattern, object repl, object @string) {
  193. return sub(context, pattern, repl, @string, Int32.MaxValue);
  194. }
  195. public static StringOrObject sub(CodeContext/*!*/ context, object pattern, object repl, object @string, int count) {
  196. return GetPattern(context, ValidatePattern(pattern), 0).sub(context, repl, ValidateString(@string, "string"), count);
  197. }
  198. public static object subn(CodeContext/*!*/ context, object pattern, object repl, object @string) {
  199. return subn(context, pattern, repl, @string, Int32.MaxValue);
  200. }
  201. public static object subn(CodeContext/*!*/ context, object pattern, object repl, object @string, int count) {
  202. return GetPattern(context, ValidatePattern(pattern), 0).subn(context, repl, ValidateString(@string, "string"), count);
  203. }
  204. public static void purge() {
  205. _cachedPatterns = new CacheDict<PatternKey, RE_Pattern>(100);
  206. }
  207. #endregion
  208. #region Public classes
  209. /// <summary>
  210. /// Compiled reg-ex pattern
  211. /// </summary>
  212. [PythonType]
  213. public class RE_Pattern : IWeakReferenceable {
  214. internal Regex _re;
  215. private PythonDictionary _groups;
  216. private int _compileFlags;
  217. private WeakRefTracker _weakRefTracker;
  218. internal ParsedRegex _pre;
  219. internal RE_Pattern(CodeContext/*!*/ context, object pattern)
  220. : this(context, pattern, 0) {
  221. }
  222. internal RE_Pattern(CodeContext/*!*/ context, object pattern, int flags) :
  223. this(context, pattern, flags, false) {
  224. }
  225. internal RE_Pattern(CodeContext/*!*/ context, object pattern, int flags, bool compiled) {
  226. _pre = PreParseRegex(context, ValidatePattern(pattern));
  227. try {
  228. flags |= OptionToFlags(_pre.Options);
  229. RegexOptions opts = FlagsToOption(flags);
  230. #if SILVERLIGHT
  231. this._re = new Regex(_pre.Pattern, opts);
  232. #else
  233. this._re = new Regex(_pre.Pattern, opts | (compiled ? RegexOptions.Compiled : RegexOptions.None));
  234. #endif
  235. } catch (ArgumentException e) {
  236. throw PythonExceptions.CreateThrowable(error(context), e.Message);
  237. }
  238. this._compileFlags = flags;
  239. }
  240. public RE_Match match(object text) {
  241. string input = ValidateString(text, "text");
  242. return RE_Match.makeMatch(_re.Match(input), this, input, 0, input.Length);
  243. }
  244. private static int FixPosition(string text, int position) {
  245. if (position < 0) return 0;
  246. if (position > text.Length) return text.Length;
  247. return position;
  248. }
  249. public RE_Match match(object text, int pos) {
  250. string input = ValidateString(text, "text");
  251. pos = FixPosition(input, pos);
  252. return RE_Match.makeMatch(_re.Match(input, pos), this, input, pos, input.Length);
  253. }
  /// <summary>
  /// Attempts to match the pattern at position <paramref name="pos"/> of
  /// <paramref name="text"/>, looking no further than <paramref name="endpos"/>.
  /// Both positions are clamped into [0, text.Length].
  /// </summary>
  /// <returns>The match object, or null when the pattern does not match at pos.</returns>
  public RE_Match match(object text, [DefaultParameterValue(0)]int pos, int endpos) {
      string input = ValidateString(text, "text");
      pos = FixPosition(input, pos);
      endpos = FixPosition(input, endpos);

      // An empty or inverted window can never match. Without this guard
      // Regex.Match would be given a start index beyond the truncated input and
      // throw ArgumentOutOfRangeException.
      if (pos > endpos) {
          return null;
      }

      return RE_Match.makeMatch(
          _re.Match(input.Substring(0, endpos), pos),
          this,
          input,
          pos,
          endpos);
  }
  265. public RE_Match search(object text) {
  266. string input = ValidateString(text, "text");
  267. return RE_Match.make(_re.Match(input), this, input);
  268. }
  /// <summary>
  /// Searches <paramref name="text"/> for the pattern, starting at
  /// <paramref name="pos"/>. The position is clamped into [0, text.Length] so an
  /// out-of-range value does not make Regex.Match throw.
  /// </summary>
  /// <returns>The first match found, or null when there is none.</returns>
  public RE_Match search(object text, int pos) {
      string input = ValidateString(text, "text");
      pos = FixPosition(input, pos);
      // Record the effective search window on the match object so that its
      // pos/endpos properties reflect what was actually searched.
      return RE_Match.make(_re.Match(input, pos, input.Length - pos), this, input, pos, input.Length);
  }
  /// <summary>
  /// Searches the window [pos, endpos) of <paramref name="text"/> for the pattern.
  /// Both positions are clamped into [0, text.Length]; an inverted window yields
  /// no match.
  /// </summary>
  /// <returns>The first match found, or null when there is none.</returns>
  public RE_Match search(object text, int pos, int endpos) {
      string input = ValidateString(text, "text");
      pos = FixPosition(input, pos);
      endpos = FixPosition(input, endpos);

      // Previously a negative or too-large pos reached Regex.Match unchecked and
      // raised ArgumentOutOfRangeException.
      if (pos > endpos) {
          return null;
      }

      return RE_Match.make(_re.Match(input, pos, endpos - pos), this, input, pos, endpos);
  }
  277. public object findall(CodeContext/*!*/ context, string @string) {
  278. return findall(context, @string, 0, null);
  279. }
  280. public object findall(CodeContext/*!*/ context, string @string, int pos) {
  281. return findall(context, @string, pos, null);
  282. }
  283. public object findall(CodeContext/*!*/ context, object @string, int pos, object endpos) {
  284. MatchCollection mc = FindAllWorker(context, ValidateString(@string, "text"), pos, endpos);
  285. return FixFindAllMatch(this, mc);
  286. }
  /// <summary>
  /// Runs the regex over <paramref name="str"/> and returns every match.
  /// </summary>
  /// <param name="context">Used to convert <paramref name="endpos"/> to an int.</param>
  /// <param name="str">The text to scan.</param>
  /// <param name="pos">Index at which scanning starts; clamped into range.</param>
  /// <param name="endpos">
  /// Optional end index (null means "to the end of the string"). When given, the
  /// text is truncated at that index, clamped into [0, str.Length].
  /// </param>
  internal MatchCollection FindAllWorker(CodeContext/*!*/ context, string str, int pos, object endpos) {
      string against = str;
      if (endpos != null) {
          int end = PythonContext.GetContext(context).ConvertToInt32(endpos);
          // Clamp on both sides: Substring throws if the length exceeds the string.
          against = against.Substring(0, FixPosition(against, end));
      }

      // Regex.Matches rejects a start index outside [0, input.Length]; clamp it.
      // NOTE(review): a start beyond the end is clamped to the end, so a pattern
      // that can match the empty string still reports one match there.
      return _re.Matches(against, FixPosition(against, pos));
  }
  295. public object finditer(CodeContext/*!*/ context, object @string) {
  296. string input = ValidateString(@string, "string");
  297. return MatchIterator(FindAllWorker(context, input, 0, input.Length), this, input);
  298. }
  299. public object finditer(CodeContext/*!*/ context, object @string, int pos) {
  300. string input = ValidateString(@string, "string");
  301. return MatchIterator(FindAllWorker(context, input, pos, input.Length), this, input);
  302. }
  303. public object finditer(CodeContext/*!*/ context, object @string, int pos, int endpos) {
  304. string input = ValidateString(@string, "string");
  305. return MatchIterator(FindAllWorker(context, input, pos, endpos), this, input);
  306. }
  307. [return: SequenceTypeInfo(typeof(string))]
  308. public ListOrObject split(string @string) {
  309. return split(@string, 0);
  310. }
  311. [return: SequenceTypeInfo(typeof(string))]
  /// <summary>
  /// Splits <paramref name="string"/> around non-empty matches of the pattern.
  /// Text captured by groups is inserted between the pieces (null for a group that
  /// did not participate). A negative <paramref name="maxsplit"/> performs no
  /// splitting; a positive value stops after that many splits; zero never stops early.
  /// </summary>
  public ListOrObject split(object @string, int maxsplit) {
      List pieces = new List();
      string source = ValidateString(@string, "string");

      // Negative maxsplit means "make no splits": the whole input is the only piece.
      if (maxsplit < 0) {
          pieces.AddNoLock(source);
          return pieces;
      }

      int tailStart = 0;   // start of the input, or first index after the last split
      int splitsDone = 0;  // number of splits performed so far

      foreach (Match hit in _re.Matches(source)) {
          // Empty matches never split the string.
          if (hit.Length <= 0) {
              continue;
          }

          // Text between the previous split point and this match.
          pieces.AddNoLock(source.Substring(tailStart, hit.Index - tailStart));

          // Captured groups (if any) are reported in order, null when unmatched.
          GroupCollection groups = hit.Groups;
          for (int g = 1; g < groups.Count; g++) {
              Group captured = groups[g];
              if (captured.Success) {
                  pieces.AddNoLock(captured.Value);
              } else {
                  pieces.AddNoLock(null);
              }
          }

          tailStart = hit.Index + hit.Length;
          splitsDone++;
          if (splitsDone == maxsplit) {
              break;
          }
      }

      // Whatever follows the last split point.
      pieces.AddNoLock(source.Substring(tailStart));
      return pieces;
  }
  346. public string sub(CodeContext/*!*/ context, object repl, object @string) {
  347. return sub(context, repl, ValidateString(@string, "string"), Int32.MaxValue);
  348. }
  349. public string sub(CodeContext/*!*/ context, object repl, object @string, int count) {
  350. if (repl == null) throw PythonOps.TypeError("NoneType is not valid repl");
  351. // if 'count' is omitted or 0, all occurrences are replaced
  352. if (count == 0) count = Int32.MaxValue;
  353. string replacement = repl as string;
  354. if (replacement == null) {
  355. if (repl is ExtensibleString) {
  356. replacement = (repl as ExtensibleString).Value;
  357. }
  358. }
  359. Match prev = null;
  360. string input = ValidateString(@string, "string");
  361. return _re.Replace(
  362. input,
  363. delegate(Match match) {
  364. // from the docs: Empty matches for the pattern are replaced
  365. // only when not adjacent to a previous match
  366. if (String.IsNullOrEmpty(match.Value) && prev != null &&
  367. (prev.Index + prev.Length) == match.Index) {
  368. return "";
  369. };
  370. prev = match;
  371. if (replacement != null) return UnescapeGroups(match, replacement);
  372. return PythonCalls.Call(context, repl, RE_Match.make(match, this, input)) as string;
  373. },
  374. count);
  375. }
  376. public object subn(CodeContext/*!*/ context, object repl, string @string) {
  377. return subn(context, repl, @string, Int32.MaxValue);
  378. }
  379. public object subn(CodeContext/*!*/ context, object repl, object @string, int count) {
  380. if (repl == null) throw PythonOps.TypeError("NoneType is not valid repl");
  381. // if 'count' is omitted or 0, all occurrences are replaced
  382. if (count == 0) count = Int32.MaxValue;
  383. int totalCount = 0;
  384. string res;
  385. string replacement = repl as string;
  386. if (replacement == null) {
  387. if (repl is ExtensibleString) {
  388. replacement = (repl as ExtensibleString).Value;
  389. }
  390. }
  391. Match prev = null;
  392. string input = ValidateString(@string, "string");
  393. res = _re.Replace(
  394. input,
  395. delegate(Match match) {
  396. // from the docs: Empty matches for the pattern are replaced
  397. // only when not adjacent to a previous match
  398. if (String.IsNullOrEmpty(match.Value) && prev != null &&
  399. (prev.Index + prev.Length) == match.Index) {
  400. return "";
  401. };
  402. prev = match;
  403. totalCount++;
  404. if (replacement != null) return UnescapeGroups(match, replacement);
  405. return PythonCalls.Call(context, repl, RE_Match.make(match, this, input)) as string;
  406. },
  407. count);
  408. return PythonTuple.MakeTuple(res, totalCount);
  409. }
  410. public int flags {
  411. get {
  412. return _compileFlags;
  413. }
  414. }
  /// <summary>
  /// Dictionary mapping each user-supplied group name to its group number.
  /// Built on first access and cached in _groups.
  /// </summary>
  public PythonDictionary groupindex {
      get {
          if (_groups == null) {
              PythonDictionary d = new PythonDictionary();
              foreach (string name in _re.GetGroupNames()) {
                  // Skip numeric names (this also covers group "0") and the
                  // mangled names given to unnamed groups that are mixed with
                  // named ones. Ordinal comparison: the prefix is an identifier,
                  // not linguistic text.
                  if (Char.IsDigit(name[0]) || name.StartsWith(_mangledNamedGroup, StringComparison.Ordinal)) {
                      continue;
                  }
                  // Resolve the number from the name itself rather than relying
                  // on the names and numbers arrays being index-aligned.
                  d[name] = _re.GroupNumberFromName(name);
              }
              _groups = d;
          }
          return _groups;
      }
  }
  433. public string pattern {
  434. get {
  435. return _pre.UserPattern;
  436. }
  437. }
  438. public override bool Equals(object obj) {
  439. RE_Pattern other = obj as RE_Pattern;
  440. if (other == null) {
  441. return false;
  442. }
  443. return other.pattern == pattern && other.flags == flags;
  444. }
  445. public override int GetHashCode() {
  446. return pattern.GetHashCode() ^ flags;
  447. }
  448. #region IWeakReferenceable Members
  449. WeakRefTracker IWeakReferenceable.GetWeakRef() {
  450. return _weakRefTracker;
  451. }
  452. bool IWeakReferenceable.SetWeakRef(WeakRefTracker value) {
  453. _weakRefTracker = value;
  454. return true;
  455. }
  456. void IWeakReferenceable.SetFinalizer(WeakRefTracker value) {
  457. ((IWeakReferenceable)this).SetWeakRef(value);
  458. }
  459. #endregion
  460. }
  /// <summary>
  /// Reduction helper registered for RE_Pattern: returns a tuple of the "re"
  /// module's compile callable and the (pattern, flags) arguments to pass to it.
  /// </summary>
  /// <exception cref="InvalidOperationException">
  /// The imported "re" object is not a PythonModule or has no "compile" entry.
  /// </exception>
  public static PythonTuple _pickle(CodeContext/*!*/ context, RE_Pattern pattern) {
      PythonModule reModule = Importer.ImportModule(context, new PythonDictionary(), "re", false, 0) as PythonModule;

      object compileFunc;
      if (reModule == null || !reModule.__dict__.TryGetValue("compile", out compileFunc)) {
          throw new InvalidOperationException("couldn't find compile method");
      }

      PythonTuple compileArgs = PythonTuple.MakeTuple(pattern.pattern, pattern.flags);
      return PythonTuple.MakeTuple(compileFunc, compileArgs);
  }
  469. [PythonType]
  470. public class RE_Match {
  471. RE_Pattern _pattern;
  472. private Match _m;
  473. private string _text;
  474. private int _lastindex = -1;
  475. private int _pos, _endPos;
  476. #region Internal makers
  477. internal static RE_Match make(Match m, RE_Pattern pattern, string input) {
  478. if (m.Success) return new RE_Match(m, pattern, input, 0, input.Length);
  479. return null;
  480. }
  481. internal static RE_Match make(Match m, RE_Pattern pattern, string input, int offset, int endpos) {
  482. if (m.Success) return new RE_Match(m, pattern, input, offset, endpos);
  483. return null;
  484. }
  485. internal static RE_Match makeMatch(Match m, RE_Pattern pattern, string input, int offset, int endpos) {
  486. if (m.Success && m.Index == offset) return new RE_Match(m, pattern, input, offset, endpos);
  487. return null;
  488. }
  489. #endregion
  490. #region Public ctors
  /// <summary>
  /// Creates a match object covering the whole of <paramref name="text"/>:
  /// pos is 0 and endpos is the text length (0 when text is null). Previously
  /// endpos was left at 0, which disagreed with the internal make() helper that
  /// passes input.Length.
  /// </summary>
  public RE_Match(Match m, RE_Pattern pattern, string text)
      : this(m, pattern, text, 0, text != null ? text.Length : 0) {
  }
  496. public RE_Match(Match m, RE_Pattern pattern, string text, int pos, int endpos) {
  497. _m = m;
  498. _pattern = pattern;
  499. _text = text;
  500. _pos = pos;
  501. _endPos = endpos;
  502. }
  503. #endregion
  504. // public override bool __nonzero__() {
  505. // return m.Success;
  506. // }
  507. #region Public API Surface
  508. public int end() {
  509. return _m.Index + _m.Length;
  510. }
  511. public int start() {
  512. return _m.Index;
  513. }
  514. public int start(object group) {
  515. int grpIndex = GetGroupIndex(group);
  516. if (!_m.Groups[grpIndex].Success) {
  517. return -1;
  518. }
  519. return _m.Groups[grpIndex].Index;
  520. }
  521. public int end(object group) {
  522. int grpIndex = GetGroupIndex(group);
  523. if (!_m.Groups[grpIndex].Success) {
  524. return -1;
  525. }
  526. return _m.Groups[grpIndex].Index + _m.Groups[grpIndex].Length;
  527. }
  /// <summary>
  /// Returns the text of one or more groups. With a single argument this defers
  /// to group(index); with several it returns a tuple holding one entry per
  /// requested group, null for a group that did not participate in the match.
  /// </summary>
  public object group(object index, params object[] additional) {
      if (additional.Length == 0) {
          return group(index);
      }

      object[] values = new object[additional.Length + 1];

      Group first = _m.Groups[GetGroupIndex(index)];
      values[0] = first.Success ? first.Value : null;

      for (int slot = 0; slot < additional.Length; slot++) {
          Group requested = _m.Groups[GetGroupIndex(additional[slot])];
          values[slot + 1] = requested.Success ? requested.Value : null;
      }

      return PythonTuple.MakeTuple(values);
  }
  540. public StringOrObject group(object index) {
  541. int pos = GetGroupIndex(index);
  542. Group g = _m.Groups[pos];
  543. return g.Success ? g.Value : null;
  544. }
  545. public StringOrObject group() {
  546. return group(0);
  547. }
  548. [return: SequenceTypeInfo(typeof(string))]
  549. public TupleOrObject groups() {
  550. return groups(null);
  551. }
  552. public TupleOrObject groups(object @default) {
  553. object[] ret = new object[_m.Groups.Count - 1];
  554. for (int i = 1; i < _m.Groups.Count; i++) {
  555. if (!_m.Groups[i].Success) {
  556. ret[i - 1] = @default;
  557. } else {
  558. ret[i - 1] = _m.Groups[i].Value;
  559. }
  560. }
  561. return PythonTuple.MakeTuple(ret);
  562. }
  /// <summary>
  /// Expands a replacement template against this match. Recognised escapes:
  /// a backslash followed by one ASCII digit (numbered group), \g&lt;name&gt;
  /// (group looked up by name), and \n, \r, \t, \\. Any other escape is copied
  /// through unchanged, backslash included.
  /// </summary>
  /// <param name="template">The template; validated via ValidateString.</param>
  /// <returns>The expanded string.</returns>
  public StringOrObject expand(object template) {
      string strTmp = ValidateString(template, "template");
      StringBuilder res = new StringBuilder();

      for (int i = 0; i < strTmp.Length; i++) {
          if (strTmp[i] != '\\') {
              res.Append(strTmp[i]);
              continue;
          }

          // A lone trailing backslash is kept as-is.
          if (++i == strTmp.Length) {
              res.Append('\\');
              break;
          }

          char escape = strTmp[i];
          if (escape >= '0' && escape <= '9') {
              // ASCII digits only: Char.IsDigit also accepts other Unicode
              // digits, for which (escape - '0') is not a meaningful index.
              AppendGroup(res, escape - '0');
          } else if (escape == 'g') {
              if (i + 1 == strTmp.Length) {
                  res.Append("\\g");
                  break;
              }
              if (strTmp[i + 1] != '<') {
                  // Not a group reference: keep "\g" literally and let the loop
                  // process the following character normally (it used to be
                  // replaced by a spurious '<').
                  res.Append("\\g");
                  continue;
              }

              int nameStart = i + 2;
              int nameEnd = strTmp.IndexOf('>', nameStart);
              if (nameEnd < 0) {
                  // Previously this ran off the end of the string because the
                  // bounds check came after the index access.
                  throw PythonOps.IndexError("unterminated group name");
              }

              string name = strTmp.Substring(nameStart, nameEnd - nameStart);
              AppendGroup(res, _pattern._re.GroupNumberFromName(name));
              i = nameEnd; // the loop increment steps past '>'
          } else {
              switch (escape) {
                  case 'n': res.Append('\n'); break;
                  case 'r': res.Append('\r'); break;
                  case 't': res.Append('\t'); break;
                  case '\\': res.Append('\\'); break;
                  default:
                      // Unknown escapes are left alone instead of being dropped.
                      res.Append('\\');
                      res.Append(escape);
                      break;
              }
          }
      }

      return res.ToString();
  }
  594. [return: DictionaryTypeInfo(typeof(string), typeof(string))]
  595. public DictOrObject groupdict() {
  596. return groupdict(null);
  597. }
  /// <summary>
  /// Returns true when every character of <paramref name="name"/> satisfies
  /// Char.IsNumber (vacuously true for an empty string).
  /// </summary>
  private static bool IsGroupNumber(string name) {
      for (int i = 0; i < name.Length; i++) {
          char current = name[i];
          if (Char.IsNumber(current)) {
              continue;
          }
          return false;
      }
      return true;
  }
  604. [return: DictionaryTypeInfo(typeof(string), typeof(string))]
  605. public DictOrObject groupdict([NotNull]string value) {
  606. return groupdict((object)value);
  607. }
  608. [return: DictionaryTypeInfo(typeof(string), typeof(object))]
  /// <summary>
  /// Returns a dictionary of the named groups of this match, keyed by group name.
  /// </summary>
  /// <param name="value">Stored for every named group that captured nothing.</param>
  public DictOrObject groupdict(object value) {
      string[] groupNames = this._pattern._re.GetGroupNames();
      Debug.Assert(groupNames.Length == this._m.Groups.Count);

      PythonDictionary d = new PythonDictionary();
      foreach (string name in groupNames) {
          // Python doesn't report group numbers.
          if (IsGroupNumber(name)) continue;
          // Nor the mangled names assigned to unnamed groups that are mixed with
          // named ones; groupindex hides these, so do the same here.
          if (name.StartsWith(_mangledNamedGroup, StringComparison.Ordinal)) continue;

          // Look the group up by name: the integer indexer takes a group number,
          // which need not equal the position in the names array.
          Group g = _m.Groups[name];
          if (g.Captures.Count != 0) {
              d[name] = g.Value;
          } else {
              d[name] = value;
          }
      }
      return d;
  }
  623. [return: SequenceTypeInfo(typeof(int))]
  624. public TupleOrObject span() {
  625. return PythonTuple.MakeTuple(this.start(), this.end());
  626. }
  627. [return: SequenceTypeInfo(typeof(int))]
  628. public TupleOrObject span(object group) {
  629. return PythonTuple.MakeTuple(this.start(group), this.end(group));
  630. }
  631. public int pos {
  632. get {
  633. return _pos;
  634. }
  635. }
  636. public int endpos {
  637. get {
  638. return _endPos;
  639. }
  640. }
  641. public string @string {
  642. get {
  643. return _text;
  644. }
  645. }
  646. public TupleOrObject regs {
  647. get {
  648. object[] res = new object[_m.Groups.Count];
  649. for (int i = 0; i < res.Length; i++) {
  650. res[i] = PythonTuple.MakeTuple(start(i), end(i));
  651. }
  652. return PythonTuple.MakeTuple(res);
  653. }
  654. }
  655. public REPatternOrObject re {
  656. get {
  657. return _pattern;
  658. }
  659. }
  660. public object lastindex {
  661. get {
  662. // -1 : initial value of lastindex
  663. // 0 : no match found
  664. //other : the true lastindex
  665. // Match.Groups contains "lower" level matched groups, which has to be removed
  666. if (_lastindex == -1) {
  667. int i = 1;
  668. while (i < _m.Groups.Count) {
  669. if (_m.Groups[i].Success) {
  670. _lastindex = i;
  671. int start = _m.Groups[i].Index;
  672. int end = start + _m.Groups[i].Length;
  673. i++;
  674. // skip any group which fall into the range [start, end],
  675. // no matter match succeed or fail
  676. while (i < _m.Groups.Count && (_m.Groups[i].Index < end)) {
  677. i++;
  678. }
  679. } else {
  680. i++;
  681. }
  682. }
  683. if (_lastindex == -1) {
  684. _lastindex = 0;
  685. }
  686. }
  687. if (_lastindex == 0) {
  688. return null;
  689. } else {
  690. return _lastindex;
  691. }
  692. }
  693. }
  694. public StringOrObject lastgroup {
  695. get {
  696. if (lastindex == null) return null;
  697. // when group was not explicitly named, RegEx assigns the number as name
  698. // This is different from C-Python, which returns None in such cases
  699. return this._pattern._re.GroupNameFromNumber((int)lastindex);
  700. }
  701. }
  702. #endregion
  703. #region Private helper functions
  704. private void AppendGroup(StringBuilder sb, int index) {
  705. sb.Append(_m.Groups[index].Value);
  706. }
  707. private int GetGroupIndex(object group) {
  708. int grpIndex;
  709. if (!Converter.TryConvertToInt32(group, out grpIndex)) {
  710. grpIndex = _pattern._re.GroupNumberFromName(ValidateString(group, "group"));
  711. }
  712. if (grpIndex < 0 || grpIndex >= _m.Groups.Count) {
  713. throw PythonOps.IndexError("no such group");
  714. }
  715. return grpIndex;
  716. }
  717. #endregion
  718. }
  719. #endregion
  720. #region Private helper functions
  /// <summary>
  /// Returns the cached RE_Pattern for the given pattern text and flags, creating
  /// it and adding it to the cache on a miss.
  /// </summary>
  private static RE_Pattern GetPattern(CodeContext/*!*/ context, object pattern, int flags) {
      string strPattern = ValidatePattern(pattern);
      PatternKey key = new PatternKey(strPattern, flags);

      // purge() replaces the _cachedPatterns field. Read it once so that the
      // instance that gets locked is the same instance that is read and written
      // below.
      CacheDict<PatternKey, RE_Pattern> cache = _cachedPatterns;
      lock (cache) {
          RE_Pattern res;
          if (cache.TryGetValue(key, out res)) {
              return res;
          }
          res = new RE_Pattern(context, strPattern, flags);
          cache[key] = res;
          return res;
      }
  }
  /// <summary>
  /// Lazily yields one RE_Match per entry of <paramref name="matches"/>, each
  /// created with pos 0 and endpos equal to the input length.
  /// </summary>
  private static IEnumerator MatchIterator(MatchCollection matches, RE_Pattern pattern, string input) {
      int index = 0;
      while (index < matches.Count) {
          Match current = matches[index];
          yield return RE_Match.make(current, pattern, input, 0, input.Length);
          index++;
      }
  }
  /// <summary>
  /// Translates the module's integer flag bits into the equivalent RegexOptions.
  /// This is the inverse of OptionToFlags: the absence of LOCALE maps to
  /// RegexOptions.CultureInvariant.
  /// </summary>
  private static RegexOptions FlagsToOption(int flags) {
      RegexOptions opts = RegexOptions.None;
      if ((flags & (int)IGNORECASE) != 0) opts |= RegexOptions.IgnoreCase;
      if ((flags & (int)MULTILINE) != 0) opts |= RegexOptions.Multiline;
      // Without LOCALE, matching must not depend on the current culture. The
      // previous code cleared CultureInvariant from a value that never had it
      // set, so the option was never applied.
      if (((flags & (int)LOCALE)) == 0) opts |= RegexOptions.CultureInvariant;
      if ((flags & (int)DOTALL) != 0) opts |= RegexOptions.Singleline;
      if ((flags & (int)VERBOSE) != 0) opts |= RegexOptions.IgnorePatternWhitespace;
      return opts;
  }
  /// <summary>
  /// Translates RegexOptions into the module's integer flag bits. LOCALE is
  /// reported whenever CultureInvariant is absent.
  /// </summary>
  private static int OptionToFlags(RegexOptions options) {
      bool ignoreCase = (options & RegexOptions.IgnoreCase) != 0;
      bool multiline = (options & RegexOptions.Multiline) != 0;
      bool cultureAware = (options & RegexOptions.CultureInvariant) == 0;
      bool dotAll = (options & RegexOptions.Singleline) != 0;
      bool verbose = (options & RegexOptions.IgnorePatternWhitespace) != 0;

      return (ignoreCase ? IGNORECASE : 0)
           | (multiline ? MULTILINE : 0)
           | (cultureAware ? LOCALE : 0)
           | (dotAll ? DOTALL : 0)
           | (verbose ? VERBOSE : 0);
  }
  767. internal class ParsedRegex {
  768. public ParsedRegex(string pattern) {
  769. this.UserPattern = pattern;
  770. }
  771. public string UserPattern;
  772. public string Pattern;
  773. public RegexOptions Options = RegexOptions.CultureInvariant;
  774. }
  775. private static char[] _preParsedChars = new[] { '(', '{', '[', ']' };
  776. private const string _mangledNamedGroup = "___PyRegexNameMangled";
  /// <summary>
  /// Preparses a regular expression text returning a ParsedRegex class
  /// that can be used for further regular expressions.
  /// </summary>
  /// <remarks>
  /// The pattern is rewritten in two passes.
  /// Pass 1 walks the unescaped delimiters in _preParsedChars and
  ///   * rewrites "{," to "{0," outside character classes,
  ///   * rewrites "(?P=name)" to "\k&lt;name&gt;" and "(?P&lt;name&gt;" to "(?&lt;name&gt;",
  ///   * records inline i/m/s/x/L options in ParsedRegex.Options and strips the
  ///     L and u options from the pattern text,
  ///   * gives every unnamed group that follows a named group a synthetic name.
  /// Pass 2 removes the backslash in front of letters, letter-numbers and
  /// connector punctuation that are not recognised escape sequences.
  /// </remarks>
  /// <param name="context">Passed to error(context) when an exception is raised for a malformed pattern.</param>
  /// <param name="pattern">The pattern text as supplied by the user.</param>
  /// <returns>
  /// A ParsedRegex whose UserPattern is the original text, whose Pattern is the
  /// rewritten text and whose Options reflect the inline options that were seen.
  /// </returns>
  private static ParsedRegex PreParseRegex(CodeContext/*!*/ context, string pattern) {
      ParsedRegex res = new ParsedRegex(pattern);

      int cur = 0, nameIndex;
      bool isCharList = false;
      bool containsNamedGroup = false;

      // Pass 1: structural rewrites driven by the delimiters ( { [ ]
      for (; ; ) {
          nameIndex = pattern.IndexOfAny(_preParsedChars, cur);

          if (nameIndex > 0 && pattern[nameIndex - 1] == '\\') {
              int curIndex = nameIndex - 2;
              int backslashCount = 1;
              while (curIndex >= 0 && pattern[curIndex] == '\\') {
                  backslashCount++;
                  curIndex--;
              }

              // An odd number of back slashes means the delimiter itself is
              // escaped; resume the search right after it.
              if ((backslashCount & 0x01) != 0) {
                  cur = nameIndex + 1;
                  continue;
              }
          }

          if (nameIndex == -1) break;
          // a delimiter in the last position has nothing after it to inspect
          if (nameIndex == pattern.Length - 1) break;

          switch (pattern[nameIndex]) {
              case '{':
                  nameIndex++;
                  // Only a quantifier gets the default lower bound; inside a
                  // character class "{," is just two literal characters.
                  if (!isCharList && pattern[nameIndex] == ',') {
                      // no beginning specified for the n-m quantifier, add the
                      // default 0 value.
                      pattern = pattern.Insert(nameIndex, "0");
                  }
                  break;
              case '[':
                  nameIndex++;
                  isCharList = true;
                  break;
              case ']':
                  nameIndex++;
                  isCharList = false;
                  break;
              case '(':
                  // make sure we're not dealing with [(]
                  if (!isCharList) {
                      switch (pattern[++nameIndex]) {
                          case '?':
                              // extension syntax
                              if (nameIndex == pattern.Length - 1) throw PythonExceptions.CreateThrowable(error(context), "unexpected end of regex");
                              switch (pattern[++nameIndex]) {
                                  case 'P':
                                      // named regex, .NET doesn't expect the P so we'll remove it;
                                      // also, once we see a named group i.e. ?P then we need to start artificially
                                      // naming all unnamed groups from then on---this is to get around the fact that
                                      // the CLR RegEx support orders all the unnamed groups before all the named
                                      // groups, even if the named groups are before the unnamed ones in the pattern;
                                      // the artificial naming preserves the order of the groups and thus the order of
                                      // the matches
                                      if (nameIndex + 1 < pattern.Length && pattern[nameIndex + 1] == '=') {
                                          // match whatever was previously matched by the named group

                                          // remove the (?P=
                                          pattern = pattern.Remove(nameIndex - 2, 4);
                                          pattern = pattern.Insert(nameIndex - 2, "\\k<");

                                          // replace the closing paren of the reference with '>'
                                          int tmpIndex = nameIndex;
                                          while (tmpIndex < pattern.Length && pattern[tmpIndex] != ')')
                                              tmpIndex++;

                                          if (tmpIndex == pattern.Length) throw PythonExceptions.CreateThrowable(error(context), "unexpected end of regex");

                                          pattern = pattern.Substring(0, tmpIndex) + ">" + pattern.Substring(tmpIndex + 1);
                                      } else {
                                          containsNamedGroup = true;
                                          pattern = pattern.Remove(nameIndex, 1);
                                      }
                                      break;
                                  case 'i': res.Options |= RegexOptions.IgnoreCase; break;
                                  case 'L':
                                      res.Options &= ~(RegexOptions.CultureInvariant);
                                      RemoveOption(ref pattern, ref nameIndex);
                                      break;
                                  case 'm': res.Options |= RegexOptions.Multiline; break;
                                  case 's': res.Options |= RegexOptions.Singleline; break;
                                  case 'u':
                                      // specify unicode; not relevant and not valid under .NET as we're always unicode
                                      // -- so the option needs to be removed
                                      RemoveOption(ref pattern, ref nameIndex);
                                      break;
                                  case 'x': res.Options |= RegexOptions.IgnorePatternWhitespace; break;
                                  case ':': break; // non-capturing
                                  case '=': break; // look ahead assertion
                                  case '<': break; // positive look behind assertion
                                  case '!': break; // negative look ahead assertion
                                  case '#': break; // inline comment
                                  case '(':
                                      // conditional match alternation (?(id/name)yes-pattern|no-pattern)
                                      // move past ?( so we don't preparse the name.
                                      nameIndex++;
                                      break;
                                  default: throw PythonExceptions.CreateThrowable(error(context), "Unrecognized extension " + pattern[nameIndex]);
                              }
                              break;
                          default:
                              // just another group
                              if (containsNamedGroup) {
                                  // need to name this unnamed group
                                  pattern = pattern.Insert(nameIndex, "?<" + _mangledNamedGroup + GetRandomString() + ">");
                              }
                              break;
                      }
                  } else {
                      nameIndex++;
                  }
                  break;
          }

          cur = nameIndex;
      }

      // Pass 2: drop the backslash from escapes that are not recognised sequences
      cur = 0;
      for (; ; ) {
          nameIndex = pattern.IndexOf('\\', cur);

          if (nameIndex == -1 || nameIndex == pattern.Length - 1) break;
          cur = ++nameIndex;
          char curChar = pattern[cur];
          switch (curChar) {
              case 'x':
              case 'u':
              case 'a':
              case 'b':
              case 'e':
              case 'f':
              case 'k':
              case 'n':
              case 'r':
              case 't':
              case 'v':
              case 'c':
              case 's':
              case 'W':
              case 'w':
              case 'p':
              case 'P':
              case 'S':
              case 'd':
              case 'D':
              case 'A': // start-of-string anchor
              case 'B': // non-word-boundary assertion
              case 'Z':
              case '\\':
                  // known escape sequences, leave escaped.
                  break;
              default:
                  System.Globalization.UnicodeCategory charClass = Char.GetUnicodeCategory(curChar);
                  switch (charClass) {
                      // recognized word characters, always unescape.
                      case System.Globalization.UnicodeCategory.ModifierLetter:
                      case System.Globalization.UnicodeCategory.LowercaseLetter:
                      case System.Globalization.UnicodeCategory.UppercaseLetter:
                      case System.Globalization.UnicodeCategory.TitlecaseLetter:
                      case System.Globalization.UnicodeCategory.OtherLetter:
                      case System.Globalization.UnicodeCategory.LetterNumber:
                      case System.Globalization.UnicodeCategory.OtherNumber:
                      case System.Globalization.UnicodeCategory.ConnectorPunctuation:
                          // remove the backslash; cur now indexes the unescaped character
                          pattern = pattern.Remove(nameIndex - 1, 1);
                          cur--;
                          break;
                      case System.Globalization.UnicodeCategory.DecimalDigitNumber:
                          // actually don't want to unescape '\1', '\2' etc. which are references to groups
                          break;
                  }
                  break;
          }
          if (++cur >= pattern.Length) {
              break;
          }
      }

      res.Pattern = pattern;
      return res;
  }
  /// <summary>
  /// Deletes the inline option character at <paramref name="nameIndex"/> from
  /// <paramref name="pattern"/>. When that character is the only content of a
  /// "(?X)" group the whole four-character group is deleted instead.
  /// </summary>
  /// <param name="pattern">Pattern text; replaced with the shortened text.</param>
  /// <param name="nameIndex">
  /// Index of the option character. On return it is moved back by two when the
  /// whole group was removed, otherwise by one.
  /// </param>
  private static void RemoveOption(ref string pattern, ref int nameIndex) {
      bool followsQuestionMark = pattern[nameIndex - 1] == '?';
      bool closesImmediately = nameIndex < (pattern.Length - 1) && pattern[nameIndex + 1] == ')';

      if (!(followsQuestionMark && closesImmediately)) {
          // Other characters share the group: drop just this option character.
          int optionPosition = nameIndex;
          nameIndex -= 1;
          pattern = pattern.Remove(optionPosition, 1);
          return;
      }

      // The group holds nothing but this option: drop "(?X)" entirely.
      pattern = pattern.Remove(nameIndex - 2, 4);
      nameIndex -= 2;
  }
  /// <summary>
  /// Returns the decimal text of a number drawn from the shared generator r,
  /// in the range [Int32.MaxValue / 2, Int32.MaxValue).
  /// </summary>
  private static string GetRandomString() {
      int lowerBound = Int32.MaxValue / 2;
      int drawn = r.Next(lowerBound, Int32.MaxValue);
      return drawn.ToString();
  }
  965. private static string UnescapeGroups(Match m, string text) {
  966. for (int i = 0; i < text.Length; i++) {
  967. if (text[i] == '\\') {
  968. StringBuilder sb = new StringBuilder(text, 0, i, text.L

Large files are truncated, but you can click here to view the full file