PageRenderTime 497ms CodeModel.GetById 14ms RepoModel.GetById 0ms app.codeStats 1ms

/IronPython_2_0/Src/IronPython.Modules/re.cs

#
C# | 1158 lines | 895 code | 173 blank | 90 comment | 218 complexity | 30b88730e65f4fb7ff51d8fcfb847fc5 MD5 | raw file
Possible License(s): GPL-2.0, MPL-2.0-no-copyleft-exception, CPL-1.0, CC-BY-SA-3.0, BSD-3-Clause, ISC, AGPL-3.0, LGPL-2.1, Apache-2.0

Large files are truncated, but you can click here to view the full file

  1. /* ****************************************************************************
  2. *
  3. * Copyright (c) Microsoft Corporation.
  4. *
  5. * This source code is subject to terms and conditions of the Microsoft Public License. A
  6. * copy of the license can be found in the License.html file at the root of this distribution. If
  7. * you cannot locate the Microsoft Public License, please send an email to
  8. * ironpy@microsoft.com. By using this source code in any fashion, you are agreeing to be bound
  9. * by the terms of the Microsoft Public License.
  10. *
  11. * You must not remove this notice, or any other, from this software.
  12. *
  13. *
  14. * ***************************************************************************/
  15. using System; using Microsoft;
  16. using System.Collections;
  17. using System.Collections.Generic;
  18. using System.Diagnostics;
  19. using System.Runtime.CompilerServices;
  20. using Microsoft.Runtime.CompilerServices;
  21. using System.Runtime.InteropServices;
  22. using System.Text;
  23. using System.Text.RegularExpressions;
  24. using Microsoft.Scripting;
  25. using Microsoft.Scripting.Runtime;
  26. using Microsoft.Scripting.Utils;
  27. using IronPython.Runtime;
  28. using IronPython.Runtime.Exceptions;
  29. using IronPython.Runtime.Operations;
  30. using IronPython.Runtime.Types;
  31. [assembly: PythonModule("re", typeof(IronPython.Modules.PythonRegex))]
  32. namespace IronPython.Modules {
  33. /// <summary>
  34. /// Python regular expression module.
  35. /// </summary>
  36. public static class PythonRegex {
  /// <summary>
  /// Module reload hook. Asks the context to ensure the module exception,
  /// passing "reerror", the module dictionary, "error" and "re".
  /// </summary>
  /// <param name="context">Python context the module is loaded into.</param>
  /// <param name="dict">Attribute collection of the module.</param>
  [SpecialName]
  public static void PerformModuleReload(PythonContext/*!*/ context, IAttributesCollection/*!*/ dict) {
      context.EnsureModuleException("reerror", dict, "error", "re");
  }
  // Module-wide pseudo-random source, seeded once from the millisecond
  // component of the current local time.
  private static readonly Random r = new Random(DateTime.Now.Millisecond);

  #region CONSTANTS

  // short forms
  public const int I = 0x02;
  public const int L = 0x04;
  public const int M = 0x08;
  public const int S = 0x10;
  public const int U = 0x20;
  public const int X = 0x40;

  // long forms: each one carries the same bit value as its short form above
  public const int IGNORECASE = I;
  public const int LOCALE = L;
  public const int MULTILINE = M;
  public const int DOTALL = S;
  public const int UNICODE = U;
  public const int VERBOSE = X;

  #endregion
  58. #region Public API Surface
  /// <summary>
  /// Builds an <see cref="RE_Pattern"/> for <paramref name="pattern"/> with no
  /// flags, requesting the "compiled" construction path.
  /// </summary>
  /// <remarks>
  /// An <see cref="ArgumentException"/> raised while building the pattern is
  /// rethrown as the module's error exception carrying the same message.
  /// </remarks>
  public static RE_Pattern compile(CodeContext/*!*/ context, object pattern) {
      try {
          string validated = ValidatePattern(pattern);
          return new RE_Pattern(context, validated, 0, true);
      } catch (ArgumentException ex) {
          throw PythonExceptions.CreateThrowable(error(context), ex.Message);
      }
  }

  /// <summary>
  /// Builds an <see cref="RE_Pattern"/> for <paramref name="pattern"/> using
  /// <paramref name="flags"/> (converted to Int32 through the Python context),
  /// requesting the "compiled" construction path.
  /// </summary>
  /// <remarks>
  /// An <see cref="ArgumentException"/> raised while building the pattern is
  /// rethrown as the module's error exception carrying the same message.
  /// </remarks>
  public static RE_Pattern compile(CodeContext/*!*/ context, object pattern, object flags) {
      try {
          string validated = ValidatePattern(pattern);
          int flagBits = PythonContext.GetContext(context).ConvertToInt32(flags);
          return new RE_Pattern(context, validated, flagBits, true);
      } catch (ArgumentException ex) {
          throw PythonExceptions.CreateThrowable(error(context), ex.Message);
      }
  }
  /// <summary>Name of the underlying regular expression engine.</summary>
  public const string engine = "cli reg ex";

  /// <summary>
  /// Returns <paramref name="text"/> with a backslash inserted before every
  /// character for which <see cref="Char.IsLetterOrDigit(char)"/> is false.
  /// </summary>
  /// <param name="text">Text to escape; must not be null.</param>
  /// <returns>
  /// The escaped text, or the original string instance when nothing needs
  /// escaping.
  /// </returns>
  public static string escape(string text) {
      if (text == null) throw PythonOps.TypeError("text must not be None");

      // Find the first character that needs a backslash; if there is none the
      // input can be handed back untouched.
      int firstSpecial = 0;
      while (firstSpecial < text.Length && Char.IsLetterOrDigit(text[firstSpecial])) {
          firstSpecial++;
      }
      if (firstSpecial == text.Length) {
          return text;
      }

      // Seed the builder with the untouched prefix, then copy the remainder
      // while prefixing each non-alphanumeric character.
      StringBuilder escaped = new StringBuilder(text, 0, firstSpecial, text.Length);
      for (int pos = firstSpecial; pos < text.Length; pos++) {
          char current = text[pos];
          if (!Char.IsLetterOrDigit(current)) {
              escaped.Append('\\');
          }
          escaped.Append(current);
      }
      return escaped.ToString();
  }
  /// <summary>
  /// Finds all matches of <paramref name="pattern"/> in
  /// <paramref name="string"/> using no flags.
  /// </summary>
  public static object findall(CodeContext/*!*/ context, object pattern, string @string) {
      return findall(context, pattern, @string, 0);
  }

  /// <summary>
  /// Builds a pattern from <paramref name="pattern"/> and
  /// <paramref name="flags"/>, collects every match over the whole of
  /// <paramref name="string"/>, and shapes the result with FixFindAllMatch.
  /// </summary>
  public static object findall(CodeContext/*!*/ context, object pattern, string @string, int flags) {
      RE_Pattern compiledPattern = new RE_Pattern(context, ValidatePattern(pattern), flags);
      ValidateString(@string, "string");
      MatchCollection found = compiledPattern.FindAllWorker(context, @string, 0, @string.Length);
      return FixFindAllMatch(compiledPattern, found);
  }
  /// <summary>
  /// Converts a <see cref="MatchCollection"/> into the list shape returned by
  /// findall.
  /// </summary>
  /// <remarks>
  /// The CLR reports group 0 (the whole match) in addition to the groups
  /// written in the pattern, so the group-number count is one higher than the
  /// number of pattern groups:
  /// <list type="bullet">
  /// <item>more than 2: each entry is a tuple of the values of groups 1..n,
  /// e.g. findall("(\d+)|(\w+)", "x = 99y") == [('', 'x'), ('99', ''), ('', 'y')];</item>
  /// <item>exactly 2: each entry is the value of group 1,
  /// e.g. findall(r"(\w+)\s+fish\b", "green fish") yields "green";</item>
  /// <item>otherwise: each entry is the whole match value.</item>
  /// </list>
  /// </remarks>
  private static object FixFindAllMatch(RE_Pattern pat, MatchCollection mc) {
      object[] results = new object[mc.Count];
      int groupTotal = pat._re.GetGroupNumbers().Length;

      for (int idx = 0; idx < mc.Count; idx++) {
          Match current = mc[idx];

          if (groupTotal > 2) {
              // Several pattern groups: gather every group value except the
              // leading whole-match group into a tuple.
              List<object> values = new List<object>();
              bool wholeMatchSkipped = false;
              foreach (Group grp in current.Groups) {
                  if (!wholeMatchSkipped) {
                      wholeMatchSkipped = true;
                      continue;
                  }
                  values.Add(grp.Value);
              }
              results[idx] = PythonTuple.Make(values);
          } else if (groupTotal == 2) {
              // Exactly one pattern group: report that group, not the whole match.
              results[idx] = current.Groups[1].Value;
          } else {
              results[idx] = current.Value;
          }
      }

      return List.FromArrayNoCopy(results);
  }
  /// <summary>
  /// Iterates over the matches of <paramref name="pattern"/> in
  /// <paramref name="string"/> using no flags.
  /// </summary>
  public static object finditer(CodeContext/*!*/ context, object pattern, object @string) {
      return finditer(context, pattern, @string, 0);
  }

  /// <summary>
  /// Builds a pattern from <paramref name="pattern"/> and
  /// <paramref name="flags"/> and returns an enumerator of match objects over
  /// the whole validated input string.
  /// </summary>
  public static object finditer(CodeContext/*!*/ context, object pattern, object @string, int flags) {
      RE_Pattern compiledPattern = new RE_Pattern(context, ValidatePattern(pattern), flags);
      string input = ValidateString(@string, "string");
      MatchCollection found = compiledPattern.FindAllWorker(context, input, 0, input.Length);
      return MatchIterator(found, compiledPattern, input);
  }
  /// <summary>
  /// Matches <paramref name="pattern"/> against <paramref name="string"/>
  /// using no flags.
  /// </summary>
  public static object match(CodeContext/*!*/ context, object pattern, object @string) {
      return match(context, pattern, @string, 0);
  }

  /// <summary>
  /// Builds a pattern from <paramref name="pattern"/> and
  /// <paramref name="flags"/> and forwards the validated string to
  /// <c>RE_Pattern.match</c>.
  /// </summary>
  public static object match(CodeContext/*!*/ context, object pattern, object @string, int flags) {
      RE_Pattern compiledPattern = new RE_Pattern(context, ValidatePattern(pattern), flags);
      return compiledPattern.match(ValidateString(@string, "string"));
  }
  /// <summary>
  /// Searches <paramref name="string"/> for <paramref name="pattern"/> using
  /// no flags.
  /// </summary>
  public static object search(CodeContext/*!*/ context, object pattern, object @string) {
      return search(context, pattern, @string, 0);
  }

  /// <summary>
  /// Builds a pattern from <paramref name="pattern"/> and
  /// <paramref name="flags"/> and forwards the validated string to
  /// <c>RE_Pattern.search</c>.
  /// </summary>
  public static object search(CodeContext/*!*/ context, object pattern, object @string, int flags) {
      RE_Pattern compiledPattern = new RE_Pattern(context, ValidatePattern(pattern), flags);
      return compiledPattern.search(ValidateString(@string, "string"));
  }
  /// <summary>
  /// Splits <paramref name="string"/> by <paramref name="pattern"/> with a
  /// maxsplit of 0. Both arguments are validated before being forwarded.
  /// </summary>
  public static object split(CodeContext/*!*/ context, object pattern, object @string) {
      return split(context, ValidatePattern(pattern), ValidateString(@string, "string"), 0);
  }

  /// <summary>
  /// Builds a flag-less pattern from <paramref name="pattern"/> and forwards
  /// the validated string and <paramref name="maxsplit"/> to
  /// <c>RE_Pattern.split</c>.
  /// </summary>
  public static object split(CodeContext/*!*/ context, object pattern, object @string, int maxsplit) {
      RE_Pattern compiledPattern = new RE_Pattern(context, ValidatePattern(pattern));
      return compiledPattern.split(ValidateString(@string, "string"), maxsplit);
  }
  /// <summary>
  /// Substitutes matches of <paramref name="pattern"/> in
  /// <paramref name="string"/>, passing Int32.MaxValue as the count.
  /// </summary>
  public static object sub(CodeContext/*!*/ context, object pattern, object repl, object @string) {
      return sub(context, pattern, repl, @string, Int32.MaxValue);
  }

  /// <summary>
  /// Builds a flag-less pattern from <paramref name="pattern"/> and forwards
  /// <paramref name="repl"/>, the validated string and
  /// <paramref name="count"/> to <c>RE_Pattern.sub</c>.
  /// </summary>
  public static object sub(CodeContext/*!*/ context, object pattern, object repl, object @string, int count) {
      RE_Pattern compiledPattern = new RE_Pattern(context, ValidatePattern(pattern));
      return compiledPattern.sub(context, repl, ValidateString(@string, "string"), count);
  }
  /// <summary>
  /// Like the five-argument overload, passing Int32.MaxValue as the count.
  /// </summary>
  public static object subn(CodeContext/*!*/ context, object pattern, object repl, object @string) {
      return subn(context, pattern, repl, @string, Int32.MaxValue);
  }

  /// <summary>
  /// Builds a flag-less pattern from <paramref name="pattern"/> and forwards
  /// <paramref name="repl"/>, the validated string and
  /// <paramref name="count"/> to <c>RE_Pattern.subn</c>.
  /// </summary>
  public static object subn(CodeContext/*!*/ context, object pattern, object repl, object @string, int count) {
      RE_Pattern compiledPattern = new RE_Pattern(context, ValidatePattern(pattern));
      return compiledPattern.subn(context, repl, ValidateString(@string, "string"), count);
  }
  183. #endregion
  184. #region Public classes
  185. /// <summary>
  186. /// Compiled reg-ex pattern
  187. /// </summary>
  188. [PythonType]
  189. public class RE_Pattern : IWeakReferenceable {
  // Underlying .NET regular expression.
  internal Regex _re;
  // Lazily built name -> number map returned by groupindex.
  private PythonDictionary _groups;
  // Flags passed by the caller merged with the options found by pre-parsing.
  private int _compileFlags;
  private WeakRefTracker _weakRefTracker;
  // Result of pre-parsing the user's pattern text.
  internal ParsedRegex _pre;

  /// <summary>Creates a pattern with no flags that is not CLR-compiled.</summary>
  internal RE_Pattern(CodeContext/*!*/ context, object pattern)
      : this(context, pattern, 0, false) {
  }

  /// <summary>Creates a pattern with the given flags that is not CLR-compiled.</summary>
  internal RE_Pattern(CodeContext/*!*/ context, object pattern, int flags)
      : this(context, pattern, flags, false) {
  }

  /// <summary>
  /// Pre-parses <paramref name="pattern"/>, merges the options discovered
  /// there into <paramref name="flags"/> and builds the underlying Regex.
  /// </summary>
  /// <param name="compiled">
  /// When true (and not building for SILVERLIGHT) RegexOptions.Compiled is
  /// added to the options.
  /// </param>
  /// <remarks>
  /// An <see cref="ArgumentException"/> from the option conversion or the
  /// Regex constructor is rethrown as the module's error exception.
  /// </remarks>
  internal RE_Pattern(CodeContext/*!*/ context, object pattern, int flags, bool compiled) {
      _pre = PreParseRegex(context, ValidatePattern(pattern));
      try {
          flags |= OptionToFlags(_pre.Options);
          RegexOptions regexOptions = FlagsToOption(flags);
#if !SILVERLIGHT
          if (compiled) {
              regexOptions |= RegexOptions.Compiled;
          }
#endif
          _re = new Regex(_pre.Pattern, regexOptions);
      } catch (ArgumentException ex) {
          throw PythonExceptions.CreateThrowable(error(context), ex.Message);
      }
      _compileFlags = flags;
  }
  /// <summary>
  /// Matches the pattern at the start of <paramref name="text"/>.
  /// </summary>
  /// <returns>
  /// A match object when the first match found starts at index 0; otherwise
  /// null.
  /// </returns>
  public RE_Match match(object text) {
      string input = ValidateString(text, "text");
      Match found = _re.Match(input);
      return RE_Match.makeMatch(found, this, input, 0, input.Length);
  }
  /// <summary>
  /// Clamps <paramref name="position"/> into the range [0, text.Length].
  /// </summary>
  /// <param name="text">String whose length bounds the position.</param>
  /// <param name="position">Requested position; may be out of range.</param>
  /// <returns>0 for negative positions, text.Length for positions past the end, else the position itself.</returns>
  private static int FixPosition(string text, int position) {
      return position < 0
          ? 0
          : (position > text.Length ? text.Length : position);
  }
  /// <summary>
  /// Matches the pattern at <paramref name="pos"/> in <paramref name="text"/>.
  /// The position is first clamped into the bounds of the string.
  /// </summary>
  /// <returns>
  /// A match object when the first match found from the clamped position
  /// starts exactly there; otherwise null.
  /// </returns>
  public RE_Match match(object text, int pos) {
      string input = ValidateString(text, "text");
      int start = FixPosition(input, pos);
      Match found = _re.Match(input, start);
      return RE_Match.makeMatch(found, this, input, start, input.Length);
  }
  /// <summary>
  /// Matches the pattern at <paramref name="pos"/> in <paramref name="text"/>,
  /// treating <paramref name="endpos"/> as the end of the string.
  /// </summary>
  /// <param name="text">Input; validated as a string.</param>
  /// <param name="pos">Start position; clamped into [0, length].</param>
  /// <param name="endpos">End position; clamped into [0, length].</param>
  /// <returns>
  /// A match object when a match starts exactly at the clamped
  /// <paramref name="pos"/>; null when there is no such match or when the
  /// clamped <paramref name="pos"/> lies beyond the clamped
  /// <paramref name="endpos"/>.
  /// </returns>
  public RE_Match match(object text, [DefaultParameterValue(0)]int pos, int endpos) {
      string input = ValidateString(text, "text");
      pos = FixPosition(input, pos);
      endpos = FixPosition(input, endpos);

      // An empty window cannot match. Without this guard the start index would
      // exceed the length of the truncated input and Regex.Match would throw
      // ArgumentOutOfRangeException.
      if (pos > endpos) {
          return null;
      }

      return RE_Match.makeMatch(
          _re.Match(input.Substring(0, endpos), pos),
          this,
          input,
          pos,
          endpos);
  }
  /// <summary>
  /// Searches the whole of <paramref name="text"/> for the first match.
  /// </summary>
  /// <returns>A match object, or null when nothing matches.</returns>
  public RE_Match search(object text) {
      string input = ValidateString(text, "text");
      return RE_Match.make(_re.Match(input), this, input);
  }

  /// <summary>
  /// Searches <paramref name="text"/> for the first match, starting at
  /// <paramref name="pos"/>.
  /// </summary>
  /// <param name="text">Input; validated as a string.</param>
  /// <param name="pos">
  /// Start position. It is clamped into [0, length] so that negative or
  /// oversized values no longer make Regex.Match throw.
  /// </param>
  /// <returns>
  /// A match object whose pos/endpos report the searched window, or null when
  /// nothing matches.
  /// </returns>
  public RE_Match search(object text, int pos) {
      string input = ValidateString(text, "text");
      pos = FixPosition(input, pos);
      return RE_Match.make(
          _re.Match(input, pos, input.Length - pos),
          this,
          input,
          pos,
          input.Length);
  }

  /// <summary>
  /// Searches the window [<paramref name="pos"/>, <paramref name="endpos"/>)
  /// of <paramref name="text"/> for the first match.
  /// </summary>
  /// <param name="text">Input; validated as a string.</param>
  /// <param name="pos">Start position; clamped into [0, length].</param>
  /// <param name="endpos">End position; clamped into [0, length].</param>
  /// <returns>
  /// A match object whose pos/endpos report the searched window; null when
  /// nothing matches or when the clamped <paramref name="pos"/> lies beyond
  /// the clamped <paramref name="endpos"/>.
  /// </returns>
  public RE_Match search(object text, int pos, int endpos) {
      string input = ValidateString(text, "text");
      pos = FixPosition(input, pos);
      endpos = FixPosition(input, endpos);

      // An inverted window contains nothing to search.
      if (pos > endpos) {
          return null;
      }

      return RE_Match.make(
          _re.Match(input, pos, endpos - pos),
          this,
          input,
          pos,
          endpos);
  }
  /// <summary>Finds all matches in <paramref name="string"/> from position 0 with no end position.</summary>
  public object findall(CodeContext/*!*/ context, string @string) {
      return findall(context, @string, 0, null);
  }

  /// <summary>Finds all matches in <paramref name="string"/> from <paramref name="pos"/> with no end position.</summary>
  public object findall(CodeContext/*!*/ context, string @string, int pos) {
      return findall(context, @string, pos, null);
  }

  /// <summary>
  /// Validates <paramref name="string"/>, collects the matches between
  /// <paramref name="pos"/> and <paramref name="endpos"/> (null meaning no end
  /// position) and shapes them with FixFindAllMatch.
  /// </summary>
  public object findall(CodeContext/*!*/ context, object @string, int pos, object endpos) {
      string input = ValidateString(@string, "text");
      MatchCollection found = FindAllWorker(context, input, pos, endpos);
      return FixFindAllMatch(this, found);
  }
  /// <summary>
  /// Returns every match of the pattern in <paramref name="str"/>, starting at
  /// <paramref name="pos"/> and optionally stopping at
  /// <paramref name="endpos"/>.
  /// </summary>
  /// <param name="context">Code context used to convert <paramref name="endpos"/> to Int32.</param>
  /// <param name="str">The string to search.</param>
  /// <param name="pos">Start position; clamped into the bounds of the searched text.</param>
  /// <param name="endpos">
  /// Null for "no end position"; otherwise a value convertible to Int32 that
  /// is clamped into [0, str.Length]. The text is truncated there before
  /// matching.
  /// </param>
  /// <remarks>
  /// Both positions are clamped so that out-of-range values no longer make
  /// Substring / Regex.Matches throw ArgumentOutOfRangeException. A
  /// <paramref name="pos"/> beyond the end position is treated as equal to it.
  /// </remarks>
  internal MatchCollection FindAllWorker(CodeContext/*!*/ context, string str, int pos, object endpos) {
      string against = str;
      if (endpos != null) {
          int end = PythonContext.GetContext(context).ConvertToInt32(endpos);
          against = against.Substring(0, FixPosition(against, end));
      }
      return _re.Matches(against, FixPosition(against, pos));
  }
  /// <summary>Returns an enumerator of match objects over the whole validated input.</summary>
  public object finditer(CodeContext/*!*/ context, object @string) {
      string input = ValidateString(@string, "string");
      MatchCollection found = FindAllWorker(context, input, 0, input.Length);
      return MatchIterator(found, this, input);
  }

  /// <summary>Returns an enumerator of match objects, searching from <paramref name="pos"/> to the end of the input.</summary>
  public object finditer(CodeContext/*!*/ context, object @string, int pos) {
      string input = ValidateString(@string, "string");
      MatchCollection found = FindAllWorker(context, input, pos, input.Length);
      return MatchIterator(found, this, input);
  }

  /// <summary>Returns an enumerator of match objects, searching from <paramref name="pos"/> up to <paramref name="endpos"/>.</summary>
  public object finditer(CodeContext/*!*/ context, object @string, int pos, int endpos) {
      string input = ValidateString(@string, "string");
      MatchCollection found = FindAllWorker(context, input, pos, endpos);
      return MatchIterator(found, this, input);
  }
  /// <summary>Splits <paramref name="string"/> with a maxsplit of 0 (no limit on the number of splits).</summary>
  public object split(object @string) {
      return split(@string, 0);
  }

  /// <summary>
  /// Splits <paramref name="string"/> around the non-empty matches of the
  /// pattern.
  /// </summary>
  /// <param name="string">Input; validated as a string unless maxsplit is negative.</param>
  /// <param name="maxsplit">
  /// Negative: no split is made and the list holds the argument as passed.
  /// Positive: splitting stops after that many splits. Zero never equals the
  /// running split count, so every non-empty match splits.
  /// </param>
  /// <returns>
  /// A list holding the text between matches; after each split the values of
  /// the match's groups 1..n are inserted (null for groups that did not
  /// participate), and the remaining tail is appended last.
  /// </returns>
  public object split(object @string, int maxsplit) {
      List pieces = new List();

      // Negative maxsplit == "make no splits": hand the argument back as-is.
      if (maxsplit < 0) {
          pieces.AddNoLock(@string);
          return pieces;
      }

      string source = ValidateString(@string, "string");
      int tailStart = 0;   // start of the text not yet emitted
      int splitsDone = 0;

      foreach (Match hit in _re.Matches(source)) {
          // Empty matches never split the string.
          if (!(hit.Length > 0)) {
              continue;
          }

          // Text between the previous split point and this match.
          pieces.AddNoLock(source.Substring(tailStart, hit.Index - tailStart));

          // Captured groups follow the text they delimit.
          for (int g = 1; g < hit.Groups.Count; g++) {
              if (hit.Groups[g].Success) {
                  pieces.AddNoLock(hit.Groups[g].Value);
              } else {
                  pieces.AddNoLock(null);
              }
          }

          tailStart = hit.Index + hit.Length;
          splitsDone++;
          if (splitsDone == maxsplit) {
              break;
          }
      }

      // Whatever follows the last split.
      pieces.AddNoLock(source.Substring(tailStart));
      return pieces;
  }
  /// <summary>Replaces every match in <paramref name="string"/>; the count passed on is Int32.MaxValue.</summary>
  public string sub(CodeContext/*!*/ context, object repl, object @string) {
      return sub(context, repl, ValidateString(@string, "string"), Int32.MaxValue);
  }

  /// <summary>
  /// Replaces up to <paramref name="count"/> matches in
  /// <paramref name="string"/>.
  /// </summary>
  /// <param name="context">Code context used when <paramref name="repl"/> is called.</param>
  /// <param name="repl">
  /// A string (or ExtensibleString) template expanded with UnescapeGroups, or
  /// any other object, which is called with the match object and whose result
  /// is used when it is a string.
  /// </param>
  /// <param name="string">Input; validated as a string.</param>
  /// <param name="count">Maximum number of replacements; 0 means all.</param>
  /// <returns>The string produced by Regex.Replace.</returns>
  public string sub(CodeContext/*!*/ context, object repl, object @string, int count) {
      if (repl == null) throw PythonOps.TypeError("NoneType is not valid repl");

      // if 'count' is omitted or 0, all occurrences are replaced
      if (count == 0) {
          count = Int32.MaxValue;
      }

      string template = repl as string;
      if (template == null) {
          ExtensibleString extensible = repl as ExtensibleString;
          if (extensible != null) {
              template = extensible.Value;
          }
      }

      Match previous = null;
      string input = ValidateString(@string, "string");

      MatchEvaluator evaluator = m => {
          // from the docs: Empty matches for the pattern are replaced
          // only when not adjacent to a previous match
          bool emptyAndAdjacent = String.IsNullOrEmpty(m.Value)
              && previous != null
              && (previous.Index + previous.Length) == m.Index;
          if (emptyAndAdjacent) {
              return "";
          }

          previous = m;
          if (template == null) {
              return PythonCalls.Call(context, repl, RE_Match.make(m, this, input)) as string;
          }
          return UnescapeGroups(m, template);
      };

      return _re.Replace(input, evaluator, count);
  }
  /// <summary>Like the four-argument overload; the count passed on is Int32.MaxValue.</summary>
  public object subn(CodeContext/*!*/ context, object repl, string @string) {
      return subn(context, repl, @string, Int32.MaxValue);
  }

  /// <summary>
  /// Replaces up to <paramref name="count"/> matches in
  /// <paramref name="string"/> and reports how many replacements were made.
  /// </summary>
  /// <param name="context">Code context used when <paramref name="repl"/> is called.</param>
  /// <param name="repl">
  /// A string (or ExtensibleString) template expanded with UnescapeGroups, or
  /// any other object, which is called with the match object and whose result
  /// is used when it is a string.
  /// </param>
  /// <param name="string">Input; validated as a string.</param>
  /// <param name="count">Maximum number of replacements; 0 means all.</param>
  /// <returns>A tuple of (resulting string, number of replacements performed).</returns>
  public object subn(CodeContext/*!*/ context, object repl, object @string, int count) {
      if (repl == null) throw PythonOps.TypeError("NoneType is not valid repl");

      // if 'count' is omitted or 0, all occurrences are replaced
      if (count == 0) {
          count = Int32.MaxValue;
      }

      string template = repl as string;
      if (template == null) {
          ExtensibleString extensible = repl as ExtensibleString;
          if (extensible != null) {
              template = extensible.Value;
          }
      }

      int replacements = 0;
      Match previous = null;
      string input = ValidateString(@string, "string");

      MatchEvaluator evaluator = m => {
          // from the docs: Empty matches for the pattern are replaced
          // only when not adjacent to a previous match
          bool emptyAndAdjacent = String.IsNullOrEmpty(m.Value)
              && previous != null
              && (previous.Index + previous.Length) == m.Index;
          if (emptyAndAdjacent) {
              return "";
          }

          previous = m;
          replacements++;
          if (template == null) {
              return PythonCalls.Call(context, repl, RE_Match.make(m, this, input)) as string;
          }
          return UnescapeGroups(m, template);
      };

      string result = _re.Replace(input, evaluator, count);
      return PythonTuple.MakeTuple(result, replacements);
  }
  /// <summary>The flags stored at construction: caller flags merged with the pre-parsed options.</summary>
  public int flags {
      get { return _compileFlags; }
  }

  /// <summary>
  /// Dictionary mapping group names to group numbers. Built on first access
  /// and cached; group 0 and names starting with a digit are left out.
  /// </summary>
  public PythonDictionary groupindex {
      get {
          if (_groups != null) {
              return _groups;
          }

          PythonDictionary map = new PythonDictionary();
          string[] groupNames = _re.GetGroupNames();
          int[] groupNumbers = _re.GetGroupNumbers();
          for (int idx = 1; idx < groupNames.Length; idx++) {
              string groupName = groupNames[idx];
              // names that start with a digit are the CLR's numeric names
              if (!Char.IsDigit(groupName[0])) {
                  map[groupName] = groupNumbers[idx];
              }
          }

          _groups = map;
          return _groups;
      }
  }

  /// <summary>The pattern text as stored in the pre-parse result's UserPattern.</summary>
  public string pattern {
      get { return _pre.UserPattern; }
  }
  409. #region IWeakReferenceable Members
  /// <summary>Returns the tracker currently stored on this pattern (may be null).</summary>
  WeakRefTracker IWeakReferenceable.GetWeakRef() {
      return _weakRefTracker;
  }

  /// <summary>Stores <paramref name="value"/> as this pattern's tracker; always reports success.</summary>
  bool IWeakReferenceable.SetWeakRef(WeakRefTracker value) {
      _weakRefTracker = value;
      return true;
  }

  /// <summary>Delegates to SetWeakRef through the interface.</summary>
  void IWeakReferenceable.SetFinalizer(WeakRefTracker value) {
      IWeakReferenceable self = this;
      self.SetWeakRef(value);
  }
  420. #endregion
  421. }
  422. [PythonType]
  423. public class RE_Match {
  // Pattern that produced this match.
  RE_Pattern _pattern;
  // Underlying .NET match.
  private Match _m;
  // The string that was searched.
  private string _text;
  // Cached lastindex: -1 = not computed yet, 0 = no group matched.
  private int _lastindex = -1;
  private int _pos, _endPos;

  #region Internal makers

  /// <summary>
  /// Wraps <paramref name="m"/> when it succeeded, reporting pos 0 and an
  /// endpos equal to the input length; returns null otherwise.
  /// </summary>
  internal static RE_Match make(Match m, RE_Pattern pattern, string input) {
      return m.Success ? new RE_Match(m, pattern, input, 0, input.Length) : null;
  }

  /// <summary>
  /// Wraps <paramref name="m"/> when it succeeded, reporting the given
  /// offset and end position; returns null otherwise.
  /// </summary>
  internal static RE_Match make(Match m, RE_Pattern pattern, string input, int offset, int endpos) {
      return m.Success ? new RE_Match(m, pattern, input, offset, endpos) : null;
  }

  /// <summary>
  /// Wraps <paramref name="m"/> only when it succeeded and starts exactly at
  /// <paramref name="offset"/>; returns null otherwise.
  /// </summary>
  internal static RE_Match makeMatch(Match m, RE_Pattern pattern, string input, int offset, int endpos) {
      bool anchoredAtOffset = m.Success && m.Index == offset;
      return anchoredAtOffset ? new RE_Match(m, pattern, input, offset, endpos) : null;
  }

  #endregion

  #region Public ctors

  /// <summary>Creates a match wrapper whose pos and endpos are both 0.</summary>
  public RE_Match(Match m, RE_Pattern pattern, string text)
      : this(m, pattern, text, 0, 0) {
  }

  /// <summary>Creates a match wrapper that records the searched window.</summary>
  public RE_Match(Match m, RE_Pattern pattern, string text, int pos, int endpos) {
      _m = m;
      _pattern = pattern;
      _text = text;
      _pos = pos;
      _endPos = endpos;
  }

  #endregion
  457. // public override bool __nonzero__() {
  458. // return m.Success;
  459. // }
  460. #region Public API Surface
  /// <summary>Index just past the end of the whole match.</summary>
  public int end() {
      return _m.Index + _m.Length;
  }

  /// <summary>Index at which the whole match starts.</summary>
  public int start() {
      return _m.Index;
  }

  /// <summary>
  /// Start index of <paramref name="group"/> (a number or a name), or -1 when
  /// that group did not participate in the match.
  /// </summary>
  public int start(object group) {
      Group target = _m.Groups[GetGroupIndex(group)];
      return target.Success ? target.Index : -1;
  }

  /// <summary>
  /// Index just past the end of <paramref name="group"/> (a number or a
  /// name), or -1 when that group did not participate in the match.
  /// </summary>
  public int end(object group) {
      Group target = _m.Groups[GetGroupIndex(group)];
      return target.Success ? target.Index + target.Length : -1;
  }
  /// <summary>
  /// Returns the value of one group, or - when extra groups are requested - a
  /// tuple with the value of every requested group in argument order. Groups
  /// that did not participate yield null.
  /// </summary>
  public object group(object index, params object[] additional) {
      if (additional.Length == 0) {
          return group(index);
      }

      object[] values = new object[additional.Length + 1];
      values[0] = group(index);
      for (int k = 0; k < additional.Length; k++) {
          values[k + 1] = group(additional[k]);
      }
      return PythonTuple.MakeTuple(values);
  }

  /// <summary>
  /// Value of the group identified by <paramref name="index"/> (a number or a
  /// name), or null when that group did not participate in the match.
  /// </summary>
  public object group(object index) {
      Group target = _m.Groups[GetGroupIndex(index)];
      if (!target.Success) {
          return null;
      }
      return target.Value;
  }

  /// <summary>Value of group 0, i.e. the whole match.</summary>
  public object group() {
      return group(0);
  }

  /// <summary>Values of groups 1..n, with null for groups that did not participate.</summary>
  public object groups() {
      return groups(null);
  }

  /// <summary>
  /// Values of groups 1..n as a tuple; <paramref name="default"/> stands in
  /// for groups that did not participate in the match.
  /// </summary>
  public object groups(object @default) {
      object[] values = new object[_m.Groups.Count - 1];
      for (int slot = 0; slot < values.Length; slot++) {
          Group current = _m.Groups[slot + 1];
          values[slot] = current.Success ? current.Value : @default;
      }
      return PythonTuple.MakeTuple(values);
  }
  /// <summary>
  /// Expands a replacement template against this match.
  /// </summary>
  /// <param name="template">Template text; validated as a string.</param>
  /// <returns>
  /// The template with these substitutions applied:
  /// <list type="bullet">
  /// <item>a backslash followed by one digit: the value of that group;</item>
  /// <item><c>\g&lt;name&gt;</c>: the value of the group the pattern's regex
  /// resolves that name to;</item>
  /// <item><c>\n</c>, <c>\r</c>, <c>\t</c>, <c>\\</c>: the corresponding
  /// character;</item>
  /// <item>any other escape, including <c>\g</c> not followed by <c>&lt;</c>
  /// and a trailing backslash: left in the output as written.</item>
  /// </list>
  /// </returns>
  /// <remarks>
  /// An unterminated <c>\g&lt;name</c> reference consumes the rest of the
  /// template as the name instead of reading past the end of the string.
  /// </remarks>
  public object expand(object template) {
      string strTmp = ValidateString(template, "template");
      StringBuilder res = new StringBuilder();

      for (int i = 0; i < strTmp.Length; i++) {
          if (strTmp[i] != '\\') {
              res.Append(strTmp[i]);
              continue;
          }

          // A backslash that ends the template is emitted literally.
          if (++i == strTmp.Length) {
              res.Append('\\');
              break;
          }

          char escape = strTmp[i];
          if (Char.IsDigit(escape)) {
              AppendGroup(res, (int)(escape - '0'));
          } else if (escape == 'g') {
              if (++i == strTmp.Length) {
                  res.Append("\\g");
                  break;
              }
              if (strTmp[i] != '<') {
                  // Not a \g<name> reference: keep "\g" as written and let the
                  // next iteration handle the character that follows it.
                  res.Append("\\g");
                  i--;
                  continue;
              }

              // Collect the name up to '>' (or the end of the template when
              // the reference is unterminated); the bounds check comes first
              // so the index can never run off the string.
              int nameStart = ++i;
              while (i < strTmp.Length && strTmp[i] != '>') {
                  i++;
              }
              string name = strTmp.Substring(nameStart, i - nameStart);
              AppendGroup(res, _pattern._re.GroupNumberFromName(name));
          } else {
              switch (escape) {
                  case 'n': res.Append('\n'); break;
                  case 'r': res.Append('\r'); break;
                  case 't': res.Append('\t'); break;
                  case '\\': res.Append('\\'); break;
                  default:
                      // Unknown escapes are left alone rather than dropped.
                      res.Append('\\');
                      res.Append(escape);
                      break;
              }
          }
      }

      return res.ToString();
  }
  /// <summary>Dictionary of named groups, with null for groups that captured nothing.</summary>
  public object groupdict() {
      return groupdict(null);
  }

  /// <summary>True when every character of <paramref name="name"/> satisfies Char.IsNumber (also true for an empty name).</summary>
  private static bool IsGroupNumber(string name) {
      for (int idx = 0; idx < name.Length; idx++) {
          if (!Char.IsNumber(name[idx])) {
              return false;
          }
      }
      return true;
  }

  /// <summary>
  /// Dictionary mapping each non-numeric group name to the group's value;
  /// <paramref name="value"/> is stored for groups with no captures.
  /// </summary>
  public object groupdict(object value) {
      string[] names = _pattern._re.GetGroupNames();
      Debug.Assert(names.Length == _m.Groups.Count);

      PythonDictionary result = new PythonDictionary();
      for (int idx = 0; idx < names.Length; idx++) {
          string name = names[idx];
          if (IsGroupNumber(name)) {
              continue; // python doesn't report group numbers
          }

          Group current = _m.Groups[idx];
          result[name] = current.Captures.Count != 0 ? current.Value : value;
      }
      return result;
  }
  /// <summary>Tuple of (start, end) for the whole match.</summary>
  public object span() {
      int first = this.start();
      int last = this.end();
      return PythonTuple.MakeTuple(first, last);
  }

  /// <summary>Tuple of (start, end) for <paramref name="group"/>; see start(group) and end(group).</summary>
  public object span(object group) {
      int first = this.start(group);
      int last = this.end(group);
      return PythonTuple.MakeTuple(first, last);
  }
  /// <summary>The start position recorded when this match object was created.</summary>
  public int pos {
      get { return _pos; }
  }

  /// <summary>The end position recorded when this match object was created.</summary>
  public int endpos {
      get { return _endPos; }
  }

  /// <summary>The text recorded when this match object was created.</summary>
  public string @string {
      get { return _text; }
  }

  /// <summary>Tuple holding a (start, end) tuple for every group, group 0 included.</summary>
  public object regs {
      get {
          int groupTotal = _m.Groups.Count;
          object[] spans = new object[groupTotal];
          for (int g = 0; g < groupTotal; g++) {
              spans[g] = PythonTuple.MakeTuple(start(g), end(g));
          }
          return PythonTuple.MakeTuple(spans);
      }
  }

  /// <summary>The pattern object recorded when this match object was created.</summary>
  public object re {
      get { return _pattern; }
  }
  /// <summary>
  /// Number of the last matched top-level group found by the scan below, or
  /// null when no group matched. The result is computed once and cached.
  /// </summary>
  public object lastindex {
      get {
          // _lastindex states:
          //   -1    : not computed yet
          //    0    : computed, no group matched
          //   other : the computed index
          if (_lastindex == -1) {
              int found = 0;
              int cursor = 1;
              while (cursor < _m.Groups.Count) {
                  Group candidate = _m.Groups[cursor];
                  int candidateIndex = cursor;
                  cursor++;
                  if (!candidate.Success) {
                      continue;
                  }

                  found = candidateIndex;

                  // Match.Groups also lists the "lower level" groups nested in
                  // this one; skip every following group that starts before
                  // this group's end, whether or not it matched.
                  int limit = candidate.Index + candidate.Length;
                  while (cursor < _m.Groups.Count && _m.Groups[cursor].Index < limit) {
                      cursor++;
                  }
              }
              _lastindex = found;
          }

          if (_lastindex != 0) {
              return _lastindex;
          }
          return null;
      }
  }
  /// <summary>
  /// Name of the group reported by lastindex, or null when lastindex is null.
  /// </summary>
  /// <remarks>
  /// When the group was not explicitly named, RegEx assigns the number as its
  /// name, so the number is returned as a string. This is different from
  /// C-Python, which returns None in such cases.
  /// </remarks>
  public object lastgroup {
      get {
          object index = lastindex;
          if (index == null) {
              return null;
          }
          return this._pattern._re.GroupNameFromNumber((int)index);
      }
  }
  644. #endregion
  645. #region Private helper functions
  /// <summary>Appends the value of the group numbered <paramref name="index"/> to <paramref name="sb"/>.</summary>
  private void AppendGroup(StringBuilder sb, int index) {
      Group target = _m.Groups[index];
      sb.Append(target.Value);
  }

  /// <summary>
  /// Resolves a group designator to a group number.
  /// </summary>
  /// <param name="group">
  /// A value convertible to Int32, or otherwise a group name (validated as a
  /// string and looked up on the pattern's regex).
  /// </param>
  /// <returns>A group number within [0, Groups.Count).</returns>
  /// <remarks>Raises IndexError("no such group") when the resolved number is out of range.</remarks>
  private int GetGroupIndex(object group) {
      int resolved;
      bool isNumber = Converter.TryConvertToInt32(group, out resolved);
      if (!isNumber) {
          resolved = _pattern._re.GroupNumberFromName(ValidateString(group, "group"));
      }

      if (resolved >= 0 && resolved < _m.Groups.Count) {
          return resolved;
      }
      throw PythonOps.IndexError("no such group");
  }
  659. #endregion
  660. }
  661. #endregion
  662. #region Private helper functions
  /// <summary>
  /// Lazily yields a match object for each entry of
  /// <paramref name="matches"/>, each reporting pos 0 and an endpos equal to
  /// the input length.
  /// </summary>
  private static IEnumerator MatchIterator(MatchCollection matches, RE_Pattern pattern, string input) {
      int index = 0;
      while (index < matches.Count) {
          yield return RE_Match.make(matches[index], pattern, input, 0, input.Length);
          index++;
      }
  }
  /// <summary>
  /// Translates the module's integer flags into <see cref="RegexOptions"/>.
  /// </summary>
  /// <param name="flags">Bitwise combination of the module flag constants.</param>
  /// <returns>
  /// IGNORECASE, MULTILINE, DOTALL and VERBOSE map to IgnoreCase, Multiline,
  /// Singleline and IgnorePatternWhitespace. CultureInvariant is set unless
  /// LOCALE is present, which is the inverse of the mapping performed by
  /// OptionToFlags.
  /// </returns>
  /// <remarks>
  /// The previous code cleared CultureInvariant from a value that never had it
  /// set, so the option was never applied no matter what the flags said.
  /// </remarks>
  private static RegexOptions FlagsToOption(int flags) {
      RegexOptions opts = RegexOptions.None;
      if ((flags & (int)IGNORECASE) != 0) opts |= RegexOptions.IgnoreCase;
      if ((flags & (int)MULTILINE) != 0) opts |= RegexOptions.Multiline;
      // No LOCALE flag means culture-independent matching.
      if ((flags & (int)LOCALE) == 0) opts |= RegexOptions.CultureInvariant;
      if ((flags & (int)DOTALL) != 0) opts |= RegexOptions.Singleline;
      if ((flags & (int)VERBOSE) != 0) opts |= RegexOptions.IgnorePatternWhitespace;
      return opts;
  }
  /// <summary>
  /// Translates <see cref="RegexOptions"/> into the module's integer flags.
  /// </summary>
  /// <returns>
  /// IgnoreCase, Multiline, Singleline and IgnorePatternWhitespace map to
  /// IGNORECASE, MULTILINE, DOTALL and VERBOSE; LOCALE is reported when
  /// CultureInvariant is absent.
  /// </returns>
  private static int OptionToFlags(RegexOptions options) {
      int flags = 0;
      flags |= (options & RegexOptions.IgnoreCase) != 0 ? IGNORECASE : 0;
      flags |= (options & RegexOptions.Multiline) != 0 ? MULTILINE : 0;
      flags |= (options & RegexOptions.CultureInvariant) == 0 ? LOCALE : 0;
      flags |= (options & RegexOptions.Singleline) != 0 ? DOTALL : 0;
      flags |= (options & RegexOptions.IgnorePatternWhitespace) != 0 ? VERBOSE : 0;
      return flags;
  }
  /// <summary>
  /// Holder for a pre-parsed pattern: the text the user supplied, the text to
  /// hand to the CLR, and the options collected while pre-parsing.
  /// </summary>
  internal class ParsedRegex {
      /// <summary>Records the user's pattern; Options starts as CultureInvariant.</summary>
      public ParsedRegex(string pattern) {
          UserPattern = pattern;
          Options = RegexOptions.CultureInvariant;
      }

      // Pattern text exactly as the user supplied it.
      public string UserPattern;
      // Pattern text to pass to the Regex constructor; not set by the constructor.
      public string Pattern;
      public RegexOptions Options;
  }

  // Characters the pre-parser looks for with IndexOfAny.
  private static char[] _preParsedChars = new char[] { '(', '{', '[', ']' };
  705. /// <summary>
  706. /// Preparses a regular expression text returning a ParsedRegex class
  707. /// that can be used for further regular expressions.
  708. /// </summary>
  709. private static ParsedRegex PreParseRegex(CodeContext/*!*/ context, string pattern) {
  710. ParsedRegex res = new ParsedRegex(pattern);
  711. //string newPattern;
  712. int cur = 0, nameIndex;
  713. int curGroup = 0;
  714. bool isCharList = false;
  715. bool containsNamedGroup = false;
  716. for (; ; ) {
  717. nameIndex = pattern.IndexOfAny(_preParsedChars, cur);
  718. if (nameIndex > 0 && pattern[nameIndex - 1] == '\\') {
  719. int curIndex = nameIndex - 2;
  720. int backslashCount = 1;
  721. while (curIndex >= 0 && pattern[curIndex] == '\\') {
  722. backslashCount++;
  723. curIndex--;
  724. }
  725. // odd number of back slashes, this is an optional
  726. // paren that we should ignore.
  727. if ((backslashCount & 0x01) != 0) {
  728. cur++;
  729. continue;
  730. }
  731. }
  732. if (nameIndex == -1) break;
  733. if (nameIndex == pattern.Length - 1) break;
  734. switch (pattern[nameIndex]) {
  735. case '{':
  736. if (pattern[++nameIndex] == ',') {
  737. // no beginning specified for the n-m quntifier, add the
  738. // default 0 value.
  739. pattern = pattern.Insert(nameIndex, "0");
  740. }
  741. break;
  742. case '[':
  743. nameIndex++;
  744. isCharList = true;
  745. break;
  746. case ']':
  747. nameIndex++;
  748. isCharList = false;
  749. break;
  750. case '(':
  751. // make sure we're not dealing with [(]
  752. if (!isCharList) {
  753. switch (pattern[++nameIndex]) {
  754. case '?':
  755. // extension syntax
  756. if (nameIndex == pattern.Length - 1) throw PythonExceptions.CreateThrowable(error(context), "unexpected end of regex");
  757. switch (pattern[++nameIndex]) {
  758. case 'P':
  759. // named regex, .NET doesn't expect the P so we'll remove it;
  760. // also, once we see a named group i.e. ?P then we need to start artificially
  761. // naming all unnamed groups from then on---this is to get around the fact that
  762. // the CLR RegEx support orders all the unnamed groups before all the named
  763. // groups, even if the named groups are before the unnamed ones in the pattern;
  764. // the artificial naming preserves the order of the groups and thus the order of
  765. // the matches
  766. if (nameIndex + 1 < pattern.Length && pattern[nameIndex + 1] == '=') {
  767. // match whatever was previously matched by the named group
  768. // remove the (?P=
  769. pattern = pattern.Remove(nameIndex - 2, 4);
  770. pattern = pattern.Insert(nameIndex - 2, "\\k<");
  771. int tmpIndex = nameIndex;
  772. while (tmpIndex < pattern.Length && pattern[tmpIndex] != ')')
  773. tmpIndex++;
  774. if (tmpIndex == pattern.Length) throw PythonExceptions.CreateThrowable(error(context), "unexpected end of regex");
  775. pattern = pattern.Substring(0, tmpIndex) + ">" + pattern.Substring(tmpIndex + 1);
  776. } else {
  777. containsNamedGroup = true;
  778. pattern = pattern.Remove(nameIndex, 1);
  779. }
  780. break;
  781. case 'i': res.Options |= RegexOptions.IgnoreCase; break;
  782. case 'L':
  783. res.Options &= ~(RegexOptions.CultureInvariant);
  784. RemoveOption(ref pattern, ref nameIndex);
  785. break;
  786. case 'm': res.Options |= RegexOptions.Multiline; break;
  787. case 's': res.Options |= RegexOptions.Singleline; break;
  788. case 'u':
  789. // specify unicode; not relevant and not valid under .NET as we're always unicode
  790. // -- so the option needs to be removed
  791. RemoveOption(ref pattern, ref nameIndex);
  792. break;
  793. case 'x': res.Options |= RegexOptions.IgnorePatternWhitespace; break;
  794. case ':': break; // non-capturing
  795. case '=': break; // look ahead assertion
  796. case '<': break; // positive look behind assertion
  797. case '!': break; // negative look ahead assertion
  798. case '#': break; // inline comment
  799. case '(': // yes/no if group exists, we don't support this
  800. default: throw PythonExceptions.CreateThrowable(error(context), "Unrecognized extension " + pattern[nameIndex]);
  801. }
  802. break;
  803. default:
  804. // just another group
  805. curGroup++;
  806. if (containsNamedGroup) {
  807. // need to name this unnamed group
  808. pattern = pattern.Insert(nameIndex, "?<Named" + GetRandomString() + ">");
  809. }
  810. break;
  811. }
  812. } else {
  813. nameIndex++;
  814. }
  815. break;
  816. }
  817. cur = nameIndex;
  818. }
  819. cur = 0;
  820. for (; ; ) {
  821. nameIndex = pattern.IndexOf('\\', cur);
  822. if (nameIndex == -1 || nameIndex == pattern.Length - 1) break;
  823. cur = ++nameIndex;
  824. char curChar = pattern[cur];
  825. switch (curChar) {
  826. case 'x':
  827. case 'u':
  828. case 'a':
  829. case 'b':
  830. case 'e':
  831. case 'f':
  832. case 'k':
  833. case 'n':
  834. case 'r':
  835. case 't':
  836. case 'v':
  837. case 'c':
  838. case 's':
  839. case 'W':
  840. case 'w':
  841. case 'p':
  842. case 'P':
  843. case 'S':
  844. case 'd':
  845. case 'D':
  846. case 'Z':
  847. case '\\':
  848. // known escape sequences, leave escaped.
  849. break;
  850. default:
  851. System.Globalization.UnicodeCategory charClass = Char.GetUnicodeCategory(curChar);
  852. switch (charClass) {
  853. // recognized word characters, always unescape.
  854. case System.Globalization.UnicodeCategory.ModifierLetter:
  855. case System.Globalization.UnicodeCategory.LowercaseLetter:
  856. case System.Globalization.UnicodeCategory.UppercaseLetter:
  857. case System.Globalization.UnicodeCategory.TitlecaseLetter:
  858. case System.Globalization.UnicodeCategory.OtherLetter:
  859. case System.Globalization.UnicodeCategory.LetterNumber:
  860. case System.Globalization.UnicodeCategory.OtherNumber:
  861. case System.Globalization.UnicodeCategory.ConnectorPunctuation:
  862. pattern = pattern.Remove(nameIndex - 1, 1);
  863. cur--;
  864. break;
  865. case System.Globalization.UnicodeCategory.DecimalDigitNumber:
  866. // actually don't want to unescape '\1', '\2' etc. which are references to groups
  867. break;
  868. }
  869. break;
  870. }
  871. if (++cur >= pattern.Length) {
  872. break;
  873. }
  874. }
  875. res.Pattern = pattern;
  876. return res;
  877. }
  /// <summary>
  /// Removes the inline option character located at <paramref name="nameIndex"/> from
  /// <paramref name="pattern"/>; used for options such as 'L' and 'u' that must not be
  /// passed through to the .NET regex engine.
  /// </summary>
  /// <param name="pattern">The pattern being rewritten; replaced with the shortened text.</param>
  /// <param name="nameIndex">
  /// Index of the option character. Moved back by two when the whole option group is
  /// dropped, or by one when only the option character is dropped.
  /// </param>
  private static void RemoveOption(ref string pattern, ref int nameIndex) {
      // The option stands alone when it sits directly between '?' and ')'.
      // Evaluation order matters: the look-ahead is only read once it is known to be in range.
      bool standsAlone = pattern[nameIndex - 1] == '?'
          && nameIndex < (pattern.Length - 1)
          && pattern[nameIndex + 1] == ')';

      if (!standsAlone) {
          // Other characters share the group: drop just this one character and step back
          // so the index refers to the character that preceded it.
          int optionPos = nameIndex;
          nameIndex = optionPos - 1;
          pattern = pattern.Remove(optionPos, 1);
          return;
      }

      // Drop the four characters starting two before the option (presumably the complete
      // "(?X)" group - the character at nameIndex - 2 is not checked here).
      pattern = pattern.Remove(nameIndex - 2, 4);
      nameIndex -= 2;
  }
  /// <summary>
  /// Produces the decimal text of a number drawn from the shared generator <c>r</c>,
  /// in the range [Int32.MaxValue / 2, Int32.MaxValue). The pre-parsing code appends
  /// this to "Named" to build an artificial name for an otherwise unnamed group.
  /// </summary>
  /// <returns>The drawn number formatted with <c>ToString()</c>.</returns>
  private static string GetRandomString() {
      // NOTE(review): r is declared outside this block - presumably a System.Random; confirm.
      int lowerBound = Int32.MaxValue / 2;
      int drawn = r.Next(lowerBound, Int32.MaxValue);
      return drawn.ToString();
  }
  889. private static string UnescapeGroups(Match m, string text) {
  890. for (int i = 0; i < text.Length; i++) {
  891. if (text[i] == '\\') {
  892. StringBuilder sb = new StringBuilder(text, 0, i, text.Length);
  893. do {
  894. if (text[i] == '\\') {
  895. i++;
  896. if (i == text.Length) { sb.Append('\\'); break; }
  897. switch (text[i]) {
  898. case 'n': sb.Append('\n'); break;
  899. case 'r': sb.Append('\r'); break;
  900. case 't': sb.Append('\t'); break;
  901. case '\\': sb.Append('\\'); break;
  902. case '\'': sb.Append('\''); break;
  903. case 'b': sb.Append('\b'); break;
  904. case 'g':
  905. // \g<#>, \g<name> need to be substituted by the groups they
  906. // matched
  907. if (text[i + 1] == '<') {
  908. int anglebrkStart = i + 1;
  909. int anglebrkEnd = text.IndexOf('>', i + 2);
  910. if (anglebrkEnd != -1) {
  911. // grab the # or 'name' of the group between '< >'
  912. int lengrp = anglebrkEnd - (anglebrkStart + 1);
  913. string grp = text.Substring(anglebrkStart + 1, lengrp);
  914. int num;
  915. Group g;
  916. if (StringUtils.TryParseInt32(grp, out num)) {
  917. g = m.Groups[num];
  918. if (String.IsNullOrEmpty(g.Value)) {
  919. throw PythonOps.IndexError("unknown group reference");
  920. }
  921. sb.Append(g.Value);
  922. } else {
  923. g = m.Groups[grp];
  924. if (String.IsNullOrEmpty(g.Value)) {
  925. throw PythonOps.IndexError("unknown group reference");
  926. }
  927. sb.Append(g.Value);
  928. }
  929. i = anglebrkEnd;
  930. }
  931. break;
  932. }
  933. sb.Append('\\');
  934. sb.Append((char)text[i]);
  935. break;
  936. default:
  937. if (Char.IsDigit(text[i]) && text[i] <= '7') {
  938. int val = 0;
  939. int digitCount = 0;
  940. while (i < text.Length && Char.IsDigit(text[i]) && text[i] <= '7') {
  941. digitCount++;
  942. val += val * 8 + (text[i] - '0');
  943. i++;
  944. }
  945. i--;
  946. if (digitCount == 1 && val > 0 && val < m.Groups.Count) {
  947. sb.Append(m.Groups[val].Value);
  948. } else {
  949. sb.Append((char)val);
  950. }
  951. } else {
  952. sb.Append('\\');
  953. sb.Append((char)text[i]);
  954. }
  955. break;
  956. }
  957. } else {
  958. sb.Append(text[i]);
  959. }
  960. } while (++i < text.Length);
  961. r

Large files are truncated, but you can click here to view the full file