PageRenderTime 61ms CodeModel.GetById 20ms RepoModel.GetById 0ms app.codeStats 1ms

/IronPython_1_1/Src/IronPython/Modules/re.cs

#
C# | 1110 lines | 871 code | 153 blank | 86 comment | 195 complexity | e1db274a1727b3ce43e0ec3092baf67f MD5 | raw file
Possible License(s): GPL-2.0, MPL-2.0-no-copyleft-exception, CPL-1.0, CC-BY-SA-3.0, BSD-3-Clause, ISC, AGPL-3.0, LGPL-2.1, Apache-2.0
  1. /* ****************************************************************************
  2. *
  3. * Copyright (c) Microsoft Corporation.
  4. *
  5. * This source code is subject to terms and conditions of the Microsoft Public
  6. * License. A copy of the license can be found in the License.html file at the
  7. * root of this distribution. If you cannot locate the Microsoft Public
  8. * License, please send an email to dlr@microsoft.com. By using this source
  9. * code in any fashion, you are agreeing to be bound by the terms of the
  10. * Microsoft Public License.
  11. *
  12. * You must not remove this notice, or any other, from this software.
  13. *
  14. * ***************************************************************************/
  15. using System;
  16. using System.Text;
  17. using System.Text.RegularExpressions;
  18. using System.Collections;
  19. using System.Collections.Generic;
  20. using System.Diagnostics;
  21. using IronPython.Runtime;
  22. using IronPython.Runtime.Operations;
  23. using IronPython.Runtime.Exceptions;
  24. using System.Runtime.InteropServices;
  25. [assembly: PythonModule("re", typeof(IronPython.Modules.PythonRegex))]
  26. namespace IronPython.Modules {
  27. /// <summary>
  28. /// Python regular expression module.
  29. /// </summary>
  30. [PythonType("re")]
  31. public static class PythonRegex {
        #region CONSTANTS

        // Flag values accepted by the "flags" arguments of this module. Each flag is a
        // distinct bit, and FlagsToOption tests them individually with a bitwise AND.

        // short forms
        public static object I = 0x02;
        public static object L = 0x04;
        public static object M = 0x08;
        public static object S = 0x10;
        public static object U = 0x20;
        public static object X = 0x40;

        // long forms (same values as the short forms above)
        public static object IGNORECASE = 0x02;
        public static object LOCALE = 0x04;
        public static object MULTILINE = 0x08;
        public static object DOTALL = 0x10;
        public static object UNICODE = 0x20;
        public static object VERBOSE = 0x40;

        #endregion
  48. #region Public API Surface
  49. [PythonName("compile")]
  50. public static RE_Pattern Compile(object pattern) {
  51. try {
  52. return new RE_Pattern(ValidatePattern(pattern));
  53. } catch (ArgumentException e) {
  54. throw ExceptionConverter.CreateThrowable(error, e.Message);
  55. }
  56. }
  57. [PythonName("compile")]
  58. public static RE_Pattern Compile(object pattern, object flags) {
  59. try {
  60. return new RE_Pattern(ValidatePattern(pattern), Converter.ConvertToInt32(flags));
  61. } catch (ArgumentException e) {
  62. throw ExceptionConverter.CreateThrowable(error, e.Message);
  63. }
  64. }
        // String identifying the regular expression engine behind this module.
        public static string engine = "cli reg ex";
        // Python exception named "error" created for the "re" module; the methods in this file
        // raise it through ExceptionConverter.CreateThrowable(error, message).
        public static object error = ExceptionConverter.CreatePythonException("error", "re");
  67. [PythonName("escape")]
  68. public static string Escape(string text) {
  69. if (text == null) throw Ops.TypeError("text must not be None");
  70. for (int i = 0; i < text.Length; i++) {
  71. if (!Char.IsLetterOrDigit(text[i])) {
  72. StringBuilder sb = new StringBuilder(text, 0, i, text.Length);
  73. char ch = text[i];
  74. do {
  75. sb.Append('\\');
  76. sb.Append(ch);
  77. i++;
  78. int last = i;
  79. while (i < text.Length) {
  80. ch = text[i];
  81. if (!Char.IsLetterOrDigit(ch)) {
  82. break;
  83. }
  84. i++;
  85. }
  86. sb.Append(text, last, i - last);
  87. } while (i < text.Length);
  88. return sb.ToString();
  89. }
  90. }
  91. return text;
  92. }
  93. [PythonName("findall")]
  94. public static object FindAll(object pattern, string @string) {
  95. return FindAll(pattern, @string, 0);
  96. }
  97. [PythonName("findall")]
  98. public static object FindAll(object pattern, string @string, int flags) {
  99. RE_Pattern pat = new RE_Pattern(ValidatePattern(pattern), flags);
  100. ValidateString(@string, "string");
  101. MatchCollection mc = pat.FindAllWorker(@string, 0, @string.Length);
  102. return FixFindAllMatch(pat, mc);
  103. }
  104. private static object FixFindAllMatch(RE_Pattern pat, MatchCollection mc) {
  105. object[] matches = new object[mc.Count];
  106. int numgrps = pat.re.GetGroupNumbers().Length;
  107. for (int i = 0; i < mc.Count; i++) {
  108. if (numgrps > 2) { // CLR gives us a "bonus" group of 0 - the entire expression
  109. // at this point we have more than one group in the pattern;
  110. // need to return a list of tuples in this case
  111. // for each match item in the matchcollection, create a tuple representing what was matched
  112. // e.g. findall("(\d+)|(\w+)", "x = 99y") == [('', 'x'), ('99', ''), ('', 'y')]
  113. // in the example above, ('', 'x') did not match (\d+) as indicated by '' but did
  114. // match (\w+) as indicated by 'x' and so on...
  115. int k = 0;
  116. List<object> tpl = new List<object>();
  117. foreach (Group g in mc[i].Groups) {
  118. // here also the CLR gives us a "bonus" match as the first item which is the
  119. // group that was actually matched in the tuple e.g. we get 'x', '', 'x' for
  120. // the first match object...so we'll skip the first item when creating the
  121. // tuple
  122. if (k++ != 0) {
  123. tpl.Add(g.Value);
  124. }
  125. }
  126. matches[i] = Tuple.Make(tpl);
  127. } else if (numgrps == 2) {
  128. // at this point we have exactly one group in the pattern (including the "bonus" one given
  129. // by the CLR
  130. // skip the first match since that contains the entire match and not the group match
  131. // e.g. re.findall(r"(\w+)\s+fish\b", "green fish") will have "green fish" in the 0
  132. // index and "green" as the (\w+) group match
  133. matches[i] = mc[i].Groups[1].Value;
  134. } else {
  135. matches[i] = mc[i].Value;
  136. }
  137. }
  138. return new List(matches);
  139. }
  140. [PythonName("finditer")]
  141. public static object FindIter(object pattern, object @string) {
  142. return FindIter(pattern, @string, 0);
  143. }
  144. [PythonName("finditer")]
  145. public static object FindIter(object pattern, object @string, int flags) {
  146. RE_Pattern pat = new RE_Pattern(ValidatePattern(pattern), flags);
  147. string str = ValidateString(@string, "string");
  148. return MatchIterator(pat.FindAllWorker(str, 0, str.Length), pat, str);
  149. }
  150. [PythonName("match")]
  151. public static object Match(object pattern, object @string) {
  152. return Match(pattern, @string, 0);
  153. }
  154. [PythonName("match")]
  155. public static object Match(object pattern, object @string, int flags) {
  156. return new RE_Pattern(ValidatePattern(pattern), flags).Match(ValidateString(@string, "string"));
  157. }
  158. [PythonName("search")]
  159. public static object Search(object pattern, object @string) {
  160. return Search(pattern, @string, 0);
  161. }
  162. [PythonName("search")]
  163. public static object Search(object pattern, object @string, int flags) {
  164. return new RE_Pattern(ValidatePattern(pattern), flags).Search(ValidateString(@string, "string"));
  165. }
  166. [PythonName("split")]
  167. public static object Split(object pattern, object @string) {
  168. return Split(ValidatePattern(pattern), ValidateString(@string, "string"), 0);
  169. }
  170. [PythonName("split")]
  171. public static object Split(object pattern, object @string, int maxsplit) {
  172. return new RE_Pattern(ValidatePattern(pattern)).Split(ValidateString(@string, "string"),
  173. maxsplit);
  174. }
  175. [PythonName("sub")]
  176. public static object Substitute(object pattern, object repl, object @string) {
  177. return Substitute(pattern, repl, @string, Int32.MaxValue);
  178. }
  179. [PythonName("sub")]
  180. public static object Substitute(object pattern, object repl, object @string, int count) {
  181. return new RE_Pattern(ValidatePattern(pattern)).Substitute(repl, ValidateString(@string, "string"), count);
  182. }
  183. [PythonName("subn")]
  184. public static object SubGetCount(object pattern, object repl, object @string) {
  185. return SubGetCount(pattern, repl, @string, Int32.MaxValue);
  186. }
  187. [PythonName("subn")]
  188. public static object SubGetCount(object pattern, object repl, object @string, int count) {
  189. return new RE_Pattern(ValidatePattern(pattern)).SubGetCount(repl, ValidateString(@string, "string"), count);
  190. }
  191. #endregion
  192. #region Public classes
  193. /// <summary>
  194. /// Compiled reg-ex pattern
  195. /// </summary>
  196. public class RE_Pattern : IWeakReferenceable {
  197. internal Regex re;
  198. private Dict groups;
  199. private int compileFlags;
  200. private WeakRefTracker weakRefTracker;
  201. internal ParsedRegex pre;
  202. public RE_Pattern(object pattern)
  203. : this(pattern, 0) {
  204. }
  205. public RE_Pattern(object pattern, int flags) {
  206. pre = PreParseRegex(ValidatePattern(pattern));
  207. try {
  208. RegexOptions opts = FlagsToOption(flags);
  209. this.re = new Regex(pre.Pattern, opts);
  210. } catch (ArgumentException e) {
  211. throw ExceptionConverter.CreateThrowable(error, e.Message);
  212. }
  213. this.compileFlags = flags;
  214. }
  215. [PythonName("match")]
  216. public RE_Match Match(object text) {
  217. string input = ValidateString(text, "text");
  218. return RE_Match.makeMatch(re.Match(input), this, input, 0, input.Length);
  219. }
  220. private static int FixPosition(string text, int position) {
  221. if (position < 0) return 0;
  222. if (position > text.Length) return text.Length;
  223. return position;
  224. }
  225. [PythonName("match")]
  226. public RE_Match Match(object text, int pos) {
  227. string input = ValidateString(text, "text");
  228. pos = FixPosition(input, pos);
  229. return RE_Match.makeMatch(re.Match(input, pos), this, input, pos, input.Length);
  230. }
  231. [PythonName("match")]
  232. public RE_Match Match(object text, [DefaultParameterValue(0)] int pos, int endpos) {
  233. string input = ValidateString(text, "text");
  234. pos = FixPosition(input, pos);
  235. endpos = FixPosition(input, endpos);
  236. return RE_Match.makeMatch(
  237. re.Match(input.Substring(0, endpos), pos),
  238. this,
  239. input,
  240. pos,
  241. endpos);
  242. }
  243. [PythonName("search")]
  244. public RE_Match Search(object text) {
  245. string input = ValidateString(text, "text");
  246. return RE_Match.make(re.Match(input), this, input);
  247. }
  248. [PythonName("search")]
  249. public RE_Match Search(object text, int pos) {
  250. string input = ValidateString(text, "text");
  251. return RE_Match.make(re.Match(input, pos, input.Length - pos), this, input);
  252. }
  253. [PythonName("search")]
  254. public RE_Match Search(object text, int pos, int endpos) {
  255. string input = ValidateString(text, "text");
  256. return RE_Match.make(re.Match(input, pos, Math.Max(endpos - pos, 0)), this, input);
  257. }
  258. [PythonName("findall")]
  259. public object FindAll(string @string) {
  260. return FindAll(@string, 0, null);
  261. }
  262. [PythonName("findall")]
  263. public object FindAll(string @string, int pos) {
  264. return FindAll(@string, pos, null);
  265. }
  266. [PythonName("findall")]
  267. public object FindAll(object @string, int pos, object endpos) {
  268. MatchCollection mc = FindAllWorker(ValidateString(@string, "text"), pos, endpos);
  269. return new List(FixFindAllMatch(this, mc));
  270. }
  271. internal MatchCollection FindAllWorker(string str, int pos, object endpos) {
  272. string against = str;
  273. if (endpos != null) {
  274. int end = Converter.ConvertToInt32(endpos);
  275. against = against.Substring(0, Math.Max(end, 0));
  276. }
  277. return re.Matches(against, pos);
  278. }
  279. [PythonName("finditer")]
  280. public object FindIter(object @string) {
  281. string input = ValidateString(@string, "string");
  282. return MatchIterator(FindAllWorker(input, 0, input.Length), this, input);
  283. }
  284. [PythonName("finditer")]
  285. public object FindIter(object @string, int pos) {
  286. string input = ValidateString(@string, "string");
  287. return MatchIterator(FindAllWorker(input, pos, input.Length), this, input);
  288. }
  289. [PythonName("finditer")]
  290. public object FindIter(object @string, int pos, int endpos) {
  291. string input = ValidateString(@string, "string");
  292. return MatchIterator(FindAllWorker(input, pos, endpos), this, input);
  293. }
  294. [PythonName("split")]
  295. public object Split(object @string) {
  296. return Split(@string, 0);
  297. }
  298. [PythonName("split")]
  299. public object Split(object @string, int maxsplit) {
  300. List result = new List();
  301. // fast path for negative maxSplit ( == "make no splits")
  302. if (maxsplit < 0)
  303. result.AddNoLock(@string);
  304. else {
  305. // iterate over all matches
  306. string theStr = ValidateString(@string, "string");
  307. MatchCollection matches = re.Matches(theStr);
  308. int lastPos = 0; // is either start of the string, or first position *after* the last match
  309. int nSplits = 0; // how many splits have occurred?
  310. foreach (Match m in matches) {
  311. if (m.Length > 0) {
  312. // add substring from lastPos to beginning of current match
  313. result.AddNoLock(theStr.Substring(lastPos, m.Index - lastPos));
  314. // if there are subgroups of the match, add their match or None
  315. if (m.Groups.Count > 1)
  316. for (int i = 1; i < m.Groups.Count; i++)
  317. if (m.Groups[i].Success)
  318. result.AddNoLock(m.Groups[i].Value);
  319. else
  320. result.AddNoLock(null);
  321. // update lastPos, nSplits
  322. lastPos = m.Index + m.Length;
  323. nSplits++;
  324. if (nSplits == maxsplit)
  325. break;
  326. }
  327. }
  328. // add tail following last match
  329. result.AddNoLock(theStr.Substring(lastPos));
  330. }
  331. return result;
  332. }
  333. [PythonName("sub")]
  334. public string Substitute(object repl, object @string) {
  335. return Substitute(repl, ValidateString(@string, "string"), Int32.MaxValue);
  336. }
  337. [PythonName("sub")]
  338. public string Substitute(object repl, object @string, int count) {
  339. if (repl == null) throw Ops.TypeError("NoneType is not valid repl");
  340. // if 'count' is omitted or 0, all occurrences are replaced
  341. if (count == 0) count = Int32.MaxValue;
  342. string replacement = repl as string;
  343. if (replacement == null) {
  344. if (repl is ExtensibleString) {
  345. replacement = (repl as ExtensibleString).Value;
  346. }
  347. }
  348. Match prev = null;
  349. string input = ValidateString(@string, "string");
  350. return re.Replace(
  351. input,
  352. delegate(Match match) {
  353. // from the docs: Empty matches for the pattern are replaced
  354. // only when not adjacent to a previous match
  355. if (String.IsNullOrEmpty(match.Value) && prev != null &&
  356. (prev.Index + prev.Length) == match.Index) {
  357. return "";
  358. };
  359. prev = match;
  360. if (replacement != null) return UnescapeGroups(match, replacement);
  361. return Ops.Call(repl, RE_Match.make(match, this, input)) as string;
  362. },
  363. count);
  364. }
  365. [PythonName("subn")]
  366. public object SubGetCount(object repl, string @string) {
  367. return SubGetCount(repl, @string, Int32.MaxValue);
  368. }
  369. [PythonName("subn")]
  370. public object SubGetCount(object repl, object @string, int count) {
  371. if (repl == null) throw Ops.TypeError("NoneType is not valid repl");
  372. // if 'count' is omitted or 0, all occurrences are replaced
  373. if (count == 0) count = Int32.MaxValue;
  374. int totalCount = 0;
  375. string res;
  376. string replacement = repl as string;
  377. if (replacement == null) {
  378. if (repl is ExtensibleString) {
  379. replacement = (repl as ExtensibleString).Value;
  380. }
  381. }
  382. Match prev = null;
  383. string input = ValidateString(@string, "string");
  384. res = re.Replace(
  385. input,
  386. delegate(Match match) {
  387. // from the docs: Empty matches for the pattern are replaced
  388. // only when not adjacent to a previous match
  389. if (String.IsNullOrEmpty(match.Value) && prev != null &&
  390. (prev.Index + prev.Length) == match.Index) {
  391. return "";
  392. };
  393. prev = match;
  394. totalCount++;
  395. if (replacement != null) return UnescapeGroups(match, replacement);
  396. return Ops.Call(repl, RE_Match.make(match, this, input)) as string;
  397. },
  398. count);
  399. return Tuple.MakeTuple(res, totalCount);
  400. }
  401. public int Flags {
  402. [PythonName("flags")]
  403. get {
  404. return compileFlags;
  405. }
  406. }
  407. public Dict GroupIndex {
  408. [PythonName("groupindex")]
  409. get {
  410. if (groups == null) {
  411. Dict d = new Dict();
  412. string[] names = re.GetGroupNames();
  413. int[] nums = re.GetGroupNumbers();
  414. for (int i = 1; i < names.Length; i++) {
  415. if (Char.IsDigit(names[i][0])) continue; // skip numeric names
  416. d[names[i]] = nums[i];
  417. }
  418. groups = d;
  419. }
  420. return groups;
  421. }
  422. }
  423. public string Pattern {
  424. [PythonName("pattern")]
  425. get {
  426. return pre.UserPattern;
  427. }
  428. }
  429. #region IWeakReferenceable Members
  430. public WeakRefTracker GetWeakRef() {
  431. return weakRefTracker;
  432. }
  433. public bool SetWeakRef(WeakRefTracker value) {
  434. weakRefTracker = value;
  435. return true;
  436. }
  437. public void SetFinalizer(WeakRefTracker value) {
  438. SetWeakRef(value);
  439. }
  440. #endregion
  441. }
  442. public class RE_Match {
  443. RE_Pattern pattern;
  444. private Match m;
  445. private string text;
  446. private int lastindex = -1;
  447. private int endPos;
  448. #region Internal makers
  449. internal static RE_Match make(Match m, RE_Pattern pattern, string input) {
  450. if (m.Success) return new RE_Match(m, pattern, input);
  451. return null;
  452. }
  453. internal static RE_Match makeMatch(Match m, RE_Pattern pattern, string input, int offset, int endPos) {
  454. if (m.Success && m.Index == offset) return new RE_Match(m, pattern, input, endPos);
  455. return null;
  456. }
  457. #endregion
  458. #region Public ctors
  459. public RE_Match(Match m, RE_Pattern pattern, string text) {
  460. this.m = m;
  461. this.pattern = pattern;
  462. this.text = text;
  463. }
  464. public RE_Match(Match m, RE_Pattern pattern, string text, int endpos) {
  465. this.m = m;
  466. this.pattern = pattern;
  467. this.text = text;
  468. this.endPos = endpos;
  469. }
  470. #endregion
  471. // public override bool __nonzero__() {
  472. // return m.Success;
  473. // }
  474. #region Public API Surface
  475. [PythonName("end")]
  476. public int End() {
  477. return m.Index + m.Length;
  478. }
  479. [PythonName("start")]
  480. public int Start() {
  481. return m.Index;
  482. }
  483. [PythonName("start")]
  484. public int Start(object group) {
  485. int grpIndex = GetGroupIndex(group);
  486. if (!m.Groups[grpIndex].Success) {
  487. return -1;
  488. }
  489. return m.Groups[grpIndex].Index;
  490. }
  491. [PythonName("end")]
  492. public int End(object group) {
  493. int grpIndex = GetGroupIndex(group);
  494. if (!m.Groups[grpIndex].Success) {
  495. return -1;
  496. }
  497. return m.Groups[grpIndex].Index + m.Groups[grpIndex].Length;
  498. }
  499. [PythonName("group")]
  500. public object Group(object index, params object[] additional) {
  501. if (additional.Length == 0) return Group(index);
  502. object[] res = new object[additional.Length + 1];
  503. res[0] = m.Groups[GetGroupIndex(index)].Success ? m.Groups[GetGroupIndex(index)].Value : null;
  504. for (int i = 1; i < res.Length; i++) {
  505. int grpIndex = GetGroupIndex(additional[i - 1]);
  506. res[i] = m.Groups[grpIndex].Success ? m.Groups[grpIndex].Value : null;
  507. }
  508. return Tuple.MakeTuple(res);
  509. }
  510. [PythonName("group")]
  511. public object Group(object index) {
  512. int pos = GetGroupIndex(index);
  513. Group g = m.Groups[pos];
  514. return g.Success ? g.Value : null;
  515. }
  516. [PythonName("group")]
  517. public object Group() {
  518. return Group(0);
  519. }
  520. [PythonName("groups")]
  521. public object Groups() {
  522. return Groups(null);
  523. }
  524. [PythonName("groups")]
  525. public object Groups(object @default) {
  526. object[] ret = new object[m.Groups.Count - 1];
  527. for (int i = 1; i < m.Groups.Count; i++) {
  528. if (!m.Groups[i].Success) {
  529. ret[i - 1] = @default;
  530. } else {
  531. ret[i - 1] = m.Groups[i].Value;
  532. }
  533. }
  534. return Ops.MakeTuple(ret);
  535. }
  536. [PythonName("expand")]
  537. public object Expand(object template) {
  538. string strTmp = ValidateString(template, "template");
  539. StringBuilder res = new StringBuilder();
  540. for (int i = 0; i < strTmp.Length; i++) {
  541. if (strTmp[i] != '\\') { res.Append(strTmp[i]); continue; }
  542. if (++i == strTmp.Length) { res.Append(strTmp[i - 1]); continue; }
  543. if (Char.IsDigit(strTmp[i])) {
  544. AppendGroup(res, (int)(strTmp[i] - '0'));
  545. } else if (strTmp[i] == 'g') {
  546. if (++i == strTmp.Length) { res.Append("\\g"); return res.ToString(); }
  547. if (strTmp[i] != '<') {
  548. res.Append("\\g<"); continue;
  549. } else { // '<'
  550. StringBuilder name = new StringBuilder();
  551. i++;
  552. while (strTmp[i] != '>' && i < strTmp.Length) {
  553. name.Append(strTmp[i++]);
  554. }
  555. AppendGroup(res, pattern.re.GroupNumberFromName(name.ToString()));
  556. }
  557. } else {
  558. switch (strTmp[i]) {
  559. case 'n': res.Append('\n'); break;
  560. case 'r': res.Append('\r'); break;
  561. case 't': res.Append('\t'); break;
  562. case '\\': res.Append('\\'); break;
  563. }
  564. }
  565. }
  566. return res.ToString();
  567. }
  568. [PythonName("groupdict")]
  569. public object GroupDict() {
  570. return GroupDict(null);
  571. }
  572. private static bool IsGroupNumber(string name) {
  573. foreach (char c in name) {
  574. if (!Char.IsNumber(c)) return false;
  575. }
  576. return true;
  577. }
  578. [PythonName("groupdict")]
  579. public object GroupDict(object value) {
  580. string[] groupNames = this.pattern.re.GetGroupNames();
  581. Debug.Assert(groupNames.Length == this.m.Groups.Count);
  582. Dict d = new Dict();
  583. for (int i = 0; i < groupNames.Length; i++) {
  584. if (IsGroupNumber(groupNames[i])) continue; // python doesn't report group numbers
  585. if (m.Groups[i].Captures.Count != 0) {
  586. d[groupNames[i]] = m.Groups[i].Value;
  587. } else {
  588. d[groupNames[i]] = value;
  589. }
  590. }
  591. return d;
  592. }
  593. [PythonName("span")]
  594. public object Span() {
  595. return Tuple.MakeTuple(this.Start(), this.End());
  596. }
  597. [PythonName("span")]
  598. public object Span(object group) {
  599. return Tuple.MakeTuple(this.Start(group), this.End(group));
  600. }
  601. public int Position {
  602. [PythonName("pos")]
  603. get {
  604. return m.Index;
  605. }
  606. }
  607. public int EndPosition {
  608. [PythonName("endpos")]
  609. get {
  610. return endPos;
  611. }
  612. }
  613. public string SearchValue {
  614. [PythonName("string")]
  615. get {
  616. return text;
  617. }
  618. }
  619. public object Regs {
  620. [PythonName("regs")]
  621. get {
  622. object[] res = new object[m.Groups.Count];
  623. for(int i = 0; i<res.Length; i++) {
  624. res[i] = Tuple.MakeTuple(this.Start(i), this.End(i));
  625. }
  626. return Tuple.MakeTuple(res);
  627. }
  628. }
  629. public object Pattern {
  630. [PythonName("re")]
  631. get {
  632. return pattern;
  633. }
  634. }
  635. public object LastIndex {
  636. [PythonName("lastindex")]
  637. get {
  638. // -1 : initial value of lastindex
  639. // 0 : no match found
  640. //other : the true lastindex
  641. // Match.Groups contains "lower" level matched groups, which has to be removed
  642. if (lastindex == -1) {
  643. int i = 1;
  644. while (i < m.Groups.Count) {
  645. if (m.Groups[i].Success) {
  646. lastindex = i;
  647. int start = m.Groups[i].Index;
  648. int end = start + m.Groups[i].Length;
  649. i++;
  650. // skip any group which fall into the range [start, end],
  651. // no matter match succeed or fail
  652. while (i < m.Groups.Count && (m.Groups[i].Index < end)) {
  653. i++;
  654. }
  655. } else {
  656. i++;
  657. }
  658. }
  659. if (lastindex == -1) {
  660. lastindex = 0;
  661. }
  662. }
  663. if (lastindex == 0) {
  664. return null;
  665. } else {
  666. return lastindex;
  667. }
  668. }
  669. }
  670. public object LastGroup {
  671. [PythonName("lastgroup")]
  672. get {
  673. if (LastIndex == null) return null;
  674. // when group was not explicitly named, RegEx assigns the number as name
  675. // This is different from C-Python, which returns None in such cases
  676. return this.pattern.re.GroupNameFromNumber((int)LastIndex);
  677. }
  678. }
  679. #endregion
  680. #region Private helper functions
  681. private void AppendGroup(StringBuilder sb, int index) {
  682. sb.Append(m.Groups[index].Value);
  683. }
  684. private int GetGroupIndex(object group) {
  685. int grpIndex;
  686. if (!Converter.TryConvertToInt32(group, out grpIndex)) {
  687. grpIndex = pattern.re.GroupNumberFromName(ValidateString(group, "group"));
  688. }
  689. if (grpIndex < 0 || grpIndex >= m.Groups.Count) {
  690. throw Ops.IndexError("no such group");
  691. }
  692. return grpIndex;
  693. }
  694. #endregion
  695. }
  696. #endregion
  697. #region Private helper functions
  698. private static IEnumerator MatchIterator(MatchCollection matches, RE_Pattern pattern, string input) {
  699. for (int i = 0; i < matches.Count; i++) {
  700. yield return RE_Match.make(matches[i], pattern, input);
  701. }
  702. }
  703. private static RegexOptions FlagsToOption(int flags) {
  704. RegexOptions opts = RegexOptions.None;
  705. if ((flags & (int)IGNORECASE) != 0) opts |= RegexOptions.IgnoreCase;
  706. if ((flags & (int)MULTILINE) != 0) opts |= RegexOptions.Multiline;
  707. if (((flags & (int)LOCALE)) == 0) opts &= (~RegexOptions.CultureInvariant);
  708. if ((flags & (int)DOTALL) != 0) opts |= RegexOptions.Singleline;
  709. if ((flags & (int)VERBOSE) != 0) opts |= RegexOptions.IgnorePatternWhitespace;
  710. return opts;
  711. }
  712. internal class ParsedRegex {
  713. public ParsedRegex(string pattern) {
  714. this.UserPattern = pattern;
  715. }
  716. public string UserPattern;
  717. public string Pattern;
  718. public RegexOptions Options = RegexOptions.CultureInvariant;
  719. }
  720. /// <summary>
  721. /// Preparses a regular expression text returning a ParsedRegex class
  722. /// that can be used for further regular expressions.
  723. /// </summary>
  724. private static ParsedRegex PreParseRegex(string pattern) {
  725. ParsedRegex res = new ParsedRegex(pattern);
  726. //string newPattern;
  727. int cur = 0, nameIndex;
  728. int curGroup = 0;
  729. bool containsNamedGroup = false;
  730. for (; ; ) {
  731. nameIndex = pattern.IndexOf("(", cur);
  732. if (nameIndex > 0 && pattern[nameIndex - 1] == '\\') {
  733. int curIndex = nameIndex - 2;
  734. int backslashCount = 1;
  735. while (curIndex >= 0 && pattern[curIndex] == '\\') {
  736. backslashCount++;
  737. curIndex--;
  738. }
  739. // odd number of back slashes, this is an optional
  740. // paren that we should ignore.
  741. if ((backslashCount & 0x01) != 0) {
  742. cur++;
  743. continue;
  744. }
  745. }
  746. if (nameIndex == -1) break;
  747. if (nameIndex == pattern.Length - 1) break;
  748. switch (pattern[++nameIndex]) {
  749. case '?':
  750. // extension syntax
  751. if (nameIndex == pattern.Length - 1) throw ExceptionConverter.CreateThrowable(error, "unexpected end of regex");
  752. switch (pattern[++nameIndex]) {
  753. case 'P':
  754. // named regex, .NET doesn't expect the P so we'll remove it;
  755. // also, once we see a named group i.e. ?P then we need to start artificially
  756. // naming all unnamed groups from then on---this is to get around the fact that
  757. // the CLR RegEx support orders all the unnamed groups before all the named
  758. // groups, even if the named groups are before the unnamed ones in the pattern;
  759. // the artificial naming preserves the order of the groups and thus the order of
  760. // the matches
  761. if (nameIndex + 1 < pattern.Length && pattern[nameIndex + 1] == '=') {
  762. // match whatever was previously matched by the named group
  763. // remove the (?P=
  764. pattern = pattern.Remove(nameIndex - 2, 4);
  765. pattern = pattern.Insert(nameIndex - 2, "\\\\k<");
  766. int tmpIndex = nameIndex;
  767. while (tmpIndex < pattern.Length && pattern[tmpIndex] != ')')
  768. tmpIndex++;
  769. if (tmpIndex == pattern.Length) throw ExceptionConverter.CreateThrowable(error, "unexpected end of regex");
  770. pattern = pattern.Substring(0, tmpIndex) + ">" + pattern.Substring(tmpIndex + 1);
  771. } else {
  772. containsNamedGroup = true;
  773. pattern = pattern.Remove(nameIndex, 1);
  774. }
  775. break;
  776. case 'i': res.Options |= RegexOptions.IgnoreCase; break;
  777. case 'L': res.Options &= ~(RegexOptions.CultureInvariant); break;
  778. case 'm': res.Options |= RegexOptions.Multiline; break;
  779. case 's': res.Options |= RegexOptions.Singleline; break;
  780. case 'u': break;
  781. case 'x': res.Options |= RegexOptions.IgnorePatternWhitespace; break;
  782. case ':': break; // non-capturing
  783. case '=': break; // look ahead assertion
  784. case '<': break; // positive look behind assertion
  785. case '!': break; // negative look ahead assertion
  786. case '#': break; // inline comment
  787. case '(': // yes/no if group exists, we don't support this
  788. default: throw ExceptionConverter.CreateThrowable(error, "Unrecognized extension " + pattern[nameIndex]);
  789. }
  790. break;
  791. default:
  792. // just another group
  793. curGroup++;
  794. if (containsNamedGroup) {
  795. // need to name this unnamed group
  796. pattern = pattern.Insert(nameIndex, "?<Named" + GetRandomString() + ">");
  797. }
  798. break;
  799. }
  800. cur = nameIndex;
  801. }
  802. cur = 0;
  803. for (; ; ) {
  804. nameIndex = pattern.IndexOf('\\', cur);
  805. if (nameIndex == -1 || nameIndex == pattern.Length - 1) break;
  806. char curChar = pattern[++nameIndex];
  807. switch (curChar) {
  808. case 'x':
  809. case 'u':
  810. case 'a':
  811. case 'b':
  812. case 'e':
  813. case 'f':
  814. case 'n':
  815. case 'r':
  816. case 't':
  817. case 'v':
  818. case 'c':
  819. case 's':
  820. case 'W':
  821. case 'w':
  822. case 'p':
  823. case 'P':
  824. case 'S':
  825. case 'd':
  826. case 'D':
  827. case 'Z':
  828. // known escape sequences, leave escaped.
  829. break;
  830. case '\\':
  831. // escaping a \\
  832. cur += 2;
  833. break;
  834. default:
  835. System.Globalization.UnicodeCategory charClass = Char.GetUnicodeCategory(curChar);
  836. switch (charClass) {
  837. // recognized word characters, always unescape.
  838. case System.Globalization.UnicodeCategory.ModifierLetter:
  839. case System.Globalization.UnicodeCategory.LowercaseLetter:
  840. case System.Globalization.UnicodeCategory.UppercaseLetter:
  841. case System.Globalization.UnicodeCategory.TitlecaseLetter:
  842. case System.Globalization.UnicodeCategory.OtherLetter:
  843. case System.Globalization.UnicodeCategory.LetterNumber:
  844. case System.Globalization.UnicodeCategory.OtherNumber:
  845. case System.Globalization.UnicodeCategory.ConnectorPunctuation:
  846. pattern = pattern.Remove(nameIndex - 1, 1);
  847. break;
  848. case System.Globalization.UnicodeCategory.DecimalDigitNumber:
  849. // actually don't want to unescape '\1', '\2' etc. which are references to groups
  850. break;
  851. }
  852. break;
  853. }
  854. cur++;
  855. }
  856. res.Pattern = pattern;
  857. return res;
  858. }
  859. static Random r = new Random(DateTime.Now.Millisecond);
  860. private static string GetRandomString() {
  861. return r.Next(Int32.MaxValue / 2, Int32.MaxValue).ToString();
  862. }
  863. private static string UnescapeGroups(Match m, string text) {
  864. for (int i = 0; i < text.Length; i++) {
  865. if (text[i] == '\\') {
  866. StringBuilder sb = new StringBuilder(text, 0, i, text.Length);
  867. do {
  868. if (text[i] == '\\') {
  869. i++;
  870. if (i == text.Length) { sb.Append('\\'); break; }
  871. switch (text[i]) {
  872. case 'n': sb.Append('\n'); break;
  873. case 'r': sb.Append('\r'); break;
  874. case 't': sb.Append('\t'); break;
  875. case '\\': sb.Append('\\'); break;
  876. case '\'': sb.Append('\''); break;
  877. case '"': sb.Append('"'); break;
  878. case 'b': sb.Append('\b'); break;
  879. case 'g':
  880. // \g<#>, \g<name> need to be substituted by the groups they
  881. // matched
  882. if (text[i + 1] == '<') {
  883. int anglebrkStart = i + 1;
  884. int anglebrkEnd = text.IndexOf('>', i + 2);
  885. if (anglebrkEnd != -1) {
  886. // grab the # or 'name' of the group between '< >'
  887. int lengrp = anglebrkEnd - (anglebrkStart + 1);
  888. string grp = text.Substring(anglebrkStart + 1, lengrp);
  889. int num;
  890. Group g;
  891. if (Int32.TryParse(grp, out num)) {
  892. g = m.Groups[num];
  893. if (String.IsNullOrEmpty(g.Value)) {
  894. throw Ops.IndexError("unknown group reference");
  895. }
  896. sb.Append(g.Value);
  897. } else {
  898. g = m.Groups[grp];
  899. if (String.IsNullOrEmpty(g.Value)) {
  900. throw Ops.IndexError("unknown group reference");
  901. }
  902. sb.Append(g.Value);
  903. }
  904. i = anglebrkEnd;
  905. }
  906. break;
  907. }
  908. sb.Append('\\');
  909. sb.Append((char)text[i]);
  910. break;
  911. default:
  912. if (Char.IsDigit(text[i]) && text[i] <= '7') {
  913. int val = 0;
  914. int digitCount = 0;
  915. while (i < text.Length && Char.IsDigit(text[i]) && text[i] <= '7') {
  916. digitCount++;
  917. val += val * 8 + (text[i] - '0');
  918. i++;
  919. }
  920. i--;
  921. if (digitCount == 1 && val > 0 && val < m.Groups.Count) {
  922. sb.Append(m.Groups[val].Value);
  923. } else {
  924. sb.Append((char)val);
  925. }
  926. } else {
  927. sb.Append('\\');
  928. sb.Append((char)text[i]);
  929. }
  930. break;
  931. }
  932. } else {
  933. sb.Append(text[i]);
  934. }
  935. } while (++i < text.Length);
  936. return sb.ToString();
  937. }
  938. }
  939. return text;
  940. }
  941. private static string ValidatePattern(object pattern) {
  942. if (pattern is string) return pattern as string;
  943. ExtensibleString es = pattern as ExtensibleString;
  944. if (es != null) return es.Value;
  945. RE_Pattern rep = pattern as RE_Pattern;
  946. if (rep != null) return rep.pre.UserPattern;
  947. throw Ops.TypeError("pattern must be a string or compiled pattern");
  948. }
  949. private static string ValidateString(object str, string param) {
  950. if (str is string) return str as string;
  951. ExtensibleString es = str as ExtensibleString;
  952. if (es != null) return es.Value;
  953. throw Ops.TypeError("expected string for parameter '{0}' but got '{1}'", param, Ops.GetPythonTypeName(str));
  954. }
  955. #endregion
  956. }
  957. }