PageRenderTime 62ms CodeModel.GetById 29ms RepoModel.GetById 0ms app.codeStats 1ms

/Backend/Modules/_re.cs

https://bitbucket.org/AdamMil/boaold
C# | 430 lines | 351 code | 58 blank | 21 comment | 43 complexity | c84f1063c44ca6c7e3daf69afeb0f8aa MD5 | raw file
Possible License(s): GPL-2.0
  1. /*
  2. Boa is the reference implementation for a language similar to Python,
  3. also called Boa. This implementation is both interpreted and compiled,
  4. targeting the Microsoft .NET Framework.
  5. http://www.adammil.net/
  6. Copyright (C) 2004-2005 Adam Milazzo
  7. This program is free software; you can redistribute it and/or
  8. modify it under the terms of the GNU General Public License
  9. as published by the Free Software Foundation; either version 2
  10. of the License, or (at your option) any later version.
  11. This program is distributed in the hope that it will be useful,
  12. but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. GNU General Public License for more details.
  15. You should have received a copy of the GNU General Public License
  16. along with this program; if not, write to the Free Software
  17. Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
  18. */
  19. using System;
  20. using System.Collections;
  21. using System.Text.RegularExpressions;
  22. using Boa.Runtime;
  23. namespace Boa.Modules
  24. {
  // Static helpers that implement Python-style match-object operations on top of a
  // System.Text.RegularExpressions.Match. Every member receives the Match as its first argument.
  public sealed class re_internal
  {
    #region match
    public sealed class match
    {
      // Not implemented: always throws NotImplementedException.
      public static string expand(Match m, string template) { throw new NotImplementedException(); }

      // With no group arguments, returns the text of the whole match. With one argument, returns
      // that group's text (null if the group did not participate). With several arguments,
      // returns a Tuple holding one such entry per requested group.
      public static object group(Match m, params object[] groups)
      {
        if(groups.Length==0) return m.Value;
        if(groups.Length==1) return TextOrDefault(GetGroup(m, groups[0]), null);

        object[] values = new object[groups.Length];
        for(int i=0; i<values.Length; i++) values[i] = TextOrDefault(GetGroup(m, groups[i]), null);
        return new Tuple(values);
      }

      // Returns a Tuple with the text of every group except group 0, using null for groups that
      // did not participate in the match.
      public static Tuple groups(Match m) { return groups(m, null); }

      // Same as groups(m), but groups that did not participate are reported as 'defaultValue'.
      // (Previously 'defaultValue' was accepted but ignored, and null was always used.)
      public static Tuple groups(Match m, object defaultValue)
      {
        object[] values = new object[m.Groups.Count-1]; // group 0 (the whole match) is skipped
        for(int i=0; i<values.Length; i++) values[i] = TextOrDefault(m.Groups[i+1], defaultValue);
        return new Tuple(values);
      }

      public static Dict groupdict(Match m) { return groupdict(m, null); }

      // Not implemented: always throws NotImplementedException.
      public static Dict groupdict(Match m, object defaultValue) { throw new NotImplementedException(); }

      // Index of the first character of the whole match.
      public static int start(Match m) { return m.Index; }

      // Index of the first character of the given group (a null group means the whole match),
      // or -1 if the group did not participate.
      public static int start(Match m, object group)
      {
        if(group==null) return m.Index;
        Group g = GetGroup(m, group);
        return g.Success ? g.Index : -1;
      }

      // Index just past the last character of the whole match.
      public static int end(Match m) { return m.Index+m.Length; }

      // Index just past the last character of the given group (a null group means the whole
      // match), or -1 if the group did not participate.
      public static int end(Match m, object group)
      {
        if(group==null) return m.Index+m.Length;
        Group g = GetGroup(m, group);
        return g.Success ? g.Index+g.Length : -1;
      }

      // (start, end) of the whole match as a Tuple.
      public static Tuple span(Match m) { return new Tuple(m.Index, m.Index+m.Length); }

      // (start, end) of the given group (a null group means the whole match), or (-1, -1) if the
      // group did not participate.
      public static Tuple span(Match m, object group)
      {
        if(group==null) return new Tuple(m.Index, m.Index+m.Length);
        Group g = GetGroup(m, group);
        return g.Success ? new Tuple(g.Index, g.Index+g.Length) : new Tuple(-1, -1);
      }

      // Highest group number (excluding group 0) that participated in the match, or null if
      // none did.
      public static object get_lastindex(Match m)
      {
        for(int i=m.Groups.Count-1; i>0; i--) if(m.Groups[i].Success) return i;
        return null;
      }

      // Not implemented: always throws NotImplementedException.
      public static string get_lastgroup(Match m) { throw new NotImplementedException(); }

      // Looks a group up by number when 'g' is an int, otherwise by the name Ops.ToString(g).
      static Group GetGroup(Match m, object g) { return g is int ? m.Groups[(int)g] : m.Groups[Ops.ToString(g)]; }

      // Text captured by 'g', or 'fallback' when the group did not participate in the match.
      static object TextOrDefault(Group g, object fallback) { return g.Success ? g.Value : fallback; }
    }
    #endregion
  }
  78. // TODO: optimize by using Match.NextMatch()
  79. [BoaType("module")]
  80. public sealed class _re
  81. { _re() { }
  82. #region FindEnumerator
  // IEnumerator over the successive matches of a Regex in a string. The first MoveNext() runs
  // Regex.Match(); every later one advances with Match.NextMatch().
  public class FindEnumerator : IEnumerator
  {
    public FindEnumerator(Regex regex, string str)
    {
      this.expression = regex;
      this.input      = str;
      this.phase      = State.BOF;
    }

    // The match produced by the latest successful MoveNext(). Throws InvalidOperationException
    // before the first MoveNext() (or after Reset()) and once the matches are exhausted.
    public object Current
    {
      get
      {
        if(phase==State.IN) return current;
        throw new InvalidOperationException();
      }
    }

    // Advances to the next match; returns false (and stays finished) when there are no more.
    public bool MoveNext()
    {
      switch(phase)
      {
        case State.EOF:
          return false;
        case State.BOF:
          current = expression.Match(input);
          phase   = State.IN;
          break;
        default:
          current = current.NextMatch();
          break;
      }

      if(current.Success) return true;
      phase = State.EOF;
      return false;
    }

    // Rewinds so that the next MoveNext() starts matching from the beginning again.
    public void Reset() { phase = State.BOF; }

    // BOF: nothing matched yet; IN: positioned on a match; EOF: matches exhausted.
    enum State { BOF, IN, EOF };

    Regex expression;
    string input;
    Match current;
    State phase;
  }
  105. #endregion
  106. #region regex
  // A compiled regular expression. Derives from System.Text.RegularExpressions.Regex and adds
  // Python-style members; findall/finditer/split/sub/subn forward to the static functions of
  // Boa.Modules._re, passing this object as the pattern.
  public class regex : Regex, IRepresentable
  {
    public regex(string pattern, RegexOptions options) : base(pattern, options) { }

    [DocString("flags -> int\nThe flags used to create this regex.")]
    public int flags { get { return (int)Options; } }

    [DocString("pattern -> str\nThe pattern used to create this regex.")]
    public new string pattern { get { return base.pattern; } }

    [DocString("groupindex -> dict\nA dictionary mapping group names to group numbers.")]
    public Dict groupindex
    {
      get
      {
        if(groups==null) // built on first access, then cached in 'groups'
        {
          groups = new Dict();
          string[] names = GetGroupNames();
          for(int i=0; i<names.Length; i++) groups[names[i]] = GroupNumberFromName(names[i]);
        }
        return groups;
      }
    }

    // The DocStrings below are regular (non-verbatim) literals so that "\n" becomes a real line
    // break, as in the flags/pattern/groupindex DocStrings above.
    [DocString("findall(string) -> list\n\nSee documentation for re.findall()")]
    public List findall(string str) { return Boa.Modules._re.findall(this, str); }

    [DocString("finditer(string) -> iter\n\nSee documentation for re.finditer()")]
    public IEnumerator finditer(string str) { return Boa.Modules._re.finditer(this, str); }

    [DocString(@"match(string[, start[, end]]) -> Match
If zero or more characters at the beginning of 'string' match this regular
expression, return a corresponding Match instance. Return null if the string
does not match the pattern; note that this is different from a zero-length
match.
Note: If you want to locate a match anywhere in 'string', use search()
instead.
The optional second parameter pos gives an index in the string where the
search is to start; it defaults to 0.
The optional parameter endpos limits how far the string will be searched;
it will be as if the string is endpos characters long, so only the
characters from pos to endpos - 1 will be searched for a match. If endpos is
less than pos, no match will be found.")]
    public Match match(string str) { return match(str, 0, str.Length); }
    public Match match(string str, int start) { return match(str, start, str.Length); }

    // Returns the match that begins exactly at 'start', or null if there is none. 'start' and
    // 'end' are clamped to [0, str.Length], and an 'end' before 'start' yields null instead of
    // the ArgumentOutOfRangeException that Regex.Match would raise for a negative length.
    // NOTE(review): the two Regex.Match overloads used here appear to treat anchors such as '^'
    // differently when start > 0 -- confirm against the .NET documentation.
    public Match match(string str, int start, int end)
    {
      start = ClampIndex(start, str.Length);
      end   = ClampIndex(end, str.Length);
      if(end<start) return null;
      Match m = end==str.Length ? Match(str, start) : Match(str, start, end-start);
      return m.Success && m.Index==start ? m : null;
    }

    // Returns the first match anywhere in 'str', or null if there is none.
    public Match search(string str)
    {
      Match m = Match(str);
      return m.Success ? m : null;
    }

    // As search(str), but starts looking at 'start' (clamped to [0, str.Length]).
    public Match search(string str, int start)
    {
      Match m = Match(str, ClampIndex(start, str.Length));
      return m.Success ? m : null;
    }

    // As search(str, start), but only the characters from 'start' up to (not including) 'end'
    // are searched. Both bounds are clamped to [0, str.Length]; an 'end' before 'start' yields
    // null rather than an exception.
    public Match search(string str, int start, int end)
    {
      start = ClampIndex(start, str.Length);
      end   = ClampIndex(end, str.Length);
      if(end<start) return null;
      Match m = Match(str, start, end-start);
      return m.Success ? m : null;
    }

    [DocString("split(string[, maxsplit=0]) -> list\n\nSee documentation for re.split()")]
    public List split(string str) { return Boa.Modules._re.split(this, str, 0); }
    public List split(string str, int maxsplit) { return Boa.Modules._re.split(this, str, maxsplit); }

    [DocString("sub(repl, string [, maxreplace=0]) -> str\n\nSee documentation for re.sub()")]
    public string sub(object repl, string str) { return Boa.Modules._re.sub(this, repl, str, 0); }
    public string sub(object repl, string str, int maxreplace)
    {
      return Boa.Modules._re.sub(this, repl, str, maxreplace);
    }

    [DocString("subn(repl, string [, maxreplace=0]) -> tuple\n\nSee documentation for re.subn()")]
    public Tuple subn(object repl, string str) { return Boa.Modules._re.subn(this, repl, str, 0); }
    public Tuple subn(object repl, string str, int maxreplace)
    {
      return Boa.Modules._re.subn(this, repl, str, maxreplace);
    }

    public string __repr__() { return string.Format("re.compile({0})", Ops.Repr(base.pattern)); }

    // Restricts 'index' to the range [0, length].
    static int ClampIndex(int index, int length)
    {
      if(index<0) return 0;
      return index>length ? length : index;
    }

    Dict groups; // lazily created cache behind 'groupindex'
  }
  175. #endregion
  176. #region RegexErrorException
  [DocString(@"Exception raised when a string passed to one of the functions here is not a
valid regular expression (for example, it might contain unmatched
parentheses) or when some other error occurs during compilation or matching.
It is never an error if a string contains no match for a pattern.")]
  public class RegexErrorException : ValueErrorException
  {
    // Passes the message straight through to ValueErrorException; no extra state is kept.
    public RegexErrorException(string message)
      : base(message)
    {
    }
  }
  184. #endregion
  // Reflected type object for System.Text.RegularExpressions.Match.
  public static readonly ReflectedType MatchObject = ReflectedType.FromType(typeof(Match));

  // Textual representation of this module.
  public static string __repr__()
  {
    return "<module 're' (built-in)>";
  }

  // Same text as __repr__().
  public static string __str__()
  {
    return __repr__();
  }
  [DocString(@"compile(pattern[, flags]) -> regex
Compile a regular expression pattern, returning a Regex object.")]
  public static regex compile(string pattern)
  {
    // With no explicit flags, Singleline is used.
    int defaultFlags = (int)RegexOptions.Singleline;
    return compile(pattern, defaultFlags);
  }

  // Builds the regex with the caller's flags plus RegexOptions.Compiled.
  public static regex compile(string pattern, int flags)
  {
    RegexOptions options = (RegexOptions)flags | RegexOptions.Compiled;
    return MakeRegex(pattern, options);
  }
  194. [DocString(@"escape(pattern) -> str
  195. Escape characters in pattern that may be regex metacharacters.")]
  // Returns 'str' with a backslash inserted before every character that is not a regex word
  // character, so that the result matches 'str' literally.
  //  - Word characters (letters, digits, '_' and other connector punctuation, non-spacing
  //    marks) are left alone: the .NET regex parser rejects a backslash followed by a word
  //    character that is not a known escape, so escaping e.g. '_' would produce an invalid
  //    pattern.
  //  - Whitespace and control characters are escaped too, so the result still matches
  //    literally when the pattern is compiled with the X/VERBOSE flag.
  public static string escape(string str)
  {
    System.Text.StringBuilder sb = new System.Text.StringBuilder(str.Length+10);
    for(int i=0; i<str.Length; i++)
    {
      char c = str[i];
      if(!IsRegexWordChar(c)) sb.Append('\\');
      sb.Append(c);
    }
    return sb.ToString();
  }

  // True for characters that must NOT be preceded by a backslash in escape(): letters, decimal
  // digits, non-spacing marks, connector punctuation, and the zero-width (non-)joiner.
  static bool IsRegexWordChar(char c)
  {
    if(char.IsLetterOrDigit(c) || c=='\u200C' || c=='\u200D') return true;
    switch(char.GetUnicodeCategory(c))
    {
      case System.Globalization.UnicodeCategory.NonSpacingMark:
      case System.Globalization.UnicodeCategory.ConnectorPunctuation:
        return true;
      default:
        return false;
    }
  }
  [DocString(@"findall(pattern, string) -> list
Return a list of all non-overlapping matches in the string.
If one or more groups are present in the pattern, return a list of groups;
this will be a list of tuples if the pattern has more than one group.
Empty matches are included in the result.")]
  public static List findall(object pattern, string str)
  {
    List results = new List();
    MatchCollection found = MakeRegex(pattern).Matches(str);
    // Each match is converted by MatchToFind (whole text, single group, or tuple of groups).
    for(int i=0; i<found.Count; i++)
    {
      results.append(MatchToFind(found[i]));
    }
    return results;
  }
  [DocString(@"finditer(pattern, string) -> iter
Return an iterator over all non-overlapping matches in the string.
For each match, the iterator returns a match object.
Empty matches are included in the result.")]
  public static IEnumerator finditer(object pattern, string str)
  {
    // 'pattern' may be a regex object or a pattern string; MakeRegex handles both.
    regex compiled = MakeRegex(pattern);
    return new FindEnumerator(compiled, str);
  }
  [DocString(@"match(pattern, string[, flags]) -> Match
Try to apply the pattern at the start of the string, returning a match
object, or null if no match was found.")]
  public static Match match(object pattern, string str)
  {
    return match(pattern, str, 0);
  }

  // Runs search() and keeps the result only if it begins at index 0.
  public static Match match(object pattern, string str, int flags)
  {
    Match found = search(pattern, str, flags);
    if(found==null || found.Index!=0) return null;
    return found;
  }
  [DocString(@"search(pattern, string[, flags]) -> Match
Scan through string looking for a match to the pattern, returning a match
object, or null if no match was found.")]
  public static Match search(object pattern, string str)
  {
    return search(pattern, str, 0);
  }

  // 'flags' is cast to RegexOptions and handed to MakeRegex together with the pattern.
  public static Match search(object pattern, string str, int flags)
  {
    regex compiled = MakeRegex(pattern, (RegexOptions)flags);
    Match found = compiled.Match(str);
    if(found==null || !found.Success) return null;
    return found;
  }
  [DocString(@"split(pattern, string[, maxsplit=0]) -> list
Split the source string by the occurrences of the pattern. If capturing
parentheses are used in the pattern, then the text of all groups in the
pattern are also returned as part of the resulting list. If maxsplit is
nonzero, at most maxsplit splits occur, and the remainder of the string
is returned as the final element of the list.")]
  public static List split(object pattern, string str)
  {
    return split(pattern, str, 0);
  }

  // Throws a ValueError for a negative maxsplit; a maxsplit of 0 means "no limit".
  public static List split(object pattern, string str, int maxsplit)
  {
    if(maxsplit<0) throw Ops.ValueError("split(): maxsplit must be >= 0");
    int limit = maxsplit==0 ? int.MaxValue : maxsplit;

    MatchCollection separators = MakeRegex(pattern).Matches(str);
    List pieces = new List();
    int tail = 0; // index just past the previous separator

    for(int n=0; n<separators.Count && n<limit; n++)
    {
      Match sep = separators[n];
      int length = sep.Index-tail;
      pieces.append(str.Substring(tail, length)); // text between the separators
      tail = sep.Index+sep.Length;

      // Follow it with the text of every group (other than group 0) that participated.
      GroupCollection captured = sep.Groups;
      for(int g=1; g<captured.Count; g++)
      {
        if(!captured[g].Success) continue;
        pieces.append(captured[g].Value);
      }
    }

    // Whatever remains after the last separator (the whole string if nothing was consumed).
    if(tail==0) pieces.append(str);
    else pieces.append(str.Substring(tail));
    return pieces;
  }
  [DocString(@"sub(pattern, repl, string[, maxreplace=0]) -> str
Return the string obtained by replacing the leftmost non-overlapping
occurrences of the pattern in the source string by the replacement value.
If the pattern isn't found, the string is returned unchanged. The
replacement value can be a string or a function; if it is a string, any
backslash escapes in it are processed. That is, '\n' is converted to a
single newline character, '\r' is converted to a linefeed, and so forth.
Backreferences, such as '\6', are replaced with the substring matched
by group 6 in the pattern.
If repl is a function, it is called for every non-overlapping occurrence
of pattern. It will be passed a single match object argument, and should
return the replacement string.")]
  public static string sub(object pattern, object repl, string str)
  {
    return sub(pattern, repl, str, 0);
  }

  // Delegates to the five-argument worker and discards the replacement count it reports.
  public static string sub(object pattern, object repl, string str, int maxreplace)
  {
    int ignoredCount;
    string replaced = sub(pattern, repl, str, maxreplace, out ignoredCount);
    return replaced;
  }
  [DocString(@"subn(pattern, repl, string[, maxreplace]) -> tuple
Performs the same operation as sub(), but returns a tuple
(new_string, number_of_subs_made).")]
  public static Tuple subn(object pattern, object repl, string str)
  {
    return subn(pattern, repl, str, 0);
  }

  // Pairs the substituted string with the number of replacements the worker reports.
  public static Tuple subn(object pattern, object repl, string str, int maxreplace)
  {
    int made;
    string replaced = sub(pattern, repl, str, maxreplace, out made);
    return new Tuple(replaced, made);
  }
  // Flag constants. Each short name and its long alias share one declaration, so the DocString
  // attribute applies to both; the values are the matching RegexOptions members cast to int.

  [DocString(@"Specifies that the regular expression should be compiled, increasing both
creation time and performance.")]
  public const int C = (int)RegexOptions.Compiled,
                   COMPILED = C;

  [DocString(@"Specifies that the regular expression should perform case-insensitive
matching.")]
  public const int I = (int)RegexOptions.IgnoreCase,
                   IGNORECASE = I;

  [DocString(@"When specified, the pattern character '^' matches at the beginning of the
string and at the beginning of each line (immediately following each
newline); and the pattern character '$' matches at the end of the string
and at the end of each line (immediately preceding each newline). By
default, '^' matches only at the beginning of the string, and '$' only at
the end of the string and immediately before the newline (if any) at the
end of the string.")]
  public const int M = (int)RegexOptions.Multiline,
                   MULTILINE = M;

  [DocString(@"This flag allows you to write regular expressions that look nicer.
Whitespace within the pattern is ignored, except when in a character class
or preceded by an unescaped backslash, and, when a line contains a '#'
neither in a character class or preceded by an unescaped backslash, all
characters from the leftmost such '#' through the end of the line are
ignored.")]
  public const int X = (int)RegexOptions.IgnorePatternWhitespace,
                   VERBOSE = X;

  // Reflected type object for RegexErrorException.
  public static readonly ReflectedType error = ReflectedType.FromType(typeof(RegexErrorException));
  // Uses RegexOptions.Singleline when the caller supplies no flags.
  static regex MakeRegex(object pattern)
  {
    return MakeRegex(pattern, RegexOptions.Singleline);
  }

  // Turns 'pattern' into a regex object:
  //  - an existing regex is returned as-is ('flags' is not applied to it);
  //  - a string is compiled with 'flags'; an ArgumentException from the constructor is
  //    rethrown as RegexErrorException;
  //  - anything else is a TypeError.
  static regex MakeRegex(object pattern, RegexOptions flags)
  {
    regex existing = pattern as regex;
    if(existing!=null) return existing;

    string text = pattern as string;
    if(text==null) throw Ops.TypeError("re: expecting either a regex object or a regex pattern string");

    try
    {
      return new regex(text, flags);
    }
    catch(ArgumentException e)
    {
      throw new RegexErrorException("regex parse error: " + e.Message);
    }
  }
  // Converts a match into the value findall() stores for it:
  //  - no groups besides group 0: the text of the whole match;
  //  - exactly one extra group: that group's text;
  //  - several groups: a Tuple with the text of each group after group 0.
  static object MatchToFind(Match m)
  {
    GroupCollection captured = m.Groups;
    int total = captured.Count;

    if(total>2)
    {
      object[] texts = new object[total-1];
      for(int k=0; k<texts.Length; k++) texts[k] = captured[k+1].Value;
      return new Tuple(texts);
    }

    if(total==2) return captured[1].Value;
    return m.Value;
  }
  // Worker behind sub() and subn(). Replaces up to 'maxreplace' matches of 'pattern' in 'str'
  // (0 means no limit; a negative value is a ValueError) and reports through 'count' how many
  // replacements were made.
  //  - If 'repl' is a string it is the replacement text; when it contains a backslash it is
  //    passed through StringOps.Unescape together with the current match.
  //  - Otherwise 'repl' is invoked via Ops.Call with the match, and Ops.Str of the result is
  //    used as the replacement.
  static string sub(object pattern, object repl, string str, int maxreplace, out int count)
  {
    if(maxreplace<0) throw Ops.ValueError("sub(): maxreplace must be >= 0");
    if(maxreplace==0) maxreplace=int.MaxValue;

    System.Text.StringBuilder output = new System.Text.StringBuilder();
    MatchCollection found = MakeRegex(pattern).Matches(str);

    string literal = repl as string;                    // null when 'repl' is not a string
    bool needsUnescape = literal!=null && literal.IndexOf('\\')!=-1;

    int done = 0, tail = 0;                             // 'tail' is just past the previous match
    while(done<found.Count && done<maxreplace)
    {
      Match hit = found[done];
      if(hit.Index!=tail) output.Append(str.Substring(tail, hit.Index-tail)); // text between matches

      if(literal!=null) output.Append(needsUnescape ? StringOps.Unescape(literal, hit) : literal);
      else output.Append(Ops.Str(Ops.Call(repl, hit)));

      tail = hit.Index+hit.Length;
      done++;
    }

    count = done;
    // Whatever follows the last replaced match (the whole string if nothing was consumed).
    output.Append(tail==0 ? str : str.Substring(tail));
    return output.ToString();
  }
  352. }
  353. } // namespace Boa.Modules