PageRenderTime 68ms CodeModel.GetById 19ms RepoModel.GetById 1ms app.codeStats 0ms

/3.0/Source/ClassLibrary/RegExpPerl.cs

#
C# | 3244 lines | 2030 code | 464 blank | 750 comment | 640 complexity | a29104df8fd368c98e16d78023cbfe33 MD5 | raw file
Possible License(s): CPL-1.0, GPL-2.0, CC-BY-SA-3.0, MPL-2.0-no-copyleft-exception, Apache-2.0
  1. /*
  2. Copyright (c) 2004-2006 Pavel Novak and Tomas Matousek.
  3. The use and distribution terms for this software are contained in the file named License.txt,
  4. which can be found in the root of the Phalanger distribution. By using this software
  5. in any fashion, you are agreeing to be bound by the terms of this license.
  6. You must not remove this notice from this software.
  7. TODO: preg_match - unmatched groups should be empty only if they are not followed by matched one (isn't it PHP bug?)
  8. TODO: preg_last_error - Returns the error code of the last PCRE regex execution
  9. */
  10. using System;
  11. using System.Text;
  12. using System.Threading;
  13. using System.Collections;
  14. using System.Collections.Generic;
  15. using System.Globalization;
  16. using System.Text.RegularExpressions;
  17. using PHP.Core;
  18. using PHP.Core.Reflection;
  19. namespace PHP.Library
  20. {
  /// <summary>
  /// Perl-specific regular expression options that can be expressed neither through .NET
  /// <see cref="RegexOptions"/> nor by rewriting the pattern text itself.
  /// </summary>
  /// <remarks>Bit flags; values may be combined with the bitwise OR operator.</remarks>
  [Flags]
  public enum PerlRegexOptions
  {
      /// <summary>No Perl-specific option is set.</summary>
      None = 0,

      /// <summary>Bit 0 (value 1).</summary>
      Evaluate = 1 << 0,

      /// <summary>Bit 1 (value 2).</summary>
      Ungreedy = 1 << 1,

      /// <summary>Bit 2 (value 4).</summary>
      Anchored = 1 << 2,

      /// <summary>Bit 3 (value 8).</summary>
      DollarMatchesEndOfStringOnly = 1 << 3,

      /// <summary>Bit 4 (value 16).</summary>
      UTF8 = 1 << 4
  }
  35. /// <summary>
  36. /// Implements PERL extended regular expressions as they are implemented in PHP.
  37. /// </summary>
  38. /// <threadsafety static="true"/>
  39. [ImplementsExtension(LibraryDescriptor.ExtPcre)]
  40. public static class PerlRegExp
  41. {
  42. #region preg_last_error
  /// <summary>
  /// Error codes exported to PHP as the <c>PREG_*_ERROR</c> constants.
  /// </summary>
  public enum PregError
  {
      /// <summary>Exported as <c>PREG_NO_ERROR</c>.</summary>
      [ImplementsConstant("PREG_NO_ERROR")]
      NoError = 0,

      /// <summary>Exported as <c>PREG_INTERNAL_ERROR</c>.</summary>
      [ImplementsConstant("PREG_INTERNAL_ERROR")]
      InternalError = 1,

      /// <summary>Exported as <c>PREG_BACKTRACK_LIMIT_ERROR</c>.</summary>
      [ImplementsConstant("PREG_BACKTRACK_LIMIT_ERROR")]
      BacktrackLimitError = 2,

      /// <summary>Exported as <c>PREG_RECURSION_LIMIT_ERROR</c>.</summary>
      [ImplementsConstant("PREG_RECURSION_LIMIT_ERROR")]
      RecursionLimitError = 3,

      /// <summary>Exported as <c>PREG_BAD_UTF8_ERROR</c>.</summary>
      [ImplementsConstant("PREG_BAD_UTF8_ERROR")]
      BadUtf8Error = 4,

      /// <summary>Exported as <c>PREG_BAD_UTF8_OFFSET_ERROR</c>.</summary>
      [ImplementsConstant("PREG_BAD_UTF8_OFFSET_ERROR")]
      BadUtf8OffsetError = 5
  }
  /// <summary>
  /// Flat list of the <c>PREG_*</c> option constants exported to PHP.
  /// </summary>
  /// <remarks>
  /// This is a bag of constants rather than a coherent flag set: members that belong to
  /// different functions share numeric values (for example <see cref="PatternOrder"/>,
  /// <see cref="SplitNoEmpty"/>, <see cref="ReplaceEval"/> and <see cref="GrepInvert"/> are all 1).
  /// </remarks>
  public enum PregConst
  {
      // --- match ordering / capture ---
      [ImplementsConstant("PREG_PATTERN_ORDER")]
      PatternOrder = 1,

      [ImplementsConstant("PREG_SET_ORDER")]
      SetOrder = 2,

      [ImplementsConstant("PREG_OFFSET_CAPTURE")]
      OffsetCapture = 0x100,

      // --- split ---
      [ImplementsConstant("PREG_SPLIT_NO_EMPTY")]
      SplitNoEmpty = 1,

      [ImplementsConstant("PREG_SPLIT_DELIM_CAPTURE")]
      SplitDelimCapture = 2,

      [ImplementsConstant("PREG_SPLIT_OFFSET_CAPTURE")]
      SplitOffsetCapture = 4,

      // --- replace ---
      [ImplementsConstant("PREG_REPLACE_EVAL")]
      ReplaceEval = 1,

      // --- grep ---
      [ImplementsConstant("PREG_GREP_INVERT")]
      GrepInvert = 1
  }
  /// <summary>
  /// Implements PHP <c>preg_last_error</c>. Error tracking is not implemented (see the TODO in
  /// the file header), so the result is always 0, the value of <c>PREG_NO_ERROR</c>.
  /// </summary>
  /// <returns>Always 0.</returns>
  [ImplementsFunction("preg_last_error")]
  public static int LastError()
  {
      const int noError = 0;
      return noError;
  }
  82. #endregion
  83. #region preg_quote
  /// <summary>
  /// Escapes every character of <paramref name="str"/> that has a special meaning in a regular
  /// expression by prefixing it with a backslash. No additional delimiter character is escaped.
  /// </summary>
  /// <param name="str">String with characters to escape.</param>
  /// <returns>String with escaped characters, as produced by the
  /// <c>Quote(string, char, bool)</c> overload.</returns>
  [ImplementsFunction("preg_quote")]
  [PureFunction]
  public static string Quote(string str)
  {
      const char noDelimiter = '\0';
      const bool useDelimiter = false;

      return Quote(str, noDelimiter, useDelimiter);
  }
  /// <summary>
  /// Escapes every character of <paramref name="str"/> that has a special meaning in a regular
  /// expression and, in addition, the first character of <paramref name="delimiter"/>.
  /// </summary>
  /// <param name="str">String with characters to escape.</param>
  /// <param name="delimiter">String whose first character is escaped in addition to the general
  /// special characters. A <B>null</B> or empty value means no extra character is escaped.</param>
  /// <returns>String with escaped characters.</returns>
  [ImplementsFunction("preg_quote")]
  [PureFunction]
  public static string Quote(string str, string delimiter)
  {
      // no usable delimiter: behave like the single-argument overload
      if (delimiter == null || delimiter.Length == 0)
          return Quote(str, '\0', false);

      // only the first character of the delimiter string is taken into account
      return Quote(str, delimiter[0], true);
  }
  /// <summary>
  /// Escapes all characters with a special meaning in Perl regular expressions and, optionally,
  /// the character <paramref name="delimiter"/>.
  /// </summary>
  /// <remarks>
  /// The escaped characters are <c>. \ + * ? [ ^ ] $ ( ) { } = ! &lt; &gt; | : -</c>; each is
  /// prefixed with a backslash. A NUL character is emitted as the four characters <c>\000</c>,
  /// following PHP's <c>preg_quote</c>.
  /// </remarks>
  /// <param name="str">String to quote.</param>
  /// <param name="delimiter">Additional character to quote.</param>
  /// <param name="delimiterUsed">Whether <paramref name="delimiter"/> should be quoted.</param>
  /// <returns>String with quoted characters, or <B>null</B> if <paramref name="str"/> is <B>null</B>.</returns>
  internal static string Quote(string str, char delimiter, bool delimiterUsed)
  {
      if (str == null)
          return null;

      // most inputs need few escapes; leave a little headroom to avoid an early regrow
      StringBuilder result = new StringBuilder(str.Length + 8);

      for (int i = 0; i < str.Length; i++)
      {
          char c = str[i];
          bool escape;

          switch (c)
          {
              case '\0':
                  // NUL cannot be escaped by a plain backslash; write it as an octal escape
                  result.Append("\\000");
                  continue;

              case '.':
              case '\\':
              case '+':
              case '*':
              case '?':
              case '[':
              case '^':
              case ']':
              case '$':
              case '(':
              case ')':
              case '{':
              case '}':
              case '=':
              case '!':
              case '<':
              case '>':
              case '|':
              case ':':
              case '-':
                  escape = true;
                  break;

              default:
                  // not a general special character: escape only if it is the requested delimiter
                  escape = delimiterUsed && c == delimiter;
                  break;
          }

          if (escape)
              result.Append('\\');
          result.Append(c);
      }

      return result.ToString();
  }
  162. #endregion
  163. #region preg_grep
  /// <summary>
  /// Flags accepted by the Grep functions.
  /// </summary>
  [Flags]
  public enum GrepFlags
  {
      /// <summary>Default behaviour: matching elements are returned.</summary>
      None = 0,

      /// <summary>Exported as <c>PREG_GREP_INVERT</c>: non-matching elements are returned instead.</summary>
      [ImplementsConstant("PREG_GREP_INVERT")]
      GrepInvert = 1 << 0
  }
  /// <summary>
  /// Returns an array made of those elements of <paramref name="input"/> that match
  /// <paramref name="pattern"/>.
  /// </summary>
  /// <param name="pattern">Pattern to be matched against each array element.</param>
  /// <param name="input">Array of strings to match.</param>
  /// <returns>Array containing only the values from <paramref name="input"/> that match
  /// <paramref name="pattern"/>.</returns>
  [ImplementsFunction("preg_grep")]
  public static PhpArray Grep(object pattern, PhpArray input)
  {
      // same as the three-argument overload without inversion
      GrepFlags defaultFlags = GrepFlags.None;
      return Grep(pattern, input, defaultFlags);
  }
  /// <summary>
  /// <para>Returns an array made of those elements of <paramref name="input"/> that match
  /// <paramref name="pattern"/>.</para>
  /// <para>With <see cref="GrepFlags.GrepInvert"/> the selection is inverted: the result holds the
  /// elements that do <B>not</B> match.</para>
  /// </summary>
  /// <param name="pattern">Pattern to be matched against each array element.</param>
  /// <param name="input">Array of strings to match.</param>
  /// <param name="flags">Flags selecting which elements the resulting array contains.</param>
  /// <returns>Selected elements under their original keys; <B>null</B> if <paramref name="input"/>
  /// is <B>null</B> or the pattern could not be converted.</returns>
  [ImplementsFunction("preg_grep")]
  public static PhpArray Grep(object pattern, PhpArray input, GrepFlags flags)
  {
      if (input == null)
          return null;

      PerlRegExpConverter converter = ConvertPattern(pattern, null);
      if (converter == null)
          return null;

      bool invert = (flags & GrepFlags.GrepInvert) != 0;
      PhpArray selected = new PhpArray();

      foreach (KeyValuePair<IntStringKey, object> item in input)
      {
          string text = ConvertData(item.Value, converter);
          bool isMatch = converter.Regex.Match(text).Success;

          // keep the element on (match and not inverted) or (no match and inverted)
          // NOTE(review): the converted string, not the original value, is stored - confirm this is intended
          if (isMatch != invert)
              selected.Add(item.Key, text);
      }

      return selected;
  }
  217. #endregion
  218. #region preg_match, preg_match_all
  /// <summary>
  /// Flags for the Match function family.
  /// </summary>
  /// <remarks>
  /// The flags used by the preg_match PHP functions form a hybrid enumeration:
  /// <see cref="PatternOrder"/> and <see cref="SetOrder"/> are mutually exclusive, while
  /// <see cref="OffsetCapture"/> may be added to either with the bitwise | operator.
  /// Pattern order is the default of these functions; the matching code in this class treats a
  /// value with neither ordering bit set the same way as <see cref="PatternOrder"/>.
  /// (This confusing declaration comes from the PHP authors.)
  /// </remarks>
  [Flags]
  public enum MatchFlags
  {
      /// <summary>Exported as <c>PREG_PATTERN_ORDER</c> (value 1).</summary>
      [ImplementsConstant("PREG_PATTERN_ORDER")]
      PatternOrder = 1 << 0,

      /// <summary>Exported as <c>PREG_SET_ORDER</c> (value 2).</summary>
      [ImplementsConstant("PREG_SET_ORDER")]
      SetOrder = 1 << 1,

      /// <summary>Exported as <c>PREG_OFFSET_CAPTURE</c> (value 0x100).</summary>
      [ImplementsConstant("PREG_OFFSET_CAPTURE")]
      OffsetCapture = 1 << 8
  }
  /// <summary>
  /// Tests whether <paramref name="data"/> contains a match of the regular expression
  /// <paramref name="pattern"/>. Only the first match is looked for.
  /// </summary>
  /// <param name="pattern">Perl regular expression.</param>
  /// <param name="data">String to search.</param>
  /// <returns>1 if a match was found, 0 if not, and -1 if the pattern could not be converted.</returns>
  [ImplementsFunction("preg_match")]
  [return: CastToFalse]
  public static int Match(object pattern, object data)
  {
      PerlRegExpConverter converter = ConvertPattern(pattern, null);
      if (converter == null)
          return -1;

      string subject = ConvertData(data, converter);

      if (converter.Regex.Match(subject).Success)
          return 1;

      return 0;
  }
  /// <summary>
  /// <para>Searches <paramref name="data"/> for the first match of the regular expression
  /// <paramref name="pattern"/>.</para>
  /// <para><paramref name="matches"/> receives the result: index 0 holds the text matched by the
  /// whole <paramref name="pattern"/>, the following indices hold the parenthesized subpatterns.</para>
  /// </summary>
  /// <param name="pattern">Perl regular expression.</param>
  /// <param name="data">String or string of bytes to search.</param>
  /// <param name="matches">Array containing matched strings.</param>
  /// <returns>0 if there is no match and 1 if the match was found.</returns>
  [ImplementsFunction("preg_match")]
  [return: CastToFalse]
  public static int Match(object pattern, object data, out PhpArray matches)
  {
      const int startOffset = 0;
      const bool findAll = false;

      return Match(pattern, data, out matches, MatchFlags.PatternOrder, startOffset, findAll);
  }
  /// <summary>
  /// <para>Searches <paramref name="data"/> for the first match of the regular expression
  /// <paramref name="pattern"/>.</para>
  /// <para><paramref name="matches"/> receives the result: index 0 holds the text matched by the
  /// whole <paramref name="pattern"/>, the following indices hold the parenthesized subpatterns.</para>
  /// <para>With <see cref="MatchFlags.OffsetCapture"/> every entry of <paramref name="matches"/> is
  /// itself an array: the substring at index [0] and its offset in <paramref name="data"/> at
  /// index [1].</para>
  /// </summary>
  /// <param name="pattern">Perl regular expression.</param>
  /// <param name="data">String to search.</param>
  /// <param name="matches">Array containing matched strings.</param>
  /// <param name="flags"><see cref="MatchFlags"/>.</param>
  /// <returns>0 if there is no match and 1 if the match was found.</returns>
  [ImplementsFunction("preg_match")]
  [return: CastToFalse]
  public static int Match(object pattern, object data, out PhpArray matches, MatchFlags flags)
  {
      const int startOffset = 0;
      const bool findAll = false;

      return Match(pattern, data, out matches, flags, startOffset, findAll);
  }
  /// <summary>
  /// <para>Searches <paramref name="data"/> for the first match of the regular expression
  /// <paramref name="pattern"/>, starting at <paramref name="offset"/>.</para>
  /// <para><paramref name="matches"/> receives the result: index 0 holds the text matched by the
  /// whole <paramref name="pattern"/>, the following indices hold the parenthesized subpatterns.</para>
  /// <para>With <see cref="MatchFlags.OffsetCapture"/> every entry of <paramref name="matches"/> is
  /// itself an array: the substring at index [0] and its offset in <paramref name="data"/> at
  /// index [1].</para>
  /// <para>Starting at an offset is not the same as passing a substring of <paramref name="data"/>.</para>
  /// </summary>
  /// <param name="pattern">Perl regular expression.</param>
  /// <param name="data">String or string of bytes to search.</param>
  /// <param name="matches">Array containing matched strings.</param>
  /// <param name="flags"><see cref="MatchFlags"/>.</param>
  /// <param name="offset">Offset to <paramref name="data"/> where the match should start.</param>
  /// <returns>0 if there is no match and 1 if the match was found.</returns>
  [ImplementsFunction("preg_match")]
  [return: CastToFalse]
  public static int Match(object pattern, object data, out PhpArray matches, MatchFlags flags, int offset)
  {
      const bool findAll = false;

      return Match(pattern, data, out matches, flags, offset, findAll);
  }
  /// <summary>
  /// <para>Finds all matches of <paramref name="pattern"/> in <paramref name="data"/> and stores
  /// them in <paramref name="matches"/> in "Pattern Order": index 0 is an array of the whole
  /// matches, index 1 an array of the first subpattern of every match, and so on.</para>
  /// <para>Each search continues just after the previous match.</para>
  /// </summary>
  /// <param name="pattern">Regular expression.</param>
  /// <param name="data">String or string of bytes to search.</param>
  /// <param name="matches">Output array containing matches found.</param>
  /// <returns>Number of whole matches.</returns>
  [ImplementsFunction("preg_match_all")]
  [return: CastToFalse]
  public static int MatchAll(object pattern, object data, out PhpArray matches)
  {
      const int startOffset = 0;
      const bool findAll = true;

      return Match(pattern, data, out matches, MatchFlags.PatternOrder, startOffset, findAll);
  }
  /// <summary>
  /// <para>Finds all matches of <paramref name="pattern"/> in <paramref name="data"/> and stores
  /// them in <paramref name="matches"/>. Each search continues just after the previous match.</para>
  /// <para>With <see cref="MatchFlags.PatternOrder"/>, index 0 of <paramref name="matches"/> is an
  /// array of the full pattern matches, index 1 an array of the strings matched by the first
  /// parenthesized subpattern, and so on. With <see cref="MatchFlags.SetOrder"/>, index 0 is the
  /// complete set (full match and subpatterns) of the first match, index 1 the set of the second
  /// match, and so on.</para>
  /// <para><see cref="MatchFlags.OffsetCapture"/> replaces every matched substring by an array
  /// holding the substring at index 0 and its position in the original string at index 1.</para>
  /// </summary>
  /// <param name="pattern">Regular expression.</param>
  /// <param name="data">String or string of bytes to search.</param>
  /// <param name="matches">Output array containing matches found.</param>
  /// <param name="flags">Flags selecting the order of results in <paramref name="matches"/> (Set
  /// Order, Pattern Order) and whether match positions are added (Offset Capture).</param>
  /// <returns>Number of whole matches.</returns>
  [ImplementsFunction("preg_match_all")]
  [return: CastToFalse]
  public static int MatchAll(object pattern, object data, out PhpArray matches, MatchFlags flags)
  {
      const int startOffset = 0;
      const bool findAll = true;

      return Match(pattern, data, out matches, flags, startOffset, findAll);
  }
  /// <summary>
  /// <para>Finds all matches of <paramref name="pattern"/> in <paramref name="data"/>, starting at
  /// <paramref name="offset"/>, and stores them in <paramref name="matches"/>. Each search
  /// continues just after the previous match.</para>
  /// <para>With <see cref="MatchFlags.PatternOrder"/>, index 0 of <paramref name="matches"/> is an
  /// array of the full pattern matches, index 1 an array of the strings matched by the first
  /// parenthesized subpattern, and so on. With <see cref="MatchFlags.SetOrder"/>, index 0 is the
  /// complete set (full match and subpatterns) of the first match, index 1 the set of the second
  /// match, and so on.</para>
  /// <para><see cref="MatchFlags.OffsetCapture"/> replaces every matched substring by an array
  /// holding the substring at index 0 and its position in the original string at index 1.</para>
  /// </summary>
  /// <param name="pattern">Regular expression.</param>
  /// <param name="data">String or string of bytes to search.</param>
  /// <param name="matches">Output array containing matches found.</param>
  /// <param name="flags">Flags selecting the order of results in <paramref name="matches"/> (Set
  /// Order, Pattern Order) and whether match positions are added (Offset Capture).</param>
  /// <param name="offset">Offset in <paramref name="data"/> where the search should begin. This
  /// differs from passing a substring because of the ^ (start of the string or line) anchor.</param>
  /// <returns>Number of whole matches.</returns>
  [ImplementsFunction("preg_match_all")]
  [return: CastToFalse]
  public static int MatchAll(object pattern, object data, out PhpArray matches, MatchFlags flags, int offset)
  {
      const bool findAll = true;

      return Match(pattern, data, out matches, flags, offset, findAll);
  }
  /// <summary>
  /// Private method implementing the functions of the match family.
  /// </summary>
  /// <param name="pattern">Perl regular expression match pattern.</param>
  /// <param name="data">String to search matches.</param>
  /// <param name="matches">An array containing matches found. It is <B>null</B> only when
  /// <paramref name="flags"/> is invalid; otherwise it is always an array (possibly empty).</param>
  /// <param name="flags">Flags for searching.</param>
  /// <param name="offset">Offset to <paramref name="data"/> where the search should start.
  /// A negative value counts from the end of the data; a value beyond the end starts the search
  /// at the end.</param>
  /// <param name="matchAll"><B>True</B> if all matches should be found, <B>false</B> if only the first
  /// is enough.</param>
  /// <returns>Number of times the <paramref name="pattern"/> matches, or -1 on failure (invalid
  /// flags, pattern that cannot be converted, or an anchored pattern that matched elsewhere than
  /// at <paramref name="offset"/>).</returns>
  private static int Match(object pattern, object data, out PhpArray matches, MatchFlags flags,
      int offset, bool matchAll)
  {
      // these two flags together do not make sense
      if ((flags & MatchFlags.PatternOrder) != 0 && (flags & MatchFlags.SetOrder) != 0)
      {
          PhpException.InvalidArgument("flags", LibResources.GetString("preg_match_pattern_set_order"));
          matches = null;
          return -1;
      }

      PerlRegExpConverter converter = ConvertPattern(pattern, null);
      if (converter == null)
      {
          matches = new PhpArray();
          return -1;
      }

      string converted = ConvertData(data, converter);
      bool offsetCapture = (flags & MatchFlags.OffsetCapture) != 0;

      // A negative offset is relative to the end of the subject (as in PHP). Without this,
      // Regex.Match(string, int) throws ArgumentOutOfRangeException for negative start positions.
      if (offset < 0)
      {
          offset += converted.Length;
          if (offset < 0)
              offset = 0;
      }

      // the start position handed to the regex engine must not exceed the subject length
      Match m = converter.Regex.Match(converted, offset > converted.Length ? converted.Length : offset);

      // an anchored pattern must match exactly at the requested offset
      if ((converter.PerlOptions & PerlRegexOptions.Anchored) > 0 && m.Success && m.Index != offset)
      {
          matches = new PhpArray();
          return -1;
      }

      if (m.Success)
      {
          if (!matchAll || (flags & MatchFlags.PatternOrder) != 0)
          {
              matches = new PhpArray(m.Groups.Count);
          }
          else
              matches = new PhpArray();

          if (!matchAll)
          {
              // Preg numbers groups sequentially, both named and unnamed.
              // .Net only numbers unnamed groups.
              // So we name unnamed groups (see ConvertRegex) to map correctly.
              int lastSuccessfulGroupIndex = GetLastSuccessfulGroup(m.Groups);
              for (int i = 0; i <= lastSuccessfulGroupIndex; i++)
              {
                  // All groups should be named; a named group is stored under its name as well
                  // as under its index.
                  var groupName = GetGroupName(converter.Regex, i);
                  if (!string.IsNullOrEmpty(groupName))
                  {
                      matches[groupName] = NewArrayItem(m.Groups[i].Value, m.Groups[i].Index, offsetCapture);
                  }
                  matches[i] = NewArrayItem(m.Groups[i].Value, m.Groups[i].Index, offsetCapture);
              }
              return 1;
          }

          // store all other matches in PhpArray matches;
          // SetOrder is tested because pattern order is the default when no ordering bit is set
          if ((flags & MatchFlags.SetOrder) != 0)
              return FillMatchesArrayAllSetOrder(converter.Regex, m, ref matches, offsetCapture);
          else
              return FillMatchesArrayAllPatternOrder(converter.Regex, m, ref matches, offsetCapture);
      }

      // no match has been found
      if (matchAll && (flags & MatchFlags.SetOrder) == 0)
      {
          // in that case PHP returns an array filled with empty arrays according to parentheses count
          matches = new PhpArray(m.Groups.Count);
          for (int i = 0; i < converter.Regex.GetGroupNumbers().Length; i++)
          {
              AddGroupNameToResult(converter.Regex, matches, i, (ms, groupName) =>
              {
                  ms[groupName] = new PhpArray(0);
              });
              matches[i] = new PhpArray(0);
          }
      }
      else
      {
          matches = new PhpArray(0); // empty array
      }

      return 0;
  }
  /// <summary>
  /// Gets the PHP-visible name of the capture group with number <paramref name="index"/>.
  /// </summary>
  /// <param name="regex">Regular expression the group belongs to.</param>
  /// <param name="index">Group number.</param>
  /// <returns>
  /// The group name with the leading <see cref="PerlRegExpConverter.GroupPrefix"/> removed when
  /// the group is a named one; an empty string for anonymous groups, purely numbered groups and
  /// group numbers unknown to <paramref name="regex"/>.
  /// </returns>
  private static string GetGroupName(Regex regex, int index)
  {
      string groupName = regex.GroupNameFromNumber(index);

      // GroupNameFromNumber yields an empty string for a number that is not a group of the regex;
      // indexing groupName[0] below would throw in that case.
      if (string.IsNullOrEmpty(groupName))
          return string.Empty;

      // Prefixes are identifiers, so compare ordinally rather than with the current culture.
      if (groupName.StartsWith(PerlRegExpConverter.AnonymousGroupPrefix, StringComparison.Ordinal))
      {
          // Anonymous groups: remove it altogether. Its purpose was to order it correctly.
          Debug.Assert(groupName.Substring(PerlRegExpConverter.AnonymousGroupPrefix.Length) == index.ToString(CultureInfo.InvariantCulture));
          return string.Empty;
      }

      if (groupName[0] != PerlRegExpConverter.GroupPrefix)
      {
          // Indexed groups: they carry no name of their own.
          Debug.Assert(groupName == index.ToString(CultureInfo.InvariantCulture));
          return string.Empty;
      }

      // Named groups: remove prefix.
      return groupName.Substring(1);
  }
  495. #endregion
  496. #region preg_split
  /// <summary>
  /// Flags for the split function family.
  /// </summary>
  [Flags]
  public enum SplitFlags
  {
      /// <summary>No flag set.</summary>
      None = 0,

      /// <summary>Exported as <c>PREG_SPLIT_NO_EMPTY</c>: empty strings are left out of the result.</summary>
      [ImplementsConstant("PREG_SPLIT_NO_EMPTY")]
      NoEmpty = 1 << 0,

      /// <summary>Exported as <c>PREG_SPLIT_DELIM_CAPTURE</c>: substrings captured by the delimiter
      /// pattern are added to the result.</summary>
      [ImplementsConstant("PREG_SPLIT_DELIM_CAPTURE")]
      DelimCapture = 1 << 1,

      /// <summary>Exported as <c>PREG_SPLIT_OFFSET_CAPTURE</c>: every result item is an array of the
      /// substring and its offset.</summary>
      [ImplementsConstant("PREG_SPLIT_OFFSET_CAPTURE")]
      OffsetCapture = 1 << 2
  }
  /// <summary>
  /// Splits <paramref name="data"/> along the boundaries matched by <paramref name="pattern"/> and
  /// returns the substrings as an array. The number of substrings is not limited.
  /// </summary>
  /// <param name="pattern">Regular expression to match to boundaries.</param>
  /// <param name="data">String or string of bytes to split.</param>
  /// <returns>An array containing substrings.</returns>
  [ImplementsFunction("preg_split")]
  public static PhpArray Split(object pattern, object data)
  {
      const int noLimit = -1;

      return Split(pattern, data, noLimit, SplitFlags.None);
  }
  /// <summary>
  /// <para>Splits <paramref name="data"/> along the boundaries matched by <paramref name="pattern"/>
  /// and returns the substrings as an array.</para>
  /// <para><paramref name="limit"/> is the maximum number of strings in the resulting array. Once
  /// (limit-1) matches have been found, the whole rest of the string becomes the last element.</para>
  /// </summary>
  /// <param name="pattern">Regular expression to match to boundaries.</param>
  /// <param name="data">String or string of bytes to split.</param>
  /// <param name="limit">Max number of elements in the resulting array.</param>
  /// <returns>An array containing substrings.</returns>
  [ImplementsFunction("preg_split")]
  public static PhpArray Split(object pattern, object data, int limit)
  {
      SplitFlags defaultFlags = SplitFlags.None;

      return Split(pattern, data, limit, defaultFlags);
  }
  /// <summary>
  /// <para>Splits <paramref name="data"/> along the boundaries matched by <paramref name="pattern"/>
  /// and returns the substrings as an array.</para>
  /// <para><paramref name="limit"/> is the maximum number of strings in the resulting array. Once
  /// (limit-1) substrings have been produced, the whole rest of the string becomes the last
  /// element. 0 and -1 mean "no limit"; any other negative value behaves like 1.</para>
  /// <para><see cref="SplitFlags.NoEmpty"/> keeps empty strings out of the result (skipped pieces
  /// do not count towards <paramref name="limit"/>). <see cref="SplitFlags.DelimCapture"/> also
  /// adds the substrings captured by groups of the delimiter pattern.
  /// <see cref="SplitFlags.OffsetCapture"/> replaces every substring by an array holding the
  /// substring at index 0 and its offset in the original <paramref name="data"/> at index 1.</para>
  /// <para>A delimiter that matches the empty string splits at that position without consuming
  /// any character, so an empty pattern splits the data into single characters.</para>
  /// </summary>
  /// <param name="pattern">Regular expression to match to boundaries.</param>
  /// <param name="data">String or string of bytes to split.</param>
  /// <param name="limit">Max number of elements in the resulting array.</param>
  /// <param name="flags">Flags affecting the returned array.</param>
  /// <returns>An array containing substrings, or <B>null</B> if the pattern could not be converted.</returns>
  [ImplementsFunction("preg_split")]
  public static PhpArray Split(object pattern, object data, int limit, SplitFlags flags)
  {
      if (limit == 0) // 0 does not make sense, php's behavior is as it is -1
          limit = -1;
      if (limit < -1) // for all other negative values it seems that is as limit == 1
          limit = 1;

      PerlRegExpConverter converter = ConvertPattern(pattern, null);
      if (converter == null) return null;

      string str = ConvertData(data, converter);
      Match m = converter.Regex.Match(str);

      bool offsetCapture = (flags & SplitFlags.OffsetCapture) != 0;
      bool noEmpty = (flags & SplitFlags.NoEmpty) != 0;
      bool delimCapture = (flags & SplitFlags.DelimCapture) != 0;

      PhpArray result = new PhpArray();
      int lastIndex = 0; // start of the piece that has not been emitted yet

      // Continue while there is a delimiter, more than one slot is left in the result
      // (the last slot is reserved for the remainder) and some input remains.
      while (m.Success && (limit == -1 || limit > 1) && lastIndex < str.Length)
      {
          // Add the part before the match. Zero-width matches are treated exactly like any
          // other delimiter: the piece runs from the previous delimiter end to this position.
          int length = m.Index - lastIndex;
          if (length > 0 || !noEmpty)
          {
              result.Add(NewArrayItem(str.Substring(lastIndex, length), lastIndex, offsetCapture));

              // only pieces that were really added consume the limit
              if (limit != -1)
                  limit--;
          }

          if (delimCapture) // add all captures but not whole pattern match (start at 1)
          {
              // Unmatched groups seen since the last matched one. They are added only if a
              // matched group follows; in PHP, trailing unmatched groups are trimmed.
              List<object> pendingUnmatched = null;

              for (int i = 1; i < m.Groups.Count; i++)
              {
                  Group g = m.Groups[i];
                  if (g.Length == 0 && noEmpty)
                      continue;

                  // the value to be added into the result:
                  object value = NewArrayItem(g.Value, g.Index, offsetCapture);

                  if (g.Success)
                  {
                      // flush the unmatched groups that precede this matched one
                      if (pendingUnmatched != null && pendingUnmatched.Count > 0)
                      {
                          foreach (object pending in pendingUnmatched)
                              result.Add(pending);
                          pendingUnmatched.Clear();
                      }

                      result.Add(value);
                  }
                  else
                  {
                      if (pendingUnmatched == null)
                          pendingUnmatched = new List<object>();
                      pendingUnmatched.Add(value);
                  }
              }
          }

          lastIndex = m.Index + m.Length;
          m = m.NextMatch();
      }

      // add remaining string (might be empty)
      if (lastIndex < str.Length || !noEmpty)
          result.Add(NewArrayItem(str.Substring(lastIndex), lastIndex, offsetCapture));

      return result;
  }
  628. #endregion
  629. #region preg_replace, preg_replace_callback
  /// <summary>
  /// <para>Searches <paramref name="data"/> for matches of <paramref name="pattern"/> and replaces
  /// them with <paramref name="replacement"/>. <paramref name="replacement"/> may contain
  /// backreferences of the form <I>\\n</I> or <I>$n</I> (the second form is preferred).</para>
  /// <para>Every parameter may be a one-dimensional array of strings. If <paramref name="data"/> is
  /// an array, the replacement is done on every element and the return value is an array as well.
  /// If both <paramref name="pattern"/> and <paramref name="replacement"/> are arrays, the
  /// replacements are processed in the order the keys appear in the array. If only
  /// <paramref name="pattern"/> is an array, the replacement string is used for every key of
  /// <paramref name="pattern"/>.</para>
  /// </summary>
  /// <param name="context">Current <see cref="ScriptContext"/>. Passed by Phalanger runtime, cannot be null.</param>
  /// <param name="self">Instance of object that called the replace method (replace pattern may contain $this).</param>
  /// <param name="definedVariables"></param>
  /// <param name="pattern">Regular expression to match.</param>
  /// <param name="replacement">Replacement string.</param>
  /// <param name="data">String to search for replacements.</param>
  /// <returns>String or array containing strings with replacement performed.</returns>
  [ImplementsFunction("preg_replace", FunctionImplOptions.CaptureEvalInfo | FunctionImplOptions.NeedsVariables | FunctionImplOptions.NeedsThisReference)]
  public static object Replace(ScriptContext/*!*/context, DObject self, Dictionary<string, object> definedVariables,
      object pattern, object replacement, object data)
  {
      // Int32.MinValue is the sentinel that disables counting of replacements
      int ignoredCount = int.MinValue;

      object replaced = Replace(context, self, definedVariables, pattern, replacement, null, data, -1, ref ignoredCount);
      return replaced;
  }
  654. /// <summary>
  655. /// <para>Searches <paramref name="data"/> for matches to <paramref name="pattern"/> and replaces them
  656. /// with <paramref name="replacement"/>. <paramref name="replacement"/> may contain backreferences
  657. /// of the form of <I>\\n</I> or <I>$n</I> (second one preferred).</para>
  658. /// <para>Every parameter may be an unidimensional array of strings. If <paramref name="data"/> is
  659. /// an array, replacement is done on every element and return value is an array as well. If
  660. /// <paramref name="pattern"/> and <paramref name="replacement"/> are arrays, the replacements are processed
  661. /// in the order the keys appear in the array. If only <paramref name="pattern"/> is an array, the
  662. /// replacement string is used for every key in the <paramref name="pattern"/>.</para>
  663. /// </summary>
  664. /// <param name="context">Current <see cref="ScriptContext"/>. Passed by Phalanger runtime, cannot be null.</param>
  665. /// <param name="self">Instance of object that called the replace method (replace pattern may contain $this)</param>
  666. /// <param name="definedVariables"></param>
  667. /// <param name="pattern">Regular expression to match.</param>
  668. /// <param name="replacement">Replacement string.</param>
  669. /// <param name="data">String to search for replacements.</param>
  670. /// <param name="limit">Maximum number of matches replaced. (-1 for no limit)</param>
  671. /// <returns>String or array containing strings with replacement performed.</returns>
  672. [ImplementsFunction("preg_replace", FunctionImplOptions.CaptureEvalInfo | FunctionImplOptions.NeedsVariables | FunctionImplOptions.NeedsThisReference)]
  673. public static object Replace(ScriptContext/*!*/context, DObject self, Dictionary<string, object> definedVariables,
  674. object pattern, object replacement, object data, int limit)
  675. {
  676. int count = Int32.MinValue; // disables counting
  677. return Replace(context, self, definedVariables, pattern, replacement, null, data, limit, ref count);
  678. }
  679. /// <summary>
  680. /// <para>Searches <paramref name="data"/> for matches to <paramref name="pattern"/> and replaces them
  681. /// with <paramref name="replacement"/>. <paramref name="replacement"/> may contain backreferences
  682. /// of the form of <I>\\n</I> or <I>$n</I> (second one preferred).</para>
  683. /// <para>Every parameter may be an unidimensional array of strings. If <paramref name="data"/> is
  684. /// an array, replacement is done on every element and return value is an array as well. If
  685. /// <paramref name="pattern"/> and <paramref name="replacement"/> are arrays, the replacements are processed
  686. /// in the order the keys appear in the array. If only <paramref name="pattern"/> is an array, the
  687. /// replacement string is used for every key in the <paramref name="pattern"/>.</para>
  688. /// </summary>
  689. /// <param name="context">Current <see cref="ScriptContext"/>. Passed by Phalanger runtime, cannot be null.</param>
  690. /// <param name="self">Instance of object that called the replace method (replace pattern may contain $this)</param>
  691. /// <param name="definedVariables"></param>
  692. /// <param name="pattern">Regular expression to match.</param>
  693. /// <param name="replacement">Replacement string.</param>
  694. /// <param name="data">String to search for replacements.</param>
  695. /// <param name="limit">Maximum number of matches replaced. (-1 for no limit)</param>
  696. /// <param name="count">Number of replacements.</param>
  697. /// <returns>String or array containing strings with replacement performed.</returns>
  698. [ImplementsFunction("preg_replace", FunctionImplOptions.CaptureEvalInfo | FunctionImplOptions.NeedsVariables | FunctionImplOptions.NeedsThisReference)]
  699. public static object Replace(ScriptContext/*!*/context, DObject self, Dictionary<string, object> definedVariables,
  700. object pattern, object replacement, object data, int limit, out int count)
  701. {
  702. count = 0;
  703. return Replace(context, self, definedVariables, pattern, replacement, null, data, limit, ref count);
  704. }
  705. /// <summary>
  706. /// <para>Searches <paramref name="data"/> for matches to <paramref name="pattern"/> and the array of matched
  707. /// strings (full pattern match + parenthesized substrings) is passed to <paramref name="callback"/> which
  708. /// returns replacement string.</para>
  709. /// <para><paramref name="pattern"/> and <paramref name="data"/> parameters may be also unidimensional
  710. /// arrays of strings. For the explanation <see cref="Replace"/>.</para>
  711. /// </summary>
  712. /// <param name="context">Current <see cref="ScriptContext"/>. Passed by Phalanger runtime, cannot be null.</param>
  713. /// <param name="pattern">Regular expression to match.</param>
  714. /// <param name="callback">Function called to find out the replacement string.</param>
  715. /// <param name="data">String to search for replacements.</param>
  716. /// <returns>String or array containing strings with replacement performed.</returns>
  717. [ImplementsFunction("preg_replace_callback")]
  718. public static object Replace(ScriptContext/*!*/context, object pattern, PhpCallback callback, object data)
  719. {
  720. int count = Int32.MinValue; // disables counting;
  721. return Replace(context, null, null, pattern, null, callback, data, -1, ref count);
  722. }
  723. /// <summary>
  724. /// <para>Searches <paramref name="data"/> for matches to <paramref name="pattern"/> and the array of matched
  725. /// strings (full pattern match + parenthesized substrings) is passed to <paramref name="callback"/> which
  726. /// returns replacement string.</para>
  727. /// <para><paramref name="pattern"/> and <paramref name="data"/> parameters may be also unidimensional
  728. /// arrays of strings. For the explanation <see cref="Replace"/>.</para>
  729. /// </summary>
  730. /// <param name="context">Current <see cref="ScriptContext"/>. Passed by Phalanger runtime, cannot be null.</param>
  731. /// <param name="pattern">Regular expression to match.</param>
  732. /// <param name="callback">Function called to find out the replacement string.</param>
  733. /// <param name="data">String to search for replacements.</param>
  734. /// <param name="limit">Maximum number of matches replaced. (-1 for no limit)</param>
  735. /// <returns>String or array containing strings with replacement performed.</returns>
  736. [ImplementsFunction("preg_replace_callback")]
  737. public static object Replace(ScriptContext/*!*/context, object pattern, PhpCallback callback, object data, int limit)
  738. {
  739. int count = Int32.MinValue; // disables counting
  740. return Replace(context, null, null, pattern, null, callback, data, limit, ref count);
  741. }
  742. /// <summary>
  743. /// <para>Searches <paramref name="data"/> for matches to <paramref name="pattern"/> and the array of matched
  744. /// strings (full pattern match + parenthesized substrings) is passed to <paramref name="callback"/> which
  745. /// returns replacement string.</para>
  746. /// <para><paramref name="pattern"/> and <paramref name="data"/> parameters may be also unidimensional
  747. /// arrays of strings. For the explanation <see cref="Replace"/>.</para>
  748. /// </summary>
  749. /// <param name="context">Current <see cref="ScriptContext"/>. Passed by Phalanger runtime, cannot be null.</param>
  750. /// <param name="pattern">Regular expression to match.</param>
  751. /// <param name="callback">Function called to find out the replacement string.</param>
  752. /// <param name="data">String to search for replacements.</param>
  753. /// <param name="limit">Maximum number of matches replaced. (-1 for no limit)</param>
  754. /// <param name="count">Number of replacements.</param>
  755. /// <returns>String or array containing strings with replacement performed.</returns>
  756. [ImplementsFunction("preg_replace_callback")]
  757. public static object Replace(ScriptContext/*!*/context, object pattern, PhpCallback callback, object data, int limit, out int count)
  758. {
  759. count = 0;
  760. return Replace(context, null, null, pattern, null, callback, data, limit, ref count);
  761. }
  762. /// <summary>
  763. /// Private mehtod implementing all replace methods. Just one of <paramref name="replacement"/> or <paramref name="callback" /> should be used.
  764. /// </summary>
  765. /// <param name="context">Current <see cref="ScriptContext"/>. Must not be null.</param>
  766. /// <param name="self">Instance of object that called the replace method (replace pattern may contain $this)</param>
  767. /// <param name="definedVariables"></param>
  768. /// <param name="pattern"></param>
  769. /// <param name="replacement"></param>
  770. /// <param name="callback"></param>
  771. /// <param name="data"></param>
  772. /// <param name="limit"></param>
  773. /// <param name="count"></param>
  774. /// <returns>String or an array.</returns>
  775. private static object Replace(ScriptContext/*!*/context, DObject self, Dictionary<string, object> definedVariables, object pattern, object replacement, PhpCallback callback,
  776. object data, int limit, ref int count)
  777. {
  778. // if we have no replacement and no callback, matches are deleted (replaced by an empty string)
  779. if (replacement == null && callback == null)
  780. replacement = String.Empty;
  781. // exactly one of replacement or callback is valid now
  782. Debug.Assert(replacement != null ^ callback != null);
  783. // get eval info if it has been captured - is needed even if we do not need them later
  784. SourceCodeDescriptor descriptor = context.GetCapturedSourceCodeDescriptor();
  785. // PHP's behaviour for undocumented limit range
  786. if (limit < -1)
  787. limit = 0;
  788. PhpArray replacement_array = replacement as PhpArray;
  789. string replacement_string = null;
  790. if (replacement_array == null && replacement != null)
  791. replacement_string = Core.Convert.ObjectToString(replacement);
  792. // we should return new array, if there is an array passed as subject, it should remain unchanged:
  793. object data_copy = PhpVariable.DeepCopy(data);
  794. PhpArray pattern_array = pattern as PhpArray;
  795. if (pattern_array == null)
  796. {
  797. // string pattern
  798. // string replacement
  799. if (replacement_array != null)
  800. {
  801. // string pattern and array replacement not allowed:
  802. PhpException.InvalidArgument("replacement", LibResources.GetString("replacement_array_pattern_not"));
  803. return null;
  804. }
  805. // pattern should be treated as string and therefore replacement too:
  806. return SimpleReplace(self, definedVariables, pattern, replacement_string, callback, data_copy, limit, descriptor, ref count);
  807. }
  808. else if (replacement_array == null)
  809. {
  810. // array pattern
  811. // string replacement
  812. using (var pattern_enumerator = pattern_array.GetFastEnumerator())
  813. while (pattern_enumerator.MoveNext())
  814. {
  815. data_copy = SimpleReplace(self, definedVariables, pattern_enumerator.CurrentValue, replacement_string,
  816. callback, data_copy, limit, descriptor, ref count);
  817. }
  818. }
  819. else //if (replacement_array != null)
  820. {
  821. // array pattern
  822. // array replacement
  823. var replacement_enumerator = replacement_array.GetFastEnumerator();
  824. bool replacement_valid = true;
  825. using (var pattern_enumerator = pattern_array.GetFastEnumerator())
  826. while (pattern_enumerator.MoveNext())
  827. {
  828. // replacements are in array, move to next item and take it if possible, in other case take empty string:
  829. if (replacement_valid && replacement_enumerator.MoveNext())
  830. {
  831. replacement_string = Core.Convert.ObjectToString(replacement_enumerator.CurrentValue);
  832. }
  833. else
  834. {
  835. replacement_string = string.Empty;
  836. replacement_valid = false; // end of replacement_enumerator, do not call MoveNext again!
  837. }
  838. data_copy = SimpleReplace(self, definedVariables, pattern_enumerator.CurrentValue, replacement_string,
  839. callback, data_copy, limit, descriptor, ref count);
  840. }
  841. }
  842. // return resulting array or string assigned to data
  843. return data_copy;
  844. }
  845. /// <summary>
  846. /// Takes a regular expression <paramref name="pattern"/> and one of <paramref name="replacement"/> or
  847. /// <paramref name="callback"/>. Performs replacing on <paramref name="data"/>, which can be
  848. /// <see cref="PhpArray"/>, in other cases it is converted to string.
  849. /// If <paramref name="data"/> is <see cref="PhpArray"/>, every value is converted to string and
  850. /// replacement is performed in place in this array.
  851. /// Either <paramref name="replacement"/> or <paramref name="callback"/> should be null.
  852. /// </summary>
  853. /// <param name="self">Instance of object that called the replace method (replace pattern may contain $this)</param>
  854. /// <param name="definedVariables">Array with local variables - can be used by replace pattern</param>
  855. /// <param name="pattern">Regular expression to search.</param>
  856. /// <param name="replacement">Regular replacement expression. Should be null if callback is specified.</param>
  857. /// <param name="callback">Callback function that should be called to make replacements. Should be null
  858. /// if replacement is specified.</param>
  859. /// <param name="data">Array or string where pattern is searched.</param>
  860. /// <param name="limit">Max count of replacements for each item in subject.</param>
  861. /// <param name="descriptor"><see cref="SourceCodeDescriptor"/> for possible lambda function creation.</param>
  862. /// <param name="count">Cumulated number of replacements.</param>
  863. /// <returns></returns>
  864. private static object SimpleReplace(DObject self, Dictionary<string, object> definedVariables, object pattern,
  865. string replacement, PhpCallback callback, object data, int limit, SourceCodeDescriptor descriptor, ref int count)
  866. {
  867. Debug.Assert(limit >= -1);
  868. // exactly one of replacement or callback is valid:
  869. Debug.Assert(replacement != null ^ callback != null);
  870. PerlRegExpConverter converter = ConvertPattern(pattern, replacement);
  871. if (converter == null) return null;
  872. // get types of data we need:
  873. PhpArray data_array = data as PhpArray;
  874. string data_string = (data_array == null) ? ConvertData(data, converter) : null;
  875. // data comprising of a single string:
  876. if (data_array == null)
  877. {
  878. return ReplaceInternal(self, definedVariables, converter, callback, data_string, limit, descriptor, ref count);
  879. }
  880. else
  881. {
  882. // data is array, process each item:
  883. var enumerator = data_array.GetFastEnumerator();
  884. while (enumerator.MoveNext())
  885. {
  886. enumerator.CurrentValue = ReplaceInternal(self, definedVariables, converter, callback,
  887. ConvertData(enumerator.CurrentValue, converter), limit, descriptor, ref count);
  888. }
  889. enumerator.Dispose();
  890. // return array with items replaced:
  891. return data;
  892. }
  893. }
  894. /// <summary>
  895. /// Replaces <paramref name="limit"/> occurences of substrings.
  896. /// </summary>
  897. /// <param name="converter">
  898. /// Converter used for replacement if <paramref name="callback"/> is <B>null</B>.
  899. /// </param>
  900. /// <param name="self">Instance of object that called the replace method (replace pattern may contain $this)</param>
  901. /// <param name="definedVariables">Array with local variables - can be used by replace pattern</param>
  902. /// <param name="callback">Callback to call for replacement strings.</param>
  903. /// <param name="str">String to search for matches.</param>
  904. /// <param name="limit">Max number of replacements performed.</param>
  905. /// <param name="sourceCodeDesc"><see cref="SourceCodeDescriptor"/> for possible lambda function creation.</param>
  906. /// <param name="count">Cumulated number of replacements.</param>
  907. /// <returns></returns>
  908. private static string ReplaceInternal(DObject self, Dictionary<string, object> definedVariables, PerlRegExpConverter converter, PhpCallback callback,
  909. string str, int limit, SourceCodeDescriptor sourceCodeDesc, ref int count)
  910. {
  911. Debug.Assert(limit >= -1);
  912. if (callback == null)
  913. {
  914. // replace without executing code or counting the number of replacements:
  915. if ((converter.PerlOptions & PerlRegexOptions.Evaluate) == 0 && count < 0)
  916. return converter.Regex.Replace(str, converter.DotNetReplaceExpression, limit);
  917. Evaluator evaluator = new Evaluator(converter.Regex, converter.DotNetReplaceExpression, sourceCodeDesc, self, definedVariables);
  918. MatchEvaluator match_evaluator;
  919. if ((converter.PerlOptions & PerlRegexOptions.Evaluate) != 0)
  920. match_evaluator = new MatchEvaluator(evaluator.ReplaceCodeExecute);
  921. else
  922. match_evaluator = new MatchEvaluator(evaluator.ReplaceCount);
  923. string result = converter.Regex.Replace(str, match_evaluator, limit);
  924. count += evaluator.Count;
  925. return result;
  926. }
  927. else
  928. {
  929. StringBuilder result = new StringBuilder((str != null) ? str.Length : 0);
  930. int last_index = 0;
  931. Match m = converter.Regex.Match(str);
  932. while (m.Success && (limit == -1 || limit-- > 0))
  933. {
  934. // append everything from input string to current match
  935. result.Append(str, last_index, m.Index - last_index);
  936. // move index after current match
  937. last_index = m.Index + m.Length;
  938. PhpArray arr = new PhpArray(m.Groups.Count, 0);
  939. for (int i = 0; i < m.Groups.Count; i++)
  940. arr[i] = m.Groups[i].Value;
  941. // append user callback function result
  942. string replacement = Core.Convert.ObjectToString(callback.Invoke(arr));
  943. result.Append(replacement);
  944. m = m.NextMatch();
  945. count++;
  946. }
  947. // remaining string
  948. result.Append(str, last_index, str.Length - last_index);
  949. return result.ToString();
  950. }
  951. }
  952. /// <summary>
  953. /// Class implementing <see cref="MatchEvaluator"/> delegate evaluating php code if 'e' modifier
  954. /// in preg_replace is specified.
  955. /// </summary>
  956. private sealed class Evaluator
  957. {
  958. private Regex reg;
  959. private string replacement;
  960. private SourceCodeDescriptor sourceCodeDesc;
  961. private Dictionary<string, object> definedVariables;
  962. private DObject self;
  963. public int Count { get { return count; } }
  964. private int count;
  965. public Evaluator(Regex reg, string replacement, SourceCodeDescriptor sourceCodeDesc, DObject self, Dictionary<string, object> definedVariables)
  966. {
  967. this.reg = reg;
  968. this.definedVariables = definedVariables;
  969. this.replacement = replacement;
  970. this.sourceCodeDesc = sourceCodeDesc;
  971. this.count = 0;
  972. this.self = self;
  973. }
  974. public string ReplaceCodeExecute(Match m)
  975. {
  976. count++;
  977. if (m.Value.Trim().Length == 0)
  978. return String.Empty; // nothing to do
  979. ScriptContext context = ScriptContext.CurrentContext;
  980. // generate code that will be executed
  981. string code = String.Concat("return ", Substitute(replacement, m.Groups), ";");
  982. // Execute..
  983. return Core.Convert.ObjectToString(DynamicCode.Eval(code, true, context, definedVariables, self, null,
  984. context.EvalRelativeSourcePath, context.EvalLine, context.EvalColumn, context.EvalId, null));
  985. }
  986. public string ReplaceCount(Match m)
  987. {
  988. count++;
  989. return replacement;
  990. }
  991. /// <summary>
  992. /// Expects replacement string produced by <see cref="PerlRegExpReplacement.ConvertReplacement"/>,
  993. /// i.e. only ${n} refer to valid groups.
  994. /// </summary>
  995. private string Substitute(string replacement, GroupCollection groups)
  996. {
  997. StringBuilder result = new StringBuilder(replacement.Length);
  998. int i = 0;
  999. while (i < replacement.Length)
  1000. {
  1001. if (IsParenthesizedGroupReference(replacement, i))
  1002. {
  1003. // ${
  1004. i += 2;
  1005. // [0-9]{1,2}
  1006. int group_no = replacement[i++] - '0';
  1007. if (replacement[i] != '}')
  1008. {
  1009. group_no = group_no * 10 + (replacement[i] - '0');
  1010. i++;
  1011. }
  1012. // }
  1013. Debug.Assert(replacement[i] == '}');
  1014. i++;
  1015. Debug.Assert(group_no < groups.Count);
  1016. // append slashed group value:
  1017. result.Append(StringUtils.AddCSlashes(groups[group_no].Value, true, true, false));
  1018. }
  1019. else if (replacement[i] == '$')
  1020. {
  1021. Debug.Assert(i + 1 < replacement.Length && replacement[i + 1] == '$');
  1022. result.Append('$');
  1023. i += 2;
  1024. }
  1025. else
  1026. {
  1027. result.Append(replacement[i++]);
  1028. }
  1029. }
  1030. return result.ToString();
  1031. }
  1032. }
  1033. #endregion
  1034. #region Helper methods
  1035. private static void AddGroupNameToResult(Regex regex, PhpArray matches, int i, Action<PhpArray, string> action)
  1036. {
  1037. var groupName = GetGroupName(regex, i);
  1038. if (!String.IsNullOrEmpty(groupName))
  1039. {
  1040. action(matches, groupName);
  1041. }
  1042. }
  1043. private static PerlRegExpConverter ConvertPattern(object pattern, string replacement)
  1044. {
  1045. var converter = PerlRegExpCache.Get(pattern, replacement, true);
  1046. // converter can contain a warning message,
  1047. // it means it is invalid and we cannot use it:
  1048. if (converter.ArgumentException != null)
  1049. {
  1050. // Exception message might contain substrings like "{2}" so it cannot be passed to any
  1051. // method that formats the string and replaces these numbers with parameters.
  1052. PhpException.Throw(PhpError.Warning, LibResources.GetString("invalid_argument", "pattern") + ": " + converter.ArgumentException);
  1053. return null;
  1054. }
  1055. //
  1056. return converter;
  1057. }
  1058. private static string ConvertData(object data, PerlRegExpConverter/*!*/ converter)
  1059. {
  1060. if (data == null)
  1061. {
  1062. return string.Empty;
  1063. }
  1064. else if (data.GetType() == typeof(PhpBytes))
  1065. {
  1066. return converter.ConvertBytes(((PhpBytes)data).ReadonlyData);
  1067. }
  1068. else
  1069. {
  1070. string str = Core.Convert.ObjectToString(data);
  1071. return converter.ConvertString(str, 0, str.Length);
  1072. }
  1073. }
  1074. /// <summary>
  1075. /// Used for handling Offset Capture flags. Returns just <paramref name="item"/> if
  1076. /// <paramref name="offsetCapture"/> is <B>false</B> or an <see cref="PhpArray"/> containing
  1077. /// <paramref name="item"/> at index 0 and <paramref name="index"/> at index 1.
  1078. /// </summary>
  1079. /// <param name="item">Item to add to return value.</param>
  1080. /// <param name="index">Index to specify in return value if <paramref name="offsetCapture"/> is
  1081. /// <B>true</B>.</param>
  1082. /// <param name="offsetCapture">Whether or not to make <see cref="PhpArray"/> with item and index.</param>
  1083. /// <returns></returns>
  1084. private static object NewArrayItem(object item, int index, bool offsetCapture)
  1085. {
  1086. if (!offsetCapture)
  1087. return item;
  1088. PhpArray arr = new PhpArray(2, 0);
  1089. arr[0] = item;
  1090. arr[1] = index;
  1091. return arr;
  1092. }
  1093. /// <summary>
  1094. /// Goes through <paramref name="m"/> matches and fill <paramref name="matches"/> array with results
  1095. /// according to Pattern Order.
  1096. /// </summary>
  1097. /// <param name="r"><see cref="Regex"/> that produced the match</param>
  1098. /// <param name="m"><see cref="Match"/> to iterate through all matches by NextMatch() call.</param>
  1099. /// <param name="matches">Array for storing results.</param>
  1100. /// <param name="addOffsets">Whether or not add arrays with offsets instead of strings.</param>
  1101. /// <returns>Number of full pattern matches.</returns>
  1102. private static int FillMatchesArrayAllPatternOrder(Regex r, Match m, ref PhpArray matches, bool addOffsets)
  1103. {
  1104. // second index, increases at each match in pattern order
  1105. int j = 0;
  1106. while (m.Success)
  1107. {
  1108. // add all groups
  1109. for (int i = 0; i < m.Groups.Count; i++)
  1110. {
  1111. object arr = NewArrayItem(m.Groups[i].Value, m.Groups[i].Index, addOffsets);
  1112. AddGroupNameToResult(r, matches, i, (ms, groupName) =>
  1113. {
  1114. if (j == 0) ms[groupName] = new PhpArray();
  1115. ((PhpArray)ms[groupName])[j] = arr;
  1116. });
  1117. if (j == 0) matches[i] = new PhpArray();
  1118. ((PhpArray)matches[i])[j] = arr;
  1119. }
  1120. j++;
  1121. m = m.NextMatch();
  1122. }
  1123. return j;
  1124. }
  1125. /// <summary>
  1126. /// Goes through <paramref name="m"/> matches and fill <paramref name="matches"/> array with results
  1127. /// according to Set Order.
  1128. /// </summary>
  1129. /// <param name="r"><see cref="Regex"/> that produced the match</param>
  1130. /// <param name="m"><see cref="Match"/> to iterate through all matches by NextMatch() call.</param>
  1131. /// <param name="matches">Array for storing results.</param>
  1132. /// <param name="addOffsets">Whether or not add arrays with offsets instead of strings.</param>
  1133. /// <returns>Number of full pattern matches.</returns>
  1134. private static int FillMatchesArrayAllSetOrder(Regex r, Match m, ref PhpArray matches, bool addOffsets)
  1135. {
  1136. // first index, increases at each match in set order
  1137. int i = 0;
  1138. while (m.Success)
  1139. {
  1140. PhpArray pa = new PhpArray(m.Groups.Count, 0);
  1141. // add all groups
  1142. for (int j = 0; j < m.Groups.Count; j++)
  1143. {
  1144. object arr = NewArrayItem(m.Groups[j].Value, m.Groups[j].Index, addOffsets);
  1145. AddGroupNameToResult(r, pa, j, (p, groupName) =>
  1146. {
  1147. p[groupName] = arr;
  1148. });
  1149. pa[j] = arr;
  1150. }
  1151. matches[i] = pa;
  1152. i++;
  1153. m = m.NextMatch();
  1154. }
  1155. return i;
  1156. }
  1157. private static int GetLastSuccessfulGroup(GroupCollection/*!*/ groups)
  1158. {
  1159. Debug.Assert(groups != null);
  1160. for (int i = groups.Count - 1; i >= 0; i--)
  1161. {
  1162. if (groups[i].Success)
  1163. return i;
  1164. }
  1165. return -1;
  1166. }
  1167. internal static bool IsDigitGroupReference(string replacement, int i)
  1168. {
  1169. return (replacement[i] == '$' || replacement[i] == '\\') &&
  1170. (i + 1 < replacement.Length && Char.IsDigit(replacement, i + 1));
  1171. }
  1172. internal static bool IsParenthesizedGroupReference(string replacement, int i)
  1173. {
  1174. return replacement[i] == '$' && i + 3 < replacement.Length && replacement[i + 1] == '{' &&
  1175. Char.IsDigit(replacement, i + 2) &&
  1176. (
  1177. replacement[i + 3] == '}' ||
  1178. i + 4 < replacement.Length && replacement[i + 4] == '}' && Char.IsDigit(replacement, i + 3)
  1179. );
  1180. }
  1181. #endregion
  1182. #region Unit Testing
  1183. #if DEBUG
  1184. [Test]
  1185. static void TestUnicodeMatch()
  1186. {
  1187. int m;
  1188. m = Match
  1189. (
  1190. new PhpBytes(Encoding.UTF8.GetBytes("/[ř]/u")),
  1191. new PhpBytes(Encoding.UTF8.GetBytes("12ščř45"))
  1192. );
  1193. Debug.Assert(m == 1);
  1194. Encoding enc = Configuration.Application.Globalization.PageEncoding;
  1195. m = Match
  1196. (
  1197. new PhpBytes(enc.GetBytes("/[ř]/")),
  1198. new PhpBytes("12ščř45")
  1199. );
  1200. Debug.Assert(m == 1);
  1201. // binary cache test:
  1202. m = Match
  1203. (
  1204. new PhpBytes(enc.GetBytes("/[ř]/")),
  1205. new PhpBytes("12ščř45")
  1206. );
  1207. Debug.Assert(m == 1);
  1208. int count;
  1209. object r = Replace
  1210. (
  1211. ScriptContext.CurrentContext,
  1212. null,
  1213. null,
  1214. new PhpBytes(Encoding.UTF8.GetBytes("/[řš]+/u")),
  1215. "|žýř|",
  1216. new PhpBytes(Encoding.UTF8.GetBytes("Hešovářřřříčkořš hxx")),
  1217. 1000,
  1218. out count
  1219. );
  1220. Debug.Assert(r as string == "He|žýř|ová|žýř|íčko|žýř| hxx");
  1221. Debug.Assert(count == 3);
  1222. }
  1223. #endif
  1224. #endregion
  1225. }
  1226. #region PerlRegExpReplacement
  1227. internal static class PerlRegExpReplacement
  1228. {
  1229. /// <summary>
  1230. /// Get the converted replacement from the cache or perform conversion and cache.
  1231. /// </summary>
  1232. /// <param name="regex"></param>
  1233. /// <param name="replacement"></param>
  1234. /// <returns></returns>
  1235. internal static string ConvertReplacement(Regex/*!*/regex, string/*!*/replacement)
  1236. {
  1237. int[] group_numbers = regex.GetGroupNumbers();
  1238. int max_number = (group_numbers.Length > 0) ? group_numbers[group_numbers.Length - 1] : 0;
  1239. return ConvertReplacement(max_number, replacement);
  1240. }
  1241. /// <summary>
  1242. /// Converts substitutions of the form \\xx to $xx (perl to .NET format).
  1243. /// </summary>
  1244. /// <param name="max_number">Maximum group number for the current regullar expression.
  1245. /// <code>
  1246. /// int[] group_numbers = regex.GetGroupNumbers();
  1247. /// int max_number = (group_numbers.Length > 0) ? group_numbers[group_numbers.Length - 1] : 0;
  1248. /// </code>
  1249. /// </param>
  1250. /// <param name="replacement">String possibly containing \\xx substitutions.</param>
  1251. /// <returns>String with converted $xx substitution format.</returns>
  private static string ConvertReplacement(int max_number, string replacement)
  {
      int length = replacement.Length;
      StringBuilder output = new StringBuilder(length);

      int pos = 0;
      while (pos < length)
      {
          // Group reference (plain or brace-enclosed) recognised by the PerlRegExp helpers.
          if (PerlRegExp.IsDigitGroupReference(replacement, pos) ||
              PerlRegExp.IsParenthesizedGroupReference(replacement, pos))
          {
              // step over the character that introduces the reference
              pos++;

              bool braced = replacement[pos] == '{';
              if (braced)
              {
                  pos++;
              }

              // one or two digits form the group number
              int number = replacement[pos] - '0';
              pos++;
              if (pos < length && Char.IsDigit(replacement, pos))
              {
                  number = number * 10 + (replacement[pos] - '0');
                  pos++;
              }

              // references to non-existing groups are dropped (replaced with an empty string)
              if (number <= max_number)
              {
                  output.Append("${").Append(number.ToString()).Append('}');
              }

              // step over the closing brace of the brace-enclosed form
              if (braced)
              {
                  pos++;
              }
              continue;
          }

          char current = replacement[pos];

          if (current == '$')
          {
              // a '$' that does not start a substitution has to be doubled for .NET
              output.Append("$$");
              pos++;
              continue;
          }

          if (current == '\\' && pos + 1 < length)
          {
              char next = replacement[pos + 1];
              if (next == '\\')
              {
                  // escaped backslash collapses into a single one
                  output.Append('\\');
              }
              else if (next == '$')
              {
                  // "\$" becomes "$$" because a backslash does not escape '$' in .NET replacements
                  output.Append("$$");
              }
              else
              {
                  // backslash followed by any other character is copied unchanged
                  output.Append(replacement, pos, 2);
              }
              pos += 2;
              continue;
          }

          // ordinary character (or a backslash that is the very last character)
          output.Append(current);
          pos++;
      }

      return output.ToString();
  }
  1318. }
  1319. #endregion
  1320. #region PerlRegExpCache
  /// <summary>
  /// Cache of <see cref="PerlRegExpConverter"/> instances stored in a fixed table of hash buckets;
  /// each bucket is a singly linked chain threaded through <see cref="PerlRegExpConverter.nextcache"/>.
  /// </summary>
  internal static class PerlRegExpCache
  {
      /// <summary>Number of buckets; has to be a power of two because hashes are masked with (BucketsLength - 1).</summary>
      private const uint BucketsLength = 64;

      private static readonly PerlRegExpConverter[]/*!*/buckets = new PerlRegExpConverter[BucketsLength];

      /// <summary>Lock objects guarding insertion; the lock is picked by bucket hash modulo the array length.</summary>
      private static readonly object[] locks = new object[8];

      /// <summary>Generation stamp written into items when they are created or hit.</summary>
      private static int generation = 0;

      /// <summary>Number of <see cref="CleanupBuckets"/> calls since the counter was last reset.</summary>
      private static int requestsCounter = 0;

      /// <summary>Index of the bucket that will be swept by the next cleanup.</summary>
      private static uint cleanupBucket = 0;

      static PerlRegExpCache()
      {
          object[] stripes = PerlRegExpCache.locks;
          for (int index = 0; index < stripes.Length; index++)
          {
              stripes[index] = new object();
          }

          Debug.Assert(BucketsLength == 64); // must be 2^x

          RequestContext.RequestEnd += CleanupBuckets;
      }

      /// <summary>
      /// Looks up the converter cached for the given pattern and replacement.
      /// </summary>
      /// <param name="pattern">Pattern object; may be <c>null</c> (hashed as 0).</param>
      /// <param name="replacement">Replacement string or <c>null</c>.</param>
      /// <param name="add">When <c>true</c>, a missing item is created and enlisted; otherwise <c>null</c> is returned on a miss.</param>
      public static PerlRegExpConverter Get(object pattern, string replacement, bool add)
      {
          // GetHashCode is a virtual call, so it is evaluated only once
          uint code = (pattern == null) ? 0u : unchecked((uint)pattern.GetHashCode());
          uint hash = code & (BucketsLength - 1);

          PerlRegExpConverter candidate = buckets[hash];
          while (candidate != null)
          {
              if (candidate.CacheEquals(candidate, pattern, replacement))
              {
                  candidate.Cachehit();
                  // stamp the item with the current generation so the cleanup keeps it
                  candidate.generation = PerlRegExpCache.generation;
                  return candidate;
              }
              candidate = candidate.nextcache;
          }

          if (!add)
          {
              return null;
          }
          return EnsureGet(pattern, replacement, hash);
      }

      /// <summary>
      /// Creates and enlists a new converter under the bucket's lock, unless an equal one
      /// has been enlisted in the meantime.
      /// </summary>
      private static PerlRegExpConverter/*!*/EnsureGet(object pattern, string replacement, uint hash)
      {
          lock (locks[hash % locks.Length])
          {
              // look again now that the lock is held
              PerlRegExpConverter existing = Get(pattern, replacement, false);
              if (existing != null)
              {
                  return existing;
              }

              // avoid growing of the table in non-web applications (console etc.)
              CleanupBuckets();

              PerlRegExpConverter created = new PerlRegExpConverter(pattern, replacement, Configuration.Application.Globalization.PageEncoding);
              created.nextcache = PerlRegExpCache.buckets[hash];
              created.generation = PerlRegExpCache.generation;

              buckets[hash] = created; // enlist the item
              return created;
          }
      }

      /// <summary>
      /// Counts invocations and, on the call that moves the counter to 64, sweeps a single bucket:
      /// items stamped with neither the current nor the immediately preceding generation are unlinked.
      /// The call following a sweep resets the counter to zero.
      /// </summary>
      private static void CleanupBuckets()
      {
          int counter = PerlRegExpCache.requestsCounter;

          if (counter >= 64)
          {
              PerlRegExpCache.requestsCounter = 0;
              return;
          }

          if (counter < 32)
          {
              // plain (non-interlocked) increment for the first half of the cycle
              PerlRegExpCache.requestsCounter = counter + 1;
              return;
          }

          if (Interlocked.Increment(ref PerlRegExpCache.requestsCounter) != 64)
          {
              return;
          }

          // do some cleanup
          int currentGeneration = PerlRegExpCache.generation;
          uint slot = PerlRegExpCache.cleanupBucket;
          PerlRegExpCache.cleanupBucket = (uint)(slot + 1) & (BucketsLength - 1);

          PerlRegExpConverter survivor = null;
          PerlRegExpConverter node = buckets[slot];
          while (node != null)
          {
              bool recent = node.generation == currentGeneration
                  || unchecked(node.generation + 1) == currentGeneration;

              if (recent)
              {
                  survivor = node;
              }
              else if (survivor == null)
              {
                  buckets[slot] = node.nextcache;
              }
              else
              {
                  survivor.nextcache = node.nextcache;
              }

              node = node.nextcache;
          }

          // the generation advances after every second swept bucket (the odd-indexed ones)
          if ((slot & 1) == 1)
          {
              PerlRegExpCache.generation = unchecked(currentGeneration + 1);
          }
      }
  }
  1412. #endregion
  1413. #region PerlRegExpConverter
  1414. /// <summary>
  1415. /// Used for converting PHP Perl like regular expressions to .NET regular expressions.
  1416. /// </summary>
  1417. internal sealed class PerlRegExpConverter
  1418. {
  1419. #region Static & Constants
  1420. /// <summary>
  1421. /// All named groups from Perl regexp are renamed to start with this character.
  1422. /// In order to enable group names starting with number
  1423. /// </summary>
  1424. internal const char GroupPrefix = 'a';
  1425. internal const string AnonymousGroupPrefix = "an0ny_";
  /// <summary>
  /// Backing field of <see cref="quantifiers"/>; created on first use.
  /// </summary>
  private static Regex _quantifiers;

  /// <summary>
  /// Regular expression matching a single quantifier ("?", "*", "+", "{n,}" or "{n,m}") exactly at the
  /// position where matching starts. Used when quantifiers are switched between greedy and ungreedy.
  /// </summary>
  private static Regex quantifiers
  {
      get
      {
          Regex cached = _quantifiers;
          if (cached != null)
          {
              return cached;
          }

          cached = new Regex(@"\G(?:\?|\*|\+|\{[0-9]+,[0-9]*\})");
          _quantifiers = cached;
          return cached;
      }
  }
  /// <summary>
  /// Regular expression matching a POSIX character class designation such as "[:alpha:]" or the
  /// negated form "[:^alpha:]" at the beginning of the input.
  /// Group 1 captures the optional '^', group 2 the class name.
  /// </summary>
  private static Regex posixCharClasses
  {
      get
      {
          if (_posixCharClasses == null)
          {
              // The caret has to be escaped: an unescaped '^' inside the group is a start-of-string
              // anchor, which can never succeed after "[:" and so would make the negated form unmatchable.
              _posixCharClasses = new Regex("^\\[:(\\^)?(alpha|alnum|ascii|cntrl|digit|graph|lower|print|punct|space|upper|word|xdigit):]", RegexOptions.Singleline | RegexOptions.Compiled);
          }
          return _posixCharClasses;
      }
  }

  /// <summary>
  /// Backing field of <see cref="posixCharClasses"/>; created on first use.
  /// </summary>
  private static Regex _posixCharClasses = null;
  1453. #endregion
  1454. #region Fields & Properties
  // Backing storage of the properties below.
  private Regex/*!*/ regex;
  private RegexOptions dotNetOptions;
  private PerlRegexOptions perlOptions = PerlRegexOptions.None;

  /// <summary>
  /// Encoding passed to the constructor; used when the pattern bytes or characters are converted.
  /// </summary>
  private readonly Encoding/*!*/ encoding;

  /// <summary>
  /// The .NET <see cref="System.Text.RegularExpressions.Regex"/> built from the Perl pattern, to be used for matching.
  /// </summary>
  public Regex/*!*/ Regex
  {
      get { return regex; }
  }

  /// <summary>
  /// The replacement string in .NET format. Assigned by the constructor only when a replacement
  /// was supplied and the pattern was converted into a <see cref="Regex"/>.
  /// </summary>
  public readonly string DotNetReplaceExpression;

  /// <summary>
  /// <see cref="RegexOptions"/> parsed from the pattern modifiers; they are passed to the
  /// <see cref="Regex"/> constructor when the expression is created.
  /// </summary>
  public RegexOptions DotNetOptions
  {
      get { return dotNetOptions; }
  }

  /// <summary>
  /// Perl specific options parsed from the pattern modifiers that have no <see cref="RegexOptions"/> counterpart.
  /// </summary>
  public PerlRegexOptions PerlOptions
  {
      get { return perlOptions; }
  }

  /// <summary>
  /// Message describing why the conversion failed, or <c>null</c> if the conversion succeeded.
  /// </summary>
  public string ArgumentException { get; private set; }
  1477. #endregion
  1478. #region Cache helper
  /// <summary>
  /// Link to the next <see cref="PerlRegExpConverter"/> in the same chain of cached instances.
  /// </summary>
  internal PerlRegExpConverter nextcache;

  /// <summary>
  /// Number of cache hits counted so far; counting stops at 3, which is when <see cref="regex"/> gets recompiled.
  /// </summary>
  private int hitsCount = 0;

  /// <summary>
  /// Generation stamp assigned by the cache. Items with an old generation can be removed from the cache.
  /// </summary>
  internal long generation;

  // Values the instance was created from; compared by the CacheEquals function.
  internal readonly object _pattern;
  internal readonly string _replacement, _strpattern;

  /// <summary>
  /// Registers a cache hit. On the hit that moves the counter to 3, an existing <see cref="regex"/>
  /// is replaced by an equivalent one created with <see cref="RegexOptions.Compiled"/>.
  /// </summary>
  internal void Cachehit()
  {
      // cheap check first; the interlocked increment is only paid for the first few hits
      if (this.hitsCount >= 3)
      {
          return;
      }
      if (Interlocked.Increment(ref this.hitsCount) != 3)
      {
          return;
      }

      Regex current = this.regex;
      if (current != null)
      {
          this.regex = new Regex(current.ToString(), this.dotNetOptions | RegexOptions.Compiled);
      }
  }
  1502. /// <summary>
  1503. /// Function that efficiently compares <c>this</c> instance of <see cref="PerlRegExpConverter"/> with another <see cref="PerlRegExpConverter"/>.
  1504. /// 1st argument is reference to <c>this</c>.
  1505. /// 2nd argument is the other's <see cref="PerlRegExpConverter._pattern"/>.
  1506. /// 3rd argument is the other's <see cref="PerlRegExpConverter._replacement"/>.
  1507. /// Function returns <c>true</c> if pattern and replacement match.
  1508. /// </summary>
  1509. internal readonly Func<PerlRegExpConverter, object, string, bool>/*!*/CacheEquals;
  /// <summary>
  /// Equality predicates specialised by the kind of pattern (and replacement) a
  /// <see cref="PerlRegExpConverter"/> was created with, plus the selector that picks the right one.
  /// </summary>
  private struct CacheEqualsFunctions
  {
      // instance created with a null pattern
      static bool eq_null(PerlRegExpConverter self, object otherpattern, string otherreplacement)
      {
          if (otherpattern != null) return false;
          return otherreplacement == self._replacement;
      }

      // instance created with a string pattern and no replacement
      static bool eq_string_null(PerlRegExpConverter self, object otherpattern, string otherreplacement)
      {
          if (otherpattern == null || otherreplacement != null) return false;
          if (otherpattern.GetType() != typeof(string)) return false;
          return self._strpattern.Equals((string)otherpattern);
      }

      // instance created with a string pattern and a replacement
      static bool eq_string(PerlRegExpConverter self, object otherpattern, string otherreplacement)
      {
          if (otherpattern == null || otherreplacement == null) return false;
          if (otherpattern.GetType() != typeof(string)) return false;
          return self._strpattern.Equals((string)otherpattern)
              && self._replacement.Equals(otherreplacement);
      }

      // instance created with a PhpBytes pattern
      static bool eq_phpbytes(PerlRegExpConverter self, object otherpattern, string otherreplacement)
      {
          if (otherpattern == null || otherpattern.GetType() != typeof(PhpBytes)) return false;
          return ((PhpBytes)otherpattern).Equals((PhpBytes)self._pattern)
              && otherreplacement == self._replacement;
      }

      // instance created with a PhpString pattern
      static bool eq_phpstring(PerlRegExpConverter self, object otherpattern, string otherreplacement)
      {
          if (otherpattern == null || otherpattern.GetType() != typeof(PhpString)) return false;
          return ((PhpString)otherpattern).Equals((PhpString)self._pattern)
              && otherreplacement == self._replacement;
      }

      // instance created with a pattern of any other type
      static bool eq_default(PerlRegExpConverter self, object otherpattern, string otherreplacement)
      {
          if (otherpattern == null || otherpattern.GetType() != self._pattern.GetType()) return false;
          return otherpattern.Equals(self._pattern)
              && otherreplacement == self._replacement;
      }

      // delegates are created once and shared by all converter instances
      static Func<PerlRegExpConverter, object, string, bool>/*!*/ cacheeq_null = eq_null;
      static Func<PerlRegExpConverter, object, string, bool>/*!*/ cacheeq_string_null = eq_string_null;
      static Func<PerlRegExpConverter, object, string, bool>/*!*/ cacheeq_string = eq_string;
      static Func<PerlRegExpConverter, object, string, bool>/*!*/ cacheeq_phpbytes = eq_phpbytes;
      static Func<PerlRegExpConverter, object, string, bool>/*!*/ cacheeq_phpstring = eq_phpstring;
      static Func<PerlRegExpConverter, object, string, bool>/*!*/ cacheeq_default = eq_default;

      /// <summary>
      /// Picks the equality delegate matching the runtime type of <paramref name="pattern"/> and,
      /// for string patterns, whether a <paramref name="replacement"/> is present.
      /// </summary>
      public static Func<PerlRegExpConverter, object, string, bool>/*!*/SelectEqualsFunction(object pattern, string replacement)
      {
          if (pattern == null)
          {
              return CacheEqualsFunctions.cacheeq_null;
          }

          Type kind = pattern.GetType();

          if (kind == typeof(string))
          {
              return (replacement == null)
                  ? CacheEqualsFunctions.cacheeq_string_null
                  : CacheEqualsFunctions.cacheeq_string;
          }
          if (kind == typeof(PhpBytes))
          {
              return CacheEqualsFunctions.cacheeq_phpbytes;
          }
          if (kind == typeof(PhpString))
          {
              return CacheEqualsFunctions.cacheeq_phpstring;
          }
          return CacheEqualsFunctions.cacheeq_default;
      }
  }
  /// <summary>
  /// Sets up the fields the cache relies on: a copy of the pattern, its string form (if it is a string),
  /// the replacement, and the matching equality function.
  /// </summary>
  private PerlRegExpConverter(object pattern, string replacement)
  {
      _replacement = replacement;

      // null unless the pattern is a System.String
      _strpattern = pattern as string;

      // keep our own copy of the pattern value for later comparisons
      _pattern = PhpVariable.Copy(pattern, CopyReason.Assigned);

      // equality check specialised for this kind of pattern/replacement
      CacheEquals = CacheEqualsFunctions.SelectEqualsFunction(pattern, replacement);
  }
  1554. #endregion
  /// <summary>
  /// Creates a new <see cref="PerlRegExpConverter"/>: converts the Perl pattern to a .NET regular expression
  /// and, if given, the replacement string to the .NET replacement format.
  /// </summary>
  /// <param name="pattern">Perl regular expression to convert.</param>
  /// <param name="replacement">Perl replacement string to convert, or a <B>null</B> reference for match only.</param>
  /// <param name="encoding">Encoding used in the case the pattern is a binary string.</param>
  /// <exception cref="ArgumentNullException"><paramref name="encoding"/> is a <B>null</B> reference.</exception>
  public PerlRegExpConverter(object pattern, string replacement, Encoding/*!*/ encoding)
      : this(pattern, replacement)
  {
      if (encoding == null)
      {
          throw new ArgumentNullException("encoding");
      }

      this.encoding = encoding;

      ConvertPattern(pattern);

      // the replacement is converted only if there is one and the pattern conversion produced a regex
      if (replacement == null || this.regex == null)
      {
          return;
      }

      if (replacement.Length == 0)
      {
          this.DotNetReplaceExpression = string.Empty;
      }
      else
      {
          this.DotNetReplaceExpression = PerlRegExpReplacement.ConvertReplacement(regex, replacement);
      }
  }
  /// <summary>
  /// Parses the delimited Perl pattern, converts it and creates <see cref="regex"/>.
  /// An <see cref="System.ArgumentException"/> thrown along the way is not propagated; its message
  /// is stored in the <see cref="ArgumentException"/> property instead.
  /// </summary>
  private void ConvertPattern(object pattern)
  {
      // stays null until the conversion itself succeeds; used when the error message is cleaned up
      string dotNetMatchExpression = null;

      try
      {
          // get the expression between the delimiters and parse the trailing options
          bool isBinary = pattern != null && pattern.GetType() == typeof(PhpBytes);
          string perlRegEx = isBinary
              ? LoadPerlRegex(((PhpBytes)pattern).ReadonlyData)
              : LoadPerlRegex(PHP.Core.Convert.ObjectToString(pattern));

          // translate Perl syntax into .NET syntax
          dotNetMatchExpression = ConvertRegex(perlRegEx, perlOptions);

          // build the regex
          this.regex = new Regex(dotNetMatchExpression, dotNetOptions);
      }
      catch (ArgumentException e)
      {
          this.ArgumentException = ExtractExceptionalMessage(e.Message, dotNetMatchExpression);
      }
  }
  /// <summary>
  /// Extracts the .NET exceptional message from the message stored in an exception.
  /// The message has format 'parsing "{pattern}" - {message}\r\nParameter name {pattern}' in .NET 1.1.
  /// </summary>
  /// <param name="message">Exception message; may be <c>null</c>.</param>
  /// <param name="dotNetMatchExpression">
  /// Converted expression whose occurrences in the message are replaced with "&lt;pattern&gt;";
  /// <c>null</c> or an empty string skips the replacement.
  /// </param>
  /// <returns>
  /// The first line of the message with everything up to the first '-' (and the character after it) removed;
  /// an empty string if <paramref name="message"/> is <c>null</c>.
  /// </returns>
  private static string ExtractExceptionalMessage(string message, string dotNetMatchExpression)
  {
      if (message == null)
      {
          return string.Empty;
      }

      // String.Replace throws on an empty search string, so an empty expression is skipped as well
      if (!string.IsNullOrEmpty(dotNetMatchExpression))
      {
          message = message.Replace(dotNetMatchExpression, "<pattern>");
      }

      // keep the first line only (ordinal search, the text is not linguistic)
      int lineEnd = message.IndexOf("\r\n", StringComparison.Ordinal);
      if (lineEnd >= 0)
      {
          message = message.Substring(0, lineEnd);
      }

      // drop the 'parsing "..." - ' prefix; the start index is clamped so that a '-' at the very
      // end of the message yields an empty string instead of an ArgumentOutOfRangeException
      int dash = message.IndexOf('-');
      if (dash >= 0)
      {
          message = message.Substring(Math.Min(dash + 2, message.Length));
      }

      return message;
  }
  /// <summary>
  /// Returns the part of <paramref name="str"/> given by <paramref name="start"/> and <paramref name="length"/>.
  /// If the UTF8 option is set and that part is not pure ASCII, the part is first encoded to bytes
  /// with <see cref="encoding"/> and those bytes are decoded as UTF-8.
  /// </summary>
  /// <param name="str">Source string.</param>
  /// <param name="start">Index of the first character to take.</param>
  /// <param name="length">Number of characters to take.</param>
  /// <returns>The (possibly re-decoded) substring.</returns>
  internal string ConvertString(string str, int start, int length)
  {
      bool reinterpret = (perlOptions & PerlRegexOptions.UTF8) != 0
          && !StringUtils.IsAsciiString(str, start, length);

      string part = str.Substring(start, length);
      if (!reinterpret)
      {
          return part;
      }

      // A single code path for all platforms, using only the overloads the former SILVERLIGHT branch
      // relied on. That branch converted the whole string and ignored start/length.
      byte[] bytes = new byte[encoding.GetByteCount(part)];
      encoding.GetBytes(part, 0, part.Length, bytes, 0);
      return System.Text.Encoding.UTF8.GetString(bytes, 0, bytes.Length);
  }
  /// <summary>
  /// Decodes the whole byte array; see <see cref="ConvertBytes(byte[], int, int)"/>.
  /// </summary>
  internal string ConvertBytes(byte[] bytes)
  {
      int count = bytes.Length;
      return ConvertBytes(bytes, 0, count);
  }

  /// <summary>
  /// Decodes <paramref name="length"/> bytes starting at <paramref name="start"/>, using UTF-8 if the
  /// UTF8 option is set and <see cref="encoding"/> otherwise.
  /// </summary>
  internal string ConvertBytes(byte[] bytes, int start, int length)
  {
      bool utf8 = (perlOptions & PerlRegexOptions.UTF8) != 0;
      Encoding decoder = utf8 ? Encoding.UTF8 : encoding;
      return decoder.GetString(bytes, start, length);
  }
  /// <summary>
  /// Locates the delimited expression in a binary pattern, parses the options that follow the closing
  /// delimiter into <see cref="dotNetOptions"/> and <see cref="perlOptions"/>, and returns the expression
  /// between the delimiters decoded to a string.
  /// </summary>
  private string LoadPerlRegex(byte[] pattern)
  {
      byte[] data = pattern ?? ArrayUtils.EmptyBytes;
      StringUtils.UniformWrapper wrapped = new StringUtils.BytesWrapper(data);

      int first, last;
      FindRegexDelimiters(wrapped, out first, out last);

      // "last" is the final character of the expression; the closing delimiter follows it and the options come after
      ParseRegexOptions(wrapped, last + 2, out dotNetOptions, out perlOptions);

      int count = last - first + 1;
      return ConvertBytes(data, first, count);
  }
  /// <summary>
  /// Locates the delimited expression in a string pattern, parses the options that follow the closing
  /// delimiter into <see cref="dotNetOptions"/> and <see cref="perlOptions"/>, and returns the expression
  /// between the delimiters.
  /// </summary>
  private string LoadPerlRegex(string pattern)
  {
      string text = pattern ?? "";
      StringUtils.UniformWrapper wrapped = new StringUtils.StringWrapper(text);

      int first, last;
      FindRegexDelimiters(wrapped, out first, out last);

      // "last" is the final character of the expression; the closing delimiter follows it and the options come after
      ParseRegexOptions(wrapped, last + 2, out dotNetOptions, out perlOptions);

      int count = last - first + 1;
      return ConvertString(text, first, count);
  }
  /// <summary>
  /// Finds the expression enclosed in delimiters. Leading white space is skipped; the first following
  /// character is the opening delimiter. For '[', '(', '{' and '&lt;' the closing delimiter is the matching
  /// bracket and nesting is tracked; any other delimiter closes itself. Backslash-escaped characters are skipped.
  /// </summary>
  /// <param name="pattern">The whole pattern including delimiters and options.</param>
  /// <param name="start">Index of the first character after the opening delimiter.</param>
  /// <param name="end">Index of the last character before the closing delimiter.</param>
  /// <exception cref="ArgumentException">
  /// The pattern is empty, the delimiter is alphanumeric or a backslash, or the closing delimiter is missing.
  /// </exception>
  private void FindRegexDelimiters(StringUtils.UniformWrapper pattern, out int start, out int end)
  {
      int pos = 0;
      while (pos < pattern.Length && Char.IsWhiteSpace(pattern[pos]))
      {
          pos++;
      }

      if (pos == pattern.Length)
      {
          throw new ArgumentException(LibResources.GetString("regular_expression_empty"));
      }

      char opener = pattern[pos];
      pos++;

      if (opener == '\\' || Char.IsLetterOrDigit(opener))
      {
          throw new ArgumentException(LibResources.GetString("delimiter_alnum_backslash"));
      }

      start = pos;

      char closer;
      switch (opener)
      {
          case '[': closer = ']'; break;
          case '(': closer = ')'; break;
          case '{': closer = '}'; break;
          case '<': closer = '>'; break;
          default: closer = opener; break;
      }

      int nesting = 1;
      bool closed = false;
      while (!closed && pos < pattern.Length)
      {
          char current = pattern[pos];

          if (current == '\\' && pos + 1 < pattern.Length)
          {
              // escaped character, skip it together with the backslash
              pos += 2;
          }
          else if (current == closer)
          {
              // tested before the opener so that a delimiter that closes itself ends the expression
              nesting--;
              if (nesting == 0)
              {
                  closed = true; // pos stays on the closing delimiter
              }
              else
              {
                  pos++;
              }
          }
          else
          {
              if (current == opener)
              {
                  nesting++;
              }
              pos++;
          }
      }

      if (pos == pattern.Length)
      {
          throw new ArgumentException(LibResources.GetString("preg_no_end_delimiter", closer));
      }

      end = pos - 1;
  }
  /// <summary>
  /// Parses the pattern modifiers that follow the closing delimiter.
  /// </summary>
  /// <param name="pattern">The whole pattern including delimiters and modifiers.</param>
  /// <param name="start">Index of the first modifier character.</param>
  /// <param name="dotNetOptions">Receives modifiers that map directly to <see cref="RegexOptions"/> (i, m, s, x).</param>
  /// <param name="extraOptions">Receives the remaining supported modifiers (e, A, D, U, u).</param>
  /// <remarks>
  /// 'S' is accepted and ignored. Space, line feed and carriage return are skipped silently, as PHP does.
  /// 'X' reports a warning, any other character reports a notice, and combining 'D' with 'm' reports a notice.
  /// </remarks>
  private static void ParseRegexOptions(StringUtils.UniformWrapper pattern, int start,
      out RegexOptions dotNetOptions, out PerlRegexOptions extraOptions)
  {
      dotNetOptions = RegexOptions.None;
      extraOptions = PerlRegexOptions.None;

      for (int i = start; i < pattern.Length; i++)
      {
          char option = pattern[i];

          switch (option)
          {
              case 'i': // PCRE_CASELESS
                  dotNetOptions |= RegexOptions.IgnoreCase;
                  break;

              case 'm': // PCRE_MULTILINE
                  dotNetOptions |= RegexOptions.Multiline;
                  break;

              case 's': // PCRE_DOTALL
                  dotNetOptions |= RegexOptions.Singleline;
                  break;

              case 'x': // PCRE_EXTENDED
                  dotNetOptions |= RegexOptions.IgnorePatternWhitespace;
                  break;

              case 'e': // evaluate as PHP code
                  extraOptions |= PerlRegexOptions.Evaluate;
                  break;

              case 'A': // PCRE_ANCHORED
                  extraOptions |= PerlRegexOptions.Anchored;
                  break;

              case 'D': // PCRE_DOLLAR_ENDONLY
                  extraOptions |= PerlRegexOptions.DollarMatchesEndOfStringOnly;
                  break;

              case 'S': // spend more time studying the pattern - ignore
                  break;

              case 'U': // PCRE_UNGREEDY
                  extraOptions |= PerlRegexOptions.Ungreedy;
                  break;

              case 'u': // PCRE_UTF8
                  extraOptions |= PerlRegexOptions.UTF8;
                  break;

              case ' ':  // white space among the modifiers is ignored by PHP,
              case '\n': // e.g. a pattern string ending with a new line
              case '\r':
                  break;

              case 'X': // PCRE_EXTRA
                  PhpException.Throw(PhpError.Warning, LibResources.GetString("modifier_not_supported", option));
                  break;

              default:
                  PhpException.Throw(PhpError.Notice, LibResources.GetString("modifier_unknown", option));
                  break;
          }
      }

      // inconsistent options check:
      if ((dotNetOptions & RegexOptions.Multiline) != 0 &&
          (extraOptions & PerlRegexOptions.DollarMatchesEndOfStringOnly) != 0)
      {
          PhpException.Throw(PhpError.Notice, LibResources.GetString("modifiers_inconsistent", 'D', 'm'));
      }
  }
  /// <summary>
  /// Parses numeric escape sequences starting at a backslash: "\xhh" (exactly two hexadecimal digits),
  /// "\x{h...}" (hexadecimal code point up to 10FFFF) and "\ooo" (exactly three octal digits).
  /// "\p", "\P" (Unicode categories) and "\X" are recognized only to report that they are not supported.
  /// </summary>
  /// <param name="str">The expression being converted.</param>
  /// <param name="pos">
  /// Index of the backslash. On success it is moved to the last character of the parsed sequence;
  /// otherwise it is left unchanged.
  /// </param>
  /// <param name="ch">On success receives the character code encoded by the sequence.</param>
  /// <param name="escaped">
  /// On success receives <c>true</c> if the resulting character has a special meaning in .NET regular
  /// expressions (see <see cref="IsCharRegexSpecial"/>).
  /// </param>
  /// <returns><c>true</c> if a numeric escape sequence was parsed; otherwise <c>false</c>.</returns>
  private static bool ParseEscapeCode(
      string/*!*/ str, ref int pos, ref int ch, ref bool escaped)
  {
      // highest valid Unicode code point
      const int MaxCodePoint = 0x10FFFF;

      Debug.Assert(str != null && pos >= 0 && pos < str.Length && str[pos] == '\\');

      // every sequence handled here is examined up to index pos + 3, which therefore has to exist
      if (pos + 3 >= str.Length) return false;

      int number = 0;
      char kind = str[pos + 1];

      if (kind == 'x')
      {
          if (str[pos + 2] == '{')
          {
              // hexadecimal number encoding a Unicode character:
              int i = pos + 3;
              while (i < str.Length && str[i] != '}')
              {
                  int digit = Core.Convert.AlphaNumericToDigit(str[i]);
                  if (digit >= 16) return false;   // valid hexadecimal digits are 0..15

                  number = (number << 4) + digit;
                  if (number > MaxCodePoint) return false;

                  i++;
              }

              // closing brace is missing
              if (i >= str.Length) return false;

              pos = i;
              ch = number;
              escaped = ch < Char.MaxValue ? IsCharRegexSpecial((char)ch) : false;
          }
          else
          {
              // hexadecimal number encoding single-byte character:
              for (int i = pos + 2; i < pos + 4; i++)
              {
                  Debug.Assert(i < str.Length);

                  int digit = Core.Convert.AlphaNumericToDigit(str[i]);
                  if (digit >= 16) return false;   // valid hexadecimal digits are 0..15

                  number = (number << 4) + digit;
              }

              pos += 3;
              ch = number;
              escaped = ch < Char.MaxValue ? IsCharRegexSpecial((char)ch) : false;
          }

          return true;
      }

      if (kind >= '0' && kind <= '7')
      {
          // octal number:
          for (int i = pos + 1; i < pos + 4; i++)
          {
              Debug.Assert(i < str.Length);

              int digit = Core.Convert.AlphaNumericToDigit(str[i]);
              if (digit >= 8) return false;   // valid octal digits are 0..7

              number = (number << 3) + digit;
          }

          pos += 3;
          ch = number;
          escaped = ch < Char.MaxValue ? IsCharRegexSpecial((char)ch) : false;
          return true;
      }

      if (kind == 'p' || kind == 'P')
      {
          bool braced = str[pos + 2] == '{';

          int cat_start;
          if (braced)
          {
              // a leading '^' inside the braces is skipped only after a lowercase 'p'
              cat_start = (kind == 'p' && str[pos + 3] == '^') ? pos + 4 : pos + 3;
          }
          else
          {
              cat_start = pos + 2;
          }

          UnicodeCategoryGroup group;
          UnicodeCategory category;
          int cat_length = StringUtils.ParseUnicodeDesignation(str, cat_start, out group, out category);

          // unknown category:
          if (cat_length == 0) return false;

          // check closing brace:
          int cat_end = cat_start + cat_length - 1;
          if (braced && (cat_end + 1 >= str.Length || str[cat_end + 1] != '}'))
              return false;

          // TODO: custom categories on .NET 2?
          PhpException.Throw(PhpError.Warning, "Unicode categories not supported.");
          return false;
      }

      if (kind == 'X')
      {
          PhpException.Throw(PhpError.Warning, "Unicode categories not supported.");
          return false;
      }

      return false;
  }
  /// <summary>
  /// Characters with a special meaning in .NET regular expressions; they have to be escaped to be matched literally.
  /// </summary>
  static char[] encodeChars = new char[] { '.', '$', '(', ')', '*', '+', '?', '[', ']', '{', '}', '\\', '^', '|' };

  /// <summary>
  /// Tells whether <paramref name="ch"/> is one of <see cref="encodeChars"/>, i.e. needs escaping in a .NET regex.
  /// </summary>
  private static bool IsCharRegexSpecial(char ch)
  {
      foreach (char special in encodeChars)
      {
          if (special == ch)
          {
              return true;
          }
      }
      return false;
  }
  1871. /// <summary>
  1872. /// Converts Perl match expression (only, without delimiters, options etc.) to .NET regular expression.
  1873. /// </summary>
  1874. /// <param name="perlExpr">Perl regular expression to convert.</param>
  1875. /// <param name="opt">Regexp options - some of them must be processed by changes in match string.</param>
  1876. /// <returns>Resulting .NET regular expression.</returns>
  1877. private string ConvertRegex(string perlExpr, PerlRegexOptions opt)
  1878. {
  1879. // Ranges in bracket expressions should be replaced with appropriate characters
  1880. // assume no conversion will be performed, create string builder with exact length. Only in
  1881. // case there is a range StringBuilder would be prolonged, +1 for Anchored
  1882. StringBuilder result = new StringBuilder(perlExpr.Length + 1);
  1883. // Anchored means that the string should match only at the start of the string, add '^'
  1884. // at the beginning if there is no one
  1885. if ((opt & PerlRegexOptions.Anchored) != 0 && (perlExpr.Length == 0 || perlExpr[0] != '^'))
  1886. result.Append('^');
  1887. // set to true after a quantifier is matched, if there is second quantifier just behind the
  1888. // first it is an error
  1889. bool last_quantifier = false;
  1890. // 4 means we're switching from 3 back to 2 - ie. "a-b-c"
  1891. // (we need to make a difference here because second "-" shouldn't be expanded)
  1892. bool leaving_range = false;
  1893. // remember the last character added in the character class, so in state 3 we can expand the range as properly as possible
  1894. int range_from_character = -1;
  1895. bool escaped = false;
  1896. int state = 0;
  1897. int inner_state = 0;
  1898. HashSet<uint> addedSurrogate2Ranges = null; // cache of already added character pairs valid within character class [], dropped when switching to 0
  1899. int group_number = 0;
  1900. int i = 0;
  1901. while (i < perlExpr.Length)
  1902. {
  1903. int ch = perlExpr[i];
  1904. escaped = false;
  1905. if (ch == '\\' && !ParseEscapeCode(/*encoding,*/ perlExpr, ref i, ref ch, ref escaped))
  1906. {
  1907. i++;
  1908. Debug.Assert(i < perlExpr.Length, "Regex cannot end with backslash.");
  1909. ch = perlExpr[i];
  1910. if (ch == 'g')
  1911. {
  1912. ++i;
  1913. inner_state = 5; // skip 'g' from resulting pattern
  1914. escaped = false;
  1915. continue;
  1916. }
  1917. else if (ch == 'k')
  1918. {
  1919. inner_state = 11;
  1920. escaped = true;
  1921. }
  1922. // some characters (like '_') don't need to be escaped in .net
  1923. // and ignore escaping of unicode sequence of characters
  1924. if (ch == '_' || (int)ch > 0x7F) escaped = false; else escaped = true;
  1925. }
  1926. switch (state)
  1927. {
  1928. case 0: // outside of character class
  1929. if (escaped)
  1930. {
  1931. result.Append('\\');
  1932. Append(result, ch);
  1933. last_quantifier = false;
  1934. break;
  1935. }
  1936. // In perl regexps, named groups are written like this: "(?P<name> ... )"
  1937. // (\k<name>...)
  1938. // (\k'name'...)
  1939. // (\k{name}...)
  1940. // (\g{name}...)
  1941. // (?'name'...)
  1942. // (?<name>...)
  1943. // (?P=name)
  1944. // (?:...)
  1945. // If the group is starting here, we need to skip the 'P' character (see state 4)
  1946. switch (inner_state)
  1947. {
  1948. case 0:
  1949. if (ch == '(')
  1950. {
  1951. inner_state = 1;
  1952. // Look-ahead and name anonymous groups.
  1953. // This is used to match the order of the results.
  1954. // As perlre doc says:
  1955. // NOTE: While the notation of this construct [grouping] is the same as the similar function in .NET regexes,
  1956. // the behavior is not. In Perl the groups are numbered sequentially regardless of being named or not.
  1957. ++group_number;
  1958. if (i + 1 < perlExpr.Length)
  1959. {
  1960. if (perlExpr[i + 1] != '?')
  1961. {
  1962. ++i;
  1963. result.Append("(?<");
  1964. result.Append(AnonymousGroupPrefix);
  1965. result.Append(group_number);
  1966. result.Append('>');
  1967. continue;
  1968. }
  1969. else
  1970. if (i + 2 < perlExpr.Length && perlExpr[i + 2] == ':')
  1971. {
  1972. // Pseudo-group, don't count.
  1973. --group_number;
  1974. }
  1975. }
  1976. }
  1977. else if (ch == '\\')
  1978. inner_state = 4;
  1979. else
  1980. inner_state = 0;
  1981. break;
  1982. //groups
  1983. case 1:
  1984. if (ch == '?')
  1985. inner_state = 2;
  1986. else if (ch != '(')// stay in inner_state == 1, because this can happen: ((?<blah>...))
  1987. inner_state = 0;
  1988. break;
  1989. case 2:
  1990. if (ch == 'P')
  1991. {
  1992. i++;
  1993. inner_state = 3;
  1994. continue; //skip 'P' from resulting pattern
  1995. }
  1996. else if (ch == '<')
  1997. {
  1998. inner_state = 15;
  1999. break;
  2000. }
  2001. else if (ch == '\'')
  2002. {
  2003. i++;
  2004. result.Append('\'');
  2005. result.Append(GroupPrefix);
  2006. inner_state = 0;
  2007. continue;
  2008. }
  2009. inner_state = 0;
  2010. break;
  2011. case 3: // '(?P'
  2012. if (ch == '=')
  2013. {
  2014. ++i;
  2015. inner_state = 12;
  2016. continue; //skip '=' from resulting pattern
  2017. }
  2018. else if (ch != '<')// if P wasn't part of "(?P<name> ... )" neither '(?P=name)' back reference, so put it back to the pattern
  2019. {
  2020. result.Append('P');
  2021. }
  2022. else if (ch == '<')
  2023. {
  2024. i++;
  2025. result.Append('<');
  2026. result.Append(GroupPrefix);
  2027. inner_state = 0;
  2028. continue;
  2029. }
  2030. inner_state = 0;
  2031. break;
  2032. // /g[0-9]{1,2} back references
  2033. case 5: // '\g'
  2034. result.Append('\\');
  2035. if (ch == '{')
  2036. {
  2037. i++;
  2038. inner_state = 6;
  2039. continue; // skip '{' from resulting pattern
  2040. }
  2041. else if (ch >= '0' && ch <= '9')
  2042. {
  2043. inner_state = 0; // just copy the rest of the pattern
  2044. }
  2045. else
  2046. {
  2047. result.Append('g'); // unexpected character after '/g', so put g back to pattern
  2048. inner_state = 0;
  2049. }
  2050. break;
  2051. case 6: // '\g{'
  2052. if (ch >= '0' && ch <= '9')
  2053. {
  2054. inner_state = 7;
  2055. }
  2056. else
  2057. {
  2058. // it can be named group
  2059. result.Append("k<");
  2060. result.Append(GroupPrefix);
  2061. inner_state = 10;
  2062. //result.Append("g{"); // unexpected character after '/g{', so put it back to pattern
  2063. //group_state = 0;
  2064. }
  2065. break;
  2066. case 7:// '\g{[0-9]'
  2067. if (ch == '}')
  2068. {
  2069. i++;
  2070. inner_state = 9;
  2071. continue; // skip '}' from resulting pattern
  2072. }
  2073. else if (ch >= '0' && ch <= '9')
  2074. {
  2075. inner_state = 8;
  2076. }
  2077. else
  2078. {
  2079. //name of the group starts with a number
  2080. //put behind PreGroupNameSign
  2081. result.Insert(result.Length - 1,"k<");
  2082. result.Insert(result.Length - 1, GroupPrefix);
  2083. inner_state = 14;
  2084. }
  2085. break;
  2086. case 8: // '\g{[0-9][0-9]'
  2087. if (ch == '}')
  2088. {
  2089. i++;
  2090. inner_state = 9;
  2091. continue; // skip '}' from resulting pattern
  2092. }
  2093. else
  2094. {
  2095. //name of the group starts with a number
  2096. //put behind PreGroupNameSign
  2097. result.Insert(result.Length - 1, "k<");
  2098. result.Insert(result.Length - 2, GroupPrefix);
  2099. inner_state = 14;
  2100. }
  2101. // there is just 99 back references possible
  2102. inner_state = 0;
  2103. break;
  2104. case 9:// '\g{[0-9][0-9]?}'
  2105. if (ch >= '0' && ch <= '9')
  2106. {
  2107. result.Append("(?#)"); // put this to the resulting pattern to separate number of the reference from number that follows
  2108. }
  2109. inner_state = 0;
  2110. break;
  2111. // named back references
  2112. case 10:// '\g{.*?}' | '\k{.*?}'
  2113. if (ch == '}')
  2114. {
  2115. ++i;
  2116. result.Append('>');
  2117. inner_state = 0;
  2118. continue; // skip '}' from resulting pattern
  2119. }
  2120. break;
  2121. case 11:// '\k'
  2122. if (ch == '{')
  2123. {
  2124. i++;
  2125. inner_state = 10;
  2126. result.Append('<');
  2127. result.Append(GroupPrefix);
  2128. continue; // skip '{' from resulting pattern
  2129. }
  2130. else if (ch == '<')
  2131. {
  2132. i++;
  2133. result.Append('<');
  2134. result.Append(GroupPrefix);
  2135. inner_state = 0;
  2136. continue;
  2137. }
  2138. else if (ch == '\'')
  2139. {
  2140. i++;
  2141. result.Append('\'');
  2142. result.Append(GroupPrefix);
  2143. inner_state = 0;
  2144. continue;
  2145. }
  2146. inner_state = 0;
  2147. break;
  2148. // transforming '(?P=name)' to '\k<name>'
  2149. case 12: // '(?P='
  2150. // (? was already put in the pattern, so replace it with '\k'
  2151. result[result.Length - 2] = '\\';
  2152. result[result.Length - 1] = 'k';
  2153. // add '<' so it is '\k<'
  2154. result.Append('<');
  2155. result.Append(GroupPrefix);
  2156. inner_state = 13;
  2157. break;
  2158. case 13: // '(?P=.*?'
  2159. if (ch == ')')
  2160. {
  2161. ++i;
  2162. result.Append('>');
  2163. inner_state = 0;
  2164. continue; // skip ')' from resulting pattern
  2165. }
  2166. break;
  2167. case 14:// '\g{[0-9].*?'
  2168. if (ch == '}')
  2169. {
  2170. i++;
  2171. inner_state = 9;
  2172. result.Append(">");
  2173. continue; // skip '}' from resulting pattern
  2174. }
  2175. break;
  2176. case 15:// (?<
  2177. //Add group prefix only if it's not lookbehind assertions
  2178. //(?<! negative
  2179. //(?<= positive
  2180. if (ch != '!' && ch != '=')
  2181. {
  2182. result.Append(GroupPrefix);
  2183. }
  2184. inner_state = 0;
  2185. break;
  2186. default: inner_state = 0; break;
  2187. }
  2188. if ((opt & PerlRegexOptions.Ungreedy) != 0)
  2189. {
  2190. // match quantifier ?,*,+,{n,m} at the position i:
  2191. Match m = quantifiers.Match(perlExpr, i);
  2192. // quantifier matched; quentifier '?' hasn't to be preceded by '(' - a grouping construct '(?'
  2193. if (m.Success && (m.Value != "?" || i == 0 || perlExpr[i - 1] != '('))
  2194. {
  2195. // two quantifiers:
  2196. if (last_quantifier)
  2197. throw new ArgumentException(LibResources.GetString("regexp_duplicate_quantifier", i));
  2198. // append quantifier:
  2199. result.Append(perlExpr, i, m.Length);
  2200. i += m.Length;
  2201. if (i < perlExpr.Length && perlExpr[i] == '?')
  2202. {
  2203. // skip question mark to make the quantifier greedy:
  2204. i++;
  2205. }
  2206. else if (i < perlExpr.Length && perlExpr[i] == '+')
  2207. {
  2208. // TODO: we do not yet support possesive quantifiers
  2209. // so we just skip the attribute it and pray
  2210. // nobody will ever realize :-)
  2211. i++;
  2212. }
  2213. else
  2214. {
  2215. // add question mark to make the quantifier lazy:
  2216. if (result.Length != 0 && result[result.Length - 1] == '?')
  2217. {
  2218. // HACK: Due to the issue in .NET regex we can't use "??" because it isn't interpreted correctly!!
  2219. // (for example "^(ab)??$" matches with "abab", but it shouldn't!!)
  2220. }
  2221. else
  2222. result.Append('?');
  2223. }
  2224. last_quantifier = true;
  2225. continue;
  2226. }
  2227. }
  2228. last_quantifier = false;
  2229. if (ch == '$' && (opt & PerlRegexOptions.DollarMatchesEndOfStringOnly) != 0)
  2230. {
  2231. // replaces '$' with '\z':
  2232. result.Append(@"\z");
  2233. break;
  2234. }
  2235. if (ch == '[')
  2236. state = 1;
  2237. Append(result, ch);
  2238. break;
  2239. case 1: // first character of character class
  2240. if (escaped)
  2241. {
  2242. result.Append('\\');
  2243. Append(result, ch);
  2244. range_from_character = ch;
  2245. state = 2;
  2246. break;
  2247. }
  2248. // special characters:
  2249. if (ch == '^' || ch == ']' || ch == '-')
  2250. {
  2251. Append(result, ch);
  2252. }
  2253. else
  2254. {
  2255. // other characters are not consumed here, for example [[:space:]abc] will not match if the first
  2256. // [ is appended here.
  2257. state = 2;
  2258. goto case 2;
  2259. }
  2260. break;
  2261. case 2: // inside of character class
  2262. if (escaped)
  2263. {
  2264. result.Append('\\');
  2265. Append(result, ch);
  2266. range_from_character = ch;
  2267. leaving_range = false;
  2268. break;
  2269. }
  2270. if (ch == '-' && !leaving_range)
  2271. {
  2272. state = 3;
  2273. break;
  2274. }
  2275. leaving_range = false;
  2276. // posix character classes
  2277. Match match = posixCharClasses.Match(perlExpr.Substring(i), 0);
  2278. if (match.Success)
  2279. {
  2280. string chars = PosixRegExp.BracketExpression.CountCharacterClass(match.Groups[2].Value);
  2281. if (chars == null)
  2282. throw new ArgumentException(/*TODO*/ String.Format("Unknown character class '{0}'", match.Groups[2].Value));
  2283. if (match.Groups[1].Value.Length > 0)
  2284. throw new ArgumentException(/*TODO*/ "POSIX character classes negation not supported.");
  2285. result.Append(chars);
  2286. range_from_character = -1; // -1 means, it is not rangable :)
  2287. i += match.Length - 1; // +1 is added just behind the switch
  2288. break;
  2289. }
  2290. if (ch == ']')
  2291. {
  2292. addedSurrogate2Ranges = null; // drop the cache of ranges
  2293. state = 0;
  2294. }
  2295. // append <ch>
  2296. range_from_character = ch;
  2297. if (ch == '-')
  2298. result.Append("\\x2d");
  2299. else
  2300. AppendEscaped(result, ch);
  2301. break;
  2302. case 3: // range previous character was '-'
  2303. if (!escaped && ch == ']')
  2304. {
  2305. if (range_from_character > char.MaxValue)
  2306. throw new ArgumentException("Cannot range from an UTF-32 character to unknown.");
  2307. result.Append("-]");
  2308. addedSurrogate2Ranges = null; // drop the cache of ranges
  2309. state = 0;
  2310. break;
  2311. }
  2312. //string range;
  2313. //int error;
  2314. //if (!PosixRegExp.BracketExpression.CountRange(result[result.Length - 1], ch, out range, out error))
  2315. //{
  2316. // if ((error != 1) || (!CountUnicodeRange(result[result.Length - 1], ch, out range)))
  2317. // {
  2318. // Debug.Assert(error == 2);
  2319. // throw new ArgumentException(LibResources.GetString("range_first_character_greater"));
  2320. // }
  2321. //}
  2322. //PosixRegExp.BracketExpression.EscapeBracketExpressionSpecialChars(result, range); // left boundary is duplicated, but doesn't matter...
  2323. if (addedSurrogate2Ranges == null)
  2324. addedSurrogate2Ranges = new HashSet<uint>(); // initialize the cache of already added character ranges, invalidated at the end of character class
  2325. if (ch != range_from_character)
  2326. {
  2327. // <from>-<ch>:
  2328. // 1. <utf16>-<utf16>
  2329. // 2. <utf16>-<utf32>
  2330. // 3. <utf32>-<utf32>
  2331. if (range_from_character <= char.MaxValue)
  2332. {
  2333. if (ch <= char.MaxValue)
  2334. {
  2335. //symbol order can be different, not testet with other modes
  2336. var seqBreak = false;
  2337. byte from = 0;
  2338. byte to = 0;
  2339. if (encoding.IsSingleByte)
  2340. {
  2341. var bytes = encoding.GetBytes(new char[] {(char) range_from_character});
  2342. from = bytes[0];
  2343. bytes = encoding.GetBytes(new char[] {(char)ch});
  2344. to = bytes[0];
  2345. var lastChar = range_from_character;
  2346. for (int j = from + 1; j <= to; j++)
  2347. {
  2348. var chars = encoding.GetChars(new[] {(byte)j});
  2349. if (chars[0] - lastChar != 1)
  2350. {
  2351. seqBreak = true;
  2352. break;
  2353. }
  2354. lastChar = chars[0];
  2355. }
  2356. }
  2357. // 1.
  2358. if (!seqBreak)
  2359. {
  2360. result.Append('-');
  2361. AppendEscaped(result, ch);
  2362. }
  2363. else
  2364. {
  2365. for (byte b = (byte)(from + 1); b <= to; b++)
  2366. {
  2367. var chars = encoding.GetChars(new[] { b });
  2368. AppendEscaped(result, chars[0]);
  2369. }
  2370. }
  2371. }
  2372. else
  2373. {
  2374. // 2.
  2375. result.Append('-');
  2376. AppendEscaped(result, char.MaxValue);
  2377. // count <char.MaxValue+1>-<ch>
  2378. CountUTF32Range(result, char.MaxValue + 1, ch, addedSurrogate2Ranges);
  2379. }
  2380. }
  2381. else
  2382. {
  2383. // 3. utf32 range
  2384. result.Length -= 2;
  2385. CountUTF32Range(result, range_from_character, ch, addedSurrogate2Ranges);
  2386. }
  2387. }
  2388. state = 2;
  2389. leaving_range = true;
  2390. range_from_character = -1;
  2391. break;
  2392. }
  2393. i++;
  2394. }
  2395. return ConvertPossesiveToAtomicGroup(result);
  2396. }
  /// <summary>
  /// Appends <paramref name="ch"/> to <paramref name="sb"/>, copying it verbatim only when it is
  /// below 0x80 and is neither a backslash nor a hyphen; any other value is delegated to
  /// <see cref="AppendUnicode"/>.
  /// </summary>
  /// <param name="sb">Target builder; must not be null.</param>
  /// <param name="ch">Character code to append.</param>
  private static void AppendEscaped(StringBuilder/*!*/sb, int ch)
  {
      Debug.Assert(sb != null);

      bool copyVerbatim = ch < 0x80 && ch != '\\' && ch != '-';
      if (!copyVerbatim)
      {
          // '\', '-' and everything from 0x80 up go through the escaping helper
          AppendUnicode(sb, ch);
          return;
      }

      sb.Append((char)ch);
  }
  /// <summary>
  /// Appends <paramref name="ch"/> to <paramref name="sb"/>. Values strictly between 0 and
  /// <see cref="Char.MaxValue"/> are appended as a single character; everything else
  /// (0, <see cref="Char.MaxValue"/> itself and values above it) is delegated to <see cref="AppendUnicode"/>.
  /// </summary>
  /// <param name="sb">Target builder; must not be null.</param>
  /// <param name="ch">Non-negative character code to append.</param>
  private static void Append(StringBuilder/*!*/sb, int ch)
  {
      Debug.Assert(sb != null);
      Debug.Assert(ch >= 0);

      if (ch <= 0 || ch >= Char.MaxValue)
      {
          AppendUnicode(sb, ch);
      }
      else
      {
          sb.Append((char)ch);
      }
  }
  /// <summary>
  /// Appends <paramref name="ch"/> to <paramref name="sb"/> in escaped form: values up to
  /// <see cref="Char.MaxValue"/> are written as the text <c>\uXXXX</c> (upper-case hex, at least four digits),
  /// larger values are written as the string returned by <see cref="Char.ConvertFromUtf32"/>.
  /// </summary>
  /// <param name="sb">Target builder; must not be null.</param>
  /// <param name="ch">Character code to append.</param>
  private static void AppendUnicode(StringBuilder/*!*/sb, int ch)
  {
      Debug.Assert(sb != null);

      if (ch > Char.MaxValue)
      {
          // outside of the 16-bit range: emit the UTF-16 representation itself
          sb.Append(Char.ConvertFromUtf32(ch));
          return;
      }

      sb.Append(@"\u").Append(ch.ToString("X4"));
  }
  2422. #region Conversion of possesive quantifiers
  /// <summary>
  /// An opening bracket recorded by <see cref="ConvertPossesiveToAtomicGroup"/> while it scans a pattern.
  /// </summary>
  internal struct brace
  {
      /// <summary>Index of the bracket in the output buffer at the time it was recorded.</summary>
      public int position;

      /// <summary>The bracket character itself: '(' or '['.</summary>
      public char braceType;

      public brace(int position, char braceType)
      {
          this.position = position;
          this.braceType = braceType;
      }
  }

  /// <summary>
  /// Converts possessive quantifiers to atomic groups, which .NET supports.
  /// </summary>
  /// <param name="pattern">Pattern to scan. It is only read.</param>
  /// <returns>
  /// The pattern with every recognized <c>X*+</c>, <c>X++</c>, <c>X?+</c> or <c>X{..}+</c> rewritten to
  /// <c>(?&gt;X*)</c>, <c>(?&gt;X+)</c>, <c>(?&gt;X?)</c> or <c>(?&gt;X{..})</c>.
  /// </returns>
  /// <remarks>
  /// The quantified item <c>X</c> is recognized only when it is a parenthesized group, a bracketed character class
  /// or a backslash followed by a single character. For the backslash form only <c>*+</c>, <c>++</c> and <c>?+</c>
  /// are handled. A possessive quantifier after a plain character is left untouched, and comments are not handled.
  /// </remarks>
  private static string ConvertPossesiveToAtomicGroup(StringBuilder pattern)
  {
      const int preallocatedAttomicGroups = 10;
      const string atomicGroupStart = "(?>";
      const string atomicGroupEnd = ")";

      // open brackets that have not been closed yet; positions refer to the output buffer
      Stack<brace> braceStack = new Stack<brace>(16);

      int state = 0;                  // state of the bracket automaton (used outside of escape sequences)
      int escape_state = 0;           // state of the escape automaton (used inside of escape sequences)
      int escapeSequenceStart = 0;    // output position of the backslash of the current escape sequence
      int offset = 0;                 // number of characters inserted so far; maps input to output positions

      StringBuilder sb = new StringBuilder(pattern.Length + atomicGroupStart.Length * preallocatedAttomicGroups);

      // wraps the output from <start> up to its current end into an atomic group
      Action<int> addAtomicGroup = (start) =>
      {
          sb.Insert(start, atomicGroupStart);
          sb.Append(atomicGroupEnd);
          offset += atomicGroupStart.Length;
      };

      // remembers an opening bracket found at input position <pos>
      Action<int, char> pushToStack = (pos, bracket) =>
      {
          braceStack.Push(new brace(pos + offset, bracket));
      };

      brace lastBrace = new brace();  // the bracket pair that was closed most recently
      bool escaped = false;
      int i = 0;

      while (i < pattern.Length)
      {
          char ch = pattern[i];

          //TODO: handle comments
          if (!escaped)
          {
              switch (state)
              {
                  case 0: // outside of any bracket
                      if (ch == '(')
                      {
                          pushToStack(i, ch);
                          state = 2;
                      }
                      else if (ch == '[')
                      {
                          pushToStack(i, ch);
                          state = 12;
                      }
                      break;

                  case 2: // (.
                      if (ch == ')')
                      {
                          state = 3;
                      }
                      else if (ch == '(') // nested (
                      {
                          pushToStack(i, ch);
                      }
                      else if (ch == '[')
                      {
                          state = 12;
                          pushToStack(i, ch);
                      }
                      break;

                  case 3: // (...)
                      lastBrace = braceStack.Pop();
                      if (ch == '*' || ch == '?' || ch == '+')
                      {
                          state = 4;
                      }
                      else if (ch == '{')
                      {
                          state = 5;
                      }
                      else
                      {
                          // no quantifier follows; examine <ch> again in the enclosing context
                          state = DecideState(pattern, braceStack);
                          continue;
                      }
                      break;

                  case 13: // [...]
                      lastBrace = braceStack.Pop();
                      if (ch == '*' || ch == '?' || ch == '+')
                      {
                          state = 14;
                      }
                      else if (ch == '{')
                      {
                          state = 15;
                      }
                      else
                      {
                          // FIX: examine <ch> again in the enclosing context (as case 3 does). Previously the
                          // character was consumed here, so e.g. the ')' in "([a])++" was never seen.
                          state = DecideState(pattern, braceStack);
                          continue;
                      }
                      break;

                  case 4:  // (...)*+ | (...)++ | (...)?+
                  case 14: // [...]*+ | [...]++ | [...]?+
                      state = DecideState(pattern, braceStack);
                      if (ch == '+')
                      {
                          // possessive quantifier: wrap the quantified item and drop the trailing '+'
                          addAtomicGroup(lastBrace.position);
                          ++i;
                      }
                      // FIX (state 4): any other character is examined again in the enclosing context,
                      // so the '(' in "(a)*(b)++" is not lost. State 14 already behaved this way.
                      continue;

                  case 5:  // (...){
                  case 15: // [...]{
                      if (ch == '}')
                          state = 4;
                      break;

                  case 12: // [.
                      if (ch == ']')
                      {
                          state = 13;
                      }
                      else if (ch == '[')
                      {
                          pushToStack(i, ch);
                      }
                      break;
              }
          }
          else
          {
              // inside of an escape sequence
              switch (escape_state)
              {
                  case 0: // <ch> is the character right after the backslash
                      // FIX: a backslash is an escaped character like any other. Previously "\\" restarted
                      // the escape sequence, which mis-anchored or skipped the atomic group (e.g. for "\\++").
                      escape_state = 1;
                      break;

                  case 1: // \.
                      if (ch == '*' || ch == '?' || ch == '+')
                          escape_state = 2;
                      else
                          escape_state = 0;
                      break;

                  case 2: // \.* | \.+ | \.?
                      if (ch == '+')
                      {
                          // possessive quantifier: wrap the escape sequence and drop the trailing '+'
                          addAtomicGroup(escapeSequenceStart);
                          ++i;
                      }
                      escape_state = 0;
                      break;
              }

              if (escape_state == 0)
              {
                  // the escape sequence is over; the current character (if any) is handled as unescaped
                  escaped = false;
                  continue;
              }
          }

          if (ch == '\\' && escaped == false)
          {
              escaped = true;
              escapeSequenceStart = i + offset;
          }

          ++i;
          sb.Append(ch);
      }

      return sb.ToString();
  }

  /// <summary>
  /// Determines the bracket-automaton state matching the innermost bracket that is still open.
  /// </summary>
  /// <param name="pattern">Not used.</param>
  /// <param name="braceStack">Stack of currently open brackets; it is not modified.</param>
  /// <returns>0 if no bracket is open, 2 if the innermost open bracket is '(', 12 otherwise.</returns>
  private static int DecideState(StringBuilder pattern, Stack<brace> braceStack)
  {
      if (braceStack.Count == 0)
          return 0;

      return (braceStack.Peek().braceType == '(') ? 2 : 12;
  }
  2635. #endregion
  2636. ///// <summary>
  2637. ///// Simple version of 'PosixRegExp.BracketExpression.CountRange' function. Generates string
  2638. ///// with all characters in specified range, but uses unicode encoding.
  2639. ///// </summary>
  2640. ///// <param name="f">Lower bound</param>
  2641. ///// <param name="t">Upper bound</param>
  2642. ///// <param name="range">Returned string</param>
  2643. ///// <returns>Returns false if lower bound is larger than upper bound</returns>
  2644. //private static bool CountUnicodeRange(char f, char t, out string range)
  2645. //{
  2646. // range = "";
  2647. // if (f > t) return false;
  2648. // StringBuilder sb = new StringBuilder(t - f);
  2649. // for (char c = f; c <= t; c++) sb.Append(c);
  2650. // range = sb.ToString();
  2651. // return true;
  2652. //}
  /// <summary>
  /// Ranges characters from <paramref name="chFrom"/> up to <paramref name="chTo"/> inclusive, where characters are UTF32.
  /// Each character is split into its surrogate pair. For every run of characters sharing the same first surrogate,
  /// the first surrogate is written once, followed by the range of second surrogates.
  /// </summary>
  /// <param name="sb">Output buffer; the generated characters are appended to it.</param>
  /// <param name="chFrom">First character of the range; must be above <see cref="Char.MaxValue"/>.</param>
  /// <param name="chTo">Last character of the range; must be above <see cref="Char.MaxValue"/>.</param>
  /// <param name="addedSurrogate2Ranges">Cache of already added second-surrogate ranges, used to avoid emitting the same range twice.</param>
  private static void CountUTF32Range(StringBuilder/*!*/sb, int chFrom, int chTo, HashSet<uint>/*!*/addedSurrogate2Ranges)
  {
      Debug.Assert(addedSurrogate2Ranges != null);
      Debug.Assert(chFrom > Char.MaxValue);
      Debug.Assert(chTo > Char.MaxValue);

      // work with offsets above the 16-bit range
      chFrom -= char.MaxValue + 1;
      chTo -= char.MaxValue + 1;

      // whether the next character starts a new run of characters with the same surrogate1
      bool start = true;

      for (; chFrom <= chTo; chFrom++)
      {
          // current UTF32 character "<a1><a2>":
          char a1 = (char)(chFrom / 1024 + 55296); // surrogate pair [1]
          char a2 = (char)(chFrom % 1024 + 56320); // surrogate pair [2]

          // the run ends here when the whole range ends, or when the next character has a different surrogate1
          bool runEnds = (chFrom + 1) > chTo || a1 != (char)((chFrom + 1) / 1024 + 55296);

          if (start)
          {
              // first character of the run: output "<a1><a2>", trying to compress <a1> first
              if (sb.Length >= 2 &&
                  sb[sb.Length - 1] + 1 == a1 &&
                  sb[sb.Length - 2] == '-' &&
                  (sb.Length < 3 || sb[sb.Length - 3] != '\\')) // '-' is not escaped
              {
                  // convert "-ab" to "-b", where a+1 == b (extend the range)
                  sb[sb.Length - 1] = a1;
              }
              else if (sb.Length >= 2 &&
                  sb[sb.Length - 1] + 1 == a1 &&
                  sb[sb.Length - 2] + 2 == a1)
              {
                  // convert "abc" to "a-c", where a+2 == b+1 == c
                  sb[sb.Length - 1] = '-';
                  sb.Append(a1);
              }
              else
              {
                  sb.Append(a1);
              }

              sb.Append(a2);

              // FIX: a run may consist of this single character (the range starts on the last character of
              // a surrogate1 block). Previously the run was left open, so the next surrogate1 was never
              // written and a reversed surrogate2 range was produced.
              start = runEnds;
          }
          else if (runEnds)
          {
              // last character of the run: close the surrogate2 range
              AddCharacterRangeChecked(sb, a2, addedSurrogate2Ranges);
              start = true;
          }
          // otherwise the character lies inside of the run and produces no output
      }
  }

  /// <summary>
  /// Adds "-<paramref name="chTo"/>" iff there is not the same character range in the result already.
  /// Otherwise the last character of <paramref name="sb"/> is removed.
  /// </summary>
  /// <param name="sb"><see cref="StringBuilder"/> with the lower bound of the range already added.</param>
  /// <param name="chTo">Upper bound of the range.</param>
  /// <param name="addedSurrogate2Ranges">Cache of already added character pairs.</param>
  /// <remarks>Assumes there is the starting character at the end of <paramref name="sb"/>.</remarks>
  private static void AddCharacterRangeChecked(StringBuilder/*!*/sb, char chTo, HashSet<uint>/*!*/addedSurrogate2Ranges)
  {
      Debug.Assert(addedSurrogate2Ranges != null);
      Debug.Assert(sb.Length > 0);

      char previous = sb[sb.Length - 1];                  // the lower bound already in the result
      uint print = (uint)previous | ((uint)chTo << 16);   // both bounds packed into the cache key

      if (!addedSurrogate2Ranges.Add(print))
      {
          // "<previous>-<chTo>" is in the result already, just drop the lower bound again
          sb.Length--;
          return;
      }

      // "<previous>-<chTo>" will be in the <sb>
      sb.Append('-').Append(chTo);
  }
  /// <summary>
  /// Modifies regular expression so it matches only at the beginning of the string.
  /// </summary>
  /// <param name="expr">Regular expression to modify; it is replaced by the anchored expression. Must not be null.</param>
  /// <remarks>
  /// A '^' is prepended unless the expression already starts with one.
  /// NOTE(review): a leading '^' binds only to the first alternative of a top-level alternation ("a|b");
  /// confirm whether callers need the whole expression wrapped.
  /// </remarks>
  private static void ModifyRegExpAnchored(ref string expr)
  {
      // anchored means regular expression should match only at the beginning of the string
      // => add ^ at the beginning if there is no one.
      if (expr.Length == 0 || expr[0] != '^')
      {
          // FIX: strings are immutable; String.Insert returns a new string that was previously discarded,
          // so the expression was never modified.
          expr = "^" + expr;
      }
  }
  2756. #region Unit Test
  2757. #if !SILVERLIGHT
  2758. #if DEBUG
  /// <summary>
  /// Checks the result of <c>ConvertRegex</c> for three fixed pattern/option pairs.
  /// </summary>
  [Test]
  private void TestConvertRegex()
  {
      var cases = new[]
      {
          new
          {
              Pattern = @"?a+sa?s (?:{1,2})",
              Options = PerlRegexOptions.Ungreedy,
              Expected = "??a+?sa??s (?:{1,2}?)"
          },
          new
          {
              Pattern = @"(X+)(?:\|(.+?))?]](.*)$",
              Options = PerlRegexOptions.Ungreedy,
              Expected = @"(X+?)(?:\|(.+))??]](.*?)$"
          },
          new
          {
              Pattern = @"([X$]+)$",
              Options = PerlRegexOptions.DollarMatchesEndOfStringOnly,
              Expected = @"([X$]+)\z"
          }
      };

      // cases are converted and checked one by one, in the order listed above
      foreach (var testCase in cases)
      {
          string converted = ConvertRegex(testCase.Pattern, testCase.Options);
          Debug.Assert(converted == testCase.Expected);
      }
  }
  2770. #endif
  2771. #endif
  2772. #endregion
  2773. }
  2774. #endregion
  2775. }