PageRenderTime 58ms CodeModel.GetById 22ms RepoModel.GetById 1ms app.codeStats 0ms

/3.0/Source/ClassLibrary/RegExpPerl.cs

#
C# | 3244 lines | 2030 code | 464 blank | 750 comment | 640 complexity | a29104df8fd368c98e16d78023cbfe33 MD5 | raw file
Possible License(s): CPL-1.0, GPL-2.0, CC-BY-SA-3.0, MPL-2.0-no-copyleft-exception, Apache-2.0

Large files are truncated, but you can click here to view the full file

  1. /*
  2. Copyright (c) 2004-2006 Pavel Novak and Tomas Matousek.
  3. The use and distribution terms for this software are contained in the file named License.txt,
  4. which can be found in the root of the Phalanger distribution. By using this software
  5. in any fashion, you are agreeing to be bound by the terms of this license.
  6. You must not remove this notice from this software.
  7. TODO: preg_match - unmatched groups should be empty only if they are not followed by matched one (isn't it PHP bug?)
  8. TODO: preg_last_error - Returns the error code of the last PCRE regex execution
  9. */
  10. using System;
  11. using System.Text;
  12. using System.Threading;
  13. using System.Collections;
  14. using System.Collections.Generic;
  15. using System.Globalization;
  16. using System.Text.RegularExpressions;
  17. using PHP.Core;
  18. using PHP.Core.Reflection;
  19. namespace PHP.Library
  20. {
  /// <summary>
  /// Options of a Perl regular expression that can be expressed neither by .NET
  /// <see cref="RegexOptions"/> nor by rewriting the regular expression itself.
  /// The members are independent bits and may be combined.
  /// </summary>
  [Flags]
  public enum PerlRegexOptions
  {
      /// <summary>No Perl-specific option is set.</summary>
      None = 0,

      /// <summary>Bit 0.</summary>
      Evaluate = 1 << 0,

      /// <summary>Bit 1.</summary>
      Ungreedy = 1 << 1,

      /// <summary>Bit 2.</summary>
      Anchored = 1 << 2,

      /// <summary>Bit 3.</summary>
      DollarMatchesEndOfStringOnly = 1 << 3,

      /// <summary>Bit 4.</summary>
      UTF8 = 1 << 4
  }
  35. /// <summary>
  36. /// Implements PERL extended regular expressions as they are implemented in PHP.
  37. /// </summary>
  38. /// <threadsafety static="true"/>
  39. [ImplementsExtension(LibraryDescriptor.ExtPcre)]
  40. public static class PerlRegExp
  41. {
  42. #region preg_last_error
  /// <summary>
  /// Error codes published to PHP scripts as the <c>PREG_*_ERROR</c> constants.
  /// </summary>
  public enum PregError
  {
      /// <summary>Published as <c>PREG_NO_ERROR</c>.</summary>
      [ImplementsConstant("PREG_NO_ERROR")] NoError = 0,

      /// <summary>Published as <c>PREG_INTERNAL_ERROR</c>.</summary>
      [ImplementsConstant("PREG_INTERNAL_ERROR")] InternalError = 1,

      /// <summary>Published as <c>PREG_BACKTRACK_LIMIT_ERROR</c>.</summary>
      [ImplementsConstant("PREG_BACKTRACK_LIMIT_ERROR")] BacktrackLimitError = 2,

      /// <summary>Published as <c>PREG_RECURSION_LIMIT_ERROR</c>.</summary>
      [ImplementsConstant("PREG_RECURSION_LIMIT_ERROR")] RecursionLimitError = 3,

      /// <summary>Published as <c>PREG_BAD_UTF8_ERROR</c>.</summary>
      [ImplementsConstant("PREG_BAD_UTF8_ERROR")] BadUtf8Error = 4,

      /// <summary>Published as <c>PREG_BAD_UTF8_OFFSET_ERROR</c>.</summary>
      [ImplementsConstant("PREG_BAD_UTF8_OFFSET_ERROR")] BadUtf8OffsetError = 5
  }
  /// <summary>
  /// Numeric values of the remaining <c>PREG_*</c> constants. Several members share the same
  /// value because each constant is only meaningful for its own function family
  /// (match, split, replace, grep).
  /// </summary>
  public enum PregConst
  {
      /// <summary>Published as <c>PREG_PATTERN_ORDER</c>.</summary>
      [ImplementsConstant("PREG_PATTERN_ORDER")] PatternOrder = 0x1,

      /// <summary>Published as <c>PREG_SET_ORDER</c>.</summary>
      [ImplementsConstant("PREG_SET_ORDER")] SetOrder = 0x2,

      /// <summary>Published as <c>PREG_OFFSET_CAPTURE</c>.</summary>
      [ImplementsConstant("PREG_OFFSET_CAPTURE")] OffsetCapture = 0x100,

      /// <summary>Published as <c>PREG_SPLIT_NO_EMPTY</c>.</summary>
      [ImplementsConstant("PREG_SPLIT_NO_EMPTY")] SplitNoEmpty = 0x1,

      /// <summary>Published as <c>PREG_SPLIT_DELIM_CAPTURE</c>.</summary>
      [ImplementsConstant("PREG_SPLIT_DELIM_CAPTURE")] SplitDelimCapture = 0x2,

      /// <summary>Published as <c>PREG_SPLIT_OFFSET_CAPTURE</c>.</summary>
      [ImplementsConstant("PREG_SPLIT_OFFSET_CAPTURE")] SplitOffsetCapture = 0x4,

      /// <summary>Published as <c>PREG_REPLACE_EVAL</c>.</summary>
      [ImplementsConstant("PREG_REPLACE_EVAL")] ReplaceEval = 0x1,

      /// <summary>Published as <c>PREG_GREP_INVERT</c>.</summary>
      [ImplementsConstant("PREG_GREP_INVERT")] GrepInvert = 0x1,
  }
  /// <summary>
  /// Implementation of PHP <c>preg_last_error</c>. Error tracking is not implemented
  /// (see the TODO in the file header), so the result is always 0.
  /// </summary>
  /// <returns>Always 0.</returns>
  [ImplementsFunction("preg_last_error")]
  public static int LastError()
  {
      const int noError = 0;
      return noError;
  }
  82. #endregion
  83. #region preg_quote
  /// <summary>
  /// Escapes every character of <paramref name="str"/> that has a special meaning in a regular
  /// expression by prefixing it with a backslash. No additional delimiter character is escaped.
  /// </summary>
  /// <param name="str">String with characters to escape.</param>
  /// <returns>String with escaped characters; <B>null</B> if <paramref name="str"/> is <B>null</B>.</returns>
  [ImplementsFunction("preg_quote")]
  [PureFunction]
  public static string Quote(string str)
  {
      // no delimiter: the character argument is ignored when the flag is false
      const char unusedDelimiter = '\0';
      const bool quoteDelimiter = false;

      return Quote(str, unusedDelimiter, quoteDelimiter);
  }
  /// <summary>
  /// Escapes every character of <paramref name="str"/> that has a special meaning in a regular
  /// expression and, in addition, the first character of <paramref name="delimiter"/>.
  /// </summary>
  /// <param name="str">String with characters to escape.</param>
  /// <param name="delimiter">String whose first character is escaped in addition to the general
  /// special characters. A <B>null</B> or empty string means no additional character.</param>
  /// <returns>String with escaped characters; <B>null</B> if <paramref name="str"/> is <B>null</B>.</returns>
  [ImplementsFunction("preg_quote")]
  [PureFunction]
  public static string Quote(string str, string delimiter)
  {
      // without a usable delimiter behave exactly like the single-argument overload
      if (string.IsNullOrEmpty(delimiter))
      {
          return Quote(str, '\0', false);
      }

      // only the first character of the delimiter string is taken into account
      return Quote(str, delimiter[0], true);
  }
  /// <summary>
  /// Escapes all characters with a special meaning in Perl regular expressions, i.e.
  /// . \ + * ? [ ^ ] $ ( ) { } = ! &lt; &gt; | : - and optionally the character
  /// <paramref name="delimiter"/>, by prefixing each of them with a backslash.
  /// </summary>
  /// <remarks>
  /// The hyphen is quoted as well (PHP's <c>preg_quote</c> does so since PHP 5.3); without it a quoted
  /// string inserted into a character class could form an unintended range.
  /// </remarks>
  /// <param name="str">String to quote.</param>
  /// <param name="delimiter">Additional character to quote.</param>
  /// <param name="delimiterUsed">Whether the delimiter should be quoted.</param>
  /// <returns>String with quoted characters; <B>null</B> if <paramref name="str"/> is <B>null</B>.</returns>
  internal static string Quote(string str, char delimiter, bool delimiterUsed)
  {
      if (str == null)
          return null;

      StringBuilder result = new StringBuilder();

      foreach (char c in str)
      {
          bool escape;

          if (delimiterUsed && c == delimiter)
          {
              escape = true;
          }
          else
          {
              switch (c)
              {
                  case '\\':
                  case '+':
                  case '*':
                  case '?':
                  case '[':
                  case '^':
                  case ']':
                  case '$':
                  case '(':
                  case ')':
                  case '{':
                  case '}':
                  case '=':
                  case '!':
                  case '<':
                  case '>':
                  case '|':
                  case ':':
                  case '.':
                  case '-':
                      escape = true;
                      break;

                  default:
                      escape = false;
                      break;
              }
          }

          if (escape)
              result.Append('\\');

          result.Append(c);
      }

      return result.ToString();
  }
  162. #endregion
  163. #region preg_grep
  /// <summary>
  /// Options accepted by the <c>Grep</c> overloads.
  /// </summary>
  [Flags]
  public enum GrepFlags
  {
      /// <summary>Default behaviour: matching elements are returned.</summary>
      None = 0x0,

      /// <summary>Published as <c>PREG_GREP_INVERT</c>: elements that do NOT match are returned.</summary>
      [ImplementsConstant("PREG_GREP_INVERT")] GrepInvert = 0x1
  }
  /// <summary>
  /// Selects the elements of <paramref name="input"/> that match <paramref name="pattern"/>.
  /// Equivalent to the three-argument overload called with <see cref="GrepFlags.None"/>.
  /// </summary>
  /// <param name="pattern">Pattern to be matched against each array element.</param>
  /// <param name="input">Array of strings to match.</param>
  /// <returns>Array containing only values from <paramref name="input"/> that match
  /// <paramref name="pattern"/>.</returns>
  [ImplementsFunction("preg_grep")]
  public static PhpArray Grep(object pattern, PhpArray input)
  {
      const GrepFlags defaultFlags = GrepFlags.None;
      return Grep(pattern, input, defaultFlags);
  }
  /// <summary>
  /// <para>Selects the elements of <paramref name="input"/> that match <paramref name="pattern"/>.</para>
  /// <para>With <see cref="GrepFlags.GrepInvert"/> the selection is inverted: the result holds the
  /// elements that do not match.</para>
  /// </summary>
  /// <param name="pattern">Pattern to be matched against each array element.</param>
  /// <param name="input">Array of strings to match.</param>
  /// <param name="flags">Flags modifying which elements the resulting array contains.</param>
  /// <returns>
  /// Array of the selected elements stored under their original keys, or <B>null</B> when
  /// <paramref name="input"/> is <B>null</B> or the pattern could not be converted.
  /// </returns>
  [ImplementsFunction("preg_grep")]
  public static PhpArray Grep(object pattern, PhpArray input, GrepFlags flags)
  {
      if (input == null)
      {
          return null;
      }

      PerlRegExpConverter regexConverter = ConvertPattern(pattern, null);
      if (regexConverter == null)
      {
          return null;
      }

      bool invert = (flags & GrepFlags.GrepInvert) != 0;
      PhpArray selected = new PhpArray();

      foreach (KeyValuePair<IntStringKey, object> item in input)
      {
          string subject = ConvertData(item.Value, regexConverter);
          bool matched = regexConverter.Regex.Match(subject).Success;

          // keep the element when (matched and not inverted) or (not matched and inverted);
          // note that the converted string, not the original value, is stored
          if (matched != invert)
          {
              selected.Add(item.Key, subject);
          }
      }

      return selected;
  }
  217. #endregion
  218. #region preg_match, preg_match_all
  /// <summary>
  /// Flags accepted by the <c>Match</c> and <c>MatchAll</c> function family.
  /// </summary>
  /// <remarks>
  /// This is a hybrid enumeration mirroring the PHP constants: <see cref="PatternOrder"/> and
  /// <see cref="SetOrder"/> are mutually exclusive (passing both is rejected as an invalid argument),
  /// whereas <see cref="OffsetCapture"/> may be combined with either of them by bitwise OR.
  /// The ordering is decided by testing <see cref="SetOrder"/> only, so a value with neither
  /// ordering bit set behaves like pattern order.
  /// </remarks>
  [Flags]
  public enum MatchFlags
  {
      /// <summary>Published as <c>PREG_PATTERN_ORDER</c>.</summary>
      [ImplementsConstant("PREG_PATTERN_ORDER")] PatternOrder = 1 << 0,

      /// <summary>Published as <c>PREG_SET_ORDER</c>.</summary>
      [ImplementsConstant("PREG_SET_ORDER")] SetOrder = 1 << 1,

      /// <summary>Published as <c>PREG_OFFSET_CAPTURE</c>.</summary>
      [ImplementsConstant("PREG_OFFSET_CAPTURE")] OffsetCapture = 1 << 8
  }
  /// <summary>
  /// Tests whether <paramref name="data"/> contains a match of the regular expression
  /// <paramref name="pattern"/>. Only the first match is looked for.
  /// </summary>
  /// <param name="pattern">Perl regular expression.</param>
  /// <param name="data">String to search.</param>
  /// <returns>
  /// 1 if a match was found, 0 if not, and -1 if the pattern could not be converted
  /// (presumably turned into PHP <c>false</c> by the <c>CastToFalse</c> return attribute).
  /// </returns>
  [ImplementsFunction("preg_match")]
  [return: CastToFalse]
  public static int Match(object pattern, object data)
  {
      PerlRegExpConverter regexConverter = ConvertPattern(pattern, null);
      if (regexConverter == null)
      {
          return -1;
      }

      string subject = ConvertData(data, regexConverter);

      if (regexConverter.Regex.Match(subject).Success)
      {
          return 1;
      }

      return 0;
  }
  /// <summary>
  /// <para>Looks for the first match of <paramref name="pattern"/> in <paramref name="data"/>.</para>
  /// <para>On success <paramref name="matches"/> holds the whole matched text at index 0 and the
  /// texts matched by the parenthesized subpatterns from index 1 on.</para>
  /// </summary>
  /// <param name="pattern">Perl regular expression.</param>
  /// <param name="data">String or string of bytes to search.</param>
  /// <param name="matches">Array containing matched strings.</param>
  /// <returns>0 if there is no match and 1 if the match was found.</returns>
  [ImplementsFunction("preg_match")]
  [return: CastToFalse]
  public static int Match(object pattern, object data, out PhpArray matches)
  {
      // default flags, search from the beginning, stop after the first match
      const int startOffset = 0;
      const bool findAll = false;

      return Match(pattern, data, out matches, MatchFlags.PatternOrder, startOffset, findAll);
  }
  /// <summary>
  /// <para>Looks for the first match of <paramref name="pattern"/> in <paramref name="data"/>.</para>
  /// <para>On success <paramref name="matches"/> holds the whole matched text at index 0 and the
  /// texts matched by the parenthesized subpatterns from index 1 on.</para>
  /// <para>With <see cref="MatchFlags.OffsetCapture"/> every item of <paramref name="matches"/> is
  /// an array holding the substring at index [0] and its offset in <paramref name="data"/> at
  /// index [1] instead of the bare substring.</para>
  /// </summary>
  /// <param name="pattern">Perl regular expression.</param>
  /// <param name="data">String to search.</param>
  /// <param name="matches">Array containing matched strings.</param>
  /// <param name="flags"><see cref="MatchFlags"/>.</param>
  /// <returns>0 if there is no match and 1 if the match was found.</returns>
  [ImplementsFunction("preg_match")]
  [return: CastToFalse]
  public static int Match(object pattern, object data, out PhpArray matches, MatchFlags flags)
  {
      // search from the beginning, stop after the first match
      const int startOffset = 0;
      const bool findAll = false;

      return Match(pattern, data, out matches, flags, startOffset, findAll);
  }
  /// <summary>
  /// <para>Looks for the first match of <paramref name="pattern"/> in <paramref name="data"/>,
  /// starting the search at <paramref name="offset"/>. (This is not the same as passing a substring
  /// of <paramref name="data"/>.)</para>
  /// <para>On success <paramref name="matches"/> holds the whole matched text at index 0 and the
  /// texts matched by the parenthesized subpatterns from index 1 on.</para>
  /// <para>With <see cref="MatchFlags.OffsetCapture"/> every item of <paramref name="matches"/> is
  /// an array holding the substring at index [0] and its offset in <paramref name="data"/> at
  /// index [1] instead of the bare substring.</para>
  /// </summary>
  /// <param name="pattern">Perl regular expression.</param>
  /// <param name="data">String or string of bytes to search.</param>
  /// <param name="matches">Array containing matched strings.</param>
  /// <param name="flags"><see cref="MatchFlags"/>.</param>
  /// <param name="offset">Offset to <paramref name="data"/> where the match should start.</param>
  /// <returns>0 if there is no match and 1 if the match was found.</returns>
  [ImplementsFunction("preg_match")]
  [return: CastToFalse]
  public static int Match(object pattern, object data, out PhpArray matches, MatchFlags flags, int offset)
  {
      // stop after the first match
      const bool findAll = false;

      return Match(pattern, data, out matches, flags, offset, findAll);
  }
  /// <summary>
  /// <para>Collects all matches of <paramref name="pattern"/> in <paramref name="data"/> into
  /// <paramref name="matches"/> using "Pattern Order": index 0 holds an array of the whole matches,
  /// index 1 an array of the texts matched by subpattern number 1 in every match, and so on.</para>
  /// <para>The search for the next match starts just after the previous match.</para>
  /// </summary>
  /// <param name="pattern">Regular expression.</param>
  /// <param name="data">String or string of bytes to search.</param>
  /// <param name="matches">Output array containing matches found.</param>
  /// <returns>Number of whole matches.</returns>
  [ImplementsFunction("preg_match_all")]
  [return: CastToFalse]
  public static int MatchAll(object pattern, object data, out PhpArray matches)
  {
      // default flags, search from the beginning, collect every match
      const int startOffset = 0;
      const bool findAll = true;

      return Match(pattern, data, out matches, MatchFlags.PatternOrder, startOffset, findAll);
  }
  /// <summary>
  /// <para>Collects all matches of <paramref name="pattern"/> in <paramref name="data"/> into
  /// <paramref name="matches"/>. The search for the next match starts just after the previous match.</para>
  /// <para>With <see cref="MatchFlags.PatternOrder"/> index 0 of <paramref name="matches"/> holds an
  /// array of the whole matches, index 1 an array of the texts matched by the first parenthesized
  /// subpattern, and so on. With <see cref="MatchFlags.SetOrder"/> index 0 holds the complete set
  /// (whole match and subpatterns) of the first match, index 1 the set of the second match, etc.</para>
  /// <para><see cref="MatchFlags.OffsetCapture"/> replaces every matched substring by an array
  /// holding the substring at index 0 and its position in the original string at index 1.</para>
  /// </summary>
  /// <param name="pattern">Regular expression.</param>
  /// <param name="data">String or string of bytes to search.</param>
  /// <param name="matches">Output array containing matches found.</param>
  /// <param name="flags">Flags selecting the order of results in <paramref name="matches"/> (Set Order,
  /// Pattern Order) and whether positions of matches are added to the results (Offset Capture).</param>
  /// <returns>Number of whole matches.</returns>
  [ImplementsFunction("preg_match_all")]
  [return: CastToFalse]
  public static int MatchAll(object pattern, object data, out PhpArray matches, MatchFlags flags)
  {
      // search from the beginning, collect every match
      const int startOffset = 0;
      const bool findAll = true;

      return Match(pattern, data, out matches, flags, startOffset, findAll);
  }
  /// <summary>
  /// <para>Collects all matches of <paramref name="pattern"/> in <paramref name="data"/> into
  /// <paramref name="matches"/>, starting the search at <paramref name="offset"/>. The search for the
  /// next match starts just after the previous match.</para>
  /// <para>With <see cref="MatchFlags.PatternOrder"/> index 0 of <paramref name="matches"/> holds an
  /// array of the whole matches, index 1 an array of the texts matched by the first parenthesized
  /// subpattern, and so on. With <see cref="MatchFlags.SetOrder"/> index 0 holds the complete set
  /// (whole match and subpatterns) of the first match, index 1 the set of the second match, etc.</para>
  /// <para><see cref="MatchFlags.OffsetCapture"/> replaces every matched substring by an array
  /// holding the substring at index 0 and its position in the original string at index 1.</para>
  /// </summary>
  /// <param name="pattern">Regular expression.</param>
  /// <param name="data">String or string of bytes to search.</param>
  /// <param name="matches">Output array containing matches found.</param>
  /// <param name="flags">Flags selecting the order of results in <paramref name="matches"/> (Set Order,
  /// Pattern Order) and whether positions of matches are added to the results (Offset Capture).</param>
  /// <param name="offset">Offset in <paramref name="data"/> where the search should begin. This is not
  /// the same as passing a substring because of the ^ (start of the string or line) anchor.</param>
  /// <returns>Number of whole matches.</returns>
  [ImplementsFunction("preg_match_all")]
  [return: CastToFalse]
  public static int MatchAll(object pattern, object data, out PhpArray matches, MatchFlags flags, int offset)
  {
      // collect every match
      const bool findAll = true;

      return Match(pattern, data, out matches, flags, offset, findAll);
  }
  /// <summary>
  /// Private method implementing all functions of the match family.
  /// </summary>
  /// <param name="pattern">Perl regular expression match pattern.</param>
  /// <param name="data">String to search matches.</param>
  /// <param name="matches">An array containing matches found.</param>
  /// <param name="flags">Flags for searching.</param>
  /// <param name="offset">Offset to <paramref name="data"/> where the search should start. A negative
  /// value is counted from the end of the subject (and clamped to its beginning); a value beyond the
  /// end starts the search at the end.</param>
  /// <param name="matchAll"><B>True</B> if all matches should be found, <B>false</B> if only the first
  /// is enough.</param>
  /// <returns>
  /// Number of times the <paramref name="pattern"/> matches; -1 if both ordering flags are given,
  /// if the pattern cannot be converted, or if an anchored pattern matched elsewhere than at
  /// <paramref name="offset"/>.
  /// </returns>
  private static int Match(object pattern, object data, out PhpArray matches, MatchFlags flags,
      int offset, bool matchAll)
  {
      // these two flags together do not make sense
      if ((flags & MatchFlags.PatternOrder) != 0 && (flags & MatchFlags.SetOrder) != 0)
      {
          PhpException.InvalidArgument("flags", LibResources.GetString("preg_match_pattern_set_order"));
          matches = null;
          return -1;
      }

      PerlRegExpConverter converter = ConvertPattern(pattern, null);
      if (converter == null)
      {
          matches = new PhpArray();
          return -1;
      }

      string converted = ConvertData(data, converter);
      bool offsetCapture = (flags & MatchFlags.OffsetCapture) != 0;

      // Regex.Match(string, int) throws for a negative start position; PHP counts a negative
      // offset from the end of the subject and clamps it to the beginning.
      if (offset < 0)
      {
          offset += converted.Length;
          if (offset < 0)
              offset = 0;
      }

      Match m = converter.Regex.Match(converted, offset > converted.Length ? converted.Length : offset);

      // NOTE(review): an anchored pattern matching later than at the offset is reported as -1
      // rather than as "no match" (0) - confirm this is intended.
      if ((converter.PerlOptions & PerlRegexOptions.Anchored) > 0 && m.Success && m.Index != offset)
      {
          matches = new PhpArray();
          return -1;
      }

      if (m.Success)
      {
          if (!matchAll || (flags & MatchFlags.PatternOrder) != 0)
          {
              matches = new PhpArray(m.Groups.Count);
          }
          else
              matches = new PhpArray();

          if (!matchAll)
          {
              // Preg numbers groups sequentially, both named and unnamed.
              // .Net only numbers unnamed groups.
              // So we name unnamed groups (see ConvertRegex) to map correctly.
              int lastSuccessfulGroupIndex = GetLastSuccessfulGroup(m.Groups);
              for (int i = 0; i <= lastSuccessfulGroupIndex; i++)
              {
                  // All groups should be named; a named group is stored under its name
                  // as well as under its index.
                  var groupName = GetGroupName(converter.Regex, i);
                  if (!string.IsNullOrEmpty(groupName))
                  {
                      matches[groupName] = NewArrayItem(m.Groups[i].Value, m.Groups[i].Index, offsetCapture);
                  }

                  matches[i] = NewArrayItem(m.Groups[i].Value, m.Groups[i].Index, offsetCapture);
              }

              return 1;
          }

          // store all other matches in PhpArray matches;
          // only SetOrder is tested, anything else is treated as pattern order
          if ((flags & MatchFlags.SetOrder) != 0)
              return FillMatchesArrayAllSetOrder(converter.Regex, m, ref matches, offsetCapture);
          else
              return FillMatchesArrayAllPatternOrder(converter.Regex, m, ref matches, offsetCapture);
      }

      // no match has been found
      if (matchAll && (flags & MatchFlags.SetOrder) == 0)
      {
          // in that case PHP returns an array filled with empty arrays according to parentheses count
          matches = new PhpArray(m.Groups.Count);

          int groupCount = converter.Regex.GetGroupNumbers().Length;
          for (int i = 0; i < groupCount; i++)
          {
              AddGroupNameToResult(converter.Regex, matches, i, (ms, groupName) =>
              {
                  ms[groupName] = new PhpArray(0);
              });

              matches[i] = new PhpArray(0);
          }
      }
      else
      {
          matches = new PhpArray(0); // empty array
      }

      return 0;
  }
  /// <summary>
  /// Gets the name under which the group number <paramref name="index"/> of <paramref name="regex"/>
  /// should be exposed in a result array.
  /// </summary>
  /// <param name="regex">Regular expression whose group is examined.</param>
  /// <param name="index">Number of the group.</param>
  /// <returns>
  /// The group name without <see cref="PerlRegExpConverter.GroupPrefix"/> for a named group;
  /// an empty string for an anonymous group, for a purely indexed group and for a group number
  /// that does not exist in <paramref name="regex"/>.
  /// </returns>
  private static string GetGroupName(Regex regex, int index)
  {
      var groupName = regex.GroupNameFromNumber(index);

      // Regex.GroupNameFromNumber yields an empty string for an unknown group number;
      // indexing its first character below would throw.
      if (string.IsNullOrEmpty(groupName))
          return string.Empty;

      if (groupName.StartsWith(PerlRegExpConverter.AnonymousGroupPrefix))
      {
          // Anonymous groups: remove it altogether. Its purpose was to order it correctly.
          Debug.Assert(groupName.Substring(PerlRegExpConverter.AnonymousGroupPrefix.Length) == index.ToString(CultureInfo.InvariantCulture));
          return string.Empty;
      }

      if (groupName[0] != PerlRegExpConverter.GroupPrefix)
      {
          // Indexed groups: they are reported by their number only.
          Debug.Assert(groupName == index.ToString(CultureInfo.InvariantCulture));
          return string.Empty;
      }

      // Named groups: remove the prefix (the first character is known to be the prefix here).
      return groupName.Substring(1);
  }
  495. #endregion
  496. #region preg_split
  /// <summary>
  /// Options accepted by the <c>Split</c> overloads; the members may be combined.
  /// </summary>
  [Flags]
  public enum SplitFlags
  {
      /// <summary>No option is set.</summary>
      None = 0,

      /// <summary>Published as <c>PREG_SPLIT_NO_EMPTY</c>: empty pieces are left out of the result.</summary>
      [ImplementsConstant("PREG_SPLIT_NO_EMPTY")] NoEmpty = 1 << 0,

      /// <summary>Published as <c>PREG_SPLIT_DELIM_CAPTURE</c>: captured groups of the delimiter are
      /// added to the result.</summary>
      [ImplementsConstant("PREG_SPLIT_DELIM_CAPTURE")] DelimCapture = 1 << 1,

      /// <summary>Published as <c>PREG_SPLIT_OFFSET_CAPTURE</c>: offsets are reported along with the
      /// pieces.</summary>
      [ImplementsConstant("PREG_SPLIT_OFFSET_CAPTURE")] OffsetCapture = 1 << 2
  }
  /// <summary>
  /// Cuts <paramref name="data"/> at the boundaries matched by <paramref name="pattern"/> and returns
  /// the pieces. The number of pieces is not limited and no flags are applied.
  /// </summary>
  /// <param name="pattern">Regular expression to match to boundaries.</param>
  /// <param name="data">String or string of bytes to split.</param>
  /// <returns>An array containing substrings.</returns>
  [ImplementsFunction("preg_split")]
  public static PhpArray Split(object pattern, object data)
  {
      // -1 stands for "no limit"
      const int unlimited = -1;

      return Split(pattern, data, unlimited, SplitFlags.None);
  }
  /// <summary>
  /// <para>Cuts <paramref name="data"/> at the boundaries matched by <paramref name="pattern"/> and
  /// returns the pieces. No flags are applied.</para>
  /// <para><paramref name="limit"/> is the maximum number of strings in the resulting array. Once
  /// (limit-1) boundaries were found, the whole rest of the string becomes the last element.</para>
  /// </summary>
  /// <param name="pattern">Regular expression to match to boundaries.</param>
  /// <param name="data">String or string of bytes to split.</param>
  /// <param name="limit">Max number of elements in the resulting array.</param>
  /// <returns>An array containing substrings.</returns>
  [ImplementsFunction("preg_split")]
  public static PhpArray Split(object pattern, object data, int limit)
  {
      const SplitFlags defaultFlags = SplitFlags.None;

      return Split(pattern, data, limit, defaultFlags);
  }
  /// <summary>
  /// <para>Splits <paramref name="data"/> along boundaries matched by <paramref name="pattern"/> and returns
  /// an array containing substrings.</para>
  /// <para><paramref name="limit"/> specifies the maximum number of pieces returned in the resulting
  /// array. Once (limit-1) pieces were produced, the whole remaining string is returned as the last
  /// element of the array. 0 and -1 mean "no limit"; any other negative value behaves like 1.</para>
  /// <para>Some flags may be specified. <see cref="SplitFlags.NoEmpty"/> means no empty strings will be
  /// in the resulting array (suppressed pieces do not count towards <paramref name="limit"/>).
  /// <see cref="SplitFlags.DelimCapture"/> adds also the substrings captured by the groups of
  /// the delimiter and <see cref="SplitFlags.OffsetCapture"/> returns, instead of substrings, arrays
  /// containing the appropriate substring at index 0 and the offset of this substring in the original
  /// <paramref name="data"/> at index 1.</para>
  /// </summary>
  /// <remarks>
  /// A zero-width match (e.g. a look-ahead) is handled like any other delimiter: the text between the
  /// previous delimiter and the match becomes one piece and nothing is consumed. Consequently no
  /// character of the subject is ever emitted twice.
  /// </remarks>
  /// <param name="pattern">Regular expression to match to boundaries.</param>
  /// <param name="data">String or string of bytes to split.</param>
  /// <param name="limit">Max number of elements in the resulting array.</param>
  /// <param name="flags">Flags affecting the returned array.</param>
  /// <returns>An array containing substrings, or <B>null</B> if the pattern could not be converted.</returns>
  [ImplementsFunction("preg_split")]
  public static PhpArray Split(object pattern, object data, int limit, SplitFlags flags)
  {
      if (limit == 0) // 0 does not make sense, php's behavior is as it is -1
          limit = -1;
      if (limit < -1) // for all other negative values it seems that is as limit == 1
          limit = 1;

      PerlRegExpConverter converter = ConvertPattern(pattern, null);
      if (converter == null) return null;

      string str = ConvertData(data, converter);
      Match m = converter.Regex.Match(str);

      bool offset_capture = (flags & SplitFlags.OffsetCapture) != 0;
      bool no_empty = (flags & SplitFlags.NoEmpty) != 0;
      bool delim_capture = (flags & SplitFlags.DelimCapture) != 0;

      PhpArray result = new PhpArray();
      int last_index = 0;

      // the limit counts produced pieces, the last one being the remainder added after the loop
      while (m.Success && (limit == -1 || limit > 1) && last_index < str.Length)
      {
          // add part before match
          int length = m.Index - last_index;
          if (length > 0 || !no_empty)
          {
              result.Add(NewArrayItem(str.Substring(last_index, length), last_index, offset_capture));

              // one less piece left to produce
              if (limit != -1)
                  limit--;
          }

          // add all captures but not whole pattern match
          if (delim_capture)
              AddSplitDelimiterGroups(result, m, no_empty, offset_capture);

          // continue behind the delimiter; for a zero-width match this is the match position itself,
          // Match.NextMatch takes care of advancing the search
          last_index = m.Index + m.Length;

          m = m.NextMatch();
      }

      // add remaining string (might be empty)
      if (last_index < str.Length || !no_empty)
          result.Add(NewArrayItem(str.Substring(last_index), last_index, offset_capture));

      return result;
  }

  /// <summary>
  /// Appends the values captured by the groups 1..n of the delimiter match <paramref name="m"/>
  /// to <paramref name="result"/>. Unsuccessful groups are appended only if a successful group
  /// follows them, i.e. trailing unsuccessful groups are trimmed.
  /// </summary>
  /// <param name="result">Array receiving the captured values.</param>
  /// <param name="m">Successful match of the delimiter.</param>
  /// <param name="noEmpty">Whether empty captures are left out.</param>
  /// <param name="offsetCapture">Whether offsets are reported along with the values.</param>
  private static void AddSplitDelimiterGroups(PhpArray result, Match m, bool noEmpty, bool offsetCapture)
  {
      // values of groups that were not successful since the last successful one
      List<object> pendingUnsuccessful = null;

      // start at 1: group 0 is the whole delimiter match
      for (int i = 1; i < m.Groups.Count; i++)
      {
          Group g = m.Groups[i];
          if (g.Length == 0 && noEmpty)
              continue;

          // the value to be added into the result:
          object value = NewArrayItem(g.Value, g.Index, offsetCapture);

          if (g.Success)
          {
              // group {i} was matched: flush the unsuccessful groups preceding it first
              if (pendingUnsuccessful != null && pendingUnsuccessful.Count > 0)
              {
                  foreach (var x in pendingUnsuccessful)
                      result.Add(x);
                  pendingUnsuccessful.Clear();
              }

              // add the matched group:
              result.Add(value);
          }
          else
          {
              // Remember the unsuccessful group and add it only if some successful group follows.
              // In PHP, unsuccessfully matched groups are trimmed at the end
              // (regexp processing stops when other groups cannot be matched).
              if (pendingUnsuccessful == null)
                  pendingUnsuccessful = new List<object>();
              pendingUnsuccessful.Add(value);
          }
      }
  }
  628. #endregion
  629. #region preg_replace, preg_replace_callback
  /// <summary>
  /// <para>Replaces the matches of <paramref name="pattern"/> found in <paramref name="data"/> by
  /// <paramref name="replacement"/>. The replacement may contain backreferences written as
  /// <I>\\n</I> or <I>$n</I> (the latter is preferred).</para>
  /// <para>Each parameter may be a one-dimensional array of strings. If <paramref name="data"/> is
  /// an array, every element is processed and an array is returned. If both
  /// <paramref name="pattern"/> and <paramref name="replacement"/> are arrays, the replacements are
  /// applied in the order the keys appear in the array. If only <paramref name="pattern"/> is an
  /// array, the single replacement string is used for every pattern.</para>
  /// <para>The number of replacements is neither limited nor counted by this overload.</para>
  /// </summary>
  /// <param name="context">Current <see cref="ScriptContext"/>. Passed by Phalanger runtime, cannot be null.</param>
  /// <param name="self">Instance of object that called the replace method (replace pattern may contain $this).</param>
  /// <param name="definedVariables"></param>
  /// <param name="pattern">Regular expression to match.</param>
  /// <param name="replacement">Replacement string.</param>
  /// <param name="data">String to search for replacements.</param>
  /// <returns>String or array containing strings with replacement performed.</returns>
  [ImplementsFunction("preg_replace", FunctionImplOptions.CaptureEvalInfo | FunctionImplOptions.NeedsVariables | FunctionImplOptions.NeedsThisReference)]
  public static object Replace(ScriptContext/*!*/context, DObject self, Dictionary<string, object> definedVariables,
      object pattern, object replacement, object data)
  {
      // Int32.MinValue is the sentinel that disables counting of replacements
      int ignoredCount = Int32.MinValue;

      return Replace(context, self, definedVariables, pattern, replacement, null, data, -1, ref ignoredCount);
  }
  /// <summary>
  /// <para>Four-argument form of <c>preg_replace</c>: replaces the matches of <paramref name="pattern"/>
  /// found in <paramref name="data"/> by <paramref name="replacement"/>, performing at most
  /// <paramref name="limit"/> replacements. The number of replacements is not counted.</para>
  /// <para><paramref name="replacement"/> may refer to captured groups using <I>\\n</I> or <I>$n</I>
  /// (the second form is preferred).</para>
  /// <para>Each argument may be a one-dimensional array of strings. An array <paramref name="data"/> is
  /// processed item by item and an array is returned. When both <paramref name="pattern"/> and
  /// <paramref name="replacement"/> are arrays they are paired in key order; when only
  /// <paramref name="pattern"/> is an array the single replacement string is applied for each pattern.</para>
  /// </summary>
  /// <param name="context">Current <see cref="ScriptContext"/>. Supplied by the Phalanger runtime; must not be null.</param>
  /// <param name="self">Object instance that invoked the replace (the replace pattern may contain $this).</param>
  /// <param name="definedVariables">Local variables of the caller - can be used by the replace pattern.</param>
  /// <param name="pattern">Regular expression to match.</param>
  /// <param name="replacement">Replacement string.</param>
  /// <param name="data">String (or array of strings) to search for replacements.</param>
  /// <param name="limit">Maximum number of matches replaced (-1 for no limit).</param>
  /// <returns>String or array containing strings with the replacement performed.</returns>
  [ImplementsFunction("preg_replace", FunctionImplOptions.CaptureEvalInfo | FunctionImplOptions.NeedsVariables | FunctionImplOptions.NeedsThisReference)]
  public static object Replace(ScriptContext/*!*/context, DObject self, Dictionary<string, object> definedVariables,
      object pattern, object replacement, object data, int limit)
  {
      // Int32.MinValue is the marker that switches counting of replacements off
      int uncounted = Int32.MinValue;

      object replaced = Replace(context, self, definedVariables, pattern, replacement, null, data, limit, ref uncounted);
      return replaced;
  }
  /// <summary>
  /// <para>Five-argument form of <c>preg_replace</c>: replaces the matches of <paramref name="pattern"/>
  /// found in <paramref name="data"/> by <paramref name="replacement"/>, performing at most
  /// <paramref name="limit"/> replacements and reporting the number of replacements in
  /// <paramref name="count"/>.</para>
  /// <para><paramref name="replacement"/> may refer to captured groups using <I>\\n</I> or <I>$n</I>
  /// (the second form is preferred).</para>
  /// <para>Each argument may be a one-dimensional array of strings. An array <paramref name="data"/> is
  /// processed item by item and an array is returned. When both <paramref name="pattern"/> and
  /// <paramref name="replacement"/> are arrays they are paired in key order; when only
  /// <paramref name="pattern"/> is an array the single replacement string is applied for each pattern.</para>
  /// </summary>
  /// <param name="context">Current <see cref="ScriptContext"/>. Supplied by the Phalanger runtime; must not be null.</param>
  /// <param name="self">Object instance that invoked the replace (the replace pattern may contain $this).</param>
  /// <param name="definedVariables">Local variables of the caller - can be used by the replace pattern.</param>
  /// <param name="pattern">Regular expression to match.</param>
  /// <param name="replacement">Replacement string.</param>
  /// <param name="data">String (or array of strings) to search for replacements.</param>
  /// <param name="limit">Maximum number of matches replaced (-1 for no limit).</param>
  /// <param name="count">Receives the number of replacements.</param>
  /// <returns>String or array containing strings with the replacement performed.</returns>
  [ImplementsFunction("preg_replace", FunctionImplOptions.CaptureEvalInfo | FunctionImplOptions.NeedsVariables | FunctionImplOptions.NeedsThisReference)]
  public static object Replace(ScriptContext/*!*/context, DObject self, Dictionary<string, object> definedVariables,
      object pattern, object replacement, object data, int limit, out int count)
  {
      // a non-negative starting value keeps counting enabled; the shared implementation adds to it
      count = 0;

      object replaced = Replace(context, self, definedVariables, pattern, replacement, null, data, limit, ref count);
      return replaced;
  }
  /// <summary>
  /// <para>Three-argument form of <c>preg_replace_callback</c>: for each match of <paramref name="pattern"/>
  /// in <paramref name="data"/>, the array of matched strings (the full match followed by the parenthesized
  /// substrings) is handed to <paramref name="callback"/>, whose return value is used as the replacement.
  /// No limit is placed on the number of replacements and the number of replacements is not counted.</para>
  /// <para><paramref name="pattern"/> and <paramref name="data"/> may also be one-dimensional arrays of
  /// strings; see <see cref="Replace"/> for how arrays are handled.</para>
  /// </summary>
  /// <param name="context">Current <see cref="ScriptContext"/>. Supplied by the Phalanger runtime; must not be null.</param>
  /// <param name="pattern">Regular expression to match.</param>
  /// <param name="callback">Function called to obtain the replacement string.</param>
  /// <param name="data">String (or array of strings) to search for replacements.</param>
  /// <returns>String or array containing strings with the replacement performed.</returns>
  [ImplementsFunction("preg_replace_callback")]
  public static object Replace(ScriptContext/*!*/context, object pattern, PhpCallback callback, object data)
  {
      const int unlimited = -1;

      // Int32.MinValue is the marker that switches counting of replacements off
      int uncounted = Int32.MinValue;

      // no calling object, no local variables and no replacement string: the callback supplies the text
      object replaced = Replace(context, null, null, pattern, null, callback, data, unlimited, ref uncounted);
      return replaced;
  }
  /// <summary>
  /// <para>Four-argument form of <c>preg_replace_callback</c>: for each match of <paramref name="pattern"/>
  /// in <paramref name="data"/>, the array of matched strings (the full match followed by the parenthesized
  /// substrings) is handed to <paramref name="callback"/>, whose return value is used as the replacement.
  /// At most <paramref name="limit"/> replacements are performed; the number of replacements is not counted.</para>
  /// <para><paramref name="pattern"/> and <paramref name="data"/> may also be one-dimensional arrays of
  /// strings; see <see cref="Replace"/> for how arrays are handled.</para>
  /// </summary>
  /// <param name="context">Current <see cref="ScriptContext"/>. Supplied by the Phalanger runtime; must not be null.</param>
  /// <param name="pattern">Regular expression to match.</param>
  /// <param name="callback">Function called to obtain the replacement string.</param>
  /// <param name="data">String (or array of strings) to search for replacements.</param>
  /// <param name="limit">Maximum number of matches replaced (-1 for no limit).</param>
  /// <returns>String or array containing strings with the replacement performed.</returns>
  [ImplementsFunction("preg_replace_callback")]
  public static object Replace(ScriptContext/*!*/context, object pattern, PhpCallback callback, object data, int limit)
  {
      // Int32.MinValue is the marker that switches counting of replacements off
      int uncounted = Int32.MinValue;

      // no calling object, no local variables and no replacement string: the callback supplies the text
      object replaced = Replace(context, null, null, pattern, null, callback, data, limit, ref uncounted);
      return replaced;
  }
  /// <summary>
  /// <para>Five-argument form of <c>preg_replace_callback</c>: for each match of <paramref name="pattern"/>
  /// in <paramref name="data"/>, the array of matched strings (the full match followed by the parenthesized
  /// substrings) is handed to <paramref name="callback"/>, whose return value is used as the replacement.
  /// At most <paramref name="limit"/> replacements are performed and the number of replacements is
  /// reported in <paramref name="count"/>.</para>
  /// <para><paramref name="pattern"/> and <paramref name="data"/> may also be one-dimensional arrays of
  /// strings; see <see cref="Replace"/> for how arrays are handled.</para>
  /// </summary>
  /// <param name="context">Current <see cref="ScriptContext"/>. Supplied by the Phalanger runtime; must not be null.</param>
  /// <param name="pattern">Regular expression to match.</param>
  /// <param name="callback">Function called to obtain the replacement string.</param>
  /// <param name="data">String (or array of strings) to search for replacements.</param>
  /// <param name="limit">Maximum number of matches replaced (-1 for no limit).</param>
  /// <param name="count">Receives the number of replacements.</param>
  /// <returns>String or array containing strings with the replacement performed.</returns>
  [ImplementsFunction("preg_replace_callback")]
  public static object Replace(ScriptContext/*!*/context, object pattern, PhpCallback callback, object data, int limit, out int count)
  {
      // a non-negative starting value keeps counting enabled; the shared implementation adds to it
      count = 0;

      // no calling object, no local variables and no replacement string: the callback supplies the text
      object replaced = Replace(context, null, null, pattern, null, callback, data, limit, ref count);
      return replaced;
  }
  /// <summary>
  /// Private method implementing all replace methods. Just one of <paramref name="replacement"/> or
  /// <paramref name="callback"/> should be used; when both are null, matches are replaced by an empty string.
  /// </summary>
  /// <remarks>
  /// The subject is deep-copied first so that an array passed as <paramref name="data"/> is left unchanged.
  /// Patterns are then applied one after another to the copy. As soon as one pattern fails (the single-pattern
  /// helper returns null for a pattern it cannot convert), processing stops and null is returned instead of
  /// feeding the null result to the remaining patterns.
  /// </remarks>
  /// <param name="context">Current <see cref="ScriptContext"/>. Must not be null.</param>
  /// <param name="self">Instance of object that called the replace method (replace pattern may contain $this)</param>
  /// <param name="definedVariables">Local variables of the caller - can be used by the replace pattern.</param>
  /// <param name="pattern">A single pattern, or a <see cref="PhpArray"/> of patterns applied in enumeration order.</param>
  /// <param name="replacement">A single replacement, or a <see cref="PhpArray"/> of replacements paired with the
  /// patterns in enumeration order (missing replacements are treated as empty strings). An array replacement
  /// with a non-array pattern is reported as an invalid argument.</param>
  /// <param name="callback">Callback producing replacement strings; null when <paramref name="replacement"/> is used.</param>
  /// <param name="data">Subject string or array of subject strings.</param>
  /// <param name="limit">Maximum number of replacements (-1 for no limit). Values below -1 are treated as 0.</param>
  /// <param name="count">Cumulated number of replacements.</param>
  /// <returns>String or an array; null if the arguments are invalid or a pattern could not be processed.</returns>
  private static object Replace(ScriptContext/*!*/context, DObject self, Dictionary<string, object> definedVariables, object pattern, object replacement, PhpCallback callback,
      object data, int limit, ref int count)
  {
      // if we have no replacement and no callback, matches are deleted (replaced by an empty string)
      if (replacement == null && callback == null)
          replacement = String.Empty;

      // exactly one of replacement or callback is valid now
      Debug.Assert(replacement != null ^ callback != null);

      // get eval info if it has been captured - is needed even if we do not need them later
      SourceCodeDescriptor descriptor = context.GetCapturedSourceCodeDescriptor();

      // PHP's behaviour for undocumented limit range
      if (limit < -1)
          limit = 0;

      PhpArray replacement_array = replacement as PhpArray;
      string replacement_string = null;
      if (replacement_array == null && replacement != null)
          replacement_string = Core.Convert.ObjectToString(replacement);

      // we should return new array, if there is an array passed as subject, it should remain unchanged:
      object data_copy = PhpVariable.DeepCopy(data);

      PhpArray pattern_array = pattern as PhpArray;
      if (pattern_array == null)
      {
          // string pattern
          if (replacement_array != null)
          {
              // string pattern and array replacement not allowed:
              PhpException.InvalidArgument("replacement", LibResources.GetString("replacement_array_pattern_not"));
              return null;
          }

          // pattern should be treated as string and therefore replacement too:
          return SimpleReplace(self, definedVariables, pattern, replacement_string, callback, data_copy, limit, descriptor, ref count);
      }

      if (replacement_array == null)
      {
          // array pattern, string replacement (or callback)
          using (var pattern_enumerator = pattern_array.GetFastEnumerator())
          {
              while (pattern_enumerator.MoveNext())
              {
                  data_copy = SimpleReplace(self, definedVariables, pattern_enumerator.CurrentValue, replacement_string,
                      callback, data_copy, limit, descriptor, ref count);

                  // a failed pattern must not be hidden by applying the remaining patterns to a null subject
                  if (data_copy == null)
                      return null;
              }
          }
      }
      else
      {
          // array pattern, array replacement;
          // both enumerators are released by the using statements, including on early return or exception
          using (var replacement_enumerator = replacement_array.GetFastEnumerator())
          using (var pattern_enumerator = pattern_array.GetFastEnumerator())
          {
              bool replacement_valid = true;

              while (pattern_enumerator.MoveNext())
              {
                  // replacements are in array, move to next item and take it if possible, in other case take empty string:
                  if (replacement_valid && replacement_enumerator.MoveNext())
                  {
                      replacement_string = Core.Convert.ObjectToString(replacement_enumerator.CurrentValue);
                  }
                  else
                  {
                      replacement_string = string.Empty;
                      replacement_valid = false; // end of replacement_enumerator, do not call MoveNext again!
                  }

                  data_copy = SimpleReplace(self, definedVariables, pattern_enumerator.CurrentValue, replacement_string,
                      callback, data_copy, limit, descriptor, ref count);

                  // a failed pattern must not be hidden by applying the remaining patterns to a null subject
                  if (data_copy == null)
                      return null;
              }
          }
      }

      // return resulting array or string assigned to data
      return data_copy;
  }
  /// <summary>
  /// Applies a single regular expression <paramref name="pattern"/> to <paramref name="data"/>, using either
  /// <paramref name="replacement"/> or <paramref name="callback"/> to produce the substituted text.
  /// <paramref name="data"/> can be a <see cref="PhpArray"/>; any other value is converted to a string.
  /// If <paramref name="data"/> is a <see cref="PhpArray"/>, every value is converted to a string and the
  /// replacement is performed in place in this array.
  /// Either <paramref name="replacement"/> or <paramref name="callback"/> should be null.
  /// </summary>
  /// <param name="self">Instance of object that called the replace method (replace pattern may contain $this)</param>
  /// <param name="definedVariables">Array with local variables - can be used by replace pattern</param>
  /// <param name="pattern">Regular expression to search.</param>
  /// <param name="replacement">Regular replacement expression. Should be null if callback is specified.</param>
  /// <param name="callback">Callback function that should be called to make replacements. Should be null
  /// if replacement is specified.</param>
  /// <param name="data">Array or string where pattern is searched.</param>
  /// <param name="limit">Max count of replacements for each item in subject.</param>
  /// <param name="descriptor"><see cref="SourceCodeDescriptor"/> for possible lambda function creation.</param>
  /// <param name="count">Cumulated number of replacements.</param>
  /// <returns>
  /// The resulting string for a non-array subject, the same (modified) array instance for an array subject,
  /// or null if the pattern could not be converted.
  /// </returns>
  private static object SimpleReplace(DObject self, Dictionary<string, object> definedVariables, object pattern,
      string replacement, PhpCallback callback, object data, int limit, SourceCodeDescriptor descriptor, ref int count)
  {
      Debug.Assert(limit >= -1);

      // exactly one of replacement or callback is valid:
      Debug.Assert(replacement != null ^ callback != null);

      PerlRegExpConverter converter = ConvertPattern(pattern, replacement);
      if (converter == null) return null;

      PhpArray data_array = data as PhpArray;

      // data comprising of a single string:
      if (data_array == null)
      {
          return ReplaceInternal(self, definedVariables, converter, callback, ConvertData(data, converter),
              limit, descriptor, ref count);
      }

      // data is array, process each item.
      // try/finally (rather than a using statement, whose variable is read-only) so that CurrentValue can
      // still be assigned and the enumerator is disposed even if a replacement throws.
      var enumerator = data_array.GetFastEnumerator();
      try
      {
          while (enumerator.MoveNext())
          {
              enumerator.CurrentValue = ReplaceInternal(self, definedVariables, converter, callback,
                  ConvertData(enumerator.CurrentValue, converter), limit, descriptor, ref count);
          }
      }
      finally
      {
          enumerator.Dispose();
      }

      // return array with items replaced:
      return data;
  }
  894. /// <summary>
  895. /// Replaces <paramref name="limit"/> occurences of substrings.
  896. /// </summary>
  897. /// <param name="converter">
  898. /// Converter used for replacement if <paramref name="callback"/> is <B>null</B>.
  899. /// </param>
  900. /// <param name="self">Instance of object that called the replace method (replace pattern may contain $this)</param>
  901. /// <param name="definedVariables">Array with local variables - can be used by replace pattern</param>
  902. /// <param name="callback">Callback to call for replacement strings.</param>
  903. /// <param name="str">String to search for matches.</param>
  904. /// <param name="limit">Max number of replacements performed.</param>
  905. /// <param name="sourceCodeDesc"><see cref="SourceCodeDescriptor"/> for possible lambda function creation.</param>
  906. /// <param name="count">Cumulated number of replacements.</param>
  907. /// <returns></returns>
  908. private static string ReplaceInternal(DObject self, Dictionary<string, object> definedVariables, PerlRegExpConverter converter, PhpCallback callback,
  909. string str, int limit, SourceCodeDescriptor sourceCodeDesc, ref int count)
  910. {
  911. Debug.Assert(limit >= -1);
  912. if (callback == null)
  913. {
  914. // replace without executing code or counting the number of replacements:
  915. if ((converter.PerlOptions & PerlRegexOptions.Evaluate) == 0 && count < 0)
  916. return converter.Regex.Replace(str, converter.DotNetReplaceExpression, limit);
  917. Evaluator evaluator = new Evaluator(converter.Regex, converter.DotNetReplaceExpression, sourceCodeDesc, self, definedVariables);
  918. MatchEvaluator match_evaluator;
  919. if ((converter.PerlOptions & PerlRegexOptions.Evaluate) != 0)
  920. match_evaluator = new MatchEvaluator(evaluator.ReplaceCodeExecute);
  921. else
  922. match_evaluator = new MatchEvaluator(evaluator.ReplaceCount);
  923. string result = converter.Regex.Replac

Large files files are truncated, but you can click here to view the full file