/3.0/Source/ClassLibrary/RegExpPerl.cs
C# | 3244 lines | 2030 code | 464 blank | 750 comment | 640 complexity | a29104df8fd368c98e16d78023cbfe33 MD5 | raw file
Possible License(s): CPL-1.0, GPL-2.0, CC-BY-SA-3.0, MPL-2.0-no-copyleft-exception, Apache-2.0
Large files are truncated, but you can click here to view the full file
- /*
-
- Copyright (c) 2004-2006 Pavel Novak and Tomas Matousek.
-
- The use and distribution terms for this software are contained in the file named License.txt,
- which can be found in the root of the Phalanger distribution. By using this software
- in any fashion, you are agreeing to be bound by the terms of this license.
-
- You must not remove this notice from this software.
-
- TODO: preg_match - unmatched groups should be empty only if they are not followed by matched one (isn't it PHP bug?)
- TODO: preg_last_error - Returns the error code of the last PCRE regex execution
-
- */
-
- using System;
- using System.Text;
- using System.Threading;
- using System.Collections;
- using System.Collections.Generic;
- using System.Globalization;
- using System.Text.RegularExpressions;
-
- using PHP.Core;
- using PHP.Core.Reflection;
-
- namespace PHP.Library
- {
/// <summary>
/// Options specific to Perl regular expressions that can be expressed neither through the .NET
/// <see cref="RegexOptions"/> nor by rewriting the pattern itself.
/// </summary>
/// <remarks>Each member occupies its own bit so that the values can be combined.</remarks>
[Flags]
public enum PerlRegexOptions
{
    /// <summary>No Perl specific option is set.</summary>
    None = 0,

    // bit 0
    Evaluate = 1 << 0,

    // bit 1
    Ungreedy = 1 << 1,

    // bit 2
    Anchored = 1 << 2,

    // bit 3
    DollarMatchesEndOfStringOnly = 1 << 3,

    // bit 4
    UTF8 = 1 << 4
}
-
- /// <summary>
- /// Implements PERL extended regular expressions as they are implemented in PHP.
- /// </summary>
- /// <threadsafety static="true"/>
- [ImplementsExtension(LibraryDescriptor.ExtPcre)]
- public static class PerlRegExp
- {
- #region preg_last_error
-
/// <summary>
/// Error codes exported to scripts as the <c>PREG_*_ERROR</c> constants.
/// </summary>
public enum PregError
{
    /// <summary>Exported as <c>PREG_NO_ERROR</c>.</summary>
    [ImplementsConstant("PREG_NO_ERROR")] NoError = 0,

    /// <summary>Exported as <c>PREG_INTERNAL_ERROR</c>.</summary>
    [ImplementsConstant("PREG_INTERNAL_ERROR")] InternalError = 1,

    /// <summary>Exported as <c>PREG_BACKTRACK_LIMIT_ERROR</c>.</summary>
    [ImplementsConstant("PREG_BACKTRACK_LIMIT_ERROR")] BacktrackLimitError = 2,

    /// <summary>Exported as <c>PREG_RECURSION_LIMIT_ERROR</c>.</summary>
    [ImplementsConstant("PREG_RECURSION_LIMIT_ERROR")] RecursionLimitError = 3,

    /// <summary>Exported as <c>PREG_BAD_UTF8_ERROR</c>.</summary>
    [ImplementsConstant("PREG_BAD_UTF8_ERROR")] BadUtf8Error = 4,

    /// <summary>Exported as <c>PREG_BAD_UTF8_OFFSET_ERROR</c>.</summary>
    [ImplementsConstant("PREG_BAD_UTF8_OFFSET_ERROR")] BadUtf8OffsetError = 5
}
-
/// <summary>
/// Numeric values of the <c>PREG_*</c> option constants. Constants that belong to different
/// function families intentionally share numeric values.
/// </summary>
public enum PregConst
{
    // preg_match / preg_match_all
    [ImplementsConstant("PREG_PATTERN_ORDER")] PatternOrder = 1,
    [ImplementsConstant("PREG_SET_ORDER")] SetOrder = 2,
    [ImplementsConstant("PREG_OFFSET_CAPTURE")] OffsetCapture = 0x100,

    // preg_split
    [ImplementsConstant("PREG_SPLIT_NO_EMPTY")] SplitNoEmpty = 1,
    [ImplementsConstant("PREG_SPLIT_DELIM_CAPTURE")] SplitDelimCapture = 2,
    [ImplementsConstant("PREG_SPLIT_OFFSET_CAPTURE")] SplitOffsetCapture = 4,

    // preg_replace
    [ImplementsConstant("PREG_REPLACE_EVAL")] ReplaceEval = 1,

    // preg_grep
    [ImplementsConstant("PREG_GREP_INVERT")] GrepInvert = 1,
}
-
/// <summary>
/// Implementation of <c>preg_last_error</c>. No error state is tracked here, so the result is
/// always zero (the value of <c>PREG_NO_ERROR</c>).
/// </summary>
/// <returns>Always 0.</returns>
[ImplementsFunction("preg_last_error")]
public static int LastError()
{
    const int noError = 0;
    return noError;
}
-
- #endregion
- #region preg_quote
-
/// <summary>
/// Puts a backslash in front of every character that has a special meaning in a regular
/// expression: <c>. \ + * ? [ ^ ] $ ( ) { } = ! &lt; &gt; | :</c>
/// </summary>
/// <param name="str">Text whose special characters should be escaped.</param>
/// <returns>The escaped text, or <B>null</B> if <paramref name="str"/> is <B>null</B>.</returns>
[ImplementsFunction("preg_quote")]
[PureFunction]
public static string Quote(string str)
{
    // no additional delimiter character is escaped by this overload
    const char unusedDelimiter = '\0';
    const bool escapeDelimiter = false;

    return Quote(str, unusedDelimiter, escapeDelimiter);
}
-
/// <summary>
/// Puts a backslash in front of every character that has a special meaning in a regular
/// expression (<c>. \ + * ? [ ^ ] $ ( ) { } = ! &lt; &gt; | :</c>) and also in front of the
/// first character of <paramref name="delimiter"/>.
/// </summary>
/// <param name="str">Text whose special characters should be escaped.</param>
/// <param name="delimiter">Its first character is escaped in addition to the general special
/// characters. A <B>null</B> or empty value means no additional character.</param>
/// <returns>The escaped text, or <B>null</B> if <paramref name="str"/> is <B>null</B>.</returns>
[ImplementsFunction("preg_quote")]
[PureFunction]
public static string Quote(string str, string delimiter)
{
    bool hasDelimiter = !(delimiter == null || delimiter.Length == 0);

    // only the first character of the delimiter string is taken into account
    char delimiterChar = hasDelimiter ? delimiter[0] : '\0';

    return Quote(str, delimiterChar, hasDelimiter);
}
-
/// <summary>
/// Prefixes every Perl regular expression meta-character in <paramref name="str"/> with a
/// backslash; optionally does the same for <paramref name="delimiter"/>.
/// </summary>
/// <param name="str">Text to quote.</param>
/// <param name="delimiter">Extra character to quote.</param>
/// <param name="delimiterUsed">If <B>false</B>, <paramref name="delimiter"/> is ignored.</param>
/// <returns>The quoted text, or <B>null</B> if <paramref name="str"/> is <B>null</B>.</returns>
internal static string Quote(string str, char delimiter, bool delimiterUsed)
{
    if (str == null)
        return null;

    // characters that are always escaped
    const string metaCharacters = @"\+*?[^]$(){}=!<>|:.";

    StringBuilder quoted = new StringBuilder();

    foreach (char c in str)
    {
        bool needsBackslash = (delimiterUsed && c == delimiter) || metaCharacters.IndexOf(c) >= 0;

        if (needsBackslash)
            quoted.Append('\\');

        quoted.Append(c);
    }

    return quoted.ToString();
}
-
- #endregion
-
- #region preg_grep
-
/// <summary>
/// Options accepted by the <c>Grep</c> functions.
/// </summary>
[Flags]
public enum GrepFlags
{
    /// <summary>Default behavior.</summary>
    None = 0,

    /// <summary>Exported as <c>PREG_GREP_INVERT</c>; selects the entries that do not match.</summary>
    [ImplementsConstant("PREG_GREP_INVERT")] GrepInvert = 1 << 0
}
-
/// <summary>
/// Selects those entries of <paramref name="input"/> that match <paramref name="pattern"/>.
/// </summary>
/// <param name="pattern">Pattern every entry is tested against.</param>
/// <param name="input">Array of strings to test.</param>
/// <returns>Array holding only the matching entries of <paramref name="input"/>.</returns>
[ImplementsFunction("preg_grep")]
public static PhpArray Grep(object pattern, PhpArray input)
{
    // same as the three-argument overload without any flag
    PhpArray matching = Grep(pattern, input, GrepFlags.None);
    return matching;
}
-
/// <summary>
/// <para>Selects those entries of <paramref name="input"/> that match <paramref name="pattern"/>.</para>
/// <para>With <see cref="GrepFlags.GrepInvert"/> the selection is inverted: the entries that do
/// not match are returned.</para>
/// </summary>
/// <param name="pattern">Pattern every entry is tested against.</param>
/// <param name="input">Array of strings to test.</param>
/// <param name="flags">Determines whether matching or non-matching entries are selected.</param>
/// <returns>Array of the selected entries stored under their original keys, or <B>null</B> if
/// <paramref name="input"/> is <B>null</B> or the pattern could not be converted.</returns>
[ImplementsFunction("preg_grep")]
public static PhpArray Grep(object pattern, PhpArray input, GrepFlags flags)
{
    if (input == null)
        return null;

    PerlRegExpConverter converter = ConvertPattern(pattern, null);
    if (converter == null)
        return null;

    bool invert = (flags & GrepFlags.GrepInvert) != 0;
    PhpArray selected = new PhpArray();

    foreach (KeyValuePair<IntStringKey, object> item in input)
    {
        string subject = ConvertData(item.Value, converter);
        bool isMatch = converter.Regex.Match(subject).Success;

        // keep matching entries normally, non-matching ones when inverted
        if (isMatch != invert)
            selected.Add(item.Key, subject);
    }

    return selected;
}
-
- #endregion
-
- #region preg_match, preg_match_all
-
/// <summary>
/// Options accepted by the Match function family.
/// </summary>
/// <remarks>
/// The enumeration used by the preg_match PHP functions is a hybrid: <see cref="PatternOrder"/> and
/// <see cref="SetOrder"/> exclude each other, whereas <see cref="OffsetCapture"/> may be combined
/// with either of them using the bitwise | operator.
/// </remarks>
[Flags]
public enum MatchFlags
{
    /// <summary>Exported as <c>PREG_PATTERN_ORDER</c>.</summary>
    [ImplementsConstant("PREG_PATTERN_ORDER")] PatternOrder = 1 << 0,

    /// <summary>Exported as <c>PREG_SET_ORDER</c>.</summary>
    [ImplementsConstant("PREG_SET_ORDER")] SetOrder = 1 << 1,

    /// <summary>Exported as <c>PREG_OFFSET_CAPTURE</c>.</summary>
    [ImplementsConstant("PREG_OFFSET_CAPTURE")] OffsetCapture = 1 << 8
}
-
/// <summary>
/// Tests whether <paramref name="data"/> contains a match of the regular expression
/// <paramref name="pattern"/>. Searching ends with the first match.
/// </summary>
/// <param name="pattern">Perl regular expression.</param>
/// <param name="data">String to search.</param>
/// <returns>1 if a match was found, 0 if not, -1 if the pattern could not be converted.</returns>
[ImplementsFunction("preg_match")]
[return: CastToFalse]
public static int Match(object pattern, object data)
{
    PerlRegExpConverter converter = ConvertPattern(pattern, null);
    if (converter == null)
        return -1;

    string subject = ConvertData(data, converter);

    if (converter.Regex.Match(subject).Success)
        return 1;

    return 0;
}
-
/// <summary>
/// <para>Looks for the first match of the regular expression <paramref name="pattern"/> in
/// <paramref name="data"/>.</para>
/// <para><paramref name="matches"/> receives the result: index 0 holds the text matched by the whole
/// <paramref name="pattern"/>, the following indices hold the texts matched by the parenthesized
/// subpatterns.</para>
/// </summary>
/// <param name="pattern">Perl regular expression.</param>
/// <param name="data">String or string of bytes to search.</param>
/// <param name="matches">Array containing matched strings.</param>
/// <returns>0 if there is no match and 1 if the match was found.</returns>
[ImplementsFunction("preg_match")]
[return: CastToFalse]
public static int Match(object pattern, object data, out PhpArray matches)
{
    const int startOffset = 0;
    const bool findAll = false;

    return Match(pattern, data, out matches, MatchFlags.PatternOrder, startOffset, findAll);
}
-
/// <summary>
/// <para>Looks for the first match of the regular expression <paramref name="pattern"/> in
/// <paramref name="data"/>.</para>
/// <para><paramref name="matches"/> receives the result: index 0 holds the text matched by the whole
/// <paramref name="pattern"/>, the following indices hold the texts matched by the parenthesized
/// subpatterns.</para>
/// <para>With <see cref="MatchFlags.OffsetCapture"/> every item of <paramref name="matches"/> is an
/// array instead of a string: the matched text is at index [0] and its offset in
/// <paramref name="data"/> at index [1].</para>
/// </summary>
/// <param name="pattern">Perl regular expression.</param>
/// <param name="data">String to search.</param>
/// <param name="matches">Array containing matched strings.</param>
/// <param name="flags"><see cref="MatchFlags"/>.</param>
/// <returns>0 if there is no match and 1 if the match was found.</returns>
[ImplementsFunction("preg_match")]
[return: CastToFalse]
public static int Match(object pattern, object data, out PhpArray matches, MatchFlags flags)
{
    const int startOffset = 0;
    const bool findAll = false;

    return Match(pattern, data, out matches, flags, startOffset, findAll);
}
-
/// <summary>
/// <para>Looks for the first match of the regular expression <paramref name="pattern"/> in
/// <paramref name="data"/>, starting at <paramref name="offset"/>.</para>
/// <para><paramref name="matches"/> receives the result: index 0 holds the text matched by the whole
/// <paramref name="pattern"/>, the following indices hold the texts matched by the parenthesized
/// subpatterns.</para>
/// <para>With <see cref="MatchFlags.OffsetCapture"/> every item of <paramref name="matches"/> is an
/// array instead of a string: the matched text is at index [0] and its offset in
/// <paramref name="data"/> at index [1]. (Note that supplying <paramref name="offset"/> is not the
/// same as passing a substring of <paramref name="data"/>.)</para>
/// </summary>
/// <param name="pattern">Perl regular expression.</param>
/// <param name="data">String or string of bytes to search.</param>
/// <param name="matches">Array containing matched strings.</param>
/// <param name="flags"><see cref="MatchFlags"/>.</param>
/// <param name="offset">Offset in <paramref name="data"/> where the search should start.</param>
/// <returns>0 if there is no match and 1 if the match was found.</returns>
[ImplementsFunction("preg_match")]
[return: CastToFalse]
public static int Match(object pattern, object data, out PhpArray matches, MatchFlags flags, int offset)
{
    const bool findAll = false;

    return Match(pattern, data, out matches, flags, offset, findAll);
}
-
/// <summary>
/// <para>Finds every match of the regular expression <paramref name="pattern"/> in
/// <paramref name="data"/> and stores them in <paramref name="matches"/> in "Pattern Order": index 0
/// holds an array of the whole matches, index 1 an array of what the first subpattern matched in
/// each of them, and so on.</para>
/// <para>The search for the next match starts just after the previous match.</para>
/// </summary>
/// <param name="pattern">Regular expression.</param>
/// <param name="data">String or string of bytes to search.</param>
/// <param name="matches">Output array containing matches found.</param>
/// <returns>Number of whole matches.</returns>
[ImplementsFunction("preg_match_all")]
[return: CastToFalse]
public static int MatchAll(object pattern, object data, out PhpArray matches)
{
    const int startOffset = 0;
    const bool findAll = true;

    return Match(pattern, data, out matches, MatchFlags.PatternOrder, startOffset, findAll);
}
-
/// <summary>
/// <para>Finds every match of the regular expression <paramref name="pattern"/> in
/// <paramref name="data"/> and stores them in <paramref name="matches"/>.</para>
/// <para>The search for the next match starts just after the previous match.</para>
/// <para>With <see cref="MatchFlags.PatternOrder"/>, index 0 of <paramref name="matches"/> holds an
/// array of the whole matches, index 1 an array of what the first parenthesized subpattern matched,
/// etc. With <see cref="MatchFlags.SetOrder"/>, index 0 holds the first set of matches (whole match
/// and subpatterns), index 1 the set of the second match, etc.</para>
/// <para><see cref="MatchFlags.OffsetCapture"/> replaces every matched string by an array with the
/// string at index 0 and its position in the original string at index 1.</para>
/// </summary>
/// <param name="pattern">Regular expression.</param>
/// <param name="data">String or string of bytes to search.</param>
/// <param name="matches">Output array containing matches found.</param>
/// <param name="flags">Selects the ordering of <paramref name="matches"/> (Set Order, Pattern Order)
/// and whether positions of the matches are added to the results (Offset Capture).</param>
/// <returns>Number of whole matches.</returns>
[ImplementsFunction("preg_match_all")]
[return: CastToFalse]
public static int MatchAll(object pattern, object data, out PhpArray matches, MatchFlags flags)
{
    const int startOffset = 0;
    const bool findAll = true;

    return Match(pattern, data, out matches, flags, startOffset, findAll);
}
-
/// <summary>
/// <para>Finds every match of the regular expression <paramref name="pattern"/> in
/// <paramref name="data"/>, beginning at <paramref name="offset"/>, and stores them in
/// <paramref name="matches"/>.</para>
/// <para>The search for the next match starts just after the previous match.</para>
/// <para>With <see cref="MatchFlags.PatternOrder"/>, index 0 of <paramref name="matches"/> holds an
/// array of the whole matches, index 1 an array of what the first parenthesized subpattern matched,
/// etc. With <see cref="MatchFlags.SetOrder"/>, index 0 holds the first set of matches (whole match
/// and subpatterns), index 1 the set of the second match, etc.</para>
/// <para><see cref="MatchFlags.OffsetCapture"/> replaces every matched string by an array with the
/// string at index 0 and its position in the original string at index 1.</para>
/// </summary>
/// <param name="pattern">Regular expression.</param>
/// <param name="data">String or string of bytes to search.</param>
/// <param name="matches">Output array containing matches found.</param>
/// <param name="flags">Selects the ordering of <paramref name="matches"/> (Set Order, Pattern Order)
/// and whether positions of the matches are added to the results (Offset Capture).</param>
/// <param name="offset">Offset in <paramref name="data"/> where the search should begin. This is not
/// the same as passing a substring, because of the ^ (start of the string or line) modifier.</param>
/// <returns>Number of whole matches.</returns>
[ImplementsFunction("preg_match_all")]
[return: CastToFalse]
public static int MatchAll(object pattern, object data, out PhpArray matches, MatchFlags flags, int offset)
{
    const bool findAll = true;

    return Match(pattern, data, out matches, flags, offset, findAll);
}
-
/// <summary>
/// Private method implementing functions from match family.
/// </summary>
/// <param name="pattern">Perl regular expression match pattern.</param>
/// <param name="data">String to search matches.</param>
/// <param name="matches">An array containing matches found.</param>
/// <param name="flags">Flags for searching.</param>
/// <param name="offset">Offset in <paramref name="data"/> where the search should start. A negative
/// value is counted from the end of the data; a value beyond either end is clamped to that end.</param>
/// <param name="matchAll"><B>True</B> if all matches should be found, <B>false</B> if only the first
/// is enough.</param>
/// <returns>Number of times the <paramref name="pattern"/> matches, or -1 if the flags are
/// contradictory, the pattern cannot be converted, or an anchored pattern matched somewhere other
/// than at the start position.</returns>
private static int Match(object pattern, object data, out PhpArray matches, MatchFlags flags,
    int offset, bool matchAll)
{
    // these two flags together do not make sense
    if ((flags & MatchFlags.PatternOrder) != 0 && (flags & MatchFlags.SetOrder) != 0)
    {
        PhpException.InvalidArgument("flags", LibResources.GetString("preg_match_pattern_set_order"));
        matches = null;
        return -1;
    }

    PerlRegExpConverter converter = ConvertPattern(pattern, null);
    if (converter == null)
    {
        matches = new PhpArray();
        return -1;
    }

    string converted = ConvertData(data, converter);

    // Regex.Match(string, int) throws if the start position lies outside <0, Length>,
    // so normalize the offset first: negative offsets are relative to the end of the data.
    int start = offset;
    if (start < 0)
    {
        start += converted.Length;
        if (start < 0) start = 0;
    }
    else if (start > converted.Length)
    {
        start = converted.Length;
    }

    bool offsetCapture = (flags & MatchFlags.OffsetCapture) != 0;

    Match m = converter.Regex.Match(converted, start);

    // an anchored pattern has to match exactly at the position where the search started
    // NOTE(review): this reports an error (-1) rather than "no match" (0) - confirm against PHP behavior
    if ((converter.PerlOptions & PerlRegexOptions.Anchored) != 0 && m.Success && m.Index != start)
    {
        matches = new PhpArray();
        return -1;
    }

    if (m.Success)
    {
        if (!matchAll || (flags & MatchFlags.PatternOrder) != 0)
        {
            matches = new PhpArray(m.Groups.Count);
        }
        else
            matches = new PhpArray();

        if (!matchAll)
        {
            // Preg numbers groups sequentially, both named and unnamed.
            // .Net only numbers unnamed groups.
            // So we name unnamed groups (see ConvertRegex) to map correctly.
            int lastSuccessfulGroupIndex = GetLastSuccessfulGroup(m.Groups);
            for (int i = 0; i <= lastSuccessfulGroupIndex; i++)
            {
                // All groups should be named.
                var groupName = GetGroupName(converter.Regex, i);

                // a named group is stored under its name and under its index as well
                if (!string.IsNullOrEmpty(groupName))
                {
                    matches[groupName] = NewArrayItem(m.Groups[i].Value, m.Groups[i].Index, offsetCapture);
                }

                matches[i] = NewArrayItem(m.Groups[i].Value, m.Groups[i].Index, offsetCapture);
            }

            return 1;
        }

        // store all other matches in PhpArray matches;
        // pattern order is the default, so only SetOrder has to be tested explicitly
        if ((flags & MatchFlags.SetOrder) != 0)
            return FillMatchesArrayAllSetOrder(converter.Regex, m, ref matches, offsetCapture);
        else
            return FillMatchesArrayAllPatternOrder(converter.Regex, m, ref matches, offsetCapture);
    }

    // no match has been found
    if (matchAll && (flags & MatchFlags.SetOrder) == 0)
    {
        // in that case PHP returns an array filled with empty arrays according to parentheses count
        matches = new PhpArray(m.Groups.Count);

        // GetGroupNumbers() builds a new array on each call, ask for it only once
        int groupCount = converter.Regex.GetGroupNumbers().Length;
        for (int i = 0; i < groupCount; i++)
        {
            AddGroupNameToResult(converter.Regex, matches, i, (ms, groupName) =>
            {
                ms[groupName] = new PhpArray(0);
            });

            matches[i] = new PhpArray(0);
        }
    }
    else
    {
        matches = new PhpArray(0); // empty array
    }

    return 0;
}
-
/// <summary>
/// Gets the name under which the group number <paramref name="index"/> of <paramref name="regex"/>
/// should be stored in the result array.
/// </summary>
/// <param name="regex">Regular expression the group belongs to.</param>
/// <param name="index">Number of the group.</param>
/// <returns>The group name with the <see cref="PerlRegExpConverter.GroupPrefix"/> removed, or an
/// empty string if the group is anonymous, is referred to by its index only, or does not exist.</returns>
private static string GetGroupName(Regex regex, int index)
{
    var groupName = regex.GroupNameFromNumber(index);

    // GroupNameFromNumber gives an empty string for a number that is not a group of the regex;
    // indexing such a string below would throw
    if (string.IsNullOrEmpty(groupName))
        return string.Empty;

    // group names are identifiers, compare them ordinally (not by the current culture rules)
    if (groupName.StartsWith(PerlRegExpConverter.AnonymousGroupPrefix, StringComparison.Ordinal))
    {
        // Anonymous groups: remove it altogether. Its purpose was to order it correctly.
        Debug.Assert(groupName.Substring(PerlRegExpConverter.AnonymousGroupPrefix.Length) == index.ToString(CultureInfo.InvariantCulture));
        return string.Empty;
    }

    if (groupName[0] != PerlRegExpConverter.GroupPrefix)
    {
        // Indexed groups: the caller stores them under the index, no name is needed.
        Debug.Assert(groupName == index.ToString(CultureInfo.InvariantCulture));
        return string.Empty;
    }

    // Named groups: remove prefix.
    return groupName.Substring(1);
}
-
- #endregion
-
- #region preg_split
-
/// <summary>
/// Options accepted by the split function family.
/// </summary>
[Flags]
public enum SplitFlags
{
    /// <summary>Default behavior.</summary>
    None = 0,

    /// <summary>Exported as <c>PREG_SPLIT_NO_EMPTY</c>.</summary>
    [ImplementsConstant("PREG_SPLIT_NO_EMPTY")] NoEmpty = 1 << 0,

    /// <summary>Exported as <c>PREG_SPLIT_DELIM_CAPTURE</c>.</summary>
    [ImplementsConstant("PREG_SPLIT_DELIM_CAPTURE")] DelimCapture = 1 << 1,

    /// <summary>Exported as <c>PREG_SPLIT_OFFSET_CAPTURE</c>.</summary>
    [ImplementsConstant("PREG_SPLIT_OFFSET_CAPTURE")] OffsetCapture = 1 << 2
}
-
/// <summary>
/// Cuts <paramref name="data"/> at the places matched by <paramref name="pattern"/> and returns the
/// pieces in an array.
/// </summary>
/// <param name="pattern">Regular expression matching the boundaries.</param>
/// <param name="data">String or string of bytes to split.</param>
/// <returns>An array containing substrings.</returns>
[ImplementsFunction("preg_split")]
public static PhpArray Split(object pattern, object data)
{
    const int unlimited = -1;

    return Split(pattern, data, unlimited, SplitFlags.None);
}
-
/// <summary>
/// <para>Cuts <paramref name="data"/> at the places matched by <paramref name="pattern"/> and returns
/// the pieces in an array.</para>
/// <para><paramref name="limit"/> is the maximum number of strings in the resulting array. Once
/// (limit-1) matches have been found, the whole rest of the string becomes the last element of
/// the array.</para>
/// </summary>
/// <param name="pattern">Regular expression matching the boundaries.</param>
/// <param name="data">String or string of bytes to split.</param>
/// <param name="limit">Max number of elements in the resulting array.</param>
/// <returns>An array containing substrings.</returns>
[ImplementsFunction("preg_split")]
public static PhpArray Split(object pattern, object data, int limit)
{
    // no flags: same as the four-argument overload with default behavior
    PhpArray pieces = Split(pattern, data, limit, SplitFlags.None);
    return pieces;
}
-
/// <summary>
/// <para>Splits <paramref name="data"/> along boundaries matched by <paramref name="pattern"/> and returns
/// an array containing substrings.</para>
/// <para><paramref name="limit"/> specifies the maximum number of substrings returned in the resulting
/// array. Once (limit-1) substrings have been produced, the whole remaining string is returned as the
/// last element of the array. 0 is treated as -1 (no limit), values below -1 are treated as 1.</para>
/// <para>Some flags may be specified. <see cref="SplitFlags.NoEmpty"/> means no empty strings will be
/// in the resulting array. <see cref="SplitFlags.DelimCapture"/> adds also the substrings captured by the
/// groups of the delimiter and <see cref="SplitFlags.OffsetCapture"/> returns, instead of substrings,
/// arrays containing the substring at index 0 and the offset of this substring in the original
/// <paramref name="data"/> at index 1.</para>
/// </summary>
/// <remarks>
/// A match of zero length is a boundary like any other: the text between the previous boundary and
/// the match becomes one piece and nothing is consumed. (So an empty pattern splits the data into
/// characters.)
/// </remarks>
/// <param name="pattern">Regular expression to match to boundaries.</param>
/// <param name="data">String or string of bytes to split.</param>
/// <param name="limit">Max number of elements in the resulting array.</param>
/// <param name="flags">Flags affecting the returned array.</param>
/// <returns>An array containing substrings, or <B>null</B> if the pattern could not be converted.</returns>
[ImplementsFunction("preg_split")]
public static PhpArray Split(object pattern, object data, int limit, SplitFlags flags)
{
    if (limit == 0) // 0 does not make sense, php's behavior is as it is -1
        limit = -1;
    if (limit < -1) // for all other negative values it seems that is as limit == 1
        limit = 1;

    PerlRegExpConverter converter = ConvertPattern(pattern, null);
    if (converter == null) return null;

    string str = ConvertData(data, converter);
    Match m = converter.Regex.Match(str);

    bool offset_capture = (flags & SplitFlags.OffsetCapture) != 0;
    bool no_empty = (flags & SplitFlags.NoEmpty) != 0;
    bool delim_capture = (flags & SplitFlags.DelimCapture) != 0;

    PhpArray result = new PhpArray();
    int last_index = 0; // where the piece that is being collected starts

    // another piece may be cut off only while at least two more elements are allowed:
    // this piece and the remaining string
    while (m.Success && (limit == -1 || limit > 1) && last_index < str.Length)
    {
        // add part before match
        int length = m.Index - last_index;
        if (length > 0 || !no_empty)
        {
            result.Add(NewArrayItem(str.Substring(last_index, length), last_index, offset_capture));

            // only pieces that were really added count against the limit
            if (limit != -1) limit--;
        }

        // add all captures but not whole pattern match (start at 1);
        // as before, captures are reported for non-empty delimiters only
        if (delim_capture && m.Length > 0)
        {
            List<object> pendingUnsuccessful = null; // values of groups that failed since the last successful one

            for (int i = 1; i < m.Groups.Count; i++)
            {
                Group g = m.Groups[i];
                if (g.Length == 0 && no_empty)
                    continue;

                // the value to be added into the result:
                object value = NewArrayItem(g.Value, g.Index, offset_capture);

                if (g.Success)
                {
                    // group {i} was matched:
                    // if there were some unsuccessful groups before, add them now:
                    if (pendingUnsuccessful != null && pendingUnsuccessful.Count > 0)
                    {
                        foreach (object x in pendingUnsuccessful)
                            result.Add(x);
                        pendingUnsuccessful.Clear();
                    }

                    // add the matched group:
                    result.Add(value);
                }
                else
                {
                    // Remember the unsuccessful groups and add them only if some successful
                    // group follows. In PHP, unsuccessfully matched groups are trimmed by the end
                    // (regexp processing stops when other groups cannot be matched):
                    if (pendingUnsuccessful == null) pendingUnsuccessful = new List<object>();
                    pendingUnsuccessful.Add(value);
                }
            }
        }

        // The next piece starts right after the delimiter. A zero-length delimiter consumes nothing;
        // NextMatch() itself moves one character forward after an empty match, so the loop advances.
        last_index = m.Index + m.Length;

        m = m.NextMatch();
    }

    // add remaining string (might be empty)
    if (last_index < str.Length || !no_empty)
        result.Add(NewArrayItem(str.Substring(last_index), last_index, offset_capture));

    return result;
}
-
- #endregion
-
- #region preg_replace, preg_replace_callback
-
/// <summary>
/// <para>Replaces the matches of <paramref name="pattern"/> found in <paramref name="data"/> by
/// <paramref name="replacement"/>. The <paramref name="replacement"/> may refer to matched groups
/// using <c>\\n</c> or <c>$n</c> (the latter is preferred).</para>
/// <para>Each parameter may be a one-dimensional array of strings. If <paramref name="data"/> is
/// an array, every element is processed and an array is returned. If both <paramref name="pattern"/>
/// and <paramref name="replacement"/> are arrays, the replacements are performed in the order in
/// which the keys appear in the array. If only <paramref name="pattern"/> is an array, the same
/// replacement string is used for each of its keys.</para>
/// </summary>
/// <param name="context">Current <see cref="ScriptContext"/>. Passed by Phalanger runtime, cannot be null.</param>
/// <param name="self">Instance of object that called the replace method (replace pattern may contain $this).</param>
/// <param name="definedVariables"></param>
/// <param name="pattern">Regular expression to match.</param>
/// <param name="replacement">Replacement string.</param>
/// <param name="data">String to search for replacements.</param>
/// <returns>String or array containing strings with replacement performed.</returns>
[ImplementsFunction("preg_replace", FunctionImplOptions.CaptureEvalInfo | FunctionImplOptions.NeedsVariables | FunctionImplOptions.NeedsThisReference)]
public static object Replace(ScriptContext/*!*/context, DObject self, Dictionary<string, object> definedVariables,
    object pattern, object replacement, object data)
{
    const int unlimited = -1;
    int countingDisabled = Int32.MinValue; // disables counting

    object replaced = Replace(context, self, definedVariables, pattern, replacement, null, data, unlimited, ref countingDisabled);
    return replaced;
}
-
/// <summary>
/// <para>Replaces at most <paramref name="limit"/> matches of <paramref name="pattern"/> found in
/// <paramref name="data"/> by <paramref name="replacement"/>. The <paramref name="replacement"/> may
/// refer to matched groups using <c>\\n</c> or <c>$n</c> (the latter is preferred).</para>
/// <para>Each parameter may be a one-dimensional array of strings. If <paramref name="data"/> is
/// an array, every element is processed and an array is returned. If both <paramref name="pattern"/>
/// and <paramref name="replacement"/> are arrays, the replacements are performed in the order in
/// which the keys appear in the array. If only <paramref name="pattern"/> is an array, the same
/// replacement string is used for each of its keys.</para>
/// </summary>
/// <param name="context">Current <see cref="ScriptContext"/>. Passed by Phalanger runtime, cannot be null.</param>
/// <param name="self">Instance of object that called the replace method (replace pattern may contain $this)</param>
/// <param name="definedVariables"></param>
/// <param name="pattern">Regular expression to match.</param>
/// <param name="replacement">Replacement string.</param>
/// <param name="data">String to search for replacements.</param>
/// <param name="limit">Maximum number of matches replaced. (-1 for no limit)</param>
/// <returns>String or array containing strings with replacement performed.</returns>
[ImplementsFunction("preg_replace", FunctionImplOptions.CaptureEvalInfo | FunctionImplOptions.NeedsVariables | FunctionImplOptions.NeedsThisReference)]
public static object Replace(ScriptContext/*!*/context, DObject self, Dictionary<string, object> definedVariables,
    object pattern, object replacement, object data, int limit)
{
    int countingDisabled = Int32.MinValue; // disables counting

    object replaced = Replace(context, self, definedVariables, pattern, replacement, null, data, limit, ref countingDisabled);
    return replaced;
}
-
/// <summary>
/// <para>Replaces at most <paramref name="limit"/> matches of <paramref name="pattern"/> found in
/// <paramref name="data"/> by <paramref name="replacement"/> and reports how many replacements were
/// made. The <paramref name="replacement"/> may refer to matched groups using <c>\\n</c> or
/// <c>$n</c> (the latter is preferred).</para>
/// <para>Each parameter may be a one-dimensional array of strings. If <paramref name="data"/> is
/// an array, every element is processed and an array is returned. If both <paramref name="pattern"/>
/// and <paramref name="replacement"/> are arrays, the replacements are performed in the order in
/// which the keys appear in the array. If only <paramref name="pattern"/> is an array, the same
/// replacement string is used for each of its keys.</para>
/// </summary>
/// <param name="context">Current <see cref="ScriptContext"/>. Passed by Phalanger runtime, cannot be null.</param>
/// <param name="self">Instance of object that called the replace method (replace pattern may contain $this)</param>
/// <param name="definedVariables"></param>
/// <param name="pattern">Regular expression to match.</param>
/// <param name="replacement">Replacement string.</param>
/// <param name="data">String to search for replacements.</param>
/// <param name="limit">Maximum number of matches replaced. (-1 for no limit)</param>
/// <param name="count">Number of replacements.</param>
/// <returns>String or array containing strings with replacement performed.</returns>
[ImplementsFunction("preg_replace", FunctionImplOptions.CaptureEvalInfo | FunctionImplOptions.NeedsVariables | FunctionImplOptions.NeedsThisReference)]
public static object Replace(ScriptContext/*!*/context, DObject self, Dictionary<string, object> definedVariables,
    object pattern, object replacement, object data, int limit, out int count)
{
    // counting starts from zero; the worker overload updates the counter through the reference
    count = 0;

    object replaced = Replace(context, self, definedVariables, pattern, replacement, null, data, limit, ref count);
    return replaced;
}
-
/// <summary>
/// <para>Finds matches of <paramref name="pattern"/> in <paramref name="data"/> and hands the array of
/// matched strings (the full match followed by the parenthesized substrings) to <paramref name="callback"/>,
/// whose return value is used as the replacement string.</para>
/// <para>Both <paramref name="pattern"/> and <paramref name="data"/> may also be one-dimensional arrays
/// of strings; they are treated the same way as by the <c>preg_replace</c> overloads.</para>
/// </summary>
/// <param name="context">Current <see cref="ScriptContext"/>. Supplied by the Phalanger runtime, never null.</param>
/// <param name="pattern">Regular expression to match.</param>
/// <param name="callback">Function invoked to obtain the replacement string.</param>
/// <param name="data">String to search for replacements.</param>
/// <returns>String, or array of strings, with the replacements performed.</returns>
[ImplementsFunction("preg_replace_callback")]
public static object Replace(ScriptContext/*!*/context, object pattern, PhpCallback callback, object data)
{
    // The negative sentinel disables counting; -1 means the number of replacements is not limited.
    int ignoredCount = Int32.MinValue;

    // Neither $this, local variables nor a replacement string apply to the callback variant.
    object result = Replace(context, null, null, pattern, null, callback, data, -1, ref ignoredCount);
    return result;
}
-
/// <summary>
/// <para>Finds matches of <paramref name="pattern"/> in <paramref name="data"/> and hands the array of
/// matched strings (the full match followed by the parenthesized substrings) to <paramref name="callback"/>,
/// whose return value is used as the replacement string.</para>
/// <para>Both <paramref name="pattern"/> and <paramref name="data"/> may also be one-dimensional arrays
/// of strings; they are treated the same way as by the <c>preg_replace</c> overloads.</para>
/// </summary>
/// <param name="context">Current <see cref="ScriptContext"/>. Supplied by the Phalanger runtime, never null.</param>
/// <param name="pattern">Regular expression to match.</param>
/// <param name="callback">Function invoked to obtain the replacement string.</param>
/// <param name="data">String to search for replacements.</param>
/// <param name="limit">Maximum number of matches replaced (-1 for no limit).</param>
/// <returns>String, or array of strings, with the replacements performed.</returns>
[ImplementsFunction("preg_replace_callback")]
public static object Replace(ScriptContext/*!*/context, object pattern, PhpCallback callback, object data, int limit)
{
    // The negative sentinel disables counting.
    int ignoredCount = Int32.MinValue;

    // Neither $this, local variables nor a replacement string apply to the callback variant.
    object result = Replace(context, null, null, pattern, null, callback, data, limit, ref ignoredCount);
    return result;
}
-
/// <summary>
/// <para>Finds matches of <paramref name="pattern"/> in <paramref name="data"/> and hands the array of
/// matched strings (the full match followed by the parenthesized substrings) to <paramref name="callback"/>,
/// whose return value is used as the replacement string.</para>
/// <para>Both <paramref name="pattern"/> and <paramref name="data"/> may also be one-dimensional arrays
/// of strings; they are treated the same way as by the <c>preg_replace</c> overloads.</para>
/// </summary>
/// <param name="context">Current <see cref="ScriptContext"/>. Supplied by the Phalanger runtime, never null.</param>
/// <param name="pattern">Regular expression to match.</param>
/// <param name="callback">Function invoked to obtain the replacement string.</param>
/// <param name="data">String to search for replacements.</param>
/// <param name="limit">Maximum number of matches replaced (-1 for no limit).</param>
/// <param name="count">Receives the number of replacements.</param>
/// <returns>String, or array of strings, with the replacements performed.</returns>
[ImplementsFunction("preg_replace_callback")]
public static object Replace(ScriptContext/*!*/context, object pattern, PhpCallback callback, object data, int limit, out int count)
{
    // A non-negative starting value is handed over by reference, so the shared
    // implementation can report the number of replacements through it.
    count = 0;

    // Neither $this, local variables nor a replacement string apply to the callback variant.
    object result = Replace(context, null, null, pattern, null, callback, data, limit, ref count);
    return result;
}
-
/// <summary>
/// Private method implementing all replace methods. Just one of <paramref name="replacement"/> or
/// <paramref name="callback"/> should be used; if both are null, matches are replaced by an empty string.
/// </summary>
/// <param name="context">Current <see cref="ScriptContext"/>. Must not be null.</param>
/// <param name="self">Instance of object that called the replace method (replace pattern may contain $this)</param>
/// <param name="definedVariables">Variables forwarded unchanged to <c>SimpleReplace</c>.</param>
/// <param name="pattern">A single pattern, or a <see cref="PhpArray"/> of patterns applied one after another.</param>
/// <param name="replacement">A replacement (converted to string), or a <see cref="PhpArray"/> of replacements
/// paired with the patterns in enumeration order. An array is only accepted together with an array pattern.</param>
/// <param name="callback">Callback producing the replacement; used instead of <paramref name="replacement"/>.</param>
/// <param name="data">Subject of the replacement. It is deep-copied first, so the value passed in is not modified.</param>
/// <param name="limit">Limit forwarded to <c>SimpleReplace</c>; values lower than -1 are changed to 0.</param>
/// <param name="count">Counter forwarded by reference to <c>SimpleReplace</c>.</param>
/// <returns>String or an array. <B>null</B> if a non-array pattern is combined with an array replacement.</returns>
private static object Replace(ScriptContext/*!*/context, DObject self, Dictionary<string, object> definedVariables, object pattern, object replacement, PhpCallback callback,
    object data, int limit, ref int count)
{
    // if we have no replacement and no callback, matches are deleted (replaced by an empty string)
    if (replacement == null && callback == null)
        replacement = String.Empty;

    // exactly one of replacement or callback is valid now
    Debug.Assert((replacement != null) ^ (callback != null));

    // get eval info if it has been captured - is needed even if we do not need them later
    SourceCodeDescriptor descriptor = context.GetCapturedSourceCodeDescriptor();

    // PHP's behaviour for undocumented limit range
    if (limit < -1)
        limit = 0;

    PhpArray replacement_array = replacement as PhpArray;

    string replacement_string = null;
    if (replacement_array == null && replacement != null)
        replacement_string = Core.Convert.ObjectToString(replacement);

    // we should return new array, if there is an array passed as subject, it should remain unchanged:
    object data_copy = PhpVariable.DeepCopy(data);

    PhpArray pattern_array = pattern as PhpArray;
    if (pattern_array == null)
    {
        // string pattern
        // string replacement

        if (replacement_array != null)
        {
            // string pattern and array replacement not allowed:
            PhpException.InvalidArgument("replacement", LibResources.GetString("replacement_array_pattern_not"));
            return null;
        }

        // pattern should be treated as string and therefore replacement too:
        return SimpleReplace(self, definedVariables, pattern, replacement_string, callback, data_copy, limit, descriptor, ref count);
    }
    else if (replacement_array == null)
    {
        // array pattern
        // string replacement (or callback)

        using (var pattern_enumerator = pattern_array.GetFastEnumerator())
        {
            while (pattern_enumerator.MoveNext())
            {
                // the output of one pattern is the input of the next one:
                data_copy = SimpleReplace(self, definedVariables, pattern_enumerator.CurrentValue, replacement_string,
                    callback, data_copy, limit, descriptor, ref count);
            }
        }
    }
    else //if (replacement_array != null)
    {
        // array pattern
        // array replacement

        bool replacement_valid = true;

        // both enumerators are disposed when the block is left, including when a replacement throws:
        using (var replacement_enumerator = replacement_array.GetFastEnumerator())
        using (var pattern_enumerator = pattern_array.GetFastEnumerator())
        {
            while (pattern_enumerator.MoveNext())
            {
                // replacements are in array, move to next item and take it if possible, in other case take empty string:
                if (replacement_valid && replacement_enumerator.MoveNext())
                {
                    replacement_string = Core.Convert.ObjectToString(replacement_enumerator.CurrentValue);
                }
                else
                {
                    replacement_string = string.Empty;
                    replacement_valid = false; // end of replacement_enumerator, do not call MoveNext again!
                }

                // the output of one pattern is the input of the next one:
                data_copy = SimpleReplace(self, definedVariables, pattern_enumerator.CurrentValue, replacement_string,
                    callback, data_copy, limit, descriptor, ref count);
            }
        }
    }

    // return resulting array or string assigned to data
    return data_copy;
}
-
/// <summary>
/// Takes a regular expression <paramref name="pattern"/> and one of <paramref name="replacement"/> or
/// <paramref name="callback"/>. Performs replacing on <paramref name="data"/>, which can be
/// <see cref="PhpArray"/>, in other cases it is converted to string.
/// If <paramref name="data"/> is <see cref="PhpArray"/>, every value is converted to string and
/// replacement is performed in place in this array.
/// Either <paramref name="replacement"/> or <paramref name="callback"/> should be null.
/// </summary>
/// <param name="self">Instance of object that called the replace method (replace pattern may contain $this)</param>
/// <param name="definedVariables">Array with local variables - can be used by replace pattern</param>
/// <param name="pattern">Regular expression to search.</param>
/// <param name="replacement">Regular replacement expression. Should be null if callback is specified.</param>
/// <param name="callback">Callback function that should be called to make replacements. Should be null
/// if replacement is specified.</param>
/// <param name="data">Array or string where pattern is searched.</param>
/// <param name="limit">Max count of replacements for each item in subject.</param>
/// <param name="descriptor"><see cref="SourceCodeDescriptor"/> for possible lambda function creation.</param>
/// <param name="count">Cumulated number of replacements.</param>
/// <returns>
/// The string with replacements performed, or <paramref name="data"/> itself (modified in place) if it is
/// a <see cref="PhpArray"/>. <B>null</B> if <c>ConvertPattern</c> does not return a converter.
/// </returns>
private static object SimpleReplace(DObject self, Dictionary<string, object> definedVariables, object pattern,
    string replacement, PhpCallback callback, object data, int limit, SourceCodeDescriptor descriptor, ref int count)
{
    Debug.Assert(limit >= -1);

    // exactly one of replacement or callback is valid:
    Debug.Assert((replacement != null) ^ (callback != null));

    PerlRegExpConverter converter = ConvertPattern(pattern, replacement);
    if (converter == null) return null;

    PhpArray data_array = data as PhpArray;

    // data comprising of a single string:
    if (data_array == null)
    {
        return ReplaceInternal(self, definedVariables, converter, callback,
            ConvertData(data, converter), limit, descriptor, ref count);
    }

    // data is array, process each item:
    // NOTE: try/finally is used instead of a using statement because the loop assigns CurrentValue
    // and a using variable is read-only (an error if the enumerator is a value type).
    var enumerator = data_array.GetFastEnumerator();
    try
    {
        while (enumerator.MoveNext())
        {
            enumerator.CurrentValue = ReplaceInternal(self, definedVariables, converter, callback,
                ConvertData(enumerator.CurrentValue, converter), limit, descriptor, ref count);
        }
    }
    finally
    {
        // dispose the enumerator even if a replacement (e.g. a user callback) throws:
        enumerator.Dispose();
    }

    // return array with items replaced:
    return data;
}
-
- /// <summary>
- /// Replaces <paramref name="limit"/> occurences of substrings.
- /// </summary>
- /// <param name="converter">
- /// Converter used for replacement if <paramref name="callback"/> is <B>null</B>.
- /// </param>
- /// <param name="self">Instance of object that called the replace method (replace pattern may contain $this)</param>
- /// <param name="definedVariables">Array with local variables - can be used by replace pattern</param>
- /// <param name="callback">Callback to call for replacement strings.</param>
- /// <param name="str">String to search for matches.</param>
- /// <param name="limit">Max number of replacements performed.</param>
- /// <param name="sourceCodeDesc"><see cref="SourceCodeDescriptor"/> for possible lambda function creation.</param>
- /// <param name="count">Cumulated number of replacements.</param>
- /// <returns></returns>
- private static string ReplaceInternal(DObject self, Dictionary<string, object> definedVariables, PerlRegExpConverter converter, PhpCallback callback,
- string str, int limit, SourceCodeDescriptor sourceCodeDesc, ref int count)
- {
- Debug.Assert(limit >= -1);
-
- if (callback == null)
- {
- // replace without executing code or counting the number of replacements:
- if ((converter.PerlOptions & PerlRegexOptions.Evaluate) == 0 && count < 0)
- return converter.Regex.Replace(str, converter.DotNetReplaceExpression, limit);
-
- Evaluator evaluator = new Evaluator(converter.Regex, converter.DotNetReplaceExpression, sourceCodeDesc, self, definedVariables);
- MatchEvaluator match_evaluator;
-
- if ((converter.PerlOptions & PerlRegexOptions.Evaluate) != 0)
- match_evaluator = new MatchEvaluator(evaluator.ReplaceCodeExecute);
- else
- match_evaluator = new MatchEvaluator(evaluator.ReplaceCount);
-
- string result = converter.Regex.Replac…
Large files files are truncated, but you can click here to view the full file