PageRenderTime 40ms CodeModel.GetById 13ms RepoModel.GetById 1ms app.codeStats 0ms

/inc/hyperlight/preg_helper.php

https://bitbucket.org/yoander/mtrack
PHP | 170 lines | 60 code | 25 blank | 85 comment | 8 complexity | 26e45a4a12117debbfce61a3bfdca6ce MD5 | raw file
Possible License(s): BSD-3-Clause, Apache-2.0
  1. <?php
  2. /**
  3. * Copyright 2008 Konrad Rudolph
  4. * All rights reserved.
  5. *
  6. * Permission is hereby granted, free of charge, to any person obtaining a copy
  7. * of this software and associated documentation files (the "Software"), to deal
  8. * in the Software without restriction, including without limitation the rights
  9. * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  10. * copies of the Software, and to permit persons to whom the Software is
  11. * furnished to do so, subject to the following conditions:
  12. *
  13. * The above copyright notice and this permission notice shall be included in
  14. * all copies or substantial portions of the Software.
  15. *
  16. * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17. * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18. * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  19. * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20. * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  21. * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  22. * THE SOFTWARE.
  23. */
  24. /**
  25. * Helper functions for the Perl-compatible regular expressions.
  26. * @package preg_helper
  27. */
  /**
   * Merges several regular expressions into one, using the indicated 'glue'.
   *
   * This function takes care of individual modifiers so it's safe to use
   * <i>different</i> modifiers on the individual expressions. The order of
   * sub-matches is preserved as well. Numbered back-references are adapted to
   * the new overall sub-match count. This means that it's safe to use numbered
   * back-references in the individual expressions!
   * If {@link $names} is given, the individual expressions are captured in
   * named sub-matches using the contents of that array as names (spaces in a
   * name are replaced by underscores). Each of these wrapping groups occupies
   * a capture number of its own; this is taken into account when the
   * back-references are renumbered.
   * Matching pair-delimiters (e.g. <var>"{…}"</var>) are currently
   * <b>not</b> supported.
   *
   * The function assumes that all regular expressions are well-formed.
   * Behaviour is undefined if they aren't.
   *
   * Known limitations: only numbered back-references of the form
   * <var>"\1"</var> are rewritten. All modifiers of an expression are emitted
   * as one inline option group <var>"(?…)"</var>.
   * NOTE(review): modifiers that have no inline form in PCRE (presumably e.g.
   * <var>"u"</var>, <var>"A"</var>, <var>"D"</var>) would then yield an invalid
   * pattern - confirm before relying on them.
   *
   * This function was created after a
   * {@link http://stackoverflow.com/questions/244959/ StackOverflow discussion}.
   * Much of it was written or thought of by “porneL” and “eyelidlessness”. Many
   * thanks to both of them.
   *
   * @param string $glue A string to insert between the individual expressions.
   *      This should usually be either the empty string, indicating
   *      concatenation, or the pipe (<var>"|"</var>), indicating alternation.
   *      Notice that this string might have to be escaped since it is treated
   *      as a normal character in a regular expression (i.e. <var>"/"</var> will
   *      end the expression and result in an invalid output).
   * @param array $expressions The expressions to merge. The expressions may
   *      have arbitrary different delimiters and modifiers.
   * @param array $names Optional. This is either an empty array or an array of
   *      strings of the same length as {@link $expressions}. In that case,
   *      the strings of this array are used (by position) to create named
   *      sub-matches for the expressions.
   * @return string A string representing a regular expression equivalent to the
   *      merged expressions, always delimited by <var>"/"</var> and without
   *      trailing modifiers. Returns <var>FALSE</var> if an error occurred.
   */
  function preg_merge($glue, array $expressions, array $names = array()) {
      $use_names = count($names) !== 0;

      // Sanity check: the glue must be a string and, if names are used, there
      // has to be exactly one name per expression.
      if (!is_string($glue) or ($use_names and count($names) !== count($expressions)))
          return false;

      // Names belong to expressions by position, whatever keys were passed in.
      $names = array_values($names);

      $backref_expr = '/
          (?<!\\\\)        # Not preceded by a backslash,
          ((?:\\\\\\\\)*?) # zero or more escaped backslashes,
          \\\\ (\d+)       # followed by backslash plus digits.
      /x';

      // A named wrapper "(?<name>…)" is itself a capturing group, the unnamed
      // wrapper "(?:…)" is not.
      $wrapper_captures = $use_names ? 1 : 0;

      $result = array();
      // For keeping track of *all* captures to re-adjust backreferences.
      $capture_count = 0;

      foreach (array_values($expressions) as $index => $expression) {
          // Get delimiters and modifiers:
          $stripped = preg_strip($expression);
          if ($stripped === false)
              return false;
          list($sub_expr, $modifiers) = $stripped;

          // We assume that the expression is correct and therefore don't check
          // for matching parentheses.
          $number_of_captures = _preg_count_captures($sub_expr);
          if ($number_of_captures === false)
              return false;

          // Re-adjust backreferences:
          // TODO What about \R backreferences (\0 isn't allowed, though)?
          if ($number_of_captures > 0) {
              // Groups of this expression are preceded by every capture of the
              // earlier expressions plus (if named) this expression's wrapper.
              $offset = $capture_count + $wrapper_captures;
              $sub_expr = preg_replace_callback(
                  $backref_expr,
                  function ($m) use ($offset) {
                      return $m[1] . '\\' . ((int) $m[2] + $offset);
                  },
                  $sub_expr
              );
              if ($sub_expr === null)
                  return false;
          }
          $capture_count += $number_of_captures + $wrapper_captures;

          // Last, construct the new sub-match:
          $modifiers = implode('', $modifiers);
          $sub_modifiers = $modifiers === '' ? '' : "(?$modifiers)";
          if ($use_names) {
              $name = str_replace(' ', '_', $names[$index]);
              $sub_name = "?<$name>";
          }
          else
              $sub_name = '?:';

          $result[] = "($sub_name$sub_modifiers$sub_expr)";
      }

      return '/' . implode($glue, $result) . '/';
  }

  /**
   * Counts the capturing groups of a (delimiter-less) regular expression.
   *
   * Escaped characters and the contents of character classes are skipped, and
   * constructs starting with <var>"(?"</var> or <var>"(*"</var> only count if
   * they are named groups. Internal helper of {@link preg_merge()}.
   *
   * @param string $sub_expr The expression without delimiters and modifiers.
   * @return int The number of capturing groups, or <var>FALSE</var> if the
   *      expression could not be scanned.
   */
  function _preg_count_captures($sub_expr) {
      $token_expr = '/
          \\\\ .                     # An escaped character: never opens a group.
        | \[ \^? \]?                 # A character class: parentheses inside of
          (?: \\\\ . | \[:\^?[a-z]+:\] | [^\]\\\\] )*
          \]                         # it are plain literals.
        | ( \( (?! [?*] )            # (1) A plain capturing group,
          | \( \? P? < (?! [=!] )    #     a named group (but no lookbehind),
          | \( \? \'                 #     or a quote-style named group.
          )
      /xs';

      if (preg_match_all($token_expr, $sub_expr, $tokens) === false)
          return false;

      // Only tokens in which sub-match 1 took part are group openers.
      $count = 0;
      foreach ($tokens[1] as $opener)
          if ($opener !== '')
              ++$count;

      return $count;
  }
  /**
   * Strips a regular expression string off its delimiters and modifiers.
   * Additionally, normalizes the delimiters (i.e. reformats the pattern so that
   * it could have used <var>"/"</var> as delimiter).
   *
   * Matching pair-delimiters (e.g. <var>"{…}"</var>) are not supported and
   * make the function return <var>FALSE</var>.
   *
   * @param string $expression The regular expression string to strip.
   * @return array An array whose first entry is the expression itself, the
   *      second an array of modifiers (one character per entry, empty if there
   *      are none). If the argument is not a valid regular expression, returns
   *      <var>FALSE</var>.
   */
  function preg_strip($expression) {
      if (!is_string($expression))
          return false;

      if (preg_match('/^(.)(.*)\\1([imsxeADSUXJu]*)$/s', $expression, $matches) !== 1)
          return false;

      $delim = $matches[1];
      $sub_expr = $matches[2];

      // PCRE does not accept alphanumeric characters, the backslash or
      // whitespace as delimiters, hence such an input is no valid expression.
      if (preg_match('/^[A-Za-z0-9\\\\\s]$/', $delim) === 1)
          return false;

      if ($delim !== '/') {
          // Escape every slash that is not escaped already. Escape sequences
          // are consumed as a unit so that "\/" is not escaped a second time.
          // Escaped occurrences of the old delimiter are deliberately left
          // alone: an escaped non-alphanumeric character always stands for
          // itself, whereas the bare character may be special (for example
          // "#" together with the "x" modifier, or "|").
          $sub_expr = preg_replace_callback(
              '~\\\\.|/~s',
              function ($token) {
                  return $token[0] === '/' ? '\\/' : $token[0];
              },
              $sub_expr
          );
          if ($sub_expr === null)
              return false;
      }

      $modifiers = $matches[3] === '' ? array() : str_split($matches[3]);

      return array($sub_expr, $modifiers);
  }
  145. ?>