PageRenderTime 47ms CodeModel.GetById 19ms RepoModel.GetById 0ms app.codeStats 0ms

/mediawiki-integration/source/php/mediawiki/includes/StringUtils.php

https://code.google.com/
PHP | 301 lines | 181 code | 31 blank | 89 comment | 23 complexity | d4abc48a271d8f7493633ff0fa1e6c8b MD5 | raw file
Possible License(s): GPL-2.0, LGPL-3.0
  1. <?php
  class StringUtils {
      /**
       * Replace every region delimited by $startDelim ... $endDelim with $replace.
       *
       * Roughly equivalent to
       *
       *   preg_replace( "!$startDelim(.*?)$endDelim!", $replace, $subject );
       *
       * except that it is worst-case O(N) instead of O(N^2). Both delimiters
       * are matched as literal strings (explode()/strpos()), and $replace is
       * inserted verbatim: no "$1"-style expansion takes place.
       *
       * Compared to delimiterReplace(), this implementation is fast but memory-
       * hungry and inflexible. The memory requirements are such that it is not
       * recommended for anything but guaranteed small chunks of text.
       *
       * @param string $startDelim Literal start delimiter
       * @param string $endDelim Literal end delimiter
       * @param string $replace Literal replacement text
       * @param string $subject String to search
       * @return string
       */
      public static function hungryDelimiterReplace( $startDelim, $endDelim, $replace, $subject ) {
          $segments = explode( $startDelim, $subject );
          // Everything before the first start delimiter is copied through untouched.
          $output = array_shift( $segments );
          $endLength = strlen( $endDelim );
          foreach ( $segments as $s ) {
              $endDelimPos = strpos( $s, $endDelim );
              if ( $endDelimPos === false ) {
                  // Unterminated start delimiter: put it back as it was.
                  $output .= $startDelim . $s;
              } else {
                  $output .= $replace . substr( $s, $endDelimPos + $endLength );
              }
          }
          return $output;
      }

      /**
       * Perform an operation similar to
       *
       *   preg_replace_callback( "!$startDelim(.*)$endDelim!s$flags", $callback, $subject )
       *
       * This implementation is slower than hungryDelimiterReplace() but uses far
       * less memory. The delimiters are literal strings, not regular expressions
       * (they are passed through preg_quote()).
       *
       * Matching rules, as implemented below:
       *  - A region runs from the first start delimiter to the next end
       *    delimiter, so START START END matches the outer pair.
       *  - The end delimiter must share no characters with the start delimiter
       *    that opened the region, so e.g. with C-style comment delimiters the
       *    text "/*" followed directly by "/" is not both the start and the end
       *    of a comment.
       *  - A start delimiter met while already inside a region is consumed
       *    whole, so an end delimiter that overlaps it is not seen.
       *  - If the start delimiter also matches where an end delimiter is present
       *    (e.g. identical delimiters), it is treated as the end of an open region.
       *  - Unmatched end delimiters and unterminated regions are copied to the
       *    output unchanged.
       *
       * @param string $startDelim Literal start delimiter
       * @param string $endDelim Literal end delimiter
       * @param callback $callback Called with an array whose element 0 is the
       *   whole match including both delimiters and whose element 1 is the text
       *   between them; its return value replaces the match
       * @param string $subject String to search
       * @param string $flags Regular expression flags
       * @return string
       * @throws MWException If a match yields neither a start nor an end token
       *   (e.g. an empty delimiter)
       */
      public static function delimiterReplaceCallback( $startDelim, $endDelim, $callback, $subject, $flags = '' ) {
          $inputPos = 0;
          $outputPos = 0;
          $contentPos = 0;
          $output = '';
          $foundStart = false;
          $encStart = preg_quote( $startDelim, '!' );
          $encEnd = preg_quote( $endDelim, '!' );
          $strcmp = strpos( $flags, 'i' ) === false ? 'strcmp' : 'strcasecmp';
          $endLength = strlen( $endDelim );
          $subjectLength = strlen( $subject );
          $m = array();

          while ( $inputPos < $subjectLength &&
              preg_match( "!($encStart)|($encEnd)!S$flags", $subject, $m, PREG_OFFSET_CAPTURE, $inputPos ) )
          {
              $tokenOffset = $m[0][1];
              if ( $m[1][0] !== '' ) {
                  if ( $foundStart &&
                      $strcmp( $endDelim, substr( $subject, $tokenOffset, $endLength ) ) == 0 )
                  {
                      # An end match is present at the same location
                      $tokenType = 'end';
                      $tokenLength = $endLength;
                  } else {
                      $tokenType = 'start';
                      $tokenLength = strlen( $m[0][0] );
                  }
              } elseif ( isset( $m[2] ) && $m[2][0] !== '' ) {
                  # Group 2 is absent from $m when only group 1 took part in the
                  # match, hence the isset() guard
                  $tokenType = 'end';
                  $tokenLength = strlen( $m[0][0] );
              } else {
                  throw new MWException( 'Invalid delimiter given to ' . __METHOD__ );
              }

              if ( $tokenType == 'start' ) {
                  $inputPos = $tokenOffset + $tokenLength;
                  # Only move the start position if we haven't already found a start
                  # This means that START START END matches outer pair
                  if ( !$foundStart ) {
                      # Found start
                      # Write out the non-matching section
                      $output .= substr( $subject, $outputPos, $tokenOffset - $outputPos );
                      $outputPos = $tokenOffset;
                      $contentPos = $inputPos;
                      $foundStart = true;
                  }
              } else {
                  $tokenEnd = $tokenOffset + $tokenLength;
                  if ( $foundStart ) {
                      # Found match
                      $output .= call_user_func( $callback, array(
                          substr( $subject, $outputPos, $tokenEnd - $outputPos ),
                          substr( $subject, $contentPos, $tokenOffset - $contentPos )
                      ) );
                      $foundStart = false;
                  } else {
                      # Non-matching end, write it out together with the text before it
                      $output .= substr( $subject, $outputPos, $tokenEnd - $outputPos );
                  }
                  $inputPos = $outputPos = $tokenEnd;
              }
          }

          # Copy whatever is left, including any unterminated region
          if ( $outputPos < $subjectLength ) {
              $output .= substr( $subject, $outputPos );
          }
          return $output;
      }

      /**
       * Perform an operation similar to
       *
       *   preg_replace( "!$startDelim(.*)$endDelim!$flags", $replace, $subject )
       *
       * This is a thin wrapper around delimiterReplaceCallback(), so the
       * delimiters are literal strings, not regular expressions.
       *
       * @param string $startDelim Literal start delimiter
       * @param string $endDelim Literal end delimiter
       * @param string $replace Replacement string. May contain $1, which will be
       *   replaced by the text between the delimiters, and $0, which will be
       *   replaced by the whole match
       * @param string $subject String to search
       * @param string $flags Regular expression flags
       * @return string The string with the matches replaced
       */
      public static function delimiterReplace( $startDelim, $endDelim, $replace, $subject, $flags = '' ) {
          $replacer = new RegexlikeReplacer( $replace );
          return self::delimiterReplaceCallback( $startDelim, $endDelim,
              $replacer->cb(), $subject, $flags );
      }

      /**
       * More or less "markup-safe" explode()
       * Ignores any instances of the separator inside <...>
       *
       * Note that any "\x00" bytes in $text are removed, because that byte is
       * used internally as a placeholder.
       *
       * @param string $separator
       * @param string $text
       * @return array
       */
      public static function explodeMarkup( $separator, $text ) {
          $placeholder = "\x00";

          // Remove placeholder instances
          $text = str_replace( $placeholder, '', $text );

          // Replace instances of the separator inside HTML-like tags with the placeholder
          $replacer = new DoubleReplacer( $separator, $placeholder );
          $cleaned = self::delimiterReplaceCallback( '<', '>', $replacer->cb(), $text );

          // Explode, then put the replaced separators back in
          $items = explode( $separator, $cleaned );
          foreach ( $items as $i => $str ) {
              $items[$i] = str_replace( $placeholder, $separator, $str );
          }
          return $items;
      }

      /**
       * Escape a string to make it suitable for inclusion in a preg_replace()
       * replacement parameter.
       *
       * Backslashes are doubled first, then each "$" is prefixed with a backslash.
       *
       * @param string $string
       * @return string
       */
      public static function escapeRegexReplacement( $string ) {
          $string = str_replace( '\\', '\\\\', $string );
          $string = str_replace( '$', '\\$', $string );
          return $string;
      }
  }
  /**
   * Base class for "replacers", objects used in preg_replace_callback() and
   * StringUtils::delimiterReplaceCallback()
   *
   * This class does not define replace() itself; cb() names that method, so
   * subclasses are expected to provide it.
   */
  class Replacer {
      /**
       * Build a callback that invokes this object's replace() method.
       *
       * No reference to $this is taken: objects are already passed around by
       * handle, so a plain $this is all a callable array needs.
       *
       * @return array Callable of the form array( $this, 'replace' )
       */
      function cb() {
          return array( $this, 'replace' );
      }
  }
  /**
   * Replacer that fills a template string in the manner of a preg_replace()
   * replacement: "$0", "$1", ... are substituted with the corresponding
   * entries of the match array.
   */
  class RegexlikeReplacer extends Replacer {
      /** Replacement template, as given to the constructor */
      public $r;

      /**
       * @param string $r Replacement template containing $N placeholders
       */
      function __construct( $r ) {
          $this->r = $r;
      }

      /**
       * Substitute each "$<key>" placeholder in the template with the match
       * stored under that key.
       *
       * @param array $matches Match array, keyed by group index
       * @return string
       */
      function replace( $matches ) {
          $substitutions = array();
          foreach ( array_keys( $matches ) as $group ) {
              $substitutions['$' . $group] = $matches[$group];
          }
          return strtr( $this->r, $substitutions );
      }
  }
  /**
   * Class to perform secondary replacement within each replacement string
   *
   * For every match, the selected element of the match array is returned with
   * all occurrences of $from replaced by $to.
   */
  class DoubleReplacer extends Replacer {
      // Declared explicitly rather than created on first assignment in the
      // constructor, matching the other replacer classes in this file.
      var $from, $to, $index;

      /**
       * @param string $from Text to search for inside each match
       * @param string $to Text to substitute for $from
       * @param int $index Which element of the match array to operate on
       */
      function __construct( $from, $to, $index = 0 ) {
          $this->from = $from;
          $this->to = $to;
          $this->index = $index;
      }

      /**
       * @param array $matches Match array
       * @return string The selected match with $from replaced by $to
       */
      function replace( $matches ) {
          return str_replace( $this->from, $this->to, $matches[$this->index] );
      }
  }
  /**
   * Replacer that maps each match to a value through an array lookup.
   */
  class HashtableReplacer extends Replacer {
      /** Lookup table: matched text => replacement */
      public $table;
      /** Element of the match array that is used as the lookup key */
      public $index;

      /**
       * @param array $table Lookup table keyed by matched text
       * @param int $index Which element of the match array to look up
       */
      function __construct( $table, $index = 0 ) {
          $this->index = $index;
          $this->table = $table;
      }

      /**
       * @param array $matches Match array
       * @return mixed The table entry stored under the selected match
       */
      function replace( $matches ) {
          $key = $matches[$this->index];
          return $this->table[$key];
      }
  }
  /**
   * Replacement array for FSS with fallback to strtr()
   * Supports lazy initialisation of FSS resource
   *
   * The FSS path is taken when the fss_prep_replace() function exists;
   * otherwise strtr() is used with the same array.
   */
  class ReplacementArray {
      /*mostly private*/ var $data = false;
      /*mostly private*/ var $fss = false;

      /**
       * Create an object with the specified replacement array
       * The array should have the same form as the replacement array for strtr()
       *
       * @param array $data Map of search string => replacement string
       */
      function __construct( $data = array() ) {
          $this->data = $data;
      }

      /**
       * Serialise only the replacement array; the prepared FSS value is
       * rebuilt on demand.
       *
       * @return array
       */
      function __sleep() {
          return array( 'data' );
      }

      /**
       * Mark the FSS value as not yet prepared after unserialisation.
       */
      function __wakeup() {
          $this->fss = false;
      }

      /**
       * Set the whole replacement array at once
       *
       * @param array $data
       */
      function setArray( $data ) {
          $this->data = $data;
          $this->fss = false;
      }

      /**
       * @return array The current replacement array
       */
      function getArray() {
          return $this->data;
      }

      /**
       * Set an element of the replacement array
       *
       * @param string $from
       * @param string $to
       */
      function setPair( $from, $to ) {
          $this->data[$from] = $to;
          $this->fss = false;
      }

      /**
       * Add the given pairs to the replacement array; on a key collision the
       * new value wins.
       *
       * Keys are assigned one by one instead of using array_merge(), because
       * array_merge() renumbers integer keys and a search string such as "1"
       * is stored by PHP as the integer key 1.
       *
       * @param array $data Map of search string => replacement string
       */
      function mergeArray( $data ) {
          foreach ( $data as $from => $to ) {
              $this->data[$from] = $to;
          }
          $this->fss = false;
      }

      /**
       * Add all pairs of another ReplacementArray to this one; on a key
       * collision the other object's value wins.
       *
       * @param ReplacementArray $other
       */
      function merge( $other ) {
          $this->mergeArray( $other->data );
      }

      /**
       * Apply the replacement array to a string.
       *
       * @param string $subject
       * @return string
       */
      function replace( $subject ) {
          if ( function_exists( 'fss_prep_replace' ) ) {
              wfProfileIn( __METHOD__.'-fss' );
              // Prepare lazily; any change to $data resets $fss to false
              if ( $this->fss === false ) {
                  $this->fss = fss_prep_replace( $this->data );
              }
              $result = fss_exec_replace( $this->fss, $subject );
              wfProfileOut( __METHOD__.'-fss' );
          } else {
              wfProfileIn( __METHOD__.'-strtr' );
              $result = strtr( $subject, $this->data );
              wfProfileOut( __METHOD__.'-strtr' );
          }
          return $result;
      }
  }
  270. ?>