PageRenderTime 271ms CodeModel.GetById 120ms app.highlight 100ms RepoModel.GetById 11ms app.codeStats 2ms

/wp-includes/formatting.php

https://github.com/davodey/WordPress
PHP | 3846 lines | 2281 code | 303 blank | 1262 comment | 310 complexity | 16c946ba73d4badae88cc1a98a4d2d59 MD5 | raw file

Large files are truncated, but you can click here to view the full file

   1<?php
   2/**
   3 * Main WordPress Formatting API.
   4 *
   5 * Handles many functions for formatting output.
   6 *
   7 * @package WordPress
   8 */
   9
  10/**
  11 * Replaces common plain text characters into formatted entities
  12 *
  13 * As an example,
  14 * <code>
  15 * 'cause today's effort makes it worth tomorrow's "holiday"...
  16 * </code>
  17 * Becomes:
  18 * <code>
  19 * &#8217;cause today&#8217;s effort makes it worth tomorrow&#8217;s &#8220;holiday&#8221;&#8230;
  20 * </code>
  21 * Code within certain html blocks are skipped.
  22 *
  23 * @since 0.71
  24 * @uses $wp_cockneyreplace Array of formatted entities for certain common phrases
  25 *
  26 * @param string $text The text to be formatted
  27 * @return string The string replaced with html entities
  28 */
  29function wptexturize($text) {
  30	global $wp_cockneyreplace;
  31	static $static_characters, $static_replacements, $dynamic_characters, $dynamic_replacements,
  32		$default_no_texturize_tags, $default_no_texturize_shortcodes;
  33
  34	// No need to set up these static variables more than once
  35	if ( ! isset( $static_characters ) ) {
  36		/* translators: opening curly double quote */
  37		$opening_quote = _x( '&#8220;', 'opening curly double quote' );
  38		/* translators: closing curly double quote */
  39		$closing_quote = _x( '&#8221;', 'closing curly double quote' );
  40
  41		/* translators: apostrophe, for example in 'cause or can't */
  42		$apos = _x( '&#8217;', 'apostrophe' );
  43
  44		/* translators: prime, for example in 9' (nine feet) */
  45		$prime = _x( '&#8242;', 'prime' );
  46		/* translators: double prime, for example in 9" (nine inches) */
  47		$double_prime = _x( '&#8243;', 'double prime' );
  48
  49		/* translators: opening curly single quote */
  50		$opening_single_quote = _x( '&#8216;', 'opening curly single quote' );
  51		/* translators: closing curly single quote */
  52		$closing_single_quote = _x( '&#8217;', 'closing curly single quote' );
  53
  54		/* translators: en dash */
  55		$en_dash = _x( '&#8211;', 'en dash' );
  56		/* translators: em dash */
  57		$em_dash = _x( '&#8212;', 'em dash' );
  58
  59		$default_no_texturize_tags = array('pre', 'code', 'kbd', 'style', 'script', 'tt');
  60		$default_no_texturize_shortcodes = array('code');
  61
  62		// if a plugin has provided an autocorrect array, use it
  63		if ( isset($wp_cockneyreplace) ) {
  64			$cockney = array_keys($wp_cockneyreplace);
  65			$cockneyreplace = array_values($wp_cockneyreplace);
  66		} elseif ( "'" != $apos ) { // Only bother if we're doing a replacement.
  67			$cockney = array( "'tain't", "'twere", "'twas", "'tis", "'twill", "'til", "'bout", "'nuff", "'round", "'cause" );
  68			$cockneyreplace = array( $apos . "tain" . $apos . "t", $apos . "twere", $apos . "twas", $apos . "tis", $apos . "twill", $apos . "til", $apos . "bout", $apos . "nuff", $apos . "round", $apos . "cause" );
  69		} else {
  70			$cockney = $cockneyreplace = array();
  71		}
  72
  73		$static_characters = array_merge( array( '---', ' -- ', '--', ' - ', 'xn&#8211;', '...', '``', '\'\'', ' (tm)' ), $cockney );
  74		$static_replacements = array_merge( array( $em_dash, ' ' . $em_dash . ' ', $en_dash, ' ' . $en_dash . ' ', 'xn--', '&#8230;', $opening_quote, $closing_quote, ' &#8482;' ), $cockneyreplace );
  75
  76		/*
  77		 * Regex for common whitespace characters.
  78		 *
  79		 * By default, spaces include new lines, tabs, nbsp entities, and the UTF-8 nbsp.
  80		 * This is designed to replace the PCRE \s sequence.  In #WP22692, that sequence
  81		 * was found to be unreliable due to random inclusion of the A0 byte.
  82		 */
  83		$spaces = '[\r\n\t ]|\xC2\xA0|&nbsp;';
  84
  85
  86		// Pattern-based replacements of characters.
  87		$dynamic = array();
  88
  89		// '99 '99s '99's (apostrophe)
  90		if ( "'" !== $apos ) {
  91			$dynamic[ '/\'(?=\d)/' ] = $apos;
  92		}
  93
  94		// Single quote at start, or preceded by (, {, <, [, ", or spaces.
  95		if ( "'" !== $opening_single_quote ) {
  96			$dynamic[ '/(?<=\A|[([{<"]|' . $spaces . ')\'/' ] = $opening_single_quote;
  97		}
  98
  99		// 9" (double prime)
 100		if ( '"' !== $double_prime ) {
 101			$dynamic[ '/(?<=\d)"/' ] = $double_prime;
 102		}
 103
 104		// 9' (prime)
 105		if ( "'" !== $prime ) {
 106			$dynamic[ '/(?<=\d)\'/' ] = $prime;
 107		}
 108
 109		// Apostrophe in a word.  No spaces or double primes.
 110		if ( "'" !== $apos ) {
 111			$dynamic[ '/(?<!' . $spaces . ')\'(?!\'|' . $spaces . ')/' ] = $apos;
 112		}
 113
 114		// Double quote at start, or preceded by (, {, <, [, or spaces, and not followed by spaces.
 115		if ( '"' !== $opening_quote ) {
 116			$dynamic[ '/(?<=\A|[([{<]|' . $spaces . ')"(?!' . $spaces . ')/' ] = $opening_quote;
 117		}
 118
 119		// Any remaining double quotes.
 120		if ( '"' !== $closing_quote ) {
 121			$dynamic[ '/"/' ] = $closing_quote;
 122		}
 123
 124		// Single quotes followed by spaces or a period.
 125		if ( "'" !== $closing_single_quote ) {
 126			$dynamic[ '/\'(?=\Z|\.|' . $spaces . ')/' ] = $closing_single_quote;
 127		}
 128
 129		$dynamic_characters = array_keys( $dynamic );
 130		$dynamic_replacements = array_values( $dynamic );
 131	}
 132
 133	// Transform into regexp sub-expression used in _wptexturize_pushpop_element
 134	// Must do this every time in case plugins use these filters in a context sensitive manner
 135	/**
 136	 * Filter the list of HTML elements not to texturize.
 137	 *
 138	 * @since 2.8.0
 139	 *
 140	 * @param array $default_no_texturize_tags An array of HTML element names.
 141	 */
 142	$no_texturize_tags = '(' . implode( '|', apply_filters( 'no_texturize_tags', $default_no_texturize_tags ) ) . ')';
 143	/**
 144	 * Filter the list of shortcodes not to texturize.
 145	 *
 146	 * @since 2.8.0
 147	 *
 148	 * @param array $default_no_texturize_shortcodes An array of shortcode names.
 149	 */
 150	$no_texturize_shortcodes = '(' . implode( '|', apply_filters( 'no_texturize_shortcodes', $default_no_texturize_shortcodes ) ) . ')';
 151
 152	$no_texturize_tags_stack = array();
 153	$no_texturize_shortcodes_stack = array();
 154
 155	$textarr = preg_split('/(<.*>|\[.*\])/Us', $text, -1, PREG_SPLIT_DELIM_CAPTURE);
 156
 157	foreach ( $textarr as &$curl ) {
 158		if ( empty( $curl ) ) {
 159			continue;
 160		}
 161
 162		// Only call _wptexturize_pushpop_element if first char is correct tag opening
 163		$first = $curl[0];
 164		if ( '<' === $first ) {
 165			_wptexturize_pushpop_element($curl, $no_texturize_tags_stack, $no_texturize_tags, '<', '>');
 166		} elseif ( '[' === $first ) {
 167			_wptexturize_pushpop_element($curl, $no_texturize_shortcodes_stack, $no_texturize_shortcodes, '[', ']');
 168		} elseif ( empty($no_texturize_shortcodes_stack) && empty($no_texturize_tags_stack) ) {
 169
 170			// This is not a tag, nor is the texturization disabled static strings
 171			$curl = str_replace($static_characters, $static_replacements, $curl);
 172
 173			// regular expressions
 174			$curl = preg_replace($dynamic_characters, $dynamic_replacements, $curl);
 175
 176			// 9x9 (times)
 177			if ( 1 === preg_match( '/(?<=\d)x\d/', $text ) ) {
 178				// Searching for a digit is 10 times more expensive than for the x, so we avoid doing this one!
 179				$curl = preg_replace( '/\b(\d+)x(\d+)\b/', '$1&#215;$2', $curl );
 180			}
 181		}
 182
 183		// Replace each & with &#038; unless it already looks like an entity.
 184		$curl = preg_replace('/&([^#])(?![a-zA-Z1-4]{1,8};)/', '&#038;$1', $curl);
 185	}
 186	return implode( '', $textarr );
 187}
 188
 189/**
 190 * Search for disabled element tags. Push element to stack on tag open and pop
 191 * on tag close. Assumes first character of $text is tag opening.
 192 *
 193 * @since 2.9.0
 194 * @access private
 195 *
 196 * @param string $text Text to check. First character is assumed to be $opening
 197 * @param array $stack Array used as stack of opened tag elements
 198 * @param string $disabled_elements Tags to match against formatted as regexp sub-expression
 199 * @param string $opening Tag opening character, assumed to be 1 character long
 200 * @param string $closing Tag closing character
 201 */
 202function _wptexturize_pushpop_element($text, &$stack, $disabled_elements, $opening = '<', $closing = '>') {
 203	// Check if it is a closing tag -- otherwise assume opening tag
 204	if (strncmp($opening . '/', $text, 2)) {
 205		// Opening? Check $text+1 against disabled elements
 206		if (preg_match('/^' . $disabled_elements . '\b/', substr($text, 1), $matches)) {
 207			/*
 208			 * This disables texturize until we find a closing tag of our type
 209			 * (e.g. <pre>) even if there was invalid nesting before that
 210			 *
 211			 * Example: in the case <pre>sadsadasd</code>"baba"</pre>
 212			 *          "baba" won't be texturize
 213			 */
 214
 215			array_push($stack, $matches[1]);
 216		}
 217	} else {
 218		// Closing? Check $text+2 against disabled elements
 219		$c = preg_quote($closing, '/');
 220		if (preg_match('/^' . $disabled_elements . $c . '/', substr($text, 2), $matches)) {
 221			$last = array_pop($stack);
 222
 223			// Make sure it matches the opening tag
 224			if ( $last != $matches[1] ) {
 225				array_push( $stack, $last );
 226			}
 227		}
 228	}
 229}
 230
 231/**
 232 * Replaces double line-breaks with paragraph elements.
 233 *
 234 * A group of regex replaces used to identify text formatted with newlines and
 235 * replace double line-breaks with HTML paragraph tags. The remaining
 236 * line-breaks after conversion become <<br />> tags, unless $br is set to '0'
 237 * or 'false'.
 238 *
 239 * @since 0.71
 240 *
 241 * @param string $pee The text which has to be formatted.
 242 * @param bool $br Optional. If set, this will convert all remaining line-breaks after paragraphing. Default true.
 243 * @return string Text which has been converted into correct paragraph tags.
 244 */
 245function wpautop($pee, $br = true) {
 246	$pre_tags = array();
 247
 248	if ( trim($pee) === '' )
 249		return '';
 250
 251	$pee = $pee . "\n"; // just to make things a little easier, pad the end
 252
 253	if ( strpos($pee, '<pre') !== false ) {
 254		$pee_parts = explode( '</pre>', $pee );
 255		$last_pee = array_pop($pee_parts);
 256		$pee = '';
 257		$i = 0;
 258
 259		foreach ( $pee_parts as $pee_part ) {
 260			$start = strpos($pee_part, '<pre');
 261
 262			// Malformed html?
 263			if ( $start === false ) {
 264				$pee .= $pee_part;
 265				continue;
 266			}
 267
 268			$name = "<pre wp-pre-tag-$i></pre>";
 269			$pre_tags[$name] = substr( $pee_part, $start ) . '</pre>';
 270
 271			$pee .= substr( $pee_part, 0, $start ) . $name;
 272			$i++;
 273		}
 274
 275		$pee .= $last_pee;
 276	}
 277
 278	$pee = preg_replace('|<br />\s*<br />|', "\n\n", $pee);
 279	// Space things out a little
 280	$allblocks = '(?:table|thead|tfoot|caption|col|colgroup|tbody|tr|td|th|div|dl|dd|dt|ul|ol|li|pre|form|map|area|blockquote|address|math|style|p|h[1-6]|hr|fieldset|noscript|legend|section|article|aside|hgroup|header|footer|nav|figure|details|menu|summary)';
 281	$pee = preg_replace('!(<' . $allblocks . '[^>]*>)!', "\n$1", $pee);
 282	$pee = preg_replace('!(</' . $allblocks . '>)!', "$1\n\n", $pee);
 283	$pee = str_replace(array("\r\n", "\r"), "\n", $pee); // cross-platform newlines
 284
 285	if ( strpos( $pee, '</object>' ) !== false ) {
 286		// no P/BR around param and embed
 287		$pee = preg_replace( '|(<object[^>]*>)\s*|', '$1', $pee );
 288		$pee = preg_replace( '|\s*</object>|', '</object>', $pee );
 289		$pee = preg_replace( '%\s*(</?(?:param|embed)[^>]*>)\s*%', '$1', $pee );
 290	}
 291
 292	if ( strpos( $pee, '<source' ) !== false || strpos( $pee, '<track' ) !== false ) {
 293		// no P/BR around source and track
 294		$pee = preg_replace( '%([<\[](?:audio|video)[^>\]]*[>\]])\s*%', '$1', $pee );
 295		$pee = preg_replace( '%\s*([<\[]/(?:audio|video)[>\]])%', '$1', $pee );
 296		$pee = preg_replace( '%\s*(<(?:source|track)[^>]*>)\s*%', '$1', $pee );
 297	}
 298
 299	$pee = preg_replace("/\n\n+/", "\n\n", $pee); // take care of duplicates
 300	// make paragraphs, including one at the end
 301	$pees = preg_split('/\n\s*\n/', $pee, -1, PREG_SPLIT_NO_EMPTY);
 302	$pee = '';
 303
 304	foreach ( $pees as $tinkle ) {
 305		$pee .= '<p>' . trim($tinkle, "\n") . "</p>\n";
 306	}
 307
 308	$pee = preg_replace('|<p>\s*</p>|', '', $pee); // under certain strange conditions it could create a P of entirely whitespace
 309	$pee = preg_replace('!<p>([^<]+)</(div|address|form)>!', "<p>$1</p></$2>", $pee);
 310	$pee = preg_replace('!<p>\s*(</?' . $allblocks . '[^>]*>)\s*</p>!', "$1", $pee); // don't pee all over a tag
 311	$pee = preg_replace("|<p>(<li.+?)</p>|", "$1", $pee); // problem with nested lists
 312	$pee = preg_replace('|<p><blockquote([^>]*)>|i', "<blockquote$1><p>", $pee);
 313	$pee = str_replace('</blockquote></p>', '</p></blockquote>', $pee);
 314	$pee = preg_replace('!<p>\s*(</?' . $allblocks . '[^>]*>)!', "$1", $pee);
 315	$pee = preg_replace('!(</?' . $allblocks . '[^>]*>)\s*</p>!', "$1", $pee);
 316
 317	if ( $br ) {
 318		$pee = preg_replace_callback('/<(script|style).*?<\/\\1>/s', '_autop_newline_preservation_helper', $pee);
 319		$pee = preg_replace('|(?<!<br />)\s*\n|', "<br />\n", $pee); // optionally make line breaks
 320		$pee = str_replace('<WPPreserveNewline />', "\n", $pee);
 321	}
 322
 323	$pee = preg_replace('!(</?' . $allblocks . '[^>]*>)\s*<br />!', "$1", $pee);
 324	$pee = preg_replace('!<br />(\s*</?(?:p|li|div|dl|dd|dt|th|pre|td|ul|ol)[^>]*>)!', '$1', $pee);
 325	$pee = preg_replace( "|\n</p>$|", '</p>', $pee );
 326
 327	if ( !empty($pre_tags) )
 328		$pee = str_replace(array_keys($pre_tags), array_values($pre_tags), $pee);
 329
 330	return $pee;
 331}
 332
 333/**
 334 * Newline preservation help function for wpautop
 335 *
 336 * @since 3.1.0
 337 * @access private
 338 *
 339 * @param array $matches preg_replace_callback matches array
 340 * @return string
 341 */
 342function _autop_newline_preservation_helper( $matches ) {
 343	return str_replace("\n", "<WPPreserveNewline />", $matches[0]);
 344}
 345
 346/**
 347 * Don't auto-p wrap shortcodes that stand alone
 348 *
 349 * Ensures that shortcodes are not wrapped in <<p>>...<</p>>.
 350 *
 351 * @since 2.9.0
 352 *
 353 * @param string $pee The content.
 354 * @return string The filtered content.
 355 */
 356function shortcode_unautop( $pee ) {
 357	global $shortcode_tags;
 358
 359	if ( empty( $shortcode_tags ) || !is_array( $shortcode_tags ) ) {
 360		return $pee;
 361	}
 362
 363	$tagregexp = join( '|', array_map( 'preg_quote', array_keys( $shortcode_tags ) ) );
 364
 365	$pattern =
 366		  '/'
 367		. '<p>'                              // Opening paragraph
 368		. '\\s*+'                            // Optional leading whitespace
 369		. '('                                // 1: The shortcode
 370		.     '\\['                          // Opening bracket
 371		.     "($tagregexp)"                 // 2: Shortcode name
 372		.     '(?![\\w-])'                   // Not followed by word character or hyphen
 373		                                     // Unroll the loop: Inside the opening shortcode tag
 374		.     '[^\\]\\/]*'                   // Not a closing bracket or forward slash
 375		.     '(?:'
 376		.         '\\/(?!\\])'               // A forward slash not followed by a closing bracket
 377		.         '[^\\]\\/]*'               // Not a closing bracket or forward slash
 378		.     ')*?'
 379		.     '(?:'
 380		.         '\\/\\]'                   // Self closing tag and closing bracket
 381		.     '|'
 382		.         '\\]'                      // Closing bracket
 383		.         '(?:'                      // Unroll the loop: Optionally, anything between the opening and closing shortcode tags
 384		.             '[^\\[]*+'             // Not an opening bracket
 385		.             '(?:'
 386		.                 '\\[(?!\\/\\2\\])' // An opening bracket not followed by the closing shortcode tag
 387		.                 '[^\\[]*+'         // Not an opening bracket
 388		.             ')*+'
 389		.             '\\[\\/\\2\\]'         // Closing shortcode tag
 390		.         ')?'
 391		.     ')'
 392		. ')'
 393		. '\\s*+'                            // optional trailing whitespace
 394		. '<\\/p>'                           // closing paragraph
 395		. '/s';
 396
 397	return preg_replace( $pattern, '$1', $pee );
 398}
 399
 400/**
 401 * Checks to see if a string is utf8 encoded.
 402 *
 403 * NOTE: This function checks for 5-Byte sequences, UTF8
 404 *       has Bytes Sequences with a maximum length of 4.
 405 *
 406 * @author bmorel at ssi dot fr (modified)
 407 * @since 1.2.1
 408 *
 409 * @param string $str The string to be checked
 410 * @return bool True if $str fits a UTF-8 model, false otherwise.
 411 */
 412function seems_utf8($str) {
 413	$length = strlen($str);
 414	for ($i=0; $i < $length; $i++) {
 415		$c = ord($str[$i]);
 416		if ($c < 0x80) $n = 0; # 0bbbbbbb
 417		elseif (($c & 0xE0) == 0xC0) $n=1; # 110bbbbb
 418		elseif (($c & 0xF0) == 0xE0) $n=2; # 1110bbbb
 419		elseif (($c & 0xF8) == 0xF0) $n=3; # 11110bbb
 420		elseif (($c & 0xFC) == 0xF8) $n=4; # 111110bb
 421		elseif (($c & 0xFE) == 0xFC) $n=5; # 1111110b
 422		else return false; # Does not match any model
 423		for ($j=0; $j<$n; $j++) { # n bytes matching 10bbbbbb follow ?
 424			if ((++$i == $length) || ((ord($str[$i]) & 0xC0) != 0x80))
 425				return false;
 426		}
 427	}
 428	return true;
 429}
 430
 431/**
 432 * Converts a number of special characters into their HTML entities.
 433 *
 434 * Specifically deals with: &, <, >, ", and '.
 435 *
 436 * $quote_style can be set to ENT_COMPAT to encode " to
 437 * &quot;, or ENT_QUOTES to do both. Default is ENT_NOQUOTES where no quotes are encoded.
 438 *
 439 * @since 1.2.2
 440 * @access private
 441 *
 442 * @param string $string The text which is to be encoded.
 443 * @param mixed $quote_style Optional. Converts double quotes if set to ENT_COMPAT, both single and double if set to ENT_QUOTES or none if set to ENT_NOQUOTES. Also compatible with old values; converting single quotes if set to 'single', double if set to 'double' or both if otherwise set. Default is ENT_NOQUOTES.
 444 * @param string $charset Optional. The character encoding of the string. Default is false.
 445 * @param boolean $double_encode Optional. Whether to encode existing html entities. Default is false.
 446 * @return string The encoded text with HTML entities.
 447 */
 448function _wp_specialchars( $string, $quote_style = ENT_NOQUOTES, $charset = false, $double_encode = false ) {
 449	$string = (string) $string;
 450
 451	if ( 0 === strlen( $string ) )
 452		return '';
 453
 454	// Don't bother if there are no specialchars - saves some processing
 455	if ( ! preg_match( '/[&<>"\']/', $string ) )
 456		return $string;
 457
 458	// Account for the previous behaviour of the function when the $quote_style is not an accepted value
 459	if ( empty( $quote_style ) )
 460		$quote_style = ENT_NOQUOTES;
 461	elseif ( ! in_array( $quote_style, array( 0, 2, 3, 'single', 'double' ), true ) )
 462		$quote_style = ENT_QUOTES;
 463
 464	// Store the site charset as a static to avoid multiple calls to wp_load_alloptions()
 465	if ( ! $charset ) {
 466		static $_charset;
 467		if ( ! isset( $_charset ) ) {
 468			$alloptions = wp_load_alloptions();
 469			$_charset = isset( $alloptions['blog_charset'] ) ? $alloptions['blog_charset'] : '';
 470		}
 471		$charset = $_charset;
 472	}
 473
 474	if ( in_array( $charset, array( 'utf8', 'utf-8', 'UTF8' ) ) )
 475		$charset = 'UTF-8';
 476
 477	$_quote_style = $quote_style;
 478
 479	if ( $quote_style === 'double' ) {
 480		$quote_style = ENT_COMPAT;
 481		$_quote_style = ENT_COMPAT;
 482	} elseif ( $quote_style === 'single' ) {
 483		$quote_style = ENT_NOQUOTES;
 484	}
 485
 486	// Handle double encoding ourselves
 487	if ( $double_encode ) {
 488		$string = @htmlspecialchars( $string, $quote_style, $charset );
 489	} else {
 490		// Decode &amp; into &
 491		$string = wp_specialchars_decode( $string, $_quote_style );
 492
 493		// Guarantee every &entity; is valid or re-encode the &
 494		$string = wp_kses_normalize_entities( $string );
 495
 496		// Now re-encode everything except &entity;
 497		$string = preg_split( '/(&#?x?[0-9a-z]+;)/i', $string, -1, PREG_SPLIT_DELIM_CAPTURE );
 498
 499		for ( $i = 0; $i < count( $string ); $i += 2 )
 500			$string[$i] = @htmlspecialchars( $string[$i], $quote_style, $charset );
 501
 502		$string = implode( '', $string );
 503	}
 504
 505	// Backwards compatibility
 506	if ( 'single' === $_quote_style )
 507		$string = str_replace( "'", '&#039;', $string );
 508
 509	return $string;
 510}
 511
 512/**
 513 * Converts a number of HTML entities into their special characters.
 514 *
 515 * Specifically deals with: &, <, >, ", and '.
 516 *
 517 * $quote_style can be set to ENT_COMPAT to decode " entities,
 518 * or ENT_QUOTES to do both " and '. Default is ENT_NOQUOTES where no quotes are decoded.
 519 *
 520 * @since 2.8.0
 521 *
 522 * @param string $string The text which is to be decoded.
 523 * @param mixed $quote_style Optional. Converts double quotes if set to ENT_COMPAT, both single and double if set to ENT_QUOTES or none if set to ENT_NOQUOTES. Also compatible with old _wp_specialchars() values; converting single quotes if set to 'single', double if set to 'double' or both if otherwise set. Default is ENT_NOQUOTES.
 524 * @return string The decoded text without HTML entities.
 525 */
 526function wp_specialchars_decode( $string, $quote_style = ENT_NOQUOTES ) {
 527	$string = (string) $string;
 528
 529	if ( 0 === strlen( $string ) ) {
 530		return '';
 531	}
 532
 533	// Don't bother if there are no entities - saves a lot of processing
 534	if ( strpos( $string, '&' ) === false ) {
 535		return $string;
 536	}
 537
 538	// Match the previous behaviour of _wp_specialchars() when the $quote_style is not an accepted value
 539	if ( empty( $quote_style ) ) {
 540		$quote_style = ENT_NOQUOTES;
 541	} elseif ( !in_array( $quote_style, array( 0, 2, 3, 'single', 'double' ), true ) ) {
 542		$quote_style = ENT_QUOTES;
 543	}
 544
 545	// More complete than get_html_translation_table( HTML_SPECIALCHARS )
 546	$single = array( '&#039;'  => '\'', '&#x27;' => '\'' );
 547	$single_preg = array( '/&#0*39;/'  => '&#039;', '/&#x0*27;/i' => '&#x27;' );
 548	$double = array( '&quot;' => '"', '&#034;'  => '"', '&#x22;' => '"' );
 549	$double_preg = array( '/&#0*34;/'  => '&#034;', '/&#x0*22;/i' => '&#x22;' );
 550	$others = array( '&lt;'   => '<', '&#060;'  => '<', '&gt;'   => '>', '&#062;'  => '>', '&amp;'  => '&', '&#038;'  => '&', '&#x26;' => '&' );
 551	$others_preg = array( '/&#0*60;/'  => '&#060;', '/&#0*62;/'  => '&#062;', '/&#0*38;/'  => '&#038;', '/&#x0*26;/i' => '&#x26;' );
 552
 553	if ( $quote_style === ENT_QUOTES ) {
 554		$translation = array_merge( $single, $double, $others );
 555		$translation_preg = array_merge( $single_preg, $double_preg, $others_preg );
 556	} elseif ( $quote_style === ENT_COMPAT || $quote_style === 'double' ) {
 557		$translation = array_merge( $double, $others );
 558		$translation_preg = array_merge( $double_preg, $others_preg );
 559	} elseif ( $quote_style === 'single' ) {
 560		$translation = array_merge( $single, $others );
 561		$translation_preg = array_merge( $single_preg, $others_preg );
 562	} elseif ( $quote_style === ENT_NOQUOTES ) {
 563		$translation = $others;
 564		$translation_preg = $others_preg;
 565	}
 566
 567	// Remove zero padding on numeric entities
 568	$string = preg_replace( array_keys( $translation_preg ), array_values( $translation_preg ), $string );
 569
 570	// Replace characters according to translation table
 571	return strtr( $string, $translation );
 572}
 573
 574/**
 575 * Checks for invalid UTF8 in a string.
 576 *
 577 * @since 2.8.0
 578 *
 579 * @param string $string The text which is to be checked.
 580 * @param boolean $strip Optional. Whether to attempt to strip out invalid UTF8. Default is false.
 581 * @return string The checked text.
 582 */
 583function wp_check_invalid_utf8( $string, $strip = false ) {
 584	$string = (string) $string;
 585
 586	if ( 0 === strlen( $string ) ) {
 587		return '';
 588	}
 589
 590	// Store the site charset as a static to avoid multiple calls to get_option()
 591	static $is_utf8;
 592	if ( !isset( $is_utf8 ) ) {
 593		$is_utf8 = in_array( get_option( 'blog_charset' ), array( 'utf8', 'utf-8', 'UTF8', 'UTF-8' ) );
 594	}
 595	if ( !$is_utf8 ) {
 596		return $string;
 597	}
 598
 599	// Check for support for utf8 in the installed PCRE library once and store the result in a static
 600	static $utf8_pcre;
 601	if ( !isset( $utf8_pcre ) ) {
 602		$utf8_pcre = @preg_match( '/^./u', 'a' );
 603	}
 604	// We can't demand utf8 in the PCRE installation, so just return the string in those cases
 605	if ( !$utf8_pcre ) {
 606		return $string;
 607	}
 608
 609	// preg_match fails when it encounters invalid UTF8 in $string
 610	if ( 1 === @preg_match( '/^./us', $string ) ) {
 611		return $string;
 612	}
 613
 614	// Attempt to strip the bad chars if requested (not recommended)
 615	if ( $strip && function_exists( 'iconv' ) ) {
 616		return iconv( 'utf-8', 'utf-8', $string );
 617	}
 618
 619	return '';
 620}
 621
 622/**
 623 * Encode the Unicode values to be used in the URI.
 624 *
 625 * @since 1.5.0
 626 *
 627 * @param string $utf8_string
 628 * @param int $length Max length of the string
 629 * @return string String with Unicode encoded for URI.
 630 */
 631function utf8_uri_encode( $utf8_string, $length = 0 ) {
 632	$unicode = '';
 633	$values = array();
 634	$num_octets = 1;
 635	$unicode_length = 0;
 636
 637	$string_length = strlen( $utf8_string );
 638	for ($i = 0; $i < $string_length; $i++ ) {
 639
 640		$value = ord( $utf8_string[ $i ] );
 641
 642		if ( $value < 128 ) {
 643			if ( $length && ( $unicode_length >= $length ) )
 644				break;
 645			$unicode .= chr($value);
 646			$unicode_length++;
 647		} else {
 648			if ( count( $values ) == 0 ) $num_octets = ( $value < 224 ) ? 2 : 3;
 649
 650			$values[] = $value;
 651
 652			if ( $length && ( $unicode_length + ($num_octets * 3) ) > $length )
 653				break;
 654			if ( count( $values ) == $num_octets ) {
 655				if ($num_octets == 3) {
 656					$unicode .= '%' . dechex($values[0]) . '%' . dechex($values[1]) . '%' . dechex($values[2]);
 657					$unicode_length += 9;
 658				} else {
 659					$unicode .= '%' . dechex($values[0]) . '%' . dechex($values[1]);
 660					$unicode_length += 6;
 661				}
 662
 663				$values = array();
 664				$num_octets = 1;
 665			}
 666		}
 667	}
 668
 669	return $unicode;
 670}
 671
 672/**
 673 * Converts all accent characters to ASCII characters.
 674 *
 675 * If there are no accent characters, then the string given is just returned.
 676 *
 677 * @since 1.2.1
 678 *
 679 * @param string $string Text that might have accent characters
 680 * @return string Filtered string with replaced "nice" characters.
 681 */
 682function remove_accents($string) {
 683	if ( !preg_match('/[\x80-\xff]/', $string) )
 684		return $string;
 685
 686	if (seems_utf8($string)) {
 687		$chars = array(
 688		// Decompositions for Latin-1 Supplement
 689		chr(194).chr(170) => 'a', chr(194).chr(186) => 'o',
 690		chr(195).chr(128) => 'A', chr(195).chr(129) => 'A',
 691		chr(195).chr(130) => 'A', chr(195).chr(131) => 'A',
 692		chr(195).chr(132) => 'A', chr(195).chr(133) => 'A',
 693		chr(195).chr(134) => 'AE',chr(195).chr(135) => 'C',
 694		chr(195).chr(136) => 'E', chr(195).chr(137) => 'E',
 695		chr(195).chr(138) => 'E', chr(195).chr(139) => 'E',
 696		chr(195).chr(140) => 'I', chr(195).chr(141) => 'I',
 697		chr(195).chr(142) => 'I', chr(195).chr(143) => 'I',
 698		chr(195).chr(144) => 'D', chr(195).chr(145) => 'N',
 699		chr(195).chr(146) => 'O', chr(195).chr(147) => 'O',
 700		chr(195).chr(148) => 'O', chr(195).chr(149) => 'O',
 701		chr(195).chr(150) => 'O', chr(195).chr(153) => 'U',
 702		chr(195).chr(154) => 'U', chr(195).chr(155) => 'U',
 703		chr(195).chr(156) => 'U', chr(195).chr(157) => 'Y',
 704		chr(195).chr(158) => 'TH',chr(195).chr(159) => 's',
 705		chr(195).chr(160) => 'a', chr(195).chr(161) => 'a',
 706		chr(195).chr(162) => 'a', chr(195).chr(163) => 'a',
 707		chr(195).chr(164) => 'a', chr(195).chr(165) => 'a',
 708		chr(195).chr(166) => 'ae',chr(195).chr(167) => 'c',
 709		chr(195).chr(168) => 'e', chr(195).chr(169) => 'e',
 710		chr(195).chr(170) => 'e', chr(195).chr(171) => 'e',
 711		chr(195).chr(172) => 'i', chr(195).chr(173) => 'i',
 712		chr(195).chr(174) => 'i', chr(195).chr(175) => 'i',
 713		chr(195).chr(176) => 'd', chr(195).chr(177) => 'n',
 714		chr(195).chr(178) => 'o', chr(195).chr(179) => 'o',
 715		chr(195).chr(180) => 'o', chr(195).chr(181) => 'o',
 716		chr(195).chr(182) => 'o', chr(195).chr(184) => 'o',
 717		chr(195).chr(185) => 'u', chr(195).chr(186) => 'u',
 718		chr(195).chr(187) => 'u', chr(195).chr(188) => 'u',
 719		chr(195).chr(189) => 'y', chr(195).chr(190) => 'th',
 720		chr(195).chr(191) => 'y', chr(195).chr(152) => 'O',
 721		// Decompositions for Latin Extended-A
 722		chr(196).chr(128) => 'A', chr(196).chr(129) => 'a',
 723		chr(196).chr(130) => 'A', chr(196).chr(131) => 'a',
 724		chr(196).chr(132) => 'A', chr(196).chr(133) => 'a',
 725		chr(196).chr(134) => 'C', chr(196).chr(135) => 'c',
 726		chr(196).chr(136) => 'C', chr(196).chr(137) => 'c',
 727		chr(196).chr(138) => 'C', chr(196).chr(139) => 'c',
 728		chr(196).chr(140) => 'C', chr(196).chr(141) => 'c',
 729		chr(196).chr(142) => 'D', chr(196).chr(143) => 'd',
 730		chr(196).chr(144) => 'D', chr(196).chr(145) => 'd',
 731		chr(196).chr(146) => 'E', chr(196).chr(147) => 'e',
 732		chr(196).chr(148) => 'E', chr(196).chr(149) => 'e',
 733		chr(196).chr(150) => 'E', chr(196).chr(151) => 'e',
 734		chr(196).chr(152) => 'E', chr(196).chr(153) => 'e',
 735		chr(196).chr(154) => 'E', chr(196).chr(155) => 'e',
 736		chr(196).chr(156) => 'G', chr(196).chr(157) => 'g',
 737		chr(196).chr(158) => 'G', chr(196).chr(159) => 'g',
 738		chr(196).chr(160) => 'G', chr(196).chr(161) => 'g',
 739		chr(196).chr(162) => 'G', chr(196).chr(163) => 'g',
 740		chr(196).chr(164) => 'H', chr(196).chr(165) => 'h',
 741		chr(196).chr(166) => 'H', chr(196).chr(167) => 'h',
 742		chr(196).chr(168) => 'I', chr(196).chr(169) => 'i',
 743		chr(196).chr(170) => 'I', chr(196).chr(171) => 'i',
 744		chr(196).chr(172) => 'I', chr(196).chr(173) => 'i',
 745		chr(196).chr(174) => 'I', chr(196).chr(175) => 'i',
 746		chr(196).chr(176) => 'I', chr(196).chr(177) => 'i',
 747		chr(196).chr(178) => 'IJ',chr(196).chr(179) => 'ij',
 748		chr(196).chr(180) => 'J', chr(196).chr(181) => 'j',
 749		chr(196).chr(182) => 'K', chr(196).chr(183) => 'k',
 750		chr(196).chr(184) => 'k', chr(196).chr(185) => 'L',
 751		chr(196).chr(186) => 'l', chr(196).chr(187) => 'L',
 752		chr(196).chr(188) => 'l', chr(196).chr(189) => 'L',
 753		chr(196).chr(190) => 'l', chr(196).chr(191) => 'L',
 754		chr(197).chr(128) => 'l', chr(197).chr(129) => 'L',
 755		chr(197).chr(130) => 'l', chr(197).chr(131) => 'N',
 756		chr(197).chr(132) => 'n', chr(197).chr(133) => 'N',
 757		chr(197).chr(134) => 'n', chr(197).chr(135) => 'N',
 758		chr(197).chr(136) => 'n', chr(197).chr(137) => 'N',
 759		chr(197).chr(138) => 'n', chr(197).chr(139) => 'N',
 760		chr(197).chr(140) => 'O', chr(197).chr(141) => 'o',
 761		chr(197).chr(142) => 'O', chr(197).chr(143) => 'o',
 762		chr(197).chr(144) => 'O', chr(197).chr(145) => 'o',
 763		chr(197).chr(146) => 'OE',chr(197).chr(147) => 'oe',
 764		chr(197).chr(148) => 'R',chr(197).chr(149) => 'r',
 765		chr(197).chr(150) => 'R',chr(197).chr(151) => 'r',
 766		chr(197).chr(152) => 'R',chr(197).chr(153) => 'r',
 767		chr(197).chr(154) => 'S',chr(197).chr(155) => 's',
 768		chr(197).chr(156) => 'S',chr(197).chr(157) => 's',
 769		chr(197).chr(158) => 'S',chr(197).chr(159) => 's',
 770		chr(197).chr(160) => 'S', chr(197).chr(161) => 's',
 771		chr(197).chr(162) => 'T', chr(197).chr(163) => 't',
 772		chr(197).chr(164) => 'T', chr(197).chr(165) => 't',
 773		chr(197).chr(166) => 'T', chr(197).chr(167) => 't',
 774		chr(197).chr(168) => 'U', chr(197).chr(169) => 'u',
 775		chr(197).chr(170) => 'U', chr(197).chr(171) => 'u',
 776		chr(197).chr(172) => 'U', chr(197).chr(173) => 'u',
 777		chr(197).chr(174) => 'U', chr(197).chr(175) => 'u',
 778		chr(197).chr(176) => 'U', chr(197).chr(177) => 'u',
 779		chr(197).chr(178) => 'U', chr(197).chr(179) => 'u',
 780		chr(197).chr(180) => 'W', chr(197).chr(181) => 'w',
 781		chr(197).chr(182) => 'Y', chr(197).chr(183) => 'y',
 782		chr(197).chr(184) => 'Y', chr(197).chr(185) => 'Z',
 783		chr(197).chr(186) => 'z', chr(197).chr(187) => 'Z',
 784		chr(197).chr(188) => 'z', chr(197).chr(189) => 'Z',
 785		chr(197).chr(190) => 'z', chr(197).chr(191) => 's',
 786		// Decompositions for Latin Extended-B
 787		chr(200).chr(152) => 'S', chr(200).chr(153) => 's',
 788		chr(200).chr(154) => 'T', chr(200).chr(155) => 't',
 789		// Euro Sign
 790		chr(226).chr(130).chr(172) => 'E',
 791		// GBP (Pound) Sign
 792		chr(194).chr(163) => '',
 793		// Vowels with diacritic (Vietnamese)
 794		// unmarked
 795		chr(198).chr(160) => 'O', chr(198).chr(161) => 'o',
 796		chr(198).chr(175) => 'U', chr(198).chr(176) => 'u',
 797		// grave accent
 798		chr(225).chr(186).chr(166) => 'A', chr(225).chr(186).chr(167) => 'a',
 799		chr(225).chr(186).chr(176) => 'A', chr(225).chr(186).chr(177) => 'a',
 800		chr(225).chr(187).chr(128) => 'E', chr(225).chr(187).chr(129) => 'e',
 801		chr(225).chr(187).chr(146) => 'O', chr(225).chr(187).chr(147) => 'o',
 802		chr(225).chr(187).chr(156) => 'O', chr(225).chr(187).chr(157) => 'o',
 803		chr(225).chr(187).chr(170) => 'U', chr(225).chr(187).chr(171) => 'u',
 804		chr(225).chr(187).chr(178) => 'Y', chr(225).chr(187).chr(179) => 'y',
 805		// hook
 806		chr(225).chr(186).chr(162) => 'A', chr(225).chr(186).chr(163) => 'a',
 807		chr(225).chr(186).chr(168) => 'A', chr(225).chr(186).chr(169) => 'a',
 808		chr(225).chr(186).chr(178) => 'A', chr(225).chr(186).chr(179) => 'a',
 809		chr(225).chr(186).chr(186) => 'E', chr(225).chr(186).chr(187) => 'e',
 810		chr(225).chr(187).chr(130) => 'E', chr(225).chr(187).chr(131) => 'e',
 811		chr(225).chr(187).chr(136) => 'I', chr(225).chr(187).chr(137) => 'i',
 812		chr(225).chr(187).chr(142) => 'O', chr(225).chr(187).chr(143) => 'o',
 813		chr(225).chr(187).chr(148) => 'O', chr(225).chr(187).chr(149) => 'o',
 814		chr(225).chr(187).chr(158) => 'O', chr(225).chr(187).chr(159) => 'o',
 815		chr(225).chr(187).chr(166) => 'U', chr(225).chr(187).chr(167) => 'u',
 816		chr(225).chr(187).chr(172) => 'U', chr(225).chr(187).chr(173) => 'u',
 817		chr(225).chr(187).chr(182) => 'Y', chr(225).chr(187).chr(183) => 'y',
 818		// tilde
 819		chr(225).chr(186).chr(170) => 'A', chr(225).chr(186).chr(171) => 'a',
 820		chr(225).chr(186).chr(180) => 'A', chr(225).chr(186).chr(181) => 'a',
 821		chr(225).chr(186).chr(188) => 'E', chr(225).chr(186).chr(189) => 'e',
 822		chr(225).chr(187).chr(132) => 'E', chr(225).chr(187).chr(133) => 'e',
 823		chr(225).chr(187).chr(150) => 'O', chr(225).chr(187).chr(151) => 'o',
 824		chr(225).chr(187).chr(160) => 'O', chr(225).chr(187).chr(161) => 'o',
 825		chr(225).chr(187).chr(174) => 'U', chr(225).chr(187).chr(175) => 'u',
 826		chr(225).chr(187).chr(184) => 'Y', chr(225).chr(187).chr(185) => 'y',
 827		// acute accent
 828		chr(225).chr(186).chr(164) => 'A', chr(225).chr(186).chr(165) => 'a',
 829		chr(225).chr(186).chr(174) => 'A', chr(225).chr(186).chr(175) => 'a',
 830		chr(225).chr(186).chr(190) => 'E', chr(225).chr(186).chr(191) => 'e',
 831		chr(225).chr(187).chr(144) => 'O', chr(225).chr(187).chr(145) => 'o',
 832		chr(225).chr(187).chr(154) => 'O', chr(225).chr(187).chr(155) => 'o',
 833		chr(225).chr(187).chr(168) => 'U', chr(225).chr(187).chr(169) => 'u',
 834		// dot below
 835		chr(225).chr(186).chr(160) => 'A', chr(225).chr(186).chr(161) => 'a',
 836		chr(225).chr(186).chr(172) => 'A', chr(225).chr(186).chr(173) => 'a',
 837		chr(225).chr(186).chr(182) => 'A', chr(225).chr(186).chr(183) => 'a',
 838		chr(225).chr(186).chr(184) => 'E', chr(225).chr(186).chr(185) => 'e',
 839		chr(225).chr(187).chr(134) => 'E', chr(225).chr(187).chr(135) => 'e',
 840		chr(225).chr(187).chr(138) => 'I', chr(225).chr(187).chr(139) => 'i',
 841		chr(225).chr(187).chr(140) => 'O', chr(225).chr(187).chr(141) => 'o',
 842		chr(225).chr(187).chr(152) => 'O', chr(225).chr(187).chr(153) => 'o',
 843		chr(225).chr(187).chr(162) => 'O', chr(225).chr(187).chr(163) => 'o',
 844		chr(225).chr(187).chr(164) => 'U', chr(225).chr(187).chr(165) => 'u',
 845		chr(225).chr(187).chr(176) => 'U', chr(225).chr(187).chr(177) => 'u',
 846		chr(225).chr(187).chr(180) => 'Y', chr(225).chr(187).chr(181) => 'y',
 847		// Vowels with diacritic (Chinese, Hanyu Pinyin)
 848		chr(201).chr(145) => 'a',
 849		// macron
 850		chr(199).chr(149) => 'U', chr(199).chr(150) => 'u',
 851		// acute accent
 852		chr(199).chr(151) => 'U', chr(199).chr(152) => 'u',
 853		// caron
 854		chr(199).chr(141) => 'A', chr(199).chr(142) => 'a',
 855		chr(199).chr(143) => 'I', chr(199).chr(144) => 'i',
 856		chr(199).chr(145) => 'O', chr(199).chr(146) => 'o',
 857		chr(199).chr(147) => 'U', chr(199).chr(148) => 'u',
 858		chr(199).chr(153) => 'U', chr(199).chr(154) => 'u',
 859		// grave accent
 860		chr(199).chr(155) => 'U', chr(199).chr(156) => 'u',
 861		);
 862
 863		// Used for locale-specific rules
 864		$locale = get_locale();
 865
 866		if ( 'de_DE' == $locale ) {
 867			$chars[ chr(195).chr(132) ] = 'Ae';
 868			$chars[ chr(195).chr(164) ] = 'ae';
 869			$chars[ chr(195).chr(150) ] = 'Oe';
 870			$chars[ chr(195).chr(182) ] = 'oe';
 871			$chars[ chr(195).chr(156) ] = 'Ue';
 872			$chars[ chr(195).chr(188) ] = 'ue';
 873			$chars[ chr(195).chr(159) ] = 'ss';
 874		} elseif ( 'da_DK' === $locale ) {
 875			$chars[ chr(195).chr(134) ] = 'Ae';
 876 			$chars[ chr(195).chr(166) ] = 'ae';
 877			$chars[ chr(195).chr(152) ] = 'Oe';
 878			$chars[ chr(195).chr(184) ] = 'oe';
 879			$chars[ chr(195).chr(133) ] = 'Aa';
 880			$chars[ chr(195).chr(165) ] = 'aa';
 881		}
 882
 883		$string = strtr($string, $chars);
 884	} else {
 885		// Assume ISO-8859-1 if not UTF-8
 886		$chars['in'] = chr(128).chr(131).chr(138).chr(142).chr(154).chr(158)
 887			.chr(159).chr(162).chr(165).chr(181).chr(192).chr(193).chr(194)
 888			.chr(195).chr(196).chr(197).chr(199).chr(200).chr(201).chr(202)
 889			.chr(203).chr(204).chr(205).chr(206).chr(207).chr(209).chr(210)
 890			.chr(211).chr(212).chr(213).chr(214).chr(216).chr(217).chr(218)
 891			.chr(219).chr(220).chr(221).chr(224).chr(225).chr(226).chr(227)
 892			.chr(228).chr(229).chr(231).chr(232).chr(233).chr(234).chr(235)
 893			.chr(236).chr(237).chr(238).chr(239).chr(241).chr(242).chr(243)
 894			.chr(244).chr(245).chr(246).chr(248).chr(249).chr(250).chr(251)
 895			.chr(252).chr(253).chr(255);
 896
 897		$chars['out'] = "EfSZszYcYuAAAAAACEEEEIIIINOOOOOOUUUUYaaaaaaceeeeiiiinoooooouuuuyy";
 898
 899		$string = strtr($string, $chars['in'], $chars['out']);
 900		$double_chars['in'] = array(chr(140), chr(156), chr(198), chr(208), chr(222), chr(223), chr(230), chr(240), chr(254));
 901		$double_chars['out'] = array('OE', 'oe', 'AE', 'DH', 'TH', 'ss', 'ae', 'dh', 'th');
 902		$string = str_replace($double_chars['in'], $double_chars['out'], $string);
 903	}
 904
 905	return $string;
 906}
 907
 908/**
 909 * Sanitizes a filename, replacing whitespace with dashes.
 910 *
 911 * Removes special characters that are illegal in filenames on certain
 912 * operating systems and special characters requiring special escaping
 913 * to manipulate at the command line. Replaces spaces and consecutive
 914 * dashes with a single dash. Trims period, dash and underscore from beginning
 915 * and end of filename.
 916 *
 917 * @since 2.1.0
 918 *
 919 * @param string $filename The filename to be sanitized
 920 * @return string The sanitized filename
 921 */
 922function sanitize_file_name( $filename ) {
 923	$filename_raw = $filename;
 924	$special_chars = array("?", "[", "]", "/", "\\", "=", "<", ">", ":", ";", ",", "'", "\"", "&", "$", "#", "*", "(", ")", "|", "~", "`", "!", "{", "}", chr(0));
 925	/**
 926	 * Filter the list of characters to remove from a filename.
 927	 *
 928	 * @since 2.8.0
 929	 *
 930	 * @param array  $special_chars Characters to remove.
 931	 * @param string $filename_raw  Filename as it was passed into sanitize_file_name().
 932	 */
 933	$special_chars = apply_filters( 'sanitize_file_name_chars', $special_chars, $filename_raw );
 934	$filename = preg_replace( "#\x{00a0}#siu", ' ', $filename );
 935	$filename = str_replace($special_chars, '', $filename);
 936	$filename = preg_replace('/[\s-]+/', '-', $filename);
 937	$filename = trim($filename, '.-_');
 938
 939	// Split the filename into a base and extension[s]
 940	$parts = explode('.', $filename);
 941
 942	// Return if only one extension
 943	if ( count( $parts ) <= 2 ) {
 944		/**
 945		 * Filter a sanitized filename string.
 946		 *
 947		 * @since 2.8.0
 948		 *
 949		 * @param string $filename     Sanitized filename.
 950		 * @param string $filename_raw The filename prior to sanitization.
 951		 */
 952		return apply_filters( 'sanitize_file_name', $filename, $filename_raw );
 953	}
 954
 955	// Process multiple extensions
 956	$filename = array_shift($parts);
 957	$extension = array_pop($parts);
 958	$mimes = get_allowed_mime_types();
 959
 960	/*
 961	 * Loop over any intermediate extensions. Postfix them with a trailing underscore
 962	 * if they are a 2 - 5 character long alpha string not in the extension whitelist.
 963	 */
 964	foreach ( (array) $parts as $part) {
 965		$filename .= '.' . $part;
 966
 967		if ( preg_match("/^[a-zA-Z]{2,5}\d?$/", $part) ) {
 968			$allowed = false;
 969			foreach ( $mimes as $ext_preg => $mime_match ) {
 970				$ext_preg = '!^(' . $ext_preg . ')$!i';
 971				if ( preg_match( $ext_preg, $part ) ) {
 972					$allowed = true;
 973					break;
 974				}
 975			}
 976			if ( !$allowed )
 977				$filename .= '_';
 978		}
 979	}
 980	$filename .= '.' . $extension;
 981	/** This filter is documented in wp-includes/formatting.php */
 982	return apply_filters('sanitize_file_name', $filename, $filename_raw);
 983}
 984
 985/**
 986 * Sanitizes a username, stripping out unsafe characters.
 987 *
 988 * Removes tags, octets, entities, and if strict is enabled, will only keep
 989 * alphanumeric, _, space, ., -, @. After sanitizing, it passes the username,
 990 * raw username (the username in the parameter), and the value of $strict as
 991 * parameters for the 'sanitize_user' filter.
 992 *
 993 * @since 2.0.0
 994 *
 995 * @param string $username The username to be sanitized.
 996 * @param bool $strict If set limits $username to specific characters. Default false.
 997 * @return string The sanitized username, after passing through filters.
 998 */
 999function sanitize_user( $username, $strict = false ) {
1000	$raw_username = $username;
1001	$username = wp_strip_all_tags( $username );
1002	$username = remove_accents( $username );
1003	// Kill octets
1004	$username = preg_replace( '|%([a-fA-F0-9][a-fA-F0-9])|', '', $username );
1005	$username = preg_replace( '/&.+?;/', '', $username ); // Kill entities
1006
1007	// If strict, reduce to ASCII for max portability.
1008	if ( $strict )
1009		$username = preg_replace( '|[^a-z0-9 _.\-@]|i', '', $username );
1010
1011	$username = trim( $username );
1012	// Consolidate contiguous whitespace
1013	$username = preg_replace( '|\s+|', ' ', $username );
1014
1015	/**
1016	 * Filter a sanitized username string.
1017	 *
1018	 * @since 2.0.1
1019	 *
1020	 * @param string $username     Sanitized username.
1021	 * @param string $raw_username The username prior to sanitization.
1022	 * @param bool   $strict       Whether to limit the sanitization to specific characters. Default false.
1023	 */
1024	return apply_filters( 'sanitize_user', $username, $raw_username, $strict );
1025}
1026
/**
 * Sanitizes a string key.
 *
 * Keys are used as internal identifiers. Lowercase alphanumeric characters, dashes and underscores are allowed.
 * A non-scalar $key (array, object, null, ...) is reduced to an empty string.
 *
 * @since 3.0.0
 *
 * @param string $key String key
 * @return string Sanitized key
 */
function sanitize_key( $key ) {
	$raw_key = $key;
	$key = '';

	// Only scalars can be lowercased as strings; anything else becomes an
	// empty key instead of raising a warning or TypeError in strtolower().
	if ( is_scalar( $raw_key ) ) {
		$key = strtolower( $raw_key );
		$key = preg_replace( '/[^a-z0-9_\-]/', '', $key );
	}

	/**
	 * Filter a sanitized key string.
	 *
	 * @since 3.0.0
	 *
	 * @param string $key     Sanitized key.
	 * @param string $raw_key The key prior to sanitization.
	 */
	return apply_filters( 'sanitize_key', $key, $raw_key );
}
1052
/**
 * Sanitizes a title, or returns a fallback title.
 *
 * In the 'save' context the title is first passed through remove_accents().
 * All further clean-up is delegated to callbacks on the 'sanitize_title'
 * filter. If the filtered title is an empty string or false, $fallback_title
 * is returned instead.
 *
 * @since 1.0.0
 *
 * @param string $title The string to be sanitized.
 * @param string $fallback_title Optional. A title to use if $title is empty.
 * @param string $context Optional. The operation for which the string is sanitized
 * @return string The sanitized string.
 */
function sanitize_title( $title, $fallback_title = '', $context = 'save' ) {
	$raw_title = $title;

	// Accents are only folded when the title is being saved.
	$candidate = ( 'save' == $context ) ? remove_accents( $title ) : $title;

	/**
	 * Filter a sanitized title string.
	 *
	 * @since 1.2.0
	 *
	 * @param string $title     Sanitized title.
	 * @param string $raw_title The title prior to sanitization.
	 * @param string $context   The context for which the title is being sanitized.
	 */
	$candidate = apply_filters( 'sanitize_title', $candidate, $raw_title, $context );

	// Nothing usable left after filtering: hand back the fallback.
	if ( '' === $candidate || false === $candidate ) {
		return $fallback_title;
	}

	return $candidate;
}
1089
/**
 * Sanitizes a title with the 'query' context.
 *
 * Convenience wrapper around sanitize_title() that passes an empty fallback
 * title and the 'query' context. Used for querying the database for a value
 * from URL.
 *
 * @since 3.1.0
 * @uses sanitize_title()
 *
 * @param string $title The string to be sanitized.
 * @return string The sanitized string.
 */
function sanitize_title_for_query( $title ) {
	$no_fallback = '';
	$query_context = 'query';

	return sanitize_title( $title, $no_fallback, $query_context );
}
1104
/**
 * Sanitizes a title, replacing whitespace and a few other characters with dashes.
 *
 * Limits the output to lowercase alphanumeric characters, percent signs that
 * belong to an octet, underscore (_) and dash (-). Whitespace and periods
 * become dashes; runs of dashes are collapsed and trimmed from both ends.
 *
 * @since 1.2.0
 *
 * @param string $title The title to be sanitized.
 * @param string $raw_title Optional. Not used.
 * @param string $context Optional. The operation for which the string is sanitized.
 * @return string The sanitized title.
 */
function sanitize_title_with_dashes( $title, $raw_title = '', $context = 'display' ) {
	$slug = strip_tags( $title );

	// Shield real %XX octets behind "---XX---" markers, drop every remaining
	// percent sign, then turn the markers back into octets.
	$slug = preg_replace( '|%([a-fA-F0-9][a-fA-F0-9])|', '---$1---', $slug );
	$slug = str_replace( '%', '', $slug );
	$slug = preg_replace( '|---([a-fA-F0-9][a-fA-F0-9])---|', '%$1', $slug );

	if ( seems_utf8( $slug ) ) {
		// Multibyte-aware lowercasing is only available with mbstring.
		if ( function_exists( 'mb_strtolower' ) ) {
			$slug = mb_strtolower( $slug, 'UTF-8' );
		}
		// NOTE(review): presumably percent-encodes multibyte characters, capped
		// at 200 - the 'save' replacements below operate on %xx sequences.
		$slug = utf8_uri_encode( $slug, 200 );
	}

	$slug = strtolower( $slug );
	// Kill anything shaped like an entity.
	$slug = preg_replace( '/&.+?;/', '', $slug );
	$slug = str_replace( '.', '-', $slug );

	if ( 'save' == $context ) {
		// nbsp, ndash and mdash turn into hyphens.
		$becomes_hyphen = array( '%c2%a0', '%e2%80%93', '%e2%80%94' );

		// These characters are stripped entirely.
		$stripped = array(
			// iexcl and iquest
			'%c2%a1', '%c2%bf',
			// angle quotes
			'%c2%ab', '%c2%bb', '%e2%80%b9', '%e2%80%ba',
			// curly quotes
			'%e2%80%98', '%e2%80%99', '%e2%80%9c', '%e2%80%9d',
			'%e2%80%9a', '%e2%80%9b', '%e2%80%9e', '%e2%80%9f',
			// copy, reg, deg, hellip and trade
			'%c2%a9', '%c2%ae', '%c2%b0', '%e2%80%a6', '%e2%84%a2',
			// acute accents
			'%c2%b4', '%cb%8a', '%cc%81', '%cd%81',
			// grave accent, macron, caron
			'%cc%80', '%cc%84', '%cc%8c',
		);

		$slug = str_replace( $becomes_hyphen, '-', $slug );
		$slug = str_replace( $stripped, '', $slug );

		// The multiplication sign is written as a plain "x".
		$slug = str_replace( '%c3%97', 'x', $slug );
	}

	// Applied in order: drop disallowed characters, whitespace runs to a
	// dash, dash runs to a single dash.
	$slug = preg_replace(
		array( '/[^%a-z0-9 _-]/', '/\s+/', '|-+|' ),
		array( '', '-', '-' ),
		$slug
	);

	return trim( $slug, '-' );
}
1170
/**
 * Ensures a string is a valid SQL order by clause.
 *
 * Accepts one or more comma-separated columns (letters, digits and
 * underscores), each with or without ASC/DESC, and also accepts RAND().
 * The whole string has to match: text following the column list, or a
 * comma that is not followed by another column, makes the check fail.
 *
 * @since 2.5.1
 *
 * @param string $orderby Order by string to be checked.
 * @return string|bool Returns the order by clause if it is a match, false otherwise.
 */
function sanitize_sql_orderby( $orderby ) {
	// Anchored at both ends; the lookahead after a comma requires another
	// column to follow, so nothing can be smuggled in behind "col,".
	$columns_pattern = '/^\s*([a-z0-9_]+(\s+(ASC|DESC))?\s*(,\s*(?=[a-z0-9_])|$))+$/i';
	$rand_pattern    = '/^\s*RAND\(\s*\)\s*$/i';

	if ( preg_match( $columns_pattern, $orderby ) || preg_match( $rand_pattern, $orderby ) ) {
		return $orderby;
	}

	return false;
}
1188
/**
 * Sanitizes an HTML classname to ensure it only contains valid characters.
 *
 * Strips the string down to A-Z,a-z,0-9,_,-. If this results in an empty
 * string then the alternative value supplied is used instead, after being
 * stripped down by the same rules.
 *
 * @todo Expand to support the full range of CDATA that a class attribute can contain.
 *
 * @since 2.8.0
 *
 * @param string $class The classname to be sanitized
 * @param string $fallback Optional. The value to return if the sanitization ends up as an empty string.
 * 	Defaults to an empty string.
 * @return string The sanitized value
 */
function sanitize_html_class( $class, $fallback = '' ) {
	// Applied in order: strip % encoded octets, then limit to A-Z,a-z,0-9,_,-
	$strip_patterns = array( '|%[a-fA-F0-9][a-fA-F0-9]|', '/[^A-Za-z0-9_-]/' );

	$sanitized = preg_replace( $strip_patterns, '', $class );

	// The fallback must obey the same rules, otherwise unsafe characters
	// could reach the output through it.
	if ( '' == $sanitized ) {
		$sanitized = preg_replace( $strip_patterns, '', $fallback );
	}

	/**
	 * Filter a sanitized HTML class string.
	 *
	 * @since 2.8.0
	 *
	 * @param string $sanitized The sanitized HTML class.
	 * @param string $class     HTML class before sanitization.
	 * @param string $fallback  The fallback string.
	 */
	return apply_filters( 'sanitize_html_class', $sanitized, $class, $fallback );
}
1225
/**
 * Converts a number of characters from a string.
 *
 * Single-line <title> and <category> metadata elements are removed, <br> and
 * <hr> are rewritten in their XHTML form, lone ampersands become &#038; and
 * numeric references in the 128-159 range are translated to valid Unicode
 * references (or dropped where no counterpart is listed).
 *
 * @since 0.71
 *
 * @param string $content String of characters to be converted.
 * @param string $deprecated Not used.
 * @return string Converted string.
 */
function convert_chars($content, $deprecated = '') {
	if ( ! empty( $deprecated ) ) {
		_deprecated_argument( __FUNCTION__, '0.71' );
	}

	// Invalid Unicode references (Windows CP1252 specific characters, which
	// would look weird on non-Windows browsers) and their valid replacements.
	$cp1252_references = array(
		'&#128;' => '&#8364;', // the Euro sign
		'&#129;' => '',
		'&#130;' => '&#8218;',
		'&#131;' => '&#402;',
		'&#132;' => '&#8222;',
		'&#133;' => '&#8230;',
		'&#134;' => '&#8224;',
		'&#135;' => '&#8225;',
		'&#136;' => '&#710;',
		'&#137;' => '&#8240;',
		'&#138;' => '&#352;',
		'&#139;' => '&#8249;',
		'&#140;' => '&#338;',
		'&#141;' => '',
		'&#142;' => '&#381;',
		'&#143;' => '',
		'&#144;' => '',
		'&#145;' => '&#8216;',
		'&#146;' => '&#8217;',
		'&#147;' => '&#8220;',
		'&#148;' => '&#8221;',
		'&#149;' => '&#8226;',
		'&#150;' => '&#8211;',
		'&#151;' => '&#8212;',
		'&#152;' => '&#732;',
		'&#153;' => '&#8482;',
		'&#154;' => '&#353;',
		'&#155;' => '&#8250;',
		'&#156;' => '&#339;',
		'&#157;' => '',
		'&#158;' => '&#382;',
		'&#159;' => '&#376;'
	);

	// Remove metadata tags together with their content.
	$metadata_patterns = array( '/<title>(.+?)<\/title>/', '/<category>(.+?)<\/category>/' );
	$content = preg_replace( $metadata_patterns, '', $content );

	// Converts lone & characters into &#38; (a.k.a. &amp;); an "&" followed
	// by "#" or by something that looks like a named entity is left alone.
	$content = preg_replace( '/&([^#])(?![a-z1-4]{1,8};)/i', '&#038;$1', $content );

	// Fix Word pasting
	$content = strtr( $content, $cp1252_references );

	// Just a little XHTML help
	return str_replace( array( '<br>', '<hr>' ), array( '<br />', '<hr />' ), $content );
}
1295
/**
 * Balances tags if forced to, or if the 'use_balanceTags' option is set to true.
 *
 * The option is only looked up when $force is falsy. When neither applies,
 * the text is returned exactly as given.
 *
 * @since 0.71
 *
 * @param string $text Text to be balanced
 * @param bool $force If true, forces balancing, ignoring the value of the option. Default false.
 * @return string Balanced text
 */
function balanceTags( $text, $force = false ) {
	$should_balance = $force || 1 == get_option( 'use_balanceTags' );

	return $should_balance ? force_balance_tags( $text ) : $text;
}
1312
1313/**
1314 * Balances tags of string using a modified stack.
1315 *
1316 * @since 2.0.4
1317 *
1318 * @author Leonard Lin <leonard@acm.org>
1319 * @license GPL
1320 * @copyright November 4, 2001
1321 * @version 1.1
1322 * @todo Make better - change loop condition to $text in 1.2
1323 * @internal Modified by Scott Reilly (coffee2code) 02 Aug 2004
1324 *		1.1  Fixed handling of append/stack pop order of end text
1325 *			 Added Cleaning Hooks
1326 *		1.0  First Version
1327 *
1328 * @param string $text Text to be balanced.
1329 * @return string Balanced text.
1330 */
1331function force_balance_tags( $text ) {
1332	$tagstack = array();
1333	$stacksize = 0;
1334	$tagqueue = '';
1335	$newtext = '';
1336	// Known single-entity/self-closing tags
1337	$single_tags = array( 'area', 'base', 'basefont', 'br', 'col', 'command', 'embed', 'frame', 'hr', 'img', 'input', 'isindex', 'link', 'meta', 'param', 'source' );
1338	// Tags that can be immediately nested within themselves
1339	$nestable_tags = array( 'blockquote', 'div', 'object', 'q', 'span' );
1340
1341	// WP bug fix for comments - in case you REALLY meant to type '< !--'
1342	$text = str_replace('< !--', '<    !--', $text);
1343	// WP bug fix for LOVE <3 (and other situations with '<' before a number)
1344	$text = preg_replace('#<([0-9]{1})#', '&lt;$1', $text);
1345
1346	while ( preg_match("/<(\/?[\w:]*)\s*([^>]*)>/", $text, $regex) ) {
1347		$newtext .= $tagqueue;
1348
1349		$i = strpos($text, $regex[0]);
1350		$l = strlen($regex[0]);
1351
1352		// clear the shifter
1353		$tagqueue = '';
1354		// Pop or Push
1355		if ( isset($regex[1][0]) && '/' == $regex[1][0] ) { // End Tag
1356			$tag = strtolower(substr($regex[1],1));
1357			// if too many closing tags
1358			if( $stacksiā€¦

Large files are truncated, but you can click here to view the full file