/Web/wp-includes/formatting.php
PHP | 3247 lines | 1970 code | 266 blank | 1011 comment | 258 complexity | 1b93561121cca8f46a85834b014fb5fc MD5 | raw file
Possible License(s): GPL-2.0, GPL-3.0, AGPL-1.0, LGPL-2.1
Large files are truncated, but you can click here to view the full file
1<?php 2/** 3 * Main WordPress Formatting API. 4 * 5 * Handles many functions for formatting output. 6 * 7 * @package WordPress 8 **/ 9 10/** 11 * Replaces common plain text characters into formatted entities 12 * 13 * As an example, 14 * <code> 15 * 'cause today's effort makes it worth tomorrow's "holiday"... 16 * </code> 17 * Becomes: 18 * <code> 19 * ’cause today’s effort makes it worth tomorrow’s “holiday”… 20 * </code> 21 * Code within certain html blocks are skipped. 22 * 23 * @since 0.71 24 * @uses $wp_cockneyreplace Array of formatted entities for certain common phrases 25 * 26 * @param string $text The text to be formatted 27 * @return string The string replaced with html entities 28 */ 29function wptexturize($text) { 30 global $wp_cockneyreplace; 31 static $static_characters, $static_replacements, $dynamic_characters, $dynamic_replacements, 32 $default_no_texturize_tags, $default_no_texturize_shortcodes; 33 34 // No need to set up these static variables more than once 35 if ( ! 
isset( $static_characters ) ) { 36 /* translators: opening curly double quote */ 37 $opening_quote = _x( '“', 'opening curly double quote' ); 38 /* translators: closing curly double quote */ 39 $closing_quote = _x( '”', 'closing curly double quote' ); 40 41 /* translators: apostrophe, for example in 'cause or can't */ 42 $apos = _x( '’', 'apostrophe' ); 43 44 /* translators: prime, for example in 9' (nine feet) */ 45 $prime = _x( '′', 'prime' ); 46 /* translators: double prime, for example in 9" (nine inches) */ 47 $double_prime = _x( '″', 'double prime' ); 48 49 /* translators: opening curly single quote */ 50 $opening_single_quote = _x( '‘', 'opening curly single quote' ); 51 /* translators: closing curly single quote */ 52 $closing_single_quote = _x( '’', 'closing curly single quote' ); 53 54 /* translators: en dash */ 55 $en_dash = _x( '–', 'en dash' ); 56 /* translators: em dash */ 57 $em_dash = _x( '—', 'em dash' ); 58 59 $default_no_texturize_tags = array('pre', 'code', 'kbd', 'style', 'script', 'tt'); 60 $default_no_texturize_shortcodes = array('code'); 61 62 // if a plugin has provided an autocorrect array, use it 63 if ( isset($wp_cockneyreplace) ) { 64 $cockney = array_keys($wp_cockneyreplace); 65 $cockneyreplace = array_values($wp_cockneyreplace); 66 } elseif ( "'" != $apos ) { // Only bother if we're doing a replacement. 67 $cockney = array( "'tain't", "'twere", "'twas", "'tis", "'twill", "'til", "'bout", "'nuff", "'round", "'cause" ); 68 $cockneyreplace = array( $apos . "tain" . $apos . "t", $apos . "twere", $apos . "twas", $apos . "tis", $apos . "twill", $apos . "til", $apos . "bout", $apos . "nuff", $apos . "round", $apos . "cause" ); 69 } else { 70 $cockney = $cockneyreplace = array(); 71 } 72 73 $static_characters = array_merge( array( '---', ' -- ', '--', ' - ', 'xn–', '...', '``', '\'\'', ' (tm)' ), $cockney ); 74 $static_replacements = array_merge( array( $em_dash, ' ' . $em_dash . ' ', $en_dash, ' ' . $en_dash . 
' ', 'xn--', '…', $opening_quote, $closing_quote, ' ™' ), $cockneyreplace ); 75 76 $dynamic = array(); 77 if ( "'" != $apos ) { 78 $dynamic[ '/\'(\d\d(?:’|\')?s)/' ] = $apos . '$1'; // '99's 79 $dynamic[ '/\'(\d)/' ] = $apos . '$1'; // '99 80 } 81 if ( "'" != $opening_single_quote ) 82 $dynamic[ '/(\s|\A|[([{<]|")\'/' ] = '$1' . $opening_single_quote; // opening single quote, even after (, {, <, [ 83 if ( '"' != $double_prime ) 84 $dynamic[ '/(\d)"/' ] = '$1' . $double_prime; // 9" (double prime) 85 if ( "'" != $prime ) 86 $dynamic[ '/(\d)\'/' ] = '$1' . $prime; // 9' (prime) 87 if ( "'" != $apos ) 88 $dynamic[ '/(\S)\'([^\'\s])/' ] = '$1' . $apos . '$2'; // apostrophe in a word 89 if ( '"' != $opening_quote ) 90 $dynamic[ '/(\s|\A|[([{<])"(?!\s)/' ] = '$1' . $opening_quote . '$2'; // opening double quote, even after (, {, <, [ 91 if ( '"' != $closing_quote ) 92 $dynamic[ '/"(\s|\S|\Z)/' ] = $closing_quote . '$1'; // closing double quote 93 if ( "'" != $closing_single_quote ) 94 $dynamic[ '/\'([\s.]|\Z)/' ] = $closing_single_quote . '$1'; // closing single quote 95 96 $dynamic[ '/\b(\d+)x(\d+)\b/' ] = '$1×$2'; // 9x9 (times) 97 98 $dynamic_characters = array_keys( $dynamic ); 99 $dynamic_replacements = array_values( $dynamic ); 100 } 101 102 // Transform into regexp sub-expression used in _wptexturize_pushpop_element 103 // Must do this everytime in case plugins use these filters in a context sensitive manner 104 $no_texturize_tags = '(' . implode('|', apply_filters('no_texturize_tags', $default_no_texturize_tags) ) . ')'; 105 $no_texturize_shortcodes = '(' . implode('|', apply_filters('no_texturize_shortcodes', $default_no_texturize_shortcodes) ) . 
')'; 106 107 $no_texturize_tags_stack = array(); 108 $no_texturize_shortcodes_stack = array(); 109 110 $textarr = preg_split('/(<.*>|\[.*\])/Us', $text, -1, PREG_SPLIT_DELIM_CAPTURE); 111 112 foreach ( $textarr as &$curl ) { 113 if ( empty( $curl ) ) 114 continue; 115 116 // Only call _wptexturize_pushpop_element if first char is correct tag opening 117 $first = $curl[0]; 118 if ( '<' === $first ) { 119 _wptexturize_pushpop_element($curl, $no_texturize_tags_stack, $no_texturize_tags, '<', '>'); 120 } elseif ( '[' === $first ) { 121 _wptexturize_pushpop_element($curl, $no_texturize_shortcodes_stack, $no_texturize_shortcodes, '[', ']'); 122 } elseif ( empty($no_texturize_shortcodes_stack) && empty($no_texturize_tags_stack) ) { 123 // This is not a tag, nor is the texturization disabled static strings 124 $curl = str_replace($static_characters, $static_replacements, $curl); 125 // regular expressions 126 $curl = preg_replace($dynamic_characters, $dynamic_replacements, $curl); 127 } 128 $curl = preg_replace('/&([^#])(?![a-zA-Z1-4]{1,8};)/', '&$1', $curl); 129 } 130 return implode( '', $textarr ); 131} 132 133/** 134 * Search for disabled element tags. Push element to stack on tag open and pop 135 * on tag close. Assumes first character of $text is tag opening. 136 * 137 * @access private 138 * @since 2.9.0 139 * 140 * @param string $text Text to check. First character is assumed to be $opening 141 * @param array $stack Array used as stack of opened tag elements 142 * @param string $disabled_elements Tags to match against formatted as regexp sub-expression 143 * @param string $opening Tag opening character, assumed to be 1 character long 144 * @param string $opening Tag closing character 145 * @return object 146 */ 147function _wptexturize_pushpop_element($text, &$stack, $disabled_elements, $opening = '<', $closing = '>') { 148 // Check if it is a closing tag -- otherwise assume opening tag 149 if (strncmp($opening . '/', $text, 2)) { 150 // Opening? 
Check $text+1 against disabled elements 151 if (preg_match('/^' . $disabled_elements . '\b/', substr($text, 1), $matches)) { 152 /* 153 * This disables texturize until we find a closing tag of our type 154 * (e.g. <pre>) even if there was invalid nesting before that 155 * 156 * Example: in the case <pre>sadsadasd</code>"baba"</pre> 157 * "baba" won't be texturize 158 */ 159 160 array_push($stack, $matches[1]); 161 } 162 } else { 163 // Closing? Check $text+2 against disabled elements 164 $c = preg_quote($closing, '/'); 165 if (preg_match('/^' . $disabled_elements . $c . '/', substr($text, 2), $matches)) { 166 $last = array_pop($stack); 167 168 // Make sure it matches the opening tag 169 if ($last != $matches[1]) 170 array_push($stack, $last); 171 } 172 } 173} 174 175/** 176 * Replaces double line-breaks with paragraph elements. 177 * 178 * A group of regex replaces used to identify text formatted with newlines and 179 * replace double line-breaks with HTML paragraph tags. The remaining 180 * line-breaks after conversion become <<br />> tags, unless $br is set to '0' 181 * or 'false'. 182 * 183 * @since 0.71 184 * 185 * @param string $pee The text which has to be formatted. 186 * @param bool $br Optional. If set, this will convert all remaining line-breaks after paragraphing. Default true. 187 * @return string Text which has been converted into correct paragraph tags. 188 */ 189function wpautop($pee, $br = true) { 190 $pre_tags = array(); 191 192 if ( trim($pee) === '' ) 193 return ''; 194 195 $pee = $pee . "\n"; // just to make things a little easier, pad the end 196 197 if ( strpos($pee, '<pre') !== false ) { 198 $pee_parts = explode( '</pre>', $pee ); 199 $last_pee = array_pop($pee_parts); 200 $pee = ''; 201 $i = 0; 202 203 foreach ( $pee_parts as $pee_part ) { 204 $start = strpos($pee_part, '<pre'); 205 206 // Malformed html? 
207 if ( $start === false ) { 208 $pee .= $pee_part; 209 continue; 210 } 211 212 $name = "<pre wp-pre-tag-$i></pre>"; 213 $pre_tags[$name] = substr( $pee_part, $start ) . '</pre>'; 214 215 $pee .= substr( $pee_part, 0, $start ) . $name; 216 $i++; 217 } 218 219 $pee .= $last_pee; 220 } 221 222 $pee = preg_replace('|<br />\s*<br />|', "\n\n", $pee); 223 // Space things out a little 224 $allblocks = '(?:table|thead|tfoot|caption|col|colgroup|tbody|tr|td|th|div|dl|dd|dt|ul|ol|li|pre|select|option|form|map|area|blockquote|address|math|style|p|h[1-6]|hr|fieldset|legend|section|article|aside|hgroup|header|footer|nav|figure|figcaption|details|menu|summary)'; 225 $pee = preg_replace('!(<' . $allblocks . '[^>]*>)!', "\n$1", $pee); 226 $pee = preg_replace('!(</' . $allblocks . '>)!', "$1\n\n", $pee); 227 $pee = str_replace(array("\r\n", "\r"), "\n", $pee); // cross-platform newlines 228 if ( strpos($pee, '<object') !== false ) { 229 $pee = preg_replace('|\s*<param([^>]*)>\s*|', "<param$1>", $pee); // no pee inside object/embed 230 $pee = preg_replace('|\s*</embed>\s*|', '</embed>', $pee); 231 } 232 $pee = preg_replace("/\n\n+/", "\n\n", $pee); // take care of duplicates 233 // make paragraphs, including one at the end 234 $pees = preg_split('/\n\s*\n/', $pee, -1, PREG_SPLIT_NO_EMPTY); 235 $pee = ''; 236 foreach ( $pees as $tinkle ) 237 $pee .= '<p>' . trim($tinkle, "\n") . "</p>\n"; 238 $pee = preg_replace('|<p>\s*</p>|', '', $pee); // under certain strange conditions it could create a P of entirely whitespace 239 $pee = preg_replace('!<p>([^<]+)</(div|address|form)>!', "<p>$1</p></$2>", $pee); 240 $pee = preg_replace('!<p>\s*(</?' . $allblocks . 
'[^>]*>)\s*</p>!', "$1", $pee); // don't pee all over a tag 241 $pee = preg_replace("|<p>(<li.+?)</p>|", "$1", $pee); // problem with nested lists 242 $pee = preg_replace('|<p><blockquote([^>]*)>|i', "<blockquote$1><p>", $pee); 243 $pee = str_replace('</blockquote></p>', '</p></blockquote>', $pee); 244 $pee = preg_replace('!<p>\s*(</?' . $allblocks . '[^>]*>)!', "$1", $pee); 245 $pee = preg_replace('!(</?' . $allblocks . '[^>]*>)\s*</p>!', "$1", $pee); 246 if ( $br ) { 247 $pee = preg_replace_callback('/<(script|style).*?<\/\\1>/s', '_autop_newline_preservation_helper', $pee); 248 $pee = preg_replace('|(?<!<br />)\s*\n|', "<br />\n", $pee); // optionally make line breaks 249 $pee = str_replace('<WPPreserveNewline />', "\n", $pee); 250 } 251 $pee = preg_replace('!(</?' . $allblocks . '[^>]*>)\s*<br />!', "$1", $pee); 252 $pee = preg_replace('!<br />(\s*</?(?:p|li|div|dl|dd|dt|th|pre|td|ul|ol)[^>]*>)!', '$1', $pee); 253 $pee = preg_replace( "|\n</p>$|", '</p>', $pee ); 254 255 if ( !empty($pre_tags) ) 256 $pee = str_replace(array_keys($pre_tags), array_values($pre_tags), $pee); 257 258 return $pee; 259} 260 261/** 262 * Newline preservation help function for wpautop 263 * 264 * @since 3.1.0 265 * @access private 266 * @param array $matches preg_replace_callback matches array 267 * @returns string 268 */ 269function _autop_newline_preservation_helper( $matches ) { 270 return str_replace("\n", "<WPPreserveNewline />", $matches[0]); 271} 272 273/** 274 * Don't auto-p wrap shortcodes that stand alone 275 * 276 * Ensures that shortcodes are not wrapped in <<p>>...<</p>>. 277 * 278 * @since 2.9.0 279 * 280 * @param string $pee The content. 281 * @return string The filtered content. 282 */ 283function shortcode_unautop( $pee ) { 284 global $shortcode_tags; 285 286 if ( empty( $shortcode_tags ) || !is_array( $shortcode_tags ) ) { 287 return $pee; 288 } 289 290 $tagregexp = join( '|', array_map( 'preg_quote', array_keys( $shortcode_tags ) ) ); 291 292 $pattern = 293 '/' 294 . 
'<p>' // Opening paragraph 295 . '\\s*+' // Optional leading whitespace 296 . '(' // 1: The shortcode 297 . '\\[' // Opening bracket 298 . "($tagregexp)" // 2: Shortcode name 299 . '\\b' // Word boundary 300 // Unroll the loop: Inside the opening shortcode tag 301 . '[^\\]\\/]*' // Not a closing bracket or forward slash 302 . '(?:' 303 . '\\/(?!\\])' // A forward slash not followed by a closing bracket 304 . '[^\\]\\/]*' // Not a closing bracket or forward slash 305 . ')*?' 306 . '(?:' 307 . '\\/\\]' // Self closing tag and closing bracket 308 . '|' 309 . '\\]' // Closing bracket 310 . '(?:' // Unroll the loop: Optionally, anything between the opening and closing shortcode tags 311 . '[^\\[]*+' // Not an opening bracket 312 . '(?:' 313 . '\\[(?!\\/\\2\\])' // An opening bracket not followed by the closing shortcode tag 314 . '[^\\[]*+' // Not an opening bracket 315 . ')*+' 316 . '\\[\\/\\2\\]' // Closing shortcode tag 317 . ')?' 318 . ')' 319 . ')' 320 . '\\s*+' // optional trailing whitespace 321 . '<\\/p>' // closing paragraph 322 . '/s'; 323 324 return preg_replace( $pattern, '$1', $pee ); 325} 326 327/** 328 * Checks to see if a string is utf8 encoded. 329 * 330 * NOTE: This function checks for 5-Byte sequences, UTF8 331 * has Bytes Sequences with a maximum length of 4. 332 * 333 * @author bmorel at ssi dot fr (modified) 334 * @since 1.2.1 335 * 336 * @param string $str The string to be checked 337 * @return bool True if $str fits a UTF-8 model, false otherwise. 
338 */ 339function seems_utf8($str) { 340 $length = strlen($str); 341 for ($i=0; $i < $length; $i++) { 342 $c = ord($str[$i]); 343 if ($c < 0x80) $n = 0; # 0bbbbbbb 344 elseif (($c & 0xE0) == 0xC0) $n=1; # 110bbbbb 345 elseif (($c & 0xF0) == 0xE0) $n=2; # 1110bbbb 346 elseif (($c & 0xF8) == 0xF0) $n=3; # 11110bbb 347 elseif (($c & 0xFC) == 0xF8) $n=4; # 111110bb 348 elseif (($c & 0xFE) == 0xFC) $n=5; # 1111110b 349 else return false; # Does not match any model 350 for ($j=0; $j<$n; $j++) { # n bytes matching 10bbbbbb follow ? 351 if ((++$i == $length) || ((ord($str[$i]) & 0xC0) != 0x80)) 352 return false; 353 } 354 } 355 return true; 356} 357 358/** 359 * Converts a number of special characters into their HTML entities. 360 * 361 * Specifically deals with: &, <, >, ", and '. 362 * 363 * $quote_style can be set to ENT_COMPAT to encode " to 364 * ", or ENT_QUOTES to do both. Default is ENT_NOQUOTES where no quotes are encoded. 365 * 366 * @since 1.2.2 367 * 368 * @param string $string The text which is to be encoded. 369 * @param mixed $quote_style Optional. Converts double quotes if set to ENT_COMPAT, both single and double if set to ENT_QUOTES or none if set to ENT_NOQUOTES. Also compatible with old values; converting single quotes if set to 'single', double if set to 'double' or both if otherwise set. Default is ENT_NOQUOTES. 370 * @param string $charset Optional. The character encoding of the string. Default is false. 371 * @param boolean $double_encode Optional. Whether to encode existing html entities. Default is false. 372 * @return string The encoded text with HTML entities. 373 */ 374function _wp_specialchars( $string, $quote_style = ENT_NOQUOTES, $charset = false, $double_encode = false ) { 375 $string = (string) $string; 376 377 if ( 0 === strlen( $string ) ) 378 return ''; 379 380 // Don't bother if there are no specialchars - saves some processing 381 if ( ! 
preg_match( '/[&<>"\']/', $string ) ) 382 return $string; 383 384 // Account for the previous behaviour of the function when the $quote_style is not an accepted value 385 if ( empty( $quote_style ) ) 386 $quote_style = ENT_NOQUOTES; 387 elseif ( ! in_array( $quote_style, array( 0, 2, 3, 'single', 'double' ), true ) ) 388 $quote_style = ENT_QUOTES; 389 390 // Store the site charset as a static to avoid multiple calls to wp_load_alloptions() 391 if ( ! $charset ) { 392 static $_charset; 393 if ( ! isset( $_charset ) ) { 394 $alloptions = wp_load_alloptions(); 395 $_charset = isset( $alloptions['blog_charset'] ) ? $alloptions['blog_charset'] : ''; 396 } 397 $charset = $_charset; 398 } 399 400 if ( in_array( $charset, array( 'utf8', 'utf-8', 'UTF8' ) ) ) 401 $charset = 'UTF-8'; 402 403 $_quote_style = $quote_style; 404 405 if ( $quote_style === 'double' ) { 406 $quote_style = ENT_COMPAT; 407 $_quote_style = ENT_COMPAT; 408 } elseif ( $quote_style === 'single' ) { 409 $quote_style = ENT_NOQUOTES; 410 } 411 412 // Handle double encoding ourselves 413 if ( $double_encode ) { 414 $string = @htmlspecialchars( $string, $quote_style, $charset ); 415 } else { 416 // Decode & into & 417 $string = wp_specialchars_decode( $string, $_quote_style ); 418 419 // Guarantee every &entity; is valid or re-encode the & 420 $string = wp_kses_normalize_entities( $string ); 421 422 // Now re-encode everything except &entity; 423 $string = preg_split( '/(&#?x?[0-9a-z]+;)/i', $string, -1, PREG_SPLIT_DELIM_CAPTURE ); 424 425 for ( $i = 0; $i < count( $string ); $i += 2 ) 426 $string[$i] = @htmlspecialchars( $string[$i], $quote_style, $charset ); 427 428 $string = implode( '', $string ); 429 } 430 431 // Backwards compatibility 432 if ( 'single' === $_quote_style ) 433 $string = str_replace( "'", ''', $string ); 434 435 return $string; 436} 437 438/** 439 * Converts a number of HTML entities into their special characters. 440 * 441 * Specifically deals with: &, <, >, ", and '. 
442 * 443 * $quote_style can be set to ENT_COMPAT to decode " entities, 444 * or ENT_QUOTES to do both " and '. Default is ENT_NOQUOTES where no quotes are decoded. 445 * 446 * @since 2.8 447 * 448 * @param string $string The text which is to be decoded. 449 * @param mixed $quote_style Optional. Converts double quotes if set to ENT_COMPAT, both single and double if set to ENT_QUOTES or none if set to ENT_NOQUOTES. Also compatible with old _wp_specialchars() values; converting single quotes if set to 'single', double if set to 'double' or both if otherwise set. Default is ENT_NOQUOTES. 450 * @return string The decoded text without HTML entities. 451 */ 452function wp_specialchars_decode( $string, $quote_style = ENT_NOQUOTES ) { 453 $string = (string) $string; 454 455 if ( 0 === strlen( $string ) ) { 456 return ''; 457 } 458 459 // Don't bother if there are no entities - saves a lot of processing 460 if ( strpos( $string, '&' ) === false ) { 461 return $string; 462 } 463 464 // Match the previous behaviour of _wp_specialchars() when the $quote_style is not an accepted value 465 if ( empty( $quote_style ) ) { 466 $quote_style = ENT_NOQUOTES; 467 } elseif ( !in_array( $quote_style, array( 0, 2, 3, 'single', 'double' ), true ) ) { 468 $quote_style = ENT_QUOTES; 469 } 470 471 // More complete than get_html_translation_table( HTML_SPECIALCHARS ) 472 $single = array( ''' => '\'', ''' => '\'' ); 473 $single_preg = array( '/�*39;/' => ''', '/�*27;/i' => ''' ); 474 $double = array( '"' => '"', '"' => '"', '"' => '"' ); 475 $double_preg = array( '/�*34;/' => '"', '/�*22;/i' => '"' ); 476 $others = array( '<' => '<', '<' => '<', '>' => '>', '>' => '>', '&' => '&', '&' => '&', '&' => '&' ); 477 $others_preg = array( '/�*60;/' => '<', '/�*62;/' => '>', '/�*38;/' => '&', '/�*26;/i' => '&' ); 478 479 if ( $quote_style === ENT_QUOTES ) { 480 $translation = array_merge( $single, $double, $others ); 481 $translation_preg = array_merge( $single_preg, $double_preg, $others_preg ); 482 } 
elseif ( $quote_style === ENT_COMPAT || $quote_style === 'double' ) { 483 $translation = array_merge( $double, $others ); 484 $translation_preg = array_merge( $double_preg, $others_preg ); 485 } elseif ( $quote_style === 'single' ) { 486 $translation = array_merge( $single, $others ); 487 $translation_preg = array_merge( $single_preg, $others_preg ); 488 } elseif ( $quote_style === ENT_NOQUOTES ) { 489 $translation = $others; 490 $translation_preg = $others_preg; 491 } 492 493 // Remove zero padding on numeric entities 494 $string = preg_replace( array_keys( $translation_preg ), array_values( $translation_preg ), $string ); 495 496 // Replace characters according to translation table 497 return strtr( $string, $translation ); 498} 499 500/** 501 * Checks for invalid UTF8 in a string. 502 * 503 * @since 2.8 504 * 505 * @param string $string The text which is to be checked. 506 * @param boolean $strip Optional. Whether to attempt to strip out invalid UTF8. Default is false. 507 * @return string The checked text. 
508 */ 509function wp_check_invalid_utf8( $string, $strip = false ) { 510 $string = (string) $string; 511 512 if ( 0 === strlen( $string ) ) { 513 return ''; 514 } 515 516 // Store the site charset as a static to avoid multiple calls to get_option() 517 static $is_utf8; 518 if ( !isset( $is_utf8 ) ) { 519 $is_utf8 = in_array( get_option( 'blog_charset' ), array( 'utf8', 'utf-8', 'UTF8', 'UTF-8' ) ); 520 } 521 if ( !$is_utf8 ) { 522 return $string; 523 } 524 525 // Check for support for utf8 in the installed PCRE library once and store the result in a static 526 static $utf8_pcre; 527 if ( !isset( $utf8_pcre ) ) { 528 $utf8_pcre = @preg_match( '/^./u', 'a' ); 529 } 530 // We can't demand utf8 in the PCRE installation, so just return the string in those cases 531 if ( !$utf8_pcre ) { 532 return $string; 533 } 534 535 // preg_match fails when it encounters invalid UTF8 in $string 536 if ( 1 === @preg_match( '/^./us', $string ) ) { 537 return $string; 538 } 539 540 // Attempt to strip the bad chars if requested (not recommended) 541 if ( $strip && function_exists( 'iconv' ) ) { 542 return iconv( 'utf-8', 'utf-8', $string ); 543 } 544 545 return ''; 546} 547 548/** 549 * Encode the Unicode values to be used in the URI. 550 * 551 * @since 1.5.0 552 * 553 * @param string $utf8_string 554 * @param int $length Max length of the string 555 * @return string String with Unicode encoded for URI. 556 */ 557function utf8_uri_encode( $utf8_string, $length = 0 ) { 558 $unicode = ''; 559 $values = array(); 560 $num_octets = 1; 561 $unicode_length = 0; 562 563 $string_length = strlen( $utf8_string ); 564 for ($i = 0; $i < $string_length; $i++ ) { 565 566 $value = ord( $utf8_string[ $i ] ); 567 568 if ( $value < 128 ) { 569 if ( $length && ( $unicode_length >= $length ) ) 570 break; 571 $unicode .= chr($value); 572 $unicode_length++; 573 } else { 574 if ( count( $values ) == 0 ) $num_octets = ( $value < 224 ) ? 
2 : 3; 575 576 $values[] = $value; 577 578 if ( $length && ( $unicode_length + ($num_octets * 3) ) > $length ) 579 break; 580 if ( count( $values ) == $num_octets ) { 581 if ($num_octets == 3) { 582 $unicode .= '%' . dechex($values[0]) . '%' . dechex($values[1]) . '%' . dechex($values[2]); 583 $unicode_length += 9; 584 } else { 585 $unicode .= '%' . dechex($values[0]) . '%' . dechex($values[1]); 586 $unicode_length += 6; 587 } 588 589 $values = array(); 590 $num_octets = 1; 591 } 592 } 593 } 594 595 return $unicode; 596} 597 598/** 599 * Converts all accent characters to ASCII characters. 600 * 601 * If there are no accent characters, then the string given is just returned. 602 * 603 * @since 1.2.1 604 * 605 * @param string $string Text that might have accent characters 606 * @return string Filtered string with replaced "nice" characters. 607 */ 608function remove_accents($string) { 609 if ( !preg_match('/[\x80-\xff]/', $string) ) 610 return $string; 611 612 if (seems_utf8($string)) { 613 $chars = array( 614 // Decompositions for Latin-1 Supplement 615 chr(194).chr(170) => 'a', chr(194).chr(186) => 'o', 616 chr(195).chr(128) => 'A', chr(195).chr(129) => 'A', 617 chr(195).chr(130) => 'A', chr(195).chr(131) => 'A', 618 chr(195).chr(132) => 'A', chr(195).chr(133) => 'A', 619 chr(195).chr(134) => 'AE',chr(195).chr(135) => 'C', 620 chr(195).chr(136) => 'E', chr(195).chr(137) => 'E', 621 chr(195).chr(138) => 'E', chr(195).chr(139) => 'E', 622 chr(195).chr(140) => 'I', chr(195).chr(141) => 'I', 623 chr(195).chr(142) => 'I', chr(195).chr(143) => 'I', 624 chr(195).chr(144) => 'D', chr(195).chr(145) => 'N', 625 chr(195).chr(146) => 'O', chr(195).chr(147) => 'O', 626 chr(195).chr(148) => 'O', chr(195).chr(149) => 'O', 627 chr(195).chr(150) => 'O', chr(195).chr(153) => 'U', 628 chr(195).chr(154) => 'U', chr(195).chr(155) => 'U', 629 chr(195).chr(156) => 'U', chr(195).chr(157) => 'Y', 630 chr(195).chr(158) => 'TH',chr(195).chr(159) => 's', 631 chr(195).chr(160) => 'a', 
chr(195).chr(161) => 'a', 632 chr(195).chr(162) => 'a', chr(195).chr(163) => 'a', 633 chr(195).chr(164) => 'a', chr(195).chr(165) => 'a', 634 chr(195).chr(166) => 'ae',chr(195).chr(167) => 'c', 635 chr(195).chr(168) => 'e', chr(195).chr(169) => 'e', 636 chr(195).chr(170) => 'e', chr(195).chr(171) => 'e', 637 chr(195).chr(172) => 'i', chr(195).chr(173) => 'i', 638 chr(195).chr(174) => 'i', chr(195).chr(175) => 'i', 639 chr(195).chr(176) => 'd', chr(195).chr(177) => 'n', 640 chr(195).chr(178) => 'o', chr(195).chr(179) => 'o', 641 chr(195).chr(180) => 'o', chr(195).chr(181) => 'o', 642 chr(195).chr(182) => 'o', chr(195).chr(184) => 'o', 643 chr(195).chr(185) => 'u', chr(195).chr(186) => 'u', 644 chr(195).chr(187) => 'u', chr(195).chr(188) => 'u', 645 chr(195).chr(189) => 'y', chr(195).chr(190) => 'th', 646 chr(195).chr(191) => 'y', chr(195).chr(152) => 'O', 647 // Decompositions for Latin Extended-A 648 chr(196).chr(128) => 'A', chr(196).chr(129) => 'a', 649 chr(196).chr(130) => 'A', chr(196).chr(131) => 'a', 650 chr(196).chr(132) => 'A', chr(196).chr(133) => 'a', 651 chr(196).chr(134) => 'C', chr(196).chr(135) => 'c', 652 chr(196).chr(136) => 'C', chr(196).chr(137) => 'c', 653 chr(196).chr(138) => 'C', chr(196).chr(139) => 'c', 654 chr(196).chr(140) => 'C', chr(196).chr(141) => 'c', 655 chr(196).chr(142) => 'D', chr(196).chr(143) => 'd', 656 chr(196).chr(144) => 'D', chr(196).chr(145) => 'd', 657 chr(196).chr(146) => 'E', chr(196).chr(147) => 'e', 658 chr(196).chr(148) => 'E', chr(196).chr(149) => 'e', 659 chr(196).chr(150) => 'E', chr(196).chr(151) => 'e', 660 chr(196).chr(152) => 'E', chr(196).chr(153) => 'e', 661 chr(196).chr(154) => 'E', chr(196).chr(155) => 'e', 662 chr(196).chr(156) => 'G', chr(196).chr(157) => 'g', 663 chr(196).chr(158) => 'G', chr(196).chr(159) => 'g', 664 chr(196).chr(160) => 'G', chr(196).chr(161) => 'g', 665 chr(196).chr(162) => 'G', chr(196).chr(163) => 'g', 666 chr(196).chr(164) => 'H', chr(196).chr(165) => 'h', 667 chr(196).chr(166) => 
'H', chr(196).chr(167) => 'h', 668 chr(196).chr(168) => 'I', chr(196).chr(169) => 'i', 669 chr(196).chr(170) => 'I', chr(196).chr(171) => 'i', 670 chr(196).chr(172) => 'I', chr(196).chr(173) => 'i', 671 chr(196).chr(174) => 'I', chr(196).chr(175) => 'i', 672 chr(196).chr(176) => 'I', chr(196).chr(177) => 'i', 673 chr(196).chr(178) => 'IJ',chr(196).chr(179) => 'ij', 674 chr(196).chr(180) => 'J', chr(196).chr(181) => 'j', 675 chr(196).chr(182) => 'K', chr(196).chr(183) => 'k', 676 chr(196).chr(184) => 'k', chr(196).chr(185) => 'L', 677 chr(196).chr(186) => 'l', chr(196).chr(187) => 'L', 678 chr(196).chr(188) => 'l', chr(196).chr(189) => 'L', 679 chr(196).chr(190) => 'l', chr(196).chr(191) => 'L', 680 chr(197).chr(128) => 'l', chr(197).chr(129) => 'L', 681 chr(197).chr(130) => 'l', chr(197).chr(131) => 'N', 682 chr(197).chr(132) => 'n', chr(197).chr(133) => 'N', 683 chr(197).chr(134) => 'n', chr(197).chr(135) => 'N', 684 chr(197).chr(136) => 'n', chr(197).chr(137) => 'N', 685 chr(197).chr(138) => 'n', chr(197).chr(139) => 'N', 686 chr(197).chr(140) => 'O', chr(197).chr(141) => 'o', 687 chr(197).chr(142) => 'O', chr(197).chr(143) => 'o', 688 chr(197).chr(144) => 'O', chr(197).chr(145) => 'o', 689 chr(197).chr(146) => 'OE',chr(197).chr(147) => 'oe', 690 chr(197).chr(148) => 'R',chr(197).chr(149) => 'r', 691 chr(197).chr(150) => 'R',chr(197).chr(151) => 'r', 692 chr(197).chr(152) => 'R',chr(197).chr(153) => 'r', 693 chr(197).chr(154) => 'S',chr(197).chr(155) => 's', 694 chr(197).chr(156) => 'S',chr(197).chr(157) => 's', 695 chr(197).chr(158) => 'S',chr(197).chr(159) => 's', 696 chr(197).chr(160) => 'S', chr(197).chr(161) => 's', 697 chr(197).chr(162) => 'T', chr(197).chr(163) => 't', 698 chr(197).chr(164) => 'T', chr(197).chr(165) => 't', 699 chr(197).chr(166) => 'T', chr(197).chr(167) => 't', 700 chr(197).chr(168) => 'U', chr(197).chr(169) => 'u', 701 chr(197).chr(170) => 'U', chr(197).chr(171) => 'u', 702 chr(197).chr(172) => 'U', chr(197).chr(173) => 'u', 703 
chr(197).chr(174) => 'U', chr(197).chr(175) => 'u', 704 chr(197).chr(176) => 'U', chr(197).chr(177) => 'u', 705 chr(197).chr(178) => 'U', chr(197).chr(179) => 'u', 706 chr(197).chr(180) => 'W', chr(197).chr(181) => 'w', 707 chr(197).chr(182) => 'Y', chr(197).chr(183) => 'y', 708 chr(197).chr(184) => 'Y', chr(197).chr(185) => 'Z', 709 chr(197).chr(186) => 'z', chr(197).chr(187) => 'Z', 710 chr(197).chr(188) => 'z', chr(197).chr(189) => 'Z', 711 chr(197).chr(190) => 'z', chr(197).chr(191) => 's', 712 // Decompositions for Latin Extended-B 713 chr(200).chr(152) => 'S', chr(200).chr(153) => 's', 714 chr(200).chr(154) => 'T', chr(200).chr(155) => 't', 715 // Euro Sign 716 chr(226).chr(130).chr(172) => 'E', 717 // GBP (Pound) Sign 718 chr(194).chr(163) => '', 719 // Vowels with diacritic (Vietnamese) 720 // unmarked 721 chr(198).chr(160) => 'O', chr(198).chr(161) => 'o', 722 chr(198).chr(175) => 'U', chr(198).chr(176) => 'u', 723 // grave accent 724 chr(225).chr(186).chr(166) => 'A', chr(225).chr(186).chr(167) => 'a', 725 chr(225).chr(186).chr(176) => 'A', chr(225).chr(186).chr(177) => 'a', 726 chr(225).chr(187).chr(128) => 'E', chr(225).chr(187).chr(129) => 'e', 727 chr(225).chr(187).chr(146) => 'O', chr(225).chr(187).chr(147) => 'o', 728 chr(225).chr(187).chr(156) => 'O', chr(225).chr(187).chr(157) => 'o', 729 chr(225).chr(187).chr(170) => 'U', chr(225).chr(187).chr(171) => 'u', 730 chr(225).chr(187).chr(178) => 'Y', chr(225).chr(187).chr(179) => 'y', 731 // hook 732 chr(225).chr(186).chr(162) => 'A', chr(225).chr(186).chr(163) => 'a', 733 chr(225).chr(186).chr(168) => 'A', chr(225).chr(186).chr(169) => 'a', 734 chr(225).chr(186).chr(178) => 'A', chr(225).chr(186).chr(179) => 'a', 735 chr(225).chr(186).chr(186) => 'E', chr(225).chr(186).chr(187) => 'e', 736 chr(225).chr(187).chr(130) => 'E', chr(225).chr(187).chr(131) => 'e', 737 chr(225).chr(187).chr(136) => 'I', chr(225).chr(187).chr(137) => 'i', 738 chr(225).chr(187).chr(142) => 'O', chr(225).chr(187).chr(143) => 
'o', 739 chr(225).chr(187).chr(148) => 'O', chr(225).chr(187).chr(149) => 'o', 740 chr(225).chr(187).chr(158) => 'O', chr(225).chr(187).chr(159) => 'o', 741 chr(225).chr(187).chr(166) => 'U', chr(225).chr(187).chr(167) => 'u', 742 chr(225).chr(187).chr(172) => 'U', chr(225).chr(187).chr(173) => 'u', 743 chr(225).chr(187).chr(182) => 'Y', chr(225).chr(187).chr(183) => 'y', 744 // tilde 745 chr(225).chr(186).chr(170) => 'A', chr(225).chr(186).chr(171) => 'a', 746 chr(225).chr(186).chr(180) => 'A', chr(225).chr(186).chr(181) => 'a', 747 chr(225).chr(186).chr(188) => 'E', chr(225).chr(186).chr(189) => 'e', 748 chr(225).chr(187).chr(132) => 'E', chr(225).chr(187).chr(133) => 'e', 749 chr(225).chr(187).chr(150) => 'O', chr(225).chr(187).chr(151) => 'o', 750 chr(225).chr(187).chr(160) => 'O', chr(225).chr(187).chr(161) => 'o', 751 chr(225).chr(187).chr(174) => 'U', chr(225).chr(187).chr(175) => 'u', 752 chr(225).chr(187).chr(184) => 'Y', chr(225).chr(187).chr(185) => 'y', 753 // acute accent 754 chr(225).chr(186).chr(164) => 'A', chr(225).chr(186).chr(165) => 'a', 755 chr(225).chr(186).chr(174) => 'A', chr(225).chr(186).chr(175) => 'a', 756 chr(225).chr(186).chr(190) => 'E', chr(225).chr(186).chr(191) => 'e', 757 chr(225).chr(187).chr(144) => 'O', chr(225).chr(187).chr(145) => 'o', 758 chr(225).chr(187).chr(154) => 'O', chr(225).chr(187).chr(155) => 'o', 759 chr(225).chr(187).chr(168) => 'U', chr(225).chr(187).chr(169) => 'u', 760 // dot below 761 chr(225).chr(186).chr(160) => 'A', chr(225).chr(186).chr(161) => 'a', 762 chr(225).chr(186).chr(172) => 'A', chr(225).chr(186).chr(173) => 'a', 763 chr(225).chr(186).chr(182) => 'A', chr(225).chr(186).chr(183) => 'a', 764 chr(225).chr(186).chr(184) => 'E', chr(225).chr(186).chr(185) => 'e', 765 chr(225).chr(187).chr(134) => 'E', chr(225).chr(187).chr(135) => 'e', 766 chr(225).chr(187).chr(138) => 'I', chr(225).chr(187).chr(139) => 'i', 767 chr(225).chr(187).chr(140) => 'O', chr(225).chr(187).chr(141) => 'o', 768 
chr(225).chr(187).chr(152) => 'O', chr(225).chr(187).chr(153) => 'o', 769 chr(225).chr(187).chr(162) => 'O', chr(225).chr(187).chr(163) => 'o', 770 chr(225).chr(187).chr(164) => 'U', chr(225).chr(187).chr(165) => 'u', 771 chr(225).chr(187).chr(176) => 'U', chr(225).chr(187).chr(177) => 'u', 772 chr(225).chr(187).chr(180) => 'Y', chr(225).chr(187).chr(181) => 'y', 773 ); 774 775 $string = strtr($string, $chars); 776 } else { 777 // Assume ISO-8859-1 if not UTF-8 778 $chars['in'] = chr(128).chr(131).chr(138).chr(142).chr(154).chr(158) 779 .chr(159).chr(162).chr(165).chr(181).chr(192).chr(193).chr(194) 780 .chr(195).chr(196).chr(197).chr(199).chr(200).chr(201).chr(202) 781 .chr(203).chr(204).chr(205).chr(206).chr(207).chr(209).chr(210) 782 .chr(211).chr(212).chr(213).chr(214).chr(216).chr(217).chr(218) 783 .chr(219).chr(220).chr(221).chr(224).chr(225).chr(226).chr(227) 784 .chr(228).chr(229).chr(231).chr(232).chr(233).chr(234).chr(235) 785 .chr(236).chr(237).chr(238).chr(239).chr(241).chr(242).chr(243) 786 .chr(244).chr(245).chr(246).chr(248).chr(249).chr(250).chr(251) 787 .chr(252).chr(253).chr(255); 788 789 $chars['out'] = "EfSZszYcYuAAAAAACEEEEIIIINOOOOOOUUUUYaaaaaaceeeeiiiinoooooouuuuyy"; 790 791 $string = strtr($string, $chars['in'], $chars['out']); 792 $double_chars['in'] = array(chr(140), chr(156), chr(198), chr(208), chr(222), chr(223), chr(230), chr(240), chr(254)); 793 $double_chars['out'] = array('OE', 'oe', 'AE', 'DH', 'TH', 'ss', 'ae', 'dh', 'th'); 794 $string = str_replace($double_chars['in'], $double_chars['out'], $string); 795 } 796 797 return $string; 798} 799 800/** 801 * Sanitizes a filename replacing whitespace with dashes 802 * 803 * Removes special characters that are illegal in filenames on certain 804 * operating systems and special characters requiring special escaping 805 * to manipulate at the command line. Replaces spaces and consecutive 806 * dashes with a single dash. 
Trim period, dash and underscore from beginning and end of filename.
 *
 * @since 2.1.0
 *
 * @param string $filename The filename to be sanitized
 * @return string The sanitized filename
 */
function sanitize_file_name( $filename ) {
	$original_name = $filename;

	// Characters stripped outright; plugins may adjust the list through the filter.
	$forbidden = array("?", "[", "]", "/", "\\", "=", "<", ">", ":", ";", ",", "'", "\"", "&", "$", "#", "*", "(", ")", "|", "~", "`", "!", "{", "}", chr(0));
	$forbidden = apply_filters( 'sanitize_file_name_chars', $forbidden, $original_name );

	$filename = str_replace( $forbidden, '', $filename );
	// Runs of whitespace and/or dashes collapse into a single dash.
	$filename = preg_replace( '/[\s-]+/', '-', $filename );
	$filename = trim( $filename, '.-_' );

	// Dot-separated pieces: base name, any intermediate extensions, final extension.
	$segments = explode( '.', $filename );

	// Only names with more than one extension need the munging below.
	if ( count( $segments ) > 2 ) {
		$filename      = array_shift( $segments );
		$last_ext      = array_pop( $segments );
		$allowed_mimes = get_allowed_mime_types();

		// Intermediate extensions made of 2 - 5 letters (plus an optional digit) that
		// match no allowed-mime extension pattern get a trailing underscore.
		foreach ( (array) $segments as $segment ) {
			$filename .= '.' . $segment;

			if ( ! preg_match( "/^[a-zA-Z]{2,5}\d?$/", $segment ) )
				continue;

			$is_whitelisted = false;
			foreach ( $allowed_mimes as $ext_pattern => $mime_type ) {
				if ( preg_match( '!^(' . $ext_pattern . ')$!i', $segment ) ) {
					$is_whitelisted = true;
					break;
				}
			}

			if ( ! $is_whitelisted )
				$filename .= '_';
		}

		$filename .= '.' . $last_ext;
	}

	return apply_filters( 'sanitize_file_name', $filename, $original_name );
}

/**
 * Sanitize username stripping out unsafe characters.
 *
 * Removes tags, octets, entities, and if strict is enabled, will only keep
 * alphanumeric, _, space, ., -, @.
After sanitizing, it passes the username,
 * raw username (the username in the parameter), and the value of $strict as
 * parameters for the 'sanitize_user' filter.
 *
 * @since 2.0.0
 * @uses apply_filters() Calls 'sanitize_user' hook on username, raw username,
 *		and $strict parameter.
 *
 * @param string $username The username to be sanitized.
 * @param bool $strict If set limits $username to specific characters. Default false.
 * @return string The sanitized username, after passing through filters.
 */
function sanitize_user( $username, $strict = false ) {
	$unfiltered = $username;

	// Drop markup first, then fold accented characters.
	$clean = remove_accents( wp_strip_all_tags( $username ) );

	// Remove percent-encoded octets.
	$clean = preg_replace( '|%([a-fA-F0-9][a-fA-F0-9])|', '', $clean );
	// Remove entities.
	$clean = preg_replace( '/&.+?;/', '', $clean );

	// Strict mode keeps only letters, digits, space, underscore, dot, dash and @.
	if ( $strict ) {
		$clean = preg_replace( '|[^a-z0-9 _.\-@]|i', '', $clean );
	}

	// Trim the ends, then squeeze inner whitespace runs down to one space.
	$clean = preg_replace( '|\s+|', ' ', trim( $clean ) );

	return apply_filters( 'sanitize_user', $clean, $unfiltered, $strict );
}

/**
 * Sanitize a string key.
 *
 * Keys are used as internal identifiers. Lowercase alphanumeric characters, dashes and underscores are allowed.
 *
 * @since 3.0.0
 *
 * @param string $key String key
 * @return string Sanitized key
 */
function sanitize_key( $key ) {
	$unfiltered = $key;

	// Lowercase, then discard everything outside a-z, 0-9, underscore and dash.
	$clean = preg_replace( '/[^a-z0-9_\-]/', '', strtolower( $key ) );

	return apply_filters( 'sanitize_key', $clean, $unfiltered );
}

/**
 * Sanitizes title or use fallback title.
 *
 * Specifically, HTML and PHP tags are stripped. Further actions can be added
 * via the plugin API. If $title is empty and $fallback_title is set, the latter
 * will be used.
*
 * @since 1.0.0
 *
 * @param string $title The string to be sanitized.
 * @param string $fallback_title Optional. A title to use if $title is empty.
 * @param string $context Optional. The operation for which the string is sanitized
 * @return string The sanitized string.
 */
function sanitize_title($title, $fallback_title = '', $context = 'save') {
	$unfiltered = $title;

	// Accents are only folded when the title is being saved.
	$prepared = ( 'save' == $context ) ? remove_accents( $title ) : $title;

	$filtered = apply_filters( 'sanitize_title', $prepared, $unfiltered, $context );

	// An empty string or false from the filters means "use the fallback".
	return ( '' === $filtered || false === $filtered ) ? $fallback_title : $filtered;
}

/**
 * Sanitizes a title using the 'query' context.
 *
 * Thin wrapper around sanitize_title() with an empty fallback title.
 *
 * @param string $title The string to be sanitized.
 * @return string The sanitized string.
 */
function sanitize_title_for_query($title) {
	return sanitize_title( $title, '', 'query' );
}

/**
 * Sanitizes title, replacing whitespace and a few other characters with dashes.
 *
 * Limits the output to alphanumeric characters, underscore (_) and dash (-).
 * Whitespace becomes a dash.
 *
 * @since 1.2.0
 *
 * @param string $title The title to be sanitized.
 * @param string $raw_title Optional. Not used.
 * @param string $context Optional. The operation for which the string is sanitized.
 * @return string The sanitized title.
 */
function sanitize_title_with_dashes($title, $raw_title = '', $context = 'display') {
	$slug = strip_tags( $title );

	// Wrap valid percent-encoded octets in a marker, drop every remaining '%',
	// then turn the markers back into octets.
	$slug = preg_replace( '|%([a-fA-F0-9][a-fA-F0-9])|', '---$1---', $slug );
	$slug = str_replace( '%', '', $slug );
	$slug = preg_replace( '|---([a-fA-F0-9][a-fA-F0-9])---|', '%$1', $slug );

	if ( seems_utf8( $slug ) ) {
		if ( function_exists( 'mb_strtolower' ) ) {
			$slug = mb_strtolower( $slug, 'UTF-8' );
		}
		$slug = utf8_uri_encode( $slug, 200 );
	}

	$slug = strtolower( $slug );
	// Kill entities.
	$slug = preg_replace( '/&.+?;/', '', $slug );
	$slug = str_replace( '.', '-', $slug );

	if ( 'save' == $context ) {
		// Encoded nbsp, ndash and mdash become hyphens.
		$to_hyphen = array( '%c2%a0', '%e2%80%93', '%e2%80%94' );

		// Encoded characters that are removed entirely.
		$to_strip = array(
			// iexcl and iquest
			'%c2%a1', '%c2%bf',
			// angle quotes
			'%c2%ab', '%c2%bb', '%e2%80%b9', '%e2%80%ba',
			// curly quotes
			'%e2%80%98', '%e2%80%99', '%e2%80%9c', '%e2%80%9d',
			'%e2%80%9a', '%e2%80%9b', '%e2%80%9e', '%e2%80%9f',
			// copy, reg, deg, hellip and trade
			'%c2%a9', '%c2%ae', '%c2%b0', '%e2%80%a6', '%e2%84%a2',
		);

		$slug = str_replace( $to_hyphen, '-', $slug );
		$slug = str_replace( $to_strip, '', $slug );

		// Encoded multiplication sign becomes a plain x.
		$slug = str_replace( '%c3%97', 'x', $slug );
	}

	// Keep only %, a-z, 0-9, space, underscore and dash; whitespace then becomes
	// a dash and runs of dashes collapse to one.
	$slug = preg_replace( '/[^%a-z0-9 _-]/', '', $slug );
	$slug = preg_replace( '/\s+/', '-', $slug );
	$slug = preg_replace( '|-+|', '-', $slug );

	return trim( $slug, '-' );
}

/**
 * Ensures a string is a valid SQL order by clause.
 *
 * Accepts one or more columns, with or without ASC/DESC, and also accepts
 * RAND().
 *
 * @since 2.5.1
 *
 * @param string $orderby Order by string to be checked.
 * @return string|false Returns the order by clause if it is a match, false otherwise.
*/
function sanitize_sql_orderby( $orderby ){
	// Column list: every item is an identifier with an optional ASC/DESC. A comma
	// must be followed by another identifier, and the whole string must be consumed,
	// so arbitrary text can no longer ride along after a valid leading "column,".
	$columns_pattern = '/^\s*([a-z0-9_]+(\s+(ASC|DESC))?\s*(,\s*(?=[a-z0-9_])|$))+$/i';

	// The only function call accepted is a bare RAND().
	$rand_pattern = '/^\s*RAND\(\s*\)\s*$/i';

	if ( preg_match( $columns_pattern, $orderby ) || preg_match( $rand_pattern, $orderby ) )
		return $orderby;

	return false;
}

/**
 * Sanitizes a html classname to ensure it only contains valid characters
 *
 * Strips the string down to A-Z,a-z,0-9,_,-. If this results in an empty
 * string then it will return the alternative value supplied.
 *
 * @todo Expand to support the full range of CDATA that a class attribute can contain.
 *
 * @since 2.8.0
 *
 * @param string $class The classname to be sanitized
 * @param string $fallback Optional. The value to return if the sanitization ends up as an empty string.
 * 	Defaults to an empty string.
 * @return string The sanitized value
 */
function sanitize_html_class( $class, $fallback = '' ) {
	//Strip out any % encoded octets
	$sanitized = preg_replace( '|%[a-fA-F0-9][a-fA-F0-9]|', '', $class );

	//Limit to A-Z,a-z,0-9,_,-
	$sanitized = preg_replace( '/[^A-Za-z0-9_-]/', '', $sanitized );

	// Nothing left after stripping: hand back the caller's fallback instead.
	if ( '' == $sanitized )
		$sanitized = $fallback;

	return apply_filters( 'sanitize_html_class', $sanitized, $class, $fallback );
}

/**
 * Converts a number of characters from a string.
 *
 * Metadata tags <<title>> and <<category>> are removed, <<br>> and <<hr>> are
 * converted into correct XHTML and Unicode characters are converted to the
 * valid range.
 *
 * @since 0.71
 *
 * @param string $content String of characters to be converted.
 * @param string $deprecated Not used.
 * @return string Converted string.
*/
function convert_chars($content, $deprecated = '') {
	if ( !empty( $deprecated ) )
		_deprecated_argument( __FUNCTION__, '0.71' );

	// Translation of invalid Unicode references range to valid range.
	// Keys and values are numeric character references written out literally
	// (&#NNN;); they must stay as references, not as the decoded characters,
	// otherwise every pair maps a character onto itself.
	$wp_htmltranswinuni = array(
	'&#128;' => '&#8364;', // the Euro sign
	'&#129;' => '',
	'&#130;' => '&#8218;', // these are Windows CP1252 specific characters
	'&#131;' => '&#402;',  // they would look weird on non-Windows browsers
	'&#132;' => '&#8222;',
	'&#133;' => '&#8230;',
	'&#134;' => '&#8224;',
	'&#135;' => '&#8225;',
	'&#136;' => '&#710;',
	'&#137;' => '&#8240;',
	'&#138;' => '&#352;',
	'&#139;' => '&#8249;',
	'&#140;' => '&#338;',
	'&#141;' => '',
	'&#142;' => '&#381;',
	'&#143;' => '',
	'&#144;' => '',
	'&#145;' => '&#8216;',
	'&#146;' => '&#8217;',
	'&#147;' => '&#8220;',
	'&#148;' => '&#8221;',
	'&#149;' => '&#8226;',
	'&#150;' => '&#8211;',
	'&#151;' => '&#8212;',
	'&#152;' => '&#732;',
	'&#153;' => '&#8482;',
	'&#154;' => '&#353;',
	'&#155;' => '&#8250;',
	'&#156;' => '&#339;',
	'&#157;' => '',
	'&#158;' => '&#382;',
	'&#159;' => '&#376;'
	);

	// Remove metadata tags
	$content = preg_replace('/<title>(.+?)<\/title>/','',$content);
	$content = preg_replace('/<category>(.+?)<\/category>/','',$content);

	// Converts lone & characters into &#038; (a.k.a. &amp;).
	// An ampersand that already starts a numeric reference (&#...) or a named
	// entity (1 - 8 characters from a-z and 1-4, then ';') is left untouched.
	$content = preg_replace('/&([^#])(?![a-z1-4]{1,8};)/i', '&#038;$1', $content);

	// Fix Word pasting
	$content = strtr($content, $wp_htmltranswinuni);

	// Just a little XHTML help
	$content = str_replace('<br>', '<br />', $content);
	$content = str_replace('<hr>', '<hr />', $content);

	return $content;
}

/**
 * Will only balance the tags if forced to and the option is set to balance tags.
 *
 * The option 'use_balanceTags' is used to determine whether the tags will be balanced.
 *
 * @since 0.71
 *
 * @param string $text Text to be balanced
 * @param bool $force If true, forces balancing, ignoring the value of the option. Default false.
* @return string Balanced text
 */
function balanceTags( $text, $force = false ) {
	// Without $force, balancing only happens when the option is switched on.
	if ( !$force && get_option('use_balanceTags') == 0 )
		return $text;
	return force_balance_tags( $text );
}

/**
 * Balances tags of string using a modified stack.
 *
 * Open tags are pushed onto a stack; a closing tag pops the stack (closing any
 * tags opened after it), closing tags with no matching opener are dropped, known
 * single tags are self-closed, and whatever is still open at the end of the text
 * is closed there.
 *
 * A literally typed '< !--' is widened before processing and restored afterwards,
 * because rebuilding an HTML comment produces '< !--', which is collapsed back
 * to '<!--' at the end. A '<' directly followed by a digit is escaped to '&lt;' so
 * it is not mistaken for a tag.
 *
 * @since 2.0.4
 *
 * @author Leonard Lin <leonard@acm.org>
 * @license GPL
 * @copyright November 4, 2001
 * @version 1.1
 * @todo Make better - change loop condition to $text in 1.2
 * @internal Modified by Scott Reilly (coffee2code) 02 Aug 2004
 *		1.1  Fixed handling of append/stack pop order of end text
 *			 Added Cleaning Hooks
 *		1.0  First Version
 *
 * @param string $text Text to be balanced.
 * @return string Balanced text.
 */
function force_balance_tags( $text ) {
	$tagstack = array();
	$stacksize = 0;
	$tagqueue = '';
	$newtext = '';
	$single_tags = array( 'br', 'hr', 'img', 'input' ); // Known single-entity/self-closing tags
	$nestable_tags = array( 'blockquote', 'div', 'span', 'q' ); // Tags that can be immediately nested within themselves

	// WP bug fix for comments - in case you REALLY meant to type '< !--'
	// (widened here so the comment fix at the end does not collapse it)
	$text = str_replace('< !--', '<    !--', $text);
	// WP bug fix for LOVE <3 (and other situations with '<' before a number)
	$text = preg_replace('#<([0-9]{1})#', '&lt;$1', $text);

	while ( preg_match("/<(\/?[\w:]*)\s*([^>]*)>/", $text, $regex) ) {
		$newtext .= $tagqueue;

		$i = strpos($text, $regex[0]);
		$l = strlen($regex[0]);

		// clear the shifter
		$tagqueue = '';
		// Pop or Push
		if ( isset($regex[1][0]) && '/' == $regex[1][0] ) { // End Tag
			$tag = strtolower(substr($regex[1],1));
			// if too many closing tags
			if( $stacksize <= 0 ) {
				$tag = '';
				// or close to be safe $tag = '/' . $tag;
			}
			// if stacktop value = tag close value then pop
			else if ( $tagstack[$stacksize - 1] == $tag ) { // found closing tag
				$tag = '</' . $tag . '>'; // Close Tag
				// Pop
				array_pop( $tagstack );
				$stacksize--;
			} else { // closing tag not at top, search for it
				for ( $j = $stacksize-1; $j >= 0; $j-- ) {
					if ( $tagstack[$j] == $tag ) {
					// add tag to tagqueue
						for ( $k = $stacksize-1; $k >= $j; $k--) {
							$tagqueue .= '</' . array_pop( $tagstack ) . '>';
							$stacksize--;
						}
						break;
					}
				}
				$tag = '';
			}
		} else { // Begin Tag
			$tag = strtolower($regex[1]);

			// Tag Cleaning

			// If self-closing or '', don't do anything.
			if ( substr($regex[2],-1) == '/' || $tag == '' ) {
				// do nothing
			}
			// ElseIf it's a known single-entity tag but it doesn't close itself, do so
			elseif ( in_array($tag, $single_tags) ) {
				$regex[2] .= '/';
			} else {	// Push the tag onto the stack
				// If the top of the stack is the same as the tag we want to push, close previous tag
				if ( $stacksize > 0 && !in_array($tag, $nestable_tags) && $tagstack[$stacksize - 1] == $tag ) {
					$tagqueue = '</' . array_pop ($tagstack) . '>';
					$stacksize--;
				}
				$stacksize = array_push ($tagstack, $tag);
			}

			// Attributes
			$attributes = $regex[2];
			if( !empty($attributes) )
				$attributes = ' '.$attributes;

			$tag = '<' . $tag . $attributes . '>';
			//If already queuing a close tag, then put this tag on, too
			if ( !empty($tagqueue) ) {
				$tagqueue .= $tag;
				$tag = '';
			}
		}
		$newtext .= substr($text, 0, $i) . $tag;
		$text = substr($text, $i + $l);
	}

	// Clear Tag Queue
	$newtext .= $tagqueue;

	// Add Remaining text
	$newtext .= $text;

	// Empty Stack
	while( $x = array_pop($tagstack) )
		$newtext .= '</' . $x . '>'; // Add remaining tags to close

	// WP fix for the bug with HTML comments: rebuilt comments come out as
	// '< !--', so collapse them, then restore any literal '< !--' widened above.
	$newtext = str_replace("< !--","<!--",$newtext);
	$newtext = str_replace("<    !--","< !--",$newtext);

	return $newtext;
}

/**
 * Acts on text which is about to be edited.
 *
 * The $content is run through esc_textarea(), which uses htmlspecialchars()
 * to convert special characters to HTML entities. If $richedit is set to true,
 * it is simply a holder for the 'format_to_edit' filter.
 *
 * @since 0.71
 *
 * @param string $content The text about to be edited.
 * @param bool $richedit Whether the $content should not pass through htmlspecialchars(). Default false (meaning it will be passed).
 * @return string The text after the filter (and possibly htmlspecialchars()) has been run.
 */
function format_to_edit( $content, $richedit = false ) {
	$content = apply_filters( 'format_to_edit', $content );
	if ( ! $richedit )
		$content = esc_textarea( $content );
	return $content;
}

/**
 * Holder for the 'format_to_post' filter.
 *
 * @since 0.71
 *
 * @param string $content The text to pass through the filter.
 * @return string Text returned from the 'format_to_post' filter.
 */
function format_to_post($content) {
	$content = apply_filters('format_to_post', $content);
	return $content;
}

/**
 * Add leading zeros when necessary.
 *
 * If you set the threshold to '4' and the number is '10', then you will get
 * back '0010'. If you set the threshold to '4' and the number is '5000', then you
 * will get back '5000'.
 *
 * Uses sprintf to prepend zeros based on the $threshold parameter
 * and the size of the number. If the number is large enough, then no zeros will
 * be appended.
*
 * @since 0.71
 *
 * @param mixed $number Number to append zeros to if not greater than threshold.
 * @param int $threshold Digit places number needs to be to not have zeros added.
 * @return string Adds leading zeros to number if needed.
 */
function zeroise($number, $threshold) {
	// Zero-padded string conversion whose minimum width is $threshold, e.g. '%04s'.
	$format = '%0' . $threshold . 's';

	return sprintf( $format, $number );
}

/**
 * Adds backslashes before letters and before a number at the start of a string.
 *
 * @since 0.71
 *
 * @param string $string Value to which backslashes will be added.
 * @return string String with backslashes inserted.
 */
function backslashit($string) {
	// Applied in order: first a digit at the very start of the string, then every letter.
	$patterns = array(
		'/^([0-9])/',
		'/([a-z])/i',
	);
	$replacements = array(
		'\\\\\\\\\1',
		'\\\\\1',
	);

	return preg_replace( $patterns, $replacements, $string );
}

/**
 * Appends a trailing slash.
 *
 * Will remove trailing slash if it exists already before adding a trailing
 * slash. This prevents double slashing a string or path.
 *
 * The primary use of this is for paths and thus should be used for paths. It is
 * not restricted to paths and offers no specific path support.
 *
 * @…
Large files are truncated, but you can click here to view the full file