PageRenderTime 167ms CodeModel.GetById 36ms RepoModel.GetById 1ms app.codeStats 1ms

/wp-includes/formatting.php

https://bitbucket.org/JacobTyler/fame4good-wp
PHP | 3310 lines | 2013 code | 275 blank | 1022 comment | 265 complexity | df4a36b853dd8a0088d8671c591942b2 MD5 | raw file
Possible License(s): BSD-3-Clause, GPL-3.0, AGPL-1.0, LGPL-2.1, GPL-2.0

Large files are truncated, but you can click here to view the full file

  1. <?php
  2. /**
  3. * Main WordPress Formatting API.
  4. *
  5. * Handles many functions for formatting output.
  6. *
  7. * @package WordPress
  8. **/
  /**
   * Swaps plain-text punctuation for typographic HTML entities.
   *
   * For instance,
   * <code>
   * 'cause today's effort makes it worth tomorrow's "holiday"...
   * </code>
   * is turned into:
   * <code>
   * &#8217;cause today&#8217;s effort makes it worth tomorrow&#8217;s &#8220;holiday&#8221;&#8230;
   * </code>
   * Text between the tags and shortcodes named by the 'no_texturize_tags' and
   * 'no_texturize_shortcodes' filters is not texturized. Bare ampersands are
   * still encoded in every non-empty piece, tags included.
   *
   * @since 0.71
   * @uses $wp_cockneyreplace Optional map of phrase => replacement supplied by a plugin
   *
   * @param string $text The text to be formatted
   * @return string The text with typographic entities applied
   */
  function wptexturize($text) {
      global $wp_cockneyreplace;
      static $plain_search, $plain_replace, $regex_search, $regex_replace,
          $skip_tags_default, $skip_shortcodes_default;

      // The lookup tables are built on the first call and reused afterwards.
      if ( ! isset( $plain_search ) ) {
          /* translators: opening curly double quote */
          $opening_quote = _x( '&#8220;', 'opening curly double quote' );
          /* translators: closing curly double quote */
          $closing_quote = _x( '&#8221;', 'closing curly double quote' );

          /* translators: apostrophe, for example in 'cause or can't */
          $apos = _x( '&#8217;', 'apostrophe' );

          /* translators: prime, for example in 9' (nine feet) */
          $prime = _x( '&#8242;', 'prime' );
          /* translators: double prime, for example in 9" (nine inches) */
          $double_prime = _x( '&#8243;', 'double prime' );

          /* translators: opening curly single quote */
          $opening_single_quote = _x( '&#8216;', 'opening curly single quote' );
          /* translators: closing curly single quote */
          $closing_single_quote = _x( '&#8217;', 'closing curly single quote' );

          /* translators: en dash */
          $en_dash = _x( '&#8211;', 'en dash' );
          /* translators: em dash */
          $em_dash = _x( '&#8212;', 'em dash' );

          $skip_tags_default       = array( 'pre', 'code', 'kbd', 'style', 'script', 'tt' );
          $skip_shortcodes_default = array( 'code' );

          if ( isset( $wp_cockneyreplace ) ) {
              // A plugin supplied its own autocorrect table: use it verbatim.
              $cockney        = array_keys( $wp_cockneyreplace );
              $cockneyreplace = array_values( $wp_cockneyreplace );
          } elseif ( "'" != $apos ) {
              // Only worth doing when the apostrophe is actually being replaced.
              $cockney        = array( "'tain't", "'twere", "'twas", "'tis", "'twill", "'til", "'bout", "'nuff", "'round", "'cause" );
              $cockneyreplace = array();
              foreach ( $cockney as $contraction ) {
                  // Every straight apostrophe in the contraction becomes the localized one.
                  $cockneyreplace[] = str_replace( "'", $apos, $contraction );
              }
          } else {
              $cockney        = array();
              $cockneyreplace = array();
          }

          $plain_search  = array_merge(
              array( '---', ' -- ', '--', ' - ', 'xn&#8211;', '...', '``', '\'\'', ' (tm)' ),
              $cockney
          );
          $plain_replace = array_merge(
              array( $em_dash, ' ' . $em_dash . ' ', $en_dash, ' ' . $en_dash . ' ', 'xn--', '&#8230;', $opening_quote, $closing_quote, ' &#8482;' ),
              $cockneyreplace
          );

          // Regex rules run in the order they are appended; later rules rely on earlier ones.
          $regex_search  = array();
          $regex_replace = array();

          if ( "'" != $apos ) {
              // '99's
              $regex_search[]  = '/\'(\d\d(?:&#8217;|\')?s)/';
              $regex_replace[] = $apos . '$1';
              // '99
              $regex_search[]  = '/\'(\d)/';
              $regex_replace[] = $apos . '$1';
          }
          if ( "'" != $opening_single_quote ) {
              // opening single quote, even after (, {, <, [
              $regex_search[]  = '/(\s|\A|[([{<]|")\'/';
              $regex_replace[] = '$1' . $opening_single_quote;
          }
          if ( '"' != $double_prime ) {
              // 9" (double prime)
              $regex_search[]  = '/(\d)"/';
              $regex_replace[] = '$1' . $double_prime;
          }
          if ( "'" != $prime ) {
              // 9' (prime)
              $regex_search[]  = '/(\d)\'/';
              $regex_replace[] = '$1' . $prime;
          }
          if ( "'" != $apos ) {
              // apostrophe in a word
              $regex_search[]  = '/(\S)\'([^\'\s])/';
              $regex_replace[] = '$1' . $apos . '$2';
          }
          if ( '"' != $opening_quote ) {
              // opening double quote, even after (, {, <, [
              // ($2 names a group this pattern does not define, so it expands to nothing.)
              $regex_search[]  = '/(\s|\A|[([{<])"(?!\s)/';
              $regex_replace[] = '$1' . $opening_quote . '$2';
          }
          if ( '"' != $closing_quote ) {
              // closing double quote
              $regex_search[]  = '/"(\s|\S|\Z)/';
              $regex_replace[] = $closing_quote . '$1';
          }
          if ( "'" != $closing_single_quote ) {
              // closing single quote
              $regex_search[]  = '/\'([\s.]|\Z)/';
              $regex_replace[] = $closing_single_quote . '$1';
          }

          // 9x9 (times)
          $regex_search[]  = '/\b(\d+)x(\d+)\b/';
          $regex_replace[] = '$1&#215;$2';
      }

      // Turn the filtered lists into regexp sub-expressions for _wptexturize_pushpop_element().
      // This is redone on every call because plugins may filter context-sensitively.
      $skip_tags       = '(' . implode( '|', apply_filters( 'no_texturize_tags', $skip_tags_default ) ) . ')';
      $skip_shortcodes = '(' . implode( '|', apply_filters( 'no_texturize_shortcodes', $skip_shortcodes_default ) ) . ')';

      $open_tags       = array();
      $open_shortcodes = array();

      // Split into alternating runs of text and <tag> / [shortcode] delimiters.
      $pieces = preg_split( '/(<.*>|\[.*\])/Us', $text, -1, PREG_SPLIT_DELIM_CAPTURE );

      foreach ( $pieces as $index => $piece ) {
          if ( empty( $piece ) ) {
              continue;
          }

          $lead = $piece[0];

          if ( '<' === $lead ) {
              // An HTML tag: track whether it opens or closes a no-texturize element.
              _wptexturize_pushpop_element( $piece, $open_tags, $skip_tags, '<', '>' );
          } elseif ( '[' === $lead ) {
              // A shortcode: same bookkeeping with the shortcode stack.
              _wptexturize_pushpop_element( $piece, $open_shortcodes, $skip_shortcodes, '[', ']' );
          } elseif ( empty( $open_shortcodes ) && empty( $open_tags ) ) {
              // Plain text outside any protected element: static strings first, then the regex rules.
              $piece = str_replace( $plain_search, $plain_replace, $piece );
              $piece = preg_replace( $regex_search, $regex_replace, $piece );
          }

          // Encode ampersands that do not already start an entity.
          $pieces[ $index ] = preg_replace( '/&([^#])(?![a-zA-Z1-4]{1,8};)/', '&#038;$1', $piece );
      }

      return implode( '', $pieces );
  }
  /**
   * Tracks entry into and exit from elements whose content must not be texturized.
   *
   * An opening tag whose name matches $disabled_elements is pushed onto $stack.
   * A closing tag pops the stack only when it matches the element on top.
   * A closing tag that arrives while the stack is empty is ignored, so a stray
   * closer cannot leave a bogus entry behind and disable texturization for
   * everything that follows.
   *
   * @access private
   * @since 2.9.0
   *
   * @param string $text Text to check. First character is assumed to be $opening
   * @param array $stack Stack of currently open disabled elements (modified in place)
   * @param string $disabled_elements Tag names to match, as a parenthesised regexp sub-expression
   * @param string $opening Tag opening character, assumed to be 1 character long
   * @param string $closing Tag closing character
   */
  function _wptexturize_pushpop_element($text, &$stack, $disabled_elements, $opening = '<', $closing = '>') {
      // strncmp() yields 0 only when $text begins with "$opening/", i.e. a closing tag.
      if ( 0 !== strncmp( $opening . '/', $text, 2 ) ) {
          // Opening tag: compare the name right after the opening character.
          if ( preg_match( '/^' . $disabled_elements . '\b/', substr( $text, 1 ), $matches ) ) {
              /*
               * Texturization stays off until the matching closing tag is seen,
               * even if other tags are nested invalidly in between.
               *
               * Example: in <pre>sadsadasd</code>"baba"</pre>
               * "baba" is not texturized.
               */
              array_push( $stack, $matches[1] );
          }
          return;
      }

      // Closing tag with nothing open: there is nothing to pop. Without this
      // guard array_pop() would return NULL and that NULL would be pushed back.
      if ( empty( $stack ) ) {
          return;
      }

      // Closing tag: compare the name right after "$opening/".
      $c = preg_quote( $closing, '/' );
      if ( preg_match( '/^' . $disabled_elements . $c . '/', substr( $text, 2 ), $matches ) ) {
          $last = array_pop( $stack );

          // Only a closer that matches the innermost open element ends it.
          if ( $last !== $matches[1] ) {
              array_push( $stack, $last );
          }
      }
  }
  /**
   * Turns double line-breaks into paragraph elements.
   *
   * Runs a fixed sequence of regex replacements that wrap newline-separated
   * text in <p> tags. Line-breaks still present afterwards become <br /> tags
   * unless $br is falsy. The content of <pre> elements is set aside before
   * processing and restored unchanged at the end.
   *
   * @since 0.71
   *
   * @param string $pee The text which has to be formatted.
   * @param bool $br Optional. If set, this will convert all remaining line-breaks after paragraphing. Default true.
   * @return string Text which has been converted into correct paragraph tags.
   */
  function wpautop($pee, $br = true) {
      $stashed_pre = array();

      if ( '' === trim( $pee ) ) {
          return '';
      }

      // A trailing newline keeps the patterns below simpler.
      $pee .= "\n";

      if ( false !== strpos( $pee, '<pre' ) ) {
          // Swap each <pre>...</pre> for a placeholder so its content is not reformatted.
          $segments = explode( '</pre>', $pee );
          $tail     = array_pop( $segments );
          $pee      = '';
          $counter  = 0;

          foreach ( $segments as $segment ) {
              $pre_at = strpos( $segment, '<pre' );

              if ( false === $pre_at ) {
                  // Malformed html: a closing </pre> without an opener.
                  $pee .= $segment;
                  continue;
              }

              $placeholder                 = "<pre wp-pre-tag-$counter></pre>";
              $stashed_pre[ $placeholder ] = substr( $segment, $pre_at ) . '</pre>';

              $pee .= substr( $segment, 0, $pre_at ) . $placeholder;
              $counter++;
          }

          $pee .= $tail;
      }

      // Two consecutive <br /> tags count as a paragraph break.
      $pee = preg_replace( '|<br />\s*<br />|', "\n\n", $pee );

      // Put block-level tags on lines of their own.
      $allblocks = '(?:table|thead|tfoot|caption|col|colgroup|tbody|tr|td|th|div|dl|dd|dt|ul|ol|li|pre|select|option|form|map|area|blockquote|address|math|style|p|h[1-6]|hr|fieldset|noscript|samp|legend|section|article|aside|hgroup|header|footer|nav|figure|figcaption|details|menu|summary)';
      $pee       = preg_replace( '!(<' . $allblocks . '[^>]*>)!', "\n$1", $pee );
      $pee       = preg_replace( '!(</' . $allblocks . '>)!', "$1\n\n", $pee );

      // Normalize newlines across platforms.
      $pee = str_replace( array( "\r\n", "\r" ), "\n", $pee );

      if ( false !== strpos( $pee, '<object' ) ) {
          // No paragraphs inside object/embed markup.
          $pee = preg_replace( '|\s*<param([^>]*)>\s*|', "<param$1>", $pee );
          $pee = preg_replace( '|\s*</embed>\s*|', '</embed>', $pee );
      }

      // Collapse runs of blank lines.
      $pee = preg_replace( "/\n\n+/", "\n\n", $pee );

      // Wrap every chunk in a paragraph, including the last one.
      $chunks = preg_split( '/\n\s*\n/', $pee, -1, PREG_SPLIT_NO_EMPTY );
      $pee    = '';
      foreach ( $chunks as $chunk ) {
          $pee .= '<p>' . trim( $chunk, "\n" ) . "</p>\n";
      }

      // Drop paragraphs that contain only whitespace.
      $pee = preg_replace( '|<p>\s*</p>|', '', $pee );
      // Close a paragraph left open before </div>, </address> or </form>.
      $pee = preg_replace( '!<p>([^<]+)</(div|address|form)>!', "<p>$1</p></$2>", $pee );
      // A lone block-level tag does not get a paragraph.
      $pee = preg_replace( '!<p>\s*(</?' . $allblocks . '[^>]*>)\s*</p>!', "$1", $pee );
      // Nested lists.
      $pee = preg_replace( "|<p>(<li.+?)</p>|", "$1", $pee );
      // Move the paragraph inside the blockquote.
      $pee = preg_replace( '|<p><blockquote([^>]*)>|i', "<blockquote$1><p>", $pee );
      $pee = str_replace( '</blockquote></p>', '</p></blockquote>', $pee );
      // Remove <p> directly before, and </p> directly after, a block-level tag.
      $pee = preg_replace( '!<p>\s*(</?' . $allblocks . '[^>]*>)!', "$1", $pee );
      $pee = preg_replace( '!(</?' . $allblocks . '[^>]*>)\s*</p>!', "$1", $pee );

      if ( $br ) {
          // Shield newlines inside <script>/<style>, convert the rest, then restore them.
          $pee = preg_replace_callback( '/<(script|style).*?<\/\\1>/s', '_autop_newline_preservation_helper', $pee );
          $pee = preg_replace( '|(?<!<br />)\s*\n|', "<br />\n", $pee );
          $pee = str_replace( '<WPPreserveNewline />', "\n", $pee );
      }

      // Remove <br /> tags that ended up next to block-level tags.
      $pee = preg_replace( '!(</?' . $allblocks . '[^>]*>)\s*<br />!', "$1", $pee );
      $pee = preg_replace( '!<br />(\s*</?(?:p|li|div|dl|dd|dt|th|pre|td|ul|ol)[^>]*>)!', '$1', $pee );
      $pee = preg_replace( "|\n</p>$|", '</p>', $pee );

      if ( ! empty( $stashed_pre ) ) {
          // Put the original <pre> blocks back in place of their placeholders.
          $pee = str_replace( array_keys( $stashed_pre ), array_values( $stashed_pre ), $pee );
      }

      return $pee;
  }
  /**
   * Callback used by wpautop() to protect newlines inside a matched block.
   *
   * Every newline in the full match is replaced by a <WPPreserveNewline />
   * marker.
   *
   * @since 3.1.0
   * @access private
   * @param array $matches preg_replace_callback matches array
   * @return string The full match with its newlines replaced by the marker
   */
  function _autop_newline_preservation_helper( $matches ) {
      $lines = explode( "\n", $matches[0] );

      return implode( "<WPPreserveNewline />", $lines );
  }
  /**
   * Don't auto-p wrap shortcodes that stand alone
   *
   * Removes the <p>...</p> wrapper from a paragraph whose only content is a
   * single registered shortcode, either self-closing or with a matching
   * closing tag. The input is returned unchanged when no shortcodes are
   * registered.
   *
   * @since 2.9.0
   *
   * @param string $pee The content.
   * @return string The filtered content.
   */
  function shortcode_unautop( $pee ) {
      global $shortcode_tags;

      if ( empty( $shortcode_tags ) || ! is_array( $shortcode_tags ) ) {
          return $pee;
      }

      // Quote each tag name for use inside a "/"-delimited pattern. Passing the
      // delimiter matters: an unescaped "/" in a tag name would end the pattern early.
      $quoted_tags = array();
      foreach ( array_keys( $shortcode_tags ) as $tag ) {
          $quoted_tags[] = preg_quote( $tag, '/' );
      }
      $tagregexp = join( '|', $quoted_tags );

      $pattern =
            '/'
          . '<p>'                              // Opening paragraph
          . '\\s*+'                            // Optional leading whitespace
          . '('                                // 1: The shortcode
          .     '\\['                          // Opening bracket
          .     "($tagregexp)"                 // 2: Shortcode name
          .     '(?![\\w-])'                   // Not followed by word character or hyphen
                                               // Unroll the loop: Inside the opening shortcode tag
          .     '[^\\]\\/]*'                   // Not a closing bracket or forward slash
          .     '(?:'
          .         '\\/(?!\\])'               // A forward slash not followed by a closing bracket
          .         '[^\\]\\/]*'               // Not a closing bracket or forward slash
          .     ')*?'
          .     '(?:'
          .         '\\/\\]'                   // Self closing tag and closing bracket
          .     '|'
          .         '\\]'                      // Closing bracket
          .         '(?:'                      // Unroll the loop: Optionally, anything between the opening and closing shortcode tags
          .             '[^\\[]*+'             // Not an opening bracket
          .             '(?:'
          .                 '\\[(?!\\/\\2\\])' // An opening bracket not followed by the closing shortcode tag
          .                 '[^\\[]*+'         // Not an opening bracket
          .             ')*+'
          .             '\\[\\/\\2\\]'         // Closing shortcode tag
          .         ')?'
          .     ')'
          . ')'
          . '\\s*+'                            // Optional trailing whitespace
          . '<\\/p>'                           // Closing paragraph
          . '/s';

      return preg_replace( $pattern, '$1', $pee );
  }
  /**
   * Tests whether a byte string is shaped like UTF-8.
   *
   * Only the structure is checked: each lead byte must be followed by the
   * number of 10bbbbbb continuation bytes that it announces.
   *
   * NOTE: 5- and 6-byte lead forms are accepted as well, although UTF-8
   * sequences are at most 4 bytes long.
   *
   * @author bmorel at ssi dot fr (modified)
   * @since 1.2.1
   *
   * @param string $str The string to be checked
   * @return bool True if $str fits a UTF-8 model, false otherwise.
   */
  function seems_utf8($str) {
      // Each row: bit mask, expected value under the mask, continuation bytes that follow.
      $lead_forms = array(
          array( 0x80, 0x00, 0 ), // 0bbbbbbb
          array( 0xE0, 0xC0, 1 ), // 110bbbbb
          array( 0xF0, 0xE0, 2 ), // 1110bbbb
          array( 0xF8, 0xF0, 3 ), // 11110bbb
          array( 0xFC, 0xF8, 4 ), // 111110bb
          array( 0xFE, 0xFC, 5 ), // 1111110b
      );

      $length = strlen( $str );
      $pos    = 0;

      while ( $pos < $length ) {
          $lead     = ord( $str[ $pos ] );
          $trailing = -1;

          foreach ( $lead_forms as $form ) {
              if ( ( $lead & $form[0] ) == $form[1] ) {
                  $trailing = $form[2];
                  break;
              }
          }

          if ( $trailing < 0 ) {
              // Does not match any model.
              return false;
          }

          // The announced number of continuation bytes must follow.
          for ( ; $trailing > 0; $trailing-- ) {
              $pos++;
              if ( $pos == $length || ( ord( $str[ $pos ] ) & 0xC0 ) != 0x80 ) {
                  return false;
              }
          }

          $pos++;
      }

      return true;
  }
  /**
   * Encodes special characters as HTML entities.
   *
   * Handles &, <, >, " and '.
   *
   * With ENT_COMPAT, " is encoded as &quot;. With ENT_QUOTES both quote types
   * are encoded. With the default, ENT_NOQUOTES, quotes are left alone. The
   * legacy values 'single' and 'double' are also accepted. Any other
   * non-empty value is treated as ENT_QUOTES.
   *
   * @since 1.2.2
   *
   * @param string $string The text which is to be encoded.
   * @param mixed $quote_style Optional. ENT_COMPAT, ENT_QUOTES, ENT_NOQUOTES, 'single' or 'double'. Default is ENT_NOQUOTES.
   * @param string $charset Optional. The character encoding of the string. When falsy, the 'blog_charset' option is used. Default is false.
   * @param boolean $double_encode Optional. Whether to encode existing html entities. Default is false.
   * @return string The encoded text with HTML entities.
   */
  function _wp_specialchars( $string, $quote_style = ENT_NOQUOTES, $charset = false, $double_encode = false ) {
      // Remembers the site charset so wp_load_alloptions() is called at most once.
      static $_charset;

      $string = (string) $string;

      if ( '' === $string ) {
          return '';
      }

      // Nothing to encode: skip the remaining work.
      if ( ! preg_match( '/[&<>"\']/', $string ) ) {
          return $string;
      }

      // Keep the historical handling of quote styles that are not accepted values.
      if ( empty( $quote_style ) ) {
          $quote_style = ENT_NOQUOTES;
      } elseif ( ! in_array( $quote_style, array( 0, 2, 3, 'single', 'double' ), true ) ) {
          $quote_style = ENT_QUOTES;
      }

      if ( ! $charset ) {
          if ( ! isset( $_charset ) ) {
              $alloptions = wp_load_alloptions();
              $_charset   = isset( $alloptions['blog_charset'] ) ? $alloptions['blog_charset'] : '';
          }
          $charset = $_charset;
      }

      // Normalize common spellings of UTF-8.
      if ( in_array( $charset, array( 'utf8', 'utf-8', 'UTF8' ) ) ) {
          $charset = 'UTF-8';
      }

      // $quote_style is what htmlspecialchars() receives. $_quote_style is what
      // the decoder and the legacy single-quote step below see.
      $_quote_style = $quote_style;

      if ( 'double' === $quote_style ) {
          $quote_style  = ENT_COMPAT;
          $_quote_style = ENT_COMPAT;
      } elseif ( 'single' === $quote_style ) {
          $quote_style = ENT_NOQUOTES;
      }

      if ( $double_encode ) {
          $string = @htmlspecialchars( $string, $quote_style, $charset );
      } else {
          // Double encoding is handled here rather than by htmlspecialchars().
          // Decode &amp; into &
          $string = wp_specialchars_decode( $string, $_quote_style );

          // Guarantee every &entity; is valid or re-encode the &
          $string = wp_kses_normalize_entities( $string );

          // Re-encode everything except &entity;. With the delimiters captured,
          // the even-indexed parts are the text between entities.
          $parts = preg_split( '/(&#?x?[0-9a-z]+;)/i', $string, -1, PREG_SPLIT_DELIM_CAPTURE );
          foreach ( $parts as $index => $part ) {
              if ( 0 === $index % 2 ) {
                  $parts[ $index ] = @htmlspecialchars( $part, $quote_style, $charset );
              }
          }

          $string = implode( '', $parts );
      }

      // Backwards compatibility: 'single' encodes only the single quote.
      if ( 'single' === $_quote_style ) {
          $string = str_replace( "'", '&#039;', $string );
      }

      return $string;
  }
  /**
   * Decodes a small set of HTML entities back into their characters.
   *
   * Handles &, <, >, " and '.
   *
   * With ENT_COMPAT, " entities are decoded. With ENT_QUOTES both " and '
   * entities are decoded. With the default, ENT_NOQUOTES, quote entities are
   * left alone. The legacy _wp_specialchars() values 'single' and 'double' are
   * also accepted. Any other non-empty value is treated as ENT_QUOTES.
   *
   * @since 2.8
   *
   * @param string $string The text which is to be decoded.
   * @param mixed $quote_style Optional. ENT_COMPAT, ENT_QUOTES, ENT_NOQUOTES, 'single' or 'double'. Default is ENT_NOQUOTES.
   * @return string The decoded text without HTML entities.
   */
  function wp_specialchars_decode( $string, $quote_style = ENT_NOQUOTES ) {
      $string = (string) $string;

      if ( '' === $string ) {
          return '';
      }

      // Without an ampersand there cannot be any entity to decode.
      if ( false === strpos( $string, '&' ) ) {
          return $string;
      }

      // Mirror _wp_specialchars() for quote styles that are not accepted values.
      if ( empty( $quote_style ) ) {
          $quote_style = ENT_NOQUOTES;
      } elseif ( ! in_array( $quote_style, array( 0, 2, 3, 'single', 'double' ), true ) ) {
          $quote_style = ENT_QUOTES;
      }

      $decode_single = ( ENT_QUOTES === $quote_style || 'single' === $quote_style );
      $decode_double = ( ENT_QUOTES === $quote_style || ENT_COMPAT === $quote_style || 'double' === $quote_style );

      // Entity => character table, and patterns that strip zero padding from
      // numeric entities so the table can match them.
      $translation      = array();
      $translation_preg = array();

      if ( $decode_single ) {
          $translation      = array_merge( $translation, array( '&#039;' => '\'', '&#x27;' => '\'' ) );
          $translation_preg = array_merge( $translation_preg, array( '/&#0*39;/' => '&#039;', '/&#x0*27;/i' => '&#x27;' ) );
      }

      if ( $decode_double ) {
          $translation      = array_merge( $translation, array( '&quot;' => '"', '&#034;' => '"', '&#x22;' => '"' ) );
          $translation_preg = array_merge( $translation_preg, array( '/&#0*34;/' => '&#034;', '/&#x0*22;/i' => '&#x22;' ) );
      }

      // <, > and & are always decoded.
      $translation      = array_merge(
          $translation,
          array( '&lt;' => '<', '&#060;' => '<', '&gt;' => '>', '&#062;' => '>', '&amp;' => '&', '&#038;' => '&', '&#x26;' => '&' )
      );
      $translation_preg = array_merge(
          $translation_preg,
          array( '/&#0*60;/' => '&#060;', '/&#0*62;/' => '&#062;', '/&#0*38;/' => '&#038;', '/&#x0*26;/i' => '&#x26;' )
      );

      // Remove zero padding on numeric entities
      $normalized = preg_replace( array_keys( $translation_preg ), array_values( $translation_preg ), $string );

      // Replace entities according to the translation table
      return strtr( $normalized, $translation );
  }
  /**
   * Checks a string for invalid UTF-8.
   *
   * The string is returned unchanged when the site charset is not UTF-8, when
   * PCRE lacks UTF-8 support, or when the string is valid. Otherwise an empty
   * string is returned, unless $strip is set and iconv is available, in which
   * case the string is passed through iconv().
   *
   * @since 2.8
   *
   * @param string $string The text which is to be checked.
   * @param boolean $strip Optional. Whether to attempt to strip out invalid UTF8. Default is false.
   * @return string The checked text.
   */
  function wp_check_invalid_utf8( $string, $strip = false ) {
      // Cached across calls: whether the site charset is UTF-8, and whether PCRE supports /u.
      static $is_utf8, $utf8_pcre;

      $string = (string) $string;

      if ( '' === $string ) {
          return '';
      }

      // Look up the site charset once to avoid repeated get_option() calls.
      if ( ! isset( $is_utf8 ) ) {
          $is_utf8 = in_array( get_option( 'blog_charset' ), array( 'utf8', 'utf-8', 'UTF8', 'UTF-8' ) );
      }
      if ( ! $is_utf8 ) {
          return $string;
      }

      // Probe the installed PCRE library for UTF-8 support once.
      if ( ! isset( $utf8_pcre ) ) {
          $utf8_pcre = @preg_match( '/^./u', 'a' );
      }
      // UTF-8 support in PCRE cannot be demanded, so pass the string through without it.
      if ( ! $utf8_pcre ) {
          return $string;
      }

      // preg_match fails when it encounters invalid UTF8 in $string
      $looks_valid = ( 1 === @preg_match( '/^./us', $string ) );
      if ( $looks_valid ) {
          return $string;
      }

      // Attempt to strip the bad chars if requested (not recommended)
      if ( $strip && function_exists( 'iconv' ) ) {
          return iconv( 'utf-8', 'utf-8', $string );
      }

      return '';
  }
  /**
   * Encode the Unicode values to be used in the URI.
   *
   * ASCII bytes are copied through unchanged. Every byte of a multi-byte
   * UTF-8 sequence is written as a lowercase %xx escape. Two-, three- and
   * four-byte sequences are recognised from their lead byte.
   *
   * When $length is non-zero, output stops before it would exceed $length
   * characters. A multi-byte sequence is either emitted whole or not at all.
   *
   * @since 1.5.0
   *
   * @param string $utf8_string
   * @param int $length Max length of the string. 0 means no limit.
   * @return string String with Unicode encoded for URI.
   */
  function utf8_uri_encode( $utf8_string, $length = 0 ) {
      $unicode        = '';
      $values         = array(); // Bytes collected so far for the current multi-byte sequence.
      $num_octets     = 1;
      $unicode_length = 0;

      $string_length = strlen( $utf8_string );

      for ( $i = 0; $i < $string_length; $i++ ) {
          $value = ord( $utf8_string[ $i ] );

          if ( $value < 128 ) {
              // Plain ASCII: copy as-is, subject to the length limit.
              if ( $length && ( $unicode_length >= $length ) ) {
                  break;
              }
              $unicode .= chr( $value );
              $unicode_length++;
              continue;
          }

          if ( 0 === count( $values ) ) {
              // First byte of a sequence: it determines how many bytes belong together.
              if ( $value < 224 ) {
                  $num_octets = 2;
              } elseif ( $value < 240 ) {
                  $num_octets = 3;
              } else {
                  $num_octets = 4;
              }
          }

          $values[] = $value;

          // Each octet becomes three characters (%xx). Stop if the whole sequence would not fit.
          if ( $length && ( $unicode_length + ( $num_octets * 3 ) ) > $length ) {
              break;
          }

          if ( count( $values ) === $num_octets ) {
              foreach ( $values as $octet ) {
                  $unicode .= '%' . dechex( $octet );
              }
              $unicode_length += $num_octets * 3;

              $values     = array();
              $num_octets = 1;
          }
      }

      return $unicode;
  }
  519. /**
  520. * Converts all accent characters to ASCII characters.
  521. *
  522. * If there are no accent characters, then the string given is just returned.
  523. *
  524. * @since 1.2.1
  525. *
  526. * @param string $string Text that might have accent characters
  527. * @return string Filtered string with replaced "nice" characters.
  528. */
  529. function remove_accents($string) {
  530. if ( !preg_match('/[\x80-\xff]/', $string) )
  531. return $string;
  532. if (seems_utf8($string)) {
  533. $chars = array(
  534. // Decompositions for Latin-1 Supplement
  535. chr(194).chr(170) => 'a', chr(194).chr(186) => 'o',
  536. chr(195).chr(128) => 'A', chr(195).chr(129) => 'A',
  537. chr(195).chr(130) => 'A', chr(195).chr(131) => 'A',
  538. chr(195).chr(132) => 'A', chr(195).chr(133) => 'A',
  539. chr(195).chr(134) => 'AE',chr(195).chr(135) => 'C',
  540. chr(195).chr(136) => 'E', chr(195).chr(137) => 'E',
  541. chr(195).chr(138) => 'E', chr(195).chr(139) => 'E',
  542. chr(195).chr(140) => 'I', chr(195).chr(141) => 'I',
  543. chr(195).chr(142) => 'I', chr(195).chr(143) => 'I',
  544. chr(195).chr(144) => 'D', chr(195).chr(145) => 'N',
  545. chr(195).chr(146) => 'O', chr(195).chr(147) => 'O',
  546. chr(195).chr(148) => 'O', chr(195).chr(149) => 'O',
  547. chr(195).chr(150) => 'O', chr(195).chr(153) => 'U',
  548. chr(195).chr(154) => 'U', chr(195).chr(155) => 'U',
  549. chr(195).chr(156) => 'U', chr(195).chr(157) => 'Y',
  550. chr(195).chr(158) => 'TH',chr(195).chr(159) => 's',
  551. chr(195).chr(160) => 'a', chr(195).chr(161) => 'a',
  552. chr(195).chr(162) => 'a', chr(195).chr(163) => 'a',
  553. chr(195).chr(164) => 'a', chr(195).chr(165) => 'a',
  554. chr(195).chr(166) => 'ae',chr(195).chr(167) => 'c',
  555. chr(195).chr(168) => 'e', chr(195).chr(169) => 'e',
  556. chr(195).chr(170) => 'e', chr(195).chr(171) => 'e',
  557. chr(195).chr(172) => 'i', chr(195).chr(173) => 'i',
  558. chr(195).chr(174) => 'i', chr(195).chr(175) => 'i',
  559. chr(195).chr(176) => 'd', chr(195).chr(177) => 'n',
  560. chr(195).chr(178) => 'o', chr(195).chr(179) => 'o',
  561. chr(195).chr(180) => 'o', chr(195).chr(181) => 'o',
  562. chr(195).chr(182) => 'o', chr(195).chr(184) => 'o',
  563. chr(195).chr(185) => 'u', chr(195).chr(186) => 'u',
  564. chr(195).chr(187) => 'u', chr(195).chr(188) => 'u',
  565. chr(195).chr(189) => 'y', chr(195).chr(190) => 'th',
  566. chr(195).chr(191) => 'y', chr(195).chr(152) => 'O',
  567. // Decompositions for Latin Extended-A
  568. chr(196).chr(128) => 'A', chr(196).chr(129) => 'a',
  569. chr(196).chr(130) => 'A', chr(196).chr(131) => 'a',
  570. chr(196).chr(132) => 'A', chr(196).chr(133) => 'a',
  571. chr(196).chr(134) => 'C', chr(196).chr(135) => 'c',
  572. chr(196).chr(136) => 'C', chr(196).chr(137) => 'c',
  573. chr(196).chr(138) => 'C', chr(196).chr(139) => 'c',
  574. chr(196).chr(140) => 'C', chr(196).chr(141) => 'c',
  575. chr(196).chr(142) => 'D', chr(196).chr(143) => 'd',
  576. chr(196).chr(144) => 'D', chr(196).chr(145) => 'd',
  577. chr(196).chr(146) => 'E', chr(196).chr(147) => 'e',
  578. chr(196).chr(148) => 'E', chr(196).chr(149) => 'e',
  579. chr(196).chr(150) => 'E', chr(196).chr(151) => 'e',
  580. chr(196).chr(152) => 'E', chr(196).chr(153) => 'e',
  581. chr(196).chr(154) => 'E', chr(196).chr(155) => 'e',
  582. chr(196).chr(156) => 'G', chr(196).chr(157) => 'g',
  583. chr(196).chr(158) => 'G', chr(196).chr(159) => 'g',
  584. chr(196).chr(160) => 'G', chr(196).chr(161) => 'g',
  585. chr(196).chr(162) => 'G', chr(196).chr(163) => 'g',
  586. chr(196).chr(164) => 'H', chr(196).chr(165) => 'h',
  587. chr(196).chr(166) => 'H', chr(196).chr(167) => 'h',
  588. chr(196).chr(168) => 'I', chr(196).chr(169) => 'i',
  589. chr(196).chr(170) => 'I', chr(196).chr(171) => 'i',
  590. chr(196).chr(172) => 'I', chr(196).chr(173) => 'i',
  591. chr(196).chr(174) => 'I', chr(196).chr(175) => 'i',
  592. chr(196).chr(176) => 'I', chr(196).chr(177) => 'i',
  593. chr(196).chr(178) => 'IJ',chr(196).chr(179) => 'ij',
  594. chr(196).chr(180) => 'J', chr(196).chr(181) => 'j',
  595. chr(196).chr(182) => 'K', chr(196).chr(183) => 'k',
  596. chr(196).chr(184) => 'k', chr(196).chr(185) => 'L',
  597. chr(196).chr(186) => 'l', chr(196).chr(187) => 'L',
  598. chr(196).chr(188) => 'l', chr(196).chr(189) => 'L',
  599. chr(196).chr(190) => 'l', chr(196).chr(191) => 'L',
  600. chr(197).chr(128) => 'l', chr(197).chr(129) => 'L',
  601. chr(197).chr(130) => 'l', chr(197).chr(131) => 'N',
  602. chr(197).chr(132) => 'n', chr(197).chr(133) => 'N',
  603. chr(197).chr(134) => 'n', chr(197).chr(135) => 'N',
  604. chr(197).chr(136) => 'n', chr(197).chr(137) => 'N',
  605. chr(197).chr(138) => 'n', chr(197).chr(139) => 'N',
  606. chr(197).chr(140) => 'O', chr(197).chr(141) => 'o',
  607. chr(197).chr(142) => 'O', chr(197).chr(143) => 'o',
  608. chr(197).chr(144) => 'O', chr(197).chr(145) => 'o',
  609. chr(197).chr(146) => 'OE',chr(197).chr(147) => 'oe',
  610. chr(197).chr(148) => 'R',chr(197).chr(149) => 'r',
  611. chr(197).chr(150) => 'R',chr(197).chr(151) => 'r',
  612. chr(197).chr(152) => 'R',chr(197).chr(153) => 'r',
  613. chr(197).chr(154) => 'S',chr(197).chr(155) => 's',
  614. chr(197).chr(156) => 'S',chr(197).chr(157) => 's',
  615. chr(197).chr(158) => 'S',chr(197).chr(159) => 's',
  616. chr(197).chr(160) => 'S', chr(197).chr(161) => 's',
  617. chr(197).chr(162) => 'T', chr(197).chr(163) => 't',
  618. chr(197).chr(164) => 'T', chr(197).chr(165) => 't',
  619. chr(197).chr(166) => 'T', chr(197).chr(167) => 't',
  620. chr(197).chr(168) => 'U', chr(197).chr(169) => 'u',
  621. chr(197).chr(170) => 'U', chr(197).chr(171) => 'u',
  622. chr(197).chr(172) => 'U', chr(197).chr(173) => 'u',
  623. chr(197).chr(174) => 'U', chr(197).chr(175) => 'u',
  624. chr(197).chr(176) => 'U', chr(197).chr(177) => 'u',
  625. chr(197).chr(178) => 'U', chr(197).chr(179) => 'u',
  626. chr(197).chr(180) => 'W', chr(197).chr(181) => 'w',
  627. chr(197).chr(182) => 'Y', chr(197).chr(183) => 'y',
  628. chr(197).chr(184) => 'Y', chr(197).chr(185) => 'Z',
  629. chr(197).chr(186) => 'z', chr(197).chr(187) => 'Z',
  630. chr(197).chr(188) => 'z', chr(197).chr(189) => 'Z',
  631. chr(197).chr(190) => 'z', chr(197).chr(191) => 's',
  632. // Decompositions for Latin Extended-B
  633. chr(200).chr(152) => 'S', chr(200).chr(153) => 's',
  634. chr(200).chr(154) => 'T', chr(200).chr(155) => 't',
  635. // Euro Sign
  636. chr(226).chr(130).chr(172) => 'E',
  637. // GBP (Pound) Sign
  638. chr(194).chr(163) => '',
  639. // Vowels with diacritic (Vietnamese)
  640. // unmarked
  641. chr(198).chr(160) => 'O', chr(198).chr(161) => 'o',
  642. chr(198).chr(175) => 'U', chr(198).chr(176) => 'u',
  643. // grave accent
  644. chr(225).chr(186).chr(166) => 'A', chr(225).chr(186).chr(167) => 'a',
  645. chr(225).chr(186).chr(176) => 'A', chr(225).chr(186).chr(177) => 'a',
  646. chr(225).chr(187).chr(128) => 'E', chr(225).chr(187).chr(129) => 'e',
  647. chr(225).chr(187).chr(146) => 'O', chr(225).chr(187).chr(147) => 'o',
  648. chr(225).chr(187).chr(156) => 'O', chr(225).chr(187).chr(157) => 'o',
  649. chr(225).chr(187).chr(170) => 'U', chr(225).chr(187).chr(171) => 'u',
  650. chr(225).chr(187).chr(178) => 'Y', chr(225).chr(187).chr(179) => 'y',
  651. // hook
  652. chr(225).chr(186).chr(162) => 'A', chr(225).chr(186).chr(163) => 'a',
  653. chr(225).chr(186).chr(168) => 'A', chr(225).chr(186).chr(169) => 'a',
  654. chr(225).chr(186).chr(178) => 'A', chr(225).chr(186).chr(179) => 'a',
  655. chr(225).chr(186).chr(186) => 'E', chr(225).chr(186).chr(187) => 'e',
  656. chr(225).chr(187).chr(130) => 'E', chr(225).chr(187).chr(131) => 'e',
  657. chr(225).chr(187).chr(136) => 'I', chr(225).chr(187).chr(137) => 'i',
  658. chr(225).chr(187).chr(142) => 'O', chr(225).chr(187).chr(143) => 'o',
  659. chr(225).chr(187).chr(148) => 'O', chr(225).chr(187).chr(149) => 'o',
  660. chr(225).chr(187).chr(158) => 'O', chr(225).chr(187).chr(159) => 'o',
  661. chr(225).chr(187).chr(166) => 'U', chr(225).chr(187).chr(167) => 'u',
  662. chr(225).chr(187).chr(172) => 'U', chr(225).chr(187).chr(173) => 'u',
  663. chr(225).chr(187).chr(182) => 'Y', chr(225).chr(187).chr(183) => 'y',
  664. // tilde
  665. chr(225).chr(186).chr(170) => 'A', chr(225).chr(186).chr(171) => 'a',
  666. chr(225).chr(186).chr(180) => 'A', chr(225).chr(186).chr(181) => 'a',
  667. chr(225).chr(186).chr(188) => 'E', chr(225).chr(186).chr(189) => 'e',
  668. chr(225).chr(187).chr(132) => 'E', chr(225).chr(187).chr(133) => 'e',
  669. chr(225).chr(187).chr(150) => 'O', chr(225).chr(187).chr(151) => 'o',
  670. chr(225).chr(187).chr(160) => 'O', chr(225).chr(187).chr(161) => 'o',
  671. chr(225).chr(187).chr(174) => 'U', chr(225).chr(187).chr(175) => 'u',
  672. chr(225).chr(187).chr(184) => 'Y', chr(225).chr(187).chr(185) => 'y',
  673. // acute accent
  674. chr(225).chr(186).chr(164) => 'A', chr(225).chr(186).chr(165) => 'a',
  675. chr(225).chr(186).chr(174) => 'A', chr(225).chr(186).chr(175) => 'a',
  676. chr(225).chr(186).chr(190) => 'E', chr(225).chr(186).chr(191) => 'e',
  677. chr(225).chr(187).chr(144) => 'O', chr(225).chr(187).chr(145) => 'o',
  678. chr(225).chr(187).chr(154) => 'O', chr(225).chr(187).chr(155) => 'o',
  679. chr(225).chr(187).chr(168) => 'U', chr(225).chr(187).chr(169) => 'u',
  680. // dot below
  681. chr(225).chr(186).chr(160) => 'A', chr(225).chr(186).chr(161) => 'a',
  682. chr(225).chr(186).chr(172) => 'A', chr(225).chr(186).chr(173) => 'a',
  683. chr(225).chr(186).chr(182) => 'A', chr(225).chr(186).chr(183) => 'a',
  684. chr(225).chr(186).chr(184) => 'E', chr(225).chr(186).chr(185) => 'e',
  685. chr(225).chr(187).chr(134) => 'E', chr(225).chr(187).chr(135) => 'e',
  686. chr(225).chr(187).chr(138) => 'I', chr(225).chr(187).chr(139) => 'i',
  687. chr(225).chr(187).chr(140) => 'O', chr(225).chr(187).chr(141) => 'o',
  688. chr(225).chr(187).chr(152) => 'O', chr(225).chr(187).chr(153) => 'o',
  689. chr(225).chr(187).chr(162) => 'O', chr(225).chr(187).chr(163) => 'o',
  690. chr(225).chr(187).chr(164) => 'U', chr(225).chr(187).chr(165) => 'u',
  691. chr(225).chr(187).chr(176) => 'U', chr(225).chr(187).chr(177) => 'u',
  692. chr(225).chr(187).chr(180) => 'Y', chr(225).chr(187).chr(181) => 'y',
  693. // Vowels with diacritic (Chinese, Hanyu Pinyin)
  694. chr(201).chr(145) => 'a',
  695. // macron
  696. chr(199).chr(149) => 'U', chr(199).chr(150) => 'u',
  697. // acute accent
  698. chr(199).chr(151) => 'U', chr(199).chr(152) => 'u',
  699. // caron
  700. chr(199).chr(141) => 'A', chr(199).chr(142) => 'a',
  701. chr(199).chr(143) => 'I', chr(199).chr(144) => 'i',
  702. chr(199).chr(145) => 'O', chr(199).chr(146) => 'o',
  703. chr(199).chr(147) => 'U', chr(199).chr(148) => 'u',
  704. chr(199).chr(153) => 'U', chr(199).chr(154) => 'u',
  705. // grave accent
  706. chr(199).chr(155) => 'U', chr(199).chr(156) => 'u',
  707. );
  708. $string = strtr($string, $chars);
  709. } else {
  710. // Assume ISO-8859-1 if not UTF-8
  711. $chars['in'] = chr(128).chr(131).chr(138).chr(142).chr(154).chr(158)
  712. .chr(159).chr(162).chr(165).chr(181).chr(192).chr(193).chr(194)
  713. .chr(195).chr(196).chr(197).chr(199).chr(200).chr(201).chr(202)
  714. .chr(203).chr(204).chr(205).chr(206).chr(207).chr(209).chr(210)
  715. .chr(211).chr(212).chr(213).chr(214).chr(216).chr(217).chr(218)
  716. .chr(219).chr(220).chr(221).chr(224).chr(225).chr(226).chr(227)
  717. .chr(228).chr(229).chr(231).chr(232).chr(233).chr(234).chr(235)
  718. .chr(236).chr(237).chr(238).chr(239).chr(241).chr(242).chr(243)
  719. .chr(244).chr(245).chr(246).chr(248).chr(249).chr(250).chr(251)
  720. .chr(252).chr(253).chr(255);
  721. $chars['out'] = "EfSZszYcYuAAAAAACEEEEIIIINOOOOOOUUUUYaaaaaaceeeeiiiinoooooouuuuyy";
  722. $string = strtr($string, $chars['in'], $chars['out']);
  723. $double_chars['in'] = array(chr(140), chr(156), chr(198), chr(208), chr(222), chr(223), chr(230), chr(240), chr(254));
  724. $double_chars['out'] = array('OE', 'oe', 'AE', 'DH', 'TH', 'ss', 'ae', 'dh', 'th');
  725. $string = str_replace($double_chars['in'], $double_chars['out'], $string);
  726. }
  727. return $string;
  728. }
  /**
   * Produces a filesystem-friendly version of a filename.
   *
   * Characters that are illegal in filenames on some operating systems, or that
   * need escaping on the command line, are stripped. Runs of whitespace and
   * dashes collapse into a single dash, and periods, dashes and underscores are
   * trimmed from both ends of the name.
   *
   * When the name has more than one extension, every intermediate extension made
   * of 2 - 5 letters (optionally followed by one digit) that matches none of the
   * allowed mime type patterns gets an underscore appended to it.
   *
   * @since 2.1.0
   * @uses apply_filters() Calls 'sanitize_file_name_chars' on the list of characters to strip
   *    and 'sanitize_file_name' on the finished filename.
   *
   * @param string $filename The filename to be sanitized
   * @return string The sanitized filename
   */
  function sanitize_file_name( $filename ) {
      $filename_raw = $filename;

      $strip_list = apply_filters(
          'sanitize_file_name_chars',
          array("?", "[", "]", "/", "\\", "=", "<", ">", ":", ";", ",", "'", "\"", "&", "$", "#", "*", "(", ")", "|", "~", "`", "!", "{", "}", chr(0)),
          $filename_raw
      );

      $filename = str_replace( $strip_list, '', $filename );
      $filename = trim( preg_replace( '/[\s-]+/', '-', $filename ), '.-_' );

      // Pieces between the dots: base name first, real extension last
      $pieces = explode( '.', $filename );

      // Only names with at least one intermediate extension need further work
      if ( count( $pieces ) > 2 ) {
          $extension = array_pop( $pieces );
          $filename  = array_shift( $pieces );
          $mimes     = get_allowed_mime_types();

          foreach ( $pieces as $piece ) {
              $filename .= '.' . $piece;

              // Anything that does not look like a short extension is left alone
              if ( ! preg_match( "/^[a-zA-Z]{2,5}\d?$/", $piece ) )
                  continue;

              // The keys of the mime list are extension patterns such as alternations
              $is_known = false;
              foreach ( $mimes as $ext_preg => $mime_match ) {
                  if ( preg_match( '!^(' . $ext_preg . ')$!i', $piece ) ) {
                      $is_known = true;
                      break;
                  }
              }

              // Munge unknown intermediate extensions with a trailing underscore
              if ( ! $is_known )
                  $filename .= '_';
          }

          $filename .= '.' . $extension;
      }

      return apply_filters( 'sanitize_file_name', $filename, $filename_raw );
  }
  /**
   * Cleans a username by removing unsafe characters.
   *
   * Tags are stripped, the name is run through remove_accents(), and
   * percent-encoded octets and entities are removed. In strict mode only
   * alphanumerics, space, _, ., - and @ survive. Surrounding whitespace is
   * trimmed and inner whitespace runs are reduced to a single space.
   *
   * @since 2.0.0
   * @uses apply_filters() Calls 'sanitize_user' hook on the sanitized username,
   *    the raw username and the $strict parameter.
   *
   * @param string $username The username to be sanitized.
   * @param bool $strict If set limits $username to specific characters. Default false.
   * @return string The sanitized username, after passing through filters.
   */
  function sanitize_user( $username, $strict = false ) {
      $raw_username = $username;

      $clean = remove_accents( wp_strip_all_tags( $username ) );

      // Percent-encoded octets go first, then anything shaped like an entity
      $clean = preg_replace( '|%([a-fA-F0-9][a-fA-F0-9])|', '', $clean );
      $clean = preg_replace( '/&.+?;/', '', $clean );

      // Strict mode reduces the name to ASCII for max portability
      if ( $strict ) {
          $clean = preg_replace( '|[^a-z0-9 _.\-@]|i', '', $clean );
      }

      // Trim the ends, then consolidate contiguous whitespace
      $clean = preg_replace( '|\s+|', ' ', trim( $clean ) );

      return apply_filters( 'sanitize_user', $clean, $raw_username, $strict );
  }
  /**
   * Sanitizes a string key used as an internal identifier.
   *
   * The key is lowercased and every character other than a-z, 0-9, underscore
   * and dash is dropped.
   *
   * @since 3.0.0
   * @uses apply_filters() Calls 'sanitize_key' on the sanitized key and the raw key.
   *
   * @param string $key String key
   * @return string Sanitized key
   */
  function sanitize_key( $key ) {
      $sanitized = preg_replace( '/[^a-z0-9_\-]/', '', strtolower( $key ) );

      return apply_filters( 'sanitize_key', $sanitized, $key );
  }
  /**
   * Sanitizes a title, falling back to an alternative when nothing is left.
   *
   * In the 'save' context the title first goes through remove_accents(). The
   * actual clean-up is delegated to whatever is hooked onto the
   * 'sanitize_title' filter. If the filtered title is an empty string or
   * false, $fallback_title is returned instead.
   *
   * @since 1.0.0
   * @uses apply_filters() Calls 'sanitize_title' on the title, the raw title and the context.
   *
   * @param string $title The string to be sanitized.
   * @param string $fallback_title Optional. A title to use if $title is empty.
   * @param string $context Optional. The operation for which the string is sanitized
   * @return string The sanitized string.
   */
  function sanitize_title($title, $fallback_title = '', $context = 'save') {
      $raw_title = $title;

      $prepared = ( 'save' == $context ) ? remove_accents( $title ) : $title;
      $filtered = apply_filters( 'sanitize_title', $prepared, $raw_title, $context );

      if ( '' === $filtered || false === $filtered )
          return $fallback_title;

      return $filtered;
  }
  /**
   * Sanitizes a title in the 'query' context.
   *
   * Shorthand for sanitize_title() with an empty fallback title and the
   * context set to 'query'.
   *
   * @param string $title The string to be sanitized.
   * @return string The sanitized string.
   */
  function sanitize_title_for_query($title) {
      $context = 'query';

      return sanitize_title( $title, '', $context );
  }
  /**
   * Turns a title into a dash-separated slug.
   *
   * Tags are stripped, the title is lowercased, and the result is limited to
   * alphanumeric characters, underscore (_), dash (-) and percent-encoded
   * octets. Whitespace and periods become dashes, runs of dashes collapse to
   * one, and dashes are trimmed from both ends. In the 'save' context a set of
   * encoded punctuation characters is additionally converted or removed.
   *
   * @since 1.2.0
   *
   * @param string $title The title to be sanitized.
   * @param string $raw_title Optional. Not used.
   * @param string $context Optional. The operation for which the string is sanitized.
   * @return string The sanitized title.
   */
  function sanitize_title_with_dashes($title, $raw_title = '', $context = 'display') {
      $slug = strip_tags( $title );

      // Shield valid octets, drop every remaining percent sign, then unshield the octets
      $slug = preg_replace( '|%([a-fA-F0-9][a-fA-F0-9])|', '---$1---', $slug );
      $slug = str_replace( '%', '', $slug );
      $slug = preg_replace( '|---([a-fA-F0-9][a-fA-F0-9])---|', '%$1', $slug );

      if ( seems_utf8( $slug ) ) {
          if ( function_exists( 'mb_strtolower' ) )
              $slug = mb_strtolower( $slug, 'UTF-8' );

          $slug = utf8_uri_encode( $slug, 200 );
      }

      $slug = strtolower( $slug );
      $slug = preg_replace( '/&.+?;/', '', $slug ); // kill entities
      $slug = str_replace( '.', '-', $slug );

      if ( 'save' == $context ) {
          // nbsp, ndash and mdash turn into hyphens
          $to_hyphen = array( '%c2%a0', '%e2%80%93', '%e2%80%94' );

          // These are removed entirely
          $to_strip = array(
              // iexcl and iquest
              '%c2%a1', '%c2%bf',
              // angle quotes
              '%c2%ab', '%c2%bb', '%e2%80%b9', '%e2%80%ba',
              // curly quotes
              '%e2%80%98', '%e2%80%99', '%e2%80%9c', '%e2%80%9d',
              '%e2%80%9a', '%e2%80%9b', '%e2%80%9e', '%e2%80%9f',
              // copy, reg, deg, hellip and trade
              '%c2%a9', '%c2%ae', '%c2%b0', '%e2%80%a6', '%e2%84%a2',
              // acute accents
              '%c2%b4', '%cb%8a', '%cc%81', '%cd%81',
              // grave accent, macron, caron
              '%cc%80', '%cc%84', '%cc%8c',
          );

          $slug = str_replace( $to_hyphen, '-', $slug );
          $slug = str_replace( $to_strip, '', $slug );

          // The multiplication sign becomes a plain x
          $slug = str_replace( '%c3%97', 'x', $slug );
      }

      // Applied in order: drop disallowed characters, whitespace to dash, squeeze dash runs
      $slug = preg_replace(
          array( '/[^%a-z0-9 _-]/', '/\s+/', '|-+|' ),
          array( '', '-', '-' ),
          $slug
      );

      return trim( $slug, '-' );
  }
  /**
   * Ensures a string is a valid SQL order by clause.
   *
   * Accepts one or more comma-separated column names, each optionally followed
   * by ASC or DESC, and also accepts a lone RAND(). The entire string has to
   * fit one of those two shapes: anything trailing a valid prefix (for example
   * "id, 1; DROP TABLE x") and a dangling trailing comma are both rejected.
   *
   * @since 2.5.1
   *
   * @param string $orderby Order by string to be checked.
   * @return string|bool Returns the order by clause if it is a match, false otherwise.
   */
  function sanitize_sql_orderby( $orderby ){
      // Column list. The closing $ anchors the whole string, and the lookahead makes
      // sure every comma is followed by another column rather than by arbitrary text.
      $column_list = '/^\s*([a-z0-9_]+(\s+(ASC|DESC))?\s*(,\s*(?=[a-z0-9_])|$))+$/i';

      // Random ordering, optionally padded with whitespace
      $random = '/^\s*RAND\(\s*\)\s*$/i';

      if ( preg_match( $column_list, $orderby ) || preg_match( $random, $orderby ) )
          return $orderby;

      return false;
  }
  /**
   * Sanitizes an html classname so that it only contains valid characters.
   *
   * Percent-encoded octets are removed first, then the string is stripped down
   * to A-Z, a-z, 0-9, _ and -. If nothing is left, the supplied fallback value
   * is used instead.
   *
   * @todo Expand to support the full range of CDATA that a class attribute can contain.
   *
   * @since 2.8.0
   * @uses apply_filters() Calls 'sanitize_html_class' on the sanitized value, the original
   *    class and the fallback.
   *
   * @param string $class The classname to be sanitized
   * @param string $fallback Optional. The value to return if the sanitization ends up as an empty string.
   *    Defaults to an empty string.
   * @return string The sanitized value
   */
  function sanitize_html_class( $class, $fallback = '' ) {
      // Patterns run in order: % encoded octets first, then everything outside A-Z,a-z,0-9,_,-
      $sanitized = preg_replace(
          array( '|%[a-fA-F0-9][a-fA-F0-9]|', '/[^A-Za-z0-9_-]/' ),
          '',
          $class
      );

      if ( '' == $sanitized )
          $sanitized = $fallback;

      return apply_filters( 'sanitize_html_class', $sanitized, $class, $fallback );
  }
  /**
   * Converts a number of characters and tags in a string.
   *
   * <title> and <category> elements are removed together with their content,
   * lone ampersands are encoded as &#038;, numeric references in the 128-159
   * range are translated to their valid Unicode counterparts (or dropped), and
   * <br> and <hr> are rewritten in their XHTML form.
   *
   * @since 0.71
   *
   * @param string $content String of characters to be converted.
   * @param string $deprecated Not used.
   * @return string Converted string.
   */
  function convert_chars($content, $deprecated = '') {
      if ( !empty( $deprecated ) )
          _deprecated_argument( __FUNCTION__, '0.71' );

      // Invalid numeric references (Windows CP1252 leftovers) and their valid replacements
      $wp_htmltranswinuni = array(
          '&#128;' => '&#8364;', // the Euro sign
          '&#129;' => '',
          '&#130;' => '&#8218;', // these are Windows CP1252 specific characters
          '&#131;' => '&#402;', // they would look weird on non-Windows browsers
          '&#132;' => '&#8222;',
          '&#133;' => '&#8230;',
          '&#134;' => '&#8224;',
          '&#135;' => '&#8225;',
          '&#136;' => '&#710;',
          '&#137;' => '&#8240;',
          '&#138;' => '&#352;',
          '&#139;' => '&#8249;',
          '&#140;' => '&#338;',
          '&#141;' => '',
          '&#142;' => '&#381;',
          '&#143;' => '',
          '&#144;' => '',
          '&#145;' => '&#8216;',
          '&#146;' => '&#8217;',
          '&#147;' => '&#8220;',
          '&#148;' => '&#8221;',
          '&#149;' => '&#8226;',
          '&#150;' => '&#8211;',
          '&#151;' => '&#8212;',
          '&#152;' => '&#732;',
          '&#153;' => '&#8482;',
          '&#154;' => '&#353;',
          '&#155;' => '&#8250;',
          '&#156;' => '&#339;',
          '&#157;' => '',
          '&#158;' => '&#382;',
          '&#159;' => '&#8364;' === '' ? '' : '&#376;'
      );

      // Applied in order: strip the two metadata tags, then encode lone & characters as &#038;
      $content = preg_replace(
          array(
              '/<title>(.+?)<\/title>/',
              '/<category>(.+?)<\/category>/',
              '/&([^#])(?![a-z1-4]{1,8};)/i',
          ),
          array( '', '', '&#038;$1' ),
          $content
      );

      // Fix Word pasting
      $content = strtr( $content, $wp_htmltranswinuni );

      // Just a little XHTML help
      return str_replace( array( '<br>', '<hr>' ), array( '<br />', '<hr />' ), $content );
  }
  /**
   * Balances the tags of a text when forced to or when the site option asks for it.
   *
   * Balancing happens when $force is true or when the 'use_balanceTags' option
   * is not 0; otherwise the text is handed back untouched.
   *
   * @since 0.71
   *
   * @param string $text Text to be balanced
   * @param bool $force If true, forces balancing, ignoring the value of the option. Default false.
   * @return string Balanced text
   */
  function balanceTags( $text, $force = false ) {
      // The option is only consulted when balancing is not forced
      $should_balance = $force || get_option('use_balanceTags') != 0;

      return $should_balance ? force_balance_tags( $text ) : $text;
  }
  /**
   * Balances tags of string using a modified stack.
   *
   * Walks the text tag by tag. Opening tags are pushed onto a stack, closing
   * tags pop it; closing tags that match nothing on the stack are dropped, and
   * tags left open are closed, either when an enclosing tag closes or at the
   * end of the text. Known single-entity tags are made self-closing, and
   * unknown tags written as self-closing get an explicit closing tag.
   *
   * A '<' directly followed by a digit is encoded as '&lt;'. A literal '< !--'
   * that is not consumed as part of a tag is kept as typed instead of being
   * turned into a comment opener.
   *
   * @since 2.0.4
   *
   * @author Leonard Lin <leonard@acm.org>
   * @license GPL
   * @copyright November 4, 2001
   * @version 1.1
   * @todo Make better - change loop condition to $text in 1.2
   * @internal Modified by Scott Reilly (coffee2code) 02 Aug 2004
   *    1.1 Fixed handling of append/stack pop order of end text
   *        Added Cleaning Hooks
   *    1.0 First Version
   *
   * @param string $text Text to be balanced.
   * @return string Balanced text.
   */
  function force_balance_tags( $text ) {
      $tagstack = array();
      $stacksize = 0;
      $tagqueue = '';
      $newtext = '';
      // Known single-entity/self-closing tags
      $single_tags = array( 'area', 'base', 'basefont', 'br', 'col', 'command', 'embed', 'frame', 'hr', 'img', 'input', 'isindex', 'link', 'meta', 'param', 'source' );
      // Tags that can be immediately nested within themselves
      $nestable_tags = array( 'blockquote', 'div', 'object', 'q', 'span' );

      // WP bug fix for comments - in case you REALLY meant to type '< !--'.
      // The literal is parked behind a distinct placeholder because the loop below
      // emits real comment openers as '< !--', which the clean-up at the end collapses.
      $text = str_replace('< !--', '<    !--', $text);

      // WP bug fix for LOVE <3 (and other situations with '<' before a number)
      $text = preg_replace('#<([0-9]{1})#', '&lt;$1', $text);

      while ( preg_match("/<(\/?[\w:]*)\s*([^>]*)>/", $text, $regex) ) {
          // Closing tags queued by the previous iteration go out before anything else
          $newtext .= $tagqueue;

          $i = strpos($text, $regex[0]);
          $l = strlen($regex[0]);

          // clear the shifter
          $tagqueue = '';

          // Pop or Push
          if ( isset($regex[1][0]) && '/' == $regex[1][0] ) { // End Tag
              $tag = strtolower(substr($regex[1],1));

              // if too many closing tags
              if( $stacksize <= 0 ) {
                  $tag = '';
                  // or close to be safe $tag = '/' . $tag;
              }
              // if stacktop value = tag close value then pop
              else if ( $tagstack[$stacksize - 1] == $tag ) { // found closing tag
                  $tag = '</' . $tag . '>'; // Close Tag
                  // Pop
                  array_pop( $tagstack );
                  $stacksize--;
              } else { // closing tag not at top, search for it
                  for ( $j = $stacksize-1; $j >= 0; $j-- ) {
                      if ( $tagstack[$j] == $tag ) {
                          // add tag to tagqueue: close everything from the top of the stack down to the match
                          for ( $k = $stacksize-1; $k >= $j; $k--) {
                              $tagqueue .= '</' . array_pop( $tagstack ) . '>';
                              $stacksize--;
                          }
                          break;
                      }
                  }
                  // The closing tag itself is dropped; any closers it triggered sit in the queue
                  $tag = '';
              }
          } else { // Begin Tag
              $tag = strtolower($regex[1]);

              // Tag Cleaning

              // If it's an empty tag "< >", do nothing
              if ( '' == $tag ) {
                  // do nothing
              }
              // ElseIf it presents itself as a self-closing tag...
              elseif ( substr( $regex[2], -1 ) == '/' ) {
                  // ...but it isn't a known single-entity self-closing tag, then don't let it be treated as such and
                  // immediately close it with a closing tag (the tag will encapsulate no text as a result)
                  if ( ! in_array( $tag, $single_tags ) )
                      $regex[2] = trim( substr( $regex[2], 0, -1 ) ) . "></$tag";
              }
              // ElseIf it's a known single-entity tag but it doesn't close itself, do so
              elseif ( in_array($tag, $single_tags) ) {
                  $regex[2] .= '/';
              }
              // Else it's not a single-entity tag
              else {
                  // If the top of the stack is the same as the tag we want to push, close previous tag
                  if ( $stacksize > 0 && !in_array($tag, $nestable_tags) && $tagstack[$stacksize - 1] == $tag ) {
                      $tagqueue = '</' . array_pop( $tagstack ) . '>';
                      $stacksize--;
                  }
                  $stacksize = array_push( $tagstack, $tag );
              }

              // Attributes
              $attributes = $regex[2];
              // Compare against '' rather than using empty(), which would also swallow the attribute string "0"
              if( '' !== $attributes && $attributes[0] != '>' )
                  $attributes = ' ' . $attributes;

              $tag = '<' . $tag . $attributes . '>';

              //If already queuing a close tag, then put this tag on, too
              if ( !empty($tagqueue) ) {
                  $tagqueue .= $tag;
                  $tag = '';
              }
          }

          $newtext .= substr($text, 0, $i) . $tag;
          $text = substr($text, $i + $l);
      }

      // Clear Tag Queue
      $newtext .= $tagqueue;

      // Add Remaining text
      $newtext .= $text;

      // Empty Stack
      while( null !== ( $x = array_pop($tagstack) ) )
          $newtext .= '</' . $x . '>'; // Add remaining tags to close

      // WP fix for the bug with HTML comments: collapse the comment openers produced by the loop,
      // then bring back the literal '< !--' sequences parked at the top
      $newtext = str_replace("< !--","<!--",$newtext);
      $newtext = str_replace("<    !--","< !--",$newtext);

      return $newtext;
  }
  /**
   * Prepares text that is about to be edited.
   *
   * The content always goes through the 'format_to_edit' filter. Unless
   * $richedit is true, the filtered content is then also escaped with
   * esc_textarea().
   *
   * @since 0.71
   *
   * @param string $content The text about to be edited.
   * @param bool $richedit Whether to skip the esc_textarea() escaping. Default false (meaning the content is escaped).
   * @return string The text after the filter (and possibly esc_textarea()) has been run.
   */
  function format_to_edit( $content, $richedit = false ) {
      $filtered = apply_filters( 'format_to_edit', $content );

      return $richedit ? $filtered : esc_textarea( $filtered );
  }
  /**
   * Runs content through the 'format_to_post' filter.
   *
   * The function does nothing by itself; it only exists as a hook point.
   *
   * @since 0.71
   *
   * @param string $content The text to pass through the filter.
   * @return string Text returned from the 'format_to_post' filter.
   */
  function format_to_post($content) {
      return apply_filters('format_to_post', $content);
  }
  1178. /**
  1179. * Add leading zeros when necessary.
  1180. *
  1181. * If you set the threshold to '4' and the number is '10', then you will get
  1182. * back '0010'. If you set the threshold to '4' and the number is '5000', then y…

Large files are truncated, but you can click here to view the full file