PageRenderTime 58ms CodeModel.GetById 20ms RepoModel.GetById 1ms app.codeStats 0ms

/system/core/core.regex.php

https://github.com/danboy/Croissierd
PHP | 1196 lines | 821 code | 140 blank | 235 comment | 64 complexity | 1aab30469fc8d51f34369e829bf09fe2 MD5 | raw file
  1. <?php
  2. /*
  3. =====================================================
  4. ExpressionEngine - by EllisLab
  5. -----------------------------------------------------
  6. http://expressionengine.com/
  7. -----------------------------------------------------
  8. Copyright (c) 2003 - 2010 EllisLab, Inc.
  9. =====================================================
  10. THIS IS COPYRIGHTED SOFTWARE
  11. PLEASE READ THE LICENSE AGREEMENT
  12. http://expressionengine.com/docs/license.html
  13. =====================================================
  14. File: core.regex.php
  15. -----------------------------------------------------
  16. Purpose: Regular expression library.
  17. =====================================================
  18. */
  // Abort when this file is requested directly: the EXT constant is only
  // defined once the application has been bootstrapped.
  defined('EXT') or exit('Invalid file request');
  23. class Regex {
  /**
   * Marker string cached by xss_protection_hash(); empty until first use.
   */
  var $xss_hash = '';

  /**
   * Literal substrings that xss_clean() always replaces (search => replacement).
   */
  var $never_allowed_str = array(
      'document.cookie' => '[removed]',
      'document.write'  => '[removed]',
      '.parentNode'     => '[removed]',
      '.innerHTML'      => '[removed]',
      'window.location' => '[removed]',
      '-moz-binding'    => '[removed]',
      '<!--'            => '&lt;!--',
      '-->'             => '--&gt;',
      '<![CDATA['       => '&lt;![CDATA['
  );

  /**
   * Regex fragments that xss_clean() always replaces (pattern => replacement).
   * Each key is wrapped in "#...#i" before use.
   */
  var $never_allowed_regex = array(
      'javascript\s*:'           => '[removed]',
      'expression\s*(\(|&\#40;)' => '[removed]', // CSS and IE
      'vbscript\s*:'             => '[removed]', // IE
      'Redirect\s+302'           => '[removed]'
  );
  44. /** -------------------------------------
  45. /** Validate Email Address
  46. /** -------------------------------------*/
  /**
   * Check whether a string is a syntactically plausible e-mail address.
   *
   * The local part may contain letters, digits, "+", "_" and "-" in
   * dot-separated groups; the domain needs at least one dot and an
   * alphabetic top-level domain of 2 to 63 letters.
   *
   * @param  string $address  Address to test.
   * @return bool             TRUE when the address matches, FALSE otherwise.
   */
  function valid_email($address)
  {
      // The D modifier makes "$" match only at the very end of the subject, so
      // an address followed by a newline is rejected instead of being accepted.
      $pattern = '/^([a-z0-9\+_\-]+)(\.[a-z0-9\+_\-]+)*@([a-z0-9\-]+\.)+[a-z]{2,63}$/iD';

      return preg_match($pattern, $address) === 1;
  }
  54. /* END */
  55. /** -------------------------------------
  56. /** Validate IP Address
  57. /** -------------------------------------*/
  /**
   * Validate a dotted-quad IPv4 address.
   *
   * The address must have exactly four segments, each made of 1-3 digits with
   * a value no greater than 255, and the first segment must not begin with "0".
   *
   * @param  string $ip  Address to test.
   * @return bool        TRUE when valid, FALSE otherwise.
   */
  function valid_ip($ip)
  {
      $ip_segments = explode('.', $ip);

      // Always 4 segments needed
      if (count($ip_segments) != 4)
      {
          return FALSE;
      }

      // Guard the string offset below: an empty first segment (".1.2.3") is
      // invalid and must not be indexed.
      if ($ip_segments[0] === '')
      {
          return FALSE;
      }

      // IP can not start with 0
      if ($ip_segments[0][0] == '0')
      {
          return FALSE;
      }

      foreach ($ip_segments as $segment)
      {
          // Segments must be non-empty, digits only, at most 3 characters
          // long and not greater than 255.
          if ($segment === '' OR preg_match('/[^0-9]/', $segment) OR strlen($segment) > 3 OR (int) $segment > 255)
          {
              return FALSE;
          }
      }

      return TRUE;
  }
  83. /* END */
  84. /** -------------------------------------
  85. /** Prep URL
  86. /** -------------------------------------*/
  /**
   * Make sure a URL carries an http:// or https:// prefix.
   *
   * @param  string $str  URL as entered.
   * @return string       Empty string for '' or a bare 'http://'; otherwise
   *                      the URL, prefixed with 'http://' when it has neither
   *                      scheme.
   */
  function prep_url($str = '')
  {
      if ($str == '' || $str == 'http://')
      {
          return '';
      }

      $has_scheme = (substr($str, 0, 7) == 'http://' || substr($str, 0, 8) == 'https://');

      return $has_scheme ? $str : 'http://'.$str;
  }
  99. /* END */
  100. /** -------------------------------------
  101. /** Prep Query String
  102. /** -------------------------------------*/
  103. // This function checks to see if "Force Query Strings" is on
  104. // If so it adds a question mark to the URL if needed
  /**
   * Add a "?" to a URL when the "force_query_string" preference is 'y'.
   *
   * A trailing "index/" is first removed from URLs that contain ".php" and
   * end in "/index/". If the URL has no "?" and the preference is on, the
   * "?" is inserted after ".php" or, when there is no ".php", appended.
   *
   * @param  string $str  URL to prepare.
   * @return string       Prepared URL.
   */
  function prep_query_string($str)
  {
      global $PREFS;

      if (stristr($str, '.php') AND preg_match("#\/index\/$#", $str))
      {
          $str = substr($str, 0, -6);
      }

      // Already has a query string marker: nothing to add.
      if (stristr($str, '?'))
      {
          return $str;
      }

      if ($PREFS->ini('force_query_string') != 'y')
      {
          return $str;
      }

      if (stristr($str, '.php'))
      {
          return preg_replace("#(.+?)\.php(.*?)#", "\\1.php?\\2", $str);
      }

      return $str.'?';
  }
  125. /* END */
  126. /** -------------------------------------
  127. /** Decode query string entities
  128. /** -------------------------------------*/
  /**
   * Turn the entities "&#46;", "&#63;" and "&amp;" back into ".", "?" and "&".
   *
   * @param  string $str  Encoded query string.
   * @return string       Decoded query string.
   */
  function decode_qstr($str)
  {
      $entities   = array('&#46;', '&#63;', '&amp;');
      $characters = array('.', '?', '&');

      return str_replace($entities, $characters, $str);
  }
  135. /* END */
  136. /** --------------------------------------------
  137. /** Format HTML so it appears correct in forms
  138. /** --------------------------------------------*/
  /**
   * Escape a value so it can be placed safely inside a form field.
   *
   * @param  string $str    Raw value.
   * @param  mixed  $strip  Anything other than 0 strips slashes first.
   * @return string         Value passed through htmlspecialchars(), with
   *                        single quotes written as "&#39;".
   */
  function form_prep($str = '', $strip = 0)
  {
      if ($str == '')
      {
          return '';
      }

      $value = ($strip != 0) ? stripslashes($str) : $str;

      return str_replace("'", "&#39;", htmlspecialchars($value));
  }
  155. /* END */
  156. /** -----------------------------------------
  157. /** Convert PHP tags to entities
  158. /** -----------------------------------------*/
  /**
   * Convert PHP opening and closing tags to entities.
   *
   * @param  string $str  Text that may contain PHP tags.
   * @return string       Text with the tag brackets entity-encoded.
   */
  function encode_php_tags($str)
  {
      // The closing tag is assembled from two pieces so that the literal
      // sequence never appears in this source file.
      $tags = array(
          '<?php'  => '&lt;?php',
          '<?PHP'  => '&lt;?PHP',
          '<?'     => '&lt;?',
          '?'.'>'  => '?&gt;'
      );

      return str_replace(array_keys($tags), array_values($tags), $str);
  }
  166. /* END */
  167. /** -------------------------------------
  168. /** Convert EE Tags
  169. /** -------------------------------------*/
  /**
   * Entity-encode the braces of template tags.
   *
   * @param  string $str            Text to process.
   * @param  bool   $convert_curly  TRUE encodes every "{" and "}"; otherwise
   *                                only {exp:...}, {/exp:...}, {embed=...},
   *                                {path:...} and {redirect=...} tags.
   * @return string                 Encoded text.
   */
  function encode_ee_tags($str, $convert_curly=FALSE)
  {
      if ($str == '')
      {
          return $str;
      }

      if ($convert_curly === TRUE)
      {
          return str_replace(array('{', '}'), array('&#123;', '&#125;'), $str);
      }

      $patterns = array(
          "/\{(\/){0,1}exp:(.+?)\}/",
          "/\{embed=(.+?)\}/",
          "/\{path:(.+?)\}/",
          "/\{redirect=(.+?)\}/"
      );

      $replacements = array(
          "&#123;\\1exp:\\2&#125;",
          "&#123;embed=\\1&#125;",
          "&#123;path:\\1&#125;",
          "&#123;redirect=\\1&#125;"
      );

      return preg_replace($patterns, $replacements, $str);
  }
  188. /* END */
  189. /** ----------------------------------------------
  190. /** Convert single and double quotes to entites
  191. /** ----------------------------------------------*/
  /**
   * Convert single and double quotes to entities.
   *
   * Slash-escaped single quotes (backslash followed by a quote) are handled
   * first so the backslash is consumed together with the quote; bare single
   * quotes, which were previously left untouched, are converted as well.
   *
   * @param  string $str  Text to convert.
   * @return string       Text with ' as "&#39;" and " as "&quot;".
   */
  function convert_quotes($str)
  {
      return str_replace(
          array("\\'", "'", '"'),
          array('&#39;', '&#39;', '&quot;'),
          $str
      );
  }
  196. /* END */
  197. /** -------------------------------------
  198. /** Convert reserved XML characters
  199. /** -------------------------------------*/
  /**
   * Convert reserved XML characters (& < > " ' -) to entities.
   *
   * Existing numeric entities are shielded behind a placeholder while the
   * conversion runs, so they are not double-encoded. With $protect_all,
   * named entities are shielded the same way. Slashes are stripped from the
   * result.
   *
   * @param  string $str          Text to convert.
   * @param  bool   $protect_all  TRUE to preserve named entities too.
   * @return string               Converted text.
   */
  function xml_convert($str, $protect_all = FALSE)
  {
      $marker       = '848ff8if9a6fb627facGGcdbcce6';
      $shield_named = ($protect_all === TRUE);

      // Hide entities that must survive untouched
      $str = preg_replace('/&#(\d+);/', $marker.'\\1;', $str);

      if ($shield_named)
      {
          $str = preg_replace('/&(\w+);/', $marker.'\\1;', $str);
      }

      $reserved = array('&', '<', '>', '"', "'", '-');
      $encoded  = array('&amp;', '&lt;', '&gt;', '&quot;', '&#39;', '&#45;');
      $str = str_replace($reserved, $encoded, $str);

      // Bring the shielded entities back
      $str = preg_replace('/'.$marker.'(\d+);/', '&#\\1;', $str);

      if ($shield_named)
      {
          $str = preg_replace('/'.$marker.'(\w+);/', '&\\1;', $str);
      }

      return stripslashes($str);
  }
  218. /* END */
  219. /** ----------------------------------------
  220. /** ASCII to Entities
  221. /** ----------------------------------------*/
  /**
   * Convert multi-byte UTF-8 characters to numeric entities.
   *
   * Bytes below 128 are copied through unchanged. Two-, three- and four-byte
   * UTF-8 sequences are collected and written as "&#N;", where N is the
   * decoded code point. Four-byte sequences (lead byte 240 or higher) were
   * previously decoded as three-byte ones, which produced a wrong entity and
   * dropped the final byte.
   *
   * @param  string $str  UTF-8 text.
   * @return string       Text with non-ASCII characters as numeric entities.
   */
  function ascii_to_entities($str)
  {
      $count = 1;
      $out   = '';
      $temp  = array();

      for ($i = 0, $s = strlen($str); $i < $s; $i++)
      {
          $ordinal = ord($str[$i]);

          if ($ordinal < 128)
          {
              // A lone high byte followed by ASCII: emit it as its own entity
              // and start over before copying the ASCII character.
              if (count($temp) == 1)
              {
                  $out .= '&#'.array_shift($temp).';';
                  $count = 1;
              }

              $out .= $str[$i];
              continue;
          }

          // First byte of a sequence decides how many bytes belong to it
          if (count($temp) == 0)
          {
              if ($ordinal < 224)
              {
                  $count = 2;
              }
              elseif ($ordinal < 240)
              {
                  $count = 3;
              }
              else
              {
                  $count = 4;
              }
          }

          $temp[] = $ordinal;

          if (count($temp) == $count)
          {
              if ($count == 4)
              {
                  $number = (($temp[0] % 8) * 262144) + (($temp[1] % 64) * 4096) + (($temp[2] % 64) * 64) + ($temp[3] % 64);
              }
              elseif ($count == 3)
              {
                  $number = (($temp[0] % 16) * 4096) + (($temp[1] % 64) * 64) + ($temp[2] % 64);
              }
              else
              {
                  $number = (($temp[0] % 32) * 64) + ($temp[1] % 64);
              }

              $out .= '&#'.$number.';';
              $count = 1;
              $temp = array();
          }
      }

      return $out;
  }
  261. /* END */
  262. /** ----------------------------------------
  263. /** Entities to ASCII
  264. /** ----------------------------------------*/
  /**
   * Convert decimal numeric entities back to characters.
   *
   * Entities below 128 are left as they are. Larger code points are written
   * as UTF-8 bytes (2, 3 or 4 bytes), unless the 'charset' preference is
   * iso-8859-1, in which case code points below 256 become a single byte and
   * anything higher becomes "?". Entities above U+10FFFF are left untouched.
   *
   * @param  string $str  Text containing "&#N;" entities.
   * @param  bool   $all  When true, also decode &amp; &lt; &gt; &quot; &#39;
   *                      and &#45;.
   * @return string       Decoded text.
   */
  function entities_to_ascii($str, $all = TRUE)
  {
      global $PREFS;

      if (preg_match_all('/\&#(\d+)\;/', $str, $matches))
      {
          $is_latin1 = (strtolower($PREFS->ini('charset')) == 'iso-8859-1');

          for ($i = 0, $s = count($matches[0]); $i < $s; $i++)
          {
              $digits = ltrim($matches[1][$i], '0');

              // More than 7 significant digits cannot be a valid code point
              // and would overflow the arithmetic below.
              if (strlen($digits) > 7)
              {
                  continue;
              }

              $code = (int) $digits;

              // ASCII range stays entity-encoded; out-of-range values are not
              // representable, so the entity is kept as written.
              if ($code < 128 OR $code > 0x10FFFF)
              {
                  continue;
              }

              if ($is_latin1)
              {
                  // Same result utf8_decode() gave for one encoded character
                  $out = ($code < 256) ? chr($code) : '?';
              }
              elseif ($code < 2048)
              {
                  $out = chr(192 + ($code >> 6))
                       . chr(128 + ($code & 63));
              }
              elseif ($code < 65536)
              {
                  $out = chr(224 + ($code >> 12))
                       . chr(128 + (($code >> 6) & 63))
                       . chr(128 + ($code & 63));
              }
              else
              {
                  $out = chr(240 + ($code >> 18))
                       . chr(128 + (($code >> 12) & 63))
                       . chr(128 + (($code >> 6) & 63))
                       . chr(128 + ($code & 63));
              }

              $str = str_replace($matches[0][$i], $out, $str);
          }
      }

      if ($all)
      {
          $str = str_replace(
              array('&amp;', '&lt;', '&gt;', '&quot;', '&#39;', '&#45;'),
              array('&', '<', '>', '"', "'", '-'),
              $str
          );
      }

      return $str;
  }
  316. /* END */
  317. /** -------------------------------------------------
  318. /** Trim slashes "/" from front and back of string
  319. /** -------------------------------------------------*/
  /**
   * Remove one slash from each end of a string.
   *
   * At each end a literal "/" is removed first if present, then an "&#47;"
   * entity if present.
   *
   * @param  string $str  Text to trim.
   * @return string       Trimmed text.
   */
  function trim_slashes($str)
  {
      $slashes = array('/', '&#47;');

      // Leading side
      foreach ($slashes as $slash)
      {
          $length = strlen($slash);

          if (substr($str, 0, $length) == $slash)
          {
              $str = substr($str, $length);
          }
      }

      // Trailing side
      foreach ($slashes as $slash)
      {
          $length = strlen($slash);

          if (substr($str, -$length) == $slash)
          {
              $str = substr($str, 0, -$length);
          }
      }

      return $str;
  }
  340. /* END */
  341. /** -------------------------------------------------
  342. /** Removes double commas from string
  343. /** -------------------------------------------------*/
  /**
   * Normalise a comma-separated list.
   *
   * The string is split on any run of commas and whitespace, empty pieces
   * are dropped, and the rest is joined with single commas.
   *
   * @param  string $str  Raw list.
   * @return string       List with single-comma separators and no leading or
   *                      trailing comma.
   */
  function remove_extra_commas($str)
  {
      $items = preg_split('/[\s,]+/', $str, -1, PREG_SPLIT_NO_EMPTY);

      return implode(',', $items);
  }
  350. /* END */
  351. /** -------------------------------------------------
  352. /** Strip quotes
  353. /** -------------------------------------------------*/
  /**
   * Remove every single and double quote from a string.
   *
   * @param  string $str  Text to clean.
   * @return string       Text without quote characters.
   */
  function strip_quotes($str)
  {
      $quotes = array('"', "'");

      return str_replace($quotes, '', $str);
  }
  358. /* END */
  359. /** ----------------------------------------
  360. /** Clean Keywords - used for searching
  361. /** ----------------------------------------*/
  /**
   * Clean a search keyword string.
   *
   * Strips tags, removes periods that end a word (keeping those after Mr, Ms,
   * Mrs and Dr), replaces a set of punctuation characters with spaces,
   * collapses whitespace, runs the result through xss_clean() and trims it.
   *
   * @param  string $str  Raw keywords.
   * @return string       Cleaned keywords.
   */
  function keyword_clean($str)
  {
      // Stand-in for periods that must survive the clean-up
      $period_token = 'T9nbyrrsXCXv0pqemUAq8ff';

      $str = strip_tags($str);

      // Abbreviations that keep their trailing period (listed without it)
      $abbreviations = array('Mr', 'Ms', 'Mrs', 'Dr');

      foreach ($abbreviations as $abbreviation)
      {
          $str = str_replace($abbreviation.'.', $abbreviation.$period_token, $str);
      }

      // Remove periods unless they are within a word
      $str = preg_replace("#\.*(\s|$)#", " ", $str);

      // Disallowed characters become spaces
      $disallowed = array(',', '(', ')', '+', '!', '?', '[', ']', '@', '^', '~', '*', '|', "\n", "\t");
      $str = str_replace($disallowed, ' ', $str);

      $str = preg_replace("(\s+)", " ", $str);

      // Put allowed periods back
      $str = str_replace($period_token, '.', $str);

      return trim($this->xss_clean($str));
  }
  407. /* END */
  408. /** -------------------------------------------------
  409. /** Convert disallowed characters into entities
  410. /** -------------------------------------------------*/
  /**
   * Convert parentheses and dollar signs to numeric entities.
   *
   * Handles the literal characters as well as the URL-encoded forms %28,
   * %29, %2528 and %24. Matching is case-insensitive.
   *
   * @param  string $str  Text to convert.
   * @return string       Converted text.
   */
  function convert_dissallowed_chars($str)
  {
      // Applied in order, one pattern at a time
      $patterns = array(
          '#\(#i',
          '#\)#i',
          '#\$#i',
          '#%28#i',
          '#%29#i',
          '#%2528#i',
          '#%24#i'
      );

      $entities = array(
          '&#40;',
          '&#41;',
          '&#36;',
          '&#40;',
          '&#41;',
          '&#40;',
          '&#36;'
      );

      return preg_replace($patterns, $entities, $str);
  }
  428. /* END */
  429. /** -------------------------------------------------
  430. /** A Random Hash Used for Protecting URLs
  431. /** -------------------------------------------------*/
  /**
   * Return the per-instance marker used to shield URL ampersands in
   * xss_clean().
   *
   * The value is generated on first use and cached in $this->xss_hash, so
   * every call on the same instance returns the same 32-character hex string.
   *
   * @return string  MD5 hex digest.
   */
  function xss_protection_hash()
  {
      if ($this->xss_hash == '')
      {
          if (function_exists('random_bytes'))
          {
              // Unpredictable source where the runtime provides one
              $seed = bin2hex(random_bytes(16));
          }
          else
          {
              // Before PHP 4.2 the generator is not seeded automatically.
              // version_compare() is used because comparing the version
              // string with a float is unreliable (e.g. "10.0.0").
              if (version_compare(phpversion(), '4.2.0', '<'))
              {
                  mt_srand(hexdec(substr(md5(microtime()), -8)) & 0x7fffffff);
              }

              $seed = time() + mt_rand(0, 1999999999);
          }

          $this->xss_hash = md5($seed);
      }

      return $this->xss_hash;
  }
  449. /* END */
  450. /** -------------------------------------------------
  451. /** XSS hacking stuff
  452. /** -------------------------------------------------*/
  /**
   * Filter a string (or every element of an array) for XSS vectors.
   *
   * The input is normalised (invisible characters removed, entities
   * completed, URL-decoded, entities inside tags decoded) and then passed
   * through a series of replacements that neutralise scripting constructs.
   *
   * @param  string|array $str       Data to clean. Arrays are cleaned
   *                                 element by element.
   * @param  bool         $is_image  TRUE when $str is image file content. In
   *                                 that mode short PHP tags and "xmlns" are
   *                                 tolerated and the return value is a
   *                                 boolean.
   * @return string|array|bool       Cleaned data; with $is_image === TRUE,
   *                                 TRUE if the filters changed nothing after
   *                                 normalisation, FALSE otherwise.
   */
  function xss_clean($str, $is_image = FALSE)
  {
      global $PREFS;

      /*
       * Every so often an array will be sent to this function,
       * and so we simply go through the array, clean, and return.
       *
       * foreach replaces the former each() loop, which no longer exists in
       * PHP 8. Array elements are cleaned with the default $is_image value.
       */
      if (is_array($str))
      {
          foreach (array_keys($str) as $key)
          {
              $str[$key] = $this->xss_clean($str[$key]);
          }

          return $str;
      }

      // Not used further down; the lookup is retained unchanged.
      $charset = strtoupper($PREFS->ini('charset'));

      /*
       * Remove Invisible Characters
       */
      $str = $this->_remove_invisible_characters($str);

      /*
       * Protect GET variables in URLs
       *
       * The ampersand of "&name=value" pairs is swapped for the protection
       * hash so that the entity validation below does not touch it.
       */
      $str = preg_replace('|\&([a-z\_0-9\-]+)\=([a-z\_0-9\-]+)|i', $this->xss_protection_hash()."\\1=\\2", $str);

      /*
       * Validate standard character entities
       *
       * Add a semicolon if missing. We do this to enable
       * the conversion of entities to ASCII later.
       */
      $str = preg_replace('#(&\#?[0-9a-z]{2,})([\x00-\x20])*;?#i', "\\1;\\2", $str);

      /*
       * Validate UTF16 two byte encoding (x00)
       *
       * Just as above, adds a semicolon if missing.
       */
      $str = preg_replace('#(&\#x?)([0-9A-F]+);?#i',"\\1\\2;",$str);

      /*
       * Un-Protect GET variables in URLs
       */
      $str = str_replace($this->xss_protection_hash(), '&', $str);

      /*
       * URL Decode
       *
       * Just in case stuff like this is submitted:
       *
       * <a href="http://%77%77%77%2E%67%6F%6F%67%6C%65%2E%63%6F%6D">Google</a>
       *
       * Note: rawurldecode() is used so that plus signs are not removed.
       */
      $str = rawurldecode($str);

      /*
       * Convert character entities to ASCII
       *
       * This permits our tests below to work reliably.
       * We only convert entities that are within tags since
       * these are the ones that will pose security problems.
       */
      $str = preg_replace_callback("/[a-z]+=([\'\"]).*?\\1/si", array($this, '_convert_attribute'), $str);
      $str = preg_replace_callback("/<\w+.*?(?=>|<|$)/si", array($this, '_html_entity_decode_callback'), $str);

      /*
       * Remove Invisible Characters Again!
       */
      $str = $this->_remove_invisible_characters($str);

      /*
       * Convert all tabs to spaces
       *
       * This prevents strings like this: ja	vascript
       * NOTE: we deal with spaces between characters later.
       * NOTE: preg_replace was found to be amazingly slow here on large
       * blocks of data, so we use str_replace.
       */
      if (strpos($str, "\t") !== FALSE)
      {
          $str = str_replace("\t", ' ', $str);
      }

      /*
       * Capture converted string for later comparison
       */
      $converted_string = $str;

      /*
       * Not Allowed Under Any Conditions
       */
      foreach ($this->never_allowed_str as $key => $val)
      {
          $str = str_replace($key, $val, $str);
      }

      foreach ($this->never_allowed_regex as $key => $val)
      {
          $str = preg_replace("#".$key."#i", $val, $str);
      }

      /*
       * Makes PHP tags safe
       *
       * Note: XML declarations are inadvertently replaced too, but it
       * doesn't seem to pose a problem.
       */
      if ($is_image === TRUE)
      {
          // Images have a tendency to have the PHP short opening and closing
          // tags every so often, so we skip those and only do the long
          // opening tags.
          $str = preg_replace('/<\?(php)/i', "&lt;?\\1", $str);
      }
      else
      {
          $str = str_replace(array('<?', '?'.'>'), array('&lt;?', '?&gt;'), $str);
      }

      /*
       * Compact any exploded words
       *
       * This corrects words like: j a v a s c r i p t
       * These words are compacted back to their correct state.
       */
      $words = array('javascript', 'expression', 'vbscript', 'script', 'applet', 'alert', 'document', 'write', 'cookie', 'window');

      foreach ($words as $word)
      {
          $temp = '';

          for ($i = 0, $wordlen = strlen($word); $i < $wordlen; $i++)
          {
              $temp .= substr($word, $i, 1)."\s*";
          }

          // We only want to do this when it is followed by a non-word character
          // That way valid stuff like "dealer to" does not become "dealerto"
          $str = preg_replace_callback('#('.substr($temp, 0, -3).')(\W)#is', array($this, '_compact_exploded_words'), $str);
      }

      /*
       * Remove disallowed Javascript in links or img tags
       *
       * The cheap preg_match() checks avoid running the heavier callbacks
       * when the tag is absent. The loop repeats until a pass changes
       * nothing, so vectors revealed by a removal are caught as well.
       */
      do
      {
          $original = $str;

          if (preg_match("/<a/i", $str))
          {
              $str = preg_replace_callback("#<a\s+([^>]*?)(>|$)#si", array($this, '_js_link_removal'), $str);
          }

          if (preg_match("/<img/i", $str))
          {
              $str = preg_replace_callback("#<img\s+([^>]*?)(\s?/?>|$)#si", array($this, '_js_img_removal'), $str);
          }

          if (preg_match("/script/i", $str) OR preg_match("/xss/i", $str))
          {
              $str = preg_replace("#<(/*)(script|xss)(.*?)\>#si", '[removed]', $str);
          }
      }
      while($original != $str);

      unset($original);

      /*
       * Remove JavaScript Event Handlers
       *
       * Note: This code is a little blunt. It removes the event handler and
       * anything up to the closing >, but it's unlikely to be a problem.
       */
      $event_handlers = array('[^a-z_\-]on\w*','xmlns');

      if ($is_image === TRUE)
      {
          /*
           * Adobe Photoshop puts XML metadata into JFIF images, including
           * namespacing, so we have to allow this for images.
           */
          unset($event_handlers[array_search('xmlns', $event_handlers)]);
      }

      $str = preg_replace("#<([^><]+?)(".implode('|', $event_handlers).")(\s*=\s*[^><]*)([><]*)#i", "<\\1\\4", $str);

      /*
       * Sanitize naughty HTML elements
       *
       * If a tag containing any of the words in the list
       * below is found, the tag gets converted to entities.
       *
       * So this: <blink>
       * Becomes: &lt;blink&gt;
       */
      $naughty = 'alert|applet|audio|basefont|base|behavior|bgsound|blink|body|embed|expression|form|frameset|frame|head|html|ilayer|iframe|input|isindex|layer|link|meta|object|plaintext|style|script|textarea|title|video|xml|xss';
      $str = preg_replace_callback('#<(/*\s*)('.$naughty.')([^><]*)([><]*)#is', array($this, '_sanitize_naughty_html'), $str);

      /*
       * Sanitize naughty scripting elements
       *
       * Similar to above, only instead of looking for
       * tags it looks for PHP and JavaScript commands
       * that are disallowed. Rather than removing the
       * code, it simply converts the parentheses to entities,
       * rendering the code un-executable.
       *
       * For example: eval('some code')
       * Becomes: eval&#40;'some code'&#41;
       */
      $str = preg_replace('#(alert|cmd|passthru|eval|exec|expression|system|fopen|fsockopen|file|file_get_contents|readfile|unlink)(\s*)\((.*?)\)#si', "\\1\\2&#40;\\3&#41;", $str);

      /*
       * Final clean up
       *
       * This adds a bit of extra precaution in case
       * something got through the above filters.
       */
      foreach ($this->never_allowed_str as $key => $val)
      {
          $str = str_replace($key, $val, $str);
      }

      foreach ($this->never_allowed_regex as $key => $val)
      {
          $str = preg_replace("#".$key."#i", $val, $str);
      }

      /*
       * Images are handled in a special way.
       *
       * We want to know whether any unwanted, likely XSS, code was found
       * after all of the character conversion was done. If the string after
       * filtering still equals the string captured before filtering, the
       * image is clean (TRUE). Otherwise something was removed or changed
       * during processing and the check fails (FALSE).
       */
      if ($is_image === TRUE)
      {
          if ($str == $converted_string)
          {
              return TRUE;
          }
          else
          {
              return FALSE;
          }
      }

      return $str;
  }
  691. // END xss_clean()
  692. /** -------------------------------------------------
  693. /** Remove Invisible Characters
  694. /** This prevents sandwiching null characters
  695. /** between ascii characters, like Java\0script.
  696. /** -------------------------------------------------*/
  /**
   * Strip control characters, both raw and URL-encoded.
   *
   * Newline (10), carriage return (13) and horizontal tab (9) are kept.
   * Removal repeats until a pass changes nothing, so sequences that only
   * appear after an earlier removal are caught too.
   *
   * @param  string $str  Text to clean.
   * @return string       Text without the listed control characters.
   */
  function _remove_invisible_characters($str)
  {
      static $patterns = NULL;

      if ($patterns === NULL)
      {
          $patterns = array(
              '/%0[0-8bcef]/',    // url encoded 00-08, 11, 12, 14, 15
              '/%1[0-9a-f]/',     // url encoded 16-31
              '/[\x00-\x08]/',    // 00-08
              '/\x0b/', '/\x0c/', // 11, 12
              '/[\x0e-\x1f]/'     // 14-31
          );
      }

      while (TRUE)
      {
          $stripped = preg_replace($patterns, '', $str);

          // Stable: the last pass removed nothing
          if ($stripped == $str)
          {
              return $stripped;
          }

          $str = $stripped;
      }
  }
  719. // END _remove_invisible_characters()
  720. /** -------------------------------------------------
  721. /** Compact Exploded Words
  722. /** Callback function for xss_clean() to remove whitespace from
  723. /** things like j a v a s c r i p t
  724. /** -------------------------------------------------*/
  /**
   * preg_replace_callback() handler used by xss_clean() to close up words
   * written with whitespace between the letters (j a v a s c r i p t).
   *
   * @param  array $matches  [1] is the spaced-out word, [2] the non-word
   *                         character that followed it.
   * @return string          The word without whitespace, followed by [2].
   */
  function _compact_exploded_words($matches)
  {
      $word     = $matches[1];
      $trailing = $matches[2];

      return preg_replace('/\s+/s', '', $word).$trailing;
  }
  729. // END _compact_exploded_words()
  730. /** -------------------------------------------------
  731. /** Sanitize Naughty HTML
  732. /** Callback function for xss_clean() to remove naughty HTML elements
  733. /** -------------------------------------------------*/
  /**
   * preg_replace_callback() handler used by xss_clean() to neutralise a
   * disallowed HTML element.
   *
   * @param  array $matches  [1] optional slash/whitespace, [2] tag name,
   *                         [3] remainder of the tag, [4] trailing brackets.
   * @return string          The tag with its opening bracket, and any
   *                         captured trailing brackets, entity-encoded.
   */
  function _sanitize_naughty_html($matches)
  {
      // Encoding the captured trailing brackets prevents recursive vectors
      $closing = str_replace(array('>', '<'), array('&gt;', '&lt;'), $matches[4]);

      return '&lt;'.$matches[1].$matches[2].$matches[3].$closing;
  }
  742. // END _sanitize_naughty_html()
  743. /** -------------------------------------------------
  744. /** JS Link Removal
  745. /** Callback function to sanitize links
  746. /** -------------------------------------------------*/
  /**
   * preg_replace_callback() handler used by xss_clean() to sanitise the
   * attributes of an anchor tag.
   *
   * @param  array $match  [0] the matched tag, [1] its attribute string.
   * @return string        The tag with its attributes replaced by the
   *                       filtered attributes, minus any href that leads
   *                       into a disallowed scripting construct.
   */
  function _js_link_removal($match)
  {
      $raw_attributes = $match[1];
      $attributes = $this->_filter_attributes(str_replace(array('<', '>'), '', $raw_attributes));

      $safe_attributes = preg_replace("#href=.*?(alert\(|alert&\#40;|javascript\:|charset\=|window\.|document\.|\.cookie|<script|<xss|base64\s*,)#si", "", $attributes);

      return str_replace($raw_attributes, $safe_attributes, $match[0]);
  }
  752. // END _js_link_removal()
  753. /** -------------------------------------------------
  754. /** JS Image Removal
  755. /** Callback function to sanitize image tags
  756. /** -------------------------------------------------*/
  /**
   * preg_replace_callback() handler used by xss_clean() to sanitise the
   * attributes of an image tag.
   *
   * @param  array $match  [0] the matched tag, [1] its attribute string.
   * @return string        The tag with its attributes replaced by the
   *                       filtered attributes, minus any src that leads into
   *                       a disallowed scripting construct.
   */
  function _js_img_removal($match)
  {
      $raw_attributes = $match[1];
      $attributes = $this->_filter_attributes(str_replace(array('<', '>'), '', $raw_attributes));

      $safe_attributes = preg_replace("#src=.*?(alert\(|alert&\#40;|javascript\:|charset\=|window\.|document\.|\.cookie|<script|<xss|base64\s*,)#si", "", $attributes);

      return str_replace($raw_attributes, $safe_attributes, $match[0]);
  }
  762. // END _js_img_removal()
  763. /** -------------------------------------------------
  764. /** Filter Attributes
  765. /** Filters tag attributes for consistency and safety
  766. /** -------------------------------------------------*/
  /**
   * Keep only well-formed, quoted attributes of a tag.
   *
   * Every name="value" or name='value' pair found is retained, with
   * slash-star comments stripped from it; everything else is dropped.
   *
   * @param  string $str  Attribute portion of a tag.
   * @return string       Concatenation of the retained attributes, or '' if
   *                      none matched.
   */
  function _filter_attributes($str)
  {
      // Input may arrive slash-escaped (attr=\"foo\"), so the pattern
      // optionally accepts a backslash (octal \134) before the opening quote.
      $found = preg_match_all('#\s*[a-z\-]+\s*=\s*(\134)?(\042|\047)([^\\2]*?)\\2#is', $str, $matches);

      if ( ! $found)
      {
          return '';
      }

      $kept = '';

      foreach ($matches[0] as $attribute)
      {
          $kept .= preg_replace("#/\*.*?\*/#s", '', $attribute);
      }

      return $kept;
  }
  781. // END _filter_attributes()
  782. /** -------------------------------------------------
  783. /** Create URL Title
  784. /** -------------------------------------------------*/
  /**
   * Build a URL-safe title from arbitrary text.
   *
   * The text is converted to ISO-8859-1, optionally lower-cased, run through
   * convert_accented_characters() one character at a time, stripped of tags
   * and entities, and has whitespace replaced by the separator chosen by the
   * 'word_separator' preference ('dash' gives "-", anything else "_").
   *
   * @param  string $str        Source text.
   * @param  bool   $lowercase  TRUE to lower-case the text first.
   * @return string             URL title.
   */
  function create_url_title($str, $lowercase = FALSE)
  {
      global $PREFS;

      // Convert to ISO-8859-1 with the first facility that is available
      if (function_exists('mb_convert_encoding'))
      {
          $str = mb_convert_encoding($str, 'ISO-8859-1', 'auto');
      }
      elseif(function_exists('iconv') AND ($iconvstr = @iconv('', 'ISO-8859-1', $str)) !== FALSE)
      {
          $str = $iconvstr;
      }
      else
      {
          $str = utf8_decode($str);
      }

      if ($lowercase === TRUE)
      {
          $str = strtolower($str);
      }

      $str = preg_replace_callback('/(.)/', array($this, "convert_accented_characters"), $str);
      $str = strip_tags($str);

      // Use dash or underscore as separator
      $separator = ($PREFS->ini('word_separator') == 'dash') ? '-' : '_';

      // Pattern fragment => replacement, applied in this order
      $rules = array(
          '&\#\d+?;'       => '',
          '&\S+?;'         => '',
          '\s+'            => $separator,
          '[^a-z0-9\-\._]' => '',
          $separator.'+'   => $separator,
          $separator.'$'   => $separator,
          '^'.$separator   => $separator,
          '\.+$'           => ''
      );

      foreach ($rules as $fragment => $replacement)
      {
          $str = preg_replace('#'.$fragment.'#i', $replacement, $str);
      }

      return trim(stripslashes($str));
  }
  825. /* END */
  826. /** ---------------------------------------
  827. /** Convert Accented Characters to Unaccented Equivalents
  828. /** ---------------------------------------*/
  /**
   * preg_replace_callback() handler that maps one accented character to its
   * unaccented equivalent.
   *
   * The table is keyed by the character's ord() value. It can be replaced
   * through the 'foreign_character_conversion_array' extension hook.
   *
   * @param  array $match  [1] holds the single character to convert.
   * @return string        Replacement from the table, or the character
   *                       itself when it has no entry.
   */
  function convert_accented_characters($match)
  {
      global $EXT;

      if (isset($EXT->extensions['foreign_character_conversion_array']))
      {
          // Hook: lets an extension supply its own conversion table
          $table = $EXT->call_extension('foreign_character_conversion_array');
      }
      else
      {
          $table = array(
              '223' => "ss", // ß
              '224' => "a",
              '225' => "a",
              '226' => "a",
              '229' => "a",
              '227' => "ae",
              '230' => "ae",
              '228' => "ae",
              '231' => "c",
              '232' => "e", // è
              '233' => "e", // é
              '234' => "e", // ê
              '235' => "e", // ë
              '236' => "i",
              '237' => "i",
              '238' => "i",
              '239' => "i",
              '241' => "n",
              '242' => "o",
              '243' => "o",
              '244' => "o",
              '245' => "o",
              '246' => "oe", // ö
              '249' => "u",
              '250' => "u",
              '251' => "u",
              '252' => "ue", // ü
              '255' => "y",
              '257' => "aa",
              '269' => "ch",
              '275' => "ee",
              '291' => "gj",
              '299' => "ii",
              '311' => "kj",
              '316' => "lj",
              '326' => "nj",
              '353' => "sh",
              '363' => "uu",
              '382' => "zh",
              '256' => "aa",
              '268' => "ch",
              '274' => "ee",
              '290' => "gj",
              '298' => "ii",
              '310' => "kj",
              '315' => "lj",
              '325' => "nj",
              '352' => "sh",
              '362' => "uu",
              '381' => "zh",
          );
      }

      $character = $match['1'];
      $code      = ord($character);

      return isset($table[$code]) ? $table[$code] : $character;
  }
  894. /* END */
  895. /** -------------------------------------------------
  896. /** Used for a callback in XSS Clean
  897. /** -------------------------------------------------*/
  /**
   * preg_replace_callback() handler used by xss_clean() on quoted attribute
   * values.
   *
   * @param  array $match  [0] is the matched attribute.
   * @return string        The attribute with ">" and "<" entity-encoded and
   *                       every backslash doubled.
   */
  function _convert_attribute($match)
  {
      $search  = array('>', '<', '\\');
      $replace = array('&gt;', '&lt;', '\\\\');

      return str_replace($search, $replace, $match[0]);
  }
  902. /* END */
  903. /** -------------------------------------------------
  904. /** Replacement for html_entity_decode()
  905. /** -------------------------------------------------*/
  906. /*
  907. NOTE: html_entity_decode() has a bug in some PHP versions when UTF-8 is the
  908. character set, and the PHP developers said they were not back porting the
  909. fix to versions other than PHP 5.x.
  910. */
  /**
   * preg_replace_callback() handler used by xss_clean(): decodes the
   * entities in the matched text using the upper-cased 'charset' preference.
   *
   * @param  array $match  [0] is the matched text.
   * @return string        Decoded text.
   */
  function _html_entity_decode_callback($match)
  {
      global $PREFS;

      $charset = strtoupper($PREFS->ini('charset'));

      return $this->_html_entity_decode($match[0], $charset);
  }
  /**
   * Replacement for html_entity_decode() that also decodes numeric entities
   * written without a terminating semicolon.
   *
   * html_entity_decode() alone is not enough: leaving out the semicolon is
   * technically incorrect, but most browsers still interpret the entity, so
   * those forms are decoded here as well. Each decoded number becomes the
   * single byte chr(number modulo 256).
   *
   * @param  string $str      Text to decode.
   * @param  string $charset  Character set passed to html_entity_decode();
   *                          unsupported names fall back to ISO-8859-1.
   * @return string           Decoded text.
   */
  function _html_entity_decode($str, $charset='ISO-8859-1')
  {
      if (stristr($str, '&') === FALSE) return $str;

      // Upper-case on both sides, so that names such as "cp866" and
      // "Shift_JIS" are recognised.
      $supported = array('ISO-8859-1', 'ISO-8859-15', 'UTF-8', 'CP866', 'CP1251', 'CP1252', 'KOI8-R', 'BIG5', 'GB2312', 'BIG5-HKSCS', 'SHIFT_JIS', 'EUC-JP');

      if ( ! in_array(strtoupper($charset), $supported))
      {
          $charset = 'ISO-8859-1';
      }

      // The native decoder is skipped for UTF-8 before PHP 5
      $native = (function_exists('html_entity_decode') && (strtolower($charset) != 'utf-8' OR version_compare(phpversion(), '5.0.0', '>=')));

      if ($native)
      {
          $str = html_entity_decode($str, ENT_QUOTES, $charset);
      }

      // After the native pass only semicolon-less entities can remain, so the
      // pattern does not consume a semicolon; without it, an optional
      // semicolon is consumed along with the entity.
      $semicolon = $native ? '' : ';{0,1}';

      $numeric = array(
          '~&#x(0*[0-9a-f]{2,5})'.$semicolon.'~i' => 16,
          '~&#([0-9]{2,4})'.$semicolon.'~'        => 10
      );

      // Splitting with PREG_SPLIT_DELIM_CAPTURE yields text and captured
      // numbers alternately, so every odd index is a number to decode. This
      // replaces the former /e (eval) patterns, which PHP 7 removed and which
      // read decimal values with a leading zero as octal.
      foreach ($numeric as $pattern => $base)
      {
          $pieces = preg_split($pattern, $str, -1, PREG_SPLIT_DELIM_CAPTURE);

          for ($i = 1, $n = count($pieces); $i < $n; $i += 2)
          {
              $pieces[$i] = chr(intval($pieces[$i], $base) % 256);
          }

          $str = implode('', $pieces);
      }

      // Literal entities - only needed without the native decoder, and only
      // when an ampersand is still present.
      if ( ! $native && stristr($str, '&') !== FALSE)
      {
          $str = strtr($str, array_flip(get_html_translation_table(HTML_ENTITIES)));
      }

      return $str;
  }
  945. /* END */
  /**
   * Decode HTML entities using _html_entity_decode() with its default
   * character set.
   *
   * @param  string $str  Text to decode.
   * @return string       Decoded text.
   */
  function unhtmlentities($str)
  {
      $decoded = $this->_html_entity_decode($str);

      return $decoded;
  }
  950. /** -------------------------------------------------
  951. /** Removes slashes from array
  952. /** -------------------------------------------------*/
  /**
   * Strip slashes from a value, descending into arrays.
   *
   * @param  mixed $vals  A string, or an array (possibly nested) of strings.
   * @return mixed        The same structure with stripslashes() applied to
   *                      every non-array value; keys are preserved.
   */
  function array_stripslashes($vals)
  {
      if ( ! is_array($vals))
      {
          return stripslashes($vals);
      }

      foreach (array_keys($vals) as $index)
      {
          $vals[$index] = $this->array_stripslashes($vals[$index]);
      }

      return $vals;
  }
  968. /* END */
  969. }
  970. // END CLASS
  971. ?>