PageRenderTime 151ms CodeModel.GetById 13ms RepoModel.GetById 0ms app.codeStats 1ms

/common/libraries/plugin/kses-0.2.2/oop/php5.class.kses.php

https://bitbucket.org/renaatdemuynck/chamilo
PHP | 1132 lines | 542 code | 89 blank | 501 comment | 86 complexity | d491471df24a584b78d1e591b4be0499 MD5 | raw file
Possible License(s): BSD-3-Clause, LGPL-2.1, LGPL-3.0, GPL-3.0, MIT, GPL-2.0
  1. <?php
  2. /*
  3. * ==========================================================================================
  4. *
  5. * This program is free software and open source software; you can redistribute
  6. * it and/or modify it under the terms of the GNU General Public License as
  7. * published by the Free Software Foundation; either version 2 of the License,
  8. * or (at your option) any later version.
  9. *
  10. * This program is distributed in the hope that it will be useful, but WITHOUT
  11. * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12. * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
  13. * more details.
  14. *
  15. * You should have received a copy of the GNU General Public License along
  16. * with this program; if not, write to the Free Software Foundation, Inc.,
  17. * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA or visit
  18. * http://www.gnu.org/licenses/gpl.html
  19. *
  20. * ==========================================================================================
  21. */
  22. /**
  23. * Class file for PHP5 OOP version of kses
  24. *
  25. * This is an updated version of kses to work with PHP5 that works under E_STRICT.
  26. *
  27. * This version is a bit of a rewrite to match my own coding style and use some of the
  28. * capabilities allowed in PHP5. Since this was a significant rewrite, but it still
  29. * maintains backward compatibility syntax-wise, the version number is now 1.0.0. Any
  30. * minor changes that do not break compatibility will be indicated in the second or third
  31. * digits. Anything that breaks compatibility will change the major version number.
  32. *
  33. * PHP5 specific changes:
  34. * + Private methods are now in place
  35. * + __construct() is now used rather than the standard class name 'kses()'
  36. * + Kses will not load in any version less than PHP5
  37. * Other modifications:
  38. * + PHPdoc style documentation has been added to the class. See http://www.phpdoc.org/ for more info.
  39. * + Method names have been changed to reflect status as verbs
  40. * + One line methods have been folded into the code
  41. * + Some methods are now deprecated due to nomenclature style change. See method documentation for specifics.
  42. * + Kses5 now works in E_STRICT
  43. * + Version number is 1.0.0 to reflect serious code changes
  44. * + Addition of methods AddProtocols(), filterKsestextHook(), RemoveProtocol(), RemoveProtocols() and SetProtocols()
  45. * + Deprecated _hook(), Protocols()
  46. *
  47. * @package kses
  48. * @subpackage kses5
  49. */
  // Refuse to load on anything older than PHP 5. version_compare() looks at
  // the whole version string; inspecting only the first character of
  // phpversion() would misread a two-digit major version ("10.0.0") as "1".
  if (version_compare(PHP_VERSION, '5.0.0', '<'))
  {
      die("Class kses requires PHP 5 or higher.");
  }
  54. /**
  55. * Only install KSES5 once
  56. */
  57. if (! defined('KSES_CLASS_PHP5'))
  58. {
  59. define('KSES_CLASS_PHP5', true);
  60. /**
  61. * Kses strips evil scripts!
  62. *
  63. * This class provides the capability for removing unwanted HTML/XHTML, attributes from
  64. * tags, and protocols contained in links. The net result is a much more powerful tool
  65. * than the PHP internal strip_tags()
  66. *
  67. * This is a fork of a slick piece of procedural code called 'kses' written by Ulf Harnhammar.
  68. *
  69. * The original class for PHP4 was basically a wrapper around all of the functions in
  70. * the procedural code written by Ulf, and was released 7/25/2003.
  71. *
  72. * This version is a bit of a rewrite to match my own coding style and use some of the
  73. * capabilities allowed in PHP5. Since this was a significant rewrite, but it still
  74. * maintains backward compatibility syntax-wise, the version number is now 1.0.0. Any
  75. * minor changes that do not break compatibility will be indicated in the second or third
  76. * digits. Anything that breaks compatibility will change the major version number.
  77. *
  78. * PHP5 specific changes:
  79. * + Private methods are now in place
  80. * + __construct() is now used rather than the standard class name 'kses()'
  81. * + Kses5 will not load in any version less than PHP5
  82. * Other modifications:
  83. * + PHPdoc style documentation has been added to the class. See http://www.phpdoc.org/ for more info.
  84. * + Method names have been changed to reflect status as verbs
  85. * + One line methods have been folded into the code
  86. * + Some methods are now deprecated due to nomenclature style change. See method documentation for specifics.
  87. * + Kses now works in E_STRICT
  88. * + Initial Version number set to 1.0.0 to reflect serious code changes
  89. * + Addition of methods AddProtocols(), filterKsestextHook(), RemoveProtocol(), RemoveProtocols() and SetProtocols()
  90. * + Deprecated _hook(), Protocols()
  91. * + Integrated code from kses 0.2.2 into class.
  92. * + Added methods DumpProtocols(), DumpMethods()
  93. *
  94. * @author Richard R. Vásquez, Jr. (Original procedural code by Ulf Härnhammar)
  95. * @link http://sourceforge.net/projects/kses/ Home Page for Kses
  96. * @link http://chaos.org/contact/ Contact page with current email address for Richard Vasquez
  97. * @copyright Richard R. Vásquez, Jr. 2005
  98. * @version PHP5 OOP 1.0.2
  99. * @license http://www.gnu.org/licenses/gpl.html GNU Public License
  100. * @package kses
  101. */
  102. class kses5
  103. {
  104. /**#@+
  105. * @access private
  106. * @var array
  107. */
  108. private $allowed_protocols;
  109. private $allowed_html;
  110. /**#@-*/
  /**
   * Creates a kses filter with its default configuration.
   *
   * The element white list starts out empty and the protocol white list is
   * seeded with 'http', 'ftp' and 'mailto'.
   *
   * @since PHP5 OOP 1.0.0
   */
  public function __construct()
  {
      // No (X)HTML element is allowed until one is registered.
      $this->allowed_html = array();

      // Further protocols such as news, gopher or irc can be registered later.
      // For reference, the original kses shipped with:
      // 'http', 'https', 'ftp', 'news', 'nntp', 'telnet', 'gopher', 'mailto'
      $this->allowed_protocols = array('http', 'ftp', 'mailto');
  }
  /**
   * Basic task of kses - parses $string and strips it as required.
   *
   * The input is stripped of NULL characters and Netscape 4 JavaScript
   * entities, its entities are normalized, it is passed through
   * filterKsesTextHook(), and finally every "<...>" fragment (and every stray
   * ">") is handed to stripTags() so that disallowed (X)HTML tags, attributes
   * and protocols are removed.
   *
   * @access public
   * @param string $string String to be stripped of 'evil scripts'
   * @return string The stripped string
   * @since PHP4 OOP 0.0.1
   */
  public function Parse($string = "")
  {
      // Magic quotes can only be active before PHP 5.4, and the function no
      // longer exists in PHP 8, so it is only consulted on runtimes where it
      // may still return true.
      if (version_compare(PHP_VERSION, '5.4.0', '<') && get_magic_quotes_gpc())
      {
          $string = stripslashes($string);
      }
      $string = $this->removeNulls($string);
      // Remove JavaScript entities from early Netscape 4 versions
      $string = preg_replace('%&\s*\{[^}]*(\}\s*;?|$)%', '', $string);
      $string = $this->normalizeEntities($string);
      $string = $this->filterKsesTextHook($string);
      // The /e (eval) modifier used here previously was removed in PHP 7;
      // a callback performs the same per-match call to stripTags().
      $string = preg_replace_callback('%(<' . '[^>]*' . '(>|$)' . '|>)%', array($this, 'parseTagCallback'), $string);
      return $string;
  }

  /**
   * preg_replace_callback() adapter used by Parse().
   *
   * stripTags() removes one backslash in front of every double quote it
   * receives, so the double quotes of the matched fragment are escaped here
   * first; the text therefore reaches the tag parser unchanged.
   *
   * @access private
   * @param array $matches Regex match; index 1 holds the tag fragment or ">"
   * @return string Replacement text produced by stripTags()
   */
  private function parseTagCallback($matches)
  {
      return $this->stripTags(str_replace('"', '\\"', $matches[1]));
  }
  /**
   * Registers one protocol or a whole list of protocols.
   *
   * Exactly one argument is expected: either a protocol name or a non-empty
   * array of protocol names. Every name is forwarded to AddProtocol(); the
   * result of those individual calls does not influence the return value.
   *
   * @access public
   * @param mixed , A string or array of protocols that will be added to the internal list of allowed protocols.
   * @return bool True when a string or non-empty array was processed, false otherwise.
   * @see AddProtocol()
   * @since PHP5 OOP 1.0.0
   */
  public function AddProtocols()
  {
      if (func_num_args() != 1)
      {
          trigger_error("kses5::AddProtocols() did not receive an argument.", E_USER_WARNING);
          return false;
      }

      $requested = func_get_arg(0);

      // A single protocol name.
      if (is_string($requested))
      {
          $this->AddProtocol($requested);
          return true;
      }

      // Anything that is not a non-empty array is rejected.
      if (! is_array($requested) || count($requested) == 0)
      {
          trigger_error("kses5::AddProtocols() did not receive a string or an array.", E_USER_WARNING);
          return false;
      }

      foreach ($requested as $name)
      {
          $this->AddProtocol($name);
      }
      return true;
  }
  /**
   * Legacy alias of AddProtocols().
   *
   * Expects exactly one argument and passes it on unchanged.
   *
   * @deprecated Use AddProtocols()
   * @see AddProtocols()
   * @return bool Result of AddProtocols(), or false when the argument count is wrong.
   * @since PHP4 OOP 0.0.1
   */
  public function Protocols()
  {
      if (func_num_args() == 1)
      {
          $requested = func_get_arg(0);
          return $this->AddProtocols($requested);
      }

      trigger_error("kses5::Protocols() did not receive an argument.", E_USER_WARNING);
      return false;
  }
  /**
   * Adds a single protocol to $this->allowed_protocols.
   *
   * The name is trimmed, relieved of one trailing ':' and lower-cased before
   * it is stored. The list is kept sorted and free of duplicates.
   *
   * @access public
   * @param string $protocol The name of the protocol to be added.
   * @return bool False (with an E_USER_WARNING) for a non-string or empty name, true otherwise.
   * @since PHP4 OOP 0.0.1
   */
  public function AddProtocol($protocol = "")
  {
      if (! is_string($protocol))
      {
          trigger_error("kses5::AddProtocol() requires a string.", E_USER_WARNING);
          return false;
      }
      // Trim before looking for the inadvertent ':' so that input such as
      // "http: " is stored as "http" rather than "http:".
      $protocol = trim($protocol);
      if (substr($protocol, -1) === ':')
      {
          $protocol = trim((string) substr($protocol, 0, -1));
      }
      $protocol = strtolower($protocol);
      if ($protocol === '')
      {
          trigger_error("kses5::AddProtocol() tried to add an empty/NULL protocol.", E_USER_WARNING);
          return false;
      }
      // Prevent duplicate protocols from being added. The comparison is
      // strict so numeric-looking names are not confused with each other.
      if (! in_array($protocol, $this->allowed_protocols, true))
      {
          $this->allowed_protocols[] = $protocol;
          sort($this->allowed_protocols);
      }
      return true;
  }
  /**
   * Removes a single protocol from $this->allowed_protocols.
   *
   * The name is trimmed, relieved of one trailing ':' and lower-cased before
   * it is looked up. Removing a protocol that is not in the list is not an
   * error.
   *
   * @access public
   * @param string $protocol The name of the protocol to be removed.
   * @return bool False (with an E_USER_WARNING) for a non-string or empty name, true otherwise.
   * @since PHP5 OOP 1.0.0
   */
  public function RemoveProtocol($protocol = "")
  {
      if (! is_string($protocol))
      {
          trigger_error("kses5::RemoveProtocol() requires a string.", E_USER_WARNING);
          return false;
      }
      // Trim before looking for the inadvertent ':' so that input such as
      // "http: " still addresses the stored entry "http".
      $protocol = trim($protocol);
      if (substr($protocol, -1) === ':')
      {
          $protocol = trim((string) substr($protocol, 0, -1));
      }
      $protocol = strtolower($protocol);
      if ($protocol === '')
      {
          trigger_error("kses5::RemoveProtocol() tried to remove an empty/NULL protocol.", E_USER_WARNING);
          return false;
      }
      // Only rebuild the list when the protocol is actually present.
      if (in_array($protocol, $this->allowed_protocols, true))
      {
          $this->allowed_protocols = array_diff($this->allowed_protocols, array($protocol));
          // sort() also re-indexes the list after array_diff() left a gap.
          sort($this->allowed_protocols);
      }
      return true;
  }
  /**
   * Allows for single/batch removal of protocols
   *
   * Exactly one argument is expected: either a protocol name or a non-empty
   * array of protocol names. Every name is forwarded to RemoveProtocol(); the
   * result of those individual calls does not influence the return value.
   *
   * @access public
   * @param mixed , A string or array of protocols that will be removed from the internal list of allowed protocols.
   * @return bool True when a string or non-empty array was processed, false otherwise.
   * @see RemoveProtocol()
   * @since PHP5 OOP 1.0.0
   */
  public function RemoveProtocols()
  {
      // A wrong argument count is rejected without a warning.
      $c_args = func_num_args();
      if ($c_args != 1)
      {
          return false;
      }
      $protocol_data = func_get_arg(0);
      if (is_array($protocol_data) && count($protocol_data) > 0)
      {
          foreach ($protocol_data as $protocol)
          {
              $this->RemoveProtocol($protocol);
          }
          // This branch previously fell off the end of the method and
          // returned NULL instead of the documented boolean.
          return true;
      }
      elseif (is_string($protocol_data))
      {
          $this->RemoveProtocol($protocol_data);
          return true;
      }
      else
      {
          trigger_error("kses5::RemoveProtocols() did not receive a string or an array.", E_USER_WARNING);
          return false;
      }
  }
  /**
   * Replaces the complete list of allowed protocols.
   *
   * Exactly one argument is expected: either a protocol name or a non-empty
   * array of protocol names. The current list is emptied and every name is
   * then registered through AddProtocol(). When the argument is rejected the
   * existing list is left untouched.
   *
   * @access public
   * @param mixed , A string or array of protocols that will be the new internal list of allowed protocols.
   * @return bool True when a string or non-empty array was processed, false otherwise.
   * @since PHP5 OOP 1.0.1
   * @see AddProtocol()
   */
  public function SetProtocols()
  {
      if (func_num_args() != 1)
      {
          trigger_error("kses5::SetProtocols() did not receive an argument.", E_USER_WARNING);
          return false;
      }

      $replacement = func_get_arg(0);

      // Treat a single name as a one-element list.
      if (is_string($replacement))
      {
          $replacement = array($replacement);
      }

      if (! is_array($replacement) || count($replacement) == 0)
      {
          trigger_error("kses5::SetProtocols() did not receive a string or an array.", E_USER_WARNING);
          return false;
      }

      $this->allowed_protocols = array();
      foreach ($replacement as $name)
      {
          $this->AddProtocol($name);
      }
      return true;
  }
  /**
   * Returns the protocol white list of this instance.
   *
   * @access public
   * @return array The value currently held in $this->allowed_protocols.
   * @since PHP5 OOP 1.0.2
   */
  public function DumpProtocols()
  {
      $protocols = $this->allowed_protocols;
      return $protocols;
  }
  /**
   * Returns the (X)HTML element white list of this instance.
   *
   * The array is keyed by element name; each entry holds the attributes
   * allowed for that element, as stored by AddHTML().
   *
   * @access public
   * @return array The value currently held in $this->allowed_html.
   * @since PHP5 OOP 1.0.2
   */
  public function DumpElements()
  {
      $elements = $this->allowed_html;
      return $elements;
  }
  /**
   * Adds valid (X)HTML with corresponding attributes that will be kept when stripping 'evil scripts'.
   *
   * The tag name, the attribute names and the keys of any nested check
   * array are stored lower-cased. Registering the same tag again replaces
   * its previous attribute list.
   *
   * @access public
   * @param string $tag (X)HTML tag that will be allowed after stripping text.
   * @param array $attribs Associative array of allowed attributes - key => attribute name - value => attribute parameter
   * @return bool Status of Adding (X)HTML and attributes.
   * @since PHP4 OOP 0.0.1
   */
  public function AddHTML($tag = "", $attribs = array())
  {
      if (! is_string($tag))
      {
          trigger_error("kses5::AddHTML() requires the tag to be a string", E_USER_WARNING);
          return false;
      }
      $tag = strtolower(trim($tag));
      if ($tag == "")
      {
          trigger_error("kses5::AddHTML() tried to add an empty/NULL tag", E_USER_WARNING);
          return false;
      }
      if (! is_array($attribs))
      {
          trigger_error("kses5::AddHTML() requires an array (even an empty one) of attributes for '$tag'", E_USER_WARNING);
          return false;
      }
      $new_attribs = array();
      foreach ($attribs as $attr_name => $checks)
      {
          // A nested array lists checks for the attribute; its keys are
          // lower-cased as well. (The former guard tested count($attribs)
          // here, i.e. the outer array instead of the nested one.)
          if (is_array($checks))
          {
              $lowered = array();
              foreach ($checks as $check_name => $check_value)
              {
                  $lowered[strtolower($check_name)] = $check_value;
              }
              $checks = $lowered;
          }
          $new_attribs[strtolower($attr_name)] = $checks;
      }
      $this->allowed_html[$tag] = $new_attribs;
      return true;
  }
  /**
   * Strips NULL characters from $string.
   *
   * Two forms are removed, in this order: runs of the actual NUL byte, and
   * runs of the two-character sequence backslash + "0".
   *
   * @access private
   * @param string $string
   * @return string String without NUL bytes and without literal "\0" sequences
   * @since PHP4 OOP 0.0.1
   */
  private function removeNulls($string)
  {
      // preg_replace() applies the patterns one after the other, each on
      // the result of the previous one.
      $null_patterns = array('/\0+/', '/(\\\\0)+/');
      return preg_replace($null_patterns, '', $string);
  }
  /**
   * Normalizes HTML entities
   *
   * This function normalizes HTML entities. It will convert "AT&T" to the correct
   * "AT&amp;T", "&#00058;" to "&#58;", "&#XYZZY;" to "&amp;#XYZZY;" and so on.
   *
   * Every "&" is escaped first; named entities, decimal entities of up to
   * five digits and hexadecimal entities of two or four digits are then
   * restored.
   *
   * @access private
   * @param string $string
   * @return string String with normalized entities
   * @since PHP4 OOP 0.0.1
   */
  private function normalizeEntities($string)
  {
      # Disarm all entities by converting & to &amp;
      $string = str_replace('&', '&amp;', $string);
      # TODO: Change back (Keep?) the allowed entities in our entity white list
      # Keeps entities that start with [A-Za-z]
      $string = preg_replace('/&amp;([A-Za-z][A-Za-z0-9]{0,19});/', '&\\1;', $string);
      # Change numeric entities to valid 16 bit values. The /e (eval)
      # modifier used here previously was removed in PHP 7, so a callback
      # now performs the same per-match call.
      $string = preg_replace_callback('/&amp;#0*([0-9]{1,5});/', array($this, 'normalizeDecimalEntityCallback'), $string);
      # Change &XHHHHHHH (Hex digits) to 16 bit hex values
      $string = preg_replace('/&amp;#([Xx])0*(([0-9A-Fa-f]{2}){1,2});/', '&#\\1\\2;', $string);
      return $string;
  }

  /**
   * preg_replace_callback() adapter used by normalizeEntities().
   *
   * @access private
   * @param array $matches Regex match; index 1 holds the digits without leading zeros
   * @return string Result of normalizeEntities16bit() for those digits
   */
  private function normalizeDecimalEntityCallback($matches)
  {
      return $this->normalizeEntities16bit($matches[1]);
  }
  /**
   * Helper method used by normalizeEntities()
   *
   * Receives the digits of a decimal entity. Values up to 65535 are
   * returned as a live entity ("&#N;"); larger values are returned with the
   * ampersand escaped ("&amp;#N;") so they stay inert text.
   *
   * @access private
   * @param string $i Digits of the numeric entity
   * @return string Normalized numeric entity
   * @see normalizeEntities()
   * @since PHP4 OOP 0.0.1
   */
  private function normalizeEntities16bit($i)
  {
      $lead = ($i > 65535) ? '&amp;#' : '&#';
      return $lead . $i . ';';
  }
  /**
   * Hook for additional user defined modifications to text.
   *
   * Parse() passes the text through this method before tags are stripped.
   * The implementation here returns its input unchanged.
   *
   * NOTE(review): the method is intended as an extension point, yet it is
   * declared private, so a same-named method in a subclass would not be
   * the one Parse() calls - confirm whether that is intended.
   *
   * @access private
   * @param string $string String to perform additional modifications on.
   * @return string The (unmodified) string.
   * @see Parse()
   * @since PHP5 OOP 1.0.0
   */
  private function filterKsesTextHook($string)
  {
      $unchanged = $string;
      return $unchanged;
  }
  /**
   * Legacy alias of filterKsesTextHook().
   *
   * @deprecated use filterKsesTextHook()
   * @param string $string Text to pass through the hook
   * @return string Whatever filterKsesTextHook() returns for $string
   * @see filterKsesTextHook()
   * @since PHP4 OOP 0.0.1
   */
  private function _hook($string)
  {
      $filtered = $this->filterKsesTextHook($string);
      return $filtered;
  }
  /**
   * Lower-cases the keys of a two-level associative array.
   *
   * Every first-level key is lower-cased and mapped to an array. When the
   * original value is itself an array, its entries are copied with
   * lower-cased keys; any other value is replaced by an empty array.
   *
   * @access private
   * @param array $in_array Associative array
   * @return array Modified array (empty when $in_array is not an array)
   * @since PHP4 OOP 0.0.1
   */
  private function makeArrayKeysLowerCase($in_array)
  {
      $lowered = array();
      if (! is_array($in_array))
      {
          return $lowered;
      }

      foreach ($in_array as $outer_key => $inner)
      {
          $outer_key = strtolower($outer_key);
          $lowered[$outer_key] = array();
          if (! is_array($inner))
          {
              continue;
          }
          foreach ($inner as $inner_key => $inner_value)
          {
              $lowered[$outer_key][strtolower($inner_key)] = $inner_value;
          }
      }
      return $lowered;
  }
  /**
   * Filters one tag fragment found by Parse().
   *
   * A lone ">" becomes "&gt;". A fragment that does not look like a tag
   * (for instance <:::>) or whose element is not in the white list yields an
   * empty string. A closing tag of an allowed element is returned without
   * any attributes; an opening tag is handed to stripAttributes() together
   * with its attribute text.
   *
   * @access private
   * @param string $string Tag fragment, with double quotes backslash-escaped
   * @return string Modified string minus disallowed/mangled (X)HTML and attributes
   * @since PHP4 OOP 0.0.1
   */
  private function stripTags($string)
  {
      // Drop the backslash in front of every double quote.
      $string = preg_replace('%\\\\"%', '"', $string);

      // Anything not starting with "<" is the stray ">" alternative.
      if (strpos($string, '<') !== 0)
      {
          return '&gt;';
      }

      // Seriously malformed fragments are dropped entirely.
      if (! preg_match('%^<\s*(/\s*)?([a-zA-Z0-9]+)([^>]*)>?$%', $string, $parts))
      {
          return '';
      }

      $closing = trim($parts[1]);
      $tag = $parts[2];
      $attribute_text = $parts[3];
      $lookup = strtolower($tag);

      // Elements outside the white list are dropped.
      if (! isset($this->allowed_html[$lookup]) || ! is_array($this->allowed_html[$lookup]))
      {
          return '';
      }

      // No attributes are allowed for closing elements.
      if ($closing !== '')
      {
          return "<$closing$tag>";
      }

      return $this->stripAttributes($tag, $attribute_text);
  }
  /**
   * Rebuilds an opening tag with only its allowed attributes.
   *
   * When the element has no allowed attributes the bare tag is returned.
   * Otherwise the attribute text is split by combAttributes() and an
   * attribute is kept when it is white-listed for the element and passes
   * every check configured for it. "<" and ">" are removed from the kept
   * attributes, and a closing XHTML slash found at the end of the attribute
   * text is carried over to the result.
   *
   * @access private
   * @param string $element (X)HTML tag to check
   * @param string $attr Text containing attributes to check for validity.
   * @return string Resulting valid (X)HTML tag
   * @see combAttributes()
   * @since PHP4 OOP 0.0.1
   */
  private function stripAttributes($element, $attr)
  {
      # Is there a closing XHTML slash at the end of the attributes?
      $xhtml_slash = preg_match('%\s/\s*$%', $attr) ? ' /' : '';

      # Are any attributes allowed at all for this element?
      $lookup = strtolower($element);
      if (! isset($this->allowed_html[$lookup]) || count($this->allowed_html[$lookup]) == 0)
      {
          return "<$element$xhtml_slash>";
      }
      $rules = $this->allowed_html[$lookup];

      # Split the attribute text and collect what may stay
      $kept = '';
      $parsed_list = $this->combAttributes($attr);
      if (is_array($parsed_list))
      {
          foreach ($parsed_list as $parsed)
          {
              $attr_name = strtolower($parsed['name']);
              if (! isset($rules[$attr_name]))
              {
                  # attribute is not white-listed for this element
                  continue;
              }

              $checks = $rules[$attr_name];
              if (is_array($checks))
              {
                  # every configured check has to pass
                  foreach ($checks as $check_name => $check_value)
                  {
                      if (! $this->checkAttributeValue($parsed['value'], $parsed['vless'], $check_name, $check_value))
                      {
                          continue 2;
                      }
                  }
              }
              $kept .= ' ' . $parsed['whole'];
          }
      }

      # Remove any "<" or ">" characters
      $kept = preg_replace('/[<>]/', '', $kept);
      return "<$element$kept$xhtml_slash>";
  }
  /**
   * Splits an attribute list string into a list of attribute records.
   *
   * The text is consumed from the left by a small state machine. Each
   * record has the keys 'name', 'value', 'whole' (the attribute as it should
   * be written out) and 'vless' ('y' for a valueless attribute such as
   * "selected", 'n' otherwise). Values pass through removeBadProtocols().
   * Unquoted and double-quoted values are written out in double quotes,
   * single-quoted values keep their apostrophes. Text that cannot be parsed
   * is skipped.
   *
   * @access private
   * @param string $attr Text containing tag attributes for parsing
   * @return array List of attribute records as described above
   * @since PHP4 OOP 0.0.1
   */
  private function combAttributes($attr)
  {
      $records = array();
      # 0 = expecting a name, 1 = expecting "=" or whitespace, 2 = expecting a value
      $state = 0;
      $name = '';

      # Consume the attribute text piece by piece
      while (strlen($attr) != 0)
      {
          # Did this round consume anything?
          $consumed = false;

          if ($state == 0)
          {
              # attribute name, href for instance
              if (preg_match('/^([-a-zA-Z]+)/', $attr, $match))
              {
                  $name = $match[1];
                  $state = 1;
                  $consumed = true;
                  $attr = preg_replace('/^[-a-zA-Z]+/', '', $attr);
              }
          }
          elseif ($state == 1)
          {
              if (preg_match('/^\s*=\s*/', $attr))
              {
                  # equals sign, a value follows
                  $state = 2;
                  $consumed = true;
                  $attr = preg_replace('/^\s*=\s*/', '', $attr);
              }
              elseif (preg_match('/^\s+/', $attr))
              {
                  # valueless ("selected")
                  $state = 0;
                  $consumed = true;
                  $records[] = array('name' => $name, 'value' => '', 'whole' => $name, 'vless' => 'y');
                  $attr = preg_replace('/^\s+/', '', $attr);
              }
          }
          elseif ($state == 2)
          {
              # attribute value, a URL after href= for instance
              if (preg_match('/^"([^"]*)"(\s+|$)/', $attr, $match))
              {
                  # "value"
                  $clean = $this->removeBadProtocols($match[1]);
                  $records[] = array('name' => $name, 'value' => $clean,
                      'whole' => $name . '="' . $clean . '"', 'vless' => 'n');
                  $state = 0;
                  $consumed = true;
                  $attr = preg_replace('/^"[^"]*"(\s+|$)/', '', $attr);
              }
              elseif (preg_match("/^'([^']*)'(\s+|$)/", $attr, $match))
              {
                  # 'value'
                  $clean = $this->removeBadProtocols($match[1]);
                  $records[] = array('name' => $name, 'value' => $clean,
                      'whole' => "$name='$clean'", 'vless' => 'n');
                  $state = 0;
                  $consumed = true;
                  $attr = preg_replace("/^'[^']*'(\s+|$)/", '', $attr);
              }
              elseif (preg_match("%^([^\s\"']+)(\s+|$)%", $attr, $match))
              {
                  # bare value; quotes are added to conform to W3C's HTML spec
                  $clean = $this->removeBadProtocols($match[1]);
                  $records[] = array('name' => $name, 'value' => $clean,
                      'whole' => $name . '="' . $clean . '"', 'vless' => 'n');
                  $state = 0;
                  $consumed = true;
                  $attr = preg_replace("%^[^\s\"']+(\s+|$)%", '', $attr);
              }
          }

          if (! $consumed)
          {
              # not well formed, remove and try again
              $attr = preg_replace('/^("[^"]*("|$)|\'[^\']*(\'|$)|\S)*\s*/', '', $attr);
              $state = 0;
          }
      }

      # special case, for when the attribute list ends with a valueless
      # attribute like "selected"
      if ($state == 1)
      {
          $records[] = array('name' => $name, 'value' => '', 'whole' => $name, 'vless' => 'y');
      }
      return $records;
  }
  /**
   * This method removes disallowed protocols.
   *
   * This method removes all non-allowed protocols from the beginning of
   * $string. It ignores whitespace and the case of the letters, and it does
   * understand HTML entities. It repeats the replacement until the string no
   * longer changes, so it won't be fooled by a string like
   * "javascript:javascript:alert(57)".
   *
   * @access private
   * @param string $string String to check for protocols
   * @return string String with removed protocols ('' if the regex engine fails)
   * @since PHP4 OOP 0.0.1
   */
  private function removeBadProtocols($string)
  {
      $string = $this->removeNulls($string);
      # deals with Opera "feature"
      # NOTE(review): this removes the single byte 0xAD, which is also the
      # second byte of some UTF-8 characters (e.g. U+00ED) - confirm the
      # expected input encoding.
      $string = preg_replace('/\xad+/', '', $string);
      do
      {
          $previous = $string;
          # The /e (eval) modifier used here previously was removed in PHP 7
          # and, with its double-quoted code string, evaluated the matched
          # user input as PHP. A callback passes the match as plain data.
          $string = preg_replace_callback('/^((&[^;]*;|[\sA-Za-z0-9])*)' . '(:|&#58;|&#[Xx]3[Aa];)\s*/', array($this, 'filterProtocolsCallback'), $string);
          if ($string === null)
          {
              # Regex engine error: fail closed with an empty value.
              return '';
          }
      }
      while ($string !== $previous);
      return $string;
  }

  /**
   * preg_replace_callback() adapter used by removeBadProtocols().
   *
   * @access private
   * @param array $matches Regex match; index 1 holds the text in front of the colon
   * @return string Result of filterProtocols() for that text
   */
  private function filterProtocolsCallback($matches)
  {
      return $this->filterProtocols($matches[1]);
  }
  /**
   * Helper method used by removeBadProtocols()
   *
   * Receives the text found in front of a colon. Entities are decoded,
   * whitespace, NULL characters and 0xAD bytes are removed and the result is
   * lower-cased. If that result is in the protocol white list it is returned
   * followed by ':'; otherwise an empty string is returned, which removes
   * the protocol from the value being filtered.
   *
   * @access private
   * @param string $string String to check for protocols
   * @return string "protocol:" for an allowed protocol, '' otherwise
   * @see removeBadProtocols()
   * @since PHP4 OOP 0.0.1
   */
  private function filterProtocols($string)
  {
      $string = $this->decodeEntities($string);
      $string = preg_replace('/\s/', '', $string);
      $string = $this->removeNulls($string);
      # deals with Opera "feature". This line used to read from the undefined
      # variable $string2, which emptied $string and made every protocol,
      # allowed or not, fail the white list lookup below.
      $string = preg_replace('/\xad+/', '', $string);
      $string = strtolower($string);
      if (is_array($this->allowed_protocols) && count($this->allowed_protocols) > 0)
      {
          foreach ($this->allowed_protocols as $one_protocol)
          {
              if (strtolower($one_protocol) === $string)
              {
                  return "$string:";
              }
          }
      }
      return '';
  }
  /**
   * Dispatches one attribute check to its handler method.
   *
   * The handler name is 'checkAttributeValue' followed by $checkname with
   * only its first letter upper-cased (e.g. "maxlen" selects
   * checkAttributeValueMaxlen()). The handler is called with $value,
   * $checkvalue and $vless, in that order. A check without a matching
   * handler method counts as passed.
   *
   * @access private
   * @param string $value The value of the attribute to be checked.
   * @param string $vless Indicates whether the value is supposed to be valueless
   * @param string $checkname The check to be performed
   * @param string $checkvalue The value that is to be checked against
   * @return bool Indicates whether the check passed or not
   * @since PHP5 OOP 1.0.0
   */
  private function checkAttributeValue($value, $vless, $checkname, $checkvalue)
  {
      $handler = 'checkAttributeValue' . ucfirst(strtolower($checkname));

      // Unknown checks are treated as passed.
      if (! method_exists($this, $handler))
      {
          return true;
      }

      return $this->$handler($value, $checkvalue, $vless);
  }
  /**
   * Helper method invoked by checkAttributeValue().
   *
   * Implements the "maxlen" check: the attribute value passes when its
   * length, as reported by strlen(), does not exceed the given limit.
   *
   * @access private
   * @param string $value The value of the attribute to be checked.
   * @param int $checkvalue The maximum length allowed
   * @return bool True when the value is not longer than the limit
   * @see checkAttributeValue()
   * @since PHP5 OOP 1.0.0
   */
  private function checkAttributeValueMaxlen($value, $checkvalue)
  {
      $actual_length = strlen($value);
      $longest_allowed = intval($checkvalue);

      return $actual_length <= $longest_allowed;
  }
  /**
   * Helper method invoked by checkAttributeValue().
   *
   * Implements the "minlen" check: the attribute value passes when its
   * length, as reported by strlen(), is at least the given limit.
   *
   * @access private
   * @param string $value The value of the attribute to be checked.
   * @param int $checkvalue The minimum length allowed
   * @return bool True when the value is not shorter than the limit
   * @see checkAttributeValue()
   * @since PHP5 OOP 1.0.0
   */
  private function checkAttributeValueMinlen($value, $checkvalue)
  {
      $actual_length = strlen($value);
      $shortest_allowed = intval($checkvalue);

      return $actual_length >= $shortest_allowed;
  }
  /**
   * Helper method invoked by checkAttributeValue().
   *
   * Implements the "maxval" check in two steps. The value must consist of
   * one to six decimal digits, optionally surrounded by up to six whitespace
   * characters on either side; anything else is rejected. The integer value
   * must then not be greater than the given maximum.
   *
   * @access private
   * @param int $value The value of the attribute to be checked.
   * @param int $checkvalue The maximum numeric value allowed
   * @return bool True when the value is well-formed and within the maximum
   * @see checkAttributeValue()
   * @since PHP5 OOP 1.0.0
   */
  private function checkAttributeValueMaxval($value, $checkvalue)
  {
      $looks_like_small_integer = preg_match('/^\s{0,6}[0-9]{1,6}\s{0,6}$/', $value);

      return $looks_like_small_integer && intval($value) <= intval($checkvalue);
  }
  /**
   * Helper method invoked by checkAttributeValue().
   *
   * Implements the "minval" check. The value must consist of one to six
   * decimal digits (so zero is accepted), optionally surrounded by up to six
   * whitespace characters on either side, and its integer value must not be
   * smaller than the given minimum.
   *
   * @access private
   * @param int $value The value of the attribute to be checked.
   * @param int $checkvalue The minimum numeric value allowed
   * @return bool Indicates whether the check passed or not
   * @see checkAttributeValue()
   * @since PHP5 OOP 1.0.0
   */
  private function checkAttributeValueMinval($value, $checkvalue)
  {
      if (! preg_match('/^\s{0,6}[0-9]{1,6}\s{0,6}$/', $value))
      {
          return false;
      }

      // Convert the limit with intval(), as the maxval check does, so that the
      // comparison is always integer-to-integer and never falls back to a
      // string comparison for limits such as "10px".
      if (intval($value) < intval($checkvalue))
      {
          return false;
      }

      return true;
  }
  /**
   * Helper method invoked by checkAttributeValue().
   *
   * Implements the "valueless" check, which concerns whether an attribute
   * carries a value (like <a href="blah">) or not (<option selected>).
   * The check passes when the lower-cased $checkvalue equals $vless.
   * NOTE(review): $vless is presumably 'y' for an attribute written without
   * a value and 'n' otherwise; confirm against the caller.
   *
   * @access private
   * @param string $value The value of the attribute; not used by this check
   * @param string $checkvalue Required state, "y"/"Y" or "n"/"N"
   * @param string $vless Flag describing whether the attribute was valueless
   * @return bool Indicates whether the check passed or not
   * @see checkAttributeValue()
   * @since PHP5 OOP 1.0.0
   */
  private function checkAttributeValueValueless($value, $checkvalue, $vless)
  {
      $required_state = strtolower($checkvalue);

      return $required_state == $vless;
  }
  /**
   * Decodes numeric HTML entities
   *
   * This method decodes decimal (&#65;) and hexadecimal (&#x41;) numeric
   * entities into single bytes. Named entities such as &auml; are left
   * untouched, which is sufficient for the URL protocol white listing system.
   *
   * Callbacks are used instead of the "/e" regex modifier, which evaluated
   * the replacement as PHP code and was removed from the language in PHP 7.
   *
   * @access private
   * @param string $string The text whose numeric entities are to be decoded.
   * @return string Text with numeric entities replaced by their byte
   * @since PHP4 OOP 0.0.1
   */
  private function decodeEntities($string)
  {
      $string = preg_replace_callback('/&#([0-9]+);/', array($this, 'decodeDecimalEntityMatch'), $string);
      $string = preg_replace_callback('/&#[Xx]([0-9A-Fa-f]+);/', array($this, 'decodeHexEntityMatch'), $string);
      return $string;
  }

  /**
   * preg_replace_callback() adapter: turns the digits of a decimal entity into a byte.
   *
   * @access private
   * @param array $matches Regex match data; index 1 holds the decimal digits
   * @return string Single byte whose code is the number modulo 256
   * @see decodeEntities()
   */
  private function decodeDecimalEntityMatch($matches)
  {
      // chr() works modulo 256. Because 10^8 is a multiple of 256, the last
      // eight digits alone determine that remainder, which avoids integer
      // overflow on absurdly long digit runs.
      return chr(((int) substr($matches[1], -8)) % 256);
  }

  /**
   * preg_replace_callback() adapter: turns the digits of a hexadecimal entity into a byte.
   *
   * @access private
   * @param array $matches Regex match data; index 1 holds the hexadecimal digits
   * @return string Single byte whose code is the number modulo 256
   * @see decodeEntities()
   */
  private function decodeHexEntityMatch($matches)
  {
      // The last two hex digits are exactly the value modulo 256.
      return chr(hexdec(substr($matches[1], -2)));
  }
  /**
   * Returns PHP5 OOP version # of kses.
   *
   * The value is a fixed string identifying this release of the class.
   *
   * @access public
   * @return string Version number
   * @since PHP4 OOP 0.0.1
   */
  public function Version()
  {
      $version_label = 'PHP5 OOP 1.0.2';

      return $version_label;
  }
  1037. }
  1038. }
  1039. ?>