PageRenderTime 33ms CodeModel.GetById 0ms RepoModel.GetById 1ms app.codeStats 0ms

/common/libraries/plugin/kses-0.2.2/oop/php4.class.kses.php

https://bitbucket.org/renaatdemuynck/chamilo
PHP | 1130 lines | 566 code | 92 blank | 472 comment | 89 complexity | 9d4fe8066ac59cc0630a9a20f9903397 MD5 | raw file
Possible License(s): BSD-3-Clause, LGPL-2.1, LGPL-3.0, GPL-3.0, MIT, GPL-2.0
  1. <?php
  2. /*
  3. * ==========================================================================================
  4. *
  5. * This program is free software and open source software; you can redistribute
  6. * it and/or modify it under the terms of the GNU General Public License as
  7. * published by the Free Software Foundation; either version 2 of the License,
  8. * or (at your option) any later version.
  9. *
  10. * This program is distributed in the hope that it will be useful, but WITHOUT
  11. * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12. * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
  13. * more details.
  14. *
  15. * You should have received a copy of the GNU General Public License along
  16. * with this program; if not, write to the Free Software Foundation, Inc.,
  17. * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA or visit
  18. * http://www.gnu.org/licenses/gpl.html
  19. *
  20. * ==========================================================================================
  21. */
  22. /**
  23. * Class file for PHP4 OOP version of kses
  24. *
  25. * This is an updated version of kses to work with PHP4 that works under E_STRICT.
  26. *
  27. * This upgrade provides the following:
  28. * + Version number synced to procedural version number
  29. * + PHPdoc style documentation has been added to the class. See http://www.phpdoc.org/ for more info.
  30. * + Some methods are now deprecated due to nomenclature style change. See method documentation for specifics.
  31. * + Kses4 now works in E_STRICT
  32. * + Addition of methods AddProtocols(), filterKsestextHook(), RemoveProtocol() and RemoveProtocols()
  33. * + Deprecated _hook(), Protocols()
  34. * + Integrated code from kses 0.2.2 into class.
  35. * + Added methods DumpProtocols(), DumpMethods()
  36. *
  37. * @package kses
  38. * @subpackage kses4
  39. */
  // Refuse to load on interpreters older than PHP 4.
  // The major version is taken as an integer: inspecting only the first
  // character of phpversion() would misread "10.x" as major version 1.
  if (intval(phpversion()) < 4)
  {
      die("Class kses requires PHP 4 or higher.");
  }
  44. /**
  45. * Only install KSES4 once
  46. */
  47. if (! defined('KSES_CLASS_PHP4'))
  48. {
  49. define('KSES_CLASS_PHP4', true);
  50. /**
  51. * Kses strips evil scripts!
  52. *
  53. * This class provides the capability for removing unwanted HTML/XHTML, attributes from
  54. * tags, and protocols contained in links. The net result is a much more powerful tool
  55. * than the PHP internal strip_tags()
  56. *
  57. * This is a fork of a slick piece of procedural code called 'kses' written by Ulf Harnhammar
  58. * The entire set of functions was wrapped in a PHP object with some internal modifications
  59. * by Richard Vasquez (http://www.chaos.org/) 7/25/2003
  60. *
  61. * This upgrade provides the following:
  62. * + Version number synced to procedural version number
  63. * + PHPdoc style documentation has been added to the class. See http://www.phpdoc.org/ for more info.
  64. * + Some methods are now deprecated due to nomenclature style change. See method documentation for specifics.
  65. * + Kses4 now works in E_STRICT
  66. * + Addition of methods AddProtocols(), filterKsestextHook(), RemoveProtocol(), RemoveProtocols() and SetProtocols()
  67. * + Deprecated _hook(), Protocols()
  68. * + Integrated code from kses 0.2.2 into class.
  69. *
  70. * @author Richard R. Vásquez, Jr. (Original procedural code by Ulf Härnhammar)
  71. * @link http://sourceforge.net/projects/kses/ Home Page for Kses
  72. * @link http://chaos.org/contact/ Contact page with current email address for Richard Vasquez
  73. * @copyright Richard R. Vásquez, Jr. 2003-2005
  74. * @version PHP4 OOP 0.2.2
  75. * @license http://www.gnu.org/licenses/gpl.html GNU Public License
  76. * @package kses
  77. */
  78. class kses4
  79. {
  80. /**#@+
  81. * @access private
  82. * @var array
  83. */
  84. var $allowed_protocols = array();
  85. var $allowed_html = array();
  86. /**#@-*/
  /**
   * Constructor for kses.
   *
   * Starts the filter with a small protocol whitelist ('http', 'ftp' and
   * 'mailto') and with no (X)HTML element allowed at all.
   *
   * Further protocols such as news, gopher or irc can be registered later.
   * For reference, the original procedural kses shipped with:
   * 'http', 'https', 'ftp', 'news', 'nntp', 'telnet', 'gopher', 'mailto'
   *
   * @since PHP4 OOP 0.0.1
   */
  function __construct()
  {
      # Nothing is allowed until AddHTML() registers an element.
      $this->allowed_html = array();
      $this->allowed_protocols = array('http', 'ftp', 'mailto');
  }
  /**
   * Basic task of kses - parses $string and strips it as required.
   *
   * The input is run through, in order: _no_null(), _js_entities(),
   * _normalize_entities(), filterKsesTextHook() and finally _split(), which
   * removes the disallowed (X)HTML tags, attributes and protocols.
   *
   * @access public
   * @param string $string String to be stripped of 'evil scripts'
   * @return string The stripped string
   * @since PHP4 OOP 0.2.1
   */
  function Parse($string = "")
  {
      # Magic quotes can only be active before PHP 5.4, and
      # get_magic_quotes_gpc() no longer exists as of PHP 8.0, so it is only
      # consulted on interpreters where it can still report "on".
      if (version_compare(PHP_VERSION, '5.4.0', '<')
          && function_exists('get_magic_quotes_gpc')
          && get_magic_quotes_gpc())
      {
          $string = stripslashes($string);
      }
      $string = $this->_no_null($string);
      $string = $this->_js_entities($string);
      $string = $this->_normalize_entities($string);
      $string = $this->filterKsesTextHook($string);
      return $this->_split($string);
  }
  /**
   * Allows for single/batch addition of protocols
   *
   * Takes exactly one argument: either a single protocol name or a non-empty
   * array of protocol names. Every name is handed to AddProtocol(); the
   * result of those individual calls is not inspected.
   *
   * @access public
   * @param mixed , A string or array of protocols that will be added to the internal list of allowed protocols.
   * @return bool True when a string or non-empty array was received, false (with an E_USER_WARNING) otherwise.
   * @see AddProtocol()
   * @since PHP4 OOP 0.2.1
   */
  function AddProtocols()
  {
      if (func_num_args() != 1) {
          trigger_error("kses4::AddProtocols() did not receive an argument.", E_USER_WARNING);
          return false;
      }
      $protocol_data = func_get_arg(0);
      if (is_string($protocol_data)) {
          $this->AddProtocol($protocol_data);
          return true;
      }
      # Anything that is not a string must be a non-empty array.
      if (! is_array($protocol_data) || count($protocol_data) == 0) {
          trigger_error("kses4::AddProtocols() did not receive a string or an array.", E_USER_WARNING);
          return false;
      }
      foreach ($protocol_data as $single_protocol) {
          $this->AddProtocol($single_protocol);
      }
      return true;
  }
  /**
   * Allows for single/batch addition of protocols
   *
   * Legacy alias: validates that exactly one argument was passed and then
   * forwards it to AddProtocols().
   *
   * @deprecated Use AddProtocols()
   * @see AddProtocols()
   * @return bool False when the argument count is not one, otherwise the result of AddProtocols().
   * @since PHP4 OOP 0.0.1
   */
  function Protocols()
  {
      if (func_num_args() != 1) {
          trigger_error("kses4::Protocols() did not receive an argument.", E_USER_WARNING);
          return false;
      }
      $forwarded = func_get_arg(0);
      return $this->AddProtocols($forwarded);
  }
  /**
   * Adds a single protocol to $this->allowed_protocols.
   *
   * The name is trimmed, lower-cased and stripped of one trailing ':' before
   * it is stored, so "HTTP:" and " http " both register "http". The list is
   * kept sorted and free of duplicates.
   *
   * @access public
   * @param string $protocol The name of the protocol to be added.
   * @return bool True when the protocol is (now) in the list, false (with an E_USER_WARNING) for non-string or empty input.
   * @since PHP4 OOP 0.0.1
   */
  function AddProtocol($protocol = "")
  {
      if (! is_string($protocol)) {
          trigger_error("kses4::AddProtocol() requires a string.", E_USER_WARNING);
          return false;
      }
      $protocol = strtolower(trim($protocol));
      # Remove any inadvertent ':' at the end of the protocol. This has to
      # happen before the emptiness check, otherwise a bare ":" would be
      # registered as an empty protocol name.
      if (substr($protocol, -1) == ":") {
          $protocol = trim(substr($protocol, 0, strlen($protocol) - 1));
      }
      if ($protocol == "") {
          trigger_error("kses4::AddProtocol() tried to add an empty/NULL protocol.", E_USER_WARNING);
          return false;
      }
      # Strict comparison so numeric-looking names ("1e1" vs "10") are not
      # mistaken for duplicates of each other.
      if (! in_array($protocol, $this->allowed_protocols, true)) {
          $this->allowed_protocols[] = $protocol;
          sort($this->allowed_protocols);
      }
      return true;
  }
  /**
   * Allows for single/batch replacement of protocols
   *
   * Takes exactly one argument: a protocol name or a non-empty array of
   * names. The current whitelist is emptied and then rebuilt by passing each
   * name to AddProtocol(). On invalid input the existing list is left alone.
   *
   * @access public
   * @param mixed , A string or array of protocols that will be the new internal list of allowed protocols.
   * @return bool True when a string or non-empty array was received, false (with an E_USER_WARNING) otherwise.
   * @since PHP4 OOP 0.2.2
   * @see AddProtocol()
   */
  function SetProtocols()
  {
      if (func_num_args() != 1) {
          trigger_error("kses4::SetProtocols() did not receive an argument.", E_USER_WARNING);
          return false;
      }
      $protocol_data = func_get_arg(0);
      if (is_string($protocol_data)) {
          $this->allowed_protocols = array();
          $this->AddProtocol($protocol_data);
          return true;
      }
      if (! is_array($protocol_data) || count($protocol_data) == 0) {
          trigger_error("kses4::SetProtocols() did not receive a string or an array.", E_USER_WARNING);
          return false;
      }
      # Start from a clean slate, then register every supplied name.
      $this->allowed_protocols = array();
      foreach ($protocol_data as $single_protocol) {
          $this->AddProtocol($single_protocol);
      }
      return true;
  }
  /**
   * Raw dump of allowed protocols
   *
   * Hands back the current contents of $this->allowed_protocols for this
   * KSES instance.
   *
   * @access public
   * @return array The list of allowed protocols.
   * @since PHP4 OOP 0.2.2
   */
  function DumpProtocols()
  {
      $protocols = $this->allowed_protocols;
      return $protocols;
  }
  /**
   * Raw dump of allowed (X)HTML elements
   *
   * Hands back the current contents of $this->allowed_html for this KSES
   * instance. AddHTML() stores entries keyed by lower-cased tag name, each
   * holding that tag's attribute rules.
   *
   * @access public
   * @return array The allowed elements and their attribute rules.
   * @since PHP4 OOP 0.2.2
   */
  function DumpElements()
  {
      $elements = $this->allowed_html;
      return $elements;
  }
  /**
   * Adds valid (X)HTML with corresponding attributes that will be kept when stripping 'evil scripts'.
   *
   * Registers (or replaces) the rule set for one tag. The tag name, the
   * attribute names and - where an attribute maps to a non-empty array of
   * checks - the check names are all stored lower-cased.
   *
   * @access public
   * @param string $tag (X)HTML tag that will be allowed after stripping text.
   * @param array $attribs Associative array of allowed attributes - key => attribute name - value => attribute parameter
   * @return bool True on success, false (with an E_USER_WARNING) when $tag is not a non-empty string or $attribs is not an array.
   * @since PHP4 OOP 0.0.1
   */
  function AddHTML($tag = "", $attribs = array())
  {
      if (! is_string($tag)) {
          trigger_error("kses4::AddHTML() requires the tag to be a string", E_USER_WARNING);
          return false;
      }
      $tag = strtolower(trim($tag));
      if ($tag == "") {
          trigger_error("kses4::AddHTML() tried to add an empty/NULL tag", E_USER_WARNING);
          return false;
      }
      if (! is_array($attribs)) {
          trigger_error("kses4::AddHTML() requires an array (even an empty one) of attributes for '$tag'", E_USER_WARNING);
          return false;
      }
      $normalized = array();
      foreach ($attribs as $attr_name => $rules) {
          # Only non-empty rule arrays get their keys lower-cased; any other
          # value is stored untouched.
          if (is_array($rules) && count($rules) > 0) {
              $lowered = array();
              foreach ($rules as $check_name => $check_value) {
                  $lowered[strtolower($check_name)] = $check_value;
              }
              $rules = $lowered;
          }
          $normalized[strtolower($attr_name)] = $rules;
      }
      $this->allowed_html[$tag] = $normalized;
      return true;
  }
  /**
   * Removes a single protocol from $this->allowed_protocols.
   *
   * The name is normalised the same way AddProtocol() stores names: it is
   * trimmed, lower-cased and stripped of one trailing ':'. Removing a
   * protocol that is not in the list is not an error.
   *
   * @access public
   * @param string $protocol The name of the protocol to be removed.
   * @return bool True when the protocol is (now) absent, false (with an E_USER_WARNING) for non-string or empty input.
   * @since PHP4 OOP 0.2.1
   */
  function RemoveProtocol($protocol = "")
  {
      if (! is_string($protocol)) {
          trigger_error("kses4::RemoveProtocol() requires a string.", E_USER_WARNING);
          return false;
      }
      # Trim first: with surrounding whitespace still present ("http: ") the
      # trailing ':' would not be the last character and would survive.
      $protocol = strtolower(trim($protocol));
      # Remove any inadvertent ':' at the end of the protocol.
      if (substr($protocol, -1) == ":") {
          $protocol = trim(substr($protocol, 0, strlen($protocol) - 1));
      }
      if ($protocol == "") {
          trigger_error("kses4::RemoveProtocol() tried to remove an empty/NULL protocol.", E_USER_WARNING);
          return false;
      }
      # Ensures that the protocol exists before removing it.
      if (in_array($protocol, $this->allowed_protocols, true)) {
          $this->allowed_protocols = array_diff($this->allowed_protocols, array($protocol));
          # array_diff() keeps the old keys; sort() re-indexes the list.
          sort($this->allowed_protocols);
      }
      return true;
  }
  /**
   * Allows for single/batch removal of protocols
   *
   * Takes exactly one argument: a protocol name or a non-empty array of
   * names. Every name is handed to RemoveProtocol(); the result of those
   * individual calls is not inspected.
   *
   * @access public
   * @param mixed , A string or array of protocols that will be removed from the internal list of allowed protocols.
   * @return bool True when a string or non-empty array was received, false (with an E_USER_WARNING) otherwise.
   * @see RemoveProtocol()
   * @since PHP4 OOP 0.2.1
   */
  function RemoveProtocols()
  {
      if (func_num_args() != 1) {
          # Warn like AddProtocols()/SetProtocols() do instead of failing silently.
          trigger_error("kses4::RemoveProtocols() did not receive an argument.", E_USER_WARNING);
          return false;
      }
      $protocol_data = func_get_arg(0);
      if (is_array($protocol_data) && count($protocol_data) > 0) {
          foreach ($protocol_data as $protocol) {
              $this->RemoveProtocol($protocol);
          }
          # The array branch previously fell off the end of the method and
          # returned NULL despite the documented bool return.
          return true;
      }
      if (is_string($protocol_data)) {
          $this->RemoveProtocol($protocol_data);
          return true;
      }
      trigger_error("kses4::RemoveProtocols() did not receive a string or an array.", E_USER_WARNING);
      return false;
  }
  /**
   * Removes NUL bytes and literal "\0" sequences from $string.
   *
   * Two passes are made, in this order: runs of NUL bytes are deleted first,
   * then runs of the two-character text sequence backslash + zero.
   *
   * @access private
   * @param string $string
   * @return string String without NUL bytes or literal "\0" sequences
   * @since PHP4 OOP 0.0.1
   */
  function _no_null($string)
  {
      # preg_replace() applies an array of patterns one after the other.
      $null_patterns = array('/\0+/', '/(\\\\0)+/');
      return preg_replace($null_patterns, '', $string);
  }
  /**
   * This function removes the HTML JavaScript entities found in early versions of
   * Netscape 4.
   *
   * Everything from "&{" (whitespace allowed between the two characters) up
   * to the closing "}" and an optional ";" is deleted. An unterminated
   * "&{..." is deleted through the end of the string.
   *
   * @access private
   * @param string $string
   * @return string String without JavaScript entities
   * @since PHP4 OOP 0.0.1
   */
  function _js_entities($string)
  {
      $js_entity = '%&\s*\{[^}]*(\}\s*;?|$)%';
      $cleaned = preg_replace($js_entity, '', $string);
      return $cleaned;
  }
  /**
   * Normalizes HTML entities
   *
   * This function normalizes HTML entities. It will convert "AT&T" to the correct
   * "AT&amp;T", "&#00058;" to "&#58;", "&#XYZZY;" to "&amp;#XYZZY;" and so on.
   *
   * Decimal entities are rewritten through a callback rather than the /e
   * (eval) pattern modifier, which PHP removed in 7.0.
   *
   * @access private
   * @param string $string
   * @return string String with normalized entities
   * @see _normalize_entities2()
   * @since PHP4 OOP 0.0.1
   */
  function _normalize_entities($string)
  {
      # Disarm all entities by converting & to &amp;
      $string = str_replace('&', '&amp;', $string);
      # Change back the allowed entities in our entity white list
      # ... named entities: a letter followed by up to 19 letters/digits
      $string = preg_replace('/&amp;([A-Za-z][A-Za-z0-9]{0,19});/', '&\\1;', $string);
      # ... decimal entities: leading zeroes dropped, range checked by the callback
      $string = preg_replace_callback('/&amp;#0*([0-9]{1,5});/', array($this, '_normalize_entities_cb'), $string);
      # ... hexadecimal entities: two or four hex digits after any leading zeroes
      $string = preg_replace('/&amp;#([Xx])0*(([0-9A-Fa-f]{2}){1,2});/', '&#\\1\\2;', $string);
      return $string;
  }

  /**
   * preg_replace_callback() adapter for _normalize_entities().
   *
   * @access private
   * @param array $matches Match data; index 1 holds the decimal digits without leading zeroes
   * @return string Result of _normalize_entities2() for those digits
   */
  function _normalize_entities_cb($matches)
  {
      return $this->_normalize_entities2($matches[1]);
  }
  /**
   * Helper method used by _normalize_entities()
   *
   * Receives the digits of a decimal entity (&#number;) with leading zeroes
   * already removed. Values up to 65535 are returned as a live numeric
   * entity; anything larger is returned with its ampersand escaped so it
   * stays inert text.
   *
   * @access private
   * @param string $i
   * @return string Normalized numeric entity
   * @see _normalize_entities()
   * @since PHP4 OOP 0.0.1
   */
  function _normalize_entities2($i)
  {
      if ($i > 65535) {
          # Outside the 16 bit range: keep it disarmed.
          return "&amp;#$i;";
      }
      return "&#$i;";
  }
  /**
   * Allows for additional user defined modifications to text.
   *
   * Legacy name; simply forwards to filterKsesTextHook().
   *
   * @deprecated use filterKsesTextHook()
   * @param string $string
   * @see filterKsesTextHook()
   * @return string Whatever filterKsesTextHook() returns for $string
   * @since PHP4 OOP 0.0.1
   */
  function _hook($string)
  {
      $filtered = $this->filterKsesTextHook($string);
      return $filtered;
  }
  /**
   * Allows for additional user defined modifications to text.
   *
   * Extension point called by Parse() after entity normalization and before
   * tags are split and filtered. This base implementation hands the string
   * back unchanged; subclasses can override it for their own requirements.
   *
   * @access public
   * @param string $string String to perform additional modifications on.
   * @return string User modified string.
   * @see Parse()
   * @since PHP5 OOP 1.0.0
   */
  function filterKsesTextHook($string)
  {
      # Intentionally a no-op.
      return $string;
  }
  /**
   * Lower-cases the keys of a two-level array.
   *
   * Every first-level key is lower-cased. Where the matching value is an
   * array, its keys are lower-cased too and its values copied over; where it
   * is not an array, the entry becomes an empty array.
   *
   * @access private
   * @param array $inarray Associative array
   * @return array Modified array (empty when $inarray is not an array)
   * @since PHP4 OOP 0.0.1
   */
  function _array_lc($inarray)
  {
      $lowered = array();
      if (! is_array($inarray)) {
          return $lowered;
      }
      foreach ($inarray as $outer_key => $outer_value) {
          $outer_lc = strtolower($outer_key);
          $lowered[$outer_lc] = array();
          if (! is_array($outer_value)) {
              continue;
          }
          foreach ($outer_value as $inner_key => $inner_value) {
              $lowered[$outer_lc][strtolower($inner_key)] = $inner_value;
          }
      }
      return $lowered;
  }
  /**
   * This method searches for HTML tags, no matter how malformed. It also
   * matches stray ">" characters.
   *
   * Every match is replaced by what _split2() returns for it. The
   * replacement goes through a callback rather than the /e (eval) pattern
   * modifier, which PHP removed in 7.0.
   *
   * @access private
   * @param string $string
   * @return string $string with every tag-like fragment filtered by _split2()
   * @see _split2()
   * @since PHP4 OOP 0.0.1
   */
  function _split($string)
  {
      return preg_replace_callback('%(<' . # EITHER: <
          '[^>]*' . # things that aren't >
          '(>|$)' . # > or end of string
          '|>)%', # OR: just a >
          array($this, '_split_cb'), $string);
  }

  /**
   * preg_replace_callback() adapter for _split().
   *
   * @access private
   * @param array $matches Match data; index 1 holds the tag-like fragment
   * @return string Result of _split2() for that fragment
   */
  function _split_cb($matches)
  {
      # The old /e replacement delivered the fragment with its double quotes
      # backslash-escaped, and _split2() starts by undoing exactly that via
      # _stripslashes(). Re-create the escaping so _split2() receives the
      # same input as before and literal \" sequences are not altered.
      return $this->_split2(str_replace('"', '\\"', $matches[1]));
  }
  /**
   * This method strips out disallowed and/or mangled (X)HTML tags along with assigned attributes.
   *
   * Handles one fragment found by _split():
   * - a stray ">" becomes "&gt;";
   * - something that does not look like a tag at all (e.g. <:::>) is dropped;
   * - a tag whose element is not registered in $this->allowed_html is dropped;
   * - a closing tag is rebuilt without any attributes;
   * - any other tag is passed on to _attr() for attribute filtering.
   *
   * @access private
   * @param string $string
   * @return string Modified string minus disallowed/mangled (X)HTML and attributes
   * @see _attr()
   * @since PHP4 OOP 0.0.1
   */
  function _split2($string)
  {
      $string = $this->_stripslashes($string);
      if (substr($string, 0, 1) != '<') {
          # It matched a ">" character
          return '&gt;';
      }
      if (! preg_match('%^<\s*(/\s*)?([a-zA-Z0-9]+)([^>]*)>?$%', $string, $parts)) {
          # It's seriously malformed
          return '';
      }
      $closing = trim($parts[1]);
      $tag = $parts[2];
      $tag_key = strtolower($tag);
      if (! isset($this->allowed_html[$tag_key]) || ! is_array($this->allowed_html[$tag_key])) {
          # They are using a not allowed HTML element
          return '';
      }
      if ($closing != '') {
          # No attributes are allowed for closing elements
          return "<$closing$tag>";
      }
      return $this->_attr($tag, $parts[3]);
  }
  /**
   * This method strips out disallowed attributes for (X)HTML tags.
   *
   * When the element has no attribute rules, the bare tag is returned.
   * Otherwise the attribute text is parsed by $this->_hair() and each parsed
   * attribute is kept only if it is listed for the element and passes all of
   * its checks (see _check_attr_val()). Any "<" or ">" left in the kept
   * attributes is removed, and a closing XHTML slash found at the end of the
   * attribute text is carried over to the result.
   *
   * @access private
   * @param string $element (X)HTML tag to check
   * @param string $attr Text containing attributes to check for validity.
   * @return string The rebuilt tag with only the accepted attributes
   * @see _hair()
   * @see _check_attr_val()
   * @since PHP4 OOP 0.0.1
   */
  function _attr($element, $attr)
  {
      $tag_key = strtolower($element);
      # Is there a closing XHTML slash at the end of the attributes?
      $xhtml_slash = preg_match('%\s/\s*$%', $attr) ? ' /' : '';
      # Are any attributes allowed at all for this element?
      if (! isset($this->allowed_html[$tag_key]) || count($this->allowed_html[$tag_key]) == 0) {
          return "<$element$xhtml_slash>";
      }
      # Split the attribute text and collect the accepted attributes
      $kept = '';
      $parsed = $this->_hair($attr);
      if (is_array($parsed)) {
          foreach ($parsed as $attribute) {
              $attr_key = strtolower($attribute['name']);
              if (! isset($this->allowed_html[$tag_key][$attr_key])) {
                  continue;
              }
              $rules = $this->allowed_html[$tag_key][$attr_key];
              if ($rules == '') {
                  # the attribute is not allowed
                  continue;
              }
              if (is_array($rules)) {
                  if (count($rules) == 0) {
                      # an empty list of checks keeps nothing
                      continue;
                  }
                  # there are some checks - every one of them has to pass
                  $passed = true;
                  foreach ($rules as $check_name => $check_value) {
                      if (! $this->_check_attr_val($attribute['value'], $attribute['vless'], $check_name, $check_value)) {
                          $passed = false;
                          break;
                      }
                  }
                  if (! $passed) {
                      continue;
                  }
              }
              # either there are no checks, or it passed them all
              $kept .= ' ' . $attribute['whole'];
          }
      }
      # Remove any "<" or ">" characters
      $kept = preg_replace('/[<>]/', '', $kept);
      return "<$element$kept$xhtml_slash>";
  }
  /**
   * This method combs through an attribute list string and returns an array of attribute records.
   *
   * The text is consumed from the left by a small state machine:
   * state 0 expects an attribute name, state 1 an equals sign or the
   * whitespace that ends a valueless attribute, state 2 an attribute value
   * ("double quoted", 'single quoted' or bare). Bare values are re-quoted
   * with double quotes. Every value is passed through $this->_bad_protocol().
   * When nothing matches in the current state, the remaining text is handed
   * to $this->_html_error() and parsing restarts in state 0.
   *
   * Each record has the keys 'name', 'value', 'whole' (the attribute as it
   * should be written back) and 'vless' ('y' for valueless, 'n' otherwise).
   *
   * @access private
   * @param string $attr Text containing tag attributes for parsing
   * @return array List of attribute records as described above
   * @since PHP4 OOP 0.0.1
   */
  function _hair($attr)
  {
      $records = array();
      $state = 0;
      $name = '';
      # Loop through the whole attribute list
      while (strlen($attr) != 0) {
          # Did this pass consume anything?
          $consumed = false;
          if ($state == 0) {
              # attribute name, href for instance
              if (preg_match('/^([-a-zA-Z]+)/', $attr, $found)) {
                  $name = $found[1];
                  $state = 1;
                  $consumed = true;
                  $attr = preg_replace('/^[-a-zA-Z]+/', '', $attr);
              }
          } elseif ($state == 1) {
              # equals sign or valueless ("selected")
              if (preg_match('/^\s*=\s*/', $attr)) {
                  # equals sign
                  $consumed = true;
                  $state = 2;
                  $attr = preg_replace('/^\s*=\s*/', '', $attr);
              } elseif (preg_match('/^\s+/', $attr)) {
                  # valueless
                  $consumed = true;
                  $state = 0;
                  $records[] = array('name' => $name, 'value' => '', 'whole' => $name, 'vless' => 'y');
                  $attr = preg_replace('/^\s+/', '', $attr);
              }
          } elseif ($state == 2) {
              # attribute value, a URL after href= for instance
              if (preg_match('/^"([^"]*)"(\s+|$)/', $attr, $found)) {
                  # "value"
                  $clean = $this->_bad_protocol($found[1]);
                  $records[] = array('name' => $name, 'value' => $clean,
                      'whole' => "$name=\"$clean\"", 'vless' => 'n');
                  $consumed = true;
                  $state = 0;
                  $attr = preg_replace('/^"[^"]*"(\s+|$)/', '', $attr);
              } elseif (preg_match("/^'([^']*)'(\s+|$)/", $attr, $found)) {
                  # 'value'
                  $clean = $this->_bad_protocol($found[1]);
                  $records[] = array('name' => $name, 'value' => $clean,
                      'whole' => "$name='$clean'", 'vless' => 'n');
                  $consumed = true;
                  $state = 0;
                  $attr = preg_replace("/^'[^']*'(\s+|$)/", '', $attr);
              } elseif (preg_match("%^([^\s\"']+)(\s+|$)%", $attr, $found)) {
                  # value - quotes are added to conform to W3C's HTML spec.
                  $clean = $this->_bad_protocol($found[1]);
                  $records[] = array('name' => $name, 'value' => $clean,
                      'whole' => "$name=\"$clean\"", 'vless' => 'n');
                  $consumed = true;
                  $state = 0;
                  $attr = preg_replace("%^[^\s\"']+(\s+|$)%", '', $attr);
              }
          }
          if (! $consumed) {
              # not well formed, remove and try again
              $attr = $this->_html_error($attr);
              $state = 0;
          }
      }
      # special case, for when the attribute list ends with a valueless
      # attribute like "selected"
      if ($state == 1) {
          $records[] = array('name' => $name, 'value' => '', 'whole' => $name, 'vless' => 'y');
      }
      return $records;
  }
  /**
   * This method removes disallowed protocols.
   *
   * After deleting NULs (via _no_null()) and 0xAD bytes, the string is fed
   * to _bad_protocol_once() again and again until a pass no longer changes
   * it, so a value like "javascript:javascript:alert(57)" cannot slip a
   * second protocol through.
   *
   * NOTE(review): the 0xAD removal is byte-wise; presumably input is a
   * single-byte encoding - in UTF-8 that byte also occurs inside multi-byte
   * characters. Confirm against callers.
   *
   * @access private
   * @param string $string String to check for protocols
   * @return string String with removed protocols
   * @see _bad_protocol_once()
   * @since PHP4 OOP 0.0.1
   */
  function _bad_protocol($string)
  {
      $string = $this->_no_null($string);
      $string = preg_replace('/\xad+/', '', $string); # deals with Opera "feature"
      do {
          $previous = $string;
          $string = $this->_bad_protocol_once($string);
      } while ($string != $previous);
      return $string;
  }
  /**
   * Helper method used by _bad_protocol()
   *
   * Looks for a URL protocol at the very beginning of $string: a run of
   * letters, digits, whitespace and HTML entities followed by a colon
   * (literal, "&#58;" or "&#x3A;") and optional whitespace. That whole
   * prefix is replaced by what _bad_protocol_once2() returns for the part
   * in front of the colon.
   *
   * The replacement goes through a callback rather than the /e (eval)
   * pattern modifier: /e was removed in PHP 7.0, and it also meant the
   * captured (user supplied) text was spliced into evaluated PHP code.
   *
   * @access private
   * @param string $string String to check for protocols
   * @return string String with removed protocols
   * @see _bad_protocol()
   * @see _bad_protocol_once2()
   * @since PHP4 OOP 0.0.1
   */
  function _bad_protocol_once($string)
  {
      return preg_replace_callback(
          '/^((&[^;]*;|[\sA-Za-z0-9])*)' . '(:|&#58;|&#[Xx]3[Aa];)\s*/',
          array($this, '_bad_protocol_once_cb'),
          $string
      );
  }

  /**
   * preg_replace_callback() adapter for _bad_protocol_once().
   *
   * @access private
   * @param array $matches Match data; index 1 holds the text in front of the colon
   * @return string Result of _bad_protocol_once2() for that text
   */
  function _bad_protocol_once_cb($matches)
  {
      return $this->_bad_protocol_once2($matches[1]);
  }
  /**
   * Helper method used by _bad_protocol_once() regex
   *
   * Normalises a candidate protocol name - entities decoded via
   * _decode_entities(), whitespace, NULs and 0xAD bytes removed, lower-cased -
   * and looks it up in $this->allowed_protocols.
   *
   * @access private
   * @param string $string Text found in front of the colon
   * @return string The normalised protocol followed by ':' when it is allowed, otherwise ''
   * @see _bad_protocol()
   * @see _bad_protocol_once()
   * @since PHP4 OOP 0.0.1
   */
  function _bad_protocol_once2($string)
  {
      $candidate = $this->_decode_entities($string);
      $candidate = preg_replace('/\s/', '', $candidate);
      $candidate = $this->_no_null($candidate);
      $candidate = preg_replace('/\xad+/', '', $candidate); # deals with Opera "feature"
      $candidate = strtolower($candidate);
      if (! is_array($this->allowed_protocols)) {
          return '';
      }
      foreach ($this->allowed_protocols as $one_protocol) {
          if (strtolower($one_protocol) == $candidate) {
              # white-listed: keep it, in normalised form
              return "$candidate:";
          }
      }
      return '';
  }
  /**
   * This function performs different checks for attribute values.
   *
   * Supported check names (matched case-insensitively):
   * - "maxlen": the value's length must not be greater than $checkvalue.
   *   This can be used to avoid Buffer Overflows in WWW clients and various
   *   Internet servers.
   * - "minlen": the value's length must not be smaller than $checkvalue.
   * - "maxval": the value must be 1-6 digits with at most 6 whitespace
   *   characters on either side, and must not be greater than $checkvalue.
   *   This check can be used to avoid Denial of Service attacks.
   * - "minval": same format requirement, and the value must not be smaller
   *   than $checkvalue.
   * - "valueless": $checkvalue "y"/"Y" means the attribute must not have a
   *   value (<option selected>), "n"/"N" means it must have one
   *   (<a href="blah">); it is compared with $vless.
   * Any other check name passes.
   *
   * @access private
   * @param string $value The value of the attribute to be checked.
   * @param string $vless 'y' when the attribute was written without a value, 'n' otherwise
   * @param string $checkname The check to be performed
   * @param string $checkvalue The value that is to be checked against
   * @return bool Indicates whether the check passed or not
   * @since PHP4 OOP 0.0.1
   */
  function _check_attr_val($value, $vless, $checkname, $checkvalue)
  {
      $check = strtolower($checkname);
      if ($check == 'maxlen') {
          return ! (strlen($value) > $checkvalue);
      }
      if ($check == 'minlen') {
          return ! (strlen($value) < $checkvalue);
      }
      if ($check == 'maxval' || $check == 'minval') {
          # Both numeric checks insist on a short, plain non-negative integer.
          if (! preg_match('/^\s{0,6}[0-9]{1,6}\s{0,6}$/', $value)) {
              return false;
          }
          if ($check == 'maxval') {
              return ! ($value > $checkvalue);
          }
          return ! ($value < $checkvalue);
      }
      if ($check == 'valueless') {
          return ! (strtolower($checkvalue) != $vless);
      }
      # Unknown checks never reject an attribute.
      return true;
  }
  /**
   * Turns every \" sequence into a plain "
   *
   * Only a backslash that is directly followed by a double quote is
   * removed; every other backslash is left in place. The original author
   * notes that the quoting done by preg_replace(//e) seems to make this
   * necessary.
   *
   * @access private
   * @param string $string The string to be stripped.
   * @return string The string with each \" replaced by "
   * @since PHP4 OOP 0.0.1
   */
  function _stripslashes($string)
  {
      // One literal backslash followed by a double quote.
      $escaped_quote = '%\\\\"%';
      $plain_quote = '"';

      return preg_replace($escaped_quote, $plain_quote, $string);
  }
  /**
   * Helper method for _hair()
   *
   * Recovers from a parsing error in _hair() by dropping the leading part
   * of the string: a run of non-whitespace characters and quoted sections
   * (double or single quoted, closed or running to the end of the string),
   * plus any whitespace that follows that run.
   *
   * @access private
   * @param string $string The string to be stripped.
   * @return string The remainder of the string after the dropped prefix
   * @see _hair()
   * @since PHP4 OOP 0.0.1
   */
  function _html_error($string)
  {
      // Leading run of "..." / '...' / non-space characters, then whitespace.
      $broken_prefix = '/^("[^"]*("|$)|\'[^\']*(\'|$)|\S)*\s*/';

      $remainder = preg_replace($broken_prefix, '', $string);

      return $remainder;
  }
  /**
   * Decodes numeric HTML entities
   *
   * This method decodes numeric HTML entities (&#65; and &#x41;). It doesn't
   * do anything with other entities like &auml;, but we don't need them in the
   * URL protocol white listing system anyway.
   *
   * Decimal entities are decoded first and hexadecimal entities second, on
   * the result of the first pass, so a hexadecimal entity that only appears
   * after decimal decoding (e.g. &#38;#x41;) is decoded as well.
   *
   * The implementation deliberately avoids the /e (eval) regex modifier,
   * which was removed from PHP and makes preg_replace() return NULL there.
   *
   * @access private
   * @param string $string The string whose numeric entities are to be decoded.
   * @return string The string with numeric entities replaced by single bytes
   * @since PHP4 OOP 0.0.1
   */
  function _decode_entities($string)
  {
      // Pattern => numeric base of the captured digits. The order matters:
      // the hexadecimal pass runs on the output of the decimal pass.
      $passes = array(
          '/&#([0-9]+);/' => 10,
          '/&#[Xx]([0-9A-Fa-f]+);/' => 16
      );

      foreach ($passes as $pattern => $base)
      {
          // With one capture group, the result alternates between literal
          // text (even indexes) and captured digit runs (odd indexes).
          $pieces = preg_split($pattern, $string, -1, PREG_SPLIT_DELIM_CAPTURE);
          if (! is_array($pieces))
          {
              // Regex engine failure: leave the string as it is for this pass.
              continue;
          }

          $decoded = '';
          foreach ($pieces as $index => $piece)
          {
              if ($index % 2)
              {
                  // chr() works on a single byte; reduce the code explicitly.
                  $decoded .= chr(intval($piece, $base) % 256);
              }
              else
              {
                  $decoded .= $piece;
              }
          }
          $string = $decoded;
      }

      return $string;
  }
  /**
   * Reports the version of this PHP4 OOP kses class.
   *
   * The version number is kept in sync with the procedural kses release
   * this class was forked from.
   *
   * @access public
   * @return string Version number
   * @since PHP4 OOP 0.0.1
   */
  function _version()
  {
      $version_label = 'PHP4 0.2.2 (OOP fork of procedural kses 0.2.2)';

      return $version_label;
  }
  1032. }
  1033. }
  1034. ?>