PageRenderTime 206ms CodeModel.GetById 36ms RepoModel.GetById 19ms app.codeStats 0ms

/vendor/nooku/libraries/koowa/filter/html.php

https://github.com/bhar1red/anahita
PHP | 417 lines | 204 code | 51 blank | 162 comment | 66 complexity | 5a7e2201cf48eb415153fbbe849e0cae MD5 | raw file
Possible License(s): GPL-2.0, LGPL-2.1
  1. <?php
  2. /**
  3. * @version $Id: html.php 4628 2012-05-06 19:56:43Z johanjanssens $
  4. * @package Koowa_Filter
  5. * @copyright Copyright (C) 2007 - 2012 Johan Janssens. All rights reserved.
  6. * @license GNU GPLv3 <http://www.gnu.org/licenses/gpl.html>
  7. * @link http://www.nooku.org
  8. */
  9. /**
  10. * Html XSS Filter
  11. *
  12. * Forked from the php input filter library by: Daniel Morris <dan@rootcube.com>
  13. * Original Contributors: Gianpaolo Racca, Ghislain Picard, Marco Wandschneider,
  14. * Chris Tobin.
  15. *
  16. * @author Johan Janssens <johan@nooku.org>
  17. * @package Koowa_Filter
  18. */
  19. class KFilterHtml extends KFilterAbstract
  20. {
  21. /**
  22. * List of user-defined tags
  23. *
  24. * @var array
  25. */
  26. protected $_tagsArray = array();
  27. /**
  28. * List of user-defined attributes
  29. *
  30. * @var array
  31. */
  32. protected $_attrArray = array();
  33. /**
  34. * If false, use whiteList method, if true use blackList method
  35. *
  36. * @var boolean
  37. */
  38. protected $_tagsMethod = true;
  39. /**
  40. * If false, use whiteList method, if true use blackList method
  41. *
  42. * @var boolean
  43. */
  44. protected $_attrMethod = true;
  45. /**
  46. * If true, only auto clean essentials, if false allow clean blacklisted tags/attr
  47. *
  48. * @var boolean
  49. */
  50. protected $_xssAuto = true;
  51. protected $_tagBlacklist = array ('applet', 'body', 'bgsound', 'base', 'basefont', 'embed', 'frame', 'frameset', 'head', 'html', 'id', 'iframe', 'ilayer', 'layer', 'link', 'meta', 'name', 'object', 'script', 'style', 'title', 'xml');
  52. protected $_attrBlacklist = array ('action', 'background', 'codebase', 'dynsrc', 'lowsrc'); // also will strip ALL event handlers
  53. /**
  54. * Constructor
  55. *
  56. * @param object An optional KConfig object with configuration options
  57. */
  58. public function __construct(KConfig $config)
  59. {
  60. parent::__construct($config);
  61. // List of user-defined tags
  62. if(isset($config->tag_list)) {
  63. $this->_tagsArray = array_map('strtolower', (array) $config->tag_list);
  64. }
  65. // List of user-defined attributes
  66. if(isset($config->attribute_list)) {
  67. $this->_attrArray = array_map('strtolower', (array) $config->attribute_list);
  68. }
  69. // WhiteList method = 0, BlackList method = 1
  70. if(isset($config->tag_method)) {
  71. $this->_tagsMethod = $config->tag_method;
  72. }
  73. // WhiteList method = 0, BlackList method = 1
  74. if(isset($config->attribute_method)) {
  75. $this->_attrMethod = $config->attribute_method;
  76. }
  77. //If false, only auto clean essentials, if true allow clean blacklisted tags/attr
  78. if(isset($config->xss_auto)) {
  79. $this->_xssAuto = $config->xss_auto;
  80. }
  81. }
  82. /**
  83. * Validate a value
  84. *
  85. * @param scalar Value to be validated
  86. * @return bool True when the variable is valid
  87. */
  88. protected function _validate($value)
  89. {
  90. return (is_string($value)
  91. // this is too strict, html is usually sanitized
  92. //&& strcmp($value, $this->sanitize($value)) === 0
  93. );
  94. }
  95. /**
  96. * Sanitize a value
  97. *
  98. * @param scalar Input string/array-of-string to be 'cleaned'
  99. * @return mixed 'Cleaned' version of input parameter
  100. */
  101. protected function _sanitize($value)
  102. {
  103. $value = (string) $value;
  104. // Filter var for XSS and other 'bad' code etc.
  105. if (!empty ($value)) {
  106. $value = $this->_remove($this->_decode($value));
  107. }
  108. return $value;
  109. }
  110. /**
  111. * Internal method to iteratively remove all unwanted tags and attributes
  112. *
  113. * @param string $source Input string to be 'cleaned'
  114. * @return string 'Cleaned' version of input parameter
  115. */
  116. protected function _remove($source)
  117. {
  118. $loopCounter = 0;
  119. // Iteration provides nested tag protection
  120. while ($source != $this->_cleanTags($source))
  121. {
  122. $source = $this->_cleanTags($source);
  123. $loopCounter ++;
  124. }
  125. return $source;
  126. }
  127. /**
  128. * Internal method to strip a string of certain tags
  129. *
  130. * @param string $source Input string to be 'cleaned'
  131. * @return string 'Cleaned' version of input parameter
  132. */
  133. protected function _cleanTags($source)
  134. {
  135. $preTag = null;
  136. $postTag = $source;
  137. $currentSpace = false;
  138. $attr = '';
  139. // Is there a tag? If so it will certainly start with a '<'
  140. $tagOpen_start = strpos($source, '<');
  141. while ($tagOpen_start !== false)
  142. {
  143. // Get some information about the tag we are processing
  144. $preTag .= substr($postTag, 0, $tagOpen_start);
  145. $postTag = substr($postTag, $tagOpen_start);
  146. $fromTagOpen = substr($postTag, 1);
  147. $tagOpen_end = strpos($fromTagOpen, '>');
  148. // Let's catch any non-terminated tags and skip over them
  149. if ($tagOpen_end === false) {
  150. $postTag = substr($postTag, $tagOpen_start +1);
  151. $tagOpen_start = strpos($postTag, '<');
  152. continue;
  153. }
  154. // Do we have a nested tag?
  155. $tagOpen_nested = strpos($fromTagOpen, '<');
  156. $tagOpen_nested_end = strpos(substr($postTag, $tagOpen_end), '>');
  157. if (($tagOpen_nested !== false) && ($tagOpen_nested < $tagOpen_end)) {
  158. $preTag .= substr($postTag, 0, ($tagOpen_nested +1));
  159. $postTag = substr($postTag, ($tagOpen_nested +1));
  160. $tagOpen_start = strpos($postTag, '<');
  161. continue;
  162. }
  163. // Lets get some information about our tag and setup attribute pairs
  164. $tagOpen_nested = (strpos($fromTagOpen, '<') + $tagOpen_start +1);
  165. $currentTag = substr($fromTagOpen, 0, $tagOpen_end);
  166. $tagLength = strlen($currentTag);
  167. $tagLeft = $currentTag;
  168. $attrSet = array ();
  169. $currentSpace = strpos($tagLeft, ' ');
  170. // Are we an open tag or a close tag?
  171. if (substr($currentTag, 0, 1) == '/') {
  172. // Close Tag
  173. $isCloseTag = true;
  174. list ($tagName) = explode(' ', $currentTag);
  175. $tagName = substr($tagName, 1);
  176. } else {
  177. // Open Tag
  178. $isCloseTag = false;
  179. list ($tagName) = explode(' ', $currentTag);
  180. }
  181. /*
  182. * Exclude all "non-regular" tagnames
  183. * OR no tagname
  184. * OR remove if xssauto is on and tag is blacklisted
  185. */
  186. if ((!preg_match("/^[a-z][a-z0-9]*$/i", $tagName)) || (!$tagName) || ((in_array(strtolower($tagName), $this->_tagBlacklist)) && ($this->_xssAuto))) {
  187. $postTag = substr($postTag, ($tagLength +2));
  188. $tagOpen_start = strpos($postTag, '<');
  189. // Strip tag
  190. continue;
  191. }
  192. /*
  193. * Time to grab any attributes from the tag... need this section in
  194. * case attributes have spaces in the values.
  195. */
  196. while ($currentSpace !== false)
  197. {
  198. $attr = '';
  199. $fromSpace = substr($tagLeft, ($currentSpace +1));
  200. $nextSpace = strpos($fromSpace, ' ');
  201. $openQuotes = strpos($fromSpace, '"');
  202. $closeQuotes = strpos(substr($fromSpace, ($openQuotes +1)), '"') + $openQuotes +1;
  203. // Do we have an attribute to process? [check for equal sign]
  204. if (strpos($fromSpace, '=') !== false) {
  205. /*
  206. * If the attribute value is wrapped in quotes we need to
  207. * grab the substring from the closing quote, otherwise grab
  208. * till the next space
  209. */
  210. if (($openQuotes !== false) && (strpos(substr($fromSpace, ($openQuotes +1)), '"') !== false)) {
  211. $attr = substr($fromSpace, 0, ($closeQuotes +1));
  212. } else {
  213. $attr = substr($fromSpace, 0, $nextSpace);
  214. }
  215. } else {
  216. /*
  217. * No more equal signs so add any extra text in the tag into
  218. * the attribute array [eg. checked]
  219. */
  220. if ($fromSpace != '/') {
  221. $attr = substr($fromSpace, 0, $nextSpace);
  222. }
  223. }
  224. // Last Attribute Pair
  225. if (!$attr && $fromSpace != '/') {
  226. $attr = $fromSpace;
  227. }
  228. // Add attribute pair to the attribute array
  229. $attrSet[] = $attr;
  230. // Move search point and continue iteration
  231. $tagLeft = substr($fromSpace, strlen($attr));
  232. $currentSpace = strpos($tagLeft, ' ');
  233. }
  234. // Is our tag in the user input array?
  235. $tagFound = in_array(strtolower($tagName), $this->_tagsArray);
  236. // If the tag is allowed lets append it to the output string
  237. if ((!$tagFound && $this->_tagsMethod) || ($tagFound && !$this->_tagsMethod)) {
  238. // Reconstruct tag with allowed attributes
  239. if (!$isCloseTag) {
  240. // Open or Single tag
  241. $attrSet = $this->_cleanAttributes($attrSet);
  242. $preTag .= '<'.$tagName;
  243. for ($i = 0; $i < count($attrSet); $i ++)
  244. {
  245. $preTag .= ' '.$attrSet[$i];
  246. }
  247. // Reformat single tags to XHTML
  248. if (strpos($fromTagOpen, '</'.$tagName)) {
  249. $preTag .= '>';
  250. } else {
  251. $preTag .= ' />';
  252. }
  253. } else {
  254. // Closing Tag
  255. $preTag .= '</'.$tagName.'>';
  256. }
  257. }
  258. // Find next tag's start and continue iteration
  259. $postTag = substr($postTag, ($tagLength +2));
  260. $tagOpen_start = strpos($postTag, '<');
  261. }
  262. // Append any code after the end of tags and return
  263. if ($postTag != '<') {
  264. $preTag .= $postTag;
  265. }
  266. return $preTag;
  267. }
  268. /**
  269. * Internal method to strip a tag of certain attributes
  270. *
  271. * @param array $attrSet Array of attribute pairs to filter
  272. * @return array Filtered array of attribute pairs
  273. */
  274. protected function _cleanAttributes($attrSet)
  275. {
  276. // Initialize variables
  277. $newSet = array();
  278. // Iterate through attribute pairs
  279. for ($i = 0; $i < count($attrSet); $i ++)
  280. {
  281. // Skip blank spaces
  282. if (!$attrSet[$i]) {
  283. continue;
  284. }
  285. // Split into name/value pairs
  286. $attrSubSet = explode('=', trim($attrSet[$i]), 2);
  287. list ($attrSubSet[0]) = explode(' ', $attrSubSet[0]);
  288. /*
  289. * Remove all "non-regular" attribute names
  290. * AND blacklisted attributes
  291. */
  292. if ((!preg_match('/[a-z]*$/i', $attrSubSet[0])) || (($this->_xssAuto) && ((in_array(strtolower($attrSubSet[0]), $this->_attrBlacklist)) || (substr($attrSubSet[0], 0, 2) == 'on')))) {
  293. continue;
  294. }
  295. // XSS attribute value filtering
  296. if ($attrSubSet[1]) {
  297. // strips unicode, hex, etc
  298. $attrSubSet[1] = str_replace('&#', '', $attrSubSet[1]);
  299. // strip normal newline within attr value
  300. $attrSubSet[1] = preg_replace('/[\n\r]/', '', $attrSubSet[1]);
  301. // strip double quotes
  302. $attrSubSet[1] = str_replace('"', '', $attrSubSet[1]);
  303. // convert single quotes from either side to doubles (Single quotes shouldn't be used to pad attr value)
  304. if ((substr($attrSubSet[1], 0, 1) == "'") && (substr($attrSubSet[1], (strlen($attrSubSet[1]) - 1), 1) == "'")) {
  305. $attrSubSet[1] = substr($attrSubSet[1], 1, (strlen($attrSubSet[1]) - 2));
  306. }
  307. // strip slashes
  308. $attrSubSet[1] = stripslashes($attrSubSet[1]);
  309. }
  310. // Autostrip script tags
  311. if ($this->_checkAttribute($attrSubSet)) {
  312. continue;
  313. }
  314. // Is our attribute in the user input array?
  315. $attrFound = in_array(strtolower($attrSubSet[0]), $this->_attrArray);
  316. // If the tag is allowed lets keep it
  317. if ((!$attrFound && $this->_attrMethod) || ($attrFound && !$this->_attrMethod)) {
  318. // Does the attribute have a value?
  319. if ($attrSubSet[1]) {
  320. $newSet[] = $attrSubSet[0].'="'.$attrSubSet[1].'"';
  321. } elseif ($attrSubSet[1] == "0") {
  322. /*
  323. * Special Case
  324. * Is the value 0?
  325. */
  326. $newSet[] = $attrSubSet[0].'="0"';
  327. } else {
  328. $newSet[] = $attrSubSet[0].'="'.$attrSubSet[0].'"';
  329. }
  330. }
  331. }
  332. return $newSet;
  333. }
  334. /**
  335. * Function to determine if contents of an attribute is safe
  336. *
  337. * @param array $attrSubSet A 2 element array for attributes name,value
  338. * @return boolean True if bad code is detected
  339. */
  340. protected function _checkAttribute($attrSubSet)
  341. {
  342. $attrSubSet[0] = strtolower($attrSubSet[0]);
  343. $attrSubSet[1] = strtolower($attrSubSet[1]);
  344. return (((strpos($attrSubSet[1], 'expression') !== false) && ($attrSubSet[0]) == 'style') || (strpos($attrSubSet[1], 'javascript:') !== false) || (strpos($attrSubSet[1], 'behaviour:') !== false) || (strpos($attrSubSet[1], 'vbscript:') !== false) || (strpos($attrSubSet[1], 'mocha:') !== false) || (strpos($attrSubSet[1], 'livescript:') !== false));
  345. }
  346. /**
  347. * Try to convert to plaintext
  348. *
  349. * @param string $source
  350. * @return string Plaintext string
  351. */
  352. protected function _decode($source)
  353. {
  354. // entity decode
  355. $trans_tbl = get_html_translation_table(HTML_ENTITIES);
  356. foreach($trans_tbl as $k => $v) {
  357. $ttr[$v] = utf8_encode($k);
  358. }
  359. $source = strtr($source, $ttr);
  360. // convert decimal
  361. $source = preg_replace('/&#(\d+);/me', "chr(\\1)", $source); // decimal notation
  362. // convert hex
  363. $source = preg_replace('/&#x([a-f0-9]+);/mei', "chr(0x\\1)", $source); // hex notation
  364. return $source;
  365. }
  366. }