PageRenderTime 70ms CodeModel.GetById 19ms RepoModel.GetById 1ms app.codeStats 0ms

/joomla/libraries/joomla/filter/filterinput.php

https://github.com/reechalee/joomla1.6
PHP | 593 lines | 317 code | 65 blank | 211 comment | 84 complexity | d53e8e57e68697503989c10a686f172e MD5 | raw file
Possible License(s): GPL-2.0, LGPL-2.1, BSD-3-Clause, JSON
  1. <?php
  2. /**
  3. * @version $Id: filterinput.php 20899 2011-03-07 20:56:09Z ian $
  4. * @package Joomla.Framework
  5. * @subpackage Filter
  6. * @copyright Copyright (C) 2005 - 2011 Open Source Matters, Inc. All rights reserved.
  7. * @license GNU General Public License version 2 or later; see LICENSE.txt
  8. */
  9. // No direct access
  10. defined('JPATH_BASE') or die;
  11. /**
  12. * JFilterInput is a class for filtering input from any data source
  13. *
  14. * Forked from the php input filter library by: Daniel Morris <dan@rootcube.com>
  15. * Original Contributors: Gianpaolo Racca, Ghislain Picard, Marco Wandschneider, Chris Tobin and Andrew Eddie.
  16. *
  17. * @package Joomla.Framework
  18. * @subpackage Filter
  19. * @since 1.5
  20. */
  21. class JFilterInput extends JObject
  22. {
  23. /**
  24. * @var array An array of permitted tags.
  25. * @since 1.5
  26. */
  27. var $tagsArray;
  28. /**
  29. * @var array An array of permitted tag attributes.
  30. * @since 1.5
  31. */
  32. var $attrArray;
  33. /**
  34. * @var int WhiteList method = 0 (default), BlackList method = 1
  35. * @since 1.5
  36. */
  37. var $tagsMethod;
  38. /**
  39. * @var int WhiteList method = 0 (default), BlackList method = 1
  40. * @since 1.5
  41. */
  42. var $attrMethod;
  43. /**
  44. * @var int Only auto clean essentials = 0, Allow clean blacklisted tags/attr = 1
  45. * @since 1.5
  46. */
  47. var $xssAuto;
  48. /**
  49. * @var array A list of the default blacklisted tags.
  50. * @since 1.5
  51. */
  52. var $tagBlacklist = array ('applet', 'body', 'bgsound', 'base', 'basefont', 'embed', 'frame', 'frameset', 'head', 'html', 'id', 'iframe', 'ilayer', 'layer', 'link', 'meta', 'name', 'object', 'script', 'style', 'title', 'xml');
  53. /**
  54. * @var array A list of the default blacklisted tag attributes.
  55. * @since 1.5
  56. */
  57. var $attrBlacklist = array ('action', 'background', 'codebase', 'dynsrc', 'lowsrc'); // also will strip ALL event handlers
  /**
   * Creates a filter configured with the caller's tag and attribute lists.
   *
   * Every argument is optional. The two lists are cast to arrays and their
   * entries lower-cased before being stored; the three mode flags are stored
   * exactly as passed.
   *
   * @param   array  $tagsArray   List of user-defined tags
   * @param   array  $attrArray   List of user-defined attributes
   * @param   int    $tagsMethod  WhiteList method = 0, BlackList method = 1
   * @param   int    $attrMethod  WhiteList method = 0, BlackList method = 1
   * @param   int    $xssAuto     Only auto clean essentials = 0, Allow clean blacklisted tags/attr = 1
   *
   * @since   1.5
   */
  public function __construct($tagsArray = array(), $attrArray = array(), $tagsMethod = 0, $attrMethod = 0, $xssAuto = 1)
  {
      // Normalise the user-supplied lists to lower case so later lookups,
      // which lower-case the candidate name, compare like with like.
      $this->tagsArray = array_map('strtolower', (array) $tagsArray);
      $this->attrArray = array_map('strtolower', (array) $attrArray);

      // Mode flags are kept verbatim.
      $this->tagsMethod = $tagsMethod;
      $this->attrMethod = $attrMethod;
      $this->xssAuto = $xssAuto;
  }
  /**
   * Returns a shared filter for the given configuration, creating it on first use.
   *
   * Instances are cached per call signature: the MD5 of the serialised
   * argument list. Two calls with equal arguments receive the same object.
   *
   * @param   array  $tagsArray   List of user-defined tags
   * @param   array  $attrArray   List of user-defined attributes
   * @param   int    $tagsMethod  WhiteList method = 0, BlackList method = 1
   * @param   int    $attrMethod  WhiteList method = 0, BlackList method = 1
   * @param   int    $xssAuto     Only auto clean essentials = 0, Allow clean blacklisted tags/attr = 1
   *
   * @return  object  The JFilterInput object.
   *
   * @since   1.5
   */
  public static function &getInstance($tagsArray = array(), $attrArray = array(), $tagsMethod = 0, $attrMethod = 0, $xssAuto = 1)
  {
      // Per-configuration cache, kept for the lifetime of the request.
      static $instances = array();

      $config = array($tagsArray, $attrArray, $tagsMethod, $attrMethod, $xssAuto);
      $sig = md5(serialize($config));

      if (!isset($instances[$sig])) {
          $instances[$sig] = new JFilterInput($tagsArray, $attrArray, $tagsMethod, $attrMethod, $xssAuto);
      }

      // The method returns by reference, so hand back the cache slot itself.
      return $instances[$sig];
  }
  /**
   * Cleans a value according to the requested type.
   *
   * This is an instance method: the STRING, HTML and default branches use the
   * tag/attribute configuration stored on the object.
   *
   * Supported types (case-insensitive):
   *  - INT / INTEGER   first (optionally negative) run of digits, as int; 0 if none
   *  - FLOAT / DOUBLE  first decimal number, as float; 0.0 if none
   *  - BOOL / BOOLEAN  value cast to bool
   *  - WORD            everything except A-Z and underscore removed
   *  - ALNUM           everything except A-Z and 0-9 removed
   *  - CMD             everything except A-Z, 0-9, underscore, dot and dash
   *                    removed, then leading dots stripped
   *  - BASE64          everything except A-Z, 0-9, slash, plus and equals removed
   *  - STRING          entities decoded, then tags/attributes filtered
   *  - HTML            tags/attributes filtered without entity decoding
   *  - ARRAY           value cast to array (elements are not filtered)
   *  - PATH            the value if it is a relative path made of safe segments,
   *                    otherwise an empty string
   *  - USERNAME        control characters and < > " ' % & removed
   *  - anything else   strings are decoded and filtered; for arrays only the
   *                    top-level string elements are decoded and filtered;
   *                    other values are returned unchanged
   *
   * @param   mixed   $source  Input string/array-of-string to be 'cleaned'
   * @param   string  $type    Return type for the variable (see list above)
   *
   * @return  mixed  'Cleaned' version of input parameter
   *
   * @since   1.5
   */
  public function clean($source, $type = 'string')
  {
      // Handle the type constraint
      switch (strtoupper($type))
      {
          case 'INT' :
          case 'INTEGER' :
              // Only use the first integer value; no match yields 0
              preg_match('/-?[0-9]+/', (string) $source, $matches);
              $result = isset($matches[0]) ? (int) $matches[0] : 0;
              break;

          case 'FLOAT' :
          case 'DOUBLE' :
              // Only use the first floating point value; no match yields 0.0
              preg_match('/-?[0-9]+(\.[0-9]+)?/', (string) $source, $matches);
              $result = isset($matches[0]) ? (float) $matches[0] : 0.0;
              break;

          case 'BOOL' :
          case 'BOOLEAN' :
              $result = (bool) $source;
              break;

          case 'WORD' :
              $result = (string) preg_replace('/[^A-Z_]/i', '', $source);
              break;

          case 'ALNUM' :
              $result = (string) preg_replace('/[^A-Z0-9]/i', '', $source);
              break;

          case 'CMD' :
              $result = (string) preg_replace('/[^A-Z0-9_\.-]/i', '', $source);
              // A command may not start with a dot
              $result = ltrim($result, '.');
              break;

          case 'BASE64' :
              $result = (string) preg_replace('/[^A-Z0-9\/+=]/i', '', $source);
              break;

          case 'STRING' :
              $result = (string) $this->_remove($this->_decode((string) $source));
              break;

          case 'HTML' :
              $result = (string) $this->_remove((string) $source);
              break;

          case 'ARRAY' :
              $result = (array) $source;
              break;

          case 'PATH' :
              // The whole value must match; anything else yields an empty string
              $pattern = '/^[A-Za-z0-9_-]+[A-Za-z0-9_\.-]*([\\\\\/][A-Za-z0-9_-]+[A-Za-z0-9_\.-]*)*$/';
              preg_match($pattern, (string) $source, $matches);
              $result = isset($matches[0]) ? (string) $matches[0] : '';
              break;

          case 'USERNAME' :
              $result = (string) preg_replace('/[\x00-\x1F\x7F<>"\'%&]/', '', $source);
              break;

          default :
              if (is_array($source)) {
                  // Filter each top-level string element for XSS and other 'bad' code
                  foreach ($source as $key => $value) {
                      if (is_string($value)) {
                          $source[$key] = $this->_remove($this->_decode($value));
                      }
                  }
                  $result = $source;
              } elseif (is_string($source) && !empty($source)) {
                  // Filter source for XSS and other 'bad' code
                  $result = $this->_remove($this->_decode($source));
              } else {
                  // Not an array or a non-empty string: return the passed parameter
                  $result = $source;
              }
              break;
      }

      return $result;
  }
  /**
   * Determines whether an attribute's value contains script-like content.
   *
   * Name and value are compared in lower case. The attribute is reported as
   * bad when it is a `style` attribute whose value contains `expression`, or
   * when the value of any attribute contains one of the scheme/property
   * markers listed below. Both spellings of the CSS behavior property are
   * checked; `behavior:` is the spelling the property actually uses.
   *
   * A missing name or value is treated as an empty string.
   *
   * @param   array  $attrSubSet  A 2 element array for attributes name,value
   *
   * @return  boolean  True if bad code is detected
   *
   * @since   1.5
   */
  public static function checkAttribute($attrSubSet)
  {
      $name = isset($attrSubSet[0]) ? strtolower($attrSubSet[0]) : '';
      $value = isset($attrSubSet[1]) ? strtolower($attrSubSet[1]) : '';

      // CSS expressions are only meaningful inside a style attribute
      if ($name == 'style' && strpos($value, 'expression') !== false) {
          return true;
      }

      $markers = array('javascript:', 'behaviour:', 'behavior:', 'vbscript:', 'mocha:', 'livescript:');

      foreach ($markers as $marker) {
          if (strpos($value, $marker) !== false) {
              return true;
          }
      }

      return false;
  }
  /**
   * Internal method to iteratively remove all unwanted tags and attributes.
   *
   * The tag cleaner is applied repeatedly until a pass leaves the string
   * unchanged. Repeating protects against nested constructs where removing
   * one tag exposes another. Each pass invokes the cleaner exactly once.
   *
   * @param   string  $source  Input string to be 'cleaned'
   *
   * @return  string  'Cleaned' version of input parameter
   *
   * @since   1.5
   */
  protected function _remove($source)
  {
      do {
          $previous = $source;
          $source = $this->_cleanTags($previous);
      } while ($source !== $previous);

      return $source;
  }
  /**
   * Internal method to strip a string of certain tags.
   *
   * The string is scanned left to right. Text before each '<' is copied to
   * the output; the tag that follows is then either dropped or rebuilt:
   *
   *  - A '<' with no '>' anywhere after it is dropped and scanning continues
   *    in the text that followed it, so that text is kept.
   *  - A '<' followed by another '<' before the next '>' is copied through
   *    up to the inner '<', and scanning restarts there.
   *  - Tags whose name is not a letter followed by letters/digits, or (when
   *    xssAuto is on) whose name is in the tag blacklist, are dropped.
   *  - Remaining tags are kept only if permitted by tagsArray/tagsMethod.
   *    Kept opening tags are rebuilt from the attributes that survive
   *    _cleanAttributes(), and closed with '>' when a matching '</name'
   *    appears later in the input, otherwise with ' />'.
   *
   * @param   string  $source  Input string to be 'cleaned'
   *
   * @return  string  'Cleaned' version of input parameter
   *
   * @since   1.5
   */
  protected function _cleanTags($source)
  {
      // First, pre-process this for illegal characters inside attribute values
      $source = $this->_escapeAttributeValues($source);

      // In the beginning we don't really have a tag, so everything is postTag
      $preTag = null;
      $postTag = $source;

      // Is there a tag? If so it will certainly start with a '<'
      $tagOpen_start = strpos($source, '<');

      while ($tagOpen_start !== false) {
          // Move the text before the tag to the output; $postTag now starts at '<'
          $preTag .= substr($postTag, 0, $tagOpen_start);
          $postTag = substr($postTag, $tagOpen_start);
          $fromTagOpen = substr($postTag, 1);
          $tagOpen_end = strpos($fromTagOpen, '>');

          // Non-terminated tag: drop only the '<' itself. $postTag already
          // starts at the '<', so the offset to skip is 1, not $tagOpen_start + 1.
          if ($tagOpen_end === false) {
              $postTag = $fromTagOpen;
              $tagOpen_start = strpos($postTag, '<');
              continue;
          }

          // Do we have a nested tag?
          $tagOpen_nested = strpos($fromTagOpen, '<');

          if (($tagOpen_nested !== false) && ($tagOpen_nested < $tagOpen_end)) {
              $preTag .= substr($postTag, 0, ($tagOpen_nested + 1));
              $postTag = substr($postTag, ($tagOpen_nested + 1));
              $tagOpen_start = strpos($postTag, '<');
              continue;
          }

          // Lets get some information about our tag and setup attribute pairs
          $currentTag = substr($fromTagOpen, 0, $tagOpen_end);
          $tagLength = strlen($currentTag);
          $tagLeft = $currentTag;
          $attrSet = array();
          $currentSpace = strpos($tagLeft, ' ');

          // Are we an open tag or a close tag?
          $isCloseTag = (substr($currentTag, 0, 1) == '/');
          list($tagName) = explode(' ', $currentTag);

          if ($isCloseTag) {
              $tagName = substr($tagName, 1);
          }

          /*
           * Exclude all "non-regular" tagnames
           * OR no tagname
           * OR remove if xssauto is on and tag is blacklisted
           *
           * The D modifier makes '$' match only at the very end, so a name with
           * a trailing newline (e.g. "script\n") cannot pass validation and then
           * miss the blacklist lookup.
           */
          if ((!preg_match("/^[a-z][a-z0-9]*$/iD", $tagName)) || (!$tagName) || ((in_array(strtolower($tagName), $this->tagBlacklist)) && ($this->xssAuto))) {
              // Strip tag
              $postTag = substr($postTag, ($tagLength + 2));
              $tagOpen_start = strpos($postTag, '<');
              continue;
          }

          /*
           * Time to grab any attributes from the tag... need this section in
           * case attributes have spaces in the values.
           */
          while ($currentSpace !== false) {
              $attr = '';
              $fromSpace = substr($tagLeft, ($currentSpace + 1));
              $nextEqual = strpos($fromSpace, '=');
              $nextSpace = strpos($fromSpace, ' ');
              $openQuotes = strpos($fromSpace, '"');
              $closeQuotes = strpos(substr($fromSpace, ($openQuotes + 1)), '"') + $openQuotes + 1;

              // Find position of equal and open quotes, ignoring surrounding whitespace
              if (preg_match('#\s*=\s*\"#', $fromSpace, $matches, PREG_OFFSET_CAPTURE)) {
                  $startAtt = $matches[0][0];
                  $startAttPosition = $matches[0][1];
                  $closeQuotes = strpos(substr($fromSpace, ($startAttPosition + strlen($startAtt))), '"') + $startAttPosition + strlen($startAtt);
                  $nextEqual = $startAttPosition + strpos($startAtt, '=');
                  $openQuotes = $startAttPosition + strpos($startAtt, '"');
                  $nextSpace = strpos(substr($fromSpace, $closeQuotes), ' ') + $closeQuotes;
              }

              // Do we have an attribute to process? [check for equal sign]
              if ($fromSpace != '/' && (($nextEqual && $nextSpace && $nextSpace < $nextEqual) || !$nextEqual)) {
                  if (!$nextEqual) {
                      $attribEnd = strpos($fromSpace, '/') - 1;
                  } else {
                      $attribEnd = $nextSpace - 1;
                  }

                  // If there is an ending, use this, if not do not worry
                  if ($attribEnd > 0) {
                      $fromSpace = substr($fromSpace, $attribEnd + 1);
                  }
              }

              if (strpos($fromSpace, '=') !== false) {
                  /*
                   * If the attribute value is wrapped in quotes we need to
                   * grab the substring from the closing quote, otherwise grab
                   * till the next space
                   */
                  if (($openQuotes !== false) && (strpos(substr($fromSpace, ($openQuotes + 1)), '"') !== false)) {
                      $attr = substr($fromSpace, 0, ($closeQuotes + 1));
                  } else {
                      $attr = substr($fromSpace, 0, $nextSpace);
                  }
              } else {
                  /*
                   * No more equal signs so add any extra text in the tag into
                   * the attribute array [eg. checked]
                   */
                  if ($fromSpace != '/') {
                      $attr = substr($fromSpace, 0, $nextSpace);
                  }
              }

              // Last Attribute Pair
              if (!$attr && $fromSpace != '/') {
                  $attr = $fromSpace;
              }

              // Add attribute pair to the attribute array
              $attrSet[] = $attr;

              // Move search point and continue iteration
              $tagLeft = substr($fromSpace, strlen($attr));
              $currentSpace = strpos($tagLeft, ' ');
          }

          // Is our tag in the user input array?
          $tagFound = in_array(strtolower($tagName), $this->tagsArray);

          // If the tag is allowed lets append it to the output string
          if ((!$tagFound && $this->tagsMethod) || ($tagFound && !$this->tagsMethod)) {
              if (!$isCloseTag) {
                  // Open or Single tag: reconstruct with allowed attributes
                  $preTag .= '<' . $tagName;

                  foreach ($this->_cleanAttributes($attrSet) as $cleanAttr) {
                      $preTag .= ' ' . $cleanAttr;
                  }

                  // Reformat single tags to XHTML
                  if (strpos($fromTagOpen, '</' . $tagName)) {
                      $preTag .= '>';
                  } else {
                      $preTag .= ' />';
                  }
              } else {
                  // Closing Tag
                  $preTag .= '</' . $tagName . '>';
              }
          }

          // Find next tag's start and continue iteration.
          // +2 accounts for the surrounding '<' and '>'.
          $postTag = substr($postTag, ($tagLength + 2));
          $tagOpen_start = strpos($postTag, '<');
      }

      // Append any code after the end of tags and return
      if ($postTag != '<') {
          $preTag .= $postTag;
      }

      return $preTag;
  }
  /**
   * Internal method to strip a tag of certain attributes.
   *
   * Each entry of $attrSet is a raw `name=value` fragment. An entry is dropped when:
   *  - it is blank or has no `=value` part;
   *  - its name is not a letter, underscore or colon followed by letters,
   *    digits, underscores, colons, dots or dashes;
   *  - xssAuto is on and the name (compared case-insensitively) is in the
   *    attribute blacklist or starts with "on" (event handlers);
   *  - checkAttribute() reports its value as bad;
   *  - attrArray/attrMethod do not permit the name.
   *
   * Surviving values are trimmed, have `&#`, newlines and double quotes
   * removed, lose one pair of enclosing single quotes, and are passed
   * through stripslashes(). Each survivor is returned as `name="value"`,
   * with the name in its original case.
   *
   * @param   array  $attrSet  Array of attribute pairs to filter
   *
   * @return  array  Filtered array of attribute pairs
   *
   * @since   1.5
   */
  protected function _cleanAttributes($attrSet)
  {
      $newSet = array();

      foreach ($attrSet as $attrPair) {
          // Skip blank spaces
          if (!$attrPair) {
              continue;
          }

          // Split into name/value pairs
          $attrSubSet = explode('=', trim($attrPair), 2);

          // Take the last word in case a value-less attribute precedes the name.
          // array_pop() takes its argument by reference, so it needs a variable.
          $nameParts = explode(' ', trim($attrSubSet[0]));
          $attrSubSet[0] = array_pop($nameParts);
          $lowerName = strtolower($attrSubSet[0]);

          // Remove all "non-regular" attribute names. The pattern is anchored
          // at both ends so the whole name is validated.
          if (!preg_match('/^[a-z_:][a-z0-9_:.\-]*\z/i', $attrSubSet[0])) {
              continue;
          }

          // Remove blacklisted attributes and ALL event handlers, whatever their case
          if ($this->xssAuto && (in_array($lowerName, $this->attrBlacklist) || substr($lowerName, 0, 2) == 'on')) {
              continue;
          }

          // Attributes without a value are not kept
          if (!isset($attrSubSet[1])) {
              continue;
          }

          // XSS attribute value filtering
          $value = trim($attrSubSet[1]);
          // strips unicode, hex, etc
          $value = str_replace('&#', '', $value);
          // strip normal newline within attr value
          $value = preg_replace('/[\n\r]/', '', $value);
          // strip double quotes
          $value = str_replace('"', '', $value);

          // Remove one pair of enclosing single quotes (single quotes shouldn't be used to pad attr value)
          if ((substr($value, 0, 1) == "'") && (substr($value, (strlen($value) - 1), 1) == "'")) {
              $value = substr($value, 1, (strlen($value) - 2));
          }

          // strip slashes
          $value = (string) stripslashes($value);
          $attrSubSet[1] = $value;

          // Autostrip script tags
          if (self::checkAttribute($attrSubSet)) {
              continue;
          }

          // Is our attribute in the user input array?
          $attrFound = in_array($lowerName, $this->attrArray);

          // If the attribute is allowed lets keep it. Empty values and "0"
          // are emitted like any other value.
          if ((!$attrFound && $this->attrMethod) || ($attrFound && !$this->attrMethod)) {
              $newSet[] = $attrSubSet[0] . '="' . $value . '"';
          }
      }

      return $newSet;
  }
  /**
   * Try to convert to plaintext.
   *
   * Named entities from the ISO-8859-1 HTML entity table, and decimal and
   * hexadecimal numeric character references that end in a semicolon, are
   * replaced by the UTF-8 encoding of the character they denote. Numeric
   * references that are not valid Unicode scalar values (surrogates or
   * above U+10FFFF) are left untouched.
   *
   * Replacements use callbacks rather than the /e pattern modifier, which
   * PHP no longer supports.
   *
   * @param   string  $source  The source string.
   *
   * @return  string  Plaintext string
   *
   * @since   1.5
   */
  protected function _decode($source)
  {
      static $ttr;

      if (!is_array($ttr)) {
          $ttr = array();

          // Flags and charset are given explicitly because their defaults
          // differ between PHP versions. Keys of this table are single
          // ISO-8859-1 bytes, whose byte value equals their code point.
          $trans_tbl = get_html_translation_table(HTML_ENTITIES, ENT_COMPAT, 'ISO-8859-1');

          foreach ($trans_tbl as $char => $entity) {
              $ttr[$entity] = $this->_codePointToUtf8(ord($char), $char);
          }
      }

      // entity decode
      $source = strtr($source, $ttr);

      // convert decimal notation
      $source = preg_replace_callback('/&#(\d+);/m', array($this, '_decodeDecimalEntity'), $source);

      // convert hex notation
      $source = preg_replace_callback('/&#x([a-f0-9]+);/mi', array($this, '_decodeHexEntity'), $source);

      return $source;
  }

  /**
   * Callback: decodes one decimal character reference matched by _decode().
   *
   * @param   array  $match  Regex match; [0] is the whole reference, [1] the digits
   *
   * @return  string  The decoded character, or the reference unchanged if out of range
   */
  protected function _decodeDecimalEntity($match)
  {
      $digits = ltrim($match[1], '0');

      // 1114111 (U+10FFFF) has 7 digits; anything longer cannot be valid
      if (strlen($digits) > 7) {
          return $match[0];
      }

      return $this->_codePointToUtf8((int) $digits, $match[0]);
  }

  /**
   * Callback: decodes one hexadecimal character reference matched by _decode().
   *
   * @param   array  $match  Regex match; [0] is the whole reference, [1] the hex digits
   *
   * @return  string  The decoded character, or the reference unchanged if out of range
   */
  protected function _decodeHexEntity($match)
  {
      $digits = ltrim($match[1], '0');

      // 10FFFF has 6 digits; anything longer cannot be valid
      if (strlen($digits) > 6) {
          return $match[0];
      }

      return $this->_codePointToUtf8((int) hexdec($digits), $match[0]);
  }

  /**
   * Encodes a Unicode code point as UTF-8.
   *
   * @param   int     $codePoint  The code point to encode
   * @param   string  $fallback   Returned when $codePoint is a surrogate or above U+10FFFF
   *
   * @return  string  UTF-8 byte sequence, or $fallback
   */
  protected function _codePointToUtf8($codePoint, $fallback)
  {
      if ($codePoint < 0x80) {
          return chr($codePoint);
      }

      if ($codePoint < 0x800) {
          return chr(0xC0 | ($codePoint >> 6))
              . chr(0x80 | ($codePoint & 0x3F));
      }

      // UTF-16 surrogates are not characters
      if ($codePoint >= 0xD800 && $codePoint <= 0xDFFF) {
          return $fallback;
      }

      if ($codePoint < 0x10000) {
          return chr(0xE0 | ($codePoint >> 12))
              . chr(0x80 | (($codePoint >> 6) & 0x3F))
              . chr(0x80 | ($codePoint & 0x3F));
      }

      if ($codePoint <= 0x10FFFF) {
          return chr(0xF0 | ($codePoint >> 18))
              . chr(0x80 | (($codePoint >> 12) & 0x3F))
              . chr(0x80 | (($codePoint >> 6) & 0x3F))
              . chr(0x80 | ($codePoint & 0x3F));
      }

      return $fallback;
  }
  /**
   * Escape < > and " inside attribute values.
   *
   * The string is consumed one double-quoted attribute value at a time. A
   * value starts after `="` (whitespace allowed around the equals sign) and
   * ends at the first double quote that is followed by whitespace, by `>`,
   * by `/>`, or by the end of the string. If no such quote exists, the value
   * runs to the end of the string. Inside each value the three characters
   * are replaced by their entities, and a closing double quote is written
   * after it.
   *
   * @param   string  $source  The source string.
   *
   * @return  string  Filtered string
   *
   * @since   1.6
   */
  protected function _escapeAttributeValues($source)
  {
      $processed = '';
      $pending = $source;
      $search = array('<', '"', '>');
      $replace = array('&lt;', '&quot;', '&gt;');

      // Keep going while the unprocessed part still contains an opening ="
      while (preg_match('#\s*=\s*\"#', $pending, $opener, PREG_OFFSET_CAPTURE)) {
          // Offset of the first character of the value
          $valueStart = $opener[0][1] + strlen($opener[0][0]);

          // Look for the closing quote in what follows the opening one
          $hasClosingQuote = preg_match(
              '#(\"\s*/\s*>|\"\s*>|\"\s+|\"$)#',
              substr($pending, $valueStart),
              $closer,
              PREG_OFFSET_CAPTURE
          );

          if ($hasClosingQuote) {
              $valueEnd = $valueStart + $closer[0][1];
          } else {
              // No closing quote: the value extends to the end of the string
              $valueEnd = strlen($pending);
          }

          // Escape the bad characters inside the value only
          $rawValue = substr($pending, $valueStart, $valueEnd - $valueStart);
          $safeValue = str_replace($search, $replace, $rawValue);

          // Emit everything up to and including the opening quote, the
          // escaped value, and a closing quote
          $processed .= substr($pending, 0, $valueStart) . $safeValue . '"';

          // Continue after the original closing quote
          $pending = substr($pending, $valueEnd + 1);
      }

      // Whatever is left contains no further quoted attribute values
      return $processed . $pending;
  }
  528. }