PageRenderTime 59ms CodeModel.GetById 23ms RepoModel.GetById 0ms app.codeStats 0ms

/libraries/joomla/filter/filterinput.php

https://github.com/joebushi/joomla
PHP | 498 lines | 259 code | 57 blank | 182 comment | 70 complexity | d67a1dc43b56994bcef89427750547ad MD5 | raw file
Possible License(s): LGPL-2.1, Apache-2.0
  1. <?php
  2. /**
  3. * @version $Id$
  4. * @package Joomla.Framework
  5. * @subpackage Filter
  6. * @copyright Copyright (C) 2005 - 2010 Open Source Matters, Inc. All rights reserved.
  7. * @license GNU General Public License version 2 or later; see LICENSE.txt
  8. */
  9. // No direct access
  10. defined('JPATH_BASE') or die;
  11. /**
  12. * JFilterInput is a class for filtering input from any data source
  13. *
  14. * Forked from the php input filter library by: Daniel Morris <dan@rootcube.com>
  15. * Original Contributors: Gianpaolo Racca, Ghislain Picard, Marco Wandschneider, Chris Tobin and Andrew Eddie.
  16. *
  17. * @package Joomla.Framework
  18. * @subpackage Filter
  19. * @since 1.5
  20. */
  21. class JFilterInput extends JObject
  22. {
  23. /**
  24. * @var array An array of permitted tags.
  25. */
  26. var $tagsArray;
  27. /**
  28. * @var array An array of permitted tag attributes.
  29. */
  30. var $attrArray; // default = empty array
  31. /**
  32. * @var int WhiteList method = 0 (default), BlackList method = 1
  33. */
  34. var $tagsMethod;
  35. /**
  36. * @var int WhiteList method = 0 (default), BlackList method = 1
  37. */
  38. var $attrMethod;
  39. /**
  40. * @var int Only auto clean essentials = 0, Allow clean blacklisted tags/attr = 1
  41. */
  42. var $xssAuto;
  43. /**
  44. * @var array A list of the default blacklisted tags.
  45. */
  46. var $tagBlacklist = array ('applet', 'body', 'bgsound', 'base', 'basefont', 'embed', 'frame', 'frameset', 'head', 'html', 'id', 'iframe', 'ilayer', 'layer', 'link', 'meta', 'name', 'object', 'script', 'style', 'title', 'xml');
  47. /**
  48. * @var array A list of the default blacklisted tag attributes.
  49. */
  50. var $attrBlacklist = array ('action', 'background', 'codebase', 'dynsrc', 'lowsrc'); // also will strip ALL event handlers
  51. /**
  52. * Constructor for inputFilter class. Only first parameter is required.
  53. *
  54. * @access protected
  55. * @param array List of user-defined tags
  56. * @param array List of user-defined attributes
  57. * @param int WhiteList method = 0, BlackList method = 1
  58. * @param int WhiteList method = 0, BlackList method = 1
  59. * @param int Only auto clean essentials = 0, Allow clean blacklisted tags/attr = 1
  60. * @since 1.5
  61. */
  62. function __construct($tagsArray = array(), $attrArray = array(), $tagsMethod = 0, $attrMethod = 0, $xssAuto = 1)
  63. {
  64. // Make sure user defined arrays are in lowercase
  65. $tagsArray = array_map('strtolower', (array) $tagsArray);
  66. $attrArray = array_map('strtolower', (array) $attrArray);
  67. // Assign member variables
  68. $this->tagsArray = $tagsArray;
  69. $this->attrArray = $attrArray;
  70. $this->tagsMethod = $tagsMethod;
  71. $this->attrMethod = $attrMethod;
  72. $this->xssAuto = $xssAuto;
  73. }
  74. /**
  75. * Returns an input filter object, only creating it if it doesn't already exist.
  76. *
  77. * @param array List of user-defined tags
  78. * @param array List of user-defined attributes
  79. * @param int WhiteList method = 0, BlackList method = 1
  80. * @param int WhiteList method = 0, BlackList method = 1
  81. * @param int Only auto clean essentials = 0, Allow clean blacklisted tags/attr = 1
  82. * @return object The JFilterInput object.
  83. * @since 1.5
  84. */
  85. public static function getInstance($tagsArray = array(), $attrArray = array(), $tagsMethod = 0, $attrMethod = 0, $xssAuto = 1)
  86. {
  87. static $instances;
  88. $sig = md5(serialize(array($tagsArray,$attrArray,$tagsMethod,$attrMethod,$xssAuto)));
  89. if (!isset ($instances)) {
  90. $instances = array();
  91. }
  92. if (empty ($instances[$sig])) {
  93. $instances[$sig] = new JFilterInput($tagsArray, $attrArray, $tagsMethod, $attrMethod, $xssAuto);
  94. }
  95. return $instances[$sig];
  96. }
  97. /**
  98. * Method to be called by another php script. Processes for XSS and
  99. * specified bad code.
  100. *
  101. * @param mixed $source Input string/array-of-string to be 'cleaned'
  102. * @param string $type Return type for the variable (INT, FLOAT, BOOLEAN, WORD, ALNUM, CMD, BASE64, STRING, ARRAY, PATH, NONE)
  103. * @return mixed 'Cleaned' version of input parameter
  104. * @since 1.5
  105. * @static
  106. */
  107. public function clean($source, $type='string')
  108. {
  109. // Handle the type constraint
  110. switch (strtoupper($type))
  111. {
  112. case 'INT' :
  113. case 'INTEGER' :
  114. // Only use the first integer value
  115. preg_match('/-?[0-9]+/', (string) $source, $matches);
  116. $result = @ (int) $matches[0];
  117. break;
  118. case 'FLOAT' :
  119. case 'DOUBLE' :
  120. // Only use the first floating point value
  121. preg_match('/-?[0-9]+(\.[0-9]+)?/', (string) $source, $matches);
  122. $result = @ (float) $matches[0];
  123. break;
  124. case 'BOOL' :
  125. case 'BOOLEAN' :
  126. $result = (bool) $source;
  127. break;
  128. case 'WORD' :
  129. $result = (string) preg_replace('/[^A-Z_]/i', '', $source);
  130. break;
  131. case 'ALNUM' :
  132. $result = (string) preg_replace('/[^A-Z0-9]/i', '', $source);
  133. break;
  134. case 'CMD' :
  135. $result = (string) preg_replace('/[^A-Z0-9_\.-]/i', '', $source);
  136. $result = ltrim($result, '.');
  137. break;
  138. case 'BASE64' :
  139. $result = (string) preg_replace('/[^A-Z0-9\/+=]/i', '', $source);
  140. break;
  141. case 'STRING' :
  142. $result = (string) $this->_remove($this->_decode((string) $source));
  143. break;
  144. case 'ARRAY' :
  145. $result = (array) $source;
  146. break;
  147. case 'PATH' :
  148. $pattern = '/^[A-Za-z0-9_-]+[A-Za-z0-9_\.-]*([\\\\\/][A-Za-z0-9_-]+[A-Za-z0-9_\.-]*)*$/';
  149. preg_match($pattern, (string) $source, $matches);
  150. $result = @ (string) $matches[0];
  151. break;
  152. case 'USERNAME' :
  153. $result = (string) preg_replace('/[\x00-\x1F\x7F<>"\'%&]/', '', $source);
  154. break;
  155. default :
  156. // Are we dealing with an array?
  157. if (is_array($source))
  158. {
  159. foreach ($source as $key => $value)
  160. {
  161. // filter element for XSS and other 'bad' code etc.
  162. if (is_string($value)) {
  163. $source[$key] = $this->_remove($this->_decode($value));
  164. }
  165. }
  166. $result = $source;
  167. }
  168. else
  169. {
  170. // Or a string?
  171. if (is_string($source) && !empty ($source)) {
  172. // filter source for XSS and other 'bad' code etc.
  173. $result = $this->_remove($this->_decode($source));
  174. }
  175. else {
  176. // Not an array or string.. return the passed parameter
  177. $result = $source;
  178. }
  179. }
  180. break;
  181. }
  182. return $result;
  183. }
  184. /**
  185. * Function to determine if contents of an attribute is safe
  186. *
  187. * @param array A 2 element array for attributes name,value
  188. * @return boolean True if bad code is detected
  189. * @since 1.5
  190. */
  191. public static function checkAttribute($attrSubSet)
  192. {
  193. $attrSubSet[0] = strtolower($attrSubSet[0]);
  194. $attrSubSet[1] = strtolower($attrSubSet[1]);
  195. return (((strpos($attrSubSet[1], 'expression') !== false) && ($attrSubSet[0]) == 'style') || (strpos($attrSubSet[1], 'javascript:') !== false) || (strpos($attrSubSet[1], 'behaviour:') !== false) || (strpos($attrSubSet[1], 'vbscript:') !== false) || (strpos($attrSubSet[1], 'mocha:') !== false) || (strpos($attrSubSet[1], 'livescript:') !== false));
  196. }
  197. /**
  198. * Internal method to iteratively remove all unwanted tags and attributes
  199. *
  200. * @param string Input string to be 'cleaned'
  201. * @return string 'Cleaned' version of input parameter
  202. * @since 1.5
  203. */
  204. protected function _remove($source)
  205. {
  206. $loopCounter = 0;
  207. // Iteration provides nested tag protection
  208. while ($source != $this->_cleanTags($source)) {
  209. $source = $this->_cleanTags($source);
  210. $loopCounter ++;
  211. }
  212. return $source;
  213. }
  214. /**
  215. * Internal method to strip a string of certain tags
  216. *
  217. * @param string Input string to be 'cleaned'
  218. * @return string 'Cleaned' version of input parameter
  219. * @since 1.5
  220. */
  221. protected function _cleanTags($source)
  222. {
  223. // In the beginning we don't really have a tag, so everything is postTag
  224. $preTag = null;
  225. $postTag = $source;
  226. $currentSpace = false;
  227. $attr = ''; // moffats: setting to null due to issues in migration system - undefined variable errors
  228. // Is there a tag? If so it will certainly start with a '<'
  229. $tagOpen_start = strpos($source, '<');
  230. while ($tagOpen_start !== false) {
  231. // Get some information about the tag we are processing
  232. $preTag .= substr($postTag, 0, $tagOpen_start);
  233. $postTag = substr($postTag, $tagOpen_start);
  234. $fromTagOpen = substr($postTag, 1);
  235. $tagOpen_end = strpos($fromTagOpen, '>');
  236. // Let's catch any non-terminated tags and skip over them
  237. if ($tagOpen_end === false) {
  238. $postTag = substr($postTag, $tagOpen_start +1);
  239. $tagOpen_start = strpos($postTag, '<');
  240. continue;
  241. }
  242. // Do we have a nested tag?
  243. $tagOpen_nested = strpos($fromTagOpen, '<');
  244. $tagOpen_nested_end = strpos(substr($postTag, $tagOpen_end), '>');
  245. if (($tagOpen_nested !== false) && ($tagOpen_nested < $tagOpen_end)) {
  246. $preTag .= substr($postTag, 0, ($tagOpen_nested +1));
  247. $postTag = substr($postTag, ($tagOpen_nested +1));
  248. $tagOpen_start = strpos($postTag, '<');
  249. continue;
  250. }
  251. // Lets get some information about our tag and setup attribute pairs
  252. $tagOpen_nested = (strpos($fromTagOpen, '<') + $tagOpen_start +1);
  253. $currentTag = substr($fromTagOpen, 0, $tagOpen_end);
  254. $tagLength = strlen($currentTag);
  255. $tagLeft = $currentTag;
  256. $attrSet = array ();
  257. $currentSpace = strpos($tagLeft, ' ');
  258. // Are we an open tag or a close tag?
  259. if (substr($currentTag, 0, 1) == '/') {
  260. // Close Tag
  261. $isCloseTag = true;
  262. list ($tagName) = explode(' ', $currentTag);
  263. $tagName = substr($tagName, 1);
  264. } else {
  265. // Open Tag
  266. $isCloseTag = false;
  267. list ($tagName) = explode(' ', $currentTag);
  268. }
  269. /*
  270. * Exclude all "non-regular" tagnames
  271. * OR no tagname
  272. * OR remove if xssauto is on and tag is blacklisted
  273. */
  274. if ((!preg_match("/^[a-z][a-z0-9]*$/i", $tagName)) || (!$tagName) || ((in_array(strtolower($tagName), $this->tagBlacklist)) && ($this->xssAuto))) {
  275. $postTag = substr($postTag, ($tagLength +2));
  276. $tagOpen_start = strpos($postTag, '<');
  277. // Strip tag
  278. continue;
  279. }
  280. /*
  281. * Time to grab any attributes from the tag... need this section in
  282. * case attributes have spaces in the values.
  283. */
  284. while ($currentSpace !== false) {
  285. $attr = '';
  286. $fromSpace = substr($tagLeft, ($currentSpace +1));
  287. $nextSpace = strpos($fromSpace, ' ');
  288. $openQuotes = strpos($fromSpace, '"');
  289. $closeQuotes = strpos(substr($fromSpace, ($openQuotes +1)), '"') + $openQuotes +1;
  290. // Do we have an attribute to process? [check for equal sign]
  291. if (strpos($fromSpace, '=') !== false) {
  292. /*
  293. * If the attribute value is wrapped in quotes we need to
  294. * grab the substring from the closing quote, otherwise grab
  295. * till the next space
  296. */
  297. if (($openQuotes !== false) && (strpos(substr($fromSpace, ($openQuotes +1)), '"') !== false)) {
  298. $attr = substr($fromSpace, 0, ($closeQuotes +1));
  299. } else {
  300. $attr = substr($fromSpace, 0, $nextSpace);
  301. }
  302. } else {
  303. /*
  304. * No more equal signs so add any extra text in the tag into
  305. * the attribute array [eg. checked]
  306. */
  307. if ($fromSpace != '/') {
  308. $attr = substr($fromSpace, 0, $nextSpace);
  309. }
  310. }
  311. // Last Attribute Pair
  312. if (!$attr && $fromSpace != '/') {
  313. $attr = $fromSpace;
  314. }
  315. // Add attribute pair to the attribute array
  316. $attrSet[] = $attr;
  317. // Move search point and continue iteration
  318. $tagLeft = substr($fromSpace, strlen($attr));
  319. $currentSpace = strpos($tagLeft, ' ');
  320. }
  321. // Is our tag in the user input array?
  322. $tagFound = in_array(strtolower($tagName), $this->tagsArray);
  323. // If the tag is allowed lets append it to the output string
  324. if ((!$tagFound && $this->tagsMethod) || ($tagFound && !$this->tagsMethod)) {
  325. // Reconstruct tag with allowed attributes
  326. if (!$isCloseTag) {
  327. // Open or Single tag
  328. $attrSet = $this->_cleanAttributes($attrSet);
  329. $preTag .= '<'.$tagName;
  330. for ($i = 0; $i < count($attrSet); $i ++) {
  331. $preTag .= ' '.$attrSet[$i];
  332. }
  333. // Reformat single tags to XHTML
  334. if (strpos($fromTagOpen, '</'.$tagName)) {
  335. $preTag .= '>';
  336. } else {
  337. $preTag .= ' />';
  338. }
  339. } else {
  340. // Closing Tag
  341. $preTag .= '</'.$tagName.'>';
  342. }
  343. }
  344. // Find next tag's start and continue iteration
  345. $postTag = substr($postTag, ($tagLength +2));
  346. $tagOpen_start = strpos($postTag, '<');
  347. }
  348. // Append any code after the end of tags and return
  349. if ($postTag != '<') {
  350. $preTag .= $postTag;
  351. }
  352. return $preTag;
  353. }
  354. /**
  355. * Internal method to strip a tag of certain attributes
  356. *
  357. * @param array Array of attribute pairs to filter
  358. * @return array Filtered array of attribute pairs
  359. * @since 1.5
  360. */
  361. protected function _cleanAttributes($attrSet)
  362. {
  363. // Initialise variables.
  364. $newSet = array();
  365. // Iterate through attribute pairs
  366. for ($i = 0; $i < count($attrSet); $i ++) {
  367. // Skip blank spaces
  368. if (!$attrSet[$i]) {
  369. continue;
  370. }
  371. // Split into name/value pairs
  372. $attrSubSet = explode('=', trim($attrSet[$i]), 2);
  373. list ($attrSubSet[0]) = explode(' ', $attrSubSet[0]);
  374. /*
  375. * Remove all "non-regular" attribute names
  376. * AND blacklisted attributes
  377. */
  378. if ((!preg_match('/[a-z]*$/i', $attrSubSet[0])) || (($this->xssAuto) && ((in_array(strtolower($attrSubSet[0]), $this->attrBlacklist)) || (substr($attrSubSet[0], 0, 2) == 'on')))) {
  379. continue;
  380. }
  381. // XSS attribute value filtering
  382. if ($attrSubSet[1]) {
  383. // strips unicode, hex, etc
  384. $attrSubSet[1] = str_replace('&#', '', $attrSubSet[1]);
  385. // strip normal newline within attr value
  386. $attrSubSet[1] = preg_replace('/[\n\r]/', '', $attrSubSet[1]);
  387. // strip double quotes
  388. $attrSubSet[1] = str_replace('"', '', $attrSubSet[1]);
  389. // convert single quotes from either side to doubles (Single quotes shouldn't be used to pad attr value)
  390. if ((substr($attrSubSet[1], 0, 1) == "'") && (substr($attrSubSet[1], (strlen($attrSubSet[1]) - 1), 1) == "'")) {
  391. $attrSubSet[1] = substr($attrSubSet[1], 1, (strlen($attrSubSet[1]) - 2));
  392. }
  393. // strip slashes
  394. $attrSubSet[1] = stripslashes($attrSubSet[1]);
  395. }
  396. // Autostrip script tags
  397. if (JFilterInput::checkAttribute($attrSubSet)) {
  398. continue;
  399. }
  400. // Is our attribute in the user input array?
  401. $attrFound = in_array(strtolower($attrSubSet[0]), $this->attrArray);
  402. // If the tag is allowed lets keep it
  403. if ((!$attrFound && $this->attrMethod) || ($attrFound && !$this->attrMethod)) {
  404. // Does the attribute have a value?
  405. if ($attrSubSet[1]) {
  406. $newSet[] = $attrSubSet[0].'="'.$attrSubSet[1].'"';
  407. } else if ($attrSubSet[1] == "0") {
  408. /*
  409. * Special Case
  410. * Is the value 0?
  411. */
  412. $newSet[] = $attrSubSet[0].'="0"';
  413. } else {
  414. $newSet[] = $attrSubSet[0].'="'.$attrSubSet[0].'"';
  415. }
  416. }
  417. }
  418. return $newSet;
  419. }
  420. /**
  421. * Try to convert to plaintext
  422. *
  423. * @param string
  424. * @return string Plaintext string
  425. * @since 1.5
  426. */
  427. protected function _decode($source)
  428. {
  429. // entity decode
  430. $trans_tbl = get_html_translation_table(HTML_ENTITIES);
  431. foreach($trans_tbl as $k => $v) {
  432. $ttr[$v] = utf8_encode($k);
  433. }
  434. $source = strtr($source, $ttr);
  435. // convert decimal
  436. $source = preg_replace('/&#(\d+);/me', "utf8_encode(chr(\\1))", $source); // decimal notation
  437. // convert hex
  438. $source = preg_replace('/&#x([a-f0-9]+);/mei', "utf8_encode(chr(0x\\1))", $source); // hex notation
  439. return $source;
  440. }
  441. }