/libraries/joomla/filter/input.php

https://gitlab.com/vitaliylukin91/text · PHP · 1089 lines · 616 code · 137 blank · 336 comment · 106 complexity · 497aca35b4091ec71a3d15fa04e6592f MD5 · raw file

  1. <?php
  2. /**
  3. * @package Joomla.Platform
  4. * @subpackage Filter
  5. *
  6. * @copyright Copyright (C) 2005 - 2015 Open Source Matters, Inc. All rights reserved.
  7. * @license GNU General Public License version 2 or later; see LICENSE
  8. */
  // Refuse to run when the file is requested directly instead of being loaded by the platform.
  if (!defined('JPATH_PLATFORM'))
  {
      die;
  }
  /**
   * JFilterInput is a class for filtering input from any data source
   *
   * Forked from the php input filter library by: Daniel Morris <dan@rootcube.com>
   * Original Contributors: Gianpaolo Racca, Ghislain Picard, Marco Wandschneider, Chris Tobin and Andrew Eddie.
   *
   * @since  11.1
   */
  class JFilterInput
  {
      /**
       * A container for JFilterInput instances.
       *
       * @var    array
       * @since  11.3
       */
      protected static $instances = array();

      /**
       * The array of permitted tags (white list).
       *
       * @var    array
       * @since  11.1
       */
      public $tagsArray;

      /**
       * The array of permitted tag attributes (white list).
       *
       * @var    array
       * @since  11.1
       */
      public $attrArray;

      /**
       * The method for sanitising tags: WhiteList method = 0 (default), BlackList method = 1
       *
       * @var    integer
       * @since  11.1
       */
      public $tagsMethod;

      /**
       * The method for sanitising attributes: WhiteList method = 0 (default), BlackList method = 1
       *
       * @var    integer
       * @since  11.1
       */
      public $attrMethod;

      /**
       * A flag for XSS checks. Only auto clean essentials = 0, Allow clean blacklisted tags/attr = 1
       *
       * @var    integer
       * @since  11.1
       */
      public $xssAuto;

      /**
       * The list of the default blacklisted tags.
       *
       * @var    array
       * @since  11.1
       */
      public $tagBlacklist = array(
          'applet',
          'body',
          'bgsound',
          'base',
          'basefont',
          'embed',
          'frame',
          'frameset',
          'head',
          'html',
          'id',
          'iframe',
          'ilayer',
          'layer',
          'link',
          'meta',
          'name',
          'object',
          'script',
          'style',
          'title',
          'xml'
      );

      /**
       * The list of the default blacklisted tag attributes. All event handlers implicit.
       *
       * @var    array
       * @since  11.1
       */
      public $attrBlacklist = array(
          'action',
          'background',
          'codebase',
          'dynsrc',
          'lowsrc'
      );

      /**
       * Constructor for inputFilter class. Only first parameter is required.
       *
       * @param   array    $tagsArray   List of user-defined tags
       * @param   array    $attrArray   List of user-defined attributes
       * @param   integer  $tagsMethod  WhiteList method = 0, BlackList method = 1
       * @param   integer  $attrMethod  WhiteList method = 0, BlackList method = 1
       * @param   integer  $xssAuto     Only auto clean essentials = 0, Allow clean blacklisted tags/attr = 1
       *
       * @since   11.1
       */
      public function __construct($tagsArray = array(), $attrArray = array(), $tagsMethod = 0, $attrMethod = 0, $xssAuto = 1)
      {
          // Tag and attribute names are compared in lowercase everywhere, so normalise the user lists once
          $this->tagsArray  = array_map('strtolower', (array) $tagsArray);
          $this->attrArray  = array_map('strtolower', (array) $attrArray);
          $this->tagsMethod = $tagsMethod;
          $this->attrMethod = $attrMethod;
          $this->xssAuto    = $xssAuto;
      }

      /**
       * Returns an input filter object, only creating it if it doesn't already exist.
       *
       * Instances are cached per distinct combination of the five arguments.
       *
       * @param   array    $tagsArray   List of user-defined tags
       * @param   array    $attrArray   List of user-defined attributes
       * @param   integer  $tagsMethod  WhiteList method = 0, BlackList method = 1
       * @param   integer  $attrMethod  WhiteList method = 0, BlackList method = 1
       * @param   integer  $xssAuto     Only auto clean essentials = 0, Allow clean blacklisted tags/attr = 1
       *
       * @return  JFilterInput  The JFilterInput object.
       *
       * @since   11.1
       */
      public static function &getInstance($tagsArray = array(), $attrArray = array(), $tagsMethod = 0, $attrMethod = 0, $xssAuto = 1)
      {
          $sig = md5(serialize(array($tagsArray, $attrArray, $tagsMethod, $attrMethod, $xssAuto)));

          if (empty(self::$instances[$sig]))
          {
              self::$instances[$sig] = new JFilterInput($tagsArray, $attrArray, $tagsMethod, $attrMethod, $xssAuto);
          }

          return self::$instances[$sig];
      }

      /**
       * Method to be called by another php script. Processes for XSS and
       * specified bad code.
       *
       * @param   mixed   $source  Input string/array-of-string to be 'cleaned'
       * @param   string  $type    The return type for the variable:
       *                           INT:       An integer,
       *                           UINT:      An unsigned integer,
       *                           FLOAT:     A floating point number,
       *                           BOOLEAN:   A boolean value,
       *                           WORD:      A string containing A-Z or underscores only (not case sensitive),
       *                           ALNUM:     A string containing A-Z or 0-9 only (not case sensitive),
       *                           CMD:       A string containing A-Z, 0-9, underscores, periods or hyphens (not case sensitive),
       *                           BASE64:    A string containing A-Z, 0-9, forward slashes, plus or equals (not case sensitive),
       *                           STRING:    A fully decoded and sanitised string (default),
       *                           HTML:      A sanitised string,
       *                           ARRAY:     An array,
       *                           PATH:      A sanitised file path,
       *                           TRIM:      A string trimmed from normal, non-breaking and multibyte spaces
       *                           USERNAME:  Do not use (use an application specific filter),
       *                           RAW:       The raw string is returned with no filtering,
       *                           unknown:   An unknown filter will act like STRING. If the input is an array it will return an
       *                                      array of fully decoded and sanitised strings.
       *
       * @return  mixed  'Cleaned' version of input parameter
       *
       * @since   11.1
       */
      public function clean($source, $type = 'string')
      {
          // Handle the type constraint
          switch (strtoupper($type))
          {
              case 'INT':
              case 'INTEGER':
                  // Only use the first integer value; no match yields 0
                  $result = preg_match('/-?[0-9]+/', (string) $source, $matches) ? (int) $matches[0] : 0;
                  break;

              case 'UINT':
                  // Only use the first integer value, sign dropped; no match yields 0
                  $result = preg_match('/-?[0-9]+/', (string) $source, $matches) ? abs((int) $matches[0]) : 0;
                  break;

              case 'FLOAT':
              case 'DOUBLE':
                  // Only use the first floating point value; no match yields 0.0
                  $result = preg_match('/-?[0-9]+(\.[0-9]+)?/', (string) $source, $matches) ? (float) $matches[0] : 0.0;
                  break;

              case 'BOOL':
              case 'BOOLEAN':
                  $result = (bool) $source;
                  break;

              case 'WORD':
                  $result = (string) preg_replace('/[^A-Z_]/i', '', $source);
                  break;

              case 'ALNUM':
                  $result = (string) preg_replace('/[^A-Z0-9]/i', '', $source);
                  break;

              case 'CMD':
                  $result = (string) preg_replace('/[^A-Z0-9_\.-]/i', '', $source);

                  // A command may not start with a period
                  $result = ltrim($result, '.');
                  break;

              case 'BASE64':
                  $result = (string) preg_replace('/[^A-Z0-9\/+=]/i', '', $source);
                  break;

              case 'STRING':
                  $result = (string) $this->_remove($this->_decode((string) $source));
                  break;

              case 'HTML':
                  $result = (string) $this->_remove((string) $source);
                  break;

              case 'ARRAY':
                  $result = (array) $source;
                  break;

              case 'PATH':
                  $pattern = '/^[A-Za-z0-9_\/-]+[A-Za-z0-9_\.-]*([\\\\\/][A-Za-z0-9_-]+[A-Za-z0-9_\.-]*)*$/';

                  // The whole input must match; anything else yields an empty string
                  $result = preg_match($pattern, (string) $source, $matches) ? (string) $matches[0] : '';
                  break;

              case 'TRIM':
                  $result = (string) trim($source);

                  // Ideographic space (U+3000) and non-breaking space (U+00A0), given as UTF-8 byte sequences
                  $result = JString::trim($result, chr(0xE3) . chr(0x80) . chr(0x80));
                  $result = JString::trim($result, chr(0xC2) . chr(0xA0));
                  break;

              case 'USERNAME':
                  $result = (string) preg_replace('/[\x00-\x1F\x7F<>"\'%&]/', '', $source);
                  break;

              case 'RAW':
                  $result = $source;
                  break;

              default:
                  if (is_array($source))
                  {
                      // Filter every string element for XSS and other 'bad' code; other elements pass through
                      foreach ($source as $key => $value)
                      {
                          if (is_string($value))
                          {
                              $source[$key] = $this->_remove($this->_decode($value));
                          }
                      }

                      $result = $source;
                  }
                  elseif (is_string($source) && !empty($source))
                  {
                      // Filter source for XSS and other 'bad' code etc.
                      $result = $this->_remove($this->_decode($source));
                  }
                  else
                  {
                      // Not an array or (non-empty) string.. return the passed parameter
                      $result = $source;
                  }
                  break;
          }

          return $result;
      }

      /**
       * Function to determine if contents of an attribute are safe
       *
       * @param   array  $attrSubSet  A 2 element array for attribute's name, value
       *
       * @return  boolean  True if bad code is detected
       *
       * @since   11.1
       */
      public static function checkAttribute($attrSubSet)
      {
          $name  = strtolower(isset($attrSubSet[0]) ? $attrSubSet[0] : '');
          $value = strtolower(isset($attrSubSet[1]) ? $attrSubSet[1] : '');

          if ($name == 'style')
          {
              // CSS expressions, and the IE "behavior" property (the historic check only knew the
              // British spelling, which is not the name of the actual CSS property).
              if (strpos($value, 'expression') !== false || strpos($value, 'behavior:') !== false)
              {
                  return true;
              }
          }

          // Script pseudo-protocols are rejected in any attribute
          foreach (array('javascript:', 'behaviour:', 'vbscript:', 'mocha:', 'livescript:') as $needle)
          {
              if (strpos($value, $needle) !== false)
              {
                  return true;
              }
          }

          return false;
      }

      /**
       * Checks an uploaded file for suspicious naming and potential PHP contents which could indicate a hacking attempt.
       *
       * The options you can define are:
       * null_byte                   Prevent files with a null byte in their name (buffer overflow attack)
       * forbidden_extensions        Do not allow these strings anywhere in the file's extension
       * php_tag_in_content          Do not allow <?php tag in content
       * shorttag_in_content         Do not allow short tag <? in content
       * shorttag_extensions         Which file extensions to scan for short tags in content
       * fobidden_ext_in_content     Do not allow forbidden_extensions anywhere in content
       *                             (the correctly spelled key forbidden_ext_in_content is accepted as an alias)
       * php_ext_content_extensions  Which file extensions to scan for .php in content
       *
       * File names are compared case-insensitively against the extension lists. Files whose temporary
       * copy cannot be opened are not content-scanned (only their name is checked).
       *
       * This code is an adaptation and improvement of Admin Tools' UploadShield feature,
       * relicensed and contributed by its author.
       *
       * @param   array  $file     An uploaded file descriptor
       * @param   array  $options  The scanner options (see the code for details)
       *
       * @return  boolean  True if the file is safe
       *
       * @since   3.4
       */
      public static function isSafeFile($file, $options = array())
      {
          $defaultOptions = array(
              // Null byte in file name
              'null_byte'                  => true,
              // Forbidden string in extension (e.g. php matched .php, .xxx.php, .php.xxx and so on)
              'forbidden_extensions'       => array(
                  'php', 'phps', 'php5', 'php3', 'php4', 'inc', 'pl', 'cgi', 'fcgi', 'java', 'jar', 'py'
              ),
              // <?php tag in file contents
              'php_tag_in_content'         => true,
              // <? tag in file contents
              'shorttag_in_content'        => true,
              // Which file extensions to scan for short tags
              'shorttag_extensions'        => array(
                  'inc', 'phps', 'class', 'php3', 'php4', 'php5', 'txt', 'dat', 'tpl', 'tmpl'
              ),
              // Forbidden extensions anywhere in the content
              'fobidden_ext_in_content'    => true,
              // Which file extensions to scan for .php in the content
              'php_ext_content_extensions' => array('zip', 'rar', 'tar', 'gz', 'tgz', 'bz2', 'tbz', 'jpa'),
          );

          $options = array_merge($defaultOptions, (array) $options);

          // Honour the correctly spelled option name so that it is not silently ignored
          if (isset($options['forbidden_ext_in_content']))
          {
              $options['fobidden_ext_in_content'] = $options['forbidden_ext_in_content'];
          }

          // Make sure we can scan nested file descriptors
          $descriptors = $file;

          if (isset($file['name']) && isset($file['tmp_name']))
          {
              $descriptors = self::decodeFileData(
                  array(
                      $file['name'],
                      isset($file['type']) ? $file['type'] : null,
                      $file['tmp_name'],
                      isset($file['error']) ? $file['error'] : null,
                      isset($file['size']) ? $file['size'] : null
                  )
              );
          }

          // Nothing that can be iterated means nothing to reject
          if (!is_array($descriptors))
          {
              return true;
          }

          // Handle non-nested descriptors (single files)
          if (isset($descriptors['name']))
          {
              $descriptors = array($descriptors);
          }

          $forbiddenExtensions = empty($options['forbidden_extensions']) ? array() : (array) $options['forbidden_extensions'];
          $scanContents        = $options['php_tag_in_content'] || $options['shorttag_in_content']
              || ($options['fobidden_ext_in_content'] && !empty($forbiddenExtensions));

          // Scan all descriptors detected
          foreach ($descriptors as $fileDescriptor)
          {
              if (!isset($fileDescriptor['name']))
              {
                  // This is a nested descriptor. We have to recurse.
                  if (!self::isSafeFile($fileDescriptor, $options))
                  {
                      return false;
                  }

                  continue;
              }

              $tempNames     = isset($fileDescriptor['tmp_name']) ? $fileDescriptor['tmp_name'] : null;
              $intendedNames = $fileDescriptor['name'];

              if (!is_array($tempNames))
              {
                  $tempNames = array($tempNames);
              }

              if (!is_array($intendedNames))
              {
                  $intendedNames = array($intendedNames);
              }

              $tempNames     = array_values($tempNames);
              $intendedNames = array_values($intendedNames);

              // Walk the longer list so that a surplus file name never escapes the name checks
              $len = max(count($tempNames), count($intendedNames));

              for ($i = 0; $i < $len; $i++)
              {
                  $tempName     = isset($tempNames[$i]) ? $tempNames[$i] : null;
                  $intendedName = isset($intendedNames[$i]) ? (string) $intendedNames[$i] : '';

                  // 1. Null byte check
                  if ($options['null_byte'] && strpos($intendedName, "\x00") !== false)
                  {
                      return false;
                  }

                  // Always computed: the content checks below need it even when no extension is forbidden
                  $extensions = self::getLowercaseExtensions($intendedName);

                  // 2. PHP-in-extension check (.php, .php.xxx[.yyy[.zzz[...]]], .xxx[.yyy[.zzz[...]]].php)
                  if (self::hasAnyExtension($extensions, $forbiddenExtensions))
                  {
                      return false;
                  }

                  // 3. File contents scanner (PHP tag in file contents)
                  if ($scanContents && !self::isSafeFileContent($tempName, $extensions, $options))
                  {
                      return false;
                  }
              }
          }

          return true;
      }

      /**
       * Returns every dot-separated segment after the first one of a file name, in lowercase.
       *
       * For "archive.TAR.gz" this is array('tar', 'gz'); a name without a dot yields an empty array.
       *
       * @param   string  $fileName  The intended (client side) file name
       *
       * @return  array  The lowercased extension segments
       *
       * @since   __DEPLOY_VERSION__
       */
      protected static function getLowercaseExtensions($fileName)
      {
          $segments = explode('.', (string) $fileName);

          // The first segment is the base name, not an extension
          array_shift($segments);

          return array_map('strtolower', $segments);
      }

      /**
       * Tells whether at least one of the candidate extensions is present in a list of extension segments.
       *
       * Candidates are lowercased before comparison, so the segments are expected in lowercase.
       * A plain loop is used on purpose: the lists may contain duplicate values.
       *
       * @param   array  $extensions  Lowercased extension segments of a file name
       * @param   mixed  $candidates  The extensions to look for (array or single value)
       *
       * @return  boolean  True when at least one candidate is present
       *
       * @since   __DEPLOY_VERSION__
       */
      protected static function hasAnyExtension(array $extensions, $candidates)
      {
          foreach ((array) $candidates as $candidate)
          {
              if (in_array(strtolower((string) $candidate), $extensions))
              {
                  return true;
              }
          }

          return false;
      }

      /**
       * Scans the contents of one uploaded file for PHP tags and forbidden extension strings.
       *
       * The file is read in chunks; the tail of each chunk is carried over to the next one and the
       * checks run on tail + chunk, so a needle split across a read boundary is still detected.
       * The file handle is always closed, also when something suspicious is found.
       *
       * @param   mixed  $tempName    Path of the temporary upload; anything but a non-empty string is skipped
       * @param   array  $extensions  Lowercased extension segments of the intended file name
       * @param   array  $options     The merged scanner options of isSafeFile()
       *
       * @return  boolean  False when suspicious content is found, true otherwise (including unreadable files)
       *
       * @since   __DEPLOY_VERSION__
       */
      protected static function isSafeFileContent($tempName, array $extensions, array $options)
      {
          // fopen() rejects an empty path outright on PHP 8, so do not even try
          if (!is_string($tempName) || $tempName === '')
          {
              return true;
          }

          $checkPhpTag   = !empty($options['php_tag_in_content']);
          $checkShortTag = false;
          $needles       = array();

          if (!empty($options['shorttag_in_content']))
          {
              $suspiciousExtensions = $options['shorttag_extensions'];

              if (empty($suspiciousExtensions))
              {
                  $suspiciousExtensions = array(
                      'inc', 'phps', 'class', 'php3', 'php4', 'txt', 'dat', 'tpl', 'tmpl'
                  );
              }

              // Only these suspicious text files are scanned for the short tag (<?)
              $checkShortTag = self::hasAnyExtension($extensions, $suspiciousExtensions);
          }

          if (!empty($options['fobidden_ext_in_content']) && !empty($options['forbidden_extensions']))
          {
              $suspiciousExtensions = $options['php_ext_content_extensions'];

              if (empty($suspiciousExtensions))
              {
                  $suspiciousExtensions = array(
                      'zip', 'rar', 'tar', 'gz', 'tgz', 'bz2', 'tbz', 'jpa'
                  );
              }

              // Only these (archive) files are scanned for an executable file extension in their contents
              if (self::hasAnyExtension($extensions, $suspiciousExtensions))
              {
                  foreach ((array) $options['forbidden_extensions'] as $ext)
                  {
                      $needles[] = '.' . $ext;
                  }
              }
          }

          if (!$checkPhpTag && !$checkShortTag && empty($needles))
          {
              return true;
          }

          // Carry over enough bytes to hold the longest needle minus one character (never less than 8)
          $carry = 8;

          foreach ($needles as $needle)
          {
              $carry = max($carry, strlen($needle) - 1);
          }

          $fp = @fopen($tempName, 'r');

          if ($fp === false)
          {
              // Unreadable file: best effort, nothing to scan
              return true;
          }

          $safe = true;
          $data = '';

          while ($safe && !feof($fp))
          {
              $buffer = @fread($fp, 131072);

              // A failed or empty read cannot make progress; stop instead of spinning
              if ($buffer === false || $buffer === '')
              {
                  break;
              }

              $data .= $buffer;

              // PHP open tags are case-insensitive, hence stripos()
              if ($checkPhpTag && stripos($data, '<?php') !== false)
              {
                  $safe = false;
              }
              elseif ($checkShortTag && strpos($data, '<?') !== false)
              {
                  $safe = false;
              }
              else
              {
                  foreach ($needles as $needle)
                  {
                      if (strpos($data, $needle) !== false)
                      {
                          $safe = false;
                          break;
                      }
                  }
              }

              // This makes sure that we don't accidentally skip a needle if it's across a read boundary
              $data = (string) substr($data, -$carry);
          }

          fclose($fp);

          return $safe;
      }

      /**
       * Method to decode a file data array.
       *
       * Turns the positional array (name, type, tmp_name, error, size) into an associative file
       * descriptor. When the name element is itself an array (multiple/nested uploads) the
       * conversion is applied per key, producing a nested array of descriptors.
       *
       * @param   array  $data  The data array to decode.
       *
       * @return  array
       *
       * @since   3.4
       */
      protected static function decodeFileData(array $data)
      {
          if (is_array($data[0]))
          {
              $result = array();

              foreach ($data[0] as $k => $v)
              {
                  $result[$k] = self::decodeFileData(
                      array(
                          $v,
                          isset($data[1][$k]) ? $data[1][$k] : null,
                          isset($data[2][$k]) ? $data[2][$k] : null,
                          isset($data[3][$k]) ? $data[3][$k] : null,
                          isset($data[4][$k]) ? $data[4][$k] : null
                      )
                  );
              }

              return $result;
          }

          return array('name' => $data[0], 'type' => $data[1], 'tmp_name' => $data[2], 'error' => $data[3], 'size' => $data[4]);
      }

      /**
       * Internal method to iteratively remove all unwanted tags and attributes
       *
       * The tag cleaner is re-applied until its output no longer changes, which provides
       * protection against nested tags.
       *
       * @param   string  $source  Input string to be 'cleaned'
       *
       * @return  string  'Cleaned' version of input parameter
       *
       * @since   11.1
       */
      protected function _remove($source)
      {
          // One cleaning pass per iteration; stop at the first pass that changes nothing
          do
          {
              $previous = $source;
              $source   = $this->_cleanTags($previous);
          }
          while ($source !== $previous);

          return $source;
      }

      /**
       * Internal method to strip a string of certain tags
       *
       * @param   string  $source  Input string to be 'cleaned'
       *
       * @return  string  'Cleaned' version of input parameter
       *
       * @since   11.1
       */
      protected function _cleanTags($source)
      {
          // First, pre-process this for illegal characters inside attribute values
          $source = $this->_escapeAttributeValues($source);

          // In the beginning we don't really have a tag, so everything is postTag
          $preTag       = null;
          $postTag      = $source;
          $currentSpace = false;

          // Setting to an empty value to deal with undefined variables
          $attr = '';

          // Is there a tag? If so it will certainly start with a '<'.
          $tagOpen_start = strpos($source, '<');

          while ($tagOpen_start !== false)
          {
              // Get some information about the tag we are processing
              $preTag .= substr($postTag, 0, $tagOpen_start);
              $postTag = substr($postTag, $tagOpen_start);
              $fromTagOpen = substr($postTag, 1);
              $tagOpen_end = strpos($fromTagOpen, '>');

              // Check for mal-formed tag where we have a second '<' before the first '>'
              $nextOpenTag = (strlen($postTag) > $tagOpen_start) ? strpos($postTag, '<', $tagOpen_start + 1) : false;

              if (($nextOpenTag !== false) && ($nextOpenTag < $tagOpen_end))
              {
                  // At this point we have a mal-formed tag -- remove the offending open
                  $postTag = substr($postTag, 0, $tagOpen_start) . substr($postTag, $tagOpen_start + 1);
                  $tagOpen_start = strpos($postTag, '<');
                  continue;
              }

              // Let's catch any non-terminated tags and skip over them
              if ($tagOpen_end === false)
              {
                  $postTag = substr($postTag, $tagOpen_start + 1);
                  $tagOpen_start = strpos($postTag, '<');
                  continue;
              }

              // Do we have a nested tag?
              $tagOpen_nested = strpos($fromTagOpen, '<');

              if (($tagOpen_nested !== false) && ($tagOpen_nested < $tagOpen_end))
              {
                  $preTag .= substr($postTag, 0, ($tagOpen_nested + 1));
                  $postTag = substr($postTag, ($tagOpen_nested + 1));
                  $tagOpen_start = strpos($postTag, '<');
                  continue;
              }

              // Let's get some information about our tag and setup attribute pairs
              $currentTag = substr($fromTagOpen, 0, $tagOpen_end);
              $tagLength = strlen($currentTag);
              $tagLeft = $currentTag;
              $attrSet = array();
              $currentSpace = strpos($tagLeft, ' ');

              // Are we an open tag or a close tag?
              if (substr($currentTag, 0, 1) == '/')
              {
                  // Close Tag
                  $isCloseTag = true;
                  list ($tagName) = explode(' ', $currentTag);
                  $tagName = substr($tagName, 1);
              }
              else
              {
                  // Open Tag
                  $isCloseTag = false;
                  list ($tagName) = explode(' ', $currentTag);
              }

              /*
               * Exclude all "non-regular" tagnames
               * OR no tagname
               * OR remove if xssauto is on and tag is blacklisted
               */
              if ((!preg_match("/^[a-z][a-z0-9]*$/i", $tagName)) || (!$tagName) || ((in_array(strtolower($tagName), $this->tagBlacklist)) && ($this->xssAuto)))
              {
                  $postTag = substr($postTag, ($tagLength + 2));
                  $tagOpen_start = strpos($postTag, '<');

                  // Strip tag
                  continue;
              }

              /*
               * Time to grab any attributes from the tag... need this section in
               * case attributes have spaces in the values.
               */
              while ($currentSpace !== false)
              {
                  $attr = '';
                  $fromSpace = substr($tagLeft, ($currentSpace + 1));
                  $nextEqual = strpos($fromSpace, '=');
                  $nextSpace = strpos($fromSpace, ' ');
                  $openQuotes = strpos($fromSpace, '"');
                  $closeQuotes = strpos(substr($fromSpace, ($openQuotes + 1)), '"') + $openQuotes + 1;

                  $startAtt = '';
                  $startAttPosition = 0;

                  // Find position of equal and open quotes ignoring whitespace around the equal sign
                  if (preg_match('#\s*=\s*\"#', $fromSpace, $matches, PREG_OFFSET_CAPTURE))
                  {
                      $startAtt = $matches[0][0];
                      $startAttPosition = $matches[0][1];
                      $closeQuotes = strpos(substr($fromSpace, ($startAttPosition + strlen($startAtt))), '"') + $startAttPosition + strlen($startAtt);
                      $nextEqual = $startAttPosition + strpos($startAtt, '=');
                      $openQuotes = $startAttPosition + strpos($startAtt, '"');
                      $nextSpace = strpos(substr($fromSpace, $closeQuotes), ' ') + $closeQuotes;
                  }

                  // Do we have an attribute to process? [check for equal sign]
                  if ($fromSpace != '/' && (($nextEqual && $nextSpace && $nextSpace < $nextEqual) || !$nextEqual))
                  {
                      if (!$nextEqual)
                      {
                          $attribEnd = strpos($fromSpace, '/') - 1;
                      }
                      else
                      {
                          $attribEnd = $nextSpace - 1;
                      }

                      // If there is an ending, use this, if not, do not worry.
                      if ($attribEnd > 0)
                      {
                          $fromSpace = substr($fromSpace, $attribEnd + 1);
                      }
                  }

                  if (strpos($fromSpace, '=') !== false)
                  {
                      // If the attribute value is wrapped in quotes we need to grab the substring from
                      // the closing quote, otherwise grab until the next space.
                      if (($openQuotes !== false) && (strpos(substr($fromSpace, ($openQuotes + 1)), '"') !== false))
                      {
                          $attr = substr($fromSpace, 0, ($closeQuotes + 1));
                      }
                      else
                      {
                          $attr = substr($fromSpace, 0, $nextSpace);
                      }
                  }
                  // No more equal signs so add any extra text in the tag into the attribute array [eg. checked]
                  else
                  {
                      if ($fromSpace != '/')
                      {
                          $attr = substr($fromSpace, 0, $nextSpace);
                      }
                  }

                  // Last Attribute Pair
                  if (!$attr && $fromSpace != '/')
                  {
                      $attr = $fromSpace;
                  }

                  // Add attribute pair to the attribute array
                  $attrSet[] = $attr;

                  // Move search point and continue iteration
                  $tagLeft = substr($fromSpace, strlen($attr));
                  $currentSpace = strpos($tagLeft, ' ');
              }

              // Is our tag in the user input array?
              $tagFound = in_array(strtolower($tagName), $this->tagsArray);

              // If the tag is allowed let's append it to the output string.
              if ((!$tagFound && $this->tagsMethod) || ($tagFound && !$this->tagsMethod))
              {
                  // Reconstruct tag with allowed attributes
                  if (!$isCloseTag)
                  {
                      // Open or single tag
                      $attrSet = $this->_cleanAttributes($attrSet);
                      $preTag .= '<' . $tagName;

                      for ($i = 0, $count = count($attrSet); $i < $count; $i++)
                      {
                          $preTag .= ' ' . $attrSet[$i];
                      }

                      // Reformat single tags to XHTML
                      if (strpos($fromTagOpen, '</' . $tagName))
                      {
                          $preTag .= '>';
                      }
                      else
                      {
                          $preTag .= ' />';
                      }
                  }
                  // Closing tag
                  else
                  {
                      $preTag .= '</' . $tagName . '>';
                  }
              }

              // Find next tag's start and continue iteration
              $postTag = substr($postTag, ($tagLength + 2));
              $tagOpen_start = strpos($postTag, '<');
          }

          // Append any code after the end of tags and return
          if ($postTag != '<')
          {
              $preTag .= $postTag;
          }

          return $preTag;
      }

      /**
       * Internal method to strip a tag of certain attributes
       *
       * @param   array  $attrSet  Array of attribute pairs to filter
       *
       * @return  array  Filtered array of attribute pairs
       *
       * @since   11.1
       */
      protected function _cleanAttributes($attrSet)
      {
          $newSet = array();

          $count = count($attrSet);

          // Iterate through attribute pairs
          for ($i = 0; $i < $count; $i++)
          {
              // Skip blank spaces
              if (!$attrSet[$i])
              {
                  continue;
              }

              // Split into name/value pairs
              $attrSubSet = explode('=', trim($attrSet[$i]), 2);

              // Take the last attribute in case there is an attribute with no value
              $attrSubSet_0 = explode(' ', trim($attrSubSet[0]));
              $attrSubSet[0] = array_pop($attrSubSet_0);

              // Attribute names are case-insensitive in HTML, so every name test uses the lowercase form
              $attrName = strtolower($attrSubSet[0]);

              /*
               * Remove all "non-regular" attribute names
               * AND blacklisted attributes (including every on* event handler, whatever its letter case)
               *
               * NOTE(review): the first pattern matches every string, so it never rejects a name.
               * It is kept unchanged because tightening it could drop attributes that pass today.
               */
              if ((!preg_match('/[a-z]*$/i', $attrSubSet[0]))
                  || (($this->xssAuto) && ((in_array($attrName, $this->attrBlacklist))
                  || (substr($attrName, 0, 2) == 'on'))))
              {
                  continue;
              }

              // Attributes without a value are dropped
              if (!isset($attrSubSet[1]))
              {
                  continue;
              }

              // XSS attribute value filtering: trim leading and trailing spaces
              $attrSubSet[1] = trim($attrSubSet[1]);

              // Strips unicode, hex, etc
              $attrSubSet[1] = str_replace('&#', '', $attrSubSet[1]);

              // Strip normal newline within attr value
              $attrSubSet[1] = preg_replace('/[\n\r]/', '', $attrSubSet[1]);

              // Strip double quotes
              $attrSubSet[1] = str_replace('"', '', $attrSubSet[1]);

              // Remove single quotes wrapping the value (Single quotes shouldn't be used to pad attr values)
              if ((substr($attrSubSet[1], 0, 1) == "'") && (substr($attrSubSet[1], (strlen($attrSubSet[1]) - 1), 1) == "'"))
              {
                  $attrSubSet[1] = substr($attrSubSet[1], 1, (strlen($attrSubSet[1]) - 2));
              }

              // Strip slashes
              $attrSubSet[1] = stripslashes($attrSubSet[1]);

              // Autostrip script tags
              if (self::checkAttribute($attrSubSet))
              {
                  continue;
              }

              // Is our attribute in the user input array?
              $attrFound = in_array($attrName, $this->attrArray);

              // If the attribute is allowed lets keep it
              if ((!$attrFound && $this->attrMethod) || ($attrFound && !$this->attrMethod))
              {
                  if (empty($attrSubSet[1]) === false)
                  {
                      // The attribute has a value
                      $newSet[] = $attrSubSet[0] . '="' . $attrSubSet[1] . '"';
                  }
                  elseif ($attrSubSet[1] === "0")
                  {
                      // Special case: the value "0" counts as empty() but must be preserved
                      $newSet[] = $attrSubSet[0] . '="0"';
                  }
                  else
                  {
                      // Leave empty attributes alone
                      $newSet[] = $attrSubSet[0] . '=""';
                  }
              }
          }

          return $newSet;
      }

      /**
       * Try to convert to plaintext
       *
       * Decodes the named entities of the ISO-8859-1 translation table as well as decimal and
       * hexadecimal numeric character references into UTF-8.
       *
       * @param   string  $source  The source string.
       *
       * @return  string  Plaintext string
       *
       * @since   11.1
       */
      protected function _decode($source)
      {
          static $ttr;

          if (!is_array($ttr))
          {
              $ttr = array();

              // Entity decode: the table maps single ISO-8859-1 bytes to their named entity
              $trans_tbl = get_html_translation_table(HTML_ENTITIES, ENT_COMPAT, 'ISO-8859-1');

              foreach ($trans_tbl as $k => $v)
              {
                  // An ISO-8859-1 byte value is the Unicode code point of the character
                  $ttr[$v] = self::codePointToUtf8(ord($k));
              }
          }

          $source = strtr($source, $ttr);

          // Convert decimal
          $source = preg_replace_callback('/&#(\d+);/m', array(__CLASS__, 'decodeDecimalEntity'), $source);

          // Convert hex
          $source = preg_replace_callback('/&#x([a-f0-9]+);/mi', array(__CLASS__, 'decodeHexEntity'), $source);

          return $source;
      }

      /**
       * preg_replace_callback() handler turning a decimal character reference into UTF-8.
       *
       * @param   array  $match  The regular expression match; element 1 holds the decimal digits
       *
       * @return  string  The UTF-8 encoded character
       *
       * @since   __DEPLOY_VERSION__
       */
      protected static function decodeDecimalEntity(array $match)
      {
          // Go through float so that absurdly long digit runs cannot overflow the integer range
          return self::codePointToUtf8((float) $match[1]);
      }

      /**
       * preg_replace_callback() handler turning a hexadecimal character reference into UTF-8.
       *
       * @param   array  $match  The regular expression match; element 1 holds the hexadecimal digits
       *
       * @return  string  The UTF-8 encoded character
       *
       * @since   __DEPLOY_VERSION__
       */
      protected static function decodeHexEntity(array $match)
      {
          // hexdec() is used because a "0x.." string is not a numeric string on PHP 7 and later
          return self::codePointToUtf8(hexdec($match[1]));
      }

      /**
       * Encodes a Unicode code point as a UTF-8 byte sequence.
       *
       * @param   integer|float  $codePoint  The code point
       *
       * @return  string  The UTF-8 bytes, or an empty string for surrogates and values outside U+0000..U+10FFFF
       *
       * @since   __DEPLOY_VERSION__
       */
      protected static function codePointToUtf8($codePoint)
      {
          if ($codePoint < 0 || $codePoint > 0x10FFFF)
          {
              return '';
          }

          $codePoint = (int) $codePoint;

          if ($codePoint < 0x80)
          {
              return chr($codePoint);
          }

          if ($codePoint < 0x800)
          {
              return chr(0xC0 | ($codePoint >> 6)) . chr(0x80 | ($codePoint & 0x3F));
          }

          if ($codePoint >= 0xD800 && $codePoint <= 0xDFFF)
          {
              // UTF-16 surrogates are not characters
              return '';
          }

          if ($codePoint < 0x10000)
          {
              return chr(0xE0 | ($codePoint >> 12)) . chr(0x80 | (($codePoint >> 6) & 0x3F)) . chr(0x80 | ($codePoint & 0x3F));
          }

          return chr(0xF0 | ($codePoint >> 18)) . chr(0x80 | (($codePoint >> 12) & 0x3F))
              . chr(0x80 | (($codePoint >> 6) & 0x3F)) . chr(0x80 | ($codePoint & 0x3F));
      }

      /**
       * Escape < > and " inside attribute values
       *
       * @param   string  $source  The source string.
       *
       * @return  string  Filtered string
       *
       * @since   11.1
       */
      protected function _escapeAttributeValues($source)
      {
          $alreadyFiltered = '';
          $remainder = $source;
          $badChars = array('<', '"', '>');
          $escapedChars = array('&lt;', '&quot;', '&gt;');

          // Process each portion based on presence of =" and "<space>, "/>, or ">
          // See if there are any more attributes to process
          while (preg_match('#<[^>]*?=\s*?(\"|\')#s', $remainder, $matches, PREG_OFFSET_CAPTURE))
          {
              // Get the portion before the attribute value
              $quotePosition = $matches[0][1];
              $nextBefore = $quotePosition + strlen($matches[0][0]);

              // Figure out if we have a single or double quote and look for the matching closing quote
              // Closing quote should be "/>, ">, "<space>, or " at the end of the string
              $quote = substr($matches[0][0], -1);
              $pregMatch = ($quote == '"') ? '#(\"\s*/\s*>|\"\s*>|\"\s+|\"$)#' : "#(\'\s*/\s*>|\'\s*>|\'\s+|\'$)#";

              // Get the portion after attribute value
              if (preg_match($pregMatch, substr($remainder, $nextBefore), $matches, PREG_OFFSET_CAPTURE))
              {
                  // We have a closing quote
                  $nextAfter = $nextBefore + $matches[0][1];
              }
              else
              {
                  // No closing quote
                  $nextAfter = strlen($remainder);
              }

              // Get the actual attribute value
              $attributeValue = substr($remainder, $nextBefore, $nextAfter - $nextBefore);

              // Escape bad chars
              $attributeValue = str_replace($badChars, $escapedChars, $attributeValue);
              $attributeValue = $this->_stripCSSExpressions($attributeValue);
              $alreadyFiltered .= substr($remainder, 0, $nextBefore) . $attributeValue . $quote;

              // substr() yields false instead of '' past the end of the string on PHP < 8
              $remainder = (string) substr($remainder, $nextAfter + 1);
          }

          // At this point, we just have to return the $alreadyFiltered and the $remainder
          return $alreadyFiltered . $remainder;
      }

      /**
       * Remove CSS Expressions in the form of <property>:expression(...)
       *
       * When an expression is found, the returned string also has its CSS comments removed.
       *
       * @param   string  $source  The source string.
       *
       * @return  string  Filtered string
       *
       * @since   11.1
       */
      protected function _stripCSSExpressions($source)
      {
          // Strip any comments out (in the form of /*...*/)
          $test = preg_replace('#\/\*.*\*\/#U', '', $source);

          // Test for :expression; a match at offset 0 counts, hence the strict comparison
          if (stripos($test, ':expression') === false)
          {
              // Not found, so we are done
              return $source;
          }

          // Comments are stripped and :expression is present: is it followed by a '(' (in any letter case)?
          if (preg_match('#:expression\s*\(#i', $test))
          {
              // If found, remove :expression
              return str_ireplace(':expression', '', $test);
          }

          // ":expression" without an opening parenthesis is harmless; keep the value as it is
          return $source;
      }
  }