PageRenderTime 26ms CodeModel.GetById 19ms RepoModel.GetById 0ms app.codeStats 0ms

/vendor/zendframework/zendframework/library/Zend/Escaper/Escaper.php

https://bitbucket.org/juan_sanchez/aiyellow
PHP | 390 lines | 179 code | 38 blank | 173 comment | 35 complexity | e95ea647ffbf40d386958b2c81adb3c1 MD5 | raw file
  1. <?php
  2. /**
  3. * Zend Framework (http://framework.zend.com/)
  4. *
  5. * @link http://github.com/zendframework/zf2 for the canonical source repository
  6. * @copyright Copyright (c) 2005-2013 Zend Technologies USA Inc. (http://www.zend.com)
  7. * @license http://framework.zend.com/license/new-bsd New BSD License
  8. */
  9. namespace Zend\Escaper;
  10. use Zend\Escaper\Exception;
  11. /**
  12. * Context specific methods for use in secure output escaping
  13. */
  class Escaper
  {
      /**
       * Map of Unicode code points to the named HTML entities used when
       * escaping for the HTML attribute context.
       *
       * While HTML supports far more named entities, the lowest common
       * denominator has become HTML5's XML serialisation, which is restricted
       * to those named entities that XML supports. Using other HTML entities
       * would result in this error:
       *     XML Parsing Error: undefined entity
       *
       * @var array
       */
      protected static $htmlNamedEntityMap = array(
          34 => 'quot', // quotation mark
          38 => 'amp',  // ampersand
          60 => 'lt',   // less-than sign
          62 => 'gt',   // greater-than sign
      );

      /**
       * Current encoding for escaping. If not UTF-8, strings are converted from
       * this encoding before escaping and back to this encoding afterwards.
       *
       * @var string
       */
      protected $encoding = 'utf-8';

      /**
       * Bitmask of flags passed as the second parameter to htmlspecialchars().
       * The constructor adds ENT_SUBSTITUTE when that constant is defined
       * (PHP 5.4+) so invalid byte sequences are replaced rather than causing
       * an empty result.
       *
       * @var int
       */
      protected $htmlSpecialCharsFlags = \ENT_QUOTES;

      /**
       * Callback (bound to this instance) which escapes characters for the
       * HTML attribute context.
       *
       * @var callable
       */
      protected $htmlAttrMatcher;

      /**
       * Callback (bound to this instance) which escapes characters for the
       * Javascript context.
       *
       * @var callable
       */
      protected $jsMatcher;

      /**
       * Callback (bound to this instance) which escapes characters for the
       * CSS context.
       *
       * @var callable
       */
      protected $cssMatcher;

      /**
       * List of all encodings accepted by the constructor (lower-case).
       *
       * @var array
       */
      protected $supportedEncodings = array(
          'iso-8859-1',   'iso8859-1',    'iso-8859-5',   'iso8859-5',
          'iso-8859-15',  'iso8859-15',   'utf-8',        'cp866',
          'ibm866',       '866',          'cp1251',       'windows-1251',
          'win-1251',     '1251',         'cp1252',       'windows-1252',
          '1252',         'koi8-r',       'koi8-ru',      'koi8r',
          'big5',         '950',          'gb2312',       '936',
          'big5-hkscs',   'shift_jis',    'sjis',         'sjis-win',
          'cp932',        '932',          'euc-jp',       'eucjp',
          'eucjp-win',    'macroman'
      );

      /**
       * Constructor: the single parameter sets the encoding used by this
       * object. When omitted (null) the default of UTF-8 is kept. If the
       * ENT_SUBSTITUTE constant exists it is added to the htmlspecialchars()
       * flags.
       *
       * @param  string|null $encoding Case-insensitive name of a supported encoding
       * @throws Exception\InvalidArgumentException If the encoding is blank or unsupported
       */
      public function __construct($encoding = null)
      {
          if ($encoding !== null) {
              $encoding = (string) $encoding;
              if ($encoding === '') {
                  throw new Exception\InvalidArgumentException(
                      get_called_class() . ' constructor parameter does not allow a blank value'
                  );
              }

              $encoding = strtolower($encoding);
              // Strict comparison: the list contains numeric-looking names
              // such as '866', which must only match as strings.
              if (!in_array($encoding, $this->supportedEncodings, true)) {
                  throw new Exception\InvalidArgumentException(
                      'Value of \'' . $encoding . '\' passed to ' . get_called_class()
                      . ' constructor parameter is invalid. Provide an encoding supported by htmlspecialchars()'
                  );
              }

              $this->encoding = $encoding;
          }

          if (defined('ENT_SUBSTITUTE')) {
              $this->htmlSpecialCharsFlags |= \ENT_SUBSTITUTE;
          }

          // set matcher callbacks
          $this->htmlAttrMatcher = array($this, 'htmlAttrMatcher');
          $this->jsMatcher       = array($this, 'jsMatcher');
          $this->cssMatcher      = array($this, 'cssMatcher');
      }

      /**
       * Return the encoding that all output/input is expected to be encoded in.
       *
       * @return string
       */
      public function getEncoding()
      {
          return $this->encoding;
      }

      /**
       * Escape a string for the HTML Body context where there are very few
       * characters of special meaning. Internally this uses htmlspecialchars()
       * with the configured flags and encoding.
       *
       * @param  string $string
       * @return string
       */
      public function escapeHtml($string)
      {
          return htmlspecialchars($string, $this->htmlSpecialCharsFlags, $this->encoding);
      }

      /**
       * Escape a string for the HTML Attribute context. An extended set of
       * characters is escaped beyond what htmlspecialchars() covers, to handle
       * cases where an attribute might be unquoted or quoted illegally (e.g.
       * backticks are valid quotes for IE). Everything except ASCII
       * alphanumerics, comma, period, hyphen and underscore is escaped.
       *
       * Empty strings and strings consisting only of digits are returned
       * as-is.
       *
       * @param  string $string
       * @throws Exception\RuntimeException If the input is not valid in the configured encoding
       * @return string
       */
      public function escapeHtmlAttr($string)
      {
          $string = $this->toUtf8($string);
          if ($string === '' || ctype_digit($string)) {
              return $string;
          }

          $result = preg_replace_callback('/[^a-z0-9,\.\-_]/iSu', $this->htmlAttrMatcher, $string);
          return $this->fromUtf8($result);
      }

      /**
       * Escape a string for the Javascript context. This does not use
       * json_encode(). An extended set of characters is escaped beyond
       * ECMAScript's rules for Javascript literal string escaping in order to
       * prevent misinterpretation of Javascript as HTML leading to the
       * injection of special characters and entities. The escaping used should
       * be tolerant of cases where HTML escaping was not applied on top of
       * Javascript escaping correctly. Backslash escaping is not used as it
       * still leaves the escaped character as-is and so is not useful in a
       * HTML context.
       *
       * Empty strings and strings consisting only of digits are returned
       * as-is.
       *
       * @param  string $string
       * @throws Exception\RuntimeException If the input is not valid in the configured encoding
       * @return string
       */
      public function escapeJs($string)
      {
          $string = $this->toUtf8($string);
          if ($string === '' || ctype_digit($string)) {
              return $string;
          }

          $result = preg_replace_callback('/[^a-z0-9,\._]/iSu', $this->jsMatcher, $string);
          return $this->fromUtf8($result);
      }

      /**
       * Escape a string for the URI or Parameter contexts. This should not be
       * used to escape an entire URI - only a subcomponent being inserted. The
       * function is a simple proxy to rawurlencode().
       *
       * @param  string $string
       * @return string
       */
      public function escapeUrl($string)
      {
          return rawurlencode($string);
      }

      /**
       * Escape a string for the CSS context. CSS escaping can be applied to any
       * string being inserted into CSS and escapes everything except ASCII
       * alphanumerics.
       *
       * Empty strings and strings consisting only of digits are returned
       * as-is.
       *
       * @param  string $string
       * @throws Exception\RuntimeException If the input is not valid in the configured encoding
       * @return string
       */
      public function escapeCss($string)
      {
          $string = $this->toUtf8($string);
          if ($string === '' || ctype_digit($string)) {
              return $string;
          }

          $result = preg_replace_callback('/[^a-z0-9]/iSu', $this->cssMatcher, $string);
          return $this->fromUtf8($result);
      }

      /**
       * Callback function for preg_replace_callback that applies HTML Attribute
       * escaping to all matches.
       *
       * @param  array $matches Match data; element 0 is a single UTF-8 character
       * @return string The entity that replaces the character
       */
      protected function htmlAttrMatcher($matches)
      {
          $chr = $matches[0];

          /**
           * Resolve the real Unicode code point before any range checks.
           * UTF-32BE is used because its four bytes are the code point itself
           * for every character; UTF-16BE would yield a surrogate pair for
           * characters above U+FFFF and produce a bogus value.
           */
          if (strlen($chr) > 1) {
              $ord = (int) hexdec(bin2hex($this->convertEncoding($chr, 'UTF-32BE', 'UTF-8')));
          } else {
              $ord = ord($chr);
          }

          /**
           * The following replaces characters undefined in HTML with the
           * hex entity for the Unicode replacement character. The check is
           * made on the code point (not the first UTF-8 byte) so that the
           * two-byte encoded controls U+0080-U+009F are caught as well.
           */
          if (($ord <= 0x1f && $ord !== 0x09 && $ord !== 0x0a && $ord !== 0x0d)
              || ($ord >= 0x7f && $ord <= 0x9f)
          ) {
              return '&#xFFFD;';
          }

          /**
           * Use a named entity when one is available for this code point.
           */
          if (isset(static::$htmlNamedEntityMap[$ord])) {
              return '&' . static::$htmlNamedEntityMap[$ord] . ';';
          }

          /**
           * Per OWASP recommendations, we'll use upper hex entities
           * for any other characters where a named entity does not exist.
           */
          if ($ord > 255) {
              return sprintf('&#x%04X;', $ord);
          }
          return sprintf('&#x%02X;', $ord);
      }

      /**
       * Callback function for preg_replace_callback that applies Javascript
       * escaping to all matches.
       *
       * Single-byte characters become \xHH. Multi-byte characters become
       * \uHHHH; characters outside the Basic Multilingual Plane are emitted as
       * two \uHHHH escapes (high and low surrogate), since a \u escape carries
       * exactly four hex digits.
       *
       * @param  array $matches Match data; element 0 is a single UTF-8 character
       * @return string The escape sequence that replaces the character
       */
      protected function jsMatcher($matches)
      {
          $chr = $matches[0];
          if (strlen($chr) === 1) {
              return sprintf('\\x%02X', ord($chr));
          }

          $hex = strtoupper(bin2hex($this->convertEncoding($chr, 'UTF-16BE', 'UTF-8')));
          if (strlen($hex) <= 4) {
              return sprintf('\\u%04s', $hex);
          }

          // Eight hex digits: a UTF-16 surrogate pair. Emit each half separately.
          return sprintf('\\u%04s\\u%04s', substr($hex, 0, 4), substr($hex, 4, 4));
      }

      /**
       * Callback function for preg_replace_callback that applies CSS
       * escaping to all matches: a backslash, the code point in upper-case
       * hex, and a terminating space.
       *
       * @param  array $matches Match data; element 0 is a single UTF-8 character
       * @return string The escape sequence that replaces the character
       */
      protected function cssMatcher($matches)
      {
          $chr = $matches[0];
          if (strlen($chr) === 1) {
              $ord = ord($chr);
          } else {
              // UTF-32BE so the bytes are the code point itself, including
              // for characters above U+FFFF.
              $ord = (int) hexdec(bin2hex($this->convertEncoding($chr, 'UTF-32BE', 'UTF-8')));
          }
          return sprintf('\\%X ', $ord);
      }

      /**
       * Converts a string to UTF-8 from the base encoding. The base encoding is
       * set via this class' constructor.
       *
       * @param  string $string
       * @throws Exception\RuntimeException If the result is not valid UTF-8
       * @return string
       */
      protected function toUtf8($string)
      {
          if ($this->getEncoding() === 'utf-8') {
              $result = $string;
          } else {
              $result = $this->convertEncoding($string, 'UTF-8', $this->getEncoding());
          }

          if (!$this->isUtf8($result)) {
              throw new Exception\RuntimeException(sprintf(
                  'String to be escaped was not valid UTF-8 or could not be converted: %s', $result
              ));
          }

          return $result;
      }

      /**
       * Converts a string from UTF-8 to the base encoding. The base encoding is
       * set via this class' constructor.
       *
       * @param  string $string
       * @return string
       */
      protected function fromUtf8($string)
      {
          if ($this->getEncoding() === 'utf-8') {
              return $string;
          }

          return $this->convertEncoding($string, $this->getEncoding(), 'UTF-8');
      }

      /**
       * Checks if a given string appears to be valid UTF-8 or not. The empty
       * string is treated as valid.
       *
       * @param  string $string
       * @return bool
       */
      protected function isUtf8($string)
      {
          return ($string === '' || preg_match('/^./su', $string));
      }

      /**
       * Encoding conversion helper which wraps iconv and mbstring where they
       * exist or throws an exception where neither is available. iconv is
       * preferred when both are present.
       *
       * @param  string       $string
       * @param  string       $to
       * @param  array|string $from
       * @throws Exception\RuntimeException If neither iconv nor mbstring is available
       * @return string The converted string, or '' if the conversion failed
       */
      protected function convertEncoding($string, $to, $from)
      {
          if (function_exists('iconv')) {
              $result = iconv($from, $to, $string);
          } elseif (function_exists('mb_convert_encoding')) {
              $result = mb_convert_encoding($string, $to, $from);
          } else {
              throw new Exception\RuntimeException(
                  get_called_class()
                  . ' requires either the iconv or mbstring extension to be installed'
                  . ' when escaping for non UTF-8 strings.'
              );
          }

          if ($result === false) {
              return ''; // return non-fatal blank string on encoding errors from users
          }
          return $result;
      }
  }