PageRenderTime 46ms CodeModel.GetById 16ms RepoModel.GetById 1ms app.codeStats 0ms

/php/lib/sanitize_lib.php

http://github.com/openmelody/melody
PHP | 160 lines | 152 code | 7 blank | 1 comment | 47 complexity | a229c608334276f4d12a1917145f99cc MD5 | raw file
Possible License(s): GPL-2.0, LGPL-2.0, LGPL-2.1
  1. <?php
  2. require_once("MTUtil.php");
  /**
   * Strip HTML from a string, keeping only whitelisted tags and attributes.
   *
   * The input is split into markup and text tokens. Comments, processing
   * instructions and ASP-style blocks are dropped entirely. Tags that are not
   * whitelisted are dropped (their surrounding text is kept). Whitelisted tags
   * are re-emitted with only their whitelisted attributes, and any tags still
   * open at the end of the input are closed.
   *
   * @param string       $s   Markup to sanitize.
   * @param string|array $arg Either a spec string understood by
   *                          sanitize_parse_spec(), or an already parsed array
   *                          with the keys 'ok' (tag => 1 or attribute map; the
   *                          pseudo-tag '*' lists attributes allowed on every
   *                          tag) and 'tag_attr' (tag => '/' for tags that are
   *                          always emitted self-closed). An empty value
   *                          whitelists nothing.
   * @return string The sanitized markup.
   */
  function sanitize($s, $arg) {
      if ($arg && !is_array($arg)) {
          $arg = sanitize_parse_spec($arg);
      }
      // Fall back to empty whitelists so a missing/empty spec strips every tag
      // without reading offsets of a non-array value.
      $ok_tags = (is_array($arg) && isset($arg['ok']) && is_array($arg['ok']))
          ? $arg['ok'] : array();
      $tag_attr = (is_array($arg) && isset($arg['tag_attr']) && is_array($arg['tag_attr']))
          ? $arg['tag_attr'] : array();

      // NUL bytes are removed up front so they cannot split a tag or protocol name.
      $s = str_replace("\0", '', (string) $s);
      $closings = array('<'.'?' => '?'.'>', '<!--' => '-->', '<%' => '%>');
      $tokens = preg_split('/(<(?:!--|%|\?)|<\/\w*|<\w*|(?:-->|%>|\?'.'>|>))/', $s, -1, PREG_SPLIT_DELIM_CAPTURE);
      if ($tokens === false) {
          // The tokenizer failed; fail closed instead of passing markup through.
          return '';
      }

      $open_tag_a = array();   // stack of currently open tag names
      $open_tag_h = array();   // tag name => number of open instances
      $token_count = count($tokens);
      $toknum = 0;
      $result = '';

      while ($toknum < $token_count) {
          $token = $tokens[$toknum];

          if (isset($closings[$token])) {
              // Comment / processing instruction / ASP block: skip through its terminator.
              $toknum = sanitize_tokens_up_to($tokens, $toknum, $closings[$token]);
              continue;
          }

          if (substr($token, 0, 1) != '<') {
              // Plain text (or a stray delimiter such as '>') is copied verbatim.
              if (strlen($token) > 0) {
                  $result .= $token;
              }
              $toknum++;
              continue;
          }

          // $closure: 0 = opening tag, 1 = closing tag, 2 = self-closing tag.
          $closure = 0;
          $name = strtolower((string) substr($token, 1));
          $start = $toknum;
          $end = sanitize_tokens_up_to($tokens, $start, '>');
          $toknum = $end;
          if (substr($name, 0, 1) == '/') {
              $name = (string) substr($name, 1);
              $closure = 1;
          }

          if (!isset($ok_tags[$name])) {
              // Tag is not whitelisted: everything up to its '>' is discarded.
              continue;
          }

          if (isset($tag_attr[$name]) && $tag_attr[$name] == '/') {
              $closure = 2;
          }

          # process attribute list...
          $inside = sanitize_output_tokens($tokens, $start + 1, $end - 1);
          if (preg_match('!/>$!', $inside)) {
              $closure = 2;
          }
          $inside = preg_replace('!/?>$!', '', $inside);
          $attrs = '';
          if (preg_match_all('/\s*(\w+)\s*=(?:([\'"])(.*?)\2|([^\s]+))\s*/s', $inside, $matches, PREG_SET_ORDER)) {
              foreach ($matches as $match) {
                  $attr = strtolower($match[1]);
                  if (isset($match[4])) {
                      // Unquoted value: re-emit it double-quoted.
                      $value = '"' . str_replace('"', '&quot;', $match[4]) . '"';
                      $dec_val = decode_html($match[4]);
                  } else {
                      // Quoted value: keep the original quote character.
                      $value = $match[2] . $match[3] . $match[2];
                      $dec_val = decode_html($match[3]);
                  }
                  if (!isset($ok_tags[$name][$attr]) && !isset($ok_tags['*'][$attr])) {
                      continue;
                  }
                  $safe = 1;
                  if (preg_match('/^(src|href|dynsrc)$/', $attr)) {
                      // Turn numeric character references for ':' into a literal
                      // colon so an encoded protocol separator is still detected.
                      $dec_val = preg_replace('/&#0*58(?:=;|[^0-9])/', ':', $dec_val);
                      $dec_val = preg_replace('/&#x0*3[Aa](?:=;|[^a-fA-F0-9])/', ':', $dec_val);
                      if (preg_match('/^([\s\S]+?):/', $dec_val, $proto_match)) {
                          $proto = $proto_match[1];
                          if (preg_match('/[\r\n\t]/', $proto)) {
                              // Control whitespace inside the protocol is rejected outright.
                              $safe = 0;
                          } else {
                              $proto = preg_replace('/\s+/s', '', $proto);
                              if (preg_match('/[^a-zA-Z0-9\\+]/', $proto)) {
                                  $safe = 0;
                              } elseif (preg_match('/script$/i', $proto)) {
                                  // Rejects any protocol ending in "script" (javascript, vbscript, ...).
                                  $safe = 0;
                              }
                          }
                      }
                  }
                  if ($safe) {
                      $attrs .= ' ' . $attr . '=' . $value;
                  }
              }
          }

          if ($closure == 1) {
              // Only unwind the stack when this tag really is open. A count of
              // zero (opened and closed earlier) must not close unrelated tags,
              // and the count must never go negative.
              if (!empty($open_tag_h[$name])) {
                  $result .= sanitize_expel_up_to($open_tag_a, $open_tag_h, $name);
                  $open_tag_h[$name]--;
              }
              // NOTE(review): a closing tag with no matching open tag is still
              // emitted below, as before - confirm whether it should be dropped.
          } elseif (!$closure) {
              $open_tag_a[] = $name;
              $open_tag_h[$name] = isset($open_tag_h[$name]) ? $open_tag_h[$name] + 1 : 1;
          }

          $result .= '<' .
              ($closure == 1 ? '/' : '') .
              $name .
              $attrs .
              ($closure == 2 ? ' /' : '') . '>';
      }

      // Close whatever is still open so the output is balanced.
      $result .= sanitize_expel_up_to($open_tag_a, $open_tag_h, null);
      return $result;
  }
  /**
   * Parse a sanitize spec string into the whitelist arrays used by sanitize().
   *
   * The spec is a comma-separated list of rules. Each rule is a tag name,
   * optionally followed by whitespace-separated attribute names allowed on
   * that tag. A tag name ending in '/' (e.g. "br/") marks the tag as
   * self-closing. The whole spec is lower-cased. Surrounding whitespace and
   * empty rules (such as the one after a trailing comma) are ignored so they
   * cannot register an empty or space-padded tag name.
   *
   * @param string $a Spec string, e.g. "a href title, b, br/".
   * @return array Array with two keys: 'ok' maps each tag to 1, or to an
   *               attribute => 1 map when attributes were listed; 'tag_attr'
   *               maps each self-closing tag to '/'.
   */
  function sanitize_parse_spec($a) {
      $ok_tags = array();
      $tag_attr = array();

      foreach (explode(',', strtolower((string) $a)) as $rule) {
          // First word is the tag, the remaining words are its attributes.
          $words = preg_split('/\s+/', $rule, -1, PREG_SPLIT_NO_EMPTY);
          if (!$words) {
              continue;
          }
          $tag = array_shift($words);

          $self_closing = (substr($tag, -1) === '/');
          if ($self_closing) {
              $tag = (string) substr($tag, 0, -1);
          }
          if ($tag === '') {
              // A lone "/" names no tag at all.
              continue;
          }

          if ($self_closing) {
              $tag_attr[$tag] = '/';
          }
          $ok_tags[$tag] = $words ? array_fill_keys($words, 1) : 1;
      }

      return array('ok' => $ok_tags, 'tag_attr' => $tag_attr);
  }
  /**
   * Close open tags from the top of the stack down to a given tag.
   *
   * Tags are popped off $open_tag_a and a closing tag is produced for each,
   * until the top of the stack is $stop_tag. The stop tag itself is then popped
   * as well, but no closing tag is produced for it and its counter is left
   * untouched (the caller handles both). With no stop tag (null or '') the
   * whole stack is closed.
   *
   * Tag names are compared as strings, so numeric-looking names such as "0" or
   * "1e1" are treated like any other name.
   *
   * @param array       $open_tag_a Stack of open tag names (modified in place).
   * @param array       $open_tag_h Tag name => open count (modified in place).
   * @param string|null $stop_tag   Tag to stop at, or null/'' to close everything.
   * @return string The closing tags that were produced, in order.
   */
  function sanitize_expel_up_to(&$open_tag_a, &$open_tag_h, $stop_tag) {
      $out = '';
      $has_stop = ($stop_tag !== null && $stop_tag !== false && $stop_tag !== '');

      while (count($open_tag_a) > 0) {
          $top = $open_tag_a[count($open_tag_a) - 1];
          if ($has_stop && (string) $top === (string) $stop_tag) {
              break;
          }
          array_pop($open_tag_a);
          if (isset($open_tag_h[$top])) {
              $open_tag_h[$top]--;
          }
          $out .= '</' . $top . '>';
      }

      // Whatever remains on top is the stop tag itself: remove it from the stack.
      if (count($open_tag_a) > 0) {
          array_pop($open_tag_a);
      }
      return $out;
  }
  /**
   * Find where the construct that starts at token $i ends.
   *
   * Scans forward from index $i for the first token equal to $closure.
   *
   * @param array  $tokens  Token list.
   * @param int    $i       Index to start scanning at.
   * @param string $closure Terminating token to look for.
   * @return int Index of the token following the terminator; the token count
   *             when no terminator is found; $i itself when $i is already
   *             past the end of the list.
   */
  function sanitize_tokens_up_to($tokens, $i, $closure) {
      $total = count($tokens);
      for ($pos = $i; $pos < $total; $pos++) {
          if ($tokens[$pos] == $closure) {
              // Step past the terminator itself.
              return $pos + 1;
          }
      }
      return $pos;
  }
  /**
   * Join the tokens from index $start through index $end (inclusive).
   *
   * @param array $tokens Token list; assumed to be sequentially indexed from 0,
   *                      as produced by preg_split() - TODO confirm for other callers.
   * @param int   $start  Index of the first token to include.
   * @param int   $end    Index of the last token to include.
   * @return string The concatenated tokens, or '' when the range is empty.
   */
  function sanitize_output_tokens($tokens, $start, $end) {
      // An inverted or entirely negative range selects nothing.
      if ($end < $start || $end < 0) {
          return '';
      }
      $first = max(0, $start);
      $span = array_slice($tokens, $first, $end - $first + 1);
      return implode('', $span);
  }
  153. ?>