PageRenderTime 28ms CodeModel.GetById 14ms RepoModel.GetById 0ms app.codeStats 0ms

/trunk/jfx-private/classes/Antz/TagFilter.php

http://jfxcms.googlecode.com/
PHP | 413 lines | 265 code | 48 blank | 100 comment | 61 complexity | 30994e8581e816ae2a30865ef77ea951 MD5 | raw file
Possible License(s): GPL-2.0, LGPL-2.1
  1. <?php
  2. ############### COPYLEFT GPLv3 LICENSE ###############
  3. ##
  4. ## Copyright 2009 GPLv3 - http://www.opensource.org/licenses/gpl-3.0.html
  5. ##
  6. ## Anthony Gallon
  7. ## oi_antz@hotmail.com
  8. ##
  9. ## Permission is hereby granted to any person having a copy of this software
  10. ## to freely use and modify as required so long as the copyright notices
  11. ## and branding remain intact.
  12. ##
  13. ############### COPYLEFT GPLv3 LICENSE ###############
  class_exists('phpQuery') or die('Antz_TagFilter requires class phpQuery - see '.__FILE__.', line '.__LINE__); // hard dependency: abort loading when phpQuery has not been defined
  /**
   * Strips unwanted and malicious tags from html content with whitelist and blacklist approach.
   * Supports whitelist tagnames, attributes and explicit tag/attribute combinations.
   *
   * Filtering rules, as implemented by process() / processElement():
   *  - an element is removed when no explicit whitelist rule names its tag AND
   *    (its tag is blacklisted OR its tag is not whitelisted);
   *  - an attribute is removed when it is blacklisted, or when it is neither
   *    whitelisted nor explicitly whitelisted for the tag it sits on;
   *  - explicit blacklist rules (tag/attribute pairs) strip that attribute from that
   *    tag unless the identical pair is explicitly whitelisted.
   *
   * Explicit rules are stored as a list of single-pair arrays: array(array('a' => 'href'), ...).
   */
  class Antz_TagFilter
  {
      /** @var array attribute names allowed on any tag */
      protected $attributeWhitelist = array();

      /** @var array attribute names removed from every tag */
      protected $attributeBlacklist = array();

      /** @var array tag names that are kept */
      protected $tagnameWhitelist = array();

      /** @var array tag names that are removed */
      protected $tagnameBlacklist = array();

      /** @var array list of array(tagname => attname) pairs that are always allowed */
      protected $explicitWhitelist = array();

      /** @var array list of array(tagname => attname) pairs that are always stripped */
      protected $explicitBlacklist = array();

      /** @var string 'xhtml' or 'html'; selects the phpQuery document factory */
      protected $htmlMode = 'xhtml';

      /** @var array human readable error messages collected so far */
      protected $errors = array();

      /** @var array legacy name, never read by this class; kept so subclasses referencing it keep working */
      protected $removeNodes = array();

      /** @var array DOM nodes scheduled for removal by the current process() call */
      protected $removedNodes = array();

      /** @var bool not consulted by any method of this class */
      protected $allowDoctype = false;

      /** @var Antz_TagFilter_Config|null last configuration passed to setConfig() */
      protected $config = null;

      public function __construct(){
      }

      /**
       * @return Antz_TagFilter_Config|null the configuration given to setConfig(), if any
       */
      public function getConfig(){
          return $this->config;
      }

      /**
       * Applies a configuration object to this filter.
       *
       * Only properties that already exist on the filter are touched: array values are
       * merged into array properties, string/bool/null values replace non-array
       * properties. 'htmlMode' is routed through setHtmlMode() so an invalid mode is ignored.
       *
       * @param Antz_TagFilter_Config $config
       */
      public function setConfig(Antz_TagFilter_Config $config){
          $this->config = $config;
          $settings = $config->get();
          if(!is_array($settings) && !($settings instanceof Traversable)) return;
          foreach($settings as $propertyName=>$propertyValue){
              // never let a config entry overwrite the config object itself
              if(!is_string($propertyName) || $propertyName === 'config') continue;
              if(!property_exists($this, $propertyName)) continue;
              if(is_array($propertyValue)){
                  if(is_array($this->$propertyName)){
                      $this->$propertyName = array_merge($this->$propertyName, $propertyValue);
                  }
              }else if(is_string($propertyValue) || is_bool($propertyValue) || is_null($propertyValue)){
                  if(is_array($this->$propertyName)) continue; // do not clobber a rule list with a scalar
                  if($propertyName === 'htmlMode'){
                      $this->setHtmlMode($propertyValue);
                  }else{
                      $this->$propertyName = $propertyValue;
                  }
              }
          }
      }

      /**
       * @return array error messages collected so far
       */
      public function getErrors(){
          return $this->errors;
      }

      /**
       * Set the mode which phpQuery runs (XHTML or HTML).
       * Any other value is ignored and the current mode is kept.
       * @param string $mode
       */
      public function setHtmlMode($mode='xhtml'){
          $mode = strtolower((string) $mode);
          if($mode === 'xhtml' || $mode === 'html') $this->htmlMode = $mode;
      }

      /**
       * Overwrite attributes whitelist with new values
       * @param mixed $atts ignored unless it is an array
       */
      public function setAttributeWhitelist($atts){
          if(!is_array($atts)) return;
          $this->attributeWhitelist = array();
          $this->addAttributeWhitelist($atts);
      }

      /**
       * Overwrite attributes blacklist with new values
       * @param mixed $atts ignored unless it is an array
       */
      public function setAttributeBlacklist($atts){
          if(!is_array($atts)) return;
          $this->attributeBlacklist = array();
          $this->addAttributeBlacklist($atts);
      }

      /**
       * Overwrite tagname whitelist with new values
       * @param mixed $tags ignored unless it is an array
       */
      public function setTagnameWhitelist($tags){
          if(!is_array($tags)) return;
          $this->tagnameWhitelist = array();
          $this->addTagnameWhitelist($tags);
      }

      /**
       * Overwrite tagname blacklist with new values
       * @param mixed $tags ignored unless it is an array
       */
      public function setTagnameBlacklist($tags){
          if(!is_array($tags)) return;
          $this->tagnameBlacklist = array();
          $this->addTagnameBlacklist($tags);
      }

      /**
       * Overwrite explicit blacklist with new values
       * @param mixed $tags ignored unless it is an array
       */
      public function setExplicitBlacklist($tags){
          if(!is_array($tags)) return;
          $this->explicitBlacklist = array();
          $this->addExplicitBlacklist($tags);
      }

      /**
       * Overwrite explicit whitelist with new values
       * @param mixed $tags ignored unless it is an array
       */
      public function setExplicitWhitelist($tags){
          if(!is_array($tags)) return;
          $this->explicitWhitelist = array();
          $this->addExplicitWhitelist($tags);
      }

      /**
       * Add explicit blacklist rules (tagname=>attname).
       *
       * Accepts a single pair array('a' => 'href'), a list of such pairs, or
       * array('a' => array('href', 'title')) to name several attributes of one tag.
       * @param mixed $tags ignored unless it is an array
       */
      public function addExplicitBlacklist($tags){
          if(!is_array($tags)) return;
          foreach($this->collectExplicitPairs($tags) as $pair){
              $this->explicitBlacklist[] = array($pair[0]=>$pair[1]);
          }
      }

      /**
       * Add explicit whitelist rules (tagname=>attname).
       *
       * Accepts a single pair array('a' => 'href'), a list of such pairs, or
       * array('a' => array('href', 'title')) to name several attributes of one tag.
       * @param mixed $tags ignored unless it is an array
       */
      public function addExplicitWhitelist($tags){
          if(!is_array($tags)) return;
          foreach($this->collectExplicitPairs($tags) as $pair){
              $this->explicitWhitelist[] = array($pair[0]=>$pair[1]);
          }
      }

      /**
       * Add a tagname blacklist rule
       * @param mixed $tagname a tag name or a (nested) array of tag names
       */
      public function addTagnameBlacklist($tagname){
          if(is_array($tagname)){
              foreach($tagname as $tag){
                  $this->addTagnameBlacklist($tag);
              }
              return;
          }
          $this->appendUnique($this->tagnameBlacklist, $tagname);
      }

      /**
       * Add a tagname whitelist rule
       * @param mixed $tagname a tag name or a (nested) array of tag names
       */
      public function addTagnameWhitelist($tagname){
          if(is_array($tagname)){
              foreach($tagname as $tag){
                  $this->addTagnameWhitelist($tag);
              }
              return;
          }
          $this->appendUnique($this->tagnameWhitelist, $tagname);
      }

      /**
       * Add an attribute blacklist rule
       * @param mixed $att an attribute name or a (nested) array of attribute names
       */
      public function addAttributeBlacklist($att){
          if(is_array($att)){
              foreach($att as $at){
                  $this->addAttributeBlacklist($at);
              }
              return;
          }
          $this->appendUnique($this->attributeBlacklist, $att);
      }

      /**
       * Add an attribute whitelist rule
       * @param mixed $att an attribute name or a (nested) array of attribute names
       */
      public function addAttributeWhitelist($att){
          if(is_array($att)){
              foreach($att as $at){
                  $this->addAttributeWhitelist($at);
              }
              return;
          }
          $this->appendUnique($this->attributeWhitelist, $att);
      }

      /**
       * Remove a tagname blacklist rule
       * @param mixed $tagname a tag name or a (nested) array of tag names
       */
      public function removeTagnameBlacklist($tagname){
          if(is_array($tagname)){
              foreach($tagname as $tag){
                  $this->removeTagnameBlacklist($tag);
              }
              return;
          }
          $this->dropValue($this->tagnameBlacklist, $tagname);
      }

      /**
       * Remove a tagname whitelist rule
       * @param mixed $tagname a tag name or a (nested) array of tag names
       */
      public function removeTagnameWhitelist($tagname){
          if(is_array($tagname)){
              foreach($tagname as $tag){
                  $this->removeTagnameWhitelist($tag);
              }
              return;
          }
          $this->dropValue($this->tagnameWhitelist, $tagname);
      }

      /**
       * Remove an attribute blacklist rule
       * @param mixed $att an attribute name or a (nested) array of attribute names
       */
      public function removeAttributeBlacklist($att){
          if(is_array($att)){
              foreach($att as $at){
                  $this->removeAttributeBlacklist($at);
              }
              return;
          }
          $this->dropValue($this->attributeBlacklist, $att);
      }

      /**
       * Remove an attribute whitelist rule
       * @param mixed $att an attribute name or a (nested) array of attribute names
       */
      public function removeAttributeWhitelist($att){
          if(is_array($att)){
              foreach($att as $at){
                  $this->removeAttributeWhitelist($at);
              }
              return;
          }
          $this->dropValue($this->attributeWhitelist, $att);
      }

      /**
       * Sanitizes and returns supplied HTML with all blacklisted and non-whitelisted tags/attributes removed.
       *
       * An array is processed element by element and the results are concatenated.
       * When no DOM can be built (invalid html mode) an empty string is returned rather
       * than unfiltered markup; the reason is available through getErrors().
       *
       * @param string|array $content
       * @return string $content
       */
      public function process($content){
          $this->removedNodes = array();
          if(is_array($content)){
              foreach($content as $k=>$v){
                  $content[$k] = $this->process($v);
              }
              return implode('', $content);
          }
          $content = trim((string) $content);
          if($content === '') return '';

          // textual pre-filter; the DOM pass below removes blacklisted elements as well
          $content = $this->stripBlacklistedMarkup($content);

          $dom = $this->initDom($content);
          if(!is_object($dom)){
              // initDom() already recorded the error; fail closed instead of echoing raw input
              return '';
          }

          $roots = (isset($dom->elements) && is_array($dom->elements)) ? $dom->elements : array();
          foreach($roots as $root){
              $this->processElement($root);
          }

          $whitelistPairs = $this->collectExplicitPairs($this->explicitWhitelist);
          foreach($this->collectExplicitPairs($this->explicitBlacklist) as $pair){
              $tagname = trim((string) $pair[0]);
              $attribute = trim((string) $pair[1]);
              if($tagname === '' || $attribute === '') continue;
              if($this->pairListed($whitelistPairs, $tagname, $attribute)){
                  // explicit allowed overrides explicit deny
                  continue;
              }
              // query inside this document rather than phpQuery's "current default" one
              foreach(pq($tagname.'['.$attribute.']', $dom) as $node){
                  $node->removeAttribute($attribute);
              }
          }

          foreach($this->removedNodes as $obj){
              pq($obj)->remove();
          }
          $content = (string) $dom;
          return $content;
      }

      /**
       * Creates a new phpQuery document for the configured html mode.
       * @param string $content
       * @return object|string phpQuery document; the untouched $content when the mode is invalid
       */
      protected function initDom($content){
          switch($this->htmlMode){
              case 'xhtml':
                  $dom = phpQuery::newDocumentXhtml($content);
                  break;
              case 'html':
                  $dom = phpQuery::newDocumentHtml($content);
                  break;
              default:
                  $this->errors[] = 'Invalid mode: should be xhtml or html';
                  return $content;
          }
          return $dom;
      }

      /**
       * Removes blacklisted and non-whitelisted attributes from the element, recurses into all
       * child nodes and schedules the element itself for removal when its tag is not allowed.
       * Nodes that are neither DOMElement nor DOMDocument (e.g. text) are left alone.
       * @param DOMElement|DOMDocument $el
       */
      protected function processElement(&$el){
          if(false === ($el instanceof DOMElement) && false === ($el instanceof DOMDocument)){
              return;
          }
          $whitelistPairs = $this->collectExplicitPairs($this->explicitWhitelist);
          $nodeName = $el->nodeName;

          // collect first, remove afterwards: the attribute map must not change while iterating
          $invalidAtts = array();
          $elAtts = $el->attributes; // null for a DOMDocument
          if($elAtts !== null){
              foreach($elAtts as $att){
                  $attName = $att->name;
                  if(in_array($attName, $this->attributeBlacklist, true)){
                      $invalidAtts[] = $attName;
                  }else if(!in_array($attName, $this->attributeWhitelist, true)
                      && !$this->pairListed($whitelistPairs, $nodeName, $attName)){
                      $invalidAtts[] = $attName;
                  }
              }
          }
          foreach($invalidAtts as $attName){
              $el->removeAttribute($attName);
          }

          $childNodes = $el->childNodes;
          if(is_object($childNodes) && $childNodes->length > 0){
              for($i=0, $max=$childNodes->length; $i<$max; $i++){
                  // the parameter is by-reference, so it needs a real variable
                  $child = $childNodes->item($i);
                  $this->processElement($child);
              }
          }

          if($el instanceof DOMDocument) return;

          // any explicit whitelist rule for this tag keeps the element itself
          foreach($whitelistPairs as $pair){
              if((string) $pair[0] === $nodeName) return;
          }
          if(in_array($nodeName, $this->tagnameBlacklist, true) || !in_array($nodeName, $this->tagnameWhitelist, true)){
              $this->removedNodes[] = $el;
          }
      }

      /**
       * Textually removes blacklisted tags (with their content when a closing tag follows)
       * before the markup is handed to the DOM parser.
       *
       * Tag names are quoted and must be followed by whitespace, "/" or ">" so that
       * blacklisting "a" does not touch "abbr". The replacement is repeated until nothing
       * matches any more, so removing one tag cannot splice a new blacklisted tag together.
       *
       * @param string $content
       * @return string
       */
      private function stripBlacklistedMarkup($content){
          $patterns = array();
          foreach($this->tagnameBlacklist as $tagname){
              if(is_array($tagname)) continue;
              $tagname = trim((string) $tagname);
              if($tagname === '') continue;
              $name = preg_quote($tagname, '#');
              $patterns[] = '#<'.$name.'(?=[\s/>])[^>]*>.*?</'.$name.'(?=[\s/>])[^>]*>#is';
              $patterns[] = '#</?'.$name.'(?=[\s/>])[^>]*>#i';
          }
          if(count($patterns) === 0) return $content;
          do{
              $count = 0;
              $stripped = preg_replace($patterns, '', $content, -1, $count);
              if($stripped === null){
                  $this->errors[] = 'Blacklist pre-filter failed (PCRE error '.preg_last_error().')';
                  break;
              }
              $content = $stripped;
          }while($count > 0);
          return $content;
      }

      /**
       * Flattens explicit rules, in any of the accepted shapes, into a list of array(tag, attribute).
       *
       * Handles the stored form (list of array(tag => att)), a flat tag => att map and
       * tag => array(att, att, ...).
       *
       * @param mixed $rules
       * @param string|null $parentTag tag name inherited from an enclosing string key
       * @return array list of array(0 => tag, 1 => attribute)
       */
      private function collectExplicitPairs($rules, $parentTag = null){
          $pairs = array();
          if(!is_array($rules)) return $pairs;
          foreach($rules as $key=>$value){
              if(is_array($value)){
                  $context = is_string($key) ? $key : $parentTag;
                  foreach($this->collectExplicitPairs($value, $context) as $pair){
                      $pairs[] = $pair;
                  }
              }else if(is_int($key) && $parentTag !== null){
                  $pairs[] = array($parentTag, $value);
              }else{
                  $pairs[] = array($key, $value);
              }
          }
          return $pairs;
      }

      /**
       * Tells whether the tag/attribute combination occurs in a flattened pair list.
       * @param array $pairs result of collectExplicitPairs()
       * @param string $tagname
       * @param string $attname
       * @return bool
       */
      private function pairListed($pairs, $tagname, $attname){
          foreach($pairs as $pair){
              if((string) $pair[0] === (string) $tagname && (string) $pair[1] === (string) $attname){
                  return true;
              }
          }
          return false;
      }

      /**
       * Appends the trimmed value to the list unless it is empty or already present.
       * @param array $list modified in place
       * @param mixed $value
       */
      private function appendUnique(&$list, $value){
          $value = trim((string) $value);
          if($value === '') return;
          if(!in_array($value, $list, true)) $list[] = $value;
      }

      /**
       * Removes every occurrence of the trimmed value from the list and re-indexes it.
       * @param array $list modified in place
       * @param mixed $value
       */
      private function dropValue(&$list, $value){
          $value = trim((string) $value);
          $kept = array();
          foreach($list as $item){
              if(is_array($item) || trim((string) $item) !== $value) $kept[] = $item;
          }
          $list = $kept;
      }
  }