/trunk/jfx-private/classes/Antz/TagFilter.php
PHP | 413 lines | 265 code | 48 blank | 100 comment | 61 complexity | 30994e8581e816ae2a30865ef77ea951 MD5 | raw file
Possible License(s): GPL-2.0, LGPL-2.1
- <?php
- ############### COPYLEFT GPLv3 LICENSE ###############
- ##
- ## Copyright 2009 GPLv3 - http://www.opensource.org/licenses/gpl-3.0.html
- ##
- ## Anthony Gallon
- ## oi_antz@hotmail.com
- ##
- ## Permission is hereby granted to any person having a copy of this software
- ## to freely use and modify as required so long as the copyright notices
- ## and branding remain intact.
- ##
- ############### COPYLEFT GPLv3 LICENSE ###############
if (class_exists('phpQuery') === false) exit('Antz_TagFilter requires class phpQuery - see ' . __FILE__ . ', line ' . __LINE__); // abort at load time when phpQuery is unavailable
/**
 * Strips unwanted and malicious tags from HTML content using a combined
 * whitelist / blacklist approach.
 *
 * Three kinds of rules are supported:
 *  - tag name lists (whitelist / blacklist),
 *  - attribute name lists (whitelist / blacklist), applied to every tag,
 *  - explicit tag/attribute combinations (whitelist / blacklist).
 *
 * Explicit rules are stored as a list of single-pair arrays, e.g.
 * array(array('a' => 'href'), array('img' => 'src')).
 *
 * Parsing and serialisation are delegated to phpQuery.
 */
class Antz_TagFilter
{
    /** @var array attribute names allowed on any tag */
    protected $attributeWhitelist = array();

    /** @var array attribute names removed from every tag */
    protected $attributeBlacklist = array();

    /** @var array tag names that are kept */
    protected $tagnameWhitelist = array();

    /** @var array tag names that are removed */
    protected $tagnameBlacklist = array();

    /** @var array list of array(tagname => attname) pairs that are explicitly allowed */
    protected $explicitWhitelist = array();

    /** @var array list of array(tagname => attname) pairs that are explicitly denied */
    protected $explicitBlacklist = array();

    /** @var string parser mode handed to phpQuery: 'xhtml' or 'html' */
    protected $htmlMode = 'xhtml';

    /** @var array human readable error messages collected while filtering */
    protected $errors = array();

    /** @var array legacy property, never read by this class; kept for subclasses */
    protected $removeNodes = array();

    /** @var array DOM elements queued for removal during the current process() call */
    protected $removedNodes = array();

    /** @var bool not consulted anywhere in this class */
    protected $allowDoctype = false;

    /** @var Antz_TagFilter_Config|null last configuration passed to setConfig() */
    protected $config = null;

    public function __construct()
    {
    }

    /**
     * @return Antz_TagFilter_Config|null the configuration given to setConfig(), if any
     */
    public function getConfig()
    {
        return $this->config;
    }

    /**
     * Applies a configuration object to this filter.
     *
     * For every entry returned by $config->get() whose key names an existing
     * property: array values are merged into array properties, and
     * string/bool/null values overwrite non-array properties. 'htmlMode' is
     * routed through setHtmlMode() so invalid modes are rejected. Keys that do
     * not name a property are ignored (no dynamic properties are created).
     *
     * NOTE(review): assumes $config->get() returns an iterable name => value
     * map, as the original loop did - confirm against Antz_TagFilter_Config.
     *
     * @param Antz_TagFilter_Config $config
     */
    public function setConfig(Antz_TagFilter_Config $config)
    {
        $this->config = $config;

        $settings = $config->get();
        if (!is_array($settings) && !($settings instanceof Traversable)) {
            return;
        }

        foreach ($settings as $propertyName => $propertyValue) {
            // 'config' holds the object itself and must not be overwritten by its own payload
            if (!is_string($propertyName) || $propertyName === 'config' || !property_exists($this, $propertyName)) {
                continue;
            }

            if ($propertyName === 'htmlMode') {
                $this->setHtmlMode($propertyValue);
                continue;
            }

            if (is_array($this->$propertyName)) {
                if (is_array($propertyValue)) {
                    $this->$propertyName = array_merge($this->$propertyName, $propertyValue);
                }
                continue;
            }

            if (is_string($propertyValue) || is_bool($propertyValue) || is_null($propertyValue)) {
                $this->$propertyName = $propertyValue;
            }
        }
    }

    /**
     * @return array error messages collected so far
     */
    public function getErrors()
    {
        return $this->errors;
    }

    /**
     * Set the mode which phpQuery runs (XHTML or HTML).
     * Any other value is ignored and the current mode is kept.
     *
     * @param string $mode 'xhtml' or 'html' (case-insensitive)
     */
    public function setHtmlMode($mode = 'xhtml')
    {
        $mode = strtolower((string) $mode);
        if ($mode === 'xhtml' || $mode === 'html') {
            $this->htmlMode = $mode;
        }
    }

    /**
     * Overwrite attributes whitelist with new values.
     *
     * @param mixed $atts array of attribute names; anything else is ignored
     */
    public function setAttributeWhitelist($atts)
    {
        if (!is_array($atts)) {
            return;
        }
        $this->attributeWhitelist = array();
        $this->addAttributeWhitelist($atts);
    }

    /**
     * Overwrite attributes blacklist with new values.
     *
     * @param mixed $atts array of attribute names; anything else is ignored
     */
    public function setAttributeBlacklist($atts)
    {
        if (!is_array($atts)) {
            return;
        }
        $this->attributeBlacklist = array();
        $this->addAttributeBlacklist($atts);
    }

    /**
     * Overwrite tagname whitelist with new values.
     *
     * @param mixed $tags array of tag names; anything else is ignored
     */
    public function setTagnameWhitelist($tags)
    {
        if (!is_array($tags)) {
            return;
        }
        $this->tagnameWhitelist = array();
        $this->addTagnameWhitelist($tags);
    }

    /**
     * Overwrite tagname blacklist with new values.
     *
     * @param mixed $tags array of tag names; anything else is ignored
     */
    public function setTagnameBlacklist($tags)
    {
        if (!is_array($tags)) {
            return;
        }
        $this->tagnameBlacklist = array();
        $this->addTagnameBlacklist($tags);
    }

    /**
     * Overwrite explicit blacklist with new values.
     *
     * @param mixed $tags see addExplicitBlacklist(); non-arrays are ignored
     */
    public function setExplicitBlacklist($tags)
    {
        if (!is_array($tags)) {
            return;
        }
        $this->explicitBlacklist = array();
        $this->addExplicitBlacklist($tags);
    }

    /**
     * Overwrite explicit whitelist with new values.
     *
     * @param mixed $tags see addExplicitWhitelist(); non-arrays are ignored
     */
    public function setExplicitWhitelist($tags)
    {
        if (!is_array($tags)) {
            return;
        }
        $this->explicitWhitelist = array();
        $this->addExplicitWhitelist($tags);
    }

    /**
     * Add explicit blacklist rules (tagname => attname).
     *
     * Accepted shapes: array('a' => 'href'), array('img' => array('src', 'alt')),
     * or any nesting of such arrays.
     *
     * @param mixed $tags rule array; anything else is ignored
     */
    public function addExplicitBlacklist($tags)
    {
        if (!is_array($tags)) {
            return;
        }
        $this->explicitBlacklist = array_merge($this->explicitBlacklist, $this->collectExplicitRules($tags));
    }

    /**
     * Add explicit whitelist rules (tagname => attname).
     *
     * Accepted shapes: array('a' => 'href'), array('img' => array('src', 'alt')),
     * or any nesting of such arrays.
     *
     * @param mixed $tags rule array; anything else is ignored
     */
    public function addExplicitWhitelist($tags)
    {
        if (!is_array($tags)) {
            return;
        }
        $this->explicitWhitelist = array_merge($this->explicitWhitelist, $this->collectExplicitRules($tags));
    }

    /**
     * Add one or more tagname blacklist rules.
     *
     * @param mixed $tagname a tag name or a (nested) array of tag names
     */
    public function addTagnameBlacklist($tagname)
    {
        $this->tagnameBlacklist = $this->addNames($this->tagnameBlacklist, $tagname);
    }

    /**
     * Add one or more tagname whitelist rules.
     *
     * @param mixed $tagname a tag name or a (nested) array of tag names
     */
    public function addTagnameWhitelist($tagname)
    {
        $this->tagnameWhitelist = $this->addNames($this->tagnameWhitelist, $tagname);
    }

    /**
     * Add one or more attribute blacklist rules.
     *
     * @param mixed $att an attribute name or a (nested) array of attribute names
     */
    public function addAttributeBlacklist($att)
    {
        $this->attributeBlacklist = $this->addNames($this->attributeBlacklist, $att);
    }

    /**
     * Add one or more attribute whitelist rules.
     *
     * @param mixed $att an attribute name or a (nested) array of attribute names
     */
    public function addAttributeWhitelist($att)
    {
        $this->attributeWhitelist = $this->addNames($this->attributeWhitelist, $att);
    }

    /**
     * Remove one or more tagname blacklist rules.
     *
     * @param mixed $tagname a tag name or a (nested) array of tag names
     */
    public function removeTagnameBlacklist($tagname)
    {
        $this->tagnameBlacklist = $this->removeNames($this->tagnameBlacklist, $tagname);
    }

    /**
     * Remove one or more tagname whitelist rules.
     *
     * @param mixed $tagname a tag name or a (nested) array of tag names
     */
    public function removeTagnameWhitelist($tagname)
    {
        $this->tagnameWhitelist = $this->removeNames($this->tagnameWhitelist, $tagname);
    }

    /**
     * Remove one or more attribute blacklist rules.
     *
     * @param mixed $att an attribute name or a (nested) array of attribute names
     */
    public function removeAttributeBlacklist($att)
    {
        $this->attributeBlacklist = $this->removeNames($this->attributeBlacklist, $att);
    }

    /**
     * Remove one or more attribute whitelist rules.
     *
     * @param mixed $att an attribute name or a (nested) array of attribute names
     */
    public function removeAttributeWhitelist($att)
    {
        $this->attributeWhitelist = $this->removeNames($this->attributeWhitelist, $att);
    }

    /**
     * Sanitizes and returns supplied HTML with all blacklisted and
     * non-whitelisted tags/attributes removed.
     *
     * Steps: (1) blacklisted tags are stripped textually, (2) the remainder is
     * parsed with phpQuery and every element is checked by processElement(),
     * (3) explicitly blacklisted tag/attribute pairs are removed unless the same
     * pair is explicitly whitelisted, (4) queued elements are removed.
     *
     * Only element nodes are inspected; other node types (text, comments) are
     * left untouched. If the document cannot be initialised (invalid mode) an
     * empty string is returned so unfiltered markup is never handed back.
     *
     * @param string|array $content markup, or an array of markup fragments
     *                              (processed one by one and concatenated)
     * @return string sanitized markup
     */
    public function process($content)
    {
        $this->removedNodes = array();

        if (is_array($content)) {
            $processed = array();
            foreach ($content as $key => $fragment) {
                $processed[$key] = $this->process($fragment);
            }
            return implode('', $processed);
        }

        $content = trim((string) $content);
        if ($content === '') {
            return '';
        }

        $content = $this->stripBlacklistedMarkup($content);

        $dom = $this->initDom($content);
        if (!is_object($dom)) {
            // initDom() already recorded the error; fail closed
            return '';
        }

        // NOTE(review): relies on phpQuery exposing the selected DOM nodes via ->elements,
        // exactly as the previous implementation did.
        foreach ($dom->elements as $el) {
            $this->processElement($el);
        }

        foreach ($this->explicitPairs($this->explicitBlacklist) as $rule) {
            list($tagname, $attribute) = $rule;
            if ($this->isExplicitlyAllowed($tagname, $attribute)) {
                // explicit allowed overrides explicit deny
                continue;
            }
            foreach (pq($tagname . '[' . $attribute . ']') as $node) {
                if ($node instanceof DOMElement) {
                    $node->removeAttribute($attribute);
                }
            }
        }

        foreach ($this->removedNodes as $node) {
            pq($node)->remove();
        }

        return (string) $dom;
    }

    /**
     * Creates a new phpQuery document for the configured mode.
     *
     * @param string $content
     * @return mixed the phpQuery document, or $content unchanged when the mode
     *               is invalid (an error is recorded in that case)
     */
    protected function initDom($content)
    {
        switch ($this->htmlMode) {
            case 'xhtml':
                $dom = phpQuery::newDocumentXhtml($content);
                break;
            case 'html':
                $dom = phpQuery::newDocumentHtml($content);
                break;
            default:
                $this->errors[] = 'Invalid mode: should be xhtml or html';
                return $content;
        }
        return $dom;
    }

    /**
     * Removes blacklisted and non-whitelisted attributes from the element,
     * recurses into all child nodes, and queues the element itself for removal
     * when its tag is blacklisted or not whitelisted. A tag that has at least
     * one explicit whitelist rule is always kept.
     *
     * Removal is deferred (see process()) so the child node lists are not
     * modified while they are being walked.
     *
     * @param DOMElement|DOMDocument $el any other value is ignored
     */
    protected function processElement(&$el)
    {
        if (!($el instanceof DOMElement) && !($el instanceof DOMDocument)) {
            return;
        }

        $attributes = $el->attributes;
        if ($attributes !== null) {
            $invalidAtts = array();
            foreach ($attributes as $att) {
                $attname = $att->name;
                if (in_array($attname, $this->attributeBlacklist, true)) {
                    // the attribute blacklist wins even over explicit whitelist rules
                    $invalidAtts[] = $attname;
                } elseif (!in_array($attname, $this->attributeWhitelist, true)
                    && !$this->isExplicitlyAllowed($el->nodeName, $attname)
                ) {
                    $invalidAtts[] = $attname;
                }
            }
            // remove after iterating so the attribute map is not modified mid-loop
            foreach ($invalidAtts as $attname) {
                $el->removeAttribute($attname);
            }
        }

        $childNodes = $el->childNodes;
        if (is_object($childNodes) && $childNodes->length > 0) {
            for ($i = 0, $max = $childNodes->length; $i < $max; $i++) {
                // a variable is required because the parameter is taken by reference
                $child = $childNodes->item($i);
                $this->processElement($child);
            }
        }

        if ($el instanceof DOMDocument) {
            return;
        }

        $tagname = $el->nodeName;
        if ($this->hasExplicitWhitelistFor($tagname)) {
            return;
        }
        if (in_array($tagname, $this->tagnameBlacklist, true) || !in_array($tagname, $this->tagnameWhitelist, true)) {
            $this->removedNodes[] = $el;
        }
    }

    /**
     * Textual pre-pass: removes blacklisted elements (opening tag through the
     * nearest closing tag) and any remaining stray opening/closing tags.
     *
     * Matching is case-insensitive, spans newlines, is non-greedy, and requires
     * the tag name to be followed by whitespace, '/' or '>' so that blacklisting
     * 'b' does not touch 'body' or 'br'.
     *
     * @param string $content
     * @return string
     */
    protected function stripBlacklistedMarkup($content)
    {
        foreach ($this->tagnameBlacklist as $tagname) {
            if (!is_scalar($tagname)) {
                continue;
            }
            $tagname = trim((string) $tagname);
            if ($tagname === '') {
                // an empty name would turn the pattern into "strip every tag"
                continue;
            }

            $name = preg_quote($tagname, '#');
            $patterns = array(
                '#<' . $name . '(?=[\s/>])[^>]*>.*?</' . $name . '(?=[\s/>])[^>]*>#is',
                '#</?' . $name . '(?=[\s/>])[^>]*>#i',
            );

            $stripped = preg_replace($patterns, '', $content);
            if ($stripped === null) {
                // keep the content; the DOM pass still removes blacklisted elements
                $this->errors[] = 'Could not pre-strip tag: ' . $tagname;
                continue;
            }
            $content = $stripped;
        }

        return $content;
    }

    /**
     * Flattens a scalar or (nested) array into a list of trimmed, non-empty names.
     *
     * @param mixed $names
     * @return array
     */
    protected function normalizeNames($names)
    {
        $flat = array();

        if (is_array($names)) {
            foreach ($names as $name) {
                $flat = array_merge($flat, $this->normalizeNames($name));
            }
            return $flat;
        }

        if (is_scalar($names)) {
            $name = trim((string) $names);
            if ($name !== '') {
                $flat[] = $name;
            }
        }

        return $flat;
    }

    /**
     * Returns $list with the given names appended, skipping duplicates.
     *
     * @param array $list
     * @param mixed $names scalar or (nested) array of names
     * @return array
     */
    protected function addNames(array $list, $names)
    {
        foreach ($this->normalizeNames($names) as $name) {
            if (!in_array($name, $list, true)) {
                $list[] = $name;
            }
        }
        return $list;
    }

    /**
     * Returns $list without the given names, re-indexed from zero.
     *
     * @param array $list
     * @param mixed $names scalar or (nested) array of names
     * @return array
     */
    protected function removeNames(array $list, $names)
    {
        return array_values(array_diff($list, $this->normalizeNames($names)));
    }

    /**
     * Converts user supplied explicit rules into the storage format: a list of
     * array(tagname => attname) pairs.
     *
     * A string key names the tag; values under an integer key inherit the tag
     * name of the enclosing string key (so 'img' => array('src', 'alt') works).
     * Entries without a usable tag or attribute name are skipped.
     *
     * @param array       $rules
     * @param string|null $owner tag name inherited from the enclosing key
     * @return array
     */
    protected function collectExplicitRules(array $rules, $owner = null)
    {
        $collected = array();

        foreach ($rules as $key => $value) {
            $tagname = is_string($key) ? trim($key) : $owner;

            if (is_array($value)) {
                $collected = array_merge($collected, $this->collectExplicitRules($value, $tagname));
                continue;
            }

            if (!is_scalar($value) || $tagname === null || $tagname === '') {
                continue;
            }

            $attname = trim((string) $value);
            if ($attname !== '') {
                $collected[] = array($tagname => $attname);
            }
        }

        return $collected;
    }

    /**
     * Normalises an explicit rule list into array(tagname, attname) tuples.
     *
     * Understands the storage format written by addExplicit*() as well as plain
     * tagname => attname / tagname => array(attname, ...) maps, which can end up
     * in the lists when they are merged in through setConfig().
     *
     * @param array $rules
     * @return array list of array(string $tagname, string $attname)
     */
    protected function explicitPairs(array $rules)
    {
        $pairs = array();

        foreach ($rules as $key => $rule) {
            if (is_array($rule)) {
                foreach ($rule as $innerKey => $attname) {
                    $tagname = is_string($innerKey) ? $innerKey : (is_string($key) ? $key : null);
                    if ($tagname !== null && is_scalar($attname)) {
                        $pairs[] = array($tagname, (string) $attname);
                    }
                }
            } elseif (is_string($key) && is_scalar($rule)) {
                $pairs[] = array($key, (string) $rule);
            }
        }

        return $pairs;
    }

    /**
     * @param string $tagname
     * @param string $attname
     * @return bool true when the exact tag/attribute pair is in the explicit whitelist
     */
    protected function isExplicitlyAllowed($tagname, $attname)
    {
        foreach ($this->explicitPairs($this->explicitWhitelist) as $rule) {
            if ($rule[0] === (string) $tagname && $rule[1] === (string) $attname) {
                return true;
            }
        }
        return false;
    }

    /**
     * @param string $tagname
     * @return bool true when at least one explicit whitelist rule names this tag
     */
    protected function hasExplicitWhitelistFor($tagname)
    {
        foreach ($this->explicitPairs($this->explicitWhitelist) as $rule) {
            if ($rule[0] === (string) $tagname) {
                return true;
            }
        }
        return false;
    }
}