PageRenderTime 25ms CodeModel.GetById 5ms RepoModel.GetById 0ms app.codeStats 0ms

/views/helpers/cleaner.php

http://github.com/CakeDC/comments
PHP | 444 lines | 305 code | 22 blank | 117 comment | 87 complexity | e627a36cb49ac79bed1fc2b31002935c MD5 | raw file
  1. <?php
  2. /**
  3. * Copyright 2009-2010, Cake Development Corporation (http://cakedc.com)
  4. *
  5. * Licensed under The MIT License
  6. * Redistributions of files must retain the above copyright notice.
  7. *
  8. * @copyright Copyright 2009-2010, Cake Development Corporation (http://cakedc.com)
  9. * @license MIT License (http://www.opensource.org/licenses/mit-license.php)
  10. */
  11. /**
  12. * Cleaner Helper
  13. *
  14. * @package comments
  15. * @subpackage comments.views.helpers
  16. */
  17. class CleanerHelper extends AppHelper {
  /**
   * Helpers this helper relies on
   *
   * @var array
   */
  public $helpers = array('Javascript');

  /**
   * When true, image sources pointing at /media/display/<uuid> are rewritten
   * to their /media/display/thumb/<uuid> variant while cleaning
   *
   * @var boolean
   */
  public $replaceImgThumb = false;

  /**
   * Names of the tags that are kept; tag names are lower-cased before lookup
   *
   * @var array
   */
  public $tagsArray = array();

  /**
   * Names of the attributes that are kept; attribute names are lower-cased
   * before lookup
   *
   * @var array
   */
  public $attributesArray = array();

  /**
   * Named presets selectable by passing their key to configure() or clean()
   *
   * Each preset provides values for $replaceImgThumb, $tagsArray and
   * $attributesArray.
   *
   * @var array
   */
  public $config = array(
      'full' => array(
          'replaceImgThumb' => false,
          'tagsArray' => array(
              'pre', 'br', 'p', 'strong', 'em',
              'ul', 'ol', 'li', 'dl', 'dd', 'dt',
              'a', 'img', 'i', 'u', 'b',
          ),
          'attributesArray' => array(
              'lang', 'src', 'href', 'title', 'alt', 'width', 'height',
          ),
      ),
      'mini' => array(
          'replaceImgThumb' => true,
          'tagsArray' => array(
              'br', 'p', 'strong', 'em',
              'ul', 'ol', 'li', 'dl', 'dd', 'dt',
              'a', 'img', 'i', 'u', 'b',
          ),
          'attributesArray' => array(
              'src', 'href', 'title', 'alt',
          ),
      ),
      'small' => array(
          'replaceImgThumb' => false,
          'tagsArray' => array(
              'img',
          ),
          'attributesArray' => array(
              'src', 'href', 'title',
          ),
      ),
  );
  /**
   * Constructor
   *
   * Applies the 'full' preset so the helper starts with a usable tag and
   * attribute whitelist. The method used to be spelled "__contruct", which
   * PHP never invokes automatically, so the defaults were never applied.
   *
   * @param mixed $options Forwarded unchanged to the parent constructor
   */
  public function __construct($options = array()) {
      foreach ($this->config['full'] as $key => $value) {
          $this->{$key} = $value;
      }
      parent::__construct($options);
  }

  /**
   * Former, misspelled name of the constructor
   *
   * Kept only so code that called it explicitly keeps working.
   *
   * @deprecated Use the real constructor instead
   */
  public function __contruct() {
      return $this->__construct();
  }
  /**
   * Configure the cleaner; callable on its own or implicitly through clean()
   *
   * - null leaves the current settings untouched.
   * - A string naming an entry of $config copies that preset onto the helper.
   * - Anything else is read as an array of overrides: 'tagsArray' and
   *   'attributesArray' (arrays, stored lower-cased) and 'replaceImgThumb'
   *   (boolean). Keys that are missing or of the wrong type are ignored.
   *
   * @param mixed $options Preset name, array of overrides, or null
   */
  public function configure($options) {
      if ($options === null) {
          return;
      }

      if (is_string($options) && isset($this->config[$options])) {
          foreach ($this->config[$options] as $property => $setting) {
              $this->{$property} = $setting;
          }
          return;
      }

      foreach (array('tagsArray', 'attributesArray') as $listName) {
          if (isset($options[$listName]) && is_array($options[$listName])) {
              $this->{$listName} = array_map('strtolower', $options[$listName]);
          }
      }

      if (isset($options['replaceImgThumb']) && is_bool($options['replaceImgThumb'])) {
          $this->replaceImgThumb = $options['replaceImgThumb'];
      }
  }
  /**
   * Main clean method
   *
   * Applies $options through configure() and then filters the data. A string
   * is filtered directly; for an array every string element of the first
   * level is filtered and other elements are kept; any other value is
   * returned as is.
   *
   * Helper::clean() is deliberately not called because it led to broken
   * texts.
   *
   * @param mixed $data String or array of strings to clean
   * @param mixed $options String for config or array to set custom options
   * @return mixed Cleaned data, same shape as the input
   */
  public function clean($data, $options = null) {
      $this->configure($options);

      if (is_string($data)) {
          return $this->__remove($data);
      }
      if (!is_array($data)) {
          return $data;
      }

      foreach ($data as $index => $item) {
          if (is_string($item)) {
              $data[$index] = $this->__remove($item);
          }
      }
      return $data;
  }
  /**
   * Run the tag filter repeatedly until a pass no longer changes the string
   *
   * @param string $cleaned Text to filter
   * @return string Filtered text
   */
  function __remove($cleaned) {
      while (true) {
          $previous = $cleaned;
          $cleaned = $this->__tagsFilter($previous);
          if ($previous == $cleaned) {
              return $cleaned;
          }
      }
  }
  /**
   * Strip a string of all tags that are not whitelisted
   *
   * Performs a single left-to-right pass: text between tags is copied, tags
   * with an invalid name or a name missing from $tagsArray are dropped, and
   * whitelisted opening tags are re-emitted with the attributes that survive
   * __filterAttr(). Callers repeat the pass until the result is stable.
   *
   * An empty tag ("<>") is dropped like any other tag with an invalid name.
   * It used to make the rest of the input be appended twice, so that the
   * repeat-until-stable loop never finished.
   *
   * @param string $cleaned Text to filter
   * @return string Filtered text
   */
  function __tagsFilter($cleaned) {
      $beforeTag = '';
      $afterTag = (string) $cleaned;
      $tagOpenStart = strpos($afterTag, '<');

      while ($tagOpenStart !== false) {
          // Copy the text in front of the tag and position on its "<".
          $beforeTag .= substr($afterTag, 0, $tagOpenStart);
          $afterTag = (string) substr($afterTag, $tagOpenStart);
          $fromTagOpen = (string) substr($afterTag, 1);

          $tagOpenEnd = strpos($fromTagOpen, '>');
          if ($tagOpenEnd === false) {
              // NOTE(review): an unterminated "<..." is passed through verbatim;
              // confirm the surrounding markup cannot complete it into a tag.
              break;
          }

          // Another "<" before the closing ">": keep this "<" as literal text
          // and restart the scan at the nested one.
          $tagOpenNested = strpos($fromTagOpen, '<');
          if ($tagOpenNested !== false && $tagOpenNested < $tagOpenEnd) {
              $beforeTag .= substr($afterTag, 0, $tagOpenNested + 1);
              $afterTag = (string) substr($afterTag, $tagOpenNested + 1);
              $tagOpenStart = strpos($afterTag, '<');
              continue;
          }

          $currentTag = (string) substr($fromTagOpen, 0, $tagOpenEnd);
          $tagLength = strlen($currentTag);

          $isCloseTag = (substr($currentTag, 0, 1) == '/');
          list($tagName) = explode(' ', $currentTag);
          if ($isCloseTag) {
              $tagName = (string) substr($tagName, 1);
          }

          // Comments, doctypes, "<>" and anything else without a plain
          // alphanumeric name are removed entirely ("+ 2" skips "<" and ">").
          if (!preg_match('/^[a-z][a-z0-9]*$/i', $tagName)) {
              $afterTag = (string) substr($afterTag, $tagLength + 2);
              $tagOpenStart = strpos($afterTag, '<');
              continue;
          }

          // Split the remainder of the tag into raw attribute strings.
          $attributeSet = array();
          $tagLeft = $currentTag;
          $currentSpace = strpos($tagLeft, ' ');
          while ($currentSpace !== false) {
              $fromSpace = (string) substr($tagLeft, $currentSpace + 1);
              $nextSpace = strpos($fromSpace, ' ');
              $openQuotes = strpos($fromSpace, '"');
              $closeQuotes = ($openQuotes === false) ? false : strpos($fromSpace, '"', $openQuotes + 1);

              if (strpos($fromSpace, '=') !== false && $closeQuotes !== false) {
                  // name="value": take everything up to the closing quote.
                  $attribute = substr($fromSpace, 0, $closeQuotes + 1);
              } else {
                  // Unquoted or valueless: take everything up to the next space.
                  $attribute = substr($fromSpace, 0, (int) $nextSpace);
              }
              if (!$attribute) {
                  // Nothing was cut off (no further space): use the rest.
                  $attribute = $fromSpace;
              }

              $attributeSet[] = $attribute;
              $tagLeft = (string) substr($fromSpace, strlen($attribute));
              $currentSpace = strpos($tagLeft, ' ');
          }

          $lowerTagName = strtolower($tagName);
          if (in_array($lowerTagName, $this->tagsArray, true)) {
              if ($isCloseTag) {
                  $beforeTag .= '</' . $tagName . '>';
              } elseif ($this->__filterAttr($attributeSet, $lowerTagName)) {
                  $beforeTag .= '<' . $tagName;
                  foreach ($attributeSet as $attribute) {
                      $beforeTag .= ' ' . $attribute;
                  }
                  // Self-close the tag when no closing tag follows it.
                  if (strpos($fromTagOpen, '</' . $tagName)) {
                      $beforeTag .= '>';
                  } else {
                      $beforeTag .= ' />';
                  }
              }
          }

          $afterTag = (string) substr($afterTag, $tagLength + 2);
          $tagOpenStart = strpos($afterTag, '<');
      }

      return $beforeTag . $afterTag;
  }
  /**
   * Strip a tag of all attributes that are not whitelisted
   *
   * Each raw attribute is split into name and value. Attributes are dropped
   * when the name is not purely alphabetic, when the name starts with "on"
   * in any letter case (event handlers), when a style value contains
   * "expression", or when the value contains one of the script schemes known
   * to __checkPos(). Remaining attributes are kept only if their lower-cased
   * name is listed in $attributesArray, and are re-emitted as name="value".
   *
   * @param array $attributeSet Raw attribute strings; replaced by the filtered
   *   list when the method returns true, left untouched when it returns false
   * @param string $tag Lower-cased name of the tag the attributes belong to
   * @return boolean False when __postFilter() rejects the tag as a whole
   */
  function __filterAttr(&$attributeSet, $tag) {
      $newAttrSet = array();

      foreach ($attributeSet as $rawAttribute) {
          if (!$rawAttribute) {
              continue;
          }

          // Split on the first "=" only, so the value may itself contain "=".
          $parts = explode('=', trim($rawAttribute), 2);
          list($name) = explode(' ', $parts[0]);
          // Valueless attributes (e.g. "selected") have no second part.
          $value = isset($parts[1]) ? $parts[1] : '';

          // preg_match() replaces eregi(), which no longer exists in PHP 7+.
          if (!preg_match('/^[a-z]*\z/i', $name) || strtolower((string) substr($name, 0, 2)) === 'on') {
              continue;
          }

          if ($value) {
              $value = str_replace('&#', '', $value);
              $value = preg_replace('/\s+/', '', $value);
              $value = str_replace('"', '', $value);
              // Unwrap a value that is enclosed in single quotes.
              if (substr($value, 0, 1) == "'" && substr($value, -1) == "'") {
                  $value = (string) substr($value, 1, strlen($value) - 2);
              }
              $value = stripslashes($value);
          }

          $lowerName = strtolower($name);
          $isStyleExpression = ($lowerName == 'style') && (strpos(strtolower($value), 'expression') !== false);
          if ($isStyleExpression || $this->__checkPos($value)) {
              continue;
          }

          // Runs for every attribute, whitelisted or not, and may veto the tag
          // or rewrite the value.
          if (!$this->__postFilter($tag, $lowerName, $value)) {
              return false;
          }

          if (!in_array($lowerName, $this->attributesArray)) {
              continue;
          }
          if ($value) {
              $newAttrSet[] = $name . '="' . $value . '"';
          } elseif ($value === '0') {
              $newAttrSet[] = $name . '="0"';
          } else {
              // Valueless attribute: emit it in name="name" form.
              $newAttrSet[] = $name . '="' . $name . '"';
          }
      }

      $attributeSet = $newAttrSet;
      return true;
  }
  /**
   * Tell whether an attribute value contains a script scheme
   *
   * The comparison is case-insensitive and matches the scheme anywhere in
   * the value.
   *
   * @param string $attrval Attribute value to inspect
   * @return boolean True when one of the listed schemes occurs in the value
   */
  function __checkPos($attrval) {
      $schemes = array('javascript:', 'behaviour:', 'vbscript:', 'mocha:', 'livescript:');
      $haystack = strtolower($attrval);
      foreach ($schemes as $scheme) {
          if (strpos($haystack, $scheme) !== false) {
              return true;
          }
      }
      return false;
  }
  /**
   * Filter external image links
   *
   * Only the src attribute of img tags is inspected; everything else is
   * accepted. An image source is accepted when it is a root-relative path
   * ("/...") or starts with FULL_BASE_URL followed by "/". Protocol-relative
   * sources ("//host/..." and the "/\host" variant) point at another host
   * and are rejected, as are sources that merely contain FULL_BASE_URL
   * somewhere after the start.
   *
   * When $replaceImgThumb is enabled, an accepted source containing
   * /media/display/<uuid> is rewritten to /media/display/thumb/<uuid>.
   *
   * @param string $tag Lower-cased tag name
   * @param string $attribute Lower-cased attribute name
   * @param string $attributeValue Attribute value, may be rewritten
   * @return boolean False when the tag must be dropped
   */
  function __postFilter($tag, $attribute, &$attributeValue) {
      if ($tag != 'img' || $attribute != 'src') {
          return true;
      }

      $value = (string) $attributeValue;
      $secondChar = (string) substr($value, 1, 1);
      $isRootRelative = (substr($value, 0, 1) == '/') && $secondChar !== '/' && $secondChar !== '\\';

      if (!$isRootRelative) {
          $base = rtrim(FULL_BASE_URL, '/');
          // Require the base URL as a prefix that ends at a path boundary, so
          // neither "http://evil/?u=<base>" nor "<base>.evil.tld" passes.
          $isLocalAbsolute = ($base !== '') && ($value === $base || strpos($value, $base . '/') === 0);
          if (!$isLocalAbsolute) {
              return false;
          }
      }

      if ($this->replaceImgThumb && preg_match('/(?<path>\/media\/display\/)(?<uuid>[0-9a-z-]{36})/', $attributeValue, $matches)) {
          $attributeValue = $matches['path'] . 'thumb/' . $matches['uuid'];
      }
      return true;
  }
  /**
   * Replace All Image Tags
   *
   * Currently only delegates to bbcode2js(); the former rewriting of
   * /media/display/ image sources to their thumb variant is disabled here.
   *
   * @param string $text Text to convert
   * @param boolean $showVideo Passed on to bbcode2js() as its $show argument
   * @return string Converted text
   */
  function replaceAllImageTags($text, $showVideo = true) {
      return $this->bbcode2js($text, $showVideo);
  }
  /**
   * Convert BBCode video tags to embedded video markup
   *
   * Applies __bb2js() again and again until a pass leaves the text
   * unchanged.
   *
   * @param string $text Text to convert
   * @param boolean $show Passed on to __bb2js()
   * @return string Converted text
   */
  function bbcode2js($text, $show = true) {
      while (true) {
          $before = $text;
          $text = $this->__bb2js($before, $show);
          if ($before == $text) {
              return $text;
          }
      }
  }
  /**
   * Convert a single BBCode video tag
   *
   * Handles the first [googlevideo], [youtubevideo] or [breakvideo] tag
   * found, checked in that order, and returns after processing one tag:
   *
   * - A tag whose body is exactly the provider URL followed by a video id is
   *   replaced by the embed markup, or by nothing when $show is false. For
   *   Google Video and YouTube the player script and its init call are
   *   registered through the Javascript helper.
   * - A tag with any other body is removed together with its body.
   * - An opening tag without a closing tag is removed on its own, leaving the
   *   text after it in place.
   *
   * Every branch consumes the opening tag it found, so repeated application
   * always terminates. The id is read from the tag body itself, not from the
   * first id-like match anywhere in the text, which lets several tags of the
   * same provider be converted one after another.
   *
   * @param string $text Text to convert
   * @param boolean $show Whether embed markup is emitted for recognised tags
   * @return string Text with at most one tag processed
   */
  function __bb2js($text, $show = true) {
      $providers = array(
          'googlevideo' => array(
              'url' => 'http://video.google.com/videoplay?docid=',
              'id' => '[-a-zA-Z0-9]+',
              'player' => 'vvq_googlevideo',
              'height' => '265',
          ),
          'youtubevideo' => array(
              'url' => 'http://www.youtube.com/watch?v=',
              'id' => '[-_a-zA-Z0-9]+',
              'player' => 'vvq_youtube',
              'height' => '271',
          ),
          'breakvideo' => array(
              'url' => 'http://embed.break.com/',
              'id' => '[a-zA-Z0-9]+',
              'player' => null,
              'height' => null,
          ),
      );

      foreach ($providers as $tag => $provider) {
          $openTag = '[' . $tag . ']';
          $start = strpos($text, $openTag);
          if ($start === false) {
              continue;
          }

          $bodyStart = $start + strlen($openTag);
          $closeTag = '[/' . $tag . ']';
          $bodyEnd = strpos($text, $closeTag, $bodyStart);
          if ($bodyEnd === false) {
              // Unterminated tag: drop just the opening marker.
              return substr($text, 0, $start) . substr($text, $bodyStart);
          }

          $body = (string) substr($text, $bodyStart, $bodyEnd - $bodyStart);
          $pattern = '/^' . preg_quote($provider['url'], '/') . '(' . $provider['id'] . ')$/D';

          $content = '';
          if (preg_match($pattern, $body, $found)) {
              $vid = $found[1];
              if ($provider['player'] !== null) {
                  $this->Javascript->link('vipers-video-quicktags', false);
                  $this->Javascript->codeBlock($provider['player'] . '("vvq_' . $vid . '", "325", "' . $provider['height'] . '", "' . $vid . '");', array('inline' => false), true);
                  $content = "<p id=\"vvq_$vid\">";
                  $content .= '<a href="' . $provider['url'] . $vid . '">';
                  $content .= $provider['url'] . $vid . '</a></p><br />';
              } else {
                  $content = '<object width="464" height="392"><param name="movie" value="http://embed.break.com/' . $vid . '"></param><param name="allowScriptAccess" value="always"></param><embed src="http://embed.break.com/' . $vid . '" type="application/x-shockwave-flash" allowScriptAccess=always width="464" height="392"></embed></object>';
              }
              if (!$show) {
                  $content = '';
              }
          }

          return substr($text, 0, $start) . $content . substr($text, $bodyEnd + strlen($closeTag));
      }

      return $text;
  }
  422. }