PageRenderTime 65ms CodeModel.GetById 30ms RepoModel.GetById 0ms app.codeStats 0ms

/View/Helper/CleanerHelper.php

https://github.com/mendo/utils
PHP | 451 lines | 306 code | 24 blank | 121 comment | 87 complexity | d4b7907c4dbcf443e2983068a51dbde8 MD5 | raw file
Possible License(s): MIT
  1. <?php
  2. /**
  3. * Copyright 2007-2010, Cake Development Corporation (http://cakedc.com)
  4. *
  5. * Licensed under The MIT License
  6. * Redistributions of files must retain the above copyright notice.
  7. *
  8. * @copyright Copyright 2007-2010, Cake Development Corporation (http://cakedc.com)
  9. * @license MIT License (http://www.opensource.org/licenses/mit-license.php)
  10. */
  /**
   * Utils Plugin
   *
   * Utils Cleaner Helper
   *
   * White-list based HTML cleaner for view output. A tag is kept only when its
   * name is listed in $tagsArray, and a kept tag retains only the attributes
   * listed in $attributesArray. The helper also converts [googlevideo],
   * [youtubevideo] and [breakvideo] BBCode tags into embed markup.
   *
   * @package utils
   * @subpackage utils.views.cleaner
   */
  class CleanerHelper extends AppHelper {

    /**
     * Other helpers. Html is used to register the video embed scripts.
     *
     * @var array
     */
    public $helpers = array('Html');

    /**
     * When true, an image src containing /media/display/<36 char id> is
     * rewritten to /media/display/thumb/<id>.
     *
     * @var boolean $replaceImgThumb
     */
    public $replaceImgThumb = false;

    /**
     * Lower-case names of the tags that survive cleaning.
     *
     * @var array $tagsArray
     */
    public $tagsArray = array();

    /**
     * Lower-case names of the attributes that survive cleaning.
     *
     * @var array $attributesArray
     */
    public $attributesArray = array();

    /**
     * Named presets accepted by configure() and clean().
     *
     * @var array $config
     */
    public $config = array(
      'full' => array(
        'replaceImgThumb' => false,
        'tagsArray' => array('pre', 'br', 'p', 'strong', 'em', 'ul', 'ol', 'li', 'dl', 'dd', 'dt', 'a', 'img', 'i', 'u', 'b'),
        'attributesArray' => array('lang', 'src', 'href', 'title', 'alt', 'width', 'height')),
      'mini' => array(
        'replaceImgThumb' => true,
        'tagsArray' => array('br', 'p', 'strong', 'em', 'ul', 'ol', 'li', 'dl', 'dd', 'dt', 'a', 'img', 'i', 'u', 'b'),
        'attributesArray' => array('src', 'href', 'title', 'alt')),
      'small' => array(
        'replaceImgThumb' => false,
        'tagsArray' => array('img'),
        'attributesArray' => array('src', 'href', 'title'))
    );

    /**
     * Constructor. Applies the 'full' preset before delegating to the parent.
     *
     * @param View $View
     * @param array $settings
     */
    public function __construct(View $View, $settings = array()) {
      $this->View = $View;
      foreach ($this->config['full'] as $key => $value) {
        $this->{$key} = $value;
      }
      parent::__construct($View, $settings);
    }

    /**
     * Configuration of cleaner. Possible to call separately or from clean().
     *
     * The settings are stored on the helper instance, so they stay in effect
     * for later clean() calls until configure() is called again.
     *
     * @param mixed $options Preset name ('full', 'mini', 'small'), an array with
     *   any of the keys tagsArray, attributesArray, replaceImgThumb, or null to
     *   keep the current settings
     * @return void
     */
    public function configure($options) {
      if ($options === null) {
        return;
      }
      if (is_string($options) && isset($this->config[$options])) {
        foreach ($this->config[$options] as $key => $value) {
          $this->{$key} = $value;
        }
        return;
      }
      if (!is_array($options) && !($options instanceof ArrayAccess)) {
        // Unknown preset name or unsupported type: leave settings untouched.
        return;
      }
      if (isset($options['tagsArray']) && is_array($options['tagsArray'])) {
        $this->tagsArray = array_map('strtolower', $options['tagsArray']);
      }
      if (isset($options['attributesArray']) && is_array($options['attributesArray'])) {
        $this->attributesArray = array_map('strtolower', $options['attributesArray']);
      }
      if (isset($options['replaceImgThumb']) && is_bool($options['replaceImgThumb'])) {
        $this->replaceImgThumb = $options['replaceImgThumb'];
      }
    }

    /**
     * Main clean method
     *
     * Strings are cleaned directly. For arrays only the top-level string
     * elements are cleaned; every other value is returned unchanged.
     *
     * @param mixed $data String or array of strings to clean
     * @param mixed $options String for config or array to set custom options
     * @return mixed Cleaned data, same shape as $data
     */
    public function clean($data, $options = null) {
      $this->configure($options);
      // Helper::clean() is deliberately not called: it produced broken texts.
      if (is_string($data)) {
        return $this->__remove($data);
      }
      if (is_array($data)) {
        foreach ($data as $key => $value) {
          if (is_string($value)) {
            $data[$key] = $this->__remove($value);
          }
        }
      }
      return $data;
    }

    /**
     * Remove all unwanted tags and attributes.
     *
     * The filter is re-applied until its output stops changing, so markup that
     * only becomes visible after one pass is filtered as well.
     *
     * @param string $cleaned
     * @return string
     */
    public function __remove($cleaned) {
      do {
        $previous = $cleaned;
        $cleaned = $this->__tagsFilter($cleaned);
      } while ($previous !== $cleaned);
      return $cleaned;
    }

    /**
     * Strip a string of certain tags (single pass).
     *
     * Text outside tags is copied through. A '<' with no following '>' ends the
     * pass and the remaining text is kept as is. Tag names must match
     * [a-z][a-z0-9]* and the name is split from its attributes on a space only,
     * so e.g. "<br/>" is removed while "<br />" is kept.
     *
     * @param string $cleaned
     * @return string
     */
    public function __tagsFilter($cleaned) {
      $beforeTag = '';
      $afterTag = $cleaned;
      $tagOpenStart = strpos($cleaned, '<');
      while ($tagOpenStart !== false) {
        // Copy the text in front of the tag; $afterTag now starts at '<'.
        $beforeTag .= substr($afterTag, 0, $tagOpenStart);
        $afterTag = substr($afterTag, $tagOpenStart);
        $fromTagOpen = (string)substr($afterTag, 1);
        $tagOpenEnd = strpos($fromTagOpen, '>');
        if ($tagOpenEnd === false) {
          break;
        }
        $tagOpenNested = strpos($fromTagOpen, '<');
        if ($tagOpenNested !== false && $tagOpenNested < $tagOpenEnd) {
          // Another '<' comes before the '>': keep the first '<' as plain text
          // and restart from the nested one.
          $beforeTag .= substr($afterTag, 0, $tagOpenNested + 1);
          $afterTag = substr($afterTag, $tagOpenNested + 1);
          $tagOpenStart = strpos($afterTag, '<');
          continue;
        }
        $currentTag = (string)substr($fromTagOpen, 0, $tagOpenEnd);
        $tagLength = strlen($currentTag);
        // Length of '<' + tag body + '>'.
        $consumed = $tagLength + 2;
        list($tagName) = explode(' ', $currentTag);
        $isCloseTag = (substr($currentTag, 0, 1) === '/');
        if ($isCloseTag) {
          $tagName = (string)substr($tagName, 1);
        }
        // An empty tag ("<>") also ends up here and is dropped. It must not be
        // copied to the output together with the remaining text: doing so made
        // the string grow on every pass of __remove() and never stabilise.
        if (!preg_match('/^[a-z][a-z0-9]*$/i', $tagName)) {
          $afterTag = (string)substr($afterTag, $consumed);
          $tagOpenStart = strpos($afterTag, '<');
          continue;
        }
        $lowerName = strtolower($tagName);
        if (in_array($lowerName, $this->tagsArray, true)) {
          if ($isCloseTag) {
            $beforeTag .= '</' . $tagName . '>';
          } else {
            $attributeSet = $this->_splitAttributes($currentTag);
            if ($this->__filterAttr($attributeSet, $lowerName)) {
              $beforeTag .= '<' . $tagName;
              foreach ($attributeSet as $attribute) {
                $beforeTag .= ' ' . $attribute;
              }
              // Emit a self-closing tag when no closing tag follows. Tag names
              // are case-insensitive in HTML, hence stripos().
              if (stripos($fromTagOpen, '</' . $tagName) !== false) {
                $beforeTag .= '>';
              } else {
                $beforeTag .= ' />';
              }
            }
          }
        }
        $afterTag = (string)substr($afterTag, $consumed);
        $tagOpenStart = strpos($afterTag, '<');
      }
      return $beforeTag . $afterTag;
    }

    /**
     * Split the text between '<' and '>' into raw attribute strings.
     *
     * An attribute starts after a space. When the remaining text contains '='
     * and a pair of double quotes, the attribute runs up to the closing quote;
     * otherwise it runs up to the next space, or to the end of the tag.
     *
     * @param string $tagBody Tag content without the angle brackets
     * @return array Raw attribute strings, e.g. 'href="/x"'
     */
    protected function _splitAttributes($tagBody) {
      $attributes = array();
      $remaining = $tagBody;
      $space = strpos($remaining, ' ');
      while ($space !== false) {
        $fromSpace = (string)substr($remaining, $space + 1);
        $nextSpace = strpos($fromSpace, ' ');
        $closeQuote = false;
        if (strpos($fromSpace, '=') !== false) {
          $openQuote = strpos($fromSpace, '"');
          if ($openQuote !== false) {
            $closeQuote = strpos($fromSpace, '"', $openQuote + 1);
          }
        }
        $attribute = '';
        if ($closeQuote !== false) {
          $attribute = substr($fromSpace, 0, $closeQuote + 1);
        } elseif ($nextSpace !== false) {
          $attribute = substr($fromSpace, 0, $nextSpace);
        }
        if (!$attribute) {
          // No delimiter found: the attribute is everything that is left.
          $attribute = $fromSpace;
        }
        $attributes[] = $attribute;
        $remaining = (string)substr($fromSpace, strlen($attribute));
        $space = strpos($remaining, ' ');
      }
      return $attributes;
    }

    /**
     * Strip a tag of certain attributes
     *
     * On success $attributeSet is replaced by the list of kept attributes,
     * each normalised to name="value".
     *
     * @param array $attributeSet Raw attribute strings, modified by reference
     * @param string $tag Lower-case tag name
     * @return boolean False when the whole tag must be dropped
     */
    public function __filterAttr(&$attributeSet, $tag) {
      $newAttrSet = array();
      foreach ($attributeSet as $rawAttribute) {
        if (!$rawAttribute) {
          continue;
        }
        // Split on the first '=' only; the value may itself contain '='.
        $parts = explode('=', trim($rawAttribute), 2);
        list($name) = explode(' ', $parts[0]);
        $value = isset($parts[1]) ? $parts[1] : null;
        // Letters only, and never an event handler (onclick, ONLOAD, ...).
        if (!preg_match('/^[a-z]*\z/i', $name) || strncasecmp($name, 'on', 2) === 0) {
          continue;
        }
        if ($value) {
          $value = str_replace('&#', '', $value);
          $value = preg_replace('/\s+/', '', $value);
          $value = str_replace('"', '', $value);
          if (substr($value, 0, 1) == "'" && substr($value, -1) == "'") {
            $value = (string)substr($value, 1, strlen($value) - 2);
          }
          $value = stripslashes($value);
        }
        $lowerName = strtolower($name);
        $checkValue = (string)$value;
        $hasCssExpression = ($lowerName == 'style' && stripos($checkValue, 'expression') !== false);
        if ($hasCssExpression || $this->__checkPos($checkValue)) {
          continue;
        }
        // The post filter runs for every attribute, allowed or not, because it
        // can veto the whole tag.
        if (!$this->__postFilter($tag, $lowerName, $value)) {
          return false;
        }
        if (!in_array($lowerName, $this->attributesArray, true)) {
          continue;
        }
        if ($value) {
          $newAttrSet[] = $name . '="' . $value . '"';
        } elseif ((string)$value === '0') {
          $newAttrSet[] = $name . '="0"';
        } else {
          // No value: written in the name="name" form.
          $newAttrSet[] = $name . '="' . $name . '"';
        }
      }
      $attributeSet = $newAttrSet;
      return true;
    }

    /**
     * Check whether an attribute value contains a script-capable URL scheme.
     *
     * Both the value as given and an entity-decoded copy with whitespace and
     * control characters removed are checked, so "javascript&colon;" or
     * "java&Tab;script:" are detected too.
     *
     * @param string $attrval
     * @return boolean True when a blacklisted scheme is present
     */
    public function __checkPos($attrval) {
      $checkList = array('javascript:', 'behaviour:', 'vbscript:', 'mocha:', 'livescript:');
      $raw = (string)$attrval;
      $flags = ENT_QUOTES;
      if (defined('ENT_HTML5')) {
        // Needed to decode HTML5-only entities such as &colon; and &Tab;.
        $flags |= ENT_HTML5;
      }
      $decoded = html_entity_decode($raw, $flags, 'UTF-8');
      $decoded = (string)preg_replace('/[\x00-\x20]+/', '', $decoded);
      foreach (array(strtolower($raw), strtolower($decoded)) as $haystack) {
        foreach ($checkList as $check) {
          if (strpos($haystack, $check) !== false) {
            return true;
          }
        }
      }
      return false;
    }

    /**
     * Filter external image links
     *
     * Only acts on the src attribute of img tags. An external source vetoes
     * the tag; a local one may be rewritten to its thumbnail URL when
     * $replaceImgThumb is enabled.
     *
     * @param string $tag Lower-case tag name
     * @param string $attribute Lower-case attribute name
     * @param string $attributeValue Attribute value, may be rewritten
     * @return boolean False when the tag must be dropped
     */
    public function __postFilter($tag, $attribute, &$attributeValue) {
      if ($tag != 'img' || $attribute != 'src') {
        return true;
      }
      $source = (string)$attributeValue;
      if (!$this->_isLocalUrl($source)) {
        return false;
      }
      if ($this->replaceImgThumb && preg_match('/(?<path>\/media\/display\/)(?<uuid>[0-9a-z-]{36})/', $source, $matches)) {
        $attributeValue = $matches['path'] . 'thumb/' . $matches['uuid'];
      }
      return true;
    }

    /**
     * Tell whether a URL points at this site.
     *
     * Accepts site-relative paths ("/x") and URLs that start with
     * FULL_BASE_URL followed by '/', '?', '#' or nothing. Protocol-relative
     * URLs ("//host/x") are treated as external.
     *
     * @param string $url
     * @return boolean
     */
    protected function _isLocalUrl($url) {
      if (substr($url, 0, 1) === '/') {
        return substr($url, 1, 1) !== '/';
      }
      $base = rtrim(FULL_BASE_URL, '/');
      $length = strlen($base);
      if ($length === 0 || strncasecmp($url, $base, $length) !== 0) {
        return false;
      }
      // Guard against look-alike hosts such as http://example.com.evil.org.
      $next = (string)substr($url, $length, 1);
      return $next === '' || $next === '/' || $next === '?' || $next === '#';
    }

    /**
     * Replace all video BBCode tags. Despite its name this method does not
     * touch image tags; it only delegates to bbcode2js().
     *
     * @param string $text
     * @param boolean $showVideo False to strip the video tags instead of embedding
     * @return string
     */
    public function replaceAllImageTags($text, $showVideo = true) {
      return $this->bbcode2js($text, $showVideo);
    }

    /**
     * Convert BBCode to Javascript for video embedding
     *
     * Repeats the single-tag conversion until the text no longer changes.
     *
     * @param string $text
     * @param boolean $show False to strip the video tags instead of embedding
     * @return string
     */
    public function bbcode2js($text, $show = true) {
      do {
        $previous = $text;
        $text = $this->__bb2js($text, $show);
      } while ($previous !== $text);
      return $text;
    }

    /**
     * BB 2 JS
     *
     * Handles a single video tag per call: the first [googlevideo] tag, else
     * the first [youtubevideo] tag, else the first [breakvideo] tag. A tag
     * without a closing marker loses only its opening marker. A tag without a
     * recognisable video id is removed together with its content.
     *
     * @param string $text
     * @param boolean $show False to strip the video tag instead of embedding
     * @return string
     */
    public function __bb2js($text, $show = true) {
      foreach (array('googlevideo', 'youtubevideo', 'breakvideo') as $tag) {
        $open = '[' . $tag . ']';
        $start = strpos($text, $open);
        if ($start === false) {
          continue;
        }
        $close = '[/' . $tag . ']';
        $bodyStart = $start + strlen($open);
        $end = strpos($text, $close, $bodyStart);
        if ($end === false) {
          return substr($text, 0, $start) . substr($text, $bodyStart);
        }
        // The id is taken from the tag content only, never from other text.
        $url = substr($text, $bodyStart, $end - $bodyStart);
        $markup = $this->_videoMarkup($tag, $url, $show);
        return substr($text, 0, $start) . $markup . substr($text, $end + strlen($close));
      }
      return $text;
    }

    /**
     * Build the embed markup for one video tag.
     *
     * For Google Video and YouTube the player script and an inline script
     * block are registered through the Html helper.
     *
     * @param string $tag One of googlevideo, youtubevideo, breakvideo
     * @param string $url Content found between the opening and closing tag
     * @param boolean $show False to produce no markup
     * @return string Markup, or an empty string when hidden or no id was found
     */
    protected function _videoMarkup($tag, $url, $show) {
      if ($tag === 'googlevideo') {
        $pattern = '/docid=([-a-z0-9]+)/i';
      } elseif ($tag === 'youtubevideo') {
        $pattern = '/v=([-_a-z0-9]+)/i';
      } else {
        // Break.com: the id is the last path segment of the URL.
        $pattern = '/\/([a-zA-Z0-9]+)\s*\z/';
      }
      if (!$show || !preg_match($pattern, $url, $found)) {
        return '';
      }
      $vid = $found[1];
      if ($tag === 'breakvideo') {
        return '<object width="464" height="392"><param name="movie" value="http://embed.break.com/' . $vid . '"></param><param name="allowScriptAccess" value="always"></param><embed src="http://embed.break.com/' . $vid . '" type="application/x-shockwave-flash" allowScriptAccess=always width="464" height="392"></embed></object>';
      }
      if ($tag === 'googlevideo') {
        $href = 'http://video.google.com/videoplay?docid=' . $vid;
        $embedFunction = 'vvq_googlevideo';
        $height = '265';
      } else {
        $href = 'http://www.youtube.com/watch?v=' . $vid;
        $embedFunction = 'vvq_youtube';
        $height = '271';
      }
      $this->Html->script('vipers-video-quicktags', array('inline' => false));
      $this->Html->scriptBlock($embedFunction . '("vvq_' . $vid . '", "325", "' . $height . '", "' . $vid . '");', array('inline' => false), true);
      return '<p id="vvq_' . $vid . '"><a href="' . $href . '">' . $href . '</a></p><br />';
    }
  }