PageRenderTime 48ms CodeModel.GetById 19ms RepoModel.GetById 0ms app.codeStats 0ms

/View/Helper/CleanerHelper.php

https://github.com/bartvanremortele/utils
PHP | 452 lines | 307 code | 24 blank | 121 comment | 87 complexity | 60c17c7b0c7f5356024433eeb686687a MD5 | raw file
Possible License(s): MIT
  1. <?php
  2. /**
  3. * Copyright 2007-2010, Cake Development Corporation (http://cakedc.com)
  4. *
  5. * Licensed under The MIT License
  6. * Redistributions of files must retain the above copyright notice.
  7. *
  8. * @copyright Copyright 2007-2010, Cake Development Corporation (http://cakedc.com)
  9. * @license MIT License (http://www.opensource.org/licenses/mit-license.php)
  10. */
  11. /**
  12. * Utils Plugin
  13. *
  14. * Utils Cleaner Helper
  15. *
  16. * @package utils
  17. * @subpackage utils.views.cleaner
  18. */
  19. App::uses('AppHelper', 'View/Helper');
  20. class CleanerHelper extends AppHelper {
  21. /**
  22. * Other helpers
  23. *
  24. * @var array
  25. */
  26. public $helpers = array('Html');
  27. /**
  28. * Replace image thumb
  29. *
  30. * @var boolean $replaceImgThumb
  31. */
  32. public $replaceImgThumb = false;
  33. /**
  34. * Tags
  35. *
  36. * @var array $tagsArray
  37. */
  38. public $tagsArray = array();
  39. /**
  40. * Attributes
  41. *
  42. * @var array $attributesArray
  43. */
  44. public $attributesArray = array();
  45. /**
  46. * Holds different configurations
  47. *
  48. * @var array $config
  49. */
  50. public $config = array(
  51. 'full' => array(
  52. 'replaceImgThumb' => false,
  53. 'tagsArray' => array('pre', 'br', 'p', 'strong', 'em', 'ul', 'ol', 'li', 'dl', 'dd', 'dt', 'a', 'img', 'i', 'u', 'b'),
  54. 'attributesArray' => array('lang', 'src', 'href', 'title', 'alt', 'width', 'height')),
  55. 'mini' => array(
  56. 'replaceImgThumb' => true,
  57. 'tagsArray' => array('br', 'p', 'strong', 'em', 'ul', 'ol', 'li', 'dl', 'dd', 'dt', 'a', 'img', 'i', 'u', 'b'),
  58. 'attributesArray' => array('src', 'href', 'title', 'alt')),
  59. 'small' => array(
  60. 'replaceImgThumb' => false,
  61. 'tagsArray' => array('img'),
  62. 'attributesArray' => array('src', 'href', 'title'))
  63. );
  64. /**
  65. * Constructor
  66. *
  67. */
  68. public function __construct(View $View, $settings = array()) {
  69. $this->View = $View;
  70. foreach ($this->config['full'] as $key => $value) {
  71. $this->{$key} = $value;
  72. }
  73. return parent::__construct($View, $settings);
  74. }
  75. /**
  76. * Configuration of cleaner. possible to call separately or from clean method
  77. *
  78. * @param string $options
  79. * @return void
  80. */
  81. public function configure($options) {
  82. if (is_null($options)) {
  83. return;
  84. //$options = 'full';
  85. }
  86. if (is_string($options) && isset($this->config[$options])) {
  87. foreach ($this->config[$options] as $key => $value) {
  88. $this->{$key} = $value;
  89. }
  90. } else {
  91. if (isset($options['tagsArray']) && is_array($options['tagsArray'])) {
  92. $this->tagsArray = array_map('strtolower', $options['tagsArray']);
  93. }
  94. if (isset($options['attributesArray']) && is_array($options['attributesArray'])) {
  95. $this->attributesArray = array_map('strtolower', $options['attributesArray']);
  96. }
  97. if (isset($options['replaceImgThumb']) && is_bool($options['replaceImgThumb'])) {
  98. $this->replaceImgThumb = $options['replaceImgThumb'];
  99. }
  100. }
  101. }
  102. /**
  103. * Main clean method
  104. *
  105. * @param string $data
  106. * @param mixed $options String for config or array to set custom options
  107. * @return string
  108. */
  109. public function clean($data, $options = null) {
  110. $this->configure($options);
  111. $cleaned = $data;
  112. // disable call to Helper::clean because it lead to the broken texts
  113. // $cleaned = parent::clean($data);
  114. if (is_array($cleaned)) {
  115. foreach($cleaned as $key => $value) {
  116. if (is_string($value)) {
  117. $cleaned[$key] = $this->__remove($value);
  118. }
  119. }
  120. return $cleaned;
  121. } else if (is_string($cleaned)) {
  122. return $this->__remove($cleaned);
  123. } else {
  124. return $cleaned;
  125. }
  126. }
  127. /**
  128. * Remove all unwanted tags and attributes.
  129. *
  130. * @param string $cleaned
  131. * @return void
  132. */
  133. function __remove($cleaned) {
  134. do {
  135. $oldstring = $cleaned;
  136. $cleaned = $this->__tagsFilter($cleaned);
  137. } while ($oldstring != $cleaned);
  138. return $cleaned;
  139. }
  140. /**
  141. * Strip a string of certain tags
  142. *
  143. * @param string $cleaned
  144. * @return void
  145. */
  146. function __tagsFilter($cleaned) {
  147. $beforeTag = NULL;
  148. $afterTag = $cleaned;
  149. $tagOpenStart = strpos($cleaned, '<');
  150. while($tagOpenStart !== false) {
  151. $beforeTag .= substr($afterTag, 0, $tagOpenStart);
  152. $afterTag = substr($afterTag, $tagOpenStart);
  153. $fromTagOpen = substr($afterTag, 1);
  154. $tagOpenEnd = strpos($fromTagOpen, '>');
  155. if ($tagOpenEnd === false) {
  156. break;
  157. }
  158. $tagOpenNested = strpos($fromTagOpen, '<');
  159. if (($tagOpenNested !== false) && ($tagOpenNested < $tagOpenEnd)) {
  160. $beforeTag .= substr($afterTag, 0, ($tagOpenNested+1));
  161. $afterTag = substr($afterTag, ($tagOpenNested+1));
  162. $tagOpenStart = strpos($afterTag, '<');
  163. continue;
  164. }
  165. $tagOpenNested = (strpos($fromTagOpen, '<') + $tagOpenStart + 1);
  166. $currentTag = substr($fromTagOpen, 0, $tagOpenEnd);
  167. $tagLength = strlen($currentTag);
  168. if (!$tagOpenEnd) {
  169. $beforeTag .= $afterTag;
  170. $tagOpenStart = strpos($afterTag, '<');
  171. }
  172. $tagLeft = $currentTag;
  173. $attributeSet = array();
  174. $currentSpace = strpos($tagLeft, ' ');
  175. if (substr($currentTag, 0, 1) == "/") {
  176. $isCloseTag = true;
  177. list($tagName) = explode(' ', $currentTag);
  178. $tagName = substr($tagName, 1);
  179. } else {
  180. $isCloseTag = false;
  181. list($tagName) = explode(' ', $currentTag);
  182. }
  183. if ((!preg_match("/^[a-z][a-z0-9]*$/i",$tagName)) || (!$tagName)) {
  184. $afterTag = substr($afterTag, ($tagLength + 2));
  185. $tagOpenStart = strpos($afterTag, '<');
  186. continue;
  187. }
  188. while ($currentSpace !== false) {
  189. $fromSpace = substr($tagLeft, ($currentSpace + 1));
  190. $nextSpace = strpos($fromSpace, ' ');
  191. $openQuotes = strpos($fromSpace, '"');
  192. $closeQuotes = strpos(substr($fromSpace, ($openQuotes + 1)), '"') + $openQuotes + 1;
  193. if (strpos($fromSpace, '=') !== false) {
  194. if (($openQuotes !== false) && (strpos(substr($fromSpace, ($openQuotes+1)), '"') !== false)) {
  195. $attribute = substr($fromSpace, 0, ($closeQuotes + 1));
  196. }
  197. else {
  198. $attribute = substr($fromSpace, 0, $nextSpace);
  199. }
  200. } else {
  201. $attribute = substr($fromSpace, 0, $nextSpace);
  202. }
  203. if (!$attribute) {
  204. $attribute = $fromSpace;
  205. }
  206. $attributeSet[] = $attribute;
  207. $tagLeft = substr($fromSpace, strlen($attribute));
  208. $currentSpace = strpos($tagLeft, ' ');
  209. }
  210. $tagFound = in_array(strtolower($tagName), $this->tagsArray);
  211. if ($tagFound) {
  212. if (!$isCloseTag) {
  213. if ($this->__filterAttr($attributeSet, strtolower($tagName))) {
  214. $beforeTag .= '<' . $tagName;
  215. for ($i = 0; $i < count($attributeSet); $i++) {
  216. $beforeTag .= ' ' . $attributeSet[$i];
  217. }
  218. if (strpos($fromTagOpen, "</" . $tagName)) {
  219. $beforeTag .= '>';
  220. } else {
  221. $beforeTag .= ' />';
  222. }
  223. }
  224. } else {
  225. $beforeTag .= '</' . $tagName . '>';
  226. }
  227. }
  228. $afterTag = substr($afterTag, ($tagLength + 2));
  229. $tagOpenStart = strpos($afterTag, '<');
  230. }
  231. $beforeTag .= $afterTag;
  232. return $beforeTag;
  233. }
  234. /**
  235. * Strip a tag of certain attributes
  236. *
  237. * @param string $attributeSet
  238. * @param string $tag
  239. * @return void
  240. */
  241. function __filterAttr(&$attributeSet, $tag) {
  242. $newAttrSet = array();
  243. for ($i = 0; $i <count($attributeSet); $i++) {
  244. if (!$attributeSet[$i]) {
  245. continue;
  246. }
  247. $attributeSubSet = explode('=', trim($attributeSet[$i]));
  248. if (count($attributeSubSet)>2) {
  249. $attributeSubSetTmp = $attributeSubSet;
  250. $attributeSubSetTmp = array_reverse($attributeSubSetTmp);
  251. array_pop($attributeSubSetTmp);
  252. $attributeSubSetTmp = array_reverse($attributeSubSetTmp);
  253. $attributeSubSet[1] = join('=', $attributeSubSetTmp);
  254. }
  255. list($attributeSubSet[0]) = explode(' ', $attributeSubSet[0]);
  256. if (!eregi("^[a-z]*$",$attributeSubSet[0]) || substr($attributeSubSet[0], 0, 2) == 'on') {
  257. continue;
  258. }
  259. if ($attributeSubSet[1]) {
  260. $attributeSubSet[1] = str_replace('&#', '', $attributeSubSet[1]);
  261. $attributeSubSet[1] = preg_replace('/\s+/', '', $attributeSubSet[1]);
  262. $attributeSubSet[1] = str_replace('"', '', $attributeSubSet[1]);
  263. if ((substr($attributeSubSet[1], 0, 1) == "'") && (substr($attributeSubSet[1], (strlen($attributeSubSet[1]) - 1), 1) == "'")) {
  264. $attributeSubSet[1] = substr($attributeSubSet[1], 1, (strlen($attributeSubSet[1]) - 2));
  265. }
  266. $attributeSubSet[1] = stripslashes($attributeSubSet[1]);
  267. }
  268. if (((strpos(strtolower($attributeSubSet[1]), 'expression') !== false) && (strtolower($attributeSubSet[0]) == 'style')) || $this->__checkPos($attributeSubSet[1])) {
  269. continue;
  270. }
  271. $attributeFound = in_array(strtolower($attributeSubSet[0]), $this->attributesArray);
  272. if (!$this->__postFilter($tag, strtolower($attributeSubSet[0]), $attributeSubSet[1])) {
  273. return false;
  274. }
  275. if ($attributeFound) {
  276. if ($attributeSubSet[1]) {
  277. $newAttrSet[] = $attributeSubSet[0] . '="' . $attributeSubSet[1] . '"';
  278. } elseif ($attributeSubSet[1] == "0") {
  279. $newAttrSet[] = $attributeSubSet[0] . '="0"';
  280. } else {
  281. $newAttrSet[] = $attributeSubSet[0] . '="' . $attributeSubSet[0] . '"';
  282. }
  283. }
  284. }
  285. $attributeSet = $newAttrSet;
  286. return true;
  287. }
  288. /**
  289. * Check pos
  290. *
  291. * @param string $attrval
  292. * @return void
  293. */
  294. function __checkPos($attrval) {
  295. $checkList = array('javascript:', 'behaviour:', 'vbscript:', 'mocha:', 'livescript:');
  296. $result = false;
  297. foreach ($checkList as $check) {
  298. $result = $result || (strpos(strtolower($attrval), $check) !== false);
  299. }
  300. return $result;
  301. }
  302. /**
  303. * Filter external image links
  304. *
  305. * @param string $tag
  306. * @param string $attribute
  307. * @param string $attributeValue
  308. * @return void
  309. */
  310. function __postFilter($tag, $attribute, &$attributeValue) {
  311. if ($tag == 'img' && $attribute == 'src') {
  312. if (substr($attributeValue, 0, 1) != '/' && strpos($attributeValue, FULL_BASE_URL) === false) {
  313. return false;
  314. } else {
  315. if ($this->replaceImgThumb && preg_match('/(?<path>\/media\/display\/)(?<uuid>[0-9a-z-]{36})/', $attributeValue, $matches)) {
  316. $attributeValue = $matches['path'] . 'thumb/' . $matches['uuid'];
  317. }
  318. }
  319. }
  320. return true;
  321. }
  322. /**
  323. * Replave all image tags
  324. *
  325. * @param string $text
  326. * @param boolean $showVideo
  327. * @return void
  328. */
  329. function replaceAllImageTags($text, $showVideo = true) {
  330. $text = $this->bbcode2js($text, $showVideo);
  331. // while (preg_match('/src="(\/media\/display\/)([0-9a-z-]{36})"/', $text, $matches)) {
  332. // $name = 'src="' . $matches[1] . $matches[2] . '"';
  333. // $newName = 'src="' . $matches[1] . 'thumb/' . $matches[2] . '"';
  334. // $text = str_replace($name, $newName, $text);
  335. // }
  336. return $text;
  337. }
  338. /**
  339. * Convert BBCode to Javascript for video embedding
  340. *
  341. * @param string $text
  342. * @param boolean $show
  343. * @return void
  344. */
  345. function bbcode2js($text, $show = true) {
  346. do {
  347. $oldstring = $text;
  348. $text = $this->__bb2js($text, $show);
  349. } while ($oldstring != $text);
  350. return $text;
  351. }
  352. /**
  353. * BB 2 JS
  354. *
  355. * @param string $text
  356. * @param boolean $show
  357. * @return void
  358. */
  359. function __bb2js($text, $show = true) {
  360. if(preg_match('/\[googlevideo\]/', $text)) {
  361. $vid = null;
  362. if (preg_match('/(?:docid=)([-a-z0-9]+)/i', $text, $found)) {
  363. if (isset($found[1])) {
  364. $vid = $found[1];
  365. }
  366. }
  367. if ($vid) {
  368. $this->Html->script('vipers-video-quicktags', array('inline' => false));
  369. $this->Html->scriptBlock('vvq_googlevideo("vvq_' . $vid . '", "325", "265", "' . $vid . '");', array('inline' => false), true);
  370. $content = "<p id=\"vvq_$vid\">";
  371. $content .= '<a href="http://video.google.com/videoplay?docid=' . $vid .'">';
  372. $content .= 'http://video.google.com/videoplay?docid=' . $vid . '</a></p><br />';
  373. if (!$show) {
  374. $content = '';
  375. }
  376. $text = str_replace('[googlevideo]http://video.google.com/videoplay?docid=' . $vid . '[/googlevideo]', $content, $text);
  377. } else {
  378. $start = strpos($text, '[googlevideo]');
  379. $endStr = '[/googlevideo]';
  380. $end = strpos($text, $endStr, $start) + strlen($endStr);
  381. $text = substr($text, 0, $start) . substr($text, $end);
  382. }
  383. } elseif (preg_match('/\[youtubevideo\]/', $text)) {
  384. $vid = null;
  385. if (preg_match('/(?:v=)([-_a-z0-9]+)/i', $text, $found)) {
  386. if (isset($found[1])) {
  387. $vid = $found[1];
  388. }
  389. }
  390. if ($vid) {
  391. $this->Html->script('vipers-video-quicktags', array('inline' => false));
  392. $this->Html->scriptBlock('vvq_youtube("vvq_' . $vid . '", "325", "271", "' . $vid . '");', array('inline' => false), true);
  393. $content = "<p id=\"vvq_$vid\">";
  394. $content .= '<a href="http://www.youtube.com/watch?v=' . $vid . '">';
  395. $content .= 'http://www.youtube.com/watch?v=' . $vid . '</a></p><br />';
  396. if (!$show) {
  397. $content = '';
  398. }
  399. $text = str_replace('[youtubevideo]http://www.youtube.com/watch?v=' . $vid . '[/youtubevideo]', $content, $text);
  400. } else {
  401. $start = strpos($text, '[youtubevideo]');
  402. $endStr = '[/youtubevideo]';
  403. $end = strpos($text, $endStr, $start) + strlen($endStr);
  404. $text = substr($text, 0, $start) . substr($text, $end);
  405. }
  406. } elseif (preg_match('/\[breakvideo\]/', $text)) {
  407. $vid = null;
  408. if (preg_match('/\/([a-zA-Z0-9]+)(\[)/', $text, $found)) {
  409. if (isset($found[1])) {
  410. $vid = $found[1];
  411. }
  412. }
  413. if ($vid) {
  414. $content = '<object width="464" height="392"><param name="movie" value="http://embed.break.com/' . $vid . '"></param><param name="allowScriptAccess" value="always"></param><embed src="http://embed.break.com/' . $vid . '" type="application/x-shockwave-flash" allowScriptAccess=always width="464" height="392"></embed></object>';
  415. if (!$show) {
  416. $content = '';
  417. }
  418. $text = str_replace('[breakvideo]http://embed.break.com/' . $vid . '[/breakvideo]', $content, $text);
  419. } else {
  420. $start = strpos($text, '[breakvideo]');
  421. $endStr = '[/breakvideo]';
  422. $end = strpos($text, $endStr, $start) + strlen($endStr);
  423. $text = substr($text, 0, $start) . substr($text, $end);
  424. }
  425. }
  426. return $text;
  427. }
  428. }