PageRenderTime 30ms CodeModel.GetById 0ms RepoModel.GetById 0ms app.codeStats 0ms

/View/Helper/CleanerHelper.php

http://github.com/CakeDC/comments
PHP | 444 lines | 306 code | 21 blank | 117 comment | 87 complexity | 076617b1f5fd30c6fa13c261773710e3 MD5 | raw file
  1. <?php
  2. /**
  3. * Copyright 2009 - 2013, Cake Development Corporation (http://cakedc.com)
  4. *
  5. * Licensed under The MIT License
  6. * Redistributions of files must retain the above copyright notice.
  7. *
  8. * @copyright Copyright 2009 - 2013, Cake Development Corporation (http://cakedc.com)
  9. * @license MIT License (http://www.opensource.org/licenses/mit-license.php)
  10. */
  11. App::uses('AppHelper', 'View/Helper');
  12. /**
  13. * Cleaner Helper
  14. *
  15. * @package comments
  16. * @subpackage comments.views.helpers
  17. */
  18. class CleanerHelper extends AppHelper {
    /**
     * Other helpers this helper relies on.
     *
     * The Javascript helper is used by __bb2js() to register the video
     * player script and its inline bootstrap code.
     *
     * @var array
     */
    public $helpers = array('Javascript');

    /**
     * Replace image thumb
     *
     * When true, __postFilter() rewrites image sources of the form
     * /media/display/<uuid> to /media/display/thumb/<uuid>.
     *
     * @var boolean $replaceImgThumb
     */
    public $replaceImgThumb = false;

    /**
     * Tags
     *
     * Names of the tags that __tagsFilter() keeps; the lookup is done with
     * the lower-cased tag name, so entries are expected in lower case.
     *
     * @var array $tagsArray
     */
    public $tagsArray = array();

    /**
     * Attributes
     *
     * Names of the attributes that __filterAttr() keeps on allowed tags; the
     * lookup is done with the lower-cased attribute name.
     *
     * @var array $attributesArray
     */
    public $attributesArray = array();

    /**
     * Holds different configurations
     *
     * Named presets. Passing one of the keys ('full', 'mini', 'small') to
     * configure() or clean() copies that preset's values onto the properties
     * of the same name.
     *
     * @var array $config
     */
    public $config = array(
        'full' => array(
            'replaceImgThumb' => false,
            'tagsArray' => array('pre', 'br', 'p', 'strong', 'em', 'ul', 'ol', 'li', 'dl', 'dd', 'dt', 'a', 'img', 'i', 'u', 'b'),
            'attributesArray' => array('lang', 'src', 'href', 'title', 'alt', 'width', 'height')),
        'mini' => array(
            'replaceImgThumb' => true,
            'tagsArray' => array('br', 'p', 'strong', 'em', 'ul', 'ol', 'li', 'dl', 'dd', 'dt', 'a', 'img', 'i', 'u', 'b'),
            'attributesArray' => array('src', 'href', 'title', 'alt')),
        'small' => array(
            'replaceImgThumb' => false,
            'tagsArray' => array('img'),
            'attributesArray' => array('src', 'href', 'title'))
    );
  62. /**
  63. * Constructor
  64. *
  65. */
  66. public function __contruct() {
  67. foreach ($this->config['full'] as $key => $value) {
  68. $this->{$key} = $value;
  69. }
  70. return parent::__construct();
  71. }
  72. /**
  73. * Configuration of cleaner. possible to call separately or from clean method
  74. *
  75. * @param array $options
  76. */
  77. public function configure($options) {
  78. if (is_null($options)) {
  79. return;
  80. //$options = 'full';
  81. }
  82. if (is_string($options) && isset($this->config[$options])) {
  83. foreach ($this->config[$options] as $key => $value) {
  84. $this->{$key} = $value;
  85. }
  86. } else {
  87. if (isset($options['tagsArray']) && is_array($options['tagsArray'])) {
  88. $this->tagsArray = array_map('strtolower', $options['tagsArray']);
  89. }
  90. if (isset($options['attributesArray']) && is_array($options['attributesArray'])) {
  91. $this->attributesArray = array_map('strtolower', $options['attributesArray']);
  92. }
  93. if (isset($options['replaceImgThumb']) && is_bool($options['replaceImgThumb'])) {
  94. $this->replaceImgThumb = $options['replaceImgThumb'];
  95. }
  96. }
  97. }
  98. /**
  99. * Main clean method
  100. *
  101. * @param string $data
  102. * @param mixed $options String for config or array to set custom options
  103. */
  104. public function clean($data, $options = null) {
  105. $this->configure($options);
  106. $cleaned = $data;
  107. // disable call to Helper::clean because it lead to the broken texts
  108. // $cleaned = parent::clean($data);
  109. if (is_array($cleaned)) {
  110. foreach($cleaned as $key => $value) {
  111. if (is_string($value)) {
  112. $cleaned[$key] = $this->__remove($value);
  113. }
  114. }
  115. return $cleaned;
  116. } else if (is_string($cleaned)) {
  117. return $this->__remove($cleaned);
  118. } else {
  119. return $cleaned;
  120. }
  121. }
  122. /**
  123. * Iteratively remove all unwanted tags and attributes
  124. *
  125. * @param string $cleaned
  126. * @return string
  127. */
  128. function __remove($cleaned) {
  129. do {
  130. $oldstring = $cleaned;
  131. $cleaned = $this->__tagsFilter($cleaned);
  132. } while ($oldstring != $cleaned);
  133. return $cleaned;
  134. }
  135. /**
  136. * Strip a string of certain tags
  137. *
  138. * @param string $cleaned
  139. * @return string
  140. */
  141. function __tagsFilter($cleaned) {
  142. $beforeTag = NULL;
  143. $afterTag = $cleaned;
  144. $tagOpenStart = strpos($cleaned, '<');
  145. while($tagOpenStart !== false) {
  146. $beforeTag .= substr($afterTag, 0, $tagOpenStart);
  147. $afterTag = substr($afterTag, $tagOpenStart);
  148. $fromTagOpen = substr($afterTag, 1);
  149. $tagOpenEnd = strpos($fromTagOpen, '>');
  150. if ($tagOpenEnd === false) {
  151. break;
  152. }
  153. $tagOpenNested = strpos($fromTagOpen, '<');
  154. if (($tagOpenNested !== false) && ($tagOpenNested < $tagOpenEnd)) {
  155. $beforeTag .= substr($afterTag, 0, ($tagOpenNested+1));
  156. $afterTag = substr($afterTag, ($tagOpenNested+1));
  157. $tagOpenStart = strpos($afterTag, '<');
  158. continue;
  159. }
  160. $tagOpenNested = (strpos($fromTagOpen, '<') + $tagOpenStart + 1);
  161. $currentTag = substr($fromTagOpen, 0, $tagOpenEnd);
  162. $tagLength = strlen($currentTag);
  163. if (!$tagOpenEnd) {
  164. $beforeTag .= $afterTag;
  165. $tagOpenStart = strpos($afterTag, '<');
  166. }
  167. $tagLeft = $currentTag;
  168. $attributeSet = array();
  169. $currentSpace = strpos($tagLeft, ' ');
  170. if (substr($currentTag, 0, 1) == "/") {
  171. $isCloseTag = true;
  172. list($tagName) = explode(' ', $currentTag);
  173. $tagName = substr($tagName, 1);
  174. } else {
  175. $isCloseTag = false;
  176. list($tagName) = explode(' ', $currentTag);
  177. }
  178. if ((!preg_match("/^[a-z][a-z0-9]*$/i",$tagName)) || (!$tagName)) {
  179. $afterTag = substr($afterTag, ($tagLength + 2));
  180. $tagOpenStart = strpos($afterTag, '<');
  181. continue;
  182. }
  183. while ($currentSpace !== false) {
  184. $fromSpace = substr($tagLeft, ($currentSpace + 1));
  185. $nextSpace = strpos($fromSpace, ' ');
  186. $openQuotes = strpos($fromSpace, '"');
  187. $closeQuotes = strpos(substr($fromSpace, ($openQuotes + 1)), '"') + $openQuotes + 1;
  188. if (strpos($fromSpace, '=') !== false) {
  189. if (($openQuotes !== false) && (strpos(substr($fromSpace, ($openQuotes+1)), '"') !== false)) {
  190. $attribute = substr($fromSpace, 0, ($closeQuotes + 1));
  191. }
  192. else {
  193. $attribute = substr($fromSpace, 0, $nextSpace);
  194. }
  195. } else {
  196. $attribute = substr($fromSpace, 0, $nextSpace);
  197. }
  198. if (!$attribute) {
  199. $attribute = $fromSpace;
  200. }
  201. $attributeSet[] = $attribute;
  202. $tagLeft = substr($fromSpace, strlen($attribute));
  203. $currentSpace = strpos($tagLeft, ' ');
  204. }
  205. $tagFound = in_array(strtolower($tagName), $this->tagsArray);
  206. if ($tagFound) {
  207. if (!$isCloseTag) {
  208. if ($this->__filterAttr($attributeSet, strtolower($tagName))) {
  209. $beforeTag .= '<' . $tagName;
  210. for ($i = 0; $i < count($attributeSet); $i++) {
  211. $beforeTag .= ' ' . $attributeSet[$i];
  212. }
  213. if (strpos($fromTagOpen, "</" . $tagName)) {
  214. $beforeTag .= '>';
  215. } else {
  216. $beforeTag .= ' />';
  217. }
  218. }
  219. } else {
  220. $beforeTag .= '</' . $tagName . '>';
  221. }
  222. }
  223. $afterTag = substr($afterTag, ($tagLength + 2));
  224. $tagOpenStart = strpos($afterTag, '<');
  225. }
  226. $beforeTag .= $afterTag;
  227. return $beforeTag;
  228. }
  229. /**
  230. * strip a tag of certain attributes
  231. *
  232. * @param string $attributeSet
  233. * @param string $tag
  234. * @return string
  235. */
  236. function __filterAttr(&$attributeSet, $tag) {
  237. $newAttrSet = array();
  238. for ($i = 0; $i <count($attributeSet); $i++) {
  239. if (!$attributeSet[$i]) {
  240. continue;
  241. }
  242. $attributeSubSet = explode('=', trim($attributeSet[$i]));
  243. if (count($attributeSubSet)>2) {
  244. $attributeSubSetTmp = $attributeSubSet;
  245. $attributeSubSetTmp = array_reverse($attributeSubSetTmp);
  246. array_pop($attributeSubSetTmp);
  247. $attributeSubSetTmp = array_reverse($attributeSubSetTmp);
  248. $attributeSubSet[1] = join('=', $attributeSubSetTmp);
  249. }
  250. list($attributeSubSet[0]) = explode(' ', $attributeSubSet[0]);
  251. if (!eregi("^[a-z]*$",$attributeSubSet[0]) || substr($attributeSubSet[0], 0, 2) == 'on') {
  252. continue;
  253. }
  254. if ($attributeSubSet[1]) {
  255. $attributeSubSet[1] = str_replace('&#', '', $attributeSubSet[1]);
  256. $attributeSubSet[1] = preg_replace('/\s+/', '', $attributeSubSet[1]);
  257. $attributeSubSet[1] = str_replace('"', '', $attributeSubSet[1]);
  258. if ((substr($attributeSubSet[1], 0, 1) == "'") && (substr($attributeSubSet[1], (strlen($attributeSubSet[1]) - 1), 1) == "'")) {
  259. $attributeSubSet[1] = substr($attributeSubSet[1], 1, (strlen($attributeSubSet[1]) - 2));
  260. }
  261. $attributeSubSet[1] = stripslashes($attributeSubSet[1]);
  262. }
  263. if (((strpos(strtolower($attributeSubSet[1]), 'expression') !== false) && (strtolower($attributeSubSet[0]) == 'style')) || $this->__checkPos($attributeSubSet[1])) {
  264. continue;
  265. }
  266. $attributeFound = in_array(strtolower($attributeSubSet[0]), $this->attributesArray);
  267. if (!$this->__postFilter($tag, strtolower($attributeSubSet[0]), $attributeSubSet[1])) {
  268. return false;
  269. }
  270. if ($attributeFound) {
  271. if ($attributeSubSet[1]) {
  272. $newAttrSet[] = $attributeSubSet[0] . '="' . $attributeSubSet[1] . '"';
  273. } elseif ($attributeSubSet[1] == "0") {
  274. $newAttrSet[] = $attributeSubSet[0] . '="0"';
  275. } else {
  276. $newAttrSet[] = $attributeSubSet[0] . '="' . $attributeSubSet[0] . '"';
  277. }
  278. }
  279. }
  280. $attributeSet = $newAttrSet;
  281. return true;
  282. }
  283. /**
  284. * Check pos
  285. *
  286. * @param string $attrval
  287. * @return boolean
  288. */
  289. function __checkPos($attrval) {
  290. $checkList = array('javascript:', 'behaviour:', 'vbscript:', 'mocha:', 'livescript:');
  291. $result = false;
  292. foreach ($checkList as $check) {
  293. $result = $result || (strpos(strtolower($attrval), $check) !== false);
  294. }
  295. return $result;
  296. }
  297. /**
  298. * filter external image links
  299. *
  300. * @param string $tag
  301. * @param string $attribute
  302. * @param string $attributeValue
  303. * @return boolean
  304. */
  305. function __postFilter($tag, $attribute, &$attributeValue) {
  306. if ($tag == 'img' && $attribute == 'src') {
  307. if (substr($attributeValue, 0, 1) != '/' && strpos($attributeValue, FULL_BASE_URL) === false) {
  308. return false;
  309. } else {
  310. if ($this->replaceImgThumb && preg_match('/(?<path>\/media\/display\/)(?<uuid>[0-9a-z-]{36})/', $attributeValue, $matches)) {
  311. $attributeValue = $matches['path'] . 'thumb/' . $matches['uuid'];
  312. }
  313. }
  314. }
  315. return true;
  316. }
  317. /**
  318. * Replace All Image Tags
  319. *
  320. * @param string $text
  321. * @param string $showVideo
  322. * @return string
  323. */
  324. function replaceAllImageTags($text, $showVideo = true) {
  325. $text = $this->bbcode2js($text, $showVideo);
  326. //while (preg_match('/src="(\/media\/display\/)([0-9a-z-]{36})"/', $text, $matches)) {
  327. // $name = 'src="' . $matches[1] . $matches[2] . '"';
  328. // $newName = 'src="' . $matches[1] . 'thumb/' . $matches[2] . '"';
  329. // $text = str_replace($name, $newName, $text);
  330. // }
  331. return $text;
  332. }
  333. /**
  334. * convert bbcode to javascript for embedding videos
  335. *
  336. * @param string $text
  337. * @param string $show
  338. * @return string
  339. */
  340. function bbcode2js($text, $show = true) {
  341. do {
  342. $oldstring = $text;
  343. $text = $this->__bb2js($text, $show);
  344. } while ($oldstring != $text);
  345. return $text;
  346. }
  347. /**
  348. * BB 2 JS
  349. *
  350. * @param string $text
  351. * @param string $show
  352. * @return string
  353. */
  354. function __bb2js($text, $show = true) {
  355. if(preg_match('/\[googlevideo\]/', $text)) {
  356. $vid = null;
  357. if (preg_match('/(?:docid=)([-a-z0-9]+)/i', $text, $found)) {
  358. if (isset($found[1])) {
  359. $vid = $found[1];
  360. }
  361. }
  362. if ($vid) {
  363. $this->Javascript->link('vipers-video-quicktags', false);
  364. $this->Javascript->codeBlock('vvq_googlevideo("vvq_' . $vid . '", "325", "265", "' . $vid . '");', array('inline' => false), true);
  365. $content = "<p id=\"vvq_$vid\">";
  366. $content .= '<a href="http://video.google.com/videoplay?docid=' . $vid .'">';
  367. $content .= 'http://video.google.com/videoplay?docid=' . $vid . '</a></p><br />';
  368. if (!$show) {
  369. $content = '';
  370. }
  371. $text = str_replace('[googlevideo]http://video.google.com/videoplay?docid=' . $vid . '[/googlevideo]', $content, $text);
  372. } else {
  373. $start = strpos($text, '[googlevideo]');
  374. $endStr = '[/googlevideo]';
  375. $end = strpos($text, $endStr, $start) + strlen($endStr);
  376. $text = substr($text, 0, $start) . substr($text, $end);
  377. }
  378. } elseif (preg_match('/\[youtubevideo\]/', $text)) {
  379. $vid = null;
  380. if (preg_match('/(?:v=)([-_a-z0-9]+)/i', $text, $found)) {
  381. if (isset($found[1])) {
  382. $vid = $found[1];
  383. }
  384. }
  385. if ($vid) {
  386. $this->Javascript->link('vipers-video-quicktags', false);
  387. $this->Javascript->codeBlock('vvq_youtube("vvq_' . $vid . '", "325", "271", "' . $vid . '");', array('inline' => false), true);
  388. $content = "<p id=\"vvq_$vid\">";
  389. $content .= '<a href="http://www.youtube.com/watch?v=' . $vid . '">';
  390. $content .= 'http://www.youtube.com/watch?v=' . $vid . '</a></p><br />';
  391. if (!$show) {
  392. $content = '';
  393. }
  394. $text = str_replace('[youtubevideo]http://www.youtube.com/watch?v=' . $vid . '[/youtubevideo]', $content, $text);
  395. } else {
  396. $start = strpos($text, '[youtubevideo]');
  397. $endStr = '[/youtubevideo]';
  398. $end = strpos($text, $endStr, $start) + strlen($endStr);
  399. $text = substr($text, 0, $start) . substr($text, $end);
  400. }
  401. } elseif (preg_match('/\[breakvideo\]/', $text)) {
  402. $vid = null;
  403. if (preg_match('/\/([a-zA-Z0-9]+)(\[)/', $text, $found)) {
  404. if (isset($found[1])) {
  405. $vid = $found[1];
  406. }
  407. }
  408. if ($vid) {
  409. $content = '<object width="464" height="392"><param name="movie" value="http://embed.break.com/' . $vid . '"></param><param name="allowScriptAccess" value="always"></param><embed src="http://embed.break.com/' . $vid . '" type="application/x-shockwave-flash" allowScriptAccess=always width="464" height="392"></embed></object>';
  410. if (!$show) {
  411. $content = '';
  412. }
  413. $text = str_replace('[breakvideo]http://embed.break.com/' . $vid . '[/breakvideo]', $content, $text);
  414. } else {
  415. $start = strpos($text, '[breakvideo]');
  416. $endStr = '[/breakvideo]';
  417. $end = strpos($text, $endStr, $start) + strlen($endStr);
  418. $text = substr($text, 0, $start) . substr($text, $end);
  419. }
  420. }
  421. return $text;
  422. }
  423. }