PageRenderTime 29ms CodeModel.GetById 17ms RepoModel.GetById 1ms app.codeStats 0ms

/modules/main/classes/general/sanitizer.php

https://gitlab.com/alexprowars/bitrix
PHP | 1079 lines | 803 code | 112 blank | 164 comment | 126 complexity | 98b9af96073776046a552ebd98de6321 MD5 | raw file
  1. <?
  2. IncludeModuleLangFile(__FILE__);
  /**
   * CBXSanitizer
   * Class that strips from HTML all tags and attributes not contained in the white list.
   *
   * Usage example:
   * <code>
   * $Sanitizer = new CBXSanitizer;
   *
   * $Sanitizer->SetLevel(CBXSanitizer::SECURE_LEVEL_MIDDLE);
   * or
   * $Sanitizer->AddTags( array (
   * 'a' => array('href','id','style','alt'...),
   * 'br' => array(),
   * .... ));
   *
   * $Sanitizer->SanitizeHtml($html);
   * </code>
   *
   */
  22. class CBXSanitizer
  23. {
  24. /**
  25. * Security levels
  26. */
  27. const SECURE_LEVEL_CUSTOM = 0;
  28. const SECURE_LEVEL_HIGH = 1;
  29. const SECURE_LEVEL_MIDDLE = 2;
  30. const SECURE_LEVEL_LOW = 3;
  31. const TABLE_TOP = 0;
  32. const TABLE_CAPT = 1;
  33. const TABLE_GROUP = 2;
  34. const TABLE_ROWS = 3;
  35. const TABLE_COLS = 4;
  36. const ACTION_DEL = 'del';
  37. const ACTION_ADD = 'add';
  38. const ACTION_DEL_WITH_CONTENT = 'del_with_content';
  /**
   * @deprecated Kept for compatibility only; will be removed in a future version.
   * @var mixed
   */
  43. protected static $arOldTags = array();
  44. protected $arHtmlTags = array();
  45. protected $bHtmlSpecChars = true;
  46. protected $bDelSanitizedTags = true;
  47. protected $bDoubleEncode = true;
  48. protected $secLevel = self::SECURE_LEVEL_HIGH;
  49. protected $additionalAttrs = array();
  50. protected $arNoClose = array(
  51. 'br','hr','img','area','base',
  52. 'basefont','col','frame','input',
  53. 'isindex','link','meta','param'
  54. );
  55. protected $localAlph;
  56. protected $arTableTags = array(
  57. 'table' => self::TABLE_TOP,
  58. 'caption' => self::TABLE_CAPT,
  59. 'thead' => self::TABLE_GROUP,
  60. 'tfoot' => self::TABLE_GROUP,
  61. 'tbody' => self::TABLE_GROUP,
  62. 'tr' => self::TABLE_ROWS,
  63. 'th' => self::TABLE_COLS,
  64. 'td' => self::TABLE_COLS
  65. );
  /**
   * Tags which will be cut together with their content
   * @var array
   */
  70. protected $delTagsWithContent = ['script', 'style'];
  /**
   * CBXSanitizer constructor.
   *
   * Prepares $localAlph, the extra character-class fragment that the attribute
   * validators splice into their regular expressions:
   *  - UTF-8 sites: \p{L} followed by the SNT_SYMB_NONE_LETTERS language message;
   *  - other sites whose language is not "en": the SNT_SYMB language message;
   *  - otherwise: nothing.
   * The range \x80-\xFF is appended in every case.
   */
  public function __construct()
  {
      if (SITE_CHARSET == "UTF-8")
      {
          $alphabet = "\p{L}".GetMessage("SNT_SYMB_NONE_LETTERS");
      }
      elseif (LANGUAGE_ID != "en")
      {
          $alphabet = GetMessage("SNT_SYMB");
      }
      else
      {
          $alphabet = "";
      }

      $this->localAlph = $alphabet.'\\x80-\\xFF';
  }
  /**
   * Allows additional attributes in html.
   *
   * Every entry must provide two callables, otherwise it is silently skipped:
   *  - 'tag'     receives the tag name and decides whether the attribute may appear on it;
   *  - 'content' receives the attribute value and decides whether the value is acceptable.
   *
   * Example:
   * <code>
   * $sanitizer->allowAttributes(array(
   *     'aria-label' => array(
   *         'tag' => function($tag)
   *         {
   *             return ($tag == 'div');
   *         },
   *         'content' => function($value)
   *         {
   *             return !preg_match("#[^\\s\\w\\-\\#\\.;]#i" . BX_UTF_PCRE_MODIFIER, $value);
   *         }
   *     )
   * ));
   * </code>
   *
   * @param array $attrs Additional attrs, keyed by attribute name.
   * @return void
   */
  public function allowAttributes(array $attrs)
  {
      foreach ($attrs as $code => $item)
      {
          if (!isset($item['tag']) || !is_callable($item['tag']))
          {
              continue;
          }
          if (!isset($item['content']) || !is_callable($item['content']))
          {
              continue;
          }

          $this->additionalAttrs[$code] = $item;
      }
  }
  /**
   * Adds HTML tags and attributes to the white list and switches the
   * sanitizer to the custom security level.
   *
   * Tag names and attribute names are stored lower-cased, because incoming
   * markup is lower-cased before it is compared against the white list.
   * Non-string list entries (such as a trailing `false` marker) are kept as is.
   *
   * @param mixed $arTags array('tagName1' => array('attribute1','attribute2',...), 'tagName2' => ........)
   * @return int|false count of added tags, or false when $arTags is not an array
   */
  public function AddTags($arTags)
  {
      if (!is_array($arTags))
      {
          return false;
      }

      $this->secLevel = self::SECURE_LEVEL_CUSTOM;

      $counter = 0;
      foreach ($arTags as $tagName => $arAttrs)
      {
          // A non-array attribute list means "no attributes" instead of a TypeError.
          if (!is_array($arAttrs))
          {
              $arAttrs = array();
          }

          // Keys are normalised as before (backward compatibility) ...
          $arAttrs = array_change_key_case($arAttrs, CASE_LOWER);
          // ... and the attribute names themselves, which are the array VALUES.
          $arAttrs = array_map(
              static function ($attr)
              {
                  return is_string($attr) ? mb_strtolower($attr) : $attr;
              },
              $arAttrs
          );

          $this->arHtmlTags[mb_strtolower($tagName)] = $arAttrs;
          $counter++;
      }

      return $counter;
  }
  /**
   * Alias of AddTags(): adding a tag that is already white-listed replaces its attribute list.
   *
   * @see AddTags()
   * @param mixed $arTags Same structure as for AddTags().
   * @return mixed Whatever AddTags() returns.
   */
  public function UpdateTags($arTags)
  {
      $updated = $this->AddTags($arTags);

      return $updated;
  }
  /**
   * Deletes tags from the white list and switches the sanitizer to the
   * custom security level.
   *
   * Names are matched case-insensitively. Names that are not in the white
   * list are ignored; an empty list deletes nothing.
   *
   * @param mixed $arTagNames array('tagName1','tagname2',...)
   * @return int|false count of deleted tags, or false when $arTagNames is not an array
   */
  public function DelTags($arTagNames)
  {
      if (!is_array($arTagNames))
      {
          return false;
      }

      $this->secLevel = self::SECURE_LEVEL_CUSTOM;

      $counter = 0;
      foreach ($arTagNames as $delTagName)
      {
          $delTagName = mb_strtolower((string)$delTagName);

          // Remove each requested tag exactly once. The previous nested-loop
          // version re-added a tag as soon as any other name in the list
          // differed from it, so only the last name was actually removed.
          if (array_key_exists($delTagName, $this->arHtmlTags))
          {
              unset($this->arHtmlTags[$delTagName]);
              $counter++;
          }
      }

      return $counter;
  }
  /**
   * Removes the given attributes from every white-listed tag and switches
   * the sanitizer to the custom security level.
   *
   * The attribute lists are re-indexed afterwards: SanitizeHtml() inspects
   * the last element of a list via [count - 1], which only works on a list
   * without gaps in its keys.
   *
   * @param array $arDeleteAttrs Attribute names to remove.
   * @return void
   */
  public function DeleteAttributes(array $arDeleteAttrs)
  {
      $this->secLevel = self::SECURE_LEVEL_CUSTOM;

      $arResultTags = array();
      foreach ($this->arHtmlTags as $tagName => $arAttrs)
      {
          // array_diff() preserves keys, so re-index with array_values().
          $arResultTags[$tagName] = array_values(array_diff($arAttrs, $arDeleteAttrs));
      }

      $this->arHtmlTags = $arResultTags;
  }
  /**
   * Empties the white list and marks the security level as custom.
   *
   * @return void
   */
  public function DelAllTags()
  {
      $this->arHtmlTags = array();
      $this->secLevel = self::SECURE_LEVEL_CUSTOM;
  }
  /**
   * Controls the double_encode argument passed to htmlspecialchars().
   * When turned off, html entities that already exist in the text are not
   * encoded a second time. The default is to convert everything.
   * http://php.net/manual/ru/function.htmlspecialchars.php (double_encode)
   *
   * @param bool $bApply true|false
   * @return void
   */
  public function ApplyDoubleEncode($bApply=true)
  {
      $this->bDoubleEncode = (bool)$bApply;
  }
  /**
   * Switches htmlspecialchars() processing of filtered tags and text on or off.
   * Switching it off raises an E_USER_WARNING.
   *
   * !WARNING! if DeleteSanitizedTags = false and ApplyHtmlSpecChars = false
   * html will not be sanitized!
   *
   * @param bool $bApply true|false
   * @return void
   * @deprecated
   */
  public function ApplyHtmlSpecChars($bApply=true)
  {
      $this->bHtmlSpecChars = (bool)$bApply;

      if (!$this->bHtmlSpecChars)
      {
          trigger_error('It is strongly not recommended to use \CBXSanitizer::ApplyHtmlSpecChars(false)', E_USER_WARNING);
      }
  }
  /**
   * Chooses whether tags outside the white list are deleted (true) or kept
   * and handled as plain text (false).
   *
   * !WARNING! if DeleteSanitizedTags = false and ApplyHtmlSpecChars = false
   * html will not be sanitized!
   *
   * @param bool $bApply true|false
   * @return void
   */
  public function DeleteSanitizedTags($bApply=true)
  {
      $this->bDelSanitizedTags = (bool)$bApply;
  }
  /**
   * Replaces the white list with one of the predefined sets.
   * Any value that is not one of the three predefined levels is treated as
   * SECURE_LEVEL_HIGH.
   *
   * @param int $secLevel { CBXSanitizer::SECURE_LEVEL_HIGH
   * | CBXSanitizer::SECURE_LEVEL_MIDDLE
   * | CBXSanitizer::SECURE_LEVEL_LOW }
   * @return void
   */
  public function SetLevel($secLevel)
  {
      $isPredefined = $secLevel == self::SECURE_LEVEL_HIGH
          || $secLevel == self::SECURE_LEVEL_MIDDLE
          || $secLevel == self::SECURE_LEVEL_LOW;
      if (!$isPredefined)
      {
          $secLevel = self::SECURE_LEVEL_HIGH;
      }

      if ($secLevel == self::SECURE_LEVEL_HIGH)
      {
          // Plain formatting tags only, no attributes at all.
          $arTags = array_fill_keys(
              array(
                  'b', 'br', 'big', 'blockquote', 'code', 'del', 'dt', 'dd', 'font',
                  'h1', 'h2', 'h3', 'h4', 'h5', 'h6',
                  'hr', 'i', 'ins', 'li', 'ol', 'p', 'small', 's', 'sub', 'sup',
                  'strong', 'pre', 'u', 'ul',
              ),
              array()
          );
      }
      elseif ($secLevel == self::SECURE_LEVEL_MIDDLE)
      {
          // Adds links, images and tables with a small set of attributes.
          $none = array();
          $titleOnly = array('title');
          $alignment = array('align', 'valign');
          $arTags = array(
              'a' => array('href', 'title', 'name', 'alt'),
              'b' => $none,
              'br' => $none,
              'big' => $none,
              'blockquote' => $titleOnly,
              'code' => $none,
              'caption' => $none,
              'del' => $titleOnly,
              'dt' => $none,
              'dd' => $none,
              'font' => array('color', 'size'),
              'color' => $none,
              'h1' => $none,
              'h2' => $none,
              'h3' => $none,
              'h4' => $none,
              'h5' => $none,
              'h6' => $none,
              'hr' => $none,
              'i' => $none,
              'img' => array('src', 'alt', 'height', 'width', 'title'),
              'ins' => $titleOnly,
              'li' => $none,
              'ol' => $none,
              'p' => $none,
              'pre' => $none,
              's' => $none,
              'small' => $none,
              'strong' => $none,
              'sub' => $none,
              'sup' => $none,
              'table' => array('border', 'width'),
              'tbody' => $alignment,
              'td' => array('width', 'height', 'align', 'valign'),
              'tfoot' => $alignment,
              'th' => array('width', 'height'),
              'thead' => $alignment,
              'tr' => $alignment,
              'u' => $none,
              'ul' => $none,
          );
      }
      elseif ($secLevel == self::SECURE_LEVEL_LOW)
      {
          // Adds div/span/map and the style, id and class attributes.
          $base = array('style', 'id', 'class');
          $titled = array('title', 'style', 'id', 'class');
          $aligned = array('style', 'id', 'class', 'align');
          $titledAligned = array('title', 'style', 'id', 'class', 'align');
          $tableGroup = array('align', 'valign', 'style', 'id', 'class');
          $arTags = array(
              'a' => array('href', 'title', 'name', 'style', 'id', 'class', 'shape', 'coords', 'alt', 'target'),
              'b' => $base,
              'br' => $base,
              'big' => $base,
              'blockquote' => $titled,
              'caption' => $base,
              'code' => $base,
              'del' => $titled,
              'div' => $titledAligned,
              'dt' => $base,
              'dd' => $base,
              'font' => array('color', 'size', 'face', 'style', 'id', 'class'),
              'h1' => $aligned,
              'h2' => $aligned,
              'h3' => $aligned,
              'h4' => $aligned,
              'h5' => $aligned,
              'h6' => $aligned,
              'hr' => $base,
              'i' => $base,
              'img' => array('style', 'id', 'class', 'src', 'alt', 'height', 'width', 'title', 'align'),
              'ins' => $titled,
              'li' => $base,
              'map' => array('shape', 'coords', 'href', 'alt', 'title', 'style', 'id', 'class', 'name'),
              'ol' => $base,
              'p' => $aligned,
              'pre' => $base,
              's' => $base,
              'small' => $base,
              'strong' => $base,
              'span' => $titledAligned,
              'sub' => $base,
              'sup' => $base,
              'table' => array('border', 'width', 'style', 'id', 'class', 'cellspacing', 'cellpadding'),
              'tbody' => $tableGroup,
              'td' => array('width', 'height', 'style', 'id', 'class', 'align', 'valign', 'colspan', 'rowspan'),
              'tfoot' => array('align', 'valign', 'style', 'id', 'class', 'align', 'valign'),
              'th' => array('width', 'height', 'style', 'id', 'class', 'colspan', 'rowspan'),
              'thead' => $tableGroup,
              'tr' => $tableGroup,
              'u' => $base,
              'ul' => $base,
          );
      }
      else
      {
          $arTags = array();
      }

      // DelAllTags() and AddTags() both switch to the custom level,
      // so the requested level is stored last.
      $this->DelAllTags();
      $this->AddTags($arTags);
      $this->secLevel = $secLevel;
  }
  /**
   * Checks whether the value of one attribute is acceptable for output.
   *
   * The value is decoded with Decode() first and the checks run on the
   * decoded form. For src/href/data-url a value that does not start with one
   * of the accepted prefixes gets 'http://' prepended to the raw value in
   * $arAttr[3]; this is why the argument is taken by reference.
   *
   * NOTE(review): every check has the shape `!preg_match(...)`, so a
   * preg_match() failure (it returns false on error) is treated like
   * "no forbidden character found" - confirm this is acceptable.
   *
   * @param array $arAttr One match of extractAttributes(): [1] is the attribute name, [3] its value.
   * @return bool true when the value passes the check for this attribute.
   */
  protected function IsValidAttr(&$arAttr)
  {
      if (!isset($arAttr[1], $arAttr[3]))
      {
          return false;
      }

      $name = mb_strtolower($arAttr[1]);
      $decoded = $this->Decode($arAttr[3]);

      switch ($name)
      {
          case 'src':
          case 'href':
          case 'data-url':
              $hasKnownPrefix = preg_match("#^(http://|https://|ftp://|file://|mailto:|callto:|skype:|tel:|sms:|\\#|/)#i".BX_UTF_PCRE_MODIFIER, $decoded);
              if (!$hasKnownPrefix)
              {
                  $arAttr[3] = 'http://' . $arAttr[3];
              }
              return !preg_match("#javascript:|data:|[^\\w".$this->localAlph."a-zA-Z:/\\.=@;,!~\\*\\&\\#\\)(%\\s\\+\$\\?\\-\\[\\]]#i".BX_UTF_PCRE_MODIFIER, $decoded);

          case 'height':
          case 'width':
          case 'cellpadding':
          case 'cellspacing':
              return !preg_match("#^[^0-9\\-]+(px|%|\\*)*#i".BX_UTF_PCRE_MODIFIER, $decoded);

          case 'title':
          case 'alt':
              return !preg_match("#[^\\w".$this->localAlph."\\.\\?!,:;\\s\\-]#i".BX_UTF_PCRE_MODIFIER, $decoded);

          case 'style':
              $decoded = str_replace('&quot;', '', $decoded);
              // Forbidden keywords first, then the character white list.
              if (preg_match("#(behavior|expression|javascript)#i".BX_UTF_PCRE_MODIFIER, $decoded))
              {
                  return false;
              }
              return !preg_match("#[^\\/\\w\\s)(!%,:\\.;\\-\\#\\']#i".BX_UTF_PCRE_MODIFIER, $decoded);

          case 'coords':
              return !preg_match("#[^0-9\\s,\\-]#i".BX_UTF_PCRE_MODIFIER, $decoded);
      }

      // Attributes registered through allowAttributes() bring their own value check.
      if (array_key_exists($name, $this->additionalAttrs))
      {
          $verdict = call_user_func_array(
              $this->additionalAttrs[$name]['content'],
              array($decoded)
          );
          return $verdict === true;
      }

      return !preg_match("#[^\\s\\w" . $this->localAlph . "\\-\\#\\.\/;]#i" . BX_UTF_PCRE_MODIFIER, $decoded);
  }
  /**
   * Encodes one attribute value for output.
   *
   * Returns the value untouched when htmlspecialchars processing is off.
   * Otherwise the value goes through htmlspecialchars(): 'style' values use
   * ENT_COMPAT so single quotes stay as they are, all other attributes use
   * ENT_QUOTES. In 'href' values ampersands are shielded behind a
   * placeholder so they come out unencoded.
   *
   * @param array $attr One match of extractAttributes(): [1] is the attribute name, [3] its value.
   * @return string
   */
  protected function encodeAttributeValue(array $attr)
  {
      $value = $attr[3];
      if (!$this->bHtmlSpecChars)
      {
          return $value;
      }

      $isHref = ($attr[1] === 'href');
      $quoteFlags = ($attr[1] === 'style') ? ENT_COMPAT : ENT_QUOTES;

      if ($isHref)
      {
          $value = str_replace('&', '##AMP##', $value);
      }

      $value = htmlspecialchars($value, $quoteFlags, LANG_CHARSET, $this->bDoubleEncode);

      return $isHref ? str_replace('##AMP##', '&', $value) : $value;
  }
  /**
   * Returns the white list as text: one "tag ( attr1  attr2 )<br>" entry per tag.
   * Empty attribute entries are skipped.
   *
   * @return string|false false when the white list is not an array
   */
  public function GetTags()
  {
      if (!is_array($this->arHtmlTags))
      {
          return false;
      }

      $entries = array();
      foreach ($this->arHtmlTags as $tag => $arAttrs)
      {
          $attrList = "";
          foreach ($arAttrs as $attr)
          {
              if (!$attr)
              {
                  continue;
              }
              $attrList .= " ".$attr." ";
          }
          $entries[] = $tag." (".$attrList.")<br>";
      }

      return implode("", $entries);
  }
  /**
   * Remembers a tag white list for later calls of the static Sanitize().
   *
   * @deprecated Kept for compatibility only; will be removed in a future version.
   * @param mixed $arTags Same structure as for AddTags().
   * @return void
   */
  public static function SetTags($arTags)
  {
      $legacyTags = $arTags;
      self::$arOldTags = $legacyTags;
  }
  /**
   * One-call static sanitizing.
   *
   * Uses the white list stored by SetTags() when there is one, otherwise the
   * predefined high security level. $secLevel is accepted but not used.
   *
   * @deprecated Kept for compatibility only; will be removed in a future version.
   * @param string $html Dirty HTML.
   * @param mixed $secLevel Not used.
   * @param bool $htmlspecialchars Passed to ApplyHtmlSpecChars().
   * @param bool $delTags Passed to DeleteSanitizedTags().
   * @return string filtered HTML
   */
  public static function Sanitize($html, $secLevel='HIGH', $htmlspecialchars=true, $delTags=true)
  {
      $Sanitizer = new self;

      if (!empty(self::$arOldTags))
      {
          $Sanitizer->DelAllTags();
          $Sanitizer->AddTags(self::$arOldTags);
      }
      else
      {
          $Sanitizer->SetLevel(self::SECURE_LEVEL_HIGH);
      }

      $Sanitizer->ApplyHtmlSpecChars($htmlspecialchars);
      $Sanitizer->DeleteSanitizedTags($delTags);
      $Sanitizer->ApplyDoubleEncode();

      $filtered = $Sanitizer->SanitizeHtml($html);

      return $filtered;
  }
  /**
   * Splits html into an ordered list of segments.
   *
   * Each segment is array('segType' => 'tag'|'text', 'value' => string).
   * Everything matching <...> (without nested angle brackets) is a tag
   * segment; a remaining chunk that both starts with '<' and ends with '>'
   * is treated as a tag too. Empty text chunks are dropped.
   *
   * @param string $html
   * @return array
   */
  protected function splitHtml($html)
  {
      $chunks = preg_split('/(<[^<>]+>)/si'.BX_UTF_PCRE_MODIFIER, $html, -1, PREG_SPLIT_DELIM_CAPTURE);

      $segments = [];
      foreach ($chunks as $position => $chunk)
      {
          // With PREG_SPLIT_DELIM_CAPTURE the captured delimiters sit at odd positions.
          if ($position % 2 || (mb_substr($chunk, 0, 1) == '<' && mb_substr($chunk, -1) == '>'))
          {
              $segments[] = array('segType'=>'tag', 'value'=>$chunk);
              continue;
          }

          if ($chunk != "")
          {
              $segments[] = array('segType'=>'text', 'value'=> $chunk);
          }
      }

      return $segments;
  }
  /**
   * Erases or html-encodes tags and attributes which are not contained in
   * the white list.
   *
   * How it works:
   *  1. The html is split into tag and text segments (splitHtml()).
   *  2. Each segment is visited once. Text is html-encoded when enabled.
   *     Tags are matched against the white list, and missing table rows,
   *     cells and closing tags are inserted as extra segments marked
   *     ACTION_ADD. Rejected tags are marked ACTION_DEL or turned into text,
   *     depending on DeleteSanitizedTags().
   *  3. Tags still open at the end are closed.
   *  4. The result is assembled from the segments; content of tags listed
   *     in $delTagsWithContent is dropped together with the tags.
   *
   * If the white list is empty, the high security level is loaded first.
   * When htmlspecialchars processing is off, the result is sanitized again
   * until it no longer changes.
   *
   * @param string $html Dirty HTML
   * @return string filtered HTML
   */
  public function SanitizeHtml($html)
  {
      if (empty($this->arHtmlTags))
      {
          $this->SetLevel(self::SECURE_LEVEL_HIGH);
      }

      $openTagsStack = array();
      $isCode = false;
      $seg = $this->splitHtml($html);

      //process segments
      $segCount = count($seg);
      for ($i = 0; $i < $segCount; $i++)
      {
          if ($seg[$i]['segType'] == 'text')
          {
              // Non-blank text inside a table but outside any cell: insert an
              // opening cell tag in front of it and process that tag first.
              if (trim($seg[$i]['value']) && ($tp = array_search('table', $openTagsStack)) !== false)
              {
                  $cellTags = array_intersect(array('td', 'th'), array_keys($this->arHtmlTags));
                  if ($cellTags && !array_intersect($cellTags, array_slice($openTagsStack, $tp+1)))
                  {
                      array_splice($seg, $i, 0, array(array('segType' => 'tag', 'value' => sprintf('<%s>', reset($cellTags)))));
                      $i--; $segCount++;
                      continue;
                  }
              }

              if ($this->bHtmlSpecChars)
              {
                  $openTagsStackSize = count($openTagsStack);
                  $entQuotes = ($openTagsStackSize && $openTagsStack[$openTagsStackSize-1] === 'style' ? ENT_NOQUOTES : ENT_QUOTES);
                  $seg[$i]['value'] = htmlspecialchars(
                      $seg[$i]['value'],
                      $entQuotes,
                      LANG_CHARSET,
                      $this->bDoubleEncode
                  );
              }
          }
          elseif (
              $seg[$i]['segType'] == 'tag'
              && (
                  preg_match('/^<!--\\[if\\s+((?:mso|gt|lt|gte|lte|\\||!|[0-9]+|\\(|\\))\\s*)+\\]>$/', $seg[$i]['value'])
                  || preg_match('/^<!\\[endif\\]-->$/', $seg[$i]['value'])
              )
          )
          {
              //Keep ms html comments https://stackoverflow.design/email/base/mso/
              $seg[$i]['segType'] = 'text';
          }
          elseif ($seg[$i]['segType'] == 'tag')
          {
              //find tag type (open/close), tag name, attributes
              $matches = array();
              preg_match('#^<\s*(/)?\s*([a-z0-9]+)(.*?)>$#si'.BX_UTF_PCRE_MODIFIER, $seg[$i]['value'], $matches);

              // Things like comments or "<>" do not match. They get an empty
              // tag name, which is never white-listed, so they are handled
              // like any other unallowed opening tag.
              $seg[$i]['tagType'] = (!empty($matches[1]) ? 'close' : 'open');
              $seg[$i]['tagName'] = mb_strtolower($matches[2] ?? '');
              $attrString = (string)($matches[3] ?? '');

              if (($seg[$i]['tagName'] == 'code') && ($seg[$i]['tagType'] == 'close'))
              {
                  $isCode = false;
              }

              //if tag founded inside <code></code> it is simple text
              if ($isCode)
              {
                  $seg[$i]['segType'] = 'text';
                  $i--;
                  continue;
              }

              if ($seg[$i]['tagType'] == 'open')
              {
                  // if tag unallowed screen it, or erase
                  if (!array_key_exists($seg[$i]['tagName'], $this->arHtmlTags))
                  {
                      if ($this->bDelSanitizedTags)
                      {
                          $seg[$i]['action'] = self::ACTION_DEL;
                      }
                      else
                      {
                          $seg[$i]['segType'] = 'text';
                          $i--;
                          continue;
                      }
                  }
                  //if allowed
                  else
                  {
                      if (in_array('table', $openTagsStack))
                      {
                          // Directly inside <table>: anything but a row group or a row gets a <tr> first.
                          if ($openTagsStack[count($openTagsStack)-1] == 'table')
                          {
                              if (array_key_exists('tr', $this->arHtmlTags) && !in_array($seg[$i]['tagName'], array('thead', 'tfoot', 'tbody', 'tr')))
                              {
                                  array_splice($seg, $i, 0, array(array('segType' => 'tag', 'tagType' => 'open', 'tagName' => 'tr', 'action' => self::ACTION_ADD)));
                                  $i++; $segCount++;
                                  $openTagsStack[] = 'tr';
                              }
                          }

                          // Directly inside a row group: anything but a row gets a <tr> first.
                          if (in_array($openTagsStack[count($openTagsStack)-1], array('thead', 'tfoot', 'tbody')))
                          {
                              if (array_key_exists('tr', $this->arHtmlTags) && $seg[$i]['tagName'] != 'tr')
                              {
                                  array_splice($seg, $i, 0, array(array('segType' => 'tag', 'tagType' => 'open', 'tagName' => 'tr', 'action' => self::ACTION_ADD)));
                                  $i++; $segCount++;
                                  $openTagsStack[] = 'tr';
                              }
                          }

                          // A new row closes everything opened since the enclosing table or row group.
                          if ($seg[$i]['tagName'] == 'tr')
                          {
                              for ($j = count($openTagsStack)-1; $j >= 0; $j--)
                              {
                                  if (in_array($openTagsStack[$j], array('table', 'thead', 'tfoot', 'tbody')))
                                  {
                                      break;
                                  }
                                  array_splice($seg, $i, 0, array(array('segType' => 'tag', 'tagType' => 'close', 'tagName' => $openTagsStack[$j], 'action' => self::ACTION_ADD)));
                                  $i++; $segCount++;
                                  array_splice($openTagsStack, $j, 1);
                              }
                          }

                          // Directly inside a row: anything but a cell gets an opening cell first.
                          if ($openTagsStack[count($openTagsStack)-1] == 'tr')
                          {
                              $cellTags = array_intersect(array('td', 'th'), array_keys($this->arHtmlTags));
                              if ($cellTags && !in_array($seg[$i]['tagName'], $cellTags))
                              {
                                  $cellTag = reset($cellTags);
                                  array_splice($seg, $i, 0, array(array('segType' => 'tag', 'tagType' => 'open', 'tagName' => $cellTag, 'action' => self::ACTION_ADD)));
                                  $i++; $segCount++;
                                  // Push the tag that was really emitted ('th' when 'td'
                                  // is not white-listed) so the closing tag matches it.
                                  $openTagsStack[] = $cellTag;
                              }
                          }

                          // A new cell closes everything opened since the enclosing row.
                          if (in_array($seg[$i]['tagName'], array('td', 'th')))
                          {
                              for ($j = count($openTagsStack)-1; $j >= 0; $j--)
                              {
                                  if ($openTagsStack[$j] == 'tr')
                                  {
                                      break;
                                  }
                                  array_splice($seg, $i, 0, array(array('segType' => 'tag', 'tagType' => 'close', 'tagName' => $openTagsStack[$j], 'action' => self::ACTION_ADD)));
                                  $i++; $segCount++;
                                  array_splice($openTagsStack, $j, 1);
                              }
                          }
                      }

                      //Processing valid tables
                      //if find 'tr','td', etc...
                      if (array_key_exists($seg[$i]['tagName'], $this->arTableTags))
                      {
                          $this->CleanTable($seg, $openTagsStack, $i, false);
                          // CleanTable() sets 'action' only when it rejects the tag.
                          if (($seg[$i]['action'] ?? '') == self::ACTION_DEL)
                          {
                              continue;
                          }
                      }

                      $seg[$i]['attr'] = $this->processAttributes(
                          $attrString, //attributes string
                          (string)$seg[$i]['tagName']
                      );

                      if ($seg[$i]['tagName'] === 'code')
                      {
                          $isCode = true;
                      }

                      //if tag need close tag add it to stack opened tags
                      if (!in_array($seg[$i]['tagName'], $this->arNoClose)) //!count($this->arHtmlTags[$seg[$i]['tagName']]) || fix: </br>
                      {
                          $openTagsStack[] = $seg[$i]['tagName'];
                          $seg[$i]['closeIndex'] = count($openTagsStack)-1;
                      }
                  }
              }
              //if closing tag
              else
              {
                  //if tag allowed
                  if (array_key_exists($seg[$i]['tagName'], $this->arHtmlTags) && (!count($this->arHtmlTags[$seg[$i]['tagName']]) || ($this->arHtmlTags[$seg[$i]['tagName']][count($this->arHtmlTags[$seg[$i]['tagName']])-1] != false)))
                  {
                      if ($seg[$i]['tagName'] == 'code')
                      {
                          $isCode = false;
                      }

                      //if open tags stack is empty, or not include it's name lets screen/erase it
                      if ((count($openTagsStack) == 0) || (!in_array($seg[$i]['tagName'], $openTagsStack)))
                      {
                          // NOTE(review): $this->arNoClose is an array property that is
                          // non-empty by default, so this condition is true regardless of
                          // bDelSanitizedTags. Possibly a check of the current tag against
                          // arNoClose was intended - confirm before changing.
                          if ($this->bDelSanitizedTags || $this->arNoClose)
                          {
                              $seg[$i]['action'] = self::ACTION_DEL;
                          }
                          else
                          {
                              $seg[$i]['segType'] = 'text';
                              $i--;
                              continue;
                          }
                      }
                      else
                      {
                          //if this tag don't match last from open tags stack , adding right close tag
                          $tagName = array_pop($openTagsStack);
                          if ($seg[$i]['tagName'] != $tagName)
                          {
                              // The inserted closing tag takes index $i. The current closing
                              // tag moves to $i + 1 and is examined again on the next pass.
                              array_splice($seg, $i, 0, array(array('segType'=>'tag', 'tagType'=>'close', 'tagName'=>$tagName, 'action'=>self::ACTION_ADD)));
                              $segCount++;
                          }
                      }
                  }
                  //if tag unallowed erase it
                  else
                  {
                      if ($this->bDelSanitizedTags)
                      {
                          $seg[$i]['action'] = self::ACTION_DEL;
                      }
                      else
                      {
                          $seg[$i]['segType'] = 'text';
                          $i--;
                          continue;
                      }
                  }
              }
          }
      }

      //close tags stayed in stack
      foreach (array_reverse($openTagsStack) as $val)
      {
          array_push($seg, array('segType'=>'tag', 'tagType'=>'close', 'tagName'=>$val, 'action'=>self::ACTION_ADD));
      }

      //build filtered code and return it
      $filteredHTML = '';
      $flagDeleteContent = false;
      foreach ($seg as $segt)
      {
          if (($segt['action'] ?? '') != self::ACTION_DEL && !$flagDeleteContent)
          {
              if ($segt['segType'] == 'text')
              {
                  $filteredHTML .= $segt['value'];
              }
              elseif ($segt['segType'] == 'tag')
              {
                  if ($segt['tagType'] == 'open')
                  {
                      $filteredHTML .= '<'.$segt['tagName'];
                      // Tags inserted with ACTION_ADD carry no 'attr' key.
                      if (is_array($segt['attr'] ?? null))
                      {
                          foreach ($segt['attr'] as $attr_key => $attr_val)
                          {
                              $filteredHTML .= ' '.$attr_key.'="'.$attr_val.'"';
                          }
                      }
                      // A white-list entry whose last element is false is written self-closed.
                      if (count($this->arHtmlTags[$segt['tagName']]) && ($this->arHtmlTags[$segt['tagName']][count($this->arHtmlTags[$segt['tagName']])-1] == false))
                      {
                          $filteredHTML .= " /";
                      }
                      $filteredHTML .= '>';
                  }
                  elseif ($segt['tagType'] == 'close')
                  {
                      $filteredHTML .= '</'.$segt['tagName'].'>';
                  }
              }
          }
          else
          {
              // Plain text segments have no tag name and never toggle the flag.
              if (isset($segt['tagName']) && in_array($segt['tagName'], $this->delTagsWithContent))
              {
                  $flagDeleteContent = ($segt['tagType'] ?? '') == 'open';
              }
          }
      }

      if (!$this->bHtmlSpecChars && $html != $filteredHTML)
      {
          $filteredHTML = $this->SanitizeHtml($filteredHTML);
      }

      return $filteredHTML;
  }
  /**
   * Finds all name=value pairs in the attribute part of a tag.
   *
   * @param string $attrData Text between the tag name and the closing '>'.
   * @return array One entry per attribute (PREG_SET_ORDER): [1] name, [2] quote character
   *               (may be empty), [3] value with surrounding whitespace stripped.
   */
  protected function extractAttributes(string $attrData): array
  {
      $pattern = '#([a-z0-9_-]+)\s*=\s*([\'\"]?)(?:\s*)(.*?)(?:\s*)\2(\s|$|(?:\/\s*$))+#is'.BX_UTF_PCRE_MODIFIER;

      $found = [];
      preg_match_all($pattern, $attrData, $found, PREG_SET_ORDER);

      return $found;
  }
  /**
   * Builds the list of attributes that may be written out for one tag.
   *
   * An attribute is kept when its name is white-listed for the tag (or an
   * allowAttributes() 'tag' callback returns true for the tag) and its value
   * passes IsValidAttr(). Double quotes inside values become single quotes,
   * because values are written out wrapped in double quotes.
   *
   * @param string $attrData Attribute part of the tag.
   * @param string $currTag Lower-cased tag name; must be a key of the white list.
   * @return array Attribute name => encoded value.
   */
  protected function processAttributes(string $attrData, string $currTag): array
  {
      $accepted = [];

      foreach ($this->extractAttributes($attrData) as $match)
      {
          // Attribute name
          $match[1] = mb_strtolower($match[1]);
          $name = $match[1];

          $isAllowed = in_array($name, $this->arHtmlTags[$currTag], true);
          if (!$isAllowed && array_key_exists($name, $this->additionalAttrs))
          {
              $isAllowed = true === call_user_func($this->additionalAttrs[$name]['tag'], $currTag);
          }
          if (!$isAllowed)
          {
              continue;
          }

          // Attribute value. Wrap attribute by "
          $match[3] = str_replace('"', "'", $match[3]);

          // IsValidAttr() may rewrite $match[3] (it takes the match by reference).
          if ($this->IsValidAttr($match))
          {
              $accepted[$name] = $this->encodeAttributeValue($match);
          }
      }

      return $accepted;
  }
  /**
   * Checks that the table tag at $segIndex has a still-open parent table tag
   * somewhere before it.
   *
   * The segments before $segIndex are scanned backwards, counting opening
   * and closing tags per tag name. A parent counts as found when a suitable
   * tag has more openings than closings:
   *  - for cells (td/th): a row or another cell;
   *  - for rows, groups and captions: any table tag of the same or a higher level.
   * If no parent is found, the tag at $segIndex is marked ACTION_DEL.
   * If one is found and $delTextBetweenTags is true, every segment between
   * the parent and $segIndex, except whitespace-only text, is marked
   * ACTION_DEL and its entry is removed from the open tags stack.
   *
   * NOTE(review): unset() leaves gaps in the keys of $openTagsStack, while
   * SanitizeHtml() indexes the stack by count()-1. SanitizeHtml() passes
   * $delTextBetweenTags = false, so that path is not reached from there.
   *
   * @param array $seg Segment list, modified in place.
   * @param array $openTagsStack Stack of currently open tag names, modified in place.
   * @param int $segIndex Index of the table tag to check; its tag name must be a key of $arTableTags.
   * @param bool $delTextBetweenTags Whether to delete what lies between the parent and the tag.
   * @return bool true when a parent was found.
   */
  protected function CleanTable(&$seg, &$openTagsStack, $segIndex, $delTextBetweenTags=true)
  {
      //if we found up level or not
      $bFindUp = false;
      //count open & close tags
      $arOpenClose = array();

      for ($tElCategory = self::TABLE_COLS; $tElCategory > self::TABLE_TOP; $tElCategory--)
      {
          if ($this->arTableTags[$seg[$segIndex]['tagName']] != $tElCategory)
          {
              continue;
          }

          //find back upper level
          for ($j = $segIndex-1; $j >= 0; $j--)
          {
              $prevTagName = $seg[$j]['tagName'] ?? '';
              if ($seg[$j]['segType'] != 'tag' || !array_key_exists($prevTagName, $this->arTableTags))
              {
                  continue;
              }

              // 'action' exists only on segments that were marked earlier.
              if (($seg[$j]['action'] ?? '') == self::ACTION_DEL)
              {
                  continue;
              }

              $prevCategory = $this->arTableTags[$prevTagName];
              if ($tElCategory == self::TABLE_COLS)
              {
                  if ($prevCategory == self::TABLE_COLS || $prevCategory == self::TABLE_ROWS)
                  {
                      $bFindUp = true;
                  }
              }
              elseif ($prevCategory <= $tElCategory)
              {
                  $bFindUp = true;
              }

              if (!$bFindUp)
              {
                  continue;
              }

              //count opened and closed tags
              if (!isset($arOpenClose[$prevTagName]))
              {
                  $arOpenClose[$prevTagName] = array('open' => 0, 'close' => 0);
              }
              $prevTagType = $seg[$j]['tagType'];
              $arOpenClose[$prevTagName][$prevTagType] = ($arOpenClose[$prevTagName][$prevTagType] ?? 0) + 1;

              //if opened tag not found yet, searching for more
              if ($arOpenClose[$prevTagName]['open'] <= $arOpenClose[$prevTagName]['close'])
              {
                  $bFindUp = false;
                  continue;
              }

              if (!$delTextBetweenTags)
              {
                  break;
              }

              //if find up level let's mark all middle text and tags for del-action
              for ($k = $segIndex-1; $k > $j; $k--)
              {
                  //let's save text-format
                  if ($seg[$k]['segType'] == 'text' && !preg_match("#[^\n\r\s]#i".BX_UTF_PCRE_MODIFIER, $seg[$k]['value']))
                  {
                      continue;
                  }

                  $seg[$k]['action'] = self::ACTION_DEL;
                  if (isset($seg[$k]['closeIndex']))
                  {
                      unset($openTagsStack[$seg[$k]['closeIndex']]);
                  }
              }
              break;
          }

          //if we didn't find up levels, let's mark this block as del
          if (!$bFindUp)
          {
              $seg[$segIndex]['action'] = self::ACTION_DEL;
          }
          break;
      }

      return $bFindUp;
  }
  /**
   * Decodes text containing codes like &#***; the text may have been encoded
   * several times, so decoding is repeated until a pass changes nothing.
   * Every pass also removes NUL bytes and numeric references to code zero.
   *
   * @param string $str
   * @return string decoded
   */
  public function Decode($str)
  {
      $previous = "";
      while ($previous != $str)
      {
          $previous = $str;

          $str = $this->_decode($str);
          $str = str_replace("\x00", "", $str);
          $str = preg_replace("/\&\#0+(;|([^\d;]))/is", "\\2", $str);
          $str = preg_replace("/\&\#x0+(;|([^\da-f;]))/is", "\\2", $str);
      }

      return $previous;
  }
  /*
  Callback for regular expressions: decodes a character presented as &#123;
  $in[1] holds the decimal digits, $in[2] the single character after them.
  A ';' terminator is consumed, any other character is kept.
  */
  protected function _decode_cb($in)
  {
      $tail = ($in[2] == ';') ? "" : $in[2];

      return chr(intval($in[1])).$tail;
  }
  /*
  Callback for regular expressions: decodes a character presented as &#xAB;
  $in[1] holds the hex digits, $in[2] the single character after them.
  A ';' terminator is consumed, any other character is kept.
  */
  protected function _decode_cb_hex($in)
  {
      $tail = ($in[2] == ';') ? "" : $in[2];
      $code = intval(hexdec($in[1]));

      return chr($code).$tail;
  }
  /*
  Decodes a string from html codes &#***; and &#x**;
  One pass!
  Also replaces the named entities &colon; &tab; &newline;
  -- Decode only a-zA-Z:().=, because only these are used in filters
  */
  protected function _decode($str)
  {
      $decimalDecoded = preg_replace_callback("/\&\#(\d+)([^\d])/is", array("CBXSanitizer", "_decode_cb"), $str);
      $hexDecoded = preg_replace_callback("/\&\#x([\da-f]+)([^\da-f])/is", array("CBXSanitizer", "_decode_cb_hex"), $decimalDecoded);

      $named = array("&colon;","&tab;","&newline;");
      $plain = array(":","\t","\n");

      return str_replace($named, $plain, $hexDecoded);
  }
  /**
   * Replaces the list of tags that are cut together with their content.
   *
   * @param array $tags Tag names.
   * @return void
   */
  public function setDelTagsWithContent(array $tags)
  {
      $replacement = $tags;
      $this->delTagsWithContent = $replacement;
  }
  /**
   * Returns the list of tags that are cut together with their content.
   *
   * @return array Tag names.
   */
  public function getDelTagsWithContent()
  {
      $current = $this->delTagsWithContent;

      return $current;
  }
  953. };