PageRenderTime 47ms CodeModel.GetById 16ms RepoModel.GetById 0ms app.codeStats 0ms

/application/vendor/phpmailer/htmlfilter.php

https://bitbucket.org/chrispiechowicz/zepto
PHP | 861 lines | 507 code | 31 blank | 323 comment | 130 complexity | 4d5143899340a690d41a4959428ea674 MD5 | raw file
Possible License(s): LGPL-2.1, MIT, BSD-3-Clause
  1. <?php
  2. /**
  3. * htmlfilter.inc
  4. * ---------------
  5. * This set of functions allows you to filter html in order to remove
  6. * any malicious tags from it. Useful in cases when you need to filter
  7. * user input for any cross-site-scripting attempts.
  8. *
  9. * Copyright (C) 2002-2004 by Duke University
  10. *
  11. * This library is free software; you can redistribute it and/or
  12. * modify it under the terms of the GNU Lesser General Public
  13. * License as published by the Free Software Foundation; either
  14. * version 2.1 of the License, or (at your option) any later version.
  15. *
  16. * This library is distributed in the hope that it will be useful,
  17. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  18. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  19. * Lesser General Public License for more details.
  20. *
  21. * You should have received a copy of the GNU Lesser General Public
  22. * License along with this library; if not, write to the Free Software
  23. * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
  24. * 02110-1301 USA
  25. *
  26. * @Author Konstantin Riabitsev <icon@linux.duke.edu>
  27. * @Version 1.1 ($Date: 2011-07-04 14:02:23 -0400 (Mon, 04 Jul 2011) $)
  28. */
  29. /**
  30. * @Author Jim Jagielski <jim@jaguNET.com / jimjag@gmail.com>
  31. */
  /**
   * Builds the textual form of a tag from its name, its attributes and
   * its type. Called internally by tln_sanitize.
   *
   * Attribute values are emitted verbatim as name=value, so each value
   * must already carry its own quotes.
   *
   * @param $tagname the name of the tag.
   * @param $attary  map of attribute name => value; ignored unless it is
   *                 a non-empty array.
   * @param $tagtype 1 for an opening tag, 2 for a closing tag, 3 for an
   *                 XHTML-style self-closing tag.
   * @return a string with the final tag representation.
   */
  function tln_tagprint($tagname, $attary, $tagtype){
      if ($tagtype == 2){
          // Closing tags never carry attributes.
          return '</' . $tagname . '>';
      }
      $fulltag = '<' . $tagname;
      if (is_array($attary) && count($attary) > 0){
          $atts = array();
          // foreach instead of while(list() = each()): each() no longer
          // exists in PHP 8.
          foreach ($attary as $attname => $attvalue){
              $atts[] = "$attname=$attvalue";
          }
          $fulltag .= ' ' . implode(' ', $atts);
      }
      if ($tagtype == 3){
          $fulltag .= ' /';
      }
      return $fulltag . '>';
  }
  /**
   * Callback for array_walk: lowercases one array element in place.
   *
   * @param $val the element, received by reference and overwritten with
   *             its lowercase form.
   * @return void; the result is delivered through the reference.
   */
  function tln_casenormalize(&$val){
      $lowered = strtolower($val);
      $val = $lowered;
  }
  /**
   * Skips any whitespace starting at the given position within a string
   * and reports where the next non-whitespace character is.
   *
   * @param $body   the string
   * @param $offset the offset within the string where we should start
   *                looking for the next non-whitespace character.
   * @return the location within $body of the next non-whitespace
   *         character; $offset itself when there is no whitespace at
   *         $offset or when $offset is at or past the end of $body.
   */
  function tln_skipspace($body, $offset){
      // Nothing to skip at or past the end; this also keeps preg_match()
      // from being handed an out-of-range offset.
      if ($offset >= strlen($body)){
          return $offset;
      }
      // \G anchors the match at $offset, so the remainder of the body does
      // not have to be copied with substr(). The old code tested the match
      // with sizeof() on a string, which is a TypeError in PHP 8.
      if (preg_match('/\G\s+/', $body, $matches, 0, $offset)){
          $offset += strlen($matches[0]);
      }
      return $offset;
  }
  /**
   * Looks for the next occurrence of a string within a string. It is
   * really just a glorified "strpos", except that a failed search is
   * reported as the length of the body instead of false.
   *
   * @param $body   The string to look for needle in.
   * @param $offset Start looking from this position.
   * @param $needle The character/string to look for.
   * @return location of the next occurrence of the needle, or
   *         strlen($body) if the needle wasn't found or $offset lies
   *         outside of $body.
   */
  function tln_findnxstr($body, $offset, $needle){
      $length = strlen($body);
      // strpos() raises a ValueError on PHP 8 (a warning on PHP 7) for an
      // offset outside the string; treat that as "not found" so callers
      // keep getting the documented fallback.
      if ($offset > $length || $offset < -$length){
          return $length;
      }
      $pos = strpos($body, $needle, $offset);
      return ($pos === false) ? $length : $pos;
  }
  /**
   * Finds the first place, at or after $offset, where a PCRE fragment
   * matches inside $body.
   *
   * The fragment is wrapped as %^(.*?)(FRAGMENT)%s and applied to the
   * part of $body that starts at $offset, so it must not contain an
   * unescaped "%".
   *
   * @param $body   The string to look for needle in.
   * @param $offset Start looking from here.
   * @param $reg    A PCRE-style regex fragment to match.
   * @return false if nothing matched, otherwise an array holding:
   *         - integer position of the match within $body
   *         - string with the content between $offset and the match
   *         - string with the matched text itself
   */
  function tln_findnxreg($body, $offset, $reg){
      $found = array();
      $pattern = '%^(.*?)(' . $reg . ')%s';
      preg_match($pattern, substr($body, $offset), $found);
      if (isset($found[0])){
          $skipped = $found[1];
          return array(
              $offset + strlen($skipped),
              $skipped,
              $found[2],
          );
      }
      return false;
  }
  /**
   * Looks for the next tag at or after $offset and parses its name and
   * attributes.
   *
   * @param $body   String where to look for the next tag.
   * @param $offset Start looking from here.
   * @return false if no more tags exist in the body, or
   *         an array with the following members:
   *         - string with the lowercased name of the tag
   *         - array of lowercased attribute names => values (values keep
   *           or are given surrounding quotes), or false when the tag
   *           ended right after its name
   *         - integer with tag type (1 opening, 2 closing, 3 self-closing)
   *         - integer where the tag starts (starting "<")
   *         - integer where the tag ends (ending ">")
   *         The first three members are false if the tag is invalid,
   *         unterminated, a comment or an SGML declaration.
   */
  function tln_getnxtag($body, $offset){
      $bodylen = strlen($body);
      if ($offset > $bodylen){
          return false;
      }
      $lt = tln_findnxstr($body, $offset, '<');
      if ($lt == $bodylen){
          return false;
      }
      /**
       * We are here:
       * blah blah <tag attribute="value">
       * \---------^
       */
      $pos = tln_skipspace($body, $lt + 1);
      if ($pos >= $bodylen){
          return array(false, false, false, $lt, $bodylen);
      }
      /**
       * There are 3 kinds of tags:
       * 1. Opening tag, e.g.:
       *    <a href="blah">
       * 2. Closing tag, e.g.:
       *    </a>
       * 3. XHTML-style content-less tag, e.g.:
       *    <img src="blah"/>
       */
      switch (substr($body, $pos, 1)){
          case '/':
              $tagtype = 2;
              $pos++;
              break;
          case '!':
              /**
               * A comment or an SGML declaration. Either way it is
               * reported as an invalid tag spanning up to its terminator.
               */
              if (substr($body, $pos + 1, 2) == '--'){
                  $gt = strpos($body, '-->', $pos);
                  if ($gt === false){
                      $gt = $bodylen;
                  } else {
                      // Point at the ">" of "-->".
                      $gt += 2;
                  }
              } else {
                  $gt = tln_findnxstr($body, $pos, '>');
              }
              return array(false, false, false, $lt, $gt);
          default:
              /**
               * Assume tagtype 1 for now. If it's type 3, we'll switch
               * values later.
               */
              $tagtype = 1;
              break;
      }
      /**
       * Look for the next character that is not a word character, "-" or
       * "_"; it marks the end of the tag name.
       */
      $regary = tln_findnxreg($body, $pos, '[^\w\-_]');
      if ($regary === false){
          return array(false, false, false, $lt, $bodylen);
      }
      list($pos, $tagname, $match) = $regary;
      $tagname = strtolower($tagname);
      /**
       * $match can be either of these:
       * '>'  indicating the end of the tag entirely.
       * '\s' indicating the end of the tag name.
       * '/'  indicating that this is type-3 xhtml tag.
       *
       * Whatever else we find there indicates an invalid tag.
       */
      switch ($match){
          case '/':
              /**
               * This is an xhtml-style tag with a closing / at the
               * end, like so: <img src="blah"/>. Check if it's followed
               * by the closing bracket. If not, then this tag is invalid
               */
              if (substr($body, $pos, 2) != '/>'){
                  $gt = tln_findnxstr($body, $pos, '>');
                  return array(false, false, false, $lt, $gt);
              }
              $pos++;
              $tagtype = 3;
              // Deliberate fall-through: $pos now sits on the ">".
          case '>':
              return array($tagname, false, $tagtype, $lt, $pos);
          default:
              if (!preg_match('/\s/', $match)){
                  /**
                   * This is an invalid tag! Look for the next closing ">".
                   */
                  $gt = tln_findnxstr($body, $lt, '>');
                  return array(false, false, false, $lt, $gt);
              }
      }
      /**
       * At this point we're here:
       * <tagname attribute='blah'>
       * \-------^
       *
       * At this point we loop in order to find all attributes.
       */
      $attary = array();
      while ($pos <= $bodylen){
          $pos = tln_skipspace($body, $pos);
          if ($pos == $bodylen){
              /**
               * Non-closed tag.
               */
              return array(false, false, false, $lt, $pos);
          }
          /**
           * See if we arrived at a ">" or "/>", which means that we reached
           * the end of the tag.
           */
          $matches = array();
          preg_match('%^(\s*)(>|/>)%s', substr($body, $pos), $matches);
          if (isset($matches[0]) && $matches[0]){
              $pos += strlen($matches[1]);
              if ($matches[2] == '/>'){
                  $tagtype = 3;
                  $pos++;
              }
              return array($tagname, $attary, $tagtype, $lt, $pos);
          }
          /**
           * There are several types of attributes, with optional
           * [:space:] between members.
           * Type 1:
           *   attrname[:space:]=[:space:]'CDATA'
           * Type 2:
           *   attrname[:space:]=[:space:]"CDATA"
           * Type 3:
           *   attr[:space:]=[:space:]CDATA
           * Type 4:
           *   attrname
           *
           * We leave types 1 and 2 the same, type 3 we check for
           * '"' and convert to "&quot;" if needed, then wrap in
           * double quotes. Type 4 we convert into:
           * attrname="yes".
           */
          $regary = tln_findnxreg($body, $pos, '[^\w\-_]');
          if ($regary === false){
              /**
               * Looks like body ended before the end of tag.
               */
              return array(false, false, false, $lt, $bodylen);
          }
          list($pos, $attname, $match) = $regary;
          $attname = strtolower($attname);
          /**
           * We arrived at the end of attribute name. Several things possible
           * here:
           * '>'  means the end of the tag and this is attribute type 4
           * '/'  if followed by '>' means the same thing as above
           * '\s' means a lot of things -- look what it's followed by.
           * anything else means the attribute is invalid.
           */
          switch ($match){
              case '/':
                  /**
                   * An xhtml-style "/>" right after the attribute name;
                   * a lone "/" makes the tag invalid.
                   */
                  if (substr($body, $pos, 2) != '/>'){
                      $gt = tln_findnxstr($body, $pos, '>');
                      return array(false, false, false, $lt, $gt);
                  }
                  $pos++;
                  $tagtype = 3;
                  // Deliberate fall-through: $pos now sits on the ">".
              case '>':
                  // Square brackets: the {}-offset syntax is a parse error
                  // in PHP 8.
                  $attary[$attname] = '"yes"';
                  return array($tagname, $attary, $tagtype, $lt, $pos);
              default:
                  /**
                   * Skip whitespace and see what we arrive at.
                   */
                  $pos = tln_skipspace($body, $pos);
                  $char = substr($body, $pos, 1);
                  /**
                   * Two things are valid here:
                   * '=' means this is attribute type 1 2 or 3.
                   * \w means this was attribute type 4.
                   * anything else we ignore and re-loop. End of tag and
                   * invalid stuff will be caught by our checks at the
                   * beginning of the loop.
                   */
                  if ($char == '='){
                      $pos++;
                      $pos = tln_skipspace($body, $pos);
                      /**
                       * Here are 3 possibilities:
                       * "'" attribute type 1
                       * '"' attribute type 2
                       * everything else is the content of tag type 3
                       */
                      $quot = substr($body, $pos, 1);
                      if ($quot === '\'' || $quot === '"'){
                          // Quoted value: take everything up to the
                          // matching quote and keep the quote style.
                          $regary = tln_findnxreg($body, $pos + 1, $quot);
                          if ($regary === false){
                              return array(false, false, false, $lt, $bodylen);
                          }
                          list($pos, $attval, $match) = $regary;
                          $pos++;
                          $attary[$attname] = $quot . $attval . $quot;
                      } else {
                          /**
                           * These are hateful. Look for \s, or >.
                           */
                          $regary = tln_findnxreg($body, $pos, '[\s>]');
                          if ($regary === false){
                              return array(false, false, false, $lt, $bodylen);
                          }
                          list($pos, $attval, $match) = $regary;
                          /**
                           * If it's ">" it will be caught at the top.
                           */
                          $attval = str_replace('"', '&quot;', $attval);
                          $attary[$attname] = '"' . $attval . '"';
                      }
                  } elseif (preg_match('|[\w/>]|', $char)){
                      /**
                       * That was attribute type 4.
                       */
                      $attary[$attname] = '"yes"';
                  } else {
                      /**
                       * An illegal character. Find next '>' and return.
                       */
                      $gt = tln_findnxstr($body, $pos, '>');
                      return array(false, false, false, $lt, $gt);
                  }
          }
      }
      /**
       * The fact that we got here indicates that the tag end was never
       * found. Return invalid tag indication so it gets stripped.
       */
      return array(false, false, false, $lt, $bodylen);
  }
  /**
   * Translates numeric entities into literal bytes so they can be checked.
   *
   * Every match of $regex in $attvalue is replaced by the single byte
   * whose code is the number captured in the first group.
   *
   * @param $attvalue the by-ref value to check; rewritten in place when
   *                  anything matched.
   * @param $regex    the regular expression to check against; its first
   *                  capture group must hold the numeric code.
   * @param $hex      whether the captured number is hexadecimal.
   * @return True or False depending on whether there were matches.
   */
  function tln_deent(&$attvalue, $regex, $hex=false){
      preg_match_all($regex, $attvalue, $matches);
      if (!is_array($matches) || count($matches[0]) == 0){
          return false;
      }
      $repl = array();
      foreach ($matches[0] as $i => $entity){
          $numval = $matches[1][$i];
          if ($hex){
              $numval = hexdec($numval);
          }
          // chr() wraps modulo 256 on its own; reducing as a float first
          // keeps absurdly long digit runs from overflowing its integer
          // argument.
          $repl[$entity] = chr((int) fmod((float) $numval, 256));
      }
      $attvalue = strtr($attvalue, $repl);
      return true;
  }
  /**
   * Decodes entity- and backslash-encoded characters in an attribute
   * value so that later checks see the literal text.
   *
   * Decimal entities, hexadecimal entities and backslash-number escapes
   * are decoded with tln_deent over and over until none is left, then
   * the remaining backslashes are dropped with stripslashes().
   *
   * @param $attvalue A string to run entity check against.
   * @return Nothing, modifies a reference value.
   */
  function tln_defang(&$attvalue){
      /**
       * Nothing to decode unless there is an ampersand or a backslash.
       */
      $has_amp = strpos($attvalue, '&') !== false;
      $has_backslash = strpos($attvalue, '\\') !== false;
      if (!$has_amp && !$has_backslash){
          return;
      }
      do {
          // Short-circuit on purpose: once one decoder changed the value
          // the others wait for the next pass.
          $changed = tln_deent($attvalue, '/\&#0*(\d+);*/s')
              || tln_deent($attvalue, '/\&#x0*((\d|[a-f])+);*/si', true)
              || tln_deent($attvalue, '/\\\\(\d+)/s', true);
      } while ($changed);
      $attvalue = stripslashes($attvalue);
  }
  /**
   * Removes every tab, carriage return, newline, NUL byte and space
   * from an attribute value, because some browsers accept
   * "java[tab]script" as readily as "javascript".
   *
   * @param $attvalue The attribute value, by reference; rewritten only
   *                  when it contains one of those characters.
   * @return Nothing, modifies a reference value.
   */
  function tln_unspace(&$attvalue){
      $unwanted = array("\t", "\r", "\n", "\0", " ");
      // Length of the leading run that is free of unwanted characters.
      $clean_run = strcspn($attvalue, "\t\r\n\0 ");
      if ($clean_run == strlen($attvalue)){
          return;
      }
      $attvalue = str_replace($unwanted, '', $attvalue);
  }
  /**
   * Runs the attribute-level checks for one tag: drops forbidden
   * attributes, rewrites dangerous attribute values and appends forced
   * attributes.
   *
   * Values are decoded (tln_defang) and stripped of whitespace
   * (tln_unspace) only for the purpose of checking; an attribute keeps
   * its original value unless one of the $bad_attvals rules changes the
   * decoded form.
   *
   * @param $tagname         String with the name of the tag.
   * @param $attary          Array with all tag attributes (name => value).
   * @param $rm_attnames     Map of tag-name regex => list of
   *                         attribute-name regexes to remove.
   * @param $bad_attvals     Map of tag-name regex => map of
   *                         attribute-name regex => array(patterns,
   *                         replacements) handed to preg_replace.
   * @param $add_attr_to_tag Map of tag-name regex => attributes that are
   *                         merged into the result.
   * @return Array with modified attributes.
   */
  function tln_fixatts($tagname,
                       $attary,
                       $rm_attnames,
                       $bad_attvals,
                       $add_attr_to_tag
                       ){
      // foreach walks a snapshot of $attary, so unsetting entries inside
      // the loop is safe; each() no longer exists in PHP 8.
      foreach ($attary as $attname => $attvalue){
          /**
           * See if this attribute should be removed.
           */
          foreach ($rm_attnames as $matchtag => $matchattrs){
              if (preg_match($matchtag, $tagname)){
                  foreach ($matchattrs as $matchattr){
                      if (preg_match($matchattr, $attname)){
                          unset($attary[$attname]);
                          // Move on to the next attribute. A bare
                          // "continue" only left the innermost loop, so a
                          // removed attribute could be re-added by the
                          // value checks below.
                          continue 3;
                      }
                  }
              }
          }
          /**
           * Remove any backslashes, entities, or extraneous whitespace.
           */
          tln_defang($attvalue);
          tln_unspace($attvalue);
          /**
           * Now run the value checks: for every rule whose tag and
           * attribute regexes match, apply its pattern/replacement lists
           * and store the result if it differs.
           */
          foreach ($bad_attvals as $matchtag => $matchattrs){
              if (!preg_match($matchtag, $tagname)){
                  continue;
              }
              foreach ($matchattrs as $matchattr => $valary){
                  if (!preg_match($matchattr, $attname)){
                      continue;
                  }
                  /**
                   * There are two arrays in valary.
                   * First is matches.
                   * Second one is replacements
                   */
                  list($valmatch, $valrepl) = $valary;
                  $newvalue = preg_replace($valmatch, $valrepl, $attvalue);
                  if ($newvalue != $attvalue){
                      $attary[$attname] = $newvalue;
                  }
              }
          }
      }
      /**
       * See if we need to append any attributes to this tag.
       */
      foreach ($add_attr_to_tag as $matchtag => $addattary){
          if (preg_match($matchtag, $tagname)){
              $attary = array_merge($attary, $addattary);
          }
      }
      return $attary;
  }
  /**
   * Filters an HTML string: strips unwanted tags (optionally together
   * with their content), cleans the attributes of the tags that remain
   * and can close tags that were left open.
   *
   * @param $body                 the string with HTML you wish to filter
   * @param $tag_list             array whose first element is a flag and
   *                              whose remaining elements are tag names:
   *                              a false flag removes the listed tags, a
   *                              true flag removes every tag that is NOT
   *                              listed
   * @param $rm_tags_with_content tag names removed together with
   *                              everything up to their closing tag
   * @param $self_closing_tags    tag names always printed in the
   *                              self-closing form
   * @param $force_tag_closing    when true, closing tags are appended for
   *                              tags still open at the end of $body
   * @param $rm_attnames          passed through to tln_fixatts
   * @param $bad_attvals          passed through to tln_fixatts
   * @param $add_attr_to_tag      passed through to tln_fixatts
   * @return the filtered html, wrapped in "begin/end tln_sanitized html"
   *         comments.
   */
  function tln_sanitize($body,
                        $tag_list,
                        $rm_tags_with_content,
                        $self_closing_tags,
                        $force_tag_closing,
                        $rm_attnames,
                        $bad_attvals,
                        $add_attr_to_tag
                        )
  {
      /**
       * The first element of tag_list says how to read the rest:
       * false means remove these tags
       * true means allow these tags
       */
      $rm_tags = array_shift($tag_list);
      /**
       * Normalize the tag lists to lowercase. Explicit is_array() checks
       * replace the former @-suppressed calls.
       */
      if (is_array($tag_list)){
          array_walk($tag_list, 'tln_casenormalize');
      }
      if (is_array($rm_tags_with_content)){
          array_walk($rm_tags_with_content, 'tln_casenormalize');
      }
      if (is_array($self_closing_tags)){
          array_walk($self_closing_tags, 'tln_casenormalize');
      }
      $curpos = 0;
      // Tag name => number of currently open instances.
      $open_tags = array();
      $trusted = "<!-- begin tln_sanitized html -->\n";
      // False, or the name of the tag whose content is being discarded.
      $skip_content = false;
      /**
       * Take care of netscape's stupid javascript entities like
       * &{alert('boo')};
       */
      $body = preg_replace('/&(\{.*?\};)/si', '&amp;\\1', $body);
      while (($curtag = tln_getnxtag($body, $curpos)) != false){
          list($tagname, $attary, $tagtype, $lt, $gt) = $curtag;
          if ($skip_content === false){
              // Text between the previous tag and this one is kept as is.
              $trusted .= substr($body, $curpos, $lt - $curpos);
          }
          // Loose test on purpose: invalid tags report false, and an empty
          // tag name is dropped the same way.
          if ($tagname != false){
              if ($tagtype == 2){
                  if ($skip_content === $tagname){
                      /**
                       * Got to the end of tag we needed to remove.
                       */
                      $tagname = false;
                      $skip_content = false;
                  } elseif ($skip_content === false){
                      // Only print a closing tag that matches an opening
                      // tag we let through earlier.
                      if (isset($open_tags[$tagname])
                          && $open_tags[$tagname] > 0){
                          $open_tags[$tagname]--;
                      } else {
                          $tagname = false;
                      }
                  }
              } elseif ($skip_content === false){
                  /**
                   * See if this is a self-closing type and change
                   * tagtype appropriately.
                   */
                  if ($tagtype == 1
                      && in_array($tagname, $self_closing_tags, true)){
                      $tagtype = 3;
                  }
                  /**
                   * See if we should skip this tag and any content
                   * inside it.
                   */
                  if ($tagtype == 1
                      && in_array($tagname, $rm_tags_with_content, true)){
                      $skip_content = $tagname;
                  } else {
                      $listed = in_array($tagname, $tag_list, true);
                      // Deny-list mode drops listed tags, allow-list mode
                      // drops unlisted ones.
                      $drop = $rm_tags ? !$listed : $listed;
                      if ($drop){
                          $tagname = false;
                      } else {
                          if ($tagtype == 1){
                              if (isset($open_tags[$tagname])){
                                  $open_tags[$tagname]++;
                              } else {
                                  $open_tags[$tagname] = 1;
                              }
                          }
                          /**
                           * This is where we run other checks.
                           */
                          if (is_array($attary) && count($attary) > 0){
                              $attary = tln_fixatts($tagname,
                                                    $attary,
                                                    $rm_attnames,
                                                    $bad_attvals,
                                                    $add_attr_to_tag);
                          }
                      }
                  }
              }
              if ($tagname != false && $skip_content === false){
                  $trusted .= tln_tagprint($tagname, $attary, $tagtype);
              }
          }
          $curpos = $gt + 1;
      }
      // Whatever follows the last tag.
      $trusted .= substr($body, $curpos, strlen($body) - $curpos);
      if ($force_tag_closing){
          foreach ($open_tags as $tagname => $opentimes){
              while ($opentimes > 0){
                  $trusted .= '</' . $tagname . '>';
                  $opentimes--;
              }
          }
          $trusted .= "\n";
      }
      $trusted .= "<!-- end tln_sanitized html -->\n";
      return $trusted;
  }
  /**
   * Filters an HTML string through tln_sanitize with a fixed rule set.
   *
   * The rules remove a list of tags (object, meta, html, head, base,
   * link, frame, iframe, plaintext, marquee), remove some tags together
   * with their content (script, applet, embed, title, frameset, xmp,
   * xml), drop on*, dynsrc, data* and lowsrc* attributes, neutralise
   * script-like URLs in src/background/href/action/style attributes,
   * add target="_blank" to <a> tags and force open tags to be closed.
   *
   * @param $body                  the HTML to filter
   * @param $trans_image_path      value substituted for src/background
   *                               URLs that are rejected
   * @param $block_external_images when true, http/https URLs in
   *                               src/background attributes and in
   *                               style url(...) values are replaced by
   *                               $trans_image_path as well
   * @return the filtered HTML as returned by tln_sanitize.
   */
  function HTMLFilter($body, $trans_image_path, $block_external_images = false) {
      $tag_list = array(
          false,
          "object",
          "meta",
          "html",
          "head",
          "base",
          "link",
          "frame",
          "iframe",
          "plaintext",
          "marquee"
      );
      $rm_tags_with_content = array(
          "script",
          "applet",
          "embed",
          "title",
          "frameset",
          "xmp",
          "xml"
      );
      $self_closing_tags = array(
          "img",
          "br",
          "hr",
          "input",
          "outbind"
      );
      $force_tag_closing = true;
      $rm_attnames = array(
          "/.*/" =>
              array(
                  // "/target/i",
                  "/^on.*/i",
                  "/^dynsrc/i",
                  "/^data.*/i",
                  "/^lowsrc.*/i"
              )
      );
      // ${1}/${2} keep the back-reference separate from the path; "\1"
      // glued to a path that starts with a digit would be read as a
      // different group number.
      $image_repl = '${1}' . $trans_image_path . '${2}';
      // preg_replace pairs patterns and replacements by position, so each
      // pattern list below has exactly one replacement per pattern. The
      // previous lists had surplus entries, which shifted the replacement
      // used by the last style rule and by the patterns pushed further
      // down.
      $bad_attvals = array(
          "/.*/" =>
              array(
                  "/^src|background/i" =>
                      array(
                          array(
                              "/^([\'\"])\s*\S+script\s*:.*([\'\"])/si",
                              "/^([\'\"])\s*mocha\s*:*.*([\'\"])/si",
                              "/^([\'\"])\s*about\s*:.*([\'\"])/si"
                          ),
                          array(
                              $image_repl,
                              $image_repl,
                              $image_repl
                          )
                      ),
                  "/^href|action/i" =>
                      array(
                          array(
                              "/^([\'\"])\s*\S+script\s*:.*([\'\"])/si",
                              "/^([\'\"])\s*mocha\s*:*.*([\'\"])/si",
                              "/^([\'\"])\s*about\s*:.*([\'\"])/si"
                          ),
                          array(
                              "\\1#\\1",
                              "\\1#\\1",
                              "\\1#\\1"
                          )
                      ),
                  "/^style/i" =>
                      array(
                          array(
                              "/expression/i",
                              "/binding/i",
                              "/behaviou*r/i",
                              "/include-source/i",
                              "/position\s*:\s*absolute/i",
                              "/url\s*\(\s*([\'\"])\s*\S+script\s*:.*([\'\"])\s*\)/si",
                              "/url\s*\(\s*([\'\"])\s*mocha\s*:.*([\'\"])\s*\)/si",
                              "/url\s*\(\s*([\'\"])\s*about\s*:.*([\'\"])\s*\)/si",
                              "/(.*)\s*:\s*url\s*\(\s*([\'\"]*)\s*\S+script\s*:.*([\'\"]*)\s*\)/si"
                          ),
                          array(
                              "idiocy",
                              "idiocy",
                              "idiocy",
                              "idiocy",
                              "",
                              "url(\\1#\\1)",
                              "url(\\1#\\1)",
                              "url(\\1#\\1)",
                              "\\1:url(\\2#\\3)"
                          )
                      )
              )
      );
      if ($block_external_images){
          // Square-bracket offsets: the {}-offset syntax is a parse error
          // in PHP 8.
          $bad_attvals['/.*/']['/^src|background/i'][0][] =
              '/^([\'\"])\s*https*:.*([\'\"])/si';
          $bad_attvals['/.*/']['/^src|background/i'][1][] =
              '${1}' . $trans_image_path . '${1}';
          $bad_attvals['/.*/']['/^style/i'][0][] =
              '/url\(([\'\"])\s*https*:.*([\'\"])\)/si';
          $bad_attvals['/.*/']['/^style/i'][1][] =
              'url(${1}' . $trans_image_path . '${1})';
      }
      $add_attr_to_tag = array(
          "/^a$/i" =>
              array('target' => '"_blank"')
      );
      return tln_sanitize($body,
                          $tag_list,
                          $rm_tags_with_content,
                          $self_closing_tags,
                          $force_tag_closing,
                          $rm_attnames,
                          $bad_attvals,
                          $add_attr_to_tag
                          );
  }
  830. ?>