PageRenderTime 57ms CodeModel.GetById 20ms RepoModel.GetById 0ms app.codeStats 0ms

/lib/filterlib.php

https://bitbucket.org/ceu/moodle_demo
PHP | 365 lines | 216 code | 64 blank | 85 comment | 41 complexity | 3037165c8b19a15ffffa5807df9d9345 MD5 | raw file
Possible License(s): BSD-3-Clause, LGPL-2.0, LGPL-2.1
  1. <?php // $Id: filterlib.php,v 1.24.10.4 2009/02/18 11:02:11 stronk7 Exp $
  2. // Contains special functions that are particularly useful to filters
  /**
   * Exclusive separator embedded in the placeholder keys under which extracted
   * tags are temporarily saved. It has to be rare enough that a placeholder can
   * never be matched by the phrase of a filterobject. MDL-18165
   */
  define('EXCL_SEPARATOR', '-%-');
  /**
   * Small value object describing one phrase to search for and how a match
   * has to be marked up. Filters build an array of these and hand it to
   * filter_phrases().
   *
   * The plain properties hold the settings supplied by the filter; the work_*
   * properties are working copies that filter_phrases() computes on first use
   * and flags as ready through $work_calculated.
   */
  class filterobject {
      /// Settings supplied by the caller
      public $phrase;
      public $hreftagbegin;
      public $hreftagend;
      public $casesensitive;
      public $fullmatch;
      public $replacementphrase;

      /// Working copies filled in by filter_phrases()
      public $work_phrase;
      public $work_hreftagbegin;
      public $work_hreftagend;
      public $work_casesensitive;
      public $work_fullmatch;
      public $work_replacementphrase;
      public $work_calculated;

      /**
       * Constructor.
       *
       * @param string $phrase            the phrase to look for
       * @param string $hreftagbegin      markup emitted before a match
       * @param string $hreftagend        markup emitted after a match
       * @param bool   $casesensitive     whether matching is case sensitive
       * @param bool   $fullmatch         whether only whole-word matches count
       * @param string $replacementphrase text to emit instead of the matched phrase
       *                                  (NULL keeps the matched text)
       */
      public function __construct($phrase, $hreftagbegin='<span class="highlight">',
                                  $hreftagend='</span>',
                                  $casesensitive=false,
                                  $fullmatch=false,
                                  $replacementphrase=NULL) {
          $this->phrase            = $phrase;
          $this->hreftagbegin      = $hreftagbegin;
          $this->hreftagend        = $hreftagend;
          $this->casesensitive     = $casesensitive;
          $this->fullmatch         = $fullmatch;
          $this->replacementphrase = $replacementphrase;
          $this->work_calculated   = false;
      }

      /**
       * Old (PHP 4 style) constructor name, kept so that code calling it
       * explicitly keeps working. Since PHP 8 a method named after its class is
       * no longer invoked by "new", which is why __construct() above is the
       * real constructor; this method only delegates to it.
       */
      public function filterobject($phrase, $hreftagbegin='<span class="highlight">',
                                   $hreftagend='</span>',
                                   $casesensitive=false,
                                   $fullmatch=false,
                                   $replacementphrase=NULL) {
          $this->__construct($phrase, $hreftagbegin, $hreftagend,
                             $casesensitive, $fullmatch, $replacementphrase);
      }
  }
  /**
   * Process phrases intelligently found within a HTML text (such as adding links).
   *
   * The text is first stripped of everything that must not be touched: the
   * contents of the "ignore" tag pairs and then every remaining tag are swapped
   * for placeholders. Each phrase is then replaced in what is left, and finally
   * the saved pieces are put back.
   *
   * Two settings read from $CFG limit the number of replacements:
   *  - filtermatchonepertext: only the first occurrence of each phrase is replaced;
   *  - filtermatchoneperpage: same, and a phrase already replaced by an earlier
   *    call is skipped (the list of used phrases is kept in a static variable).
   *
   * @param string $text            the text that we are filtering
   * @param array  $link_array      an array of filterobjects; their work_* properties
   *                                are filled in here (passed by reference)
   * @param array  $ignoretagsopen  opening tags (regular expressions allowed) whose
   *                                contents must be ignored; NULL means <a ...> tags
   * @param array  $ignoretagsclose the corresponding closing tags
   * @return string the filtered text
   */
  function filter_phrases ($text, &$link_array, $ignoretagsopen=NULL, $ignoretagsclose=NULL) {

      global $CFG;

      static $usedphrases;

      $ignoretags = array();  //To store all the enclosing tags to be completely ignored
      $tags = array();        //To store all the simple tags to be ignored

  /// A list of open/close tags that we should not replace within
  /// No reason why you can't put full preg expressions in here too
  /// eg '<script(.+?)>' to match any type of script tag
      $filterignoretagsopen  = array('<head>' , '<nolink>' , '<span class="nolink">');
      $filterignoretagsclose = array('</head>', '</nolink>', '</span>');

  /// Invalid prefixes and suffixes for the fullmatch searches
  /// Every "word" character, but the underscore, is an invalid suffix or prefix.
  /// (nice to use this because it includes national characters (accents...) as word characters.
      $filterinvalidprefixes = '([^\W_])';
      $filterinvalidsuffixes = '([^\W_])';

  /// Add the user defined ignore tags to the default list
  /// Unless specified otherwise, we will not replace within <a></a> tags
      if ( $ignoretagsopen === NULL ) {
          $ignoretagsopen  = array('<a\s[^>]+?>');
          $ignoretagsclose = array('</a>');
      }

      if ( is_array($ignoretagsopen) ) {
          foreach ($ignoretagsopen as $open) $filterignoretagsopen[] = $open;
          foreach ($ignoretagsclose as $close) $filterignoretagsclose[] = $close;
      }

  /// Double up some magic chars to avoid "accidental matches" with the
  /// placeholders (<#..#>, <%..%>, <*..*>) introduced below
      $text = preg_replace('/([#*%])/','\1\1',$text);

  /// Remove everything enclosed by the ignore tags from $text
      filter_save_ignore_tags($text,$filterignoretagsopen,$filterignoretagsclose,$ignoretags);

  /// Remove tags from $text
      filter_save_tags($text,$tags);

  /// Time to cycle through each phrase to be linked
      $size = sizeof($link_array);
      for ($n=0; $n < $size; $n++) {
          $linkobject =& $link_array[$n];

      /// Set some defaults if certain properties are missing
      /// Properties may be missing if the filterobject class has not been used to construct the object
          if (empty($linkobject->phrase)) {
              continue;
          }

      /// Avoid integers < 1000 to be linked. See bug 1446.
          $intcurrent = intval($linkobject->phrase);
          if (!empty($intcurrent) && strval($intcurrent) == $linkobject->phrase && $intcurrent < 1000) {
              continue;
          }

      /// All this work has to be done ONLY if it hasn't been done before
          if (!$linkobject->work_calculated) {
              if (!isset($linkobject->hreftagbegin) or !isset($linkobject->hreftagend)) {
              /// Fallback markup. The opening tag must be a complete tag
              /// (it used to lack its closing '>' and produced broken HTML).
                  $linkobject->work_hreftagbegin = '<span class="highlight">';
                  $linkobject->work_hreftagend   = '</span>';
              } else {
                  $linkobject->work_hreftagbegin = $linkobject->hreftagbegin;
                  $linkobject->work_hreftagend   = $linkobject->hreftagend;
              }

          /// Double up chars to protect true duplicates; they will
          /// be cleared up before returning to the user. Both tags get the
          /// same treatment, otherwise a genuine '##', '**' or '%%' in the
          /// closing tag would be collapsed by the final clean-up.
              $linkobject->work_hreftagbegin = preg_replace('/([#*%])/','\1\1',$linkobject->work_hreftagbegin);
              $linkobject->work_hreftagend   = preg_replace('/([#*%])/','\1\1',$linkobject->work_hreftagend);

              if (empty($linkobject->casesensitive)) {
                  $linkobject->work_casesensitive = false;
              } else {
                  $linkobject->work_casesensitive = true;
              }
              if (empty($linkobject->fullmatch)) {
                  $linkobject->work_fullmatch = false;
              } else {
                  $linkobject->work_fullmatch = true;
              }

          /// Strip tags out of the phrase
              $linkobject->work_phrase = strip_tags($linkobject->phrase);

          /// Double up chars that might cause a false match -- the duplicates will
          /// be cleared up before returning to the user.
              $linkobject->work_phrase = preg_replace('/([#*%])/','\1\1',$linkobject->work_phrase);

          /// Set the replacement phrase properly
              if ($linkobject->replacementphrase) {    //We have specified a replacement phrase
              /// Strip tags
                  $linkobject->work_replacementphrase = strip_tags($linkobject->replacementphrase);
              } else {                                 //The replacement is the original phrase as matched below
                  $linkobject->work_replacementphrase = '$1';
              }

          /// Quote any regular expression characters and the delimiter in the work phrase to be searched
              $linkobject->work_phrase = preg_quote($linkobject->work_phrase, '/');

          /// Work calculated
              $linkobject->work_calculated = true;
          }

      /// If $CFG->filtermatchoneperpage, avoid previously (request) linked phrases
          if (!empty($CFG->filtermatchoneperpage)) {
              if (!empty($usedphrases) && in_array($linkobject->work_phrase,$usedphrases)) {
                  continue;
              }
          }

      /// Regular expression modifiers
          $modifiers = ($linkobject->work_casesensitive) ? 's' : 'isu'; // works in unicode mode!

      /// Do we need to do a fullmatch?
      /// If yes then go through and hide any non full matching entries behind
      /// <*n*> placeholders so that the replacement below cannot see them
          if ($linkobject->work_fullmatch) {
              $notfullmatches = array();
              $regexp = '/'.$filterinvalidprefixes.'('.$linkobject->work_phrase.')|('.$linkobject->work_phrase.')'.$filterinvalidsuffixes.'/'.$modifiers;

              preg_match_all($regexp,$text,$list_of_notfullmatches);

          /// The match list can be missing if the expression failed to run
              if (!empty($list_of_notfullmatches[0])) {
                  foreach (array_unique($list_of_notfullmatches[0]) as $key=>$value) {
                      $notfullmatches['<*'.$key.'*>'] = $value;
                  }
                  if (!empty($notfullmatches)) {
                      $text = str_replace($notfullmatches,array_keys($notfullmatches),$text);
                  }
              }
          }

      /// Finally we do our highlighting
          if (!empty($CFG->filtermatchonepertext) || !empty($CFG->filtermatchoneperpage)) {
              $resulttext = preg_replace('/('.$linkobject->work_phrase.')/'.$modifiers,
                                        $linkobject->work_hreftagbegin.
                                        $linkobject->work_replacementphrase.
                                        $linkobject->work_hreftagend, $text, 1);
          } else {
              $resulttext = preg_replace('/('.$linkobject->work_phrase.')/'.$modifiers,
                                        $linkobject->work_hreftagbegin.
                                        $linkobject->work_replacementphrase.
                                        $linkobject->work_hreftagend, $text);
          }

      /// If the text has changed we have to look for links again.
      /// preg_replace() returns NULL on error (for instance invalid UTF-8 with
      /// the 'u' modifier); in that case keep the current text instead of
      /// replacing it with nothing.
          if ($resulttext !== NULL && $resulttext !== $text) {
          /// Set $text to $resulttext
              $text = $resulttext;
          /// Remove everything enclosed by the ignore tags from $text
              filter_save_ignore_tags($text,$filterignoretagsopen,$filterignoretagsclose,$ignoretags);
          /// Remove tags from $text
              filter_save_tags($text,$tags);
          /// If $CFG->filtermatchoneperpage, save linked phrases to request
              if (!empty($CFG->filtermatchoneperpage)) {
                  $usedphrases[] = $linkobject->work_phrase;
              }
          }

      /// Replace the not full matches before cycling to next link object
          if (!empty($notfullmatches)) {
              $text = str_replace(array_keys($notfullmatches),$notfullmatches,$text);
              unset($notfullmatches);
          }
      }

  /// Rebuild the text with all the excluded areas

      if (!empty($tags)) {
          $text = str_replace(array_keys($tags), $tags, $text);
      }

      if (!empty($ignoretags)) {
          $ignoretags = array_reverse($ignoretags); /// Reversed so "progressive" str_replace() will solve some nesting problems.
          $text = str_replace(array_keys($ignoretags),$ignoretags,$text);
      }

  /// Remove the protective doubleups
      $text =  preg_replace('/([#*%])(\1)/','\1',$text);

  /// Add missing javascript for popups
      $text = filter_add_javascript($text);

      return $text;
  }
  /**
   * Remove from a list of filterobjects those whose phrase has already been
   * seen earlier in the list. The first occurrence is the one that is kept.
   *
   * A case sensitive object is a duplicate when exactly the same phrase was kept
   * before; any other object is a duplicate when a kept phrase is equal to it
   * once both are lower-cased with moodle_strtolower().
   *
   * Phrases are compared as exact strings. The sets are arrays keyed by phrase,
   * so numerically equal strings such as '10' and '1e1' are NOT mistaken for
   * duplicates (as a loose in_array() would do) and every lookup is constant time.
   *
   * @param array $linkarray an array of filterobjects
   * @return array the filterobjects that were kept, re-indexed from 0
   */
  function filter_remove_duplicates($linkarray) {

      $concepts   = array(); // set of the phrases kept so far (phrase => true)
      $lconcepts  = array(); // a lower case version for case insensitive checks
      $cleanlinks = array();

      foreach ($linkarray as $filterobject) {
          $phrase  = (string)$filterobject->phrase;
          $lphrase = (string)moodle_strtolower($phrase);

          if ($filterobject->casesensitive) {
              $exists = isset($concepts[$phrase]);
          } else {
              $exists = isset($lconcepts[$lphrase]);
          }

          if (!$exists) {
              $cleanlinks[] = $filterobject;
              $concepts[$phrase]   = true;
              $lconcepts[$lphrase] = true;
          }
      }

      return $cleanlinks;
  }
  /**
   * Extract open/close tags and their contents to avoid being processed by filters.
   * Useful to extract pieces of code like <a>...</a> tags. Every extracted piece
   * is replaced in the text by a <#xEXCL_SEPARATORx#> code; the extracted pieces
   * are stored in the ignoretags array (as values), with the codes as keys.
   *
   * @param string $text                  the text that we are filtering (in/out)
   * @param array  $filterignoretagsopen  an array of open tags to start searching
   * @param array  $filterignoretagsclose an array of close tags to end searching
   * @param array  $ignoretags            an array of saved strings useful to rebuild
   *                                      the original text (in/out)
   */
  function filter_save_ignore_tags(&$text,$filterignoretagsopen,$filterignoretagsclose,&$ignoretags) {

      foreach (array_keys($filterignoretagsopen) as $position) {
      /// Both halves of the pair end up inside a '/'-delimited expression,
      /// so forward slashes have to be escaped
          $closing = str_replace('/', '\/', $filterignoretagsclose[$position]);
          $opening = str_replace('/', '\/', $filterignoretagsopen[$position]);

      /// Non-greedy, case insensitive, dot matches newlines too
          preg_match_all('/' . $opening . '(.*?)' . $closing . '/is', $text, $found);

      /// Register each distinct match under a fresh code
          $distinct = array_unique($found[0]);
          foreach ($distinct as $index => $piece) {
              $code = '<#' . (count($ignoretags) + 1) . EXCL_SEPARATOR . $index . '#>';
              $ignoretags[$code] = $piece;
          }

          if (empty($ignoretags)) {
              continue;
          }

      /// Swap every saved piece (old and new) for its code
          $text = str_replace(array_values($ignoretags), array_keys($ignoretags), $text);
      }
  }
  /**
   * Extract tags (any text enclosed by < and >) to avoid being processed by filters.
   * Every extracted tag is replaced in the text by a <%xEXCL_SEPARATORx%> code; the
   * extracted tags are stored in the tags array (as values), with the codes as keys.
   * Anything starting with "<#", "<%" or "<*" is left alone, which keeps the
   * codes already present in the text from being extracted again.
   *
   * @param string $text the text that we are filtering (in/out)
   * @param array  $tags an array of saved strings useful to rebuild the original text (in/out)
   */
  function filter_save_tags(&$text,&$tags) {

      preg_match_all('/<([^#%*].*?)>/is', $text, $found);

  /// Register each distinct tag under a fresh code
      $distinct = array_unique($found[0]);
      foreach ($distinct as $index => $markup) {
          $code = '<%' . (count($tags) + 1) . EXCL_SEPARATOR . $index . '%>';
          $tags[$code] = $markup;
      }

      if (empty($tags)) {
          return;
      }

  /// Swap every saved tag (old and new) for its code
      $text = str_replace(array_values($tags), array_keys($tags), $text);
  }
  /**
   * Add missing openpopup javascript to HTML files.
   *
   * The text is returned untouched unless it contains a closing </html> tag
   * and at least one 'onclick="return openpopup' handler. Otherwise a script
   * defining openpopup() is inserted before </head> or, when there is no
   * </head>, inside a new <head> element placed right after the <html> tag.
   *
   * @param string $text the text to inspect
   * @return string the text, with the script added when needed
   */
  function filter_add_javascript($text) {
      global $CFG;

  /// Nothing to do for fragments or for documents without popup links
      $ishtmlfile = (stripos($text, '</html>') !== FALSE);
      if (!$ishtmlfile || strpos($text, 'onclick="return openpopup') === FALSE) {
          return $text;
      }

  /// The script, one array entry per line (it starts with a line break)
      $script = implode("\n", array(
          '',
          '<script type="text/javascript">',
          '<!--',
          'function openpopup(url,name,options,fullscreen) {',
          'fullurl = "' . $CFG->httpswwwroot . '" + url;',
          'windowobj = window.open(fullurl,name,options);',
          'if (fullscreen) {',
          'windowobj.moveTo(0,0);',
          'windowobj.resizeTo(screen.availWidth,screen.availHeight);',
          '}',
          'windowobj.focus();',
          'return false;',
          '}',
          '// -->',
          '</script>',
      ));

      if (stripos($text, '</head>') === FALSE) {
      /// Last chance - try adding a head element after the html tag
          return preg_replace("/<html.*?>/is", "\\0<head>" . $script . '</head>', $text);
      }

  /// Preferred place: inside the existing head element
      return str_ireplace('</head>', $script . '</head>', $text);
  }
  301. ?>