PageRenderTime 35ms CodeModel.GetById 14ms RepoModel.GetById 0ms app.codeStats 0ms

/filter/urltolink/filter.php

https://gitlab.com/JrLucena/moodle
PHP | 177 lines | 72 code | 24 blank | 81 comment | 12 complexity | fc60cec1604f1e56fd49da0963419329 MD5 | raw file
  1. <?php
  2. // This file is part of Moodle - http://moodle.org/
  3. //
  4. // Moodle is free software: you can redistribute it and/or modify
  5. // it under the terms of the GNU General Public License as published by
  6. // the Free Software Foundation, either version 3 of the License, or
  7. // (at your option) any later version.
  8. //
  9. // Moodle is distributed in the hope that it will be useful,
  10. // but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. // GNU General Public License for more details.
  13. //
  14. // You should have received a copy of the GNU General Public License
  15. // along with Moodle. If not, see <http://www.gnu.org/licenses/>.
  16. /**
  17. * Filter converting URLs in the text to HTML links
  18. *
  19. * @package filter
  20. * @subpackage urltolink
  21. * @copyright 2010 David Mudrak <david@moodle.com>
  22. * @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
  23. */
  24. defined('MOODLE_INTERNAL') || die();
  /**
   * Text filter that turns plain-text URLs (http://, https:// or www.) into HTML links.
   *
   * Optionally (setting "embedimages") links created by this filter that point
   * to jpg/png/gif files are replaced by embedded images.
   */
  class filter_urltolink extends moodle_text_filter {

      /**
       * @var array global configuration for this filter
       *
       * This might be eventually moved into parent class if we found it
       * useful for other filters, too.
       */
      protected static $globalconfig;

      /**
       * Apply the filter to the text
       *
       * The text is only processed when $options['originalformat'] is set and is
       * listed in the comma separated "formats" setting of filter_urltolink.
       *
       * @see filter_manager::apply_filter_chain()
       * @param string $text to be processed by the filter
       * @param array $options filter options
       * @return string text after processing
       */
      public function filter($text, array $options = array()) {
          if (!isset($options['originalformat'])) {
              // If the format is not specified, we are probably called by {@see format_string()}.
              // In that case, it would be dangerous to replace URL with the link because it could
              // be stripped. Therefore, we do nothing.
              return $text;
          }
          if (in_array($options['originalformat'], explode(',', get_config('filter_urltolink', 'formats')))) {
              $this->convert_urls_into_links($text);
          }
          return $text;
      }

      ////////////////////////////////////////////////////////////////////////////
      // internal implementation starts here
      ////////////////////////////////////////////////////////////////////////////

      /**
       * Given some text this function converts any URLs it finds into HTML links
       *
       * The text is split into HTML tags and content chunks; tags are left alone and
       * every space separated word of a content chunk is matched against the URL regex.
       *
       * @param string $text Passed in by reference. The string to be searched for urls.
       */
      protected function convert_urls_into_links(&$text) {
          // Existing <a ...>...</a> elements are set aside so their content is never linked twice.
          // Only anchors are listed here. See MDL-21168 for more info. A better way to ignore
          // tags whether or not they are escaped partially or completely would be desirable.
          // For example:
          // <a href="blah">
          // &lt;a href="blah"&gt;
          // &lt;a href="blah">
          $filterignoretagsopen = array('<a\s[^>]+?>');
          $filterignoretagsclose = array('</a>');
          // $ignoretags is filled in by this call; its keys are swapped back for its values below.
          filter_save_ignore_tags($text, $filterignoretagsopen, $filterignoretagsclose, $ignoretags);

          // Check if we support unicode modifiers in regular expressions. Cache it.
          // TODO: this check should be a environment requirement in Moodle 2.0, as far as unicode
          // chars are going to arrive to URLs officially really soon (2010?)
          // Original RFC regex from: http://www.bytemycode.com/snippets/snippet/796/
          // Various ideas from: http://alanstorm.com/url_regex_explained
          // Unicode check, negative assertion and other bits from Moodle.
          static $unicoderegexp;
          if (!isset($unicoderegexp)) {
              // This will fail silently, returning false, when the /u modifier is not supported.
              $unicoderegexp = @preg_match('/\pL/u', 'a');
          }

          // TODO MDL-21296 - use of unicode modifiers may cause a timeout
          $urlstart = '(?:http(s)?://|(?<!://)(www\.))';
          $domainsegment = '(?:[\pLl0-9][\pLl0-9-]*[\pLl0-9]|[\pLl0-9])';
          $numericip = '(?:(?:[0-9]{1,3}\.){3}[0-9]{1,3})';
          $port = '(?::\d*)';
          $pathchar = '(?:[\pL0-9\.!$&\'\(\)*+,;=_~:@-]|%[a-f0-9]{2})';
          $path = "(?:/$pathchar*)*";
          $querystring = '(?:\?(?:[\pL0-9\.!$&\'\(\)*+,;=_~:@/?-]|%[a-fA-F0-9]{2})*)';
          $fragment = '(?:\#(?:[\pL0-9\.!$&\'\(\)*+,;=_~:@/?-]|%[a-fA-F0-9]{2})*)';

          // Trailing lookbehind: the matched URL must not end with one of ] ) , . ;
          // so that punctuation following a URL is not swallowed into the link.
          $lookbehindend = "(?<![]),.;])";

          $regex = "$urlstart((?:$domainsegment\.)+$domainsegment|$numericip)" .
                  "($port?$path$querystring?$fragment?)$lookbehindend";
          if ($unicoderegexp) {
              $regex = '#' . $regex . '#ui';
          } else {
              // No unicode support: degrade the letter classes to plain a-z. This must be a
              // literal replacement (the needles are not valid delimited regular expressions),
              // and the longer token has to be replaced first.
              $regex = '#' . str_replace(array('\pLl', '\pL'), 'a-z', $regex) . '#i';
          }

          // Locate any HTML tags.
          $matches = preg_split('/(<[^<|>]*>)/i', $text, -1, PREG_SPLIT_NO_EMPTY | PREG_SPLIT_DELIM_CAPTURE);

          // Iterate through the tokenized text to handle chunks (html and content).
          foreach ($matches as $idx => $chunk) {
              // Nothing to do. We skip completely any html chunk.
              if (strpos(trim($chunk), '<') === 0) {
                  continue;
              }

              // Nothing to do. We skip any content chunk having any of these attributes.
              if (preg_match('#(background=")|(action=")|(style="background)|(href=")|(src=")|(url [(])#', $chunk)) {
                  continue;
              }

              // Arrived here, we want to process every word in this chunk.
              $words = explode(' ', $chunk);
              foreach ($words as $idx2 => $word) {
                  // ReDoS protection. Stop processing if a word is too large.
                  if (strlen($word) < 4096) {
                      $linked = preg_replace($regex, '<a href="http$1://$2$3$4" class="_blanktarget">$0</a>', $word);
                      // preg_replace() returns null on a PCRE error (e.g. malformed UTF-8 with the
                      // /u modifier); keep the original word instead of silently dropping it.
                      if ($linked !== null) {
                          $words[$idx2] = $linked;
                      }
                  }
              }

              // Copy the result back to the array.
              $matches[$idx] = implode(' ', $words);
          }

          $text = implode('', $matches);

          if (!empty($ignoretags)) {
              $ignoretags = array_reverse($ignoretags); /// Reversed so "progressive" str_replace() will solve some nesting problems.
              $text = str_replace(array_keys($ignoretags), $ignoretags, $text);
          }

          if (get_config('filter_urltolink', 'embedimages')) {
              // Now try to inject the images, this code was originally in the mediaplugin filter.
              // This may be useful only if somebody relies on the fact the links in FORMAT_MOODLE get converted
              // to URLs which in turn change to real images.
              $search = '/<a href="([^"]+\.(jpg|png|gif))" class="_blanktarget">([^>]*)<\/a>/is';
              $text = preg_replace_callback($search, 'filter_urltolink_img_callback', $text);
          }
      }
  }
  /**
   * Replace a link to an image with the embedded image itself.
   *
   * Used as the preg_replace_callback() callback when the "embedimages" setting is on;
   * intended for automatic conversion of image URLs when FORMAT_MOODLE is used.
   * Only anchors whose visible text equals their href are converted, everything else
   * is returned untouched.
   *
   * @param array $link regex match: [0] the whole anchor, [1] the href value, [3] the link text
   * @return string an img tag, or the original anchor when href and text differ
   */
  function filter_urltolink_img_callback($link) {
      $href = $link[1];
      $label = $link[3];

      if ($href === $label) {
          return '<img class="filter_urltolink_image" alt="" src="'.$href.'" />';
      }

      // The url does not match the text, so this link was not created by this filter.
      return $link[0];
  }