PageRenderTime 85ms CodeModel.GetById 27ms RepoModel.GetById 8ms app.codeStats 0ms

/wp-content/plugins/broken-link-checker/modules/extras/plaintext-url.php

https://bitbucket.org/lgorence/quickpress
PHP | 154 lines | 79 code | 20 blank | 55 comment | 16 complexity | d3fbba1ba064f37f63c58ddd9e5587f5 MD5 | raw file
Possible License(s): GPL-2.0, LGPL-2.1, AGPL-1.0
  1. <?php
  2. /*
  3. Plugin Name: Plaintext URLs
  4. Description: Parse plaintext URLs as links
  5. Version: 1.0
  6. Author: Janis Elsts
  7. ModuleCategory: parser
  8. ModuleClassName: blcPlaintextURL
  9. ModuleContext: on-demand
  10. ModuleLazyInit: true
  11. ModulePriority: 800
  12. */
  /**
   * Parser that treats plaintext (non-hyperlinked) URLs as links.
   *
   * URLs are located with a regular expression borrowed from WordPress's
   * make_clickable(). The same expression is used to find, edit and remove URLs.
   */
  class blcPlaintextURL extends blcParser {
      public $supported_formats = array('html', 'plaintext');

      //Regexp for detecting plaintext URLs lifted from make_clickable().
      //Group 1 is an optional opening parenthesis preceding the URL, group 2 is the URL itself.
      //Note: the look-behind requires a whitespace, ">" or "]" character before the URL.
      public $url_regexp = '#(?<=[\s>\]])(\()?([\w]+?://(?:[\w\\x80-\\xff\#$%&~/=?@\[\](+-]|[.,;:](?![\s<]|(\))?([\s]|$))|(?(1)\)(?![\s<.,;:]|$)|\)))+)#is';

      //Used by the edit and unlink callbacks
      public $old_url = '';
      public $new_url = '';

      /**
       * Parse a string for plaintext URLs
       *
       * @param string $content The text to parse.
       * @param string $base_url The base URL. Ignored.
       * @param string $default_link_text Default link text. Ignored; the URL itself is used as the link text.
       * @return array An array of new blcLinkInstance objects.
       */
      public function parse($content, $base_url = '', $default_link_text = ''){
          //Don't want to detect URLs inside links or tag attributes -
          //there are already other parsers for that.

          //Avoid <a href="http://...">http://...</a>
          $content = preg_replace('#<a[^>]*>.*?</a>#si', '', $content);

          //HTML tags are treated as natural boundaries for plaintext URLs
          //(since we strip tags, we must place another boundary char where they were).
          //The closing tag of [shortcodes] is also treated as a boundary.
          $content = str_replace(array('<', '>', '[/'), array("\n<", ">\n", "\n[/"), $content);

          //Finally, kill all tags.
          $content = strip_tags($content);

          //Find all URLs
          $found = preg_match_all($this->url_regexp, $content, $matches);

          $instances = array();
          if ( !$found ){
              //No matches (0) or a regexp error (false).
              return $instances;
          }

          //Create a new instance for each match
          foreach($matches[2] as $match){
              //Do a little bit of validation
              $url = esc_url_raw(trim($match));
              if ( empty($url) ){
                  continue;
              }

              if ( function_exists('filter_var') ){
                  //Note: filter_var() is no panacea as it accepts many invalid URLs.
                  //FILTER_FLAG_HOST_REQUIRED is deliberately not passed: it is implied by
                  //FILTER_VALIDATE_URL and the constant no longer exists in PHP 8.
                  if ( !filter_var($url, FILTER_VALIDATE_URL) ){
                      continue;
                  }
              }

              //Suppress any warning from a malformed URL; a failed parse is rejected below.
              $parts = @parse_url($url);
              if ( empty($parts['host']) ){
                  continue;
              }

              //The host must contain a dot, and the dot must not be its first character.
              $dot_position = strpos($parts['host'], '.');
              if ( ($dot_position === false) || ($dot_position === 0) ){
                  continue;
              }

              //Create a new link instance.
              $instance = new blcLinkInstance();
              $instance->set_parser($this);
              $instance->raw_url = $match;
              $instance->link_text = $match;

              $link_obj = new blcLink($url); //Creates or loads the link
              $instance->set_link($link_obj);

              $instances[] = $instance;
          }

          return $instances;
      }

      /**
       * Change all occurrences of a given plaintext URL to a new URL.
       *
       * @param string $content Look for URLs in this string.
       * @param string $new_url Change them to this URL.
       * @param string $old_url The URL to look for.
       * @param string $old_raw_url The raw, not-normalized URL. Optional.
       *
       * @return array|WP_Error If successful, the return value will be an associative array with three
       * keys : 'content' - the modified content, 'raw_url' - the new raw, non-normalized URL used
       * for the modified links, and 'link_text' - the new link text. Both 'raw_url' and 'link_text'
       * are set to $new_url. A WP_Error is returned if the regular expression engine fails.
       */
      public function edit($content, $new_url, $old_url, $old_raw_url = ''){
          $this->new_url = $new_url;
          //Prefer the raw URL since that is the text that actually appears in the content.
          $this->old_url = (string) (empty($old_raw_url) ? $old_url : $old_raw_url);

          $modified = preg_replace_callback($this->url_regexp, array($this, 'edit_callback'), $content);
          if ( $modified === null ){
              //preg_replace_callback() returns null on error (e.g. backtrack limit hit).
              //Report the failure instead of handing back empty content.
              return new WP_Error(
                  'regexp_error',
                  'Failed to edit the plaintext URL because the regular expression engine reported an error.'
              );
          }

          return array(
              'content' => $modified,
              'raw_url' => $new_url,
              'link_text' => $new_url,
          );
      }

      /**
       * Callback for edit(): swap the old URL for the new one.
       *
       * @param array $match Regexp match; [1] is the optional leading "(", [2] is the URL.
       * @return string Replacement text for the whole match.
       */
      public function edit_callback($match){
          if ( $match[2] === $this->old_url ){
              //Keep the opening parenthesis (if any) - it is part of the match, but not of the URL.
              return $match[1] . $this->new_url;
          }
          return $match[0];
      }

      /**
       * Remove all occurrences of a specific plaintext URL.
       *
       * @param string $content Look for URLs in this string.
       * @param string $url The URL to look for.
       * @param string $raw_url The raw, non-normalized version of the URL to look for. Optional.
       * @return string Input string with all matching plaintext URLs removed. The input is returned
       * unchanged if the regular expression engine fails.
       */
      public function unlink($content, $url, $raw_url = ''){
          //Prefer the raw URL since that is the text that actually appears in the content.
          $this->old_url = (string) (empty($raw_url) ? $url : $raw_url);

          $modified = preg_replace_callback($this->url_regexp, array($this, 'unlink_callback'), $content);

          //preg_replace_callback() returns null on error; never hand back empty content in that case.
          return ($modified === null) ? $content : $modified;
      }

      /**
       * Callback for unlink(): drop the URL if it is the one being removed.
       *
       * @param array $match Regexp match; [1] is the optional leading "(", [2] is the URL.
       * @return string Replacement text for the whole match.
       */
      public function unlink_callback($match){
          if ( $match[2] === $this->old_url ){
              //Only the URL is removed; an opening parenthesis before it is left in place.
              return $match[1];
          }
          return $match[0];
      }
  }
  132. ?>