PageRenderTime 62ms CodeModel.GetById 35ms RepoModel.GetById 0ms app.codeStats 0ms

/wp-content/plugins/broken-link-checker/modules/parsers/html_link.php

https://bitbucket.org/lgorence/quickpress
PHP | 345 lines | 151 code | 50 blank | 144 comment | 21 complexity | c4034a688a3030c74889287c65d796e1 MD5 | raw file
Possible License(s): GPL-2.0, LGPL-2.1, AGPL-1.0
  1. <?php
  2. /*
  3. Plugin Name: HTML links
  4. Description: Example : <code>&lt;a href="http://example.com/"&gt;link text&lt;/a&gt;</code>
  5. Version: 1.0
  6. Author: Janis Elsts
  7. ModuleID: link
  8. ModuleCategory: parser
  9. ModuleClassName: blcHTMLLink
  10. ModuleContext: on-demand
  11. ModuleLazyInit: true
  12. ModulePriority: 1000
  13. */
  14. class blcHTMLLink extends blcParser {
  15. var $supported_formats = array('html');
  16. /**
  17. * Parse a string for HTML links - <a href="URL">anchor text</a>
  18. *
  19. * @param string $content The text to parse.
  20. * @param string $base_url The base URL to use for normalizing relative URLs. If ommitted, the blog's root URL will be used.
  21. * @param string $default_link_text
  22. * @return array An array of new blcLinkInstance objects. The objects will include info about the links found, but not about the corresponding container entity.
  23. */
  24. function parse($content, $base_url = '', $default_link_text = ''){
  25. $instances = array();
  26. //remove all <code></code> blocks first
  27. $content = preg_replace('/<code[^>]*>.+?<\/code>/si', ' ', $content);
  28. //Find links
  29. $params = array(
  30. 'base_url' => $base_url,
  31. 'default_link_text' => $default_link_text,
  32. );
  33. $instances = $this->map($content, array(&$this, 'parser_callback'), $params);
  34. //The parser callback returns NULL when it finds an invalid link. Filter out those nulls
  35. //from the list of instances.
  36. $instances = array_filter($instances);
  37. return $instances;
  38. }
  39. /**
  40. * blcHTMLLink::parser_callback()
  41. *
  42. * @access private
  43. *
  44. * @param array $link
  45. * @param array $params
  46. * @return blcLinkInstance|null
  47. */
  48. function parser_callback($link, $params){
  49. extract($params);
  50. $url = $raw_url = $link['href'];
  51. $url = trim($url);
  52. //FB::log($url, "Found link");
  53. //Sometimes links may contain shortcodes. Execute them.
  54. $url = do_shortcode($url);
  55. //Skip empty URLs
  56. if ( empty($url) ){
  57. return null;
  58. };
  59. //Attempt to parse the URL
  60. $parts = @parse_url($url);
  61. if(!$parts) {
  62. return null; //Skip invalid URLs
  63. };
  64. if ( !isset($parts['scheme']) ){
  65. //No sheme - likely a relative URL. Turn it into an absolute one.
  66. $url = $this->relative2absolute($url, $base_url); //$base_url comes from $params
  67. }
  68. //Skip invalid links (again)
  69. if ( !$url || (strlen($url)<6) ) {
  70. return null;
  71. }
  72. $text = strip_tags( $link['#link_text'] );
  73. //The URL is okay, create and populate a new link instance.
  74. $instance = new blcLinkInstance();
  75. $instance->set_parser($this);
  76. $instance->raw_url = $raw_url;
  77. $instance->link_text = $text;
  78. $link_obj = new blcLink($url); //Creates or loads the link
  79. $instance->set_link($link_obj);
  80. return $instance;
  81. }
  82. /**
  83. * Change all links that have a certain URL to a new URL.
  84. *
  85. * @param string $content Look for links in this string.
  86. * @param string $new_url Change the links to this URL.
  87. * @param string $old_url The URL to look for.
  88. * @param string $old_raw_url The raw, not-normalized URL of the links to look for. Optional.
  89. *
  90. * @return array|WP_Error If successful, the return value will be an associative array with two
  91. * keys : 'content' - the modified content, and 'raw_url' - the new raw, non-normalized URL used
  92. * for the modified links. In most cases, the returned raw_url will be equal to the new_url.
  93. */
  94. function edit($content, $new_url, $old_url, $old_raw_url){
  95. if ( empty($old_raw_url) ){
  96. $old_raw_url = $old_url;
  97. }
  98. //Save the old & new URLs for use in the edit callback.
  99. $args = array(
  100. 'old_url' => $old_raw_url,
  101. 'new_url' => $new_url,
  102. );
  103. //Find all links and replace those that match $old_url.
  104. $content = $this->multi_edit($content, array(&$this, 'edit_callback'), $args);
  105. return array(
  106. 'content' => $content,
  107. 'raw_url' => $new_url,
  108. );
  109. }
  110. function edit_callback($link, $params){
  111. if ($link['href'] == $params['old_url']){
  112. return array(
  113. 'href' => $params['new_url'],
  114. );
  115. } else {
  116. return $link['#raw'];
  117. }
  118. }
  119. /**
  120. * Remove all links that have a certain URL, leaving anchor text intact.
  121. *
  122. * @param string $content Look for links in this string.
  123. * @param string $url The URL to look for.
  124. * @param string $raw_url The raw, non-normalized version of the URL to look for. Optional.
  125. * @return string Input string with all matching links removed.
  126. */
  127. function unlink($content, $url, $raw_url){
  128. if ( empty($raw_url) ){
  129. $raw_url = $url;
  130. }
  131. $args = array(
  132. 'old_url' => $raw_url,
  133. );
  134. //Find all links and remove those that match $raw_url.
  135. $content = $this->multi_edit($content, array(&$this, 'unlink_callback'), $args);
  136. return $content;
  137. }
  138. /**
  139. * blcHTMLLink::unlink_callback()
  140. *
  141. * @access private
  142. *
  143. * @param array $link
  144. * @param array $params
  145. * @return string
  146. */
  147. function unlink_callback($link, $params){
  148. //Skip links that don't match the specified URL
  149. if ($link['href'] != $params['old_url']){
  150. return $link['#raw'];
  151. }
  152. $config = blc_get_configuration();
  153. if ( $config->options['mark_removed_links'] ){
  154. //Leave only the anchor text + the removed_link CSS class
  155. return sprintf(
  156. '<span class="removed_link" title="%s">%s</span>',
  157. esc_attr($link['href']),
  158. $link['#link_text']
  159. );
  160. } else {
  161. //Just the anchor text
  162. return $link['#link_text'];
  163. }
  164. }
  165. /**
  166. * Get the link text for printing in the "Broken Links" table.
  167. * Sub-classes should override this method and display the link text in a way appropriate for the link type.
  168. *
  169. * @param blcLinkInstance $instance
  170. * @return string HTML
  171. */
  172. function ui_get_link_text($instance, $context = 'display'){
  173. return $instance->link_text;
  174. }
  175. /**
  176. * Apply a callback function to all HTML links found in a string and return the results.
  177. *
  178. * The link data array will contain at least these keys :
  179. * 'href' - the URL of the link (with htmlentitydecode() already applied).
  180. * '#raw' - the raw link code, e.g. the entire '<a href="...">...</a>' tag of a HTML link.
  181. * '#offset' - the offset within $content at which the first character of the link tag was found.
  182. * '#link_text' - the link's anchor text, if any. May contain HTML tags.
  183. *
  184. * Any attributes of the link tag will also be included in the returned array as attr_name => attr_value
  185. * pairs. This function will also automatically decode any HTML entities found in attribute values.
  186. *
  187. * @see blcParser::map()
  188. *
  189. * @param string $content A text string to parse for links.
  190. * @param callback $callback Callback function to apply to all found links.
  191. * @param mixed $extra If the optional $extra param. is supplied, it will be passed as the second parameter to the function $callback.
  192. * @return array An array of all detected links after applying $callback to each of them.
  193. */
  194. function map($content, $callback, $extra = null){
  195. $results = array();
  196. //Find all links
  197. $links = blcUtility::extract_tags($content, 'a', false, true);
  198. //Iterate over the links and apply $callback to each
  199. foreach($links as $link){
  200. //Massage the found link into a form required for the callback function
  201. $param = $link['attributes'];
  202. $param = array_merge(
  203. $param,
  204. array(
  205. '#raw' => $link['full_tag'],
  206. '#offset' => $link['offset'],
  207. '#link_text' => $link['contents'],
  208. 'href' => isset($link['attributes']['href'])?$link['attributes']['href']:'',
  209. )
  210. );
  211. //Prepare arguments for the callback
  212. $params = array($param);
  213. if ( isset($extra) ){
  214. $params[] = $extra;
  215. }
  216. //Execute & store :)
  217. $results[] = call_user_func_array($callback, $params);
  218. }
  219. return $results;
  220. }
  221. /**
  222. * Modify all HTML links found in a string using a callback function.
  223. *
  224. * The callback function should return either an associative array or a string. If
  225. * a string is returned, the parser will replace the current link with the contents
  226. * of that string. If an array is returned, the current link will be modified/rebuilt
  227. * by substituting the new values for the old ones.
  228. *
  229. * htmlentities() will be automatically applied to attribute values (but not to #link_text).
  230. *
  231. * @see blcParser::multi_edit()
  232. *
  233. * @param string $content A text string containing the links to edit.
  234. * @param callback $callback Callback function used to modify the links.
  235. * @param mixed $extra If supplied, $extra will be passed as the second parameter to the function $callback.
  236. * @return string The modified input string.
  237. */
  238. function multi_edit($content, $callback, $extra = null){
  239. //Just reuse map() + a little helper func. to apply the callback to all links and get modified links
  240. $modified_links = $this->map($content, array(&$this, 'execute_edit_callback'), array($callback, $extra));
  241. //Replace each old link with the modified one
  242. $offset = 0;
  243. foreach($modified_links as $link){
  244. if ( isset($link['#new_raw']) ){
  245. $new_html = $link['#new_raw'];
  246. } else {
  247. //Assemble the new link tag
  248. $new_html = '<a';
  249. foreach ( $link as $name => $value ){
  250. //Skip special keys like '#raw' and '#offset'
  251. if ( substr($name, 0, 1) == '#' ){
  252. continue;
  253. }
  254. $new_html .= sprintf(' %s="%s"', $name, esc_attr( $value ));
  255. }
  256. $new_html .= '>' . $link['#link_text'] . '</a>';
  257. }
  258. $content = substr_replace($content, $new_html, $link['#offset'] + $offset, strlen($link['#raw']));
  259. //Update the replacement offset
  260. $offset += ( strlen($new_html) - strlen($link['#raw']) );
  261. }
  262. return $content;
  263. }
  264. /**
  265. * Helper function for blcHtmlLink::multi_edit()
  266. * Applies the specified callback function to each link and merges
  267. * the result with the current link attributes. If the callback returns
  268. * a replacement HTML tag instead, it will be stored in the '#new_raw'
  269. * key of the return array.
  270. *
  271. * @access protected
  272. *
  273. * @param array $link
  274. * @param array $info The callback function and the extra argument to pass to that function (if any).
  275. * @return array
  276. */
  277. function execute_edit_callback($link, $info){
  278. list($callback, $extra) = $info;
  279. //Prepare arguments for the callback
  280. $params = array($link);
  281. if ( isset($extra) ){
  282. $params[] = $extra;
  283. }
  284. $new_link = call_user_func_array($callback, $params);
  285. if ( is_array($new_link) ){
  286. $link = array_merge($link, $new_link);
  287. } elseif (is_string($new_link)) {
  288. $link['#new_raw'] = $new_link;
  289. }
  290. return $link;
  291. }
  292. }
  293. ?>