PageRenderTime 52ms CodeModel.GetById 27ms RepoModel.GetById 1ms app.codeStats 0ms

/wp-content/plugins/broken-link-checker/includes/parsers.php

https://bitbucket.org/lgorence/quickpress
PHP | 329 lines | 137 code | 32 blank | 160 comment | 25 complexity | c47d35b15ae9723d9f69e23cd0d73042 MD5 | raw file
Possible License(s): GPL-2.0, LGPL-2.1, AGPL-1.0
  1. <?php
  /**
   * A base class for parsers.
   *
   * In the context of this plugin, a "parser" is a class that knows how to extract or modify
   * a specific type of links from a given piece of text. For example, there could be a "HTML Link"
   * parser that knows how to find and modify standard HTML links such as this one :
   * <a href="http://example.com/">Example</a>
   *
   * Other parsers could extract plaintext URLs or handle metadata fields.
   *
   * Each parser has a list of supported formats (e.g. "html", "plaintext", etc) and container types
   * (e.g. "post", "comment", "blogroll", etc). When something needs to be parsed, the involved
   * container class will look up the parsers that support the relevant format or the container's type,
   * and apply them to the to-be-parsed string.
   *
   * All sub-classes of blcParser should override at least the blcParser::parse() method.
   *
   * @see blcContainer::$fields
   *
   * @package Broken Link Checker
   * @access public
   */
  class blcParser extends blcModule {

      /** @var string Parser type identifier; set to the module ID in init(). */
      public $parser_type;

      /** @var array List of format names (e.g. "html") this parser supports. */
      public $supported_formats = array();

      /** @var array List of container types (e.g. "post") this parser supports. */
      public $supported_containers = array();

      /**
       * Initialize the parser. Nothing much here.
       *
       * @return void
       */
      public function init(){
          parent::init();
          $this->parser_type = $this->module_id;
      }

      /**
       * Called when the parser is activated.
       *
       * @return void
       */
      public function activated(){
          parent::activated();
          $this->resynch_relevant_containers();
      }

      /**
       * Mark containers that this parser might be interested in as unparsed.
       *
       * Every supported format and container type is paired with the time this
       * module was last deactivated, as reported by the module manager.
       *
       * @uses blcContainerHelper::mark_as_unsynched_where()
       *
       * @param bool $only_return If true, just return the list of formats and container types without actually modifying any synch. records.
       * @return void|array Either nothing or an array in the form [ [format1=>timestamp1, ...], [container_type1=>timestamp1, ...] ]
       */
      public function resynch_relevant_containers($only_return = false){
          global $blclog;
          $blclog->log(sprintf('...... Parser "%s" is marking relevant items as unsynched', $this->module_id));

          $last_deactivated = $this->module_manager->get_last_deactivation_time($this->module_id);

          $formats = array();
          foreach($this->supported_formats as $format){
              $formats[$format] = $last_deactivated;
          }

          $container_types = array();
          foreach($this->supported_containers as $container_type){
              $container_types[$container_type] = $last_deactivated;
          }

          if ( $only_return ){
              return array($formats, $container_types);
          }

          blcContainerHelper::mark_as_unsynched_where($formats, $container_types);
      }

      /**
       * Parse a string for links.
       *
       * The base implementation finds nothing and returns an empty array.
       *
       * @param string $content The text to parse.
       * @param string $base_url The base URL to use for normalizing relative URLs. If omitted, the blog's root URL will be used.
       * @param string $default_link_text
       * @return array An array of new blcLinkInstance objects. The objects will include info about the links found, but not about the corresponding container entity.
       */
      public function parse($content, $base_url = '', $default_link_text = ''){
          return array();
      }

      /**
       * Change all links that have a certain URL to a new URL.
       *
       * The base implementation does not edit anything; it always returns a
       * 'not_implemented' WP_Error.
       *
       * @param string $content Look for links in this string.
       * @param string $new_url Change the links to this URL.
       * @param string $old_url The URL to look for.
       * @param string $old_raw_url The raw, not-normalized URL of the links to look for.
       *
       * @return array|WP_Error If successful, the return value will be an associative array with two
       * keys : 'content' - the modified content, and 'raw_url' - the new raw, non-normalized URL used
       * for the modified links. In most cases, the returned raw_url will be equal to the new_url.
       */
      public function edit($content, $new_url, $old_url, $old_raw_url){
          return new WP_Error(
              'not_implemented',
              sprintf(__("Editing is not implemented in the '%s' parser", 'broken-link-checker'), $this->parser_type)
          );
      }

      /**
       * Remove all links that have a certain URL, leaving anchor text intact.
       *
       * The base implementation does not remove anything; it always returns a
       * 'not_implemented' WP_Error.
       *
       * @param string $content Look for links in this string.
       * @param string $url The URL to look for.
       * @param string $raw_url The raw, non-normalized version of the URL to look for.
       * @return string|WP_Error Input string with all matching links removed, or an error object.
       */
      public function unlink($content, $url, $raw_url){
          return new WP_Error(
              'not_implemented',
              sprintf(__("Unlinking is not implemented in the '%s' parser", 'broken-link-checker'), $this->parser_type)
          );
      }

      /**
       * Get the link text for printing in the "Broken Links" table.
       * Sub-classes should override this method and display the link text in a way appropriate for the link type.
       *
       * @param blcLinkInstance $instance
       * @param string $context Not used by the base implementation. Defaults to 'display'.
       * @return string HTML
       */
      public function ui_get_link_text($instance, $context = 'display'){
          return $instance->link_text;
      }

      /**
       * Turn a relative URL into an absolute one.
       *
       * URLs that already have a scheme are returned untouched. Protocol-relative
       * URLs ("//host/path") inherit the scheme of the base URL. Query-only and
       * fragment-only URLs are appended to the base URL as-is. Everything else is
       * resolved against the base URL's path, collapsing "." and ".." segments
       * (root-relative URLs that start with "/" replace the base path verbatim).
       *
       * @param string $url Relative URL.
       * @param string $base_url Base URL. If omitted, the blog's root URL will be used.
       * @return string|false The absolute URL, or false if $url could not be parsed.
       */
      public function relative2absolute($url, $base_url = ''){
          if ( empty($base_url) ){
              $base_url = home_url();
          }

          //parse_url() can emit a warning on malformed input in old PHP versions, hence the "@".
          $p = @parse_url($url);
          if ( !$p ){
              //URL is malformed
              return false;
          }
          if ( isset($p['scheme']) ){
              return $url; //Already absolute.
          }

          //If the relative URL is just a query string or anchor, simply attach it to the absolute URL and return
          $first_char = substr($url, 0, 1);
          if ( ($first_char === '?') || ($first_char === '#') ){
              return $base_url . $url;
          }

          $parts = parse_url($base_url);
          if ( !is_array($parts) ){
              //Unparseable base URL. Treat it as having no components instead of indexing into false.
              $parts = array();
          }

          //Protocol-relative URLs start with "//". They already name a host, so they only need a scheme.
          if ( substr($url, 0, 2) === '//' ){
              $scheme = !empty($parts['scheme']) ? $parts['scheme'] : 'http';
              return $scheme . ':' . $url;
          }

          if ( $first_char === '/' ){
              //Relative URL starts with a slash => ignore the base path and jump straight to the root.
              $path_segments = explode('/', $url);
              array_shift($path_segments);
          } else {
              $base_segments = array();
              if ( isset($parts['path']) ){
                  $dirs = explode('/', $parts['path']);
                  array_pop($dirs); //Drop the file name (or the empty string after a trailing slash).
                  //Discard empty segments only. A plain array_filter() would also drop a directory named "0".
                  foreach($dirs as $dir){
                      if ( $dir !== '' ){
                          $base_segments[] = $dir;
                      }
                  }
              }

              //Merge together the base path & the relative path
              $merged = array_merge($base_segments, explode('/', $url));

              //Filter the merged path
              $path_segments = array();
              foreach($merged as $part){
                  if ( $part === '.' ){
                      continue; //. = "this directory". It's basically a no-op, so we skip it.
                  } elseif ( $part === '..' ){
                      array_pop($path_segments); //.. = one directory up. Remove the last seen path segment.
                  } else {
                      $path_segments[] = $part; //Normal directory -> add it to the path.
                  }
              }
          }
          $path = implode('/', $path_segments);

          //Build the absolute URL.
          $absolute = '';
          if ( !empty($parts['scheme']) ){
              $absolute = $parts['scheme'] . '://';
          }
          if ( isset($parts['user']) ){
              $absolute .= $parts['user'];
              if ( isset($parts['pass']) ){
                  $absolute .= ':' . $parts['pass'];
              }
              $absolute .= '@';
          }
          if ( isset($parts['host']) ){
              $absolute .= $parts['host'];
              if ( isset($parts['port']) ){
                  $absolute .= ':' . $parts['port'];
              }
              $absolute .= '/';
          }
          $absolute .= $path;

          return $absolute;
      }

      /**
       * Apply a callback function to all links found in a string and return the results.
       *
       * The first argument passed to the callback function will be an associative array
       * of link data. If the optional $extra parameter is set, it will be passed as the
       * second argument to the callback function.
       *
       * The link data array will contain at least these keys :
       * 'href' - the URL of the link, as-is (i.e. without any sanitization or relative-to-absolute translation).
       * '#raw' - the raw link code, e.g. the entire '<a href="...">...</a>' tag of a HTML link.
       *
       * Sub-classes may also set additional keys.
       *
       * This method is currently used only internally, so sub-classes are not required
       * to implement it. The base implementation returns an empty array.
       *
       * @param string $content A text string to parse for links.
       * @param callback $callback Callback function to apply to all found links.
       * @param mixed $extra If the optional $extra param. is supplied, it will be passed as the second parameter to the function $callback.
       * @return array An array of all detected links after applying $callback to each of them.
       */
      public function map($content, $callback, $extra = null){
          return array();
      }

      /**
       * Modify all links found in a string using a callback function.
       *
       * The first argument passed to the callback function will be an associative array
       * of link data. If the optional $extra parameter is set, it will be passed as the
       * second argument to the callback function. See the map() method of this class for
       * details on the first argument.
       *
       * The callback function should return either an associative array or a string. If
       * a string is returned, the parser will replace the current link with the contents
       * of that string. If an array is returned, the current link will be modified/rebuilt
       * by substituting the new values for the old ones (e.g. returning array with the key
       * 'href' set to 'http://example.com/' will replace the current link's URL with
       * http://example.com/).
       *
       * This method is currently only used internally, so sub-classes are not required
       * to implement it. The base implementation returns $content unchanged.
       *
       * @see blcParser::map()
       *
       * @param string $content A text string containing the links to edit.
       * @param callback $callback Callback function used to modify the links.
       * @param mixed $extra If supplied, $extra will be passed as the second parameter to the function $callback.
       * @return string The modified input string.
       */
      public function multi_edit($content, $callback, $extra = null){
          return $content; //No-op
      }
  }
  /**
   * Static helper for looking up parser modules.
   *
   * Every method here is meant to be invoked statically, e.g.
   * blcParserHelper::get_parsers('html', 'post').
   *
   * @see blcParser
   *
   * @package Broken Link Checker
   * @access public
   */
  class blcParserHelper {

      /**
       * Fetch the parser registered under the given parser type ID.
       *
       * The lookup is delegated to the module manager, restricted to the
       * 'parser' category.
       *
       * @uses blcModuleManager::get_module()
       *
       * @param string $parser_type
       * @return blcParser|null
       */
      public static function get_parser( $parser_type ){
          return blcModuleManager::getInstance()->get_module($parser_type, true, 'parser');
      }

      /**
       * Collect every active parser that supports the given format or the given container type.
       *
       * A parser that supports both is still listed only once. Modules that the
       * manager fails to provide are skipped.
       *
       * @param string $format
       * @param string $container_type
       * @return array of blcParser
       */
      public static function get_parsers( $format, $container_type ){
          $module_manager = blcModuleManager::getInstance();
          $matching = array();

          //Walk through the active parser modules; only the module IDs are needed.
          foreach($module_manager->get_modules_by_category('parser') as $id => $unused_data){
              $candidate = $module_manager->get_module($id); //Will autoload if necessary

              if ( $candidate ){
                  $handles_format = in_array($format, $candidate->supported_formats);
                  if ( $handles_format || in_array($container_type, $candidate->supported_containers) ){
                      $matching[] = $candidate;
                  }
              }
          }

          return $matching;
      }
  }
  297. ?>