PageRenderTime 40ms CodeModel.GetById 11ms RepoModel.GetById 0ms app.codeStats 0ms

/wp-content/plugins/wordpress-seo/inc/sitemaps/class-sitemaps.php

https://bitbucket.org/callum-harrod/thestudentroom
PHP | 556 lines | 248 code | 108 blank | 200 comment | 34 complexity | 871e4e151d1ac3678508d73e13ad42b8 MD5 | raw file
Possible License(s): GPL-2.0, ISC
  1. <?php
  2. /**
  3. * WPSEO plugin file.
  4. *
  5. * @package WPSEO\XML_Sitemaps
  6. */
  /**
   * Class WPSEO_Sitemaps
   *
   * Front controller for the XML sitemaps.
   *
   * The class hooks into `pre_get_posts` and hijacks any main-query request that
   * carries an `xsl` or `sitemap` query variable. XSL requests are answered with
   * the bundled stylesheet (or a custom `wpseo_xsl_{name}` action). Sitemap
   * requests are answered from the sitemap cache when it is enabled, and are
   * otherwise built on the fly from the registered providers or from a custom
   * `wpseo_do_sitemap_{type}` action. Requests that yield no content are turned
   * into a 404.
   */
  class WPSEO_Sitemaps {

    /** Sitemap index identifier. */
    const SITEMAP_INDEX_TYPE = '1';

    /** @var string $sitemap Content of the sitemap to output. */
    protected $sitemap = '';

    /** @var bool $bad_sitemap Flag to indicate if this is an invalid or empty sitemap. */
    public $bad_sitemap = false;

    /** @var bool $transient Whether or not the XML sitemap was served from the cache. */
    private $transient = false;

    /**
     * @var string $http_protocol HTTP protocol to use in headers.
     * @since 3.2
     */
    protected $http_protocol = 'HTTP/1.1';

    /** @var int $current_page Page (part) of the sitemap being handled; the `sitemap_n` query variable. */
    private $current_page = 1;

    /** @var WPSEO_Sitemap_Timezone $timezone */
    private $timezone;

    /**
     * @var WPSEO_Sitemaps_Router $router
     * @since 3.2
     */
    public $router;

    /**
     * @var WPSEO_Sitemaps_Renderer $renderer
     * @since 3.2
     */
    public $renderer;

    /**
     * @var WPSEO_Sitemaps_Cache $cache
     * @since 3.2
     */
    public $cache;

    /**
     * Starts out empty so the build methods can be called safely before
     * `init_sitemaps_providers()` has run on `after_setup_theme`.
     *
     * @var WPSEO_Sitemap_Provider[] $providers
     * @since 3.2
     */
    public $providers = array();

    /**
     * Class constructor.
     *
     * Registers the WordPress hooks, creates the helper objects and picks up
     * the HTTP protocol of the current request when the server reports one.
     */
    public function __construct() {
      add_action( 'after_setup_theme', array( $this, 'init_sitemaps_providers' ) );
      add_action( 'after_setup_theme', array( $this, 'reduce_query_load' ), 99 );
      add_action( 'pre_get_posts', array( $this, 'redirect' ), 1 );
      add_action( 'wpseo_hit_sitemap_index', array( $this, 'hit_sitemap_index' ) );
      add_action( 'wpseo_ping_search_engines', array( __CLASS__, 'ping_search_engines' ) );

      $this->timezone = new WPSEO_Sitemap_Timezone();
      $this->router   = new WPSEO_Sitemaps_Router();
      $this->renderer = new WPSEO_Sitemaps_Renderer();
      $this->cache    = new WPSEO_Sitemaps_Cache();

      if ( ! empty( $_SERVER['SERVER_PROTOCOL'] ) ) {
        $this->http_protocol = sanitize_text_field( $_SERVER['SERVER_PROTOCOL'] );
      }
    }

    /**
     * Initialize sitemap providers classes.
     *
     * Registers the three built-in providers and appends every object returned
     * by the `wpseo_sitemaps_providers` filter that is a WPSEO_Sitemap_Provider.
     *
     * @since 5.3
     */
    public function init_sitemaps_providers() {
      $this->providers = array(
        new WPSEO_Post_Type_Sitemap_Provider(),
        new WPSEO_Taxonomy_Sitemap_Provider(),
        new WPSEO_Author_Sitemap_Provider(),
      );

      $external_providers = apply_filters( 'wpseo_sitemaps_providers', array() );

      // A misbehaving filter callback may return something that cannot be iterated.
      if ( ! is_array( $external_providers ) && ! ( $external_providers instanceof Traversable ) ) {
        return;
      }

      foreach ( $external_providers as $provider ) {
        // `instanceof` is already false for non-objects, no separate is_object() needed.
        if ( $provider instanceof WPSEO_Sitemap_Provider ) {
          $this->providers[] = $provider;
        }
      }
    }

    /**
     * Check the current request URI, if we can determine it's probably an XML sitemap, kill loading the widgets.
     *
     * The request counts as a sitemap request when the URI contains "sitemap"
     * (case-insensitive) and its last four characters are ".xml" or ".xsl".
     */
    public function reduce_query_load() {
      if ( ! isset( $_SERVER['REQUEST_URI'] ) ) {
        return;
      }

      $request_uri = $_SERVER['REQUEST_URI'];
      $extension   = substr( $request_uri, -4 );

      if ( false !== stripos( $request_uri, 'sitemap' ) && in_array( $extension, array( '.xml', '.xsl' ), true ) ) {
        remove_all_actions( 'widgets_init' );
      }
    }

    /**
     * Register your own sitemap. Call this during 'init'.
     *
     * @param string   $name     The name of the sitemap.
     * @param callback $function Function to build your sitemap.
     * @param string   $rewrite  Optional. Regular expression to match your sitemap with.
     */
    public function register_sitemap( $name, $function, $rewrite = '' ) {
      add_action( 'wpseo_do_sitemap_' . $name, $function );

      if ( ! empty( $rewrite ) ) {
        add_rewrite_rule( $rewrite, 'index.php?sitemap=' . $name, 'top' );
      }
    }

    /**
     * Register your own XSL file. Call this during 'init'.
     *
     * @since 1.4.23
     *
     * @param string   $name     The name of the XSL file.
     * @param callback $function Function to build your XSL file.
     * @param string   $rewrite  Optional. Regular expression to match your sitemap with.
     */
    public function register_xsl( $name, $function, $rewrite = '' ) {
      add_action( 'wpseo_xsl_' . $name, $function );

      if ( ! empty( $rewrite ) ) {
        add_rewrite_rule( $rewrite, 'index.php?xsl=' . $name, 'top' );
      }
    }

    /**
     * Set the sitemap current page to allow creating partial sitemaps with wp-cli
     * in a one-off process.
     *
     * Values that are not scalar or do not convert to a positive integer are
     * ignored, leaving the current page untouched.
     *
     * @param integer $current_page The part that should be generated.
     */
    public function set_n( $current_page ) {
      if ( is_scalar( $current_page ) && intval( $current_page ) > 0 ) {
        $this->current_page = intval( $current_page );
      }
    }

    /**
     * Set the sitemap content to display after you have generated it.
     *
     * @param string $sitemap The generated sitemap to output.
     */
    public function set_sitemap( $sitemap ) {
      $this->sitemap = $sitemap;
    }

    /**
     * Set as true to make the request 404. Used to stop the display of empty sitemaps or invalid requests.
     *
     * @param bool $bool Is this a bad request. True or false.
     */
    public function set_bad_sitemap( $bool ) {
      $this->bad_sitemap = (bool) $bool;
    }

    /**
     * Prevent plugins from running shutdown scripts when we're obviously not outputting HTML.
     *
     * Removes every `wp_footer` action and then ends the script.
     *
     * @since 1.4.16
     */
    public function sitemap_close() {
      remove_all_actions( 'wp_footer' );
      die();
    }

    /**
     * Hijack requests for potential sitemaps and XSL files.
     *
     * @param \WP_Query $query Main query instance.
     */
    public function redirect( $query ) {
      if ( ! $query->is_main_query() ) {
        return;
      }

      $xsl = get_query_var( 'xsl' );

      if ( ! empty( $xsl ) ) {
        /*
         * This is a method to provide the XSL via the home_url.
         * Needed when the site_url and home_url are not the same.
         * Loading the XSL needs to come from the same domain, protocol and port as the XML.
         *
         * Whenever home_url and site_url are the same, the file can be loaded directly.
         */
        $this->xsl_output( $xsl );
        $this->sitemap_close();

        return;
      }

      $type = get_query_var( 'sitemap' );

      if ( empty( $type ) ) {
        return;
      }

      $this->set_n( get_query_var( 'sitemap_n' ) );

      if ( ! $this->get_sitemap_from_cache( $type, $this->current_page ) ) {
        $this->build_sitemap( $type );
      }

      if ( $this->bad_sitemap ) {
        // Hand the request back to WordPress as a regular 404.
        $query->set_404();
        status_header( 404 );

        return;
      }

      $this->output();
      $this->sitemap_close();
    }

    /**
     * Try to get the sitemap from cache.
     *
     * When the cache is enabled but holds no entry, the sitemap is built and
     * stored right away; the return value is then the result of storing it.
     *
     * @param string $type        Sitemap type.
     * @param int    $page_number The page number to retrieve.
     *
     * @return bool If the sitemap has been retrieved from cache.
     */
    private function get_sitemap_from_cache( $type, $page_number ) {
      $this->transient = false;

      if ( true !== $this->cache->is_enabled() ) {
        return false;
      }

      /**
       * Fires before the attempt to retrieve XML sitemap from the transient cache.
       *
       * @param WPSEO_Sitemaps $sitemaps Sitemaps object.
       */
      do_action( 'wpseo_sitemap_stylesheet_cache_' . $type, $this );

      $sitemap_cache_data = $this->cache->get_sitemap_data( $type, $page_number );

      // No cache was found, refresh it because cache is enabled.
      if ( empty( $sitemap_cache_data ) ) {
        return $this->refresh_sitemap_cache( $type, $page_number );
      }

      // Cache object was found, parse information.
      $this->transient   = true;
      $this->sitemap     = $sitemap_cache_data->get_sitemap();
      $this->bad_sitemap = ! $sitemap_cache_data->is_usable();

      return true;
    }

    /**
     * Build and save sitemap to cache.
     *
     * @param string $type        Sitemap type.
     * @param int    $page_number The page number to save to.
     *
     * @return bool The value returned by the cache when storing the sitemap.
     */
    private function refresh_sitemap_cache( $type, $page_number ) {
      $this->set_n( $page_number );
      $this->build_sitemap( $type );

      return $this->cache->store_sitemap( $type, $page_number, $this->sitemap, ! $this->bad_sitemap );
    }

    /**
     * Attempts to build the requested sitemap.
     *
     * The first provider that handles the type wins. Without a matching
     * provider, a hooked `wpseo_do_sitemap_{type}` action is given the chance
     * to build the sitemap. Sets $bad_sitemap when nothing could handle the
     * type or the handling provider returned no links.
     *
     * @param string $type The requested sitemap's identifier.
     */
    public function build_sitemap( $type ) {
      /**
       * Filter the type of sitemap to build.
       *
       * @param string $type Sitemap type, determined by the request.
       */
      $type = apply_filters( 'wpseo_build_sitemap_post_type', $type );

      if ( self::SITEMAP_INDEX_TYPE === $type ) {
        $this->build_root_map();

        return;
      }

      $entries_per_page = $this->get_entries_per_page();

      foreach ( $this->providers as $provider ) {
        if ( ! $provider->handles_type( $type ) ) {
          continue;
        }

        $links = $provider->get_sitemap_links( $type, $entries_per_page, $this->current_page );

        if ( empty( $links ) ) {
          $this->bad_sitemap = true;

          return;
        }

        $this->sitemap = $this->renderer->get_sitemap( $links, $type, $this->current_page );

        return;
      }

      if ( has_action( 'wpseo_do_sitemap_' . $type ) ) {
        /**
         * Fires custom handler, if hooked to generate sitemap for the type.
         */
        do_action( 'wpseo_do_sitemap_' . $type );

        return;
      }

      $this->bad_sitemap = true;
    }

    /**
     * Build the root sitemap (example.com/sitemap_index.xml) which lists sub-sitemaps for other content types.
     *
     * Marks the sitemap as bad and empties it when no provider contributes an index link.
     */
    public function build_root_map() {
      $links            = array();
      $entries_per_page = $this->get_entries_per_page();

      foreach ( $this->providers as $provider ) {
        $links = array_merge( $links, $provider->get_index_links( $entries_per_page ) );
      }

      if ( empty( $links ) ) {
        $this->bad_sitemap = true;
        $this->sitemap     = '';

        return;
      }

      $this->sitemap = $this->renderer->get_index( $links );
    }

    /**
     * Spits out the XSL for the XML sitemap.
     *
     * Any type other than "main" is delegated to the `wpseo_xsl_{type}` action.
     *
     * @param string $type Type to output.
     *
     * @since 1.4.13
     */
    public function xsl_output( $type ) {
      if ( $type !== 'main' ) {
        /**
         * Fires for the output of XSL for XML sitemaps, other than type "main".
         */
        do_action( 'wpseo_xsl_' . $type );

        return;
      }

      header( $this->http_protocol . ' 200 OK', true, 200 );
      // Prevent the search engines from indexing the XML Sitemap.
      header( 'X-Robots-Tag: noindex, follow', true );
      header( 'Content-Type: text/xml' );

      // Make the browser cache this file properly.
      $expires = YEAR_IN_SECONDS;
      header( 'Pragma: public' );
      // The directive is spelled "max-age"; "maxage" is not a Cache-Control directive.
      header( 'Cache-Control: max-age=' . $expires );
      header( 'Expires: ' . gmdate( 'D, d M Y H:i:s', ( time() + $expires ) ) . ' GMT' );

      readfile( WPSEO_PATH . 'css/main-sitemap.xsl' );
    }

    /**
     * Spit out the generated sitemap and relevant headers and encoding information.
     */
    public function output() {
      if ( ! headers_sent() ) {
        header( $this->http_protocol . ' 200 OK', true, 200 );
        // Prevent the search engines from indexing the XML Sitemap.
        header( 'X-Robots-Tag: noindex, follow', true );
        header( 'Content-Type: text/xml; charset=' . esc_attr( $this->renderer->get_output_charset() ) );
      }

      echo $this->renderer->get_output( $this->sitemap, $this->transient );
    }

    /**
     * Makes a request to the sitemap index to cache it before the arrival of the search engines.
     *
     * Does nothing when the sitemap cache is disabled.
     *
     * @return void
     */
    public function hit_sitemap_index() {
      if ( ! $this->cache->is_enabled() ) {
        return;
      }

      wp_remote_get( WPSEO_Sitemaps_Router::get_base_url( 'sitemap_index.xml' ) );
    }

    /**
     * Get the GMT modification date for the last modified post in the post type.
     *
     * The dates of all public post types are fetched with a single query and
     * kept in a static variable. The query is repeated whenever a requested
     * post type is missing from that static list.
     *
     * @since 3.2
     *
     * @param string|array $post_types Post type or array of types.
     * @param bool         $return_all Flag to return array of values.
     *
     * @return string|array|false The most recent date, all dates keyed by post type
     *                            when $return_all is set, or false when none was found.
     */
    public static function get_last_modified_gmt( $post_types, $return_all = false ) {
      global $wpdb;

      static $post_type_dates = null;

      if ( ! is_array( $post_types ) ) {
        $post_types = array( $post_types );
      }

      foreach ( $post_types as $post_type ) {
        if ( ! isset( $post_type_dates[ $post_type ] ) ) { // If we hadn't seen post type before. R.
          $post_type_dates = null;
          break;
        }
      }

      if ( is_null( $post_type_dates ) ) {
        $post_type_dates = array();

        // Consider using WPSEO_Post_Type::get_accessible_post_types() to filter out any `no-index` post-types.
        $post_type_names = get_post_types( array( 'public' => true ) );

        if ( ! empty( $post_type_names ) ) {
          // The names are placed inside quoted SQL literals, so escape them first.
          $escaped_names = array_map( 'esc_sql', $post_type_names );

          /*
           * Sort on the aggregated `date` alias: ordering a grouped query by the bare
           * `post_modified_gmt` column is rejected by MySQL under ONLY_FULL_GROUP_BY.
           */
          $sql = "
            SELECT post_type, MAX(post_modified_gmt) AS date
            FROM $wpdb->posts
            WHERE post_status IN ('publish','inherit')
              AND post_type IN ('" . implode( "','", $escaped_names ) . "')
            GROUP BY post_type
            ORDER BY date DESC
          ";

          $results = $wpdb->get_results( $sql );

          // Only iterate when the query actually produced a list of rows.
          if ( is_array( $results ) ) {
            foreach ( $results as $obj ) {
              $post_type_dates[ $obj->post_type ] = $obj->date;
            }
          }
        }
      }

      $dates = array_intersect_key( $post_type_dates, array_flip( $post_types ) );

      if ( count( $dates ) > 0 ) {
        if ( $return_all ) {
          return $dates;
        }

        return max( $dates );
      }

      return false;
    }

    /**
     * Get the modification date for the last modified post in the post type.
     *
     * @param array $post_types Post types to get the last modification date for.
     *
     * @return string The GMT date as formatted by the timezone helper.
     */
    public function get_last_modified( $post_types ) {
      return $this->timezone->format_date( self::get_last_modified_gmt( $post_types ) );
    }

    /**
     * Notify search engines of the updated sitemap.
     *
     * Nothing is sent when the `wpseo_allow_xml_sitemap_ping` filter returns
     * false or when the `blog_public` option is '0'.
     *
     * @param string|null $url Optional URL to make the ping for. It is appended to the
     *                         ping URL as given; only the default sitemap index URL is
     *                         URL-encoded here.
     */
    public static function ping_search_engines( $url = null ) {
      /**
       * Filter: 'wpseo_allow_xml_sitemap_ping' - Check if pinging is not allowed (allowed by default)
       *
       * @api boolean $allow_ping The boolean that is set to true by default.
       */
      if ( apply_filters( 'wpseo_allow_xml_sitemap_ping', true ) === false ) {
        return;
      }

      if ( '0' === get_option( 'blog_public' ) ) { // Don't ping if blog is not public.
        return;
      }

      if ( empty( $url ) ) {
        $url = urlencode( WPSEO_Sitemaps_Router::get_base_url( 'sitemap_index.xml' ) );
      }

      // Ping Google and Bing.
      wp_remote_get( 'http://www.google.com/webmasters/tools/ping?sitemap=' . $url, array( 'blocking' => false ) );
      wp_remote_get( 'http://www.bing.com/ping?sitemap=' . $url, array( 'blocking' => false ) );
    }

    /**
     * Get the maximum number of entries per XML sitemap.
     *
     * @return int The maximum number of entries.
     */
    protected function get_entries_per_page() {
      /**
       * Filter the maximum number of entries per XML sitemap.
       *
       * After changing the output of the filter, make sure that you disable and enable the
       * sitemaps to make sure the value is picked up for the sitemap cache.
       *
       * @param int $entries The maximum number of entries per XML sitemap.
       */
      $entries = (int) apply_filters( 'wpseo_sitemap_entries_per_page', 1000 );

      return $entries;
    }
  }