/wp-content/plugins/wordpress-seo/inc/sitemaps/class-post-type-sitemap-provider.php

https://bitbucket.org/carloskikea/helpet · PHP · 641 lines · 301 code · 128 blank · 212 comment · 48 complexity · 6a4519005405420c64050d82c5bf6477 MD5 · raw file

  1. <?php
  2. /**
  3. * WPSEO plugin file.
  4. *
  5. * @package WPSEO\XML_Sitemaps
  6. */
  /**
   * Sitemap provider for post types.
   *
   * Produces the sitemap index entries and the per-page URL sets for post types,
   * and asks the sitemap cache to invalidate when a post of a valid type is saved.
   */
  class WPSEO_Post_Type_Sitemap_Provider implements WPSEO_Sitemap_Provider {

      /** @var string $home_url Holds the home_url() value. */
      protected static $home_url;

      /** @var WPSEO_Sitemap_Image_Parser $image_parser Holds image parser instance. */
      protected static $image_parser;

      /** @var object $classifier Holds instance of classifier for a link. */
      protected static $classifier;

      /** @var int $page_on_front_id Static front page ID. */
      protected static $page_on_front_id;

      /** @var int $page_for_posts_id Posts page ID. */
      protected static $page_for_posts_id;

      /**
       * Registers the save_post callback.
       *
       * The static properties of this class are not filled here; each one is
       * populated lazily by its getter on first use.
       */
      public function __construct() {
          // `save_post` is an action hook, so register it as one.
          add_action( 'save_post', array( $this, 'save_post' ) );
      }

      /**
       * Get all the options
       *
       * Only emits the deprecation notice; it returns nothing.
       *
       * @deprecated 7.0
       */
      protected function get_options() {
          _deprecated_function( __METHOD__, 'WPSEO 7.0', 'WPSEO_Options::get' );
      }

      /**
       * Get front page ID.
       *
       * The `page_on_front` option is read once and then served from the static property.
       *
       * @return int
       */
      protected function get_page_on_front_id() {
          if ( ! isset( self::$page_on_front_id ) ) {
              self::$page_on_front_id = (int) get_option( 'page_on_front' );
          }

          return self::$page_on_front_id;
      }

      /**
       * Get page for posts ID.
       *
       * The `page_for_posts` option is read once and then served from the static property.
       *
       * @return int
       */
      protected function get_page_for_posts_id() {
          if ( ! isset( self::$page_for_posts_id ) ) {
              self::$page_for_posts_id = (int) get_option( 'page_for_posts' );
          }

          return self::$page_for_posts_id;
      }

      /**
       * Get the Image Parser.
       *
       * The parser is created on first use and shared through the static property.
       *
       * @return WPSEO_Sitemap_Image_Parser
       */
      protected function get_image_parser() {
          if ( ! isset( self::$image_parser ) ) {
              self::$image_parser = new WPSEO_Sitemap_Image_Parser();
          }

          return self::$image_parser;
      }

      /**
       * Get the Classifier for a link.
       *
       * The classifier is created on first use, seeded with the home URL.
       *
       * @return WPSEO_Link_Type_Classifier
       */
      protected function get_classifier() {
          if ( ! isset( self::$classifier ) ) {
              self::$classifier = new WPSEO_Link_Type_Classifier( $this->get_home_url() );
          }

          return self::$classifier;
      }

      /**
       * Get Home URL
       *
       * This has been moved from the constructor because wp_rewrite is not available on plugins_loaded in multisite.
       * It will now be requested on need and not on initialization.
       *
       * @return string
       */
      protected function get_home_url() {
          if ( ! isset( self::$home_url ) ) {
              self::$home_url = WPSEO_Utils::home_url();
          }

          return self::$home_url;
      }

      /**
       * Check if provider supports given item type.
       *
       * @param string $type Type string to check for.
       *
       * @return boolean
       */
      public function handles_type( $type ) {
          return post_type_exists( $type );
      }

      /**
       * Builds the sitemap index entries for all valid post types.
       *
       * Each post type with at least one countable post gets one entry per sitemap page.
       * A single-page sitemap is named `{post_type}-sitemap.xml`; multi-page sitemaps are
       * numbered from 1.
       *
       * @param int $max_entries Entries per sitemap.
       *
       * @return array List of arrays with a `loc` (URL) and `lastmod` (date string or false) key.
       */
      public function get_index_links( $max_entries ) {

          global $wpdb;

          $post_types          = WPSEO_Post_Type::get_accessible_post_types();
          $post_types          = array_filter( $post_types, array( $this, 'is_valid_post_type' ) );
          $last_modified_times = WPSEO_Sitemaps::get_last_modified_gmt( $post_types, true );
          $index               = array();

          foreach ( $post_types as $post_type ) {

              $total_count = $this->get_post_type_count( $post_type );

              if ( $total_count === 0 ) {
                  continue;
              }

              $max_pages = 1;

              // The `> 0` check keeps a zero page size from triggering a division by zero.
              if ( $max_entries > 0 && $total_count > $max_entries ) {
                  $max_pages = (int) ceil( $total_count / $max_entries );
              }

              $all_dates = array();

              if ( $max_pages > 1 ) {
                  // Picks the modification date of every $max_entries-th row as counted by @rownum.
                  $sql = "
                      SELECT post_modified_gmt
                      FROM ( SELECT @rownum:=0 ) init
                      JOIN {$wpdb->posts} USE INDEX( type_status_date )
                      WHERE post_status IN ( 'publish', 'inherit' )
                          AND post_type = %s
                          AND ( @rownum:=@rownum+1 ) %% %d = 0
                      ORDER BY post_modified_gmt ASC
                  ";

                  $all_dates = $wpdb->get_col( $wpdb->prepare( $sql, $post_type, $max_entries ) );
              }

              for ( $page_counter = 0; $page_counter < $max_pages; $page_counter++ ) {

                  $current_page = ( $max_pages > 1 ) ? ( $page_counter + 1 ) : '';
                  $date         = false;

                  if ( empty( $current_page ) || $current_page === $max_pages ) {
                      // Single or last page: use the post type's overall last modified time.
                      if ( ! empty( $last_modified_times[ $post_type ] ) ) {
                          $date = $last_modified_times[ $post_type ];
                      }
                  }
                  elseif ( isset( $all_dates[ $page_counter ] ) ) {
                      /*
                       * The sampling query above and the count query use different WHERE
                       * clauses, so there may be fewer sampled dates than pages.
                       */
                      $date = $all_dates[ $page_counter ];
                  }

                  $index[] = array(
                      'loc'     => WPSEO_Sitemaps_Router::get_base_url( $post_type . '-sitemap' . $current_page . '.xml' ),
                      'lastmod' => $date,
                  );
              }
          }

          return $index;
      }

      /**
       * Get set of sitemap link data.
       *
       * Posts are fetched in batches of at most 100. Posts that are excluded by ID, marked
       * noindex, have no usable URL or are emptied by the `wpseo_sitemap_entry` filter are skipped.
       * The static front page and the posts page are moved to the start of the list.
       *
       * @param string $type         Sitemap type.
       * @param int    $max_entries  Entries per sitemap.
       * @param int    $current_page Current page of the sitemap.
       *
       * @return array
       */
      public function get_sitemap_links( $type, $max_entries, $current_page ) {

          $links     = array();
          $post_type = $type;

          if ( ! $this->is_valid_post_type( $post_type ) ) {
              return $links;
          }

          $steps     = min( 100, $max_entries );
          $offset    = ( $current_page > 1 ) ? ( ( $current_page - 1 ) * $max_entries ) : 0;
          $total     = ( $offset + $max_entries );
          $typecount = $this->get_post_type_count( $post_type );

          if ( $total > $typecount ) {
              $total = $typecount;
          }

          if ( $current_page === 1 ) {
              $links = array_merge( $links, $this->get_first_links( $post_type ) );
          }

          if ( $typecount === 0 ) {
              return $links;
          }

          $posts_to_exclude  = $this->get_excluded_posts();
          $page_for_posts_id = $this->get_page_for_posts_id();
          $page_on_front_id  = $this->get_page_on_front_id();

          while ( $total > $offset ) {

              /*
               * Never read past the end of this sitemap page. Without the clamp a page size
               * that is not a multiple of the batch size makes the last batch run into the
               * rows of the next page, which duplicates those entries.
               */
              $batch_size = min( $steps, ( $total - $offset ) );
              $posts      = $this->get_posts( $post_type, $batch_size, $offset );

              $offset += $batch_size;

              if ( empty( $posts ) ) {
                  continue;
              }

              foreach ( $posts as $post ) {

                  if ( in_array( $post->ID, $posts_to_exclude, true ) ) {
                      continue;
                  }

                  if ( WPSEO_Meta::get_value( 'meta-robots-noindex', $post->ID ) === '1' ) {
                      continue;
                  }

                  // get_url() returns false for posts that must not be listed.
                  $url = $this->get_url( $post );

                  if ( ! isset( $url['loc'] ) ) {
                      continue;
                  }

                  /**
                   * Filter URL entry before it gets added to the sitemap.
                   *
                   * @param array  $url  Array of URL parts.
                   * @param string $type URL type.
                   * @param object $post Data object for the URL.
                   */
                  $url = apply_filters( 'wpseo_sitemap_entry', $url, 'post', $post );

                  if ( empty( $url ) ) {
                      continue;
                  }

                  if ( $post->ID === $page_for_posts_id || $post->ID === $page_on_front_id ) {
                      array_unshift( $links, $url );
                      continue;
                  }

                  $links[] = $url;
              }

              unset( $post, $url );
          }

          return $links;
      }

      /**
       * Check for relevant post type before invalidation.
       *
       * @param int $post_id Post ID to possibly invalidate for.
       */
      public function save_post( $post_id ) {
          if ( $this->is_valid_post_type( get_post_type( $post_id ) ) ) {
              WPSEO_Sitemaps_Cache::invalidate_post( $post_id );
          }
      }

      /**
       * Check if post type should be present in sitemaps.
       *
       * A post type is valid when it is indexable and not excluded through the
       * `wpseo_sitemap_exclude_post_type` filter.
       *
       * @param string $post_type Post type string to check for.
       *
       * @return bool
       */
      public function is_valid_post_type( $post_type ) {
          if ( ! WPSEO_Post_Type::is_post_type_indexable( $post_type ) ) {
              return false;
          }

          /**
           * Filter decision if post type is excluded from the XML sitemap.
           *
           * @param bool   $exclude   Default false.
           * @param string $post_type Post type name.
           */
          if ( apply_filters( 'wpseo_sitemap_exclude_post_type', false, $post_type ) ) {
              return false;
          }

          return true;
      }

      /**
       * Retrieves a list with the excluded post ids.
       *
       * @return array Array with post ids (cast to int) to exclude.
       */
      protected function get_excluded_posts() {
          /**
           * Filter: 'wpseo_exclude_from_sitemap_by_post_ids' - Allow extending and modifying the posts to exclude.
           *
           * @api array $posts_to_exclude The posts to exclude.
           */
          $excluded_posts_ids = apply_filters( 'wpseo_exclude_from_sitemap_by_post_ids', array() );

          // Anything that is not a non-empty array counts as "nothing excluded".
          if ( ! is_array( $excluded_posts_ids ) || $excluded_posts_ids === array() ) {
              return array();
          }

          return array_map( 'intval', $excluded_posts_ids );
      }

      /**
       * Get count of posts for post type.
       *
       * The JOIN and WHERE additions come from filters and are placed in the query as-is.
       *
       * @param string $post_type Post type to retrieve count for.
       *
       * @return int
       */
      protected function get_post_type_count( $post_type ) {

          global $wpdb;

          /**
           * Filter JOIN query part for type count of post type.
           *
           * @param string $join      SQL part, defaults to empty string.
           * @param string $post_type Post type name.
           */
          $join_filter = apply_filters( 'wpseo_typecount_join', '', $post_type );

          /**
           * Filter WHERE query part for type count of post type.
           *
           * @param string $where     SQL part, defaults to empty string.
           * @param string $post_type Post type name.
           */
          $where_filter = apply_filters( 'wpseo_typecount_where', '', $post_type );

          $where = $this->get_sql_where_clause( $post_type );

          $sql = "
              SELECT COUNT({$wpdb->posts}.ID)
              FROM {$wpdb->posts}
              {$join_filter}
              {$where}
              {$where_filter}
          ";

          return (int) $wpdb->get_var( $sql );
      }

      /**
       * Produces set of links to prepend at start of first sitemap page.
       *
       * Without a static front page the home URL is listed for the `post` and `page` types.
       * With a static front page and a posts page, the posts page is listed for `post`.
       * In every other case the post type archive URL is listed, when there is one.
       *
       * @param string $post_type Post type to produce links for.
       *
       * @return array
       */
      protected function get_first_links( $post_type ) {

          $links         = array();
          $needs_archive = true;

          if ( ! $this->get_page_on_front_id() && ( $post_type === 'post' || $post_type === 'page' ) ) {

              $links[] = array(
                  'loc' => $this->get_home_url(),

                  // Deprecated, kept for backwards data compat. R.
                  'chf' => 'daily',
                  'pri' => 1,
              );

              $needs_archive = false;
          }
          elseif ( $this->get_page_on_front_id() && $post_type === 'post' && $this->get_page_for_posts_id() ) {

              $page_for_posts_url = get_permalink( $this->get_page_for_posts_id() );

              // get_permalink() yields false when the post cannot be found; do not list an empty location.
              if ( $page_for_posts_url ) {
                  $links[] = array(
                      'loc' => $page_for_posts_url,

                      // Deprecated, kept for backwards data compat. R.
                      'chf' => 'daily',
                      'pri' => 1,
                  );
              }

              $needs_archive = false;
          }

          if ( ! $needs_archive ) {
              return $links;
          }

          $archive_url = $this->get_post_type_archive_link( $post_type );

          /**
           * Filter the URL Yoast SEO uses in the XML sitemap for this post type archive.
           *
           * @param string $archive_url The URL of this archive
           * @param string $post_type   The post type this archive is for.
           */
          $archive_url = apply_filters( 'wpseo_sitemap_post_type_archive_link', $archive_url, $post_type );

          if ( $archive_url ) {
              $links[] = array(
                  'loc' => $archive_url,
                  'mod' => WPSEO_Sitemaps::get_last_modified_gmt( $post_type ),

                  // Deprecated, kept for backwards data compat. R.
                  'chf' => 'daily',
                  'pri' => 1,
              );
          }

          return $links;
      }

      /**
       * Get URL for a post type archive.
       *
       * @since 5.3
       *
       * @param string $post_type Post type.
       *
       * @return string|bool URL or false if it should be excluded.
       */
      protected function get_post_type_archive_link( $post_type ) {

          if ( WPSEO_Options::get( 'noindex-ptarchive-' . $post_type, false ) ) {
              return false;
          }

          // Post archive should be excluded if it isn't front page or posts page.
          if ( $post_type === 'post' && get_option( 'show_on_front' ) !== 'posts' && ! $this->get_page_for_posts_id() ) {
              return false;
          }

          return get_post_type_archive_link( $post_type );
      }

      /**
       * Retrieve set of posts with optimized query routine.
       *
       * The filtered JOIN and WHERE parts are resolved once per post type and kept in a
       * static cache. Each returned row is completed with the fields that are not selected
       * and its numeric fields are cast to int. The post meta cache is primed for all
       * returned IDs.
       *
       * @param string $post_type Post type to retrieve.
       * @param int    $count     Count of posts to retrieve.
       * @param int    $offset    Starting offset.
       *
       * @return object[]
       */
      protected function get_posts( $post_type, $count, $offset ) {

          global $wpdb;

          static $filters = array();

          if ( ! isset( $filters[ $post_type ] ) ) {
              // Make sure you're wpdb->preparing everything you throw into this!!
              $filters[ $post_type ] = array(
                  /**
                   * Filter JOIN query part for the post type.
                   *
                   * @param string $join      SQL part, defaults to false.
                   * @param string $post_type Post type name.
                   */
                  'join'  => apply_filters( 'wpseo_posts_join', false, $post_type ),

                  /**
                   * Filter Where query part for the post type.
                   *
                   * @param string $where     SQL part, defaults to false.
                   * @param string $post_type Post type name.
                   */
                  'where' => apply_filters( 'wpseo_posts_where', false, $post_type ),
              );
          }

          $join_filter  = $filters[ $post_type ]['join'];
          $where_filter = $filters[ $post_type ]['where'];
          $where        = $this->get_sql_where_clause( $post_type );

          // Optimized query per this thread: http://wordpress.org/support/topic/plugin-wordpress-seo-by-yoast-performance-suggestion.
          // Also see http://explainextended.com/2009/10/23/mysql-order-by-limit-performance-late-row-lookups/.
          $sql = "
              SELECT l.ID, post_title, post_content, post_name, post_parent, post_author, post_modified_gmt, post_date, post_date_gmt
              FROM (
                  SELECT {$wpdb->posts}.ID
                  FROM {$wpdb->posts}
                  {$join_filter}
                  {$where}
                      {$where_filter}
                  ORDER BY {$wpdb->posts}.post_modified ASC LIMIT %d OFFSET %d
              )
              o JOIN {$wpdb->posts} l ON l.ID = o.ID
          ";

          $posts = $wpdb->get_results( $wpdb->prepare( $sql, $count, $offset ) );

          // Keep the documented return type even when the query yields no usable result.
          if ( ! is_array( $posts ) ) {
              return array();
          }

          $post_ids = array();

          foreach ( $posts as $post ) {
              // These fields are not selected by the query, so they are set by hand.
              $post->post_type   = $post_type;
              $post->post_status = 'publish';
              $post->filter      = 'sample';
              $post->ID          = (int) $post->ID;
              $post->post_parent = (int) $post->post_parent;
              $post->post_author = (int) $post->post_author;
              $post_ids[]        = $post->ID;
          }

          update_meta_cache( 'post', $post_ids );

          return $posts;
      }

      /**
       * Builds the prepared JOIN/WHERE part shared by the count and the posts query.
       *
       * Regular post types must be published themselves. Attachments are matched on the
       * status and password of their parent post instead.
       *
       * @param string $post_type Post type slug.
       *
       * @return string
       */
      protected function get_sql_where_clause( $post_type ) {

          global $wpdb;

          $join   = '';
          $status = "{$wpdb->posts}.post_status = 'publish'";

          // Based on WP_Query->get_posts(). R.
          if ( 'attachment' === $post_type ) {
              $join   = " LEFT JOIN {$wpdb->posts} AS p2 ON ({$wpdb->posts}.post_parent = p2.ID) ";
              $status = "p2.post_status = 'publish' AND p2.post_password = ''";
          }

          $where_clause = "
              {$join}
              WHERE {$status}
                  AND {$wpdb->posts}.post_type = %s
                  AND {$wpdb->posts}.post_password = ''
                  AND {$wpdb->posts}.post_date != '0000-00-00 00:00:00'
          ";

          return $wpdb->prepare( $where_clause, $post_type );
      }

      /**
       * Produce array of URL parts for given post object.
       *
       * Returns false when the (filtered) permalink is classified as external, or when the
       * post has a canonical that differs from its own URL.
       *
       * @param object $post Post object to get URL parts for.
       *
       * @return array|bool Array with `loc`, `chf`, `pri`, `images` and optionally `mod`, or false.
       */
      protected function get_url( $post ) {

          $url = array();

          /**
           * Filter the URL Yoast SEO uses in the XML sitemap.
           *
           * Note that only absolute local URLs are allowed as the check after this removes external URLs.
           *
           * @param string $url  URL to use in the XML sitemap
           * @param object $post Post object for the URL.
           */
          $url['loc'] = apply_filters( 'wpseo_xml_sitemap_post_url', get_permalink( $post ), $post );

          /**
           * Do not include external URLs.
           *
           * @see https://wordpress.org/plugins/page-links-to/ can rewrite permalinks to external URLs.
           */
          if ( $this->get_classifier()->classify( $url['loc'] ) === WPSEO_Link::TYPE_EXTERNAL ) {
              return false;
          }

          // Both values are date strings in the same format, so max() picks the later one.
          $modified = max( $post->post_modified_gmt, $post->post_date_gmt );

          if ( $modified !== '0000-00-00 00:00:00' ) {
              $url['mod'] = $modified;
          }

          $url['chf'] = 'daily'; // Deprecated, kept for backwards data compat. R.

          $canonical = WPSEO_Meta::get_value( 'canonical', $post->ID );

          if ( $canonical !== '' && $canonical !== $url['loc'] ) {
              /*
               * Let's assume that if a canonical is set for this page and it's different from
               * the URL of this post, that page is either already in the XML sitemap OR is on
               * an external site, either way, we shouldn't include it here.
               */
              return false;
          }

          $url['pri']    = 1; // Deprecated, kept for backwards data compat. R.
          $url['images'] = $this->get_image_parser()->get_images( $post );

          return $url;
      }
  }