/wp-content/plugins/wordpress-seo/inc/sitemaps/class-sitemaps-renderer.php

https://bitbucket.org/carloskikea/helpet · PHP · 344 lines · 159 code · 76 blank · 109 comment · 26 complexity · bdded78a9631cf24a51c92cc461a09fd MD5 · raw file

  1. <?php
  2. /**
  3. * WPSEO plugin file.
  4. *
  5. * @package WPSEO\XML_Sitemaps
  6. */
  7. /**
  8. * Renders XML output for sitemaps.
  9. */
  10. class WPSEO_Sitemaps_Renderer {
  11. /** @var string $stylesheet XSL stylesheet for styling a sitemap for web browsers. */
  12. protected $stylesheet = '';
  13. /** @var string $charset Holds the get_bloginfo( 'charset' ) value to reuse for performance. */
  14. protected $charset = 'UTF-8';
  15. /** @var string $output_charset Holds charset of output, might be converted. */
  16. protected $output_charset = 'UTF-8';
  17. /** @var bool $needs_conversion If data encoding needs to be converted for output. */
  18. protected $needs_conversion = false;
  19. /** @var WPSEO_Sitemap_Timezone $timezone */
  20. protected $timezone;
  21. /**
  22. * Set up object properties.
  23. */
  24. public function __construct() {
  25. $stylesheet_url = preg_replace( '/(^http[s]?:)/', '', $this->get_xsl_url() );
  26. $this->stylesheet = '<?xml-stylesheet type="text/xsl" href="' . esc_url( $stylesheet_url ) . '"?>';
  27. $this->charset = get_bloginfo( 'charset' );
  28. $this->output_charset = $this->charset;
  29. $this->timezone = new WPSEO_Sitemap_Timezone();
  30. if (
  31. 'UTF-8' !== $this->charset
  32. && function_exists( 'mb_list_encodings' )
  33. && in_array( $this->charset, mb_list_encodings(), true )
  34. ) {
  35. $this->output_charset = 'UTF-8';
  36. }
  37. $this->needs_conversion = $this->output_charset !== $this->charset;
  38. }
  39. /**
  40. * @param array $links Set of sitemaps index links.
  41. *
  42. * @return string
  43. */
  44. public function get_index( $links ) {
  45. $xml = '<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">' . "\n";
  46. foreach ( $links as $link ) {
  47. $xml .= $this->sitemap_index_url( $link );
  48. }
  49. /**
  50. * Filter to append sitemaps to the index.
  51. *
  52. * @param string $index String to append to sitemaps index, defaults to empty.
  53. */
  54. $xml .= apply_filters( 'wpseo_sitemap_index', '' );
  55. $xml .= '</sitemapindex>';
  56. return $xml;
  57. }
  58. /**
  59. * @param array $links Set of sitemap links.
  60. * @param string $type Sitemap type.
  61. * @param int $current_page Current sitemap page number.
  62. *
  63. * @return string
  64. */
  65. public function get_sitemap( $links, $type, $current_page ) {
  66. $urlset = '<urlset xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:image="http://www.google.com/schemas/sitemap-image/1.1" '
  67. . 'xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd '
  68. . 'http://www.google.com/schemas/sitemap-image/1.1 http://www.google.com/schemas/sitemap-image/1.1/sitemap-image.xsd" '
  69. . 'xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">' . "\n";
  70. /**
  71. * Filters the `urlset` for a sitemap by type.
  72. *
  73. * @api string $urlset The output for the sitemap's `urlset`.
  74. */
  75. $xml = apply_filters( "wpseo_sitemap_{$type}_urlset", $urlset );
  76. foreach ( $links as $url ) {
  77. $xml .= $this->sitemap_url( $url );
  78. }
  79. /**
  80. * Filter to add extra URLs to the XML sitemap by type.
  81. *
  82. * Only runs for the first page, not on all.
  83. *
  84. * @param string $content String content to add, defaults to empty.
  85. */
  86. if ( $current_page === 1 ) {
  87. $xml .= apply_filters( "wpseo_sitemap_{$type}_content", '' );
  88. }
  89. $xml .= '</urlset>';
  90. return $xml;
  91. }
  92. /**
  93. * Produce final XML output with debug information.
  94. *
  95. * @param string $sitemap Sitemap XML.
  96. * @param boolean $transient Transient cache flag.
  97. *
  98. * @return string
  99. */
  100. public function get_output( $sitemap, $transient ) {
  101. $output = '<?xml version="1.0" encoding="' . esc_attr( $this->output_charset ) . '"?>';
  102. if ( $this->stylesheet ) {
  103. /**
  104. * Filter the stylesheet URL for the XML sitemap.
  105. *
  106. * @param string $stylesheet Stylesheet URL.
  107. */
  108. $output .= apply_filters( 'wpseo_stylesheet_url', $this->stylesheet ) . "\n";
  109. }
  110. $output .= $sitemap;
  111. $output .= "\n<!-- XML Sitemap generated by Yoast SEO -->";
  112. $debug = WP_DEBUG || ( defined( 'WPSEO_DEBUG' ) && true === WPSEO_DEBUG );
  113. if ( ! WP_DEBUG_DISPLAY || ! $debug ) {
  114. return $output;
  115. }
  116. $memory_used = number_format( ( memory_get_peak_usage() / 1048576 ), 2 );
  117. $queries_run = ( $transient ) ? 'Served from transient cache' : 'Queries executed ' . absint( $GLOBALS['wpdb']->num_queries );
  118. $output .= "\n<!-- {$memory_used}MB | {$queries_run} -->";
  119. if ( defined( 'SAVEQUERIES' ) && SAVEQUERIES ) {
  120. $queries = print_r( $GLOBALS['wpdb']->queries, true );
  121. $output .= "\n<!-- {$queries} -->";
  122. }
  123. return $output;
  124. }
  125. /**
  126. * Get charset for the output.
  127. *
  128. * @return string
  129. */
  130. public function get_output_charset() {
  131. return $this->output_charset;
  132. }
  133. /**
  134. * Set a custom stylesheet for this sitemap. Set to empty to just remove the default stylesheet.
  135. *
  136. * @param string $stylesheet Full xml-stylesheet declaration.
  137. */
  138. public function set_stylesheet( $stylesheet ) {
  139. $this->stylesheet = $stylesheet;
  140. }
  141. /**
  142. * Build the `<sitemap>` tag for a given URL.
  143. *
  144. * @param array $url Array of parts that make up this entry.
  145. *
  146. * @return string
  147. */
  148. protected function sitemap_index_url( $url ) {
  149. $date = null;
  150. if ( ! empty( $url['lastmod'] ) ) {
  151. $date = $this->timezone->format_date( $url['lastmod'] );
  152. }
  153. $url['loc'] = htmlspecialchars( $url['loc'] );
  154. $output = "\t<sitemap>\n";
  155. $output .= "\t\t<loc>" . $url['loc'] . "</loc>\n";
  156. $output .= empty( $date ) ? '' : "\t\t<lastmod>" . htmlspecialchars( $date ) . "</lastmod>\n";
  157. $output .= "\t</sitemap>\n";
  158. return $output;
  159. }
  160. /**
  161. * Build the `<url>` tag for a given URL.
  162. *
  163. * Public access for backwards compatibility reasons.
  164. *
  165. * @param array $url Array of parts that make up this entry.
  166. *
  167. * @return string
  168. */
  169. public function sitemap_url( $url ) {
  170. $date = null;
  171. if ( ! empty( $url['mod'] ) ) {
  172. // Create a DateTime object date in the correct timezone.
  173. $date = $this->timezone->format_date( $url['mod'] );
  174. }
  175. $url['loc'] = htmlspecialchars( $url['loc'] );
  176. $output = "\t<url>\n";
  177. $output .= "\t\t<loc>" . $this->encode_url_rfc3986( $url['loc'] ) . "</loc>\n";
  178. $output .= empty( $date ) ? '' : "\t\t<lastmod>" . htmlspecialchars( $date ) . "</lastmod>\n";
  179. if ( empty( $url['images'] ) ) {
  180. $url['images'] = array();
  181. }
  182. foreach ( $url['images'] as $img ) {
  183. if ( empty( $img['src'] ) ) {
  184. continue;
  185. }
  186. $output .= "\t\t<image:image>\n";
  187. $output .= "\t\t\t<image:loc>" . esc_html( $this->encode_url_rfc3986( $img['src'] ) ) . "</image:loc>\n";
  188. if ( ! empty( $img['title'] ) ) {
  189. $title = $img['title'];
  190. if ( $this->needs_conversion ) {
  191. $title = mb_convert_encoding( $title, $this->output_charset, $this->charset );
  192. }
  193. $title = _wp_specialchars( html_entity_decode( $title, ENT_QUOTES, $this->output_charset ) );
  194. $output .= "\t\t\t<image:title><![CDATA[{$title}]]></image:title>\n";
  195. }
  196. if ( ! empty( $img['alt'] ) ) {
  197. $alt = $img['alt'];
  198. if ( $this->needs_conversion ) {
  199. $alt = mb_convert_encoding( $alt, $this->output_charset, $this->charset );
  200. }
  201. $alt = _wp_specialchars( html_entity_decode( $alt, ENT_QUOTES, $this->output_charset ) );
  202. $output .= "\t\t\t<image:caption><![CDATA[{$alt}]]></image:caption>\n";
  203. }
  204. $output .= "\t\t</image:image>\n";
  205. }
  206. unset( $img, $title, $alt );
  207. $output .= "\t</url>\n";
  208. /**
  209. * Filters the output for the sitemap url tag.
  210. *
  211. * @api string $output The output for the sitemap url tag.
  212. *
  213. * @param array $url The sitemap url array on which the output is based.
  214. */
  215. return apply_filters( 'wpseo_sitemap_url', $output, $url );
  216. }
  217. /**
  218. * Apply some best effort conversion to comply with RFC3986.
  219. *
  220. * @param string $url URL to encode.
  221. *
  222. * @return string
  223. */
  224. protected function encode_url_rfc3986( $url ) {
  225. if ( filter_var( $url, FILTER_VALIDATE_URL ) ) {
  226. return $url;
  227. }
  228. $path = wp_parse_url( $url, PHP_URL_PATH );
  229. if ( ! empty( $path ) && '/' !== $path ) {
  230. $encoded_path = explode( '/', $path );
  231. // First decode the path, to prevent double encoding.
  232. $encoded_path = array_map( 'rawurldecode', $encoded_path );
  233. $encoded_path = array_map( 'rawurlencode', $encoded_path );
  234. $encoded_path = implode( '/', $encoded_path );
  235. $encoded_path = str_replace( '%7E', '~', $encoded_path ); // PHP <5.3.
  236. $url = str_replace( $path, $encoded_path, $url );
  237. }
  238. $query = wp_parse_url( $url, PHP_URL_QUERY );
  239. if ( ! empty( $query ) ) {
  240. parse_str( $query, $parsed_query );
  241. if ( defined( 'PHP_QUERY_RFC3986' ) ) { // PHP 5.4+.
  242. $parsed_query = http_build_query( $parsed_query, null, '&amp;', PHP_QUERY_RFC3986 );
  243. }
  244. else {
  245. $parsed_query = http_build_query( $parsed_query, null, '&amp;' );
  246. $parsed_query = str_replace( '+', '%20', $parsed_query );
  247. $parsed_query = str_replace( '%7E', '~', $parsed_query );
  248. }
  249. $url = str_replace( $query, $parsed_query, $url );
  250. }
  251. return $url;
  252. }
  253. /**
  254. * Retrieves the XSL URL that should be used in the current environment
  255. *
  256. * When home_url and site_url are not the same, the home_url should be used.
  257. * This is because the XSL needs to be served from the same domain, protocol and port
  258. * as the XML file that is loading it.
  259. *
  260. * @return string The XSL URL that needs to be used.
  261. */
  262. protected function get_xsl_url() {
  263. if ( home_url() !== site_url() ) {
  264. return home_url( 'main-sitemap.xsl' );
  265. }
  266. return plugin_dir_url( WPSEO_FILE ) . 'css/main-sitemap.xsl';
  267. }
  268. }