PageRenderTime 58ms CodeModel.GetById 37ms app.highlight 17ms RepoModel.GetById 1ms app.codeStats 0ms

/wp-content/plugins/wordpress-seo/inc/sitemaps/class-sitemaps-renderer.php

https://bitbucket.org/carloskikea/helpet
PHP | 344 lines | 159 code | 76 blank | 109 comment | 26 complexity | bdded78a9631cf24a51c92cc461a09fd MD5 | raw file
  1<?php
  2/**
  3 * WPSEO plugin file.
  4 *
  5 * @package WPSEO\XML_Sitemaps
  6 */
  7
  8/**
  9 * Renders XML output for sitemaps.
 10 */
 11class WPSEO_Sitemaps_Renderer {
 12
 13	/** @var string $stylesheet XSL stylesheet for styling a sitemap for web browsers. */
 14	protected $stylesheet = '';
 15
 16	/** @var string $charset Holds the get_bloginfo( 'charset' ) value to reuse for performance. */
 17	protected $charset = 'UTF-8';
 18
 19	/** @var string $output_charset Holds charset of output, might be converted. */
 20	protected $output_charset = 'UTF-8';
 21
 22	/** @var bool $needs_conversion If data encoding needs to be converted for output. */
 23	protected $needs_conversion = false;
 24
 25	/** @var WPSEO_Sitemap_Timezone $timezone */
 26	protected $timezone;
 27
 28	/**
 29	 * Set up object properties.
 30	 */
 31	public function __construct() {
 32		$stylesheet_url       = preg_replace( '/(^http[s]?:)/', '', $this->get_xsl_url() );
 33		$this->stylesheet     = '<?xml-stylesheet type="text/xsl" href="' . esc_url( $stylesheet_url ) . '"?>';
 34		$this->charset        = get_bloginfo( 'charset' );
 35		$this->output_charset = $this->charset;
 36		$this->timezone       = new WPSEO_Sitemap_Timezone();
 37
 38		if (
 39			'UTF-8' !== $this->charset
 40			&& function_exists( 'mb_list_encodings' )
 41			&& in_array( $this->charset, mb_list_encodings(), true )
 42		) {
 43			$this->output_charset = 'UTF-8';
 44		}
 45
 46		$this->needs_conversion = $this->output_charset !== $this->charset;
 47	}
 48
 49	/**
 50	 * @param array $links Set of sitemaps index links.
 51	 *
 52	 * @return string
 53	 */
 54	public function get_index( $links ) {
 55
 56		$xml = '<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">' . "\n";
 57
 58		foreach ( $links as $link ) {
 59			$xml .= $this->sitemap_index_url( $link );
 60		}
 61
 62		/**
 63		 * Filter to append sitemaps to the index.
 64		 *
 65		 * @param string $index String to append to sitemaps index, defaults to empty.
 66		 */
 67		$xml .= apply_filters( 'wpseo_sitemap_index', '' );
 68		$xml .= '</sitemapindex>';
 69
 70		return $xml;
 71	}
 72
 73	/**
 74	 * @param array  $links        Set of sitemap links.
 75	 * @param string $type         Sitemap type.
 76	 * @param int    $current_page Current sitemap page number.
 77	 *
 78	 * @return string
 79	 */
 80	public function get_sitemap( $links, $type, $current_page ) {
 81
 82		$urlset = '<urlset xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:image="http://www.google.com/schemas/sitemap-image/1.1" '
 83			. 'xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd '
 84			. 'http://www.google.com/schemas/sitemap-image/1.1 http://www.google.com/schemas/sitemap-image/1.1/sitemap-image.xsd" '
 85			. 'xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">' . "\n";
 86
 87		/**
 88		 * Filters the `urlset` for a sitemap by type.
 89		 *
 90		 * @api string $urlset The output for the sitemap's `urlset`.
 91		 */
 92		$xml = apply_filters( "wpseo_sitemap_{$type}_urlset", $urlset );
 93
 94		foreach ( $links as $url ) {
 95			$xml .= $this->sitemap_url( $url );
 96		}
 97
 98		/**
 99		 * Filter to add extra URLs to the XML sitemap by type.
100		 *
101		 * Only runs for the first page, not on all.
102		 *
103		 * @param string $content String content to add, defaults to empty.
104		 */
105		if ( $current_page === 1 ) {
106			$xml .= apply_filters( "wpseo_sitemap_{$type}_content", '' );
107		}
108
109		$xml .= '</urlset>';
110
111		return $xml;
112	}
113
114	/**
115	 * Produce final XML output with debug information.
116	 *
117	 * @param string  $sitemap    Sitemap XML.
118	 * @param boolean $transient  Transient cache flag.
119	 *
120	 * @return string
121	 */
122	public function get_output( $sitemap, $transient ) {
123
124		$output = '<?xml version="1.0" encoding="' . esc_attr( $this->output_charset ) . '"?>';
125
126		if ( $this->stylesheet ) {
127			/**
128			 * Filter the stylesheet URL for the XML sitemap.
129			 *
130			 * @param string $stylesheet Stylesheet URL.
131			 */
132			$output .= apply_filters( 'wpseo_stylesheet_url', $this->stylesheet ) . "\n";
133		}
134
135		$output .= $sitemap;
136		$output .= "\n<!-- XML Sitemap generated by Yoast SEO -->";
137
138		$debug = WP_DEBUG || ( defined( 'WPSEO_DEBUG' ) && true === WPSEO_DEBUG );
139
140		if ( ! WP_DEBUG_DISPLAY || ! $debug ) {
141			return $output;
142		}
143
144		$memory_used = number_format( ( memory_get_peak_usage() / 1048576 ), 2 );
145		$queries_run = ( $transient ) ? 'Served from transient cache' : 'Queries executed ' . absint( $GLOBALS['wpdb']->num_queries );
146
147		$output .= "\n<!-- {$memory_used}MB | {$queries_run} -->";
148
149		if ( defined( 'SAVEQUERIES' ) && SAVEQUERIES ) {
150
151			$queries = print_r( $GLOBALS['wpdb']->queries, true );
152			$output .= "\n<!-- {$queries} -->";
153		}
154
155		return $output;
156	}
157
158	/**
159	 * Get charset for the output.
160	 *
161	 * @return string
162	 */
163	public function get_output_charset() {
164		return $this->output_charset;
165	}
166
167	/**
168	 * Set a custom stylesheet for this sitemap. Set to empty to just remove the default stylesheet.
169	 *
170	 * @param string $stylesheet Full xml-stylesheet declaration.
171	 */
172	public function set_stylesheet( $stylesheet ) {
173		$this->stylesheet = $stylesheet;
174	}
175
176	/**
177	 * Build the `<sitemap>` tag for a given URL.
178	 *
179	 * @param array $url Array of parts that make up this entry.
180	 *
181	 * @return string
182	 */
183	protected function sitemap_index_url( $url ) {
184
185		$date = null;
186
187		if ( ! empty( $url['lastmod'] ) ) {
188			$date = $this->timezone->format_date( $url['lastmod'] );
189		}
190
191		$url['loc'] = htmlspecialchars( $url['loc'] );
192
193		$output  = "\t<sitemap>\n";
194		$output .= "\t\t<loc>" . $url['loc'] . "</loc>\n";
195		$output .= empty( $date ) ? '' : "\t\t<lastmod>" . htmlspecialchars( $date ) . "</lastmod>\n";
196		$output .= "\t</sitemap>\n";
197
198		return $output;
199	}
200
201	/**
202	 * Build the `<url>` tag for a given URL.
203	 *
204	 * Public access for backwards compatibility reasons.
205	 *
206	 * @param array $url Array of parts that make up this entry.
207	 *
208	 * @return string
209	 */
210	public function sitemap_url( $url ) {
211
212		$date = null;
213
214
215		if ( ! empty( $url['mod'] ) ) {
216			// Create a DateTime object date in the correct timezone.
217			$date = $this->timezone->format_date( $url['mod'] );
218		}
219
220		$url['loc'] = htmlspecialchars( $url['loc'] );
221
222		$output  = "\t<url>\n";
223		$output .= "\t\t<loc>" . $this->encode_url_rfc3986( $url['loc'] ) . "</loc>\n";
224		$output .= empty( $date ) ? '' : "\t\t<lastmod>" . htmlspecialchars( $date ) . "</lastmod>\n";
225
226		if ( empty( $url['images'] ) ) {
227			$url['images'] = array();
228		}
229
230		foreach ( $url['images'] as $img ) {
231
232			if ( empty( $img['src'] ) ) {
233				continue;
234			}
235
236			$output .= "\t\t<image:image>\n";
237			$output .= "\t\t\t<image:loc>" . esc_html( $this->encode_url_rfc3986( $img['src'] ) ) . "</image:loc>\n";
238
239			if ( ! empty( $img['title'] ) ) {
240
241				$title = $img['title'];
242
243				if ( $this->needs_conversion ) {
244					$title = mb_convert_encoding( $title, $this->output_charset, $this->charset );
245				}
246
247				$title   = _wp_specialchars( html_entity_decode( $title, ENT_QUOTES, $this->output_charset ) );
248				$output .= "\t\t\t<image:title><![CDATA[{$title}]]></image:title>\n";
249			}
250
251			if ( ! empty( $img['alt'] ) ) {
252
253				$alt = $img['alt'];
254
255				if ( $this->needs_conversion ) {
256					$alt = mb_convert_encoding( $alt, $this->output_charset, $this->charset );
257				}
258
259				$alt     = _wp_specialchars( html_entity_decode( $alt, ENT_QUOTES, $this->output_charset ) );
260				$output .= "\t\t\t<image:caption><![CDATA[{$alt}]]></image:caption>\n";
261			}
262
263			$output .= "\t\t</image:image>\n";
264		}
265		unset( $img, $title, $alt );
266
267		$output .= "\t</url>\n";
268
269		/**
270		 * Filters the output for the sitemap url tag.
271		 *
272		 * @api   string $output The output for the sitemap url tag.
273		 *
274		 * @param array  $url The sitemap url array on which the output is based.
275		 */
276		return apply_filters( 'wpseo_sitemap_url', $output, $url );
277	}
278
279	/**
280	 * Apply some best effort conversion to comply with RFC3986.
281	 *
282	 * @param string $url URL to encode.
283	 *
284	 * @return string
285	 */
286	protected function encode_url_rfc3986( $url ) {
287
288		if ( filter_var( $url, FILTER_VALIDATE_URL ) ) {
289			return $url;
290		}
291
292		$path = wp_parse_url( $url, PHP_URL_PATH );
293
294		if ( ! empty( $path ) && '/' !== $path ) {
295			$encoded_path = explode( '/', $path );
296
297			// First decode the path, to prevent double encoding.
298			$encoded_path = array_map( 'rawurldecode', $encoded_path );
299
300			$encoded_path = array_map( 'rawurlencode', $encoded_path );
301			$encoded_path = implode( '/', $encoded_path );
302			$encoded_path = str_replace( '%7E', '~', $encoded_path ); // PHP <5.3.
303
304			$url = str_replace( $path, $encoded_path, $url );
305		}
306
307		$query = wp_parse_url( $url, PHP_URL_QUERY );
308
309		if ( ! empty( $query ) ) {
310
311			parse_str( $query, $parsed_query );
312
313			if ( defined( 'PHP_QUERY_RFC3986' ) ) { // PHP 5.4+.
314				$parsed_query = http_build_query( $parsed_query, null, '&amp;', PHP_QUERY_RFC3986 );
315			}
316			else {
317				$parsed_query = http_build_query( $parsed_query, null, '&amp;' );
318				$parsed_query = str_replace( '+', '%20', $parsed_query );
319				$parsed_query = str_replace( '%7E', '~', $parsed_query );
320			}
321
322			$url = str_replace( $query, $parsed_query, $url );
323		}
324
325		return $url;
326	}
327
328	/**
329	 * Retrieves the XSL URL that should be used in the current environment
330	 *
331	 * When home_url and site_url are not the same, the home_url should be used.
332	 * This is because the XSL needs to be served from the same domain, protocol and port
333	 * as the XML file that is loading it.
334	 *
335	 * @return string The XSL URL that needs to be used.
336	 */
337	protected function get_xsl_url() {
338		if ( home_url() !== site_url() ) {
339			return home_url( 'main-sitemap.xsl' );
340		}
341
342		return plugin_dir_url( WPSEO_FILE ) . 'css/main-sitemap.xsl';
343	}
344}