PageRenderTime 58ms CodeModel.GetById 14ms RepoModel.GetById 1ms app.codeStats 0ms

/wp-content/plugins/wordpress-seo/inc/sitemaps/class-sitemap-image-parser.php

https://gitlab.com/najomie/fit-hippie
PHP | 428 lines | 217 code | 98 blank | 113 comment | 36 complexity | a44f33fcd3c80144ac5d19584ea3ed52 MD5 | raw file
  1. <?php
  /**
   * @package WPSEO\XML_Sitemaps
   */

  /**
   * Parses images from the given post.
   */
  class WPSEO_Sitemap_Image_Parser {

      /** @var string $home_url Holds the home_url() value to speed up loops. */
      protected $home_url = '';

      /** @var string $host Holds site URL hostname (without a leading "www."). */
      protected $host = '';

      /** @var string $scheme Holds site URL protocol. */
      protected $scheme = 'http';

      /** @var array $attachments Cached set of attachments for multiple posts. */
      protected $attachments = array();

      /** @var string $charset Holds blog charset value for use in DOM parsing. */
      protected $charset = 'UTF-8';

      /**
       * Set up URL properties for reuse.
       *
       * Reads the home URL once and derives host, scheme and charset from it, so
       * the per-image loops do not have to call back into WordPress.
       */
      public function __construct() {
          $this->home_url = home_url();
          $parsed_home    = parse_url( $this->home_url );

          if ( ! empty( $parsed_home['host'] ) ) {
              // Strip only a leading "www." label. Removing it anywhere in the string
              // would corrupt hosts such as "sub.www.example.com".
              $this->host = preg_replace( '/^www\./i', '', $parsed_home['host'] );
          }

          if ( ! empty( $parsed_home['scheme'] ) ) {
              $this->scheme = $parsed_home['scheme'];
          }

          $this->charset = esc_attr( get_bloginfo( 'charset' ) );
      }

      /**
       * Get set of image data sets for the given post.
       *
       * Collects, in this order: the featured image, `<img>` tags found in the
       * content, images of gallery shortcodes, and the post itself when it is an
       * image attachment. Entries that end up without a `src` are removed before
       * the final filter runs.
       *
       * @param object $post Post object to get images for.
       *
       * @return array List of image items (`src`, optional `title` and `alt`).
       */
      public function get_images( $post ) {
          $images = array();

          if ( ! is_object( $post ) ) {
              return $images;
          }

          // Featured image.
          $thumbnail_id = get_post_thumbnail_id( $post->ID );
          if ( $thumbnail_id ) {
              $src      = $this->get_absolute_url( $this->image_url( $thumbnail_id ) );
              $alt      = get_post_meta( $thumbnail_id, '_wp_attachment_image_alt', true );
              $title    = get_post_field( 'post_title', $thumbnail_id );
              $images[] = $this->get_image_item( $post, $src, $title, $alt );
          }

          // Images embedded in the content as HTML.
          foreach ( $this->parse_html_images( $post->post_content ) as $image ) {
              $images[] = $this->get_image_item( $post, $image['src'], $image['title'], $image['alt'] );
          }

          // Images referenced through gallery shortcodes.
          foreach ( $this->parse_galleries( $post->post_content, $post->ID ) as $attachment ) {
              $src      = $this->get_absolute_url( $this->image_url( $attachment->ID ) );
              $alt      = get_post_meta( $attachment->ID, '_wp_attachment_image_alt', true );
              $images[] = $this->get_image_item( $post, $src, $attachment->post_title, $alt );
          }

          // The post itself, when it is an image attachment.
          if ( 'attachment' === $post->post_type && wp_attachment_is_image( $post ) ) {
              $src      = $this->get_absolute_url( $this->image_url( $post->ID ) );
              $alt      = get_post_meta( $post->ID, '_wp_attachment_image_alt', true );
              $images[] = $this->get_image_item( $post, $src, $post->post_title, $alt );
          }

          // image_url() may return an empty string and the item filters may blank a URL.
          foreach ( $images as $key => $image ) {
              if ( empty( $image['src'] ) ) {
                  unset( $images[ $key ] );
              }
          }

          /**
           * Filter images to be included for the post in XML sitemap.
           *
           * @param array $images  Array of image items.
           * @param int   $post_id ID of the post.
           */
          $images = apply_filters( 'wpseo_sitemap_urlimages', $images, $post->ID );

          return $images;
      }

      /**
       * Get set of image data sets for the description of the given term.
       *
       * @param object $term Term to get images from description for.
       *
       * @return array List of arrays with `src`, `title` and `alt` keys.
       */
      public function get_term_images( $term ) {
          // Same guard as get_images(): without a term object (or a description) there is nothing to parse.
          if ( ! is_object( $term ) || empty( $term->description ) ) {
              return array();
          }

          $images = $this->parse_html_images( $term->description );

          foreach ( $this->parse_galleries( $term->description ) as $attachment ) {
              $src = $this->get_absolute_url( $this->image_url( $attachment->ID ) );

              // image_url() returns '' when the file cannot be resolved; get_images() drops those as well.
              if ( empty( $src ) ) {
                  continue;
              }

              $images[] = array(
                  'src'   => $src,
                  'title' => $attachment->post_title,
                  'alt'   => get_post_meta( $attachment->ID, '_wp_attachment_image_alt', true ),
              );
          }

          return $images;
      }

      /**
       * Parse `<img />` tags in content.
       *
       * Returns an empty list when DOMDocument is unavailable or the content is
       * empty. Images are skipped when they have no `src`, when their absolute URL
       * does not contain the site host, or when the URL is changed by esc_url().
       *
       * @param string $content Content string to parse.
       *
       * @return array List of arrays with `src`, `title` and `alt` keys.
       */
      private function parse_html_images( $content ) {
          $images = array();

          if ( ! class_exists( 'DOMDocument' ) ) {
              return $images;
          }

          if ( empty( $content ) ) {
              return $images;
          }

          // Prevent DOMDocument from bubbling warnings about invalid HTML, and
          // remember the previous setting so this global switch can be restored.
          $previous_libxml_setting = libxml_use_internal_errors( true );

          $post_dom = new DOMDocument();
          // The XML declaration tells libxml which charset the fragment is encoded in.
          $post_dom->loadHTML( '<?xml encoding="' . $this->charset . '">' . $content );

          // Clear the errors, so they don't get kept in memory.
          libxml_clear_errors();
          libxml_use_internal_errors( $previous_libxml_setting );

          /** @var DOMElement $img */
          foreach ( $post_dom->getElementsByTagName( 'img' ) as $img ) {
              $src = $img->getAttribute( 'src' );

              if ( empty( $src ) ) {
                  continue;
              }

              $class = $img->getAttribute( 'class' );

              if ( // This detects WP-inserted images, which we need to upsize. R.
                  ! empty( $class )
                  && false === strpos( $class, 'size-full' )
                  && preg_match( '|wp-image-(?P<id>\d+)|', $class, $matches )
                  && get_post_status( $matches['id'] )
              ) {
                  $full_size_src = $this->image_url( $matches['id'] );

                  // Keep the URL from the markup when the attachment file cannot be resolved.
                  if ( ! empty( $full_size_src ) ) {
                      $src = $full_size_src;
                  }
              }

              $src = $this->get_absolute_url( $src );

              // Only keep images whose URL mentions this site's host.
              if ( strpos( $src, $this->host ) === false ) {
                  continue;
              }

              // Skip URLs that escaping would alter.
              if ( $src !== esc_url( $src ) ) {
                  continue;
              }

              $images[] = array(
                  'src'   => $src,
                  'title' => $img->getAttribute( 'title' ),
                  'alt'   => $img->getAttribute( 'alt' ),
              );
          }

          return $images;
      }

      /**
       * Parse gallery shortcodes in a given content.
       *
       * Each attachment is returned once, in the order it was first encountered,
       * even when several galleries reference it.
       *
       * @param string $content Content string.
       * @param int    $post_id Optional ID of post being parsed.
       *
       * @return array Set of attachment objects.
       */
      private function parse_galleries( $content, $post_id = 0 ) {
          $attachments = array();

          foreach ( $this->get_content_galleries( $content ) as $gallery ) {

              // A gallery may point at another post's attachments through its "id" attribute.
              $id = $post_id;
              if ( ! empty( $gallery['id'] ) ) {
                  $id = intval( $gallery['id'] );
              }

              // Forked from core gallery_shortcode() to have exact same logic. R.
              if ( ! empty( $gallery['ids'] ) ) {
                  $gallery['include'] = $gallery['ids'];
              }

              $gallery_attachments = array();

              if ( ! empty( $gallery['include'] ) ) {
                  $gallery_attachments = get_posts( array(
                      'include'        => $gallery['include'],
                      'post_status'    => 'inherit',
                      'post_type'      => 'attachment',
                      'post_mime_type' => 'image',
                  ) );
              }
              elseif ( ! empty( $gallery['exclude'] ) && ! empty( $id ) ) {
                  $gallery_attachments = get_children( array(
                      'post_parent'    => $id,
                      'exclude'        => $gallery['exclude'],
                      'post_status'    => 'inherit',
                      'post_type'      => 'attachment',
                      'post_mime_type' => 'image',
                  ) );
              }
              elseif ( ! empty( $id ) ) {
                  $gallery_attachments = get_children( array(
                      'post_parent'    => $id,
                      'post_status'    => 'inherit',
                      'post_type'      => 'attachment',
                      'post_mime_type' => 'image',
                  ) );
              }

              // De-duplicate by attachment ID; comparing whole objects is slower and less reliable.
              foreach ( $gallery_attachments as $attachment ) {
                  if ( ! isset( $attachments[ $attachment->ID ] ) ) {
                      $attachments[ $attachment->ID ] = $attachment;
                  }
              }
          }

          return array_values( $attachments );
      }

      /**
       * Retrieves galleries from the passed content.
       *
       * Forked from core to skip executing shortcodes for performance.
       *
       * @param string $content Content to parse for shortcodes.
       *
       * @return array A list of arrays, each containing gallery data.
       */
      protected function get_content_galleries( $content ) {
          $galleries = array();

          if ( ! has_shortcode( $content, 'gallery' ) ) {
              return $galleries;
          }

          if ( ! preg_match_all( '/' . get_shortcode_regex() . '/s', $content, $matches, PREG_SET_ORDER ) ) {
              return $galleries;
          }

          foreach ( $matches as $shortcode ) {
              // Index 2 holds the shortcode tag, index 3 its raw attribute string.
              if ( 'gallery' !== $shortcode[2] ) {
                  continue;
              }

              $attributes = shortcode_parse_atts( $shortcode[3] );

              // Valid shortcode without any attributes yields a string instead of an array.
              if ( ! is_array( $attributes ) ) {
                  $attributes = array();
              }

              $galleries[] = $attributes;
          }

          return $galleries;
      }

      /**
       * Get image item array with filters applied.
       *
       * `title` and `alt` are only added to the item when they are non-empty.
       *
       * @param WP_Post $post  Post object for the context.
       * @param string  $src   Image URL.
       * @param string  $title Optional image title.
       * @param string  $alt   Optional image alt text.
       *
       * @return array
       */
      protected function get_image_item( $post, $src, $title = '', $alt = '' ) {
          $image = array();

          /**
           * Filter image URL to be included in XML sitemap for the post.
           *
           * @param string $src  Image URL.
           * @param object $post Post object.
           */
          $image['src'] = apply_filters( 'wpseo_xml_sitemap_img_src', $src, $post );

          if ( ! empty( $title ) ) {
              $image['title'] = $title;
          }

          if ( ! empty( $alt ) ) {
              $image['alt'] = $alt;
          }

          /**
           * Filter image data to be included in XML sitemap for the post.
           *
           * @param array $image {
           *     Array of image data.
           *
           *     @type string $src   Image URL.
           *     @type string $title Image title attribute (optional).
           *     @type string $alt   Image alt attribute (optional).
           * }
           *
           * @param object $post Post object.
           */
          return apply_filters( 'wpseo_xml_sitemap_img', $image, $post );
      }

      /**
       * Get attached image URL. Adapted from core for speed.
       *
       * @param int $post_id ID of the post.
       *
       * @return string Image URL, or an empty string when the upload directory
       *                reports an error or the attachment has no stored file.
       */
      private function image_url( $post_id ) {
          // Resolved once per request; wp_upload_dir() is comparatively expensive.
          static $uploads;

          if ( empty( $uploads ) ) {
              $uploads = wp_upload_dir();
          }

          if ( false !== $uploads['error'] ) {
              return '';
          }

          $file = get_post_meta( $post_id, '_wp_attached_file', true );

          if ( empty( $file ) ) {
              return '';
          }

          // The stored path is absolute and inside the upload base: swap that leading base for its URL.
          if ( 0 === strpos( $file, $uploads['basedir'] ) ) {
              return $uploads['baseurl'] . substr( $file, strlen( $uploads['basedir'] ) );
          }

          // The stored path contains the default uploads folder: keep only what follows it.
          $uploads_marker = 'wp-content/uploads';
          $marker_offset  = strpos( $file, $uploads_marker );

          if ( false !== $marker_offset ) {
              return $uploads['baseurl'] . substr( $file, $marker_offset + strlen( $uploads_marker ) );
          }

          // It's a newly uploaded file, therefore $file is relative to the baseurl.
          return $uploads['baseurl'] . "/$file";
      }

      /**
       * Make absolute URL for domain or protocol-relative one.
       *
       * Empty or non-string input is returned untouched. URLs that
       * WPSEO_Utils::is_url_relative() reports as relative are prefixed with the
       * home URL when they start with a slash and returned unchanged otherwise.
       * Any remaining URL not starting with "http" gets the site scheme prepended.
       *
       * @param string $src URL to process.
       *
       * @return string
       */
      protected function get_absolute_url( $src ) {
          if ( empty( $src ) || ! is_string( $src ) ) {
              return $src;
          }

          if ( WPSEO_Utils::is_url_relative( $src ) === true ) {

              if ( $src[0] !== '/' ) {
                  return $src;
              }

              // The URL is relative, we'll have to make it absolute.
              return $this->home_url . $src;
          }

          if ( strpos( $src, 'http' ) !== 0 ) {
              // Protocol relative url, we add the scheme as the standard requires a protocol.
              return $this->scheme . ':' . $src;
          }

          return $src;
      }

      /**
       * Cache attached images and thumbnails for a set of posts.
       *
       * Kept only for backwards compatibility; it does nothing besides reporting
       * the deprecation.
       *
       * @deprecated 3.3 Blanket caching no longer makes sense with modern galleries. R.
       */
      public function cache_attachments() {
          _deprecated_function( __FUNCTION__, '3.3' );
      }
  }