PageRenderTime 46ms CodeModel.GetById 17ms RepoModel.GetById 0ms app.codeStats 0ms

/wp-content/plugins/wordpress-seo/inc/sitemaps/class-sitemap-image-parser.php

https://bitbucket.org/wedodigital/wedo-sitemaps
PHP | 500 lines | 243 code | 107 blank | 150 comment | 38 complexity | 671ff8d74316af311a43d275b0d903f4 MD5 | raw file
Possible License(s): GPL-2.0
  1. <?php
  /**
   * WPSEO plugin file.
   *
   * @package WPSEO\XML_Sitemaps
   */
  /**
   * Parses images from the given post.
   *
   * Collects image data (source URL, title and alt text) for the XML sitemaps
   * from a post's featured image, inline `<img>` tags, `[gallery]` shortcodes
   * and, for image attachments, the attachment itself. Term descriptions are
   * supported through get_term_images().
   */
  class WPSEO_Sitemap_Image_Parser {

      /**
       * Holds the home_url() value to speed up loops.
       *
       * @var string
       */
      protected $home_url = '';

      /**
       * Holds site URL hostname, without a leading "www.".
       *
       * @var string
       */
      protected $host = '';

      /**
       * Holds site URL protocol.
       *
       * @var string
       */
      protected $scheme = 'http';

      /**
       * Cached set of attachments for multiple posts.
       *
       * @var array
       */
      protected $attachments = array();

      /**
       * Holds blog charset value for use in DOM parsing.
       *
       * @var string
       */
      protected $charset = 'UTF-8';

      /**
       * Set up URL properties for reuse.
       */
      public function __construct() {
          $this->home_url = home_url();
          $parsed_home    = wp_parse_url( $this->home_url );

          if ( ! empty( $parsed_home['host'] ) ) {
              $host = $parsed_home['host'];

              // Strip only a leading "www." label. Removing every occurrence would
              // corrupt hosts such as "www.mywww.example.com" and make the host
              // check in parse_html_images() reject all local images.
              if ( 0 === strpos( $host, 'www.' ) ) {
                  $host = (string) substr( $host, 4 );
              }

              $this->host = $host;
          }

          if ( ! empty( $parsed_home['scheme'] ) ) {
              $this->scheme = $parsed_home['scheme'];
          }

          $this->charset = esc_attr( get_bloginfo( 'charset' ) );
      }

      /**
       * Get set of image data sets for the given post.
       *
       * Sources, in order: the featured image, `<img>` tags in the (filtered)
       * post content, gallery shortcodes in the post content and the post itself
       * when it is an image attachment. Items without a source URL are dropped.
       *
       * @param object $post Post object to get images for.
       *
       * @return array Image items; keys are not re-indexed after empty items are removed.
       */
      public function get_images( $post ) {
          $images = array();

          if ( ! is_object( $post ) ) {
              return $images;
          }

          $thumbnail_id = get_post_thumbnail_id( $post->ID );

          if ( $thumbnail_id ) {
              $src      = $this->get_absolute_url( $this->image_url( $thumbnail_id ) );
              $alt      = WPSEO_Image_Utils::get_alt_tag( $thumbnail_id );
              $title    = get_post_field( 'post_title', $thumbnail_id );
              $images[] = $this->get_image_item( $post, $src, $title, $alt );
          }

          /**
           * Filter: 'wpseo_sitemap_content_before_parse_html_images' - Filters the post content
           * before it is parsed for images.
           *
           * @param string $content The raw/unprocessed post content.
           */
          $content = apply_filters( 'wpseo_sitemap_content_before_parse_html_images', $post->post_content );

          $unfiltered_images = $this->parse_html_images( $content );

          foreach ( $unfiltered_images as $image ) {
              $images[] = $this->get_image_item( $post, $image['src'], $image['title'], $image['alt'] );
          }

          // Galleries are read from the unfiltered post content.
          foreach ( $this->parse_galleries( $post->post_content, $post->ID ) as $attachment ) {
              $src      = $this->get_absolute_url( $this->image_url( $attachment->ID ) );
              $alt      = WPSEO_Image_Utils::get_alt_tag( $attachment->ID );
              $images[] = $this->get_image_item( $post, $src, $attachment->post_title, $alt );
          }

          if ( 'attachment' === $post->post_type && wp_attachment_is_image( $post ) ) {
              $src      = $this->get_absolute_url( $this->image_url( $post->ID ) );
              $alt      = WPSEO_Image_Utils::get_alt_tag( $post->ID );
              $images[] = $this->get_image_item( $post, $src, $post->post_title, $alt );
          }

          // Drop items that ended up without a usable source URL.
          foreach ( $images as $key => $image ) {
              if ( empty( $image['src'] ) ) {
                  unset( $images[ $key ] );
              }
          }

          /**
           * Filter images to be included for the post in XML sitemap.
           *
           * @param array $images  Array of image items.
           * @param int   $post_id ID of the post.
           */
          $images = apply_filters( 'wpseo_sitemap_urlimages', $images, $post->ID );

          return $images;
      }

      /**
       * Get set of image data sets for the description of the given term.
       *
       * @param object $term Term to get images from description for.
       *
       * @return array Image items, each with `src`, `title` and `alt` keys.
       */
      public function get_term_images( $term ) {
          // Mirror get_images(): anything that is not an object with a description has no images.
          if ( ! is_object( $term ) || empty( $term->description ) ) {
              return array();
          }

          $images = $this->parse_html_images( $term->description );

          foreach ( $this->parse_galleries( $term->description ) as $attachment ) {
              $images[] = array(
                  'src'   => $this->get_absolute_url( $this->image_url( $attachment->ID ) ),
                  'title' => $attachment->post_title,
                  'alt'   => WPSEO_Image_Utils::get_alt_tag( $attachment->ID ),
              );
          }

          return $images;
      }

      /**
       * Parse `<img />` tags in content.
       *
       * Images whose absolute URL does not contain the site host, or whose URL is
       * altered by esc_url(), are skipped.
       *
       * @param string $content Content string to parse.
       *
       * @return array Image items, each with `src`, `title` and `alt` keys.
       */
      private function parse_html_images( $content ) {
          $images = array();

          if ( ! class_exists( 'DOMDocument' ) ) {
              return $images;
          }

          if ( empty( $content ) ) {
              return $images;
          }

          // Prevent DOMDocument from bubbling warnings about invalid HTML. The
          // previous state is remembered because this setting is process-wide.
          $previous_libxml_state = libxml_use_internal_errors( true );

          $post_dom = new DOMDocument();
          $post_dom->loadHTML( '<?xml encoding="' . $this->charset . '">' . $content );

          // Clear the errors, so they don't get kept in memory.
          libxml_clear_errors();

          // Hand libxml error handling back the way the caller had configured it.
          libxml_use_internal_errors( $previous_libxml_state );

          /** @var DOMElement $img */
          foreach ( $post_dom->getElementsByTagName( 'img' ) as $img ) {

              $src = $img->getAttribute( 'src' );

              if ( empty( $src ) ) {
                  continue;
              }

              $class = $img->getAttribute( 'class' );

              if ( // This detects WP-inserted images, which we need to upsize. R.
                  ! empty( $class )
                  && false === strpos( $class, 'size-full' )
                  && preg_match( '|wp-image-(?P<id>\d+)|', $class, $matches )
                  && get_post_status( $matches['id'] )
              ) {
                  $src = $this->image_url( $matches['id'] );
              }

              $src = $this->get_absolute_url( $src );

              // Only images served from the site's own host are listed.
              if ( strpos( $src, $this->host ) === false ) {
                  continue;
              }

              // Skip URLs that esc_url() would have to change.
              if ( $src !== esc_url( $src ) ) {
                  continue;
              }

              $images[] = array(
                  'src'   => $src,
                  'title' => $img->getAttribute( 'title' ),
                  'alt'   => $img->getAttribute( 'alt' ),
              );
          }

          return $images;
      }

      /**
       * Parse gallery shortcodes in a given content.
       *
       * @param string $content Content string.
       * @param int    $post_id Optional ID of post being parsed.
       *
       * @return array Set of attachment objects.
       */
      protected function parse_galleries( $content, $post_id = 0 ) {
          $attachments = array();
          $galleries   = $this->get_content_galleries( $content );

          foreach ( $galleries as $gallery ) {

              // A gallery may point at another post through its `id` attribute.
              $id = $post_id;

              if ( ! empty( $gallery['id'] ) ) {
                  $id = intval( $gallery['id'] );
              }

              // Forked from core gallery_shortcode() to have exact same logic. R.
              if ( ! empty( $gallery['ids'] ) ) {
                  $gallery['include'] = $gallery['ids'];
              }

              $gallery_attachments = $this->get_gallery_attachments( $id, $gallery );

              $attachments = array_merge( $attachments, $gallery_attachments );
          }

          if ( PHP_VERSION_ID >= 50209 ) {
              // phpcs:ignore PHPCompatibility.PHP.NewFunctionParameters.array_unique_sort_flagsFound -- Wrapped in version check.
              return array_unique( $attachments, SORT_REGULAR );
          }

          return $attachments;
      }

      /**
       * Retrieves galleries from the passed content.
       *
       * Forked from core to skip executing shortcodes for performance.
       *
       * @param string $content Content to parse for shortcodes.
       *
       * @return array A list of arrays, each containing gallery data.
       */
      protected function get_content_galleries( $content ) {
          if ( ! has_shortcode( $content, 'gallery' ) ) {
              return array();
          }

          $galleries = array();

          if ( ! preg_match_all( '/' . get_shortcode_regex() . '/s', $content, $matches, PREG_SET_ORDER ) ) {
              return $galleries;
          }

          foreach ( $matches as $shortcode ) {
              if ( 'gallery' !== $shortcode[2] ) {
                  continue;
              }

              $attributes = shortcode_parse_atts( $shortcode[3] );

              // A valid shortcode without any attributes yields a string instead of
              // an array. Normalise every non-array result so callers can always
              // treat a gallery as an array.
              if ( ! is_array( $attributes ) ) {
                  $attributes = array();
              }

              $galleries[] = $attributes;
          }

          return $galleries;
      }

      /**
       * Get image item array with filters applied.
       *
       * @param WP_Post $post  Post object for the context.
       * @param string  $src   Image URL.
       * @param string  $title Optional image title.
       * @param string  $alt   Optional image alt text.
       *
       * @return array
       */
      protected function get_image_item( $post, $src, $title = '', $alt = '' ) {
          $image = array();

          /**
           * Filter image URL to be included in XML sitemap for the post.
           *
           * @param string $src  Image URL.
           * @param object $post Post object.
           */
          $image['src'] = apply_filters( 'wpseo_xml_sitemap_img_src', $src, $post );

          if ( ! empty( $title ) ) {
              $image['title'] = $title;
          }

          if ( ! empty( $alt ) ) {
              $image['alt'] = $alt;
          }

          /**
           * Filter image data to be included in XML sitemap for the post.
           *
           * @param array $image {
           *     Array of image data.
           *
           *     @type string $src   Image URL.
           *     @type string $title Image title attribute (optional).
           *     @type string $alt   Image alt attribute (optional).
           * }
           *
           * @param object $post Post object.
           */
          return apply_filters( 'wpseo_xml_sitemap_img', $image, $post );
      }

      /**
       * Get attached image URL with filters applied. Adapted from core for speed.
       *
       * @param int $post_id ID of the post.
       *
       * @return string Image URL, or an empty string when the upload directory
       *                reports an error or the attachment has no file.
       */
      private function image_url( $post_id ) {
          // The upload directory data is looked up once and reused for later calls.
          static $uploads;

          if ( empty( $uploads ) ) {
              $uploads = wp_upload_dir();
          }

          if ( false !== $uploads['error'] ) {
              return '';
          }

          $file = get_post_meta( $post_id, '_wp_attached_file', true );

          if ( empty( $file ) ) {
              return '';
          }

          $uploads_marker  = 'wp-content/uploads';
          $marker_position = strpos( $file, $uploads_marker );

          // Check that the upload base exists in the file location.
          if ( 0 === strpos( $file, $uploads['basedir'] ) ) {
              // Swap only the leading base directory for the base URL.
              $src = $uploads['baseurl'] . substr( $file, strlen( $uploads['basedir'] ) );
          }
          elseif ( false !== $marker_position ) {
              // Keep everything that follows the uploads directory marker.
              $src = $uploads['baseurl'] . substr( $file, ( $marker_position + strlen( $uploads_marker ) ) );
          }
          else {
              // It's a newly uploaded file, therefore $file is relative to the baseurl.
              $src = $uploads['baseurl'] . '/' . $file;
          }

          return apply_filters( 'wp_get_attachment_url', $src, $post_id );
      }

      /**
       * Make absolute URL for domain or protocol-relative one.
       *
       * @param string $src URL to process.
       *
       * @return string The absolute URL; empty values, non-strings and relative
       *                URLs that do not start with a slash are returned untouched.
       */
      protected function get_absolute_url( $src ) {
          if ( empty( $src ) || ! is_string( $src ) ) {
              return $src;
          }

          if ( WPSEO_Utils::is_url_relative( $src ) === true ) {

              if ( $src[0] !== '/' ) {
                  return $src;
              }

              // The URL is relative, we'll have to make it absolute.
              // NOTE(review): the full home URL is prepended, including any path it
              // may carry - confirm this is intended for subdirectory installs.
              return $this->home_url . $src;
          }

          if ( strpos( $src, 'http' ) !== 0 ) {
              // Protocol relative url, we add the scheme as the standard requires a protocol.
              return $this->scheme . ':' . $src;
          }

          return $src;
      }

      /**
       * Returns the attachments for a gallery.
       *
       * An explicit `include` list takes precedence; otherwise the images
       * attached to the post with the given id are used.
       *
       * @param int   $id      The post id.
       * @param array $gallery The gallery config.
       *
       * @return array The selected attachments.
       */
      protected function get_gallery_attachments( $id, $gallery ) {
          // When there are attachments to include.
          if ( ! empty( $gallery['include'] ) ) {
              return $this->get_gallery_attachments_for_included( $gallery['include'] );
          }

          // When $id is empty, just return empty array.
          if ( empty( $id ) ) {
              return array();
          }

          return $this->get_gallery_attachments_for_parent( $id, $gallery );
      }

      /**
       * Returns the attachments for the given id.
       *
       * @param int   $id      The post id.
       * @param array $gallery The gallery config.
       *
       * @return array The selected attachments.
       */
      protected function get_gallery_attachments_for_parent( $id, $gallery ) {
          $query = array(
              'posts_per_page' => -1,
              'post_parent'    => $id,
          );

          // When there are posts that should be excluded from result set.
          if ( ! empty( $gallery['exclude'] ) ) {
              $query['post__not_in'] = wp_parse_id_list( $gallery['exclude'] );
          }

          return $this->get_attachments( $query );
      }

      /**
       * Returns an array with attachments for the post ids that will be included.
       *
       * @param array $include Array with ids to include.
       *
       * @return array The found attachments, keyed by attachment ID.
       */
      protected function get_gallery_attachments_for_included( $include ) {
          $ids_to_include = wp_parse_id_list( $include );
          $attachments    = $this->get_attachments(
              array(
                  'posts_per_page' => count( $ids_to_include ),
                  'post__in'       => $ids_to_include,
              )
          );

          $gallery_attachments = array();

          foreach ( $attachments as $attachment ) {
              $gallery_attachments[ $attachment->ID ] = $attachment;
          }

          return $gallery_attachments;
      }

      /**
       * Returns the attachments.
       *
       * Queries image attachments with status `inherit`; the given arguments
       * override the defaults below.
       *
       * @param array $args Array with query args.
       *
       * @return array The found attachments.
       */
      protected function get_attachments( $args ) {
          $default_args = array(
              'post_status'         => 'inherit',
              'post_type'           => 'attachment',
              'post_mime_type'      => 'image',

              // Defaults taken from function get_posts.
              'orderby'             => 'date',
              'order'               => 'DESC',
              'meta_key'            => '',
              'meta_value'          => '',
              'suppress_filters'    => true,
              'ignore_sticky_posts' => true,
              'no_found_rows'       => true,
          );

          $args = wp_parse_args( $args, $default_args );

          $get_attachments = new WP_Query();

          return $get_attachments->query( $args );
      }
  }