PageRenderTime 55ms CodeModel.GetById 24ms RepoModel.GetById 1ms app.codeStats 0ms

/wp-content/plugins/wordpress-seo/inc/sitemaps/class-sitemap-image-parser.php

https://bitbucket.org/oriaxx/wenco
PHP | 503 lines | 243 code | 108 blank | 152 comment | 37 complexity | fbacc22f573717ca288a244facd05e0f MD5 | raw file
Possible License(s): GPL-2.0, MIT, BSD-3-Clause, LGPL-3.0
  1. <?php
  2. /**
  3. * @package WPSEO\XML_Sitemaps
  4. */
  5. /**
  6. * Parses images from the given post.
  7. */
  8. class WPSEO_Sitemap_Image_Parser {
  /**
   * Home URL of the site, cached so loops do not have to call home_url() again.
   *
   * @var string
   */
  protected $home_url = '';

  /**
   * Hostname of the home URL, without a leading "www." prefix.
   *
   * @var string
   */
  protected $host = '';

  /**
   * Protocol of the home URL; stays 'http' when the home URL carries no scheme.
   *
   * @var string
   */
  protected $scheme = 'http';

  /**
   * Cached set of attachments for multiple posts.
   *
   * @var array
   */
  protected $attachments = array();

  /**
   * Blog charset value, used when handing content to the DOM parser.
   *
   * @var string
   */
  protected $charset = 'UTF-8';

  /**
   * Set up URL properties for reuse.
   *
   * Reads the home URL once and derives the bare hostname and the scheme from it,
   * then stores the escaped blog charset.
   */
  public function __construct() {
      $this->home_url = home_url();
      $parsed_home    = wp_parse_url( $this->home_url );

      if ( ! empty( $parsed_home['host'] ) ) {
          $host = $parsed_home['host'];

          // Only drop a *leading* "www."; removing it anywhere in the name would
          // corrupt hosts such as "awww.example.com" and break host matching.
          if ( 0 === strpos( $host, 'www.' ) ) {
              $host = (string) substr( $host, 4 );
          }

          $this->host = $host;
      }

      if ( ! empty( $parsed_home['scheme'] ) ) {
          $this->scheme = $parsed_home['scheme'];
      }

      $this->charset = esc_attr( get_bloginfo( 'charset' ) );
  }
  /**
   * Get set of image data sets for the given post.
   *
   * Collects, in this order: the featured image, `<img>` tags found in the
   * filtered post content, images referenced by gallery shortcodes in that same
   * content and, when the post is an image attachment, the attachment itself.
   * Items that end up without a source URL are removed before the result is
   * passed through the 'wpseo_sitemap_urlimages' filter.
   *
   * @param object $post Post object to get images for.
   *
   * @return array Image items; empty when $post is not an object.
   */
  public function get_images( $post ) {
      $images = array();

      if ( ! is_object( $post ) ) {
          return $images;
      }

      $thumbnail_id = get_post_thumbnail_id( $post->ID );

      if ( $thumbnail_id ) {
          $src      = $this->get_absolute_url( $this->image_url( $thumbnail_id ) );
          $alt      = get_post_meta( $thumbnail_id, '_wp_attachment_image_alt', true );
          $title    = get_post_field( 'post_title', $thumbnail_id );
          $images[] = $this->get_image_item( $post, $src, $title, $alt );
      }

      /**
       * Filter: 'wpseo_sitemap_content_before_parse_html_images' - Filters the post content
       * before it is parsed for images.
       *
       * @param string $content The raw/unprocessed post content.
       */
      $content = apply_filters( 'wpseo_sitemap_content_before_parse_html_images', $post->post_content );

      foreach ( $this->parse_html_images( $content ) as $image ) {
          $images[] = $this->get_image_item( $post, $image['src'], $image['title'], $image['alt'] );
      }

      // Galleries are looked up in the same filtered content as the <img> tags,
      // so the filter above governs every image parsed from the post body.
      foreach ( $this->parse_galleries( $content, $post->ID ) as $attachment ) {
          $src      = $this->get_absolute_url( $this->image_url( $attachment->ID ) );
          $alt      = get_post_meta( $attachment->ID, '_wp_attachment_image_alt', true );
          $images[] = $this->get_image_item( $post, $src, $attachment->post_title, $alt );
      }

      if ( 'attachment' === $post->post_type && wp_attachment_is_image( $post ) ) {
          $src      = $this->get_absolute_url( $this->image_url( $post->ID ) );
          $alt      = get_post_meta( $post->ID, '_wp_attachment_image_alt', true );
          $images[] = $this->get_image_item( $post, $src, $post->post_title, $alt );
      }

      // Drop items whose source could not be resolved (or was emptied by a filter).
      foreach ( $images as $key => $image ) {
          if ( empty( $image['src'] ) ) {
              unset( $images[ $key ] );
          }
      }

      /**
       * Filter images to be included for the post in XML sitemap.
       *
       * @param array $images  Array of image items.
       * @param int   $post_id ID of the post.
       */
      $images = apply_filters( 'wpseo_sitemap_urlimages', $images, $post->ID );

      return $images;
  }
  /**
   * Get the images referenced in a term description.
   *
   * Combines the `<img>` tags found in the description with the images of any
   * gallery shortcodes it contains.
   *
   * @param object $term Term to get images from description for.
   *
   * @return array
   */
  public function get_term_images( $term ) {
      $description = $term->description;
      $found       = $this->parse_html_images( $description );
      $gallery     = $this->parse_galleries( $description );

      foreach ( $gallery as $attachment ) {
          $url   = $this->get_absolute_url( $this->image_url( $attachment->ID ) );
          $label = $attachment->post_title;
          $alt   = get_post_meta( $attachment->ID, '_wp_attachment_image_alt', true );

          $found[] = array(
              'src'   => $url,
              'title' => $label,
              'alt'   => $alt,
          );
      }

      return $found;
  }
  /**
   * Parse `<img />` tags in content.
   *
   * Only images whose absolute URL contains the site host and survives
   * esc_url() unchanged are returned. Images inserted by WordPress (class
   * `wp-image-<id>`) that are not marked `size-full` are replaced with the URL
   * of the attachment itself.
   *
   * @param string $content Content string to parse.
   *
   * @return array List of arrays with 'src', 'title' and 'alt' keys; empty when
   *               DOMDocument is unavailable or the content is empty.
   */
  private function parse_html_images( $content ) {
      $images = array();

      if ( ! class_exists( 'DOMDocument' ) ) {
          return $images;
      }

      if ( empty( $content ) ) {
          return $images;
      }

      // Prevent DOMDocument from bubbling warnings about invalid HTML, and
      // remember the previous setting so it can be put back afterwards.
      $previous_libxml_setting = libxml_use_internal_errors( true );

      $post_dom = new DOMDocument();
      $post_dom->loadHTML( '<?xml encoding="' . $this->charset . '">' . $content );

      // Clear the errors, so they don't get kept in memory.
      libxml_clear_errors();

      // Restore the caller's libxml error handling; this is process-wide state.
      libxml_use_internal_errors( $previous_libxml_setting );

      /** @var DOMElement $img */
      foreach ( $post_dom->getElementsByTagName( 'img' ) as $img ) {
          $src = $img->getAttribute( 'src' );

          if ( empty( $src ) ) {
              continue;
          }

          $class = $img->getAttribute( 'class' );

          if ( // This detects WP-inserted images, which we need to upsize. R.
              ! empty( $class )
              && false === strpos( $class, 'size-full' )
              && preg_match( '|wp-image-(?P<id>\d+)|', $class, $matches )
              && get_post_status( $matches['id'] )
          ) {
              $src = $this->image_url( $matches['id'] );
          }

          $src = $this->get_absolute_url( $src );

          // Skip images that are not hosted on this site.
          if ( strpos( $src, $this->host ) === false ) {
              continue;
          }

          // Skip URLs that escaping would alter.
          if ( $src !== esc_url( $src ) ) {
              continue;
          }

          $images[] = array(
              'src'   => $src,
              'title' => $img->getAttribute( 'title' ),
              'alt'   => $img->getAttribute( 'alt' ),
          );
      }

      return $images;
  }
  /**
   * Collect the attachments referenced by gallery shortcodes in the content.
   *
   * @param string $content Content string.
   * @param int    $post_id Optional ID of post being parsed.
   *
   * @return array Set of attachment objects, with duplicates removed.
   */
  private function parse_galleries( $content, $post_id = 0 ) {
      $collected = array();

      foreach ( $this->get_content_galleries( $content ) as $gallery ) {
          // An explicit `id` attribute overrides the post being parsed.
          $parent_id = empty( $gallery['id'] ) ? $post_id : intval( $gallery['id'] );

          // Forked from core gallery_shortcode() to have exact same logic. R.
          if ( ! empty( $gallery['ids'] ) ) {
              $gallery['include'] = $gallery['ids'];
          }

          $collected = array_merge(
              $collected,
              $this->get_gallery_attachments( $parent_id, $gallery )
          );
      }

      $unique = array_unique( $collected, SORT_REGULAR );

      return $unique;
  }
  /**
   * Retrieves galleries from the passed content.
   *
   * Forked from core to skip executing shortcodes for performance.
   *
   * @param string $content Content to parse for shortcodes.
   *
   * @return array A list of arrays, each containing gallery data.
   */
  protected function get_content_galleries( $content ) {
      $galleries = array();

      if ( ! has_shortcode( $content, 'gallery' ) ) {
          return $galleries;
      }

      if ( ! preg_match_all( '/' . get_shortcode_regex() . '/s', $content, $matches, PREG_SET_ORDER ) ) {
          return $galleries;
      }

      foreach ( $matches as $shortcode ) {
          if ( 'gallery' !== $shortcode[2] ) {
              continue;
          }

          $attributes = shortcode_parse_atts( $shortcode[3] );

          // shortcode_parse_atts() returns a string instead of an array when it
          // finds no attributes ('' for a bare shortcode). Normalise every such
          // result so callers always receive arrays, as documented.
          if ( ! is_array( $attributes ) ) {
              $attributes = array();
          }

          $galleries[] = $attributes;
      }

      return $galleries;
  }
  /**
   * Build a single image item and run it through the sitemap image filters.
   *
   * The title and alt entries are only present when a non-empty value was given.
   *
   * @param WP_Post $post  Post object for the context.
   * @param string  $src   Image URL.
   * @param string  $title Optional image title.
   * @param string  $alt   Optional image alt text.
   *
   * @return array
   */
  protected function get_image_item( $post, $src, $title = '', $alt = '' ) {
      /**
       * Filter image URL to be included in XML sitemap for the post.
       *
       * @param string $src  Image URL.
       * @param object $post Post object.
       */
      $item = array(
          'src' => apply_filters( 'wpseo_xml_sitemap_img_src', $src, $post ),
      );

      $optional = array(
          'title' => $title,
          'alt'   => $alt,
      );

      foreach ( $optional as $field => $value ) {
          if ( ! empty( $value ) ) {
              $item[ $field ] = $value;
          }
      }

      /**
       * Filter image data to be included in XML sitemap for the post.
       *
       * @param array $image {
       *     Array of image data.
       *
       *     @type string $src   Image URL.
       *     @type string $title Image title attribute (optional).
       *     @type string $alt   Image alt attribute (optional).
       * }
       *
       * @param object $post Post object.
       */
      $filtered = apply_filters( 'wpseo_xml_sitemap_img', $item, $post );

      return $filtered;
  }
  /**
   * Resolve the URL of an attached image, with the core URL filter applied.
   * Adapted from core for speed.
   *
   * @param int $post_id ID of the post.
   *
   * @return string Empty string when the uploads directory reports an error or
   *                the attachment has no stored file.
   */
  private function image_url( $post_id ) {
      // Upload directory data is looked up once and reused across calls.
      static $uploads;

      if ( empty( $uploads ) ) {
          $uploads = wp_upload_dir();
      }

      if ( false !== $uploads['error'] ) {
          return '';
      }

      $file = get_post_meta( $post_id, '_wp_attached_file', true );

      if ( empty( $file ) ) {
          return '';
      }

      $base_dir = $uploads['basedir'];
      $base_url = $uploads['baseurl'];
      $marker   = 'wp-content/uploads';

      if ( 0 === strpos( $file, $base_dir ) ) {
          // Check that the upload base exists in the file location.
          $src = str_replace( $base_dir, $base_url, $file );
      }
      else {
          $marker_position = strpos( $file, $marker );

          if ( false !== $marker_position ) {
              // Keep only what follows the uploads folder marker.
              $src = $base_url . substr( $file, ( $marker_position + strlen( $marker ) ) );
          }
          else {
              // It's a newly uploaded file, therefore $file is relative to the baseurl.
              $src = $base_url . '/' . $file;
          }
      }

      return apply_filters( 'wp_get_attachment_url', $src, $post_id );
  }
  /**
   * Turn a root-relative or protocol-relative URL into an absolute one.
   *
   * Empty or non-string values, relative URLs that do not start with a slash
   * and URLs already starting with "http" are returned untouched.
   *
   * @param string $src URL to process.
   *
   * @return string
   */
  protected function get_absolute_url( $src ) {
      if ( ! is_string( $src ) || empty( $src ) ) {
          return $src;
      }

      if ( true === WPSEO_Utils::is_url_relative( $src ) ) {
          // The URL is relative, we'll have to make it absolute - but only
          // when it is anchored at the site root.
          return ( '/' === $src[0] ) ? $this->home_url . $src : $src;
      }

      if ( 0 === strpos( $src, 'http' ) ) {
          return $src;
      }

      // Protocol relative url, we add the scheme as the standard requires a protocol.
      return $this->scheme . ':' . $src;
  }
  /**
   * Pick the attachments that belong to a gallery.
   *
   * An explicit include list takes precedence; otherwise the attachments of
   * the given parent post are used.
   *
   * @param int   $id      The post id.
   * @param array $gallery The gallery config.
   *
   * @return array The selected attachments.
   */
  protected function get_gallery_attachments( $id, $gallery ) {
      if ( empty( $gallery['include'] ) ) {
          // Without a parent post there is nothing to look up.
          return empty( $id )
              ? array()
              : $this->get_gallery_attachments_for_parent( $id, $gallery );
      }

      // When there are attachments to include.
      return $this->get_gallery_attachments_for_included( $gallery['include'] );
  }
  /**
   * Fetch all attachments whose parent is the given post.
   *
   * @param int   $id      The post id.
   * @param array $gallery The gallery config.
   *
   * @return array The selected attachments.
   */
  protected function get_gallery_attachments_for_parent( $id, $gallery ) {
      $exclusion = array();

      // When there are posts that should be excluded from result set.
      if ( ! empty( $gallery['exclude'] ) ) {
          $exclusion['post__not_in'] = wp_parse_id_list( $gallery['exclude'] );
      }

      $base_query = array(
          'posts_per_page' => -1,
          'post_parent'    => $id,
      );

      return $this->get_attachments( $base_query + $exclusion );
  }
  /**
   * Returns an array with attachments for the post ids that will be included.
   *
   * @param array $include Array with ids to include.
   *
   * @return array The found attachments, keyed by attachment ID. Empty when the
   *               list contains no usable ID.
   */
  protected function get_gallery_attachments_for_included( $include ) {
      // Drop zero IDs, which is what non-numeric entries are parsed into.
      $ids_to_include = array_values( array_filter( wp_parse_id_list( $include ) ) );

      // WP_Query ignores an empty `post__in`, which would return unrelated
      // attachments - so bail out before querying.
      if ( empty( $ids_to_include ) ) {
          return array();
      }

      $attachments = $this->get_attachments(
          array(
              'posts_per_page' => count( $ids_to_include ),
              'post__in'       => $ids_to_include,
          )
      );

      $gallery_attachments = array();
      foreach ( $attachments as $attachment ) {
          $gallery_attachments[ $attachment->ID ] = $attachment;
      }

      return $gallery_attachments;
  }
  /**
   * Query image attachments.
   *
   * The given arguments are laid over a fixed set of defaults before the
   * query is run.
   *
   * @param array $args Array with query args.
   *
   * @return array The found attachments.
   */
  protected function get_attachments( $args ) {
      $query_args = wp_parse_args(
          $args,
          array(
              'post_status'         => 'inherit',
              'post_type'           => 'attachment',
              'post_mime_type'      => 'image',

              // Defaults taken from function get_posts.
              'orderby'             => 'date',
              'order'               => 'DESC',
              'meta_key'            => '',
              'meta_value'          => '',
              'suppress_filters'    => true,
              'ignore_sticky_posts' => true,
              'no_found_rows'       => true,
          )
      );

      $attachment_query = new WP_Query();
      $found            = $attachment_query->query( $query_args );

      return $found;
  }
  /**
   * Cache attached images and thumbnails for a set of posts.
   *
   * Kept only as a stub: calling it does nothing but report the deprecation.
   *
   * @deprecated 3.3 Blanket caching no longer makes sense with modern galleries. R.
   */
  public function cache_attachments() {
      $deprecated_since = '3.3';

      _deprecated_function( __METHOD__, $deprecated_since );
  }
  395. }