PageRenderTime 55ms CodeModel.GetById 25ms RepoModel.GetById 1ms app.codeStats 0ms

/classes/favicon.php

https://github.com/RomanSixty/Feed-on-Feeds
PHP | 337 lines | 222 code | 49 blank | 66 comment | 42 complexity | 0fa00719e8e45f83c27ddfecfffdc069 MD5 | raw file
  1. <?php
  2. /** Locate a suitable favicon for a site.
  3. Copyright (C) 2013-2016 Justin Wind <justin.wind@gmail.com>
  4. */
  class FavIcon {
      const VERSION = '1.0';
      const BUILD = '20160424000000';
      const SRC_URL = '';

      /** Build the User-Agent string sent when the caller does not supply one.
      */
      static protected function default_user_agent() {
          return sprintf('%s/%s (Caching Utility; %s; Allow like Gecko) Build/%s', __CLASS__, self::VERSION, self::SRC_URL, self::BUILD);
      }

      protected $site_url; /* whose favicons are we interested in */
      protected $site_url_parts; /* broken down by parse_url(); always an array */
      protected $favicons; /* list of extant favicons referenced by site */
      protected $stream_context_options; /* user-agent &c */

      /** Create a new FavIcon instance, which will learn about all the potential
          favicons at the given url.

          The page at $site_url is fetched and scanned for <link rel="icon">
          elements first; if none of them validate, /favicon.ico on the same
          host is tried.

          $site_url: url of the page whose icon is wanted
          $user_agent: User-Agent to send, or null for default_user_agent()
      */
      public function __construct($site_url, $user_agent=null) {
          $this->favicons = array();
          $this->site_url = $site_url;

          /* parse_url() returns false on seriously malformed urls; keep an
             array so later index lookups stay quiet. */
          $parts = parse_url($site_url);
          $this->site_url_parts = is_array($parts) ? $parts : array();

          $this->stream_context_options = array(
              'http' => array(
                  'user_agent' => ($user_agent !== null) ? $user_agent : self::default_user_agent(),
                  /* refuse compressed streams for now, as decompression isn't
                     automatic.  This has to go through the 'header' option:
                     the http wrapper has no 'accept_encoding' option and
                     ignores unknown keys. */
                  'header' => "Accept-Encoding: gzip;q=0, compress;q=0",
              ),
          );

          $this->links_from_site();
          if (empty($this->favicons)) {
              $this->links_from_rote();
          }

          /* TODO: sort by something other than first-occurrence */
      }

      /** Return the url of the first favicon, or an empty string if none was
          found.
      */
      public function __toString() {
          if (empty($this->favicons[0]['href'])) {
              return '';
          }
          return (string)$this->favicons[0]['href'];
      }

      /** Return an array containing all the information about the first favicon,
          or null if no favicon was found.
              href: icon url
              type: icon content-type
              sizes: sizes provided by icon (only parsed out of <link> for now,
                     so absent for the /favicon.ico fallback)
              data: icon file data
      */
      public function getIcon() {
          if (empty($this->favicons[0])) {
              return null;
          }
          return $this->favicons[0];
      }

      /** Return a usable list of response header lines.

          PHP creates $http_response_header only in the scope that performed
          the http request, so this method cannot see it by itself: callers
          must pass their own copy in $headers.  When nothing usable is passed,
          http_get_last_response_headers() is consulted where it exists, and
          finally a synthetic 400 status line is returned so that status
          parsing never has to deal with an empty list.

          $headers: the caller's $http_response_header, or null
      */
      static protected function safe_headers($headers=null) {
          if (is_array($headers) && ! empty($headers)) {
              return $headers;
          }
          if (function_exists('http_get_last_response_headers')) {
              $last = http_get_last_response_headers();
              if (is_array($last) && ! empty($last)) {
                  return $last;
              }
          }
          return array('HTTP/1.1 400 Bad request');
      }

      /** Extract the numeric status code of the most recent status line in
          $headers; 0 when none can be parsed.
      */
      static protected function status_code($headers) {
          $fields = explode(' ', (string)self::header_findr($headers, null), 3);
          return isset($fields[1]) ? (int)$fields[1] : 0;
      }

      /** Whether a content-type's major type is 'image' (case-insensitive).
      */
      static protected function is_image_type($content_type) {
          $fields = explode('/', (string)$content_type, 2);
          return strcasecmp($fields[0], 'image') === 0;
      }

      /** Load the page, parse for iconic links, and add them to icon list if
          they are valid.

          Returns false when the page could not be loaded, nothing otherwise.
      */
      protected function links_from_site() {
          /*
              Quietly fetch the site contents into a DOM
          */
          $dom = new DOMDocument();
          $dom->recover = true;
          $dom->strictErrorChecking = false;

          $default_context = stream_context_get_default();
          $stream_context = stream_context_create($this->stream_context_options);
          libxml_set_streams_context($stream_context);
          $libxml_err_state = libxml_use_internal_errors(true);
          $dom_result = @$dom->loadHTMLFile($this->site_url);
          libxml_clear_errors();
          libxml_use_internal_errors($libxml_err_state);
          libxml_set_streams_context($default_context);

          /* Must be read here, in the scope which made the request.
             NOTE(review): presumably the http wrapper populates this for
             libxml-driven fetches as well -- confirm. */
          $headers = self::safe_headers(isset($http_response_header) ? $http_response_header : null);

          if ($dom_result === false) {
              $status = self::status_code($headers);
              trigger_error('site \'' . $this->site_url . '\' returned ' . $status, E_USER_NOTICE);
              return false;
          }

          /*
              If we followed any redirects, rewrite the site_url with the current
              location, so that relative urls may be correctly converted into
              their absolute form.  The parsed parts are what absolutize_url()
              actually reads, so they have to be refreshed as well.
          */
          $location = self::header_findr($headers, 'Location');
          if ($location !== null && $location !== '') {
              $location = $this->absolutize_url($location);
              $location_parts = parse_url($location);
              if (is_array($location_parts) && ! empty($location_parts['host'])) {
                  $this->site_url = $location;
                  $this->site_url_parts = $location_parts;
              }
          }

          /* check all the links which relate to icons */
          foreach ($dom->getElementsByTagName('link') as $link) {
              /* rel is a whitespace-separated token list, e.g. "shortcut icon" */
              $relations = preg_split('/\s+/', strtolower($link->getAttribute('rel')), -1, PREG_SPLIT_NO_EMPTY);
              if ( ! in_array('icon', $relations, true)) {
                  continue;
              }

              $href = trim($link->getAttribute('href'));
              if ($href === '') {
                  /* nothing to fetch */
                  continue;
              }

              $icon = $this->validate_icon($this->absolutize_url($href));
              if ($icon === null) {
                  continue;
              }

              /* fall back on what the markup claims */
              if (empty($icon['type'])) {
                  $icon['type'] = $link->getAttribute('type');
              }
              if (empty($icon['sizes'])) {
                  $icon['sizes'] = $link->getAttribute('sizes');
              }
              $this->favicons[] = $icon;
          }
      }

      /** Add standard favicon locations to icon list.
      */
      protected function links_from_rote() {
          $favicon_url = array();

          /* take only what we want */
          foreach (array('scheme', 'user', 'pass', 'host', 'port') as $key) {
              if ( ! empty($this->site_url_parts[$key])) {
                  $favicon_url[$key] = $this->site_url_parts[$key];
              }
          }

          /* add our own */
          $favicon_url['path'] = '/favicon.ico';

          /* put back together and look for it */
          $icon = $this->validate_icon(self::unparse_url($favicon_url));
          if ($icon !== null) {
              $this->favicons[] = $icon;
          }
      }

      /** Returns the relevant header value. Null search returns status.
          This matches in reverse order, because headers are cumulative when
          http wrappers follow redirects, and the latest response is wanted.

          $headers: list of raw header lines
          $header: header name to look for (case-insensitive), or null to get
                   the most recent full status line
          Returns the trimmed header value, the status line, or null when
          nothing matches.
      */
      static protected function header_findr($headers, $header=null) {
          if (empty($headers)) {
              return null;
          }

          foreach (array_reverse($headers) as $line) {
              if ($header === null) {
                  if (strncmp($line, 'HTTP/', 5) === 0) {
                      return $line;
                  }
                  continue;
              }

              /* whitespace after the colon is optional */
              $pieces = explode(':', $line, 2);
              if (count($pieces) === 2 && strcasecmp($header, trim($pieces[0])) === 0) {
                  return trim($pieces[1]);
              }
          }

          return null;
      }

      /** Validate an icon resource by attempting to fetch it.

          $url: absolute url of the candidate icon
          $fetch: unused; kept so existing callers keep working
          Returns array('href', 'data', 'type') for a fetchable, non-empty
          image, otherwise null (after raising an E_USER_NOTICE saying why).
      */
      protected function validate_icon($url, $fetch=false) {
          $icon = array('href' => $url);

          $stream_context = stream_context_create($this->stream_context_options);
          $icon['data'] = @file_get_contents($url, false, $stream_context);

          /* Must be read here, in the scope which made the request. */
          $headers = self::safe_headers(isset($http_response_header) ? $http_response_header : null);

          if ($icon['data'] === false) {
              trigger_error('failed to get icon resource \'' . $url .'\'', E_USER_NOTICE);
              return null;
          }

          /* did we get a useful response */
          $status = self::status_code($headers);
          if ($status !== 200) {
              trigger_error('icon resource \'' . $url . '\' returned ' . $status, E_USER_NOTICE);
              return null;
          }

          if (empty($icon['data'])) {
              trigger_error('icon resource \'' . $url . '\' is empty', E_USER_NOTICE);
              return null;
          }

          /* is it displayable */
          $type_fields = explode(';', (string)self::header_findr($headers, 'Content-Type'));
          $icon['type'] = trim($type_fields[0]);
          if (self::is_image_type($icon['type'])) {
              return $icon;
          }

          if (class_exists('finfo')) {
              /*
                  Is their server possibly just sending the wrong content-type?
                  This turns out to be a fairly common problem with .ico files.
                  Double-check against magic mimetypes before giving up.
              */
              $finfo = new finfo(FILEINFO_MIME);
              $type_fields = explode(';', (string)$finfo->buffer($icon['data']));
              $icon['type'] = trim($type_fields[0]);
              if ( ! self::is_image_type($icon['type'])) {
                  /* really not an image */
                  trigger_error('icon resource \'' . $url . '\' is not an image', E_USER_NOTICE);
                  return null;
              }
              return $icon;
          }

          /* no magic available: allow common extensions */
          $ext = strtolower(pathinfo((string)parse_url($url, PHP_URL_PATH), PATHINFO_EXTENSION));
          if ( ! in_array($ext, array('ico', 'gif', 'png', 'jpg', 'jpeg'), true)) {
              trigger_error('icon resource \'' . $url . '\' has non-image type \'' . $icon['type'] . '\'', E_USER_NOTICE);
              return null;
          }

          return $icon;
      }

      /** Return a full url from what might be just a path.

          Handles absolute urls (returned untouched), protocol-relative urls
          ("//host/path"), host-absolute paths ("/path") and paths relative to
          the directory of the site's own path.
      */
      protected function absolutize_url($url) {
          $url = trim((string)$url);

          /* If it starts with a scheme, it's already good to go.  Anchored, so
             a '://' buried in a query string doesn't count. */
          if (preg_match('#^[a-z][a-z0-9+.\-]*://#i', $url)) {
              return $url;
          }

          $site = is_array($this->site_url_parts) ? $this->site_url_parts : array();

          /* Protocol-relative: only the scheme is missing. */
          if (strncmp($url, '//', 2) === 0) {
              $scheme = ! empty($site['scheme']) ? $site['scheme'] : 'http';
              return $scheme . ':' . $url;
          }

          /* Otherwise $url is just a path, so we need to fill in the preambling
             parts from the site's url. */
          $url_parts = array();
          foreach (array('scheme', 'user', 'pass', 'host', 'port') as $key) {
              if ( ! empty($site[$key])) {
                  $url_parts[$key] = $site[$key];
              }
          }

          if ($url !== '' && $url[0] === '/') {
              /* If it starts with a /, it's a complete path. */
              $url_parts['path'] = $url;
          } else {
              /* Otherwise, we need to tack this relative path on to the site's
                 path's directory, without the trailing-most non-directory bit.
                 A site url with no path at all lives at the root. */
              $site_path = isset($site['path']) ? (string)$site['path'] : '';
              $last_slash_pos = strrpos($site_path, '/');
              $base_path = ($last_slash_pos === false) ? '/' : substr($site_path, 0, $last_slash_pos + 1);
              $url_parts['path'] = $base_path . $url;
          }

          /* Put it all together. */
          return self::unparse_url($url_parts);
      }

      /** Assemble a url from its parse_url components.  Components which are
          missing or empty are left out.
      */
      static protected function unparse_url($parts) {
          $url = '';

          if ( ! empty($parts['scheme'])) {
              $url .= $parts['scheme'] . '://';
          }

          $has_user = ! empty($parts['user']);
          $has_pass = ! empty($parts['pass']);
          if ($has_user || $has_pass) {
              if ($has_user) {
                  $url .= $parts['user'];
              }
              if ($has_pass) {
                  $url .= ':' . $parts['pass'];
              }
              $url .= '@';
          }

          if ( ! empty($parts['host'])) {
              $url .= $parts['host'];
          }
          if ( ! empty($parts['port'])) {
              $url .= ':' . $parts['port'];
          }
          if ( ! empty($parts['path'])) {
              $url .= $parts['path'];
          }
          if ( ! empty($parts['query'])) {
              $url .= '?' . $parts['query'];
          }
          if ( ! empty($parts['fragment'])) {
              $url .= '#' . $parts['fragment'];
          }

          return $url;
      }
  }
  286. ?>