PageRenderTime 46ms CodeModel.GetById 17ms RepoModel.GetById 0ms app.codeStats 0ms

/wp-includes/SimplePie/Content/Type/Sniffer.php

https://bitbucket.org/abnopanda/wordpress
PHP | 332 lines | 225 code | 16 blank | 91 comment | 32 complexity | 7c72c3f369855562d96c77ece1c7db33 MD5 | raw file
  1. <?php
  2. /**
  3. * SimplePie
  4. *
  5. * A PHP-Based RSS and Atom Feed Framework.
  6. * Takes the hard work out of managing a complete RSS/Atom solution.
  7. *
  8. * Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
  9. * All rights reserved.
  10. *
  11. * Redistribution and use in source and binary forms, with or without modification, are
  12. * permitted provided that the following conditions are met:
  13. *
  14. * * Redistributions of source code must retain the above copyright notice, this list of
  15. * conditions and the following disclaimer.
  16. *
  17. * * Redistributions in binary form must reproduce the above copyright notice, this list
  18. * of conditions and the following disclaimer in the documentation and/or other materials
  19. * provided with the distribution.
  20. *
  21. * * Neither the name of the SimplePie Team nor the names of its contributors may be used
  22. * to endorse or promote products derived from this software without specific prior
  23. * written permission.
  24. *
  25. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
  26. * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
  27. * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
  28. * AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  29. * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  30. * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  31. * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
  32. * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  33. * POSSIBILITY OF SUCH DAMAGE.
  34. *
  35. * @package SimplePie
  36. * @version 1.3.1
  37. * @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
  38. * @author Ryan Parman
  39. * @author Geoffrey Sneddon
  40. * @author Ryan McCue
  41. * @link http://simplepie.org/ SimplePie
  42. * @license http://www.opensource.org/licenses/bsd-license.php BSD License
  43. */
  44. /**
  45. * Content-type sniffing
  46. *
  47. * Based on the rules in http://tools.ietf.org/html/draft-abarth-mime-sniff-06
  48. *
  49. * This is used since we can't always trust Content-Type headers, and is based
  50. * upon the HTML5 parsing rules.
  51. *
  52. *
  53. * This class can be overloaded with {@see SimplePie::set_content_type_sniffer_class()}
  54. *
  55. * @package SimplePie
  56. * @subpackage HTTP
  57. */
  class SimplePie_Content_Type_Sniffer
  {
      /**
       * File object whose `headers` array and `body` string are inspected
       *
       * @var SimplePie_File
       */
      public $file;

      /**
       * Create an instance of the class with the input file
       *
       * @param SimplePie_File $file Input file
       */
      public function __construct($file)
      {
          $this->file = $file;
      }

      /**
       * Get the Content-Type of the specified file
       *
       * Starts from the Content-Type header (when present) and falls back to
       * sniffing the body for the cases where the header is missing, generic,
       * or of a kind that is checked against the body (text/plain, image/*,
       * text/html).
       *
       * @return string Actual Content-Type
       */
      public function get_type()
      {
          // No header at all: everything has to come from the body.
          if (!isset($this->file->headers['content-type']))
          {
              return $this->unknown();
          }

          $header = $this->file->headers['content-type'];

          // Only these exact header values trigger text-vs-binary sniffing,
          // and only when no Content-Encoding header was sent.
          $plain_text_headers = array(
              'text/plain',
              'text/plain; charset=ISO-8859-1',
              'text/plain; charset=iso-8859-1',
              'text/plain; charset=UTF-8',
          );
          if (!isset($this->file->headers['content-encoding'])
              && in_array($header, $plain_text_headers, true))
          {
              return $this->text_or_binary();
          }

          // Strip parameters (everything from the first ';') and normalise.
          $semicolon = strpos($header, ';');
          if ($semicolon !== false)
          {
              $official = substr($header, 0, $semicolon);
          }
          else
          {
              $official = $header;
          }
          $official = trim(strtolower($official));

          if ($official === 'unknown/unknown'
              || $official === 'application/unknown')
          {
              return $this->unknown();
          }

          // XML types are returned as declared, without looking at the body.
          if (substr($official, -4) === '+xml'
              || $official === 'text/xml'
              || $official === 'application/xml')
          {
              return $official;
          }

          // For images, a recognised signature in the body wins over the
          // declared subtype; otherwise the declared type is kept.
          if (substr($official, 0, 6) === 'image/')
          {
              $sniffed = $this->image();
              if ($sniffed)
              {
                  return $sniffed;
              }
              return $official;
          }

          if ($official === 'text/html')
          {
              return $this->feed_or_html();
          }

          return $official;
      }

      /**
       * Sniff text or binary
       *
       * Returns 'text/plain' when the body starts with a Unicode byte order
       * mark or contains no binary control bytes, and
       * 'application/octet-stream' otherwise.
       *
       * @return string Actual Content-Type
       */
      public function text_or_binary()
      {
          $body = $this->file->body;

          // Byte order marks: UTF-16BE, UTF-16LE, UTF-32BE, UTF-8.
          if (substr($body, 0, 2) === "\xFE\xFF"
              || substr($body, 0, 2) === "\xFF\xFE"
              || substr($body, 0, 4) === "\x00\x00\xFE\xFF"
              || substr($body, 0, 3) === "\xEF\xBB\xBF")
          {
              return 'text/plain';
          }

          // Any control byte other than 0x09-0x0D (tab, LF, VT, FF, CR) and
          // 0x1B (ESC) marks the body as binary.
          if (preg_match('/[\x00-\x08\x0E-\x1A\x1C-\x1F]/', $body))
          {
              return 'application/octet-stream';
          }

          return 'text/plain';
      }

      /**
       * Sniff unknown
       *
       * Used when there is no usable Content-Type header. Checks, in order:
       * an HTML-looking start (after leading whitespace), PDF and PostScript
       * markers, the known image signatures, and finally text vs. binary.
       *
       * @return string Actual Content-Type
       */
      public function unknown()
      {
          $body = $this->file->body;

          // Length of the leading run of tab, LF, VT, FF, CR and space.
          $ws = strspn($body, "\x09\x0A\x0B\x0C\x0D\x20");
          if (strtolower(substr($body, $ws, 14)) === '<!doctype html'
              || strtolower(substr($body, $ws, 5)) === '<html'
              || strtolower(substr($body, $ws, 7)) === '<script')
          {
              return 'text/html';
          }

          // The remaining signatures must appear at the very first byte.
          if (substr($body, 0, 5) === '%PDF-')
          {
              return 'application/pdf';
          }

          if (substr($body, 0, 11) === '%!PS-Adobe-')
          {
              return 'application/postscript';
          }

          $image_type = $this->sniff_image_signature();
          if ($image_type !== false)
          {
              return $image_type;
          }

          return $this->text_or_binary();
      }

      /**
       * Sniff images
       *
       * @return string|false Image Content-Type matching the body's leading
       *                      bytes, or false when no known signature matches
       */
      public function image()
      {
          return $this->sniff_image_signature();
      }

      /**
       * Sniff HTML
       *
       * Walks past leading whitespace, comments (`<!-- ... -->`), other
       * `<! ... >` declarations and `<? ... ?>` processing instructions. The
       * first other tag decides the result: a name starting with `rss` or
       * `rdf:RDF` yields RSS, one starting with `feed` yields Atom, and
       * anything else (including unterminated constructs or leading text)
       * yields 'text/html'.
       *
       * @return string Actual Content-Type
       */
      public function feed_or_html()
      {
          $body = $this->file->body;
          $len = strlen($body);
          $pos = strspn($body, "\x09\x0A\x0D\x20");

          while ($pos < $len)
          {
              switch ($body[$pos])
              {
                  case "\x09":
                  case "\x0A":
                  case "\x0D":
                  case "\x20":
                      // Skip the whole whitespace run, then re-examine from
                      // the top of the while loop.
                      $pos += strspn($body, "\x09\x0A\x0D\x20", $pos);
                      continue 2;

                  case '<':
                      $pos++;
                      break;

                  default:
                      // Text before any tag: treat as HTML.
                      return 'text/html';
              }

              // $pos now points just past a '<'.
              if (substr($body, $pos, 3) === '!--')
              {
                  $pos += 3;
                  if ($pos < $len && ($pos = strpos($body, '-->', $pos)) !== false)
                  {
                      $pos += 3;
                  }
                  else
                  {
                      // Unterminated comment.
                      return 'text/html';
                  }
              }
              elseif (substr($body, $pos, 1) === '!')
              {
                  if ($pos < $len && ($pos = strpos($body, '>', $pos)) !== false)
                  {
                      $pos++;
                  }
                  else
                  {
                      // Unterminated declaration.
                      return 'text/html';
                  }
              }
              elseif (substr($body, $pos, 1) === '?')
              {
                  if ($pos < $len && ($pos = strpos($body, '?>', $pos)) !== false)
                  {
                      $pos += 2;
                  }
                  else
                  {
                      // Unterminated processing instruction.
                      return 'text/html';
                  }
              }
              elseif (substr($body, $pos, 3) === 'rss'
                  || substr($body, $pos, 7) === 'rdf:RDF')
              {
                  return 'application/rss+xml';
              }
              elseif (substr($body, $pos, 4) === 'feed')
              {
                  return 'application/atom+xml';
              }
              else
              {
                  return 'text/html';
              }
          }

          return 'text/html';
      }

      /**
       * Match the start of the body against the known image signatures
       *
       * Shared by unknown() and image() so both apply the same checks in the
       * same order.
       *
       * @return string|false Matching image Content-Type, or false if the
       *                      body starts with none of the signatures
       */
      private function sniff_image_signature()
      {
          // Leading bytes => media type, checked in this order.
          $signatures = array(
              'GIF87a' => 'image/gif',
              'GIF89a' => 'image/gif',
              "\x89\x50\x4E\x47\x0D\x0A\x1A\x0A" => 'image/png',
              "\xFF\xD8\xFF" => 'image/jpeg',
              "\x42\x4D" => 'image/bmp',
              "\x00\x00\x01\x00" => 'image/vnd.microsoft.icon',
          );

          $body = $this->file->body;
          foreach ($signatures as $magic => $type)
          {
              if (substr($body, 0, strlen($magic)) === $magic)
              {
                  return $type;
              }
          }

          return false;
      }
  }