/inc/app/sitesearch/lib/Fetch.php

https://github.com/durand54/sitellite · PHP · 244 lines · 101 code · 29 blank · 114 comment · 16 complexity · 609b8e935ed752be5e23f9bfed2f0978 MD5 · raw file

  1. <?php
  /**
   * Implements a basic HTTP request and response parsing package built on
   * fsockopen(). Other protocols can be supported by sub-classing and
   * overriding getScheme().
   *
   * Failures are reported by returning false; details, when available, are
   * left in the $errno and $error properties.
   *
   * @package SiteSearch
   */
  class Fetch {
      /**
       * User agent to use when making requests.
       */
      public $ua = 'Fetch/1.0 (Sitellite CMS)';

      /**
       * Default content type when none is provided in the response headers.
       */
      public $defaultContentType = 'text/html';

      /**
       * Default port when none is provided in the get() URL requests.
       */
      public $defaultPort = 80;

      /**
       * Number of seconds to wait before a request should give up. Applied
       * both to opening the connection and to each read from the socket.
       */
      public $timeout = 30;

      /**
       * Error code returned if fsockopen() fails.
       */
      public $errno = false;

      /**
       * Error message should an error occur anywhere in this package.
       */
      public $error = false;

      /**
       * Maximum number of redirects getParsed() follows before giving up,
       * so that a redirect loop cannot hang the caller.
       */
      public $maxRedirects = 10;

      /**
       * Constructor method.
       *
       * @param string $ua optional user agent overriding the default
       */
      public function __construct ($ua = false) {
          if ($ua) {
              $this->ua = $ua;
          }
      }

      /**
       * Legacy class-named constructor. PHP 8 no longer treats a method
       * named after the class as a constructor, so the real work lives in
       * __construct(); this wrapper is kept so existing code that calls
       * $obj->Fetch() or parent::Fetch() keeps working.
       *
       * @param string $ua optional user agent overriding the default
       */
      public function Fetch ($ua = false) {
          $this->__construct ($ua);
      }

      /**
       * Parses the specified URL using the parse_url() function, but also
       * ensures that the scheme value is set to 'http' if none was present
       * in the URL, and that the port value is set to $defaultPort if none
       * was present. The scheme is lower-cased because URL schemes are
       * case-insensitive.
       *
       * @param string $url
       * @return array hash
       */
      public function parseUrl ($url) {
          $r = parse_url ($url);
          if (! is_array ($r)) {
              // parse_url() returns false for seriously malformed URLs
              $r = array ();
          }
          $r['scheme'] = isset ($r['scheme']) ? strtolower ($r['scheme']) : 'http';
          if (! isset ($r['port'])) {
              $r['port'] = $this->defaultPort;
          }
          return $r;
      }

      /**
       * Builds the raw HTTP/1.0 GET request for a URL hash from parseUrl().
       *
       * @param array $parsed hash from parseUrl()
       * @return string
       */
      protected function buildRequest ($parsed) {
          // an empty path must be requested as "/"
          $target = (isset ($parsed['path']) && $parsed['path'] !== '') ? $parsed['path'] : '/';
          if (isset ($parsed['query']) && $parsed['query'] !== '') {
              $target .= '?' . $parsed['query'];
          }

          // the Host header carries the port whenever it is not the HTTP default
          $hostHeader = isset ($parsed['host']) ? $parsed['host'] : '';
          if (isset ($parsed['port']) && $parsed['port'] != 80) {
              $hostHeader .= ':' . $parsed['port'];
          }

          return sprintf (
              "GET %s HTTP/1.0\r\nHost: %s\r\nUser-Agent: %s\r\n\r\n",
              $target,
              $hostHeader,
              $this->ua
          );
      }

      /**
       * Retrieves the specified URL. Returns the entire un-parsed response,
       * or false if the connection failed, the request could not be sent,
       * or reading the response failed or timed out. Non-http schemes are
       * delegated to getScheme().
       *
       * @param string $url
       * @return mixed response string, or false on failure
       */
      public function get ($url) {
          $r = $this->parseUrl ($url);
          $hostName = isset ($r['host']) ? $r['host'] : '';

          if ($r['scheme'] != 'http') {
              return $this->getScheme ($r['scheme'], $r['scheme'] . '://' . $hostName, $r);
          }

          if ($hostName === '') {
              $this->error = 'No host found in URL "' . $url . '"';
              return false;
          }

          // warnings are suppressed on purpose: failures are reported
          // through $errno / $error and the false return value
          $f = @fsockopen ($hostName, $r['port'], $this->errno, $this->error, $this->timeout);
          if (! $f) {
              return false;
          }

          // fsockopen()'s timeout only covers connecting; reads need their own
          $seconds = (int) floor ($this->timeout);
          $micro = (int) (($this->timeout - $seconds) * 1000000);
          stream_set_timeout ($f, $seconds, $micro);

          if (fwrite ($f, $this->buildRequest ($r)) === false) {
              $this->error = 'Failed to send request to ' . $hostName;
              fclose ($f);
              return false;
          }

          $data = '';
          while (! feof ($f)) {
              $chunk = fread ($f, 8192);
              $meta = stream_get_meta_data ($f);
              // feof() never becomes true on a stalled socket, so a failed
              // or timed-out read has to end the loop explicitly
              if ($chunk === false || $meta['timed_out']) {
                  $this->error = 'Error or timeout while reading response from ' . $hostName;
                  fclose ($f);
                  return false;
              }
              $data .= $chunk;
          }
          fclose ($f);

          return $data;
      }

      /**
       * Override this function to implement alternate scheme handlers.
       * Please note that the $host parameter is prefixed by the scheme
       * and '://' already. To access the host value independently, use
       * $parsed['host'].
       *
       * @param string $scheme
       * @param string $host
       * @param array $parsed hash from parseUrl()
       * @return string
       */
      public function getScheme ($scheme, $host, $parsed) {
          return '';
      }

      /**
       * Returns an array containing the response headers and body separated.
       * The array always has exactly two elements; the body is an empty
       * string when the response contains no blank separator line.
       *
       * @param string $data response string
       * @return array
       */
      public function splitRequest ($data) {
          // accept bare-LF line endings as well as CRLF
          $parts = preg_split ('/\r?\n\r?\n/', (string) $data, 2);
          return array_pad ($parts, 2, '');
      }

      /**
       * Returns the content type from a response header string, without
       * any parameters such as "; charset=...". If no content type is
       * found, it returns the value of the $defaultContentType property.
       *
       * @param string $data response headers
       * @return string
       */
      public function getContentType ($data) {
          // anchored to the start of a line so only the real header matches
          if (preg_match ('/^Content-Type:[ \t]*([^\r\n\t ;]+)/mi', $data, $regs)) {
              return $regs[1];
          }
          // unknown, sane default
          return $this->defaultContentType;
      }

      /**
       * Returns the response code and message from the header string.
       * The response is an array with the first value being the response
       * code (as an integer), and the second being the message, which may
       * consist of several words (e.g. "Not Found"). Falls back to
       * 200 / 'OK' when no status line is present.
       *
       * @param string $data response headers
       * @return array
       */
      public function getResponseCode ($data) {
          if (preg_match ('/^HTTP\/[0-9.]+[ \t]+([0-9]+)[ \t]*([^\r\n]*)/mi', $data, $regs)) {
              return array ((int) $regs[1], trim ($regs[2]));
          }
          return array (200, 'OK');
      }

      /**
       * Determines whether the response is a redirect. If so,
       * it returns the HTTP Location value. If not, it returns
       * false.
       *
       * @param string $data response headers
       * @return mixed
       */
      public function isRedirect ($data) {
          // anchored to the start of a line so that headers which merely
          // end in "Location" (e.g. Content-Location) are not mistaken
          // for a redirect
          if (preg_match ('/^Location:[ \t]*([^\r\n\t ]+)/mi', $data, $regs)) {
              return $regs[1];
          }
          return false;
      }

      /**
       * Resolves a Location value against the URL that produced it.
       * Handles absolute URLs, protocol-relative ("//host/..."),
       * root-relative ("/..."), query-only ("?...") and directory-relative
       * references. Dot segments ("../") are passed through unchanged.
       *
       * @param string $url URL of the current request
       * @param string $redirect Location header value
       * @return string
       */
      protected function resolveRedirect ($url, $redirect) {
          // only a leading scheme makes the value absolute; "://" further
          // along (e.g. inside a query string) does not
          if (preg_match ('/^[a-z][a-z0-9+.\-]*:\/\//i', $redirect)) {
              return $redirect;
          }

          $info = parse_url ($url);
          if (! is_array ($info)) {
              $info = array ();
          }
          $scheme = isset ($info['scheme']) ? $info['scheme'] : 'http';

          if (strpos ($redirect, '//') === 0) {
              return $scheme . ':' . $redirect;
          }

          $root = $scheme . '://' . (isset ($info['host']) ? $info['host'] : '');
          if (isset ($info['port'])) {
              $root .= ':' . $info['port'];
          }

          if (strpos ($redirect, '/') === 0) {
              return $root . $redirect;
          }

          $path = (isset ($info['path']) && $info['path'] !== '') ? $info['path'] : '/';
          if (strpos ($redirect, '?') === 0) {
              return $root . $path . $redirect;
          }

          // relative to the directory part of the current path
          $slash = strrpos ($path, '/');
          $dir = ($slash === false) ? '/' : substr ($path, 0, $slash + 1);
          return $root . $dir . $redirect;
      }

      /**
       * Merges the $url and $redirect into one $request url then returns
       * an array with the new $request url as the first value, and the
       * second being a call to $this->get ($request).
       *
       * @param string $url URL of the current request
       * @param string $redirect Location header value
       * @return array
       */
      public function getRedirect ($url, $redirect) {
          $request = $this->resolveRedirect ($url, $redirect);
          return array ($request, $this->get ($request));
      }

      /**
       * Returns a parsed HTTP request for the specified $url, following
       * redirects (at most $maxRedirects of them). Returns false if any
       * request fails or the redirect limit is exceeded.
       * The request array has the following structure:
       *
       * array (
       *     url => string (final URL after redirects)
       *     headers => string
       *     body => string
       *     response-code => integer
       *     response-code-message => string
       *     content-type => string
       * )
       *
       * @param string $url
       * @return mixed array hash, or false on failure
       */
      public function getParsed ($url) {
          $response = $this->get ($url);
          if (! $response) {
              return false;
          }

          $data = array ();
          $parts = $this->splitRequest ($response);
          $data['headers'] = $parts[0];
          $data['body'] = $parts[1];

          $hops = 0;
          while (($redirect = $this->isRedirect ($data['headers'])) !== false) {
              $hops++;
              if ($hops > $this->maxRedirects) {
                  $this->error = 'Too many redirects (limit is ' . $this->maxRedirects . ')';
                  return false;
              }

              list ($url, $response) = $this->getRedirect ($url, $redirect);
              if (! $response) {
                  return false;
              }

              $parts = $this->splitRequest ($response);
              $data['headers'] = $parts[0];
              $data['body'] = $parts[1];
          }

          $status = $this->getResponseCode ($data['headers']);
          $data['response-code'] = $status[0];
          $data['response-code-message'] = $status[1];
          $data['content-type'] = $this->getContentType ($data['headers']);
          $data['url'] = $url;

          return $data;
      }
  }
  215. ?>