PageRenderTime 60ms CodeModel.GetById 33ms RepoModel.GetById 0ms app.codeStats 0ms

/application/libraries/Http.php

https://bitbucket.org/nfreear/trackoer-core
PHP | 200 lines | 131 code | 34 blank | 35 comment | 19 complexity | 65559b2437a7868110a4cdf881130d31 MD5 | raw file
  1. <?php if ( ! defined('BASEPATH')) exit('No direct script access allowed');
  2. /**
  3. * HTTP request library.
  4. * Code from base_service.php, using cURL.
  5. *
  6. * @copyright Copyright 2011 The Open University.
  7. * @author N.D.Freear, 6 March 2012.
  8. * @link https://github.com/IET-OU/ouplayer/blob/master/application/libraries/http.php
  9. * @link http://api.drupal.org/api/drupal/core%21includes%21common.inc/function/drupal_http_request/8
  10. */
  class Http {

      /** CodeIgniter controller instance; assigned by _prepare_request(). */
      protected $CI;

      // User-Agent strings selectable through the $spoof argument of request().
      const UA_DEFAULT = 'TrackOER/0.1 (PHP/cURL) (+http://track.olnet.org)';
      const UA_BROWSER = 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.142 Safari/535.19'; // Updated, April 2012.
      const UA_BROWSER_2 = 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.6; en-GB; rv:1.9.2.3) Gecko/20100401 Firefox/3.6.3';
      const UA_GOOGLEBOT = 'Googlebot/2.1 (+http://www.googlebot.com/bot.html)';
      const UA_LIKE_BOT = 'TrackOER/0.1 (PHP/cURL like Googlebot/2.1) (+http://track.olnet.org)';
      const UA_LIKE_BOT_2 = 'Mozilla/5.0 (compatible; PHP-trackoer/0.1; Googlebot/2.1)';

      /**
       * Fetch a URL and return a result object.
       *
       * @param string $url      Absolute URL (must include a scheme).
       * @param mixed  $spoof    'googlebot', 'like bot', 'browser'/TRUE (or any
       *                         other truthy value) or a falsy value; selects the
       *                         User-Agent, see _get_user_agent(). When falsy, a
       *                         Referer header of base_url() is also sent.
       * @param array  $options  Optional overrides. Keys read by the cURL path:
       *                         'ua', 'cookie', 'proxy_cookies', 'max_redirects',
       *                         'timeout', 'debug', 'auth'. The keys 'headers',
       *                         'method', 'data' and 'context' receive defaults
       *                         but are not read by the code in this class.
       * @return object  Always has ->success, ->http_code and ->data. On a
       *                 rejected URL it also has ->error and ->code (-1001 or
       *                 -1002); after a cURL request it has ->info and ->_headers.
       */
      public function request($url, $spoof=TRUE, $options=array()) {
          $use_curl = TRUE;

          if ($use_curl) {
              $result = $this->_prepare_request($url, $spoof, $options);

              if (isset($result->error)) {
                  // The URL was rejected before the default options were merged,
                  // so the cURL step has nothing valid to work with. Report the
                  // failure using the same fields callers read after a failed
                  // transfer (500.01 is the "no HTTP status" value used below).
                  $result->data = FALSE;
                  $result->_headers = NULL;
                  $result->http_code = 500.01;
                  $result->success = FALSE;
                  return $result;
              }

              return $this->_http_request_curl($url, $spoof, $options, $result);
          }
          else {
              // Fallback without cURL; needs php.ini: allow_url_fopen.
              ini_set('user_agent', 'PHP-trackoer/0.1 (+http://track.olnet.org)');

              $data = @ file_get_contents($url);
              // $http_response_header is only populated by the HTTP wrapper when
              // a response was actually received.
              $headers = isset($http_response_header) ? $http_response_header : NULL;

              $result = (object) array(
                  'url' => $url,
                  'success' => $data !== FALSE,
                  'http_code' => 0,
                  'data' => $data,
                  '_headers' => $headers,
              );
              if (is_array($headers) && isset($headers[0])) {
                  // Status line looks like "HTTP/1.1 200 OK": the code is at offset 9.
                  $result->http_code = (int) substr($headers[0], 9, 3);
              }
              return $result;
          }
      }

      /**
       * Prepare the HTTP request: validate the URL and fill in default options.
       *
       * @param string $url
       * @param mixed  $spoof    Passed to _get_user_agent() for the default 'ua'.
       * @param array  $options  By reference. Gains 'cookie' when 'proxy_cookies'
       *                         is set, then every missing default key.
       * @return stdClass  Empty on success; carries ->error and ->code when the
       *                   URL cannot be parsed (-1001) or has no scheme (-1002).
       *                   In that case $options is left without defaults.
       */
      protected function _prepare_request($url, $spoof, &$options) {
          $this->CI =& get_instance();
          $this->CI->load->helper('url');

          $result = new stdClass();

          // Parse the URL and make sure it carries a scheme.
          // NOTE(review): any scheme is accepted here, not just http/https --
          // confirm that callers never pass untrusted URLs.
          $uri = @parse_url($url);
          if (!$uri) {
              $result->error = 'unable to parse URL';
              $result->code = -1001;
              return $result;
          }
          if (!isset($uri['scheme'])) {
              $result->error = 'missing schema';
              $result->code = -1002;
              return $result;
          }

          // Bug #1334, Proxy mode to fix VLE caption redirects (Timedtext controller).
          // Forward the configured subset of the visitor's cookies upstream.
          if (isset($options['proxy_cookies'])) {
              $cookie_names = $this->CI->config->item('httplib_proxy_cookies');
              if (! is_array($cookie_names)) {
                  // Presumably _error() halts the request -- TODO confirm. If it
                  // returns, carry on with no cookies rather than iterating a
                  // non-array.
                  $this->CI->_error('Array expected for $config[httplib_proxy_cookies]', 400);
                  $cookie_names = array();
              }
              $cookies = '';
              foreach ($cookie_names as $cname) {
                  $cookies .= "$cname=". $this->CI->input->cookie($cname) .'; ';
              }
              $options['cookie'] = rtrim($cookies, '; ');
          }

          // Merge the default options; `+=` keeps any key the caller supplied.
          $options += array(
              'headers' => array(),
              'method' => 'GET',
              'data' => NULL,
              'max_redirects' => 2,
              'timeout' => 15.0, // Seconds.
              'context' => NULL,
              'cookie' => NULL,
              'ua' => $this->_get_user_agent($spoof),
              'debug' => FALSE,
              'auth' => NULL, // '[domain\]user:password'
          );
          return $result;
      }

      /**
       * Perform the HTTP request using cURL.
       *
       * Expects _prepare_request() to have run first: it sets $this->CI and
       * merges the option defaults read here.
       *
       * @param string $url
       * @param mixed  $spoof    When falsy, base_url() is sent as the Referer.
       * @param array  $options  Fully-defaulted options array.
       * @param object $result   Object to populate.
       * @return object  $result with ->data (body, or FALSE on failure),
       *                 ->_headers (raw response headers in debug mode, else
       *                 NULL), ->info (curl_getinfo() array), ->http_code and
       *                 ->success (status below 300 and a non-empty body).
       */
      protected function _http_request_curl($url, $spoof, $options, $result) {
          if (!function_exists('curl_init')) die('Error, cURL is required.');

          $h_curl = curl_init($url);

          curl_setopt($h_curl, CURLOPT_USERAGENT, $options['ua']);
          if (!$spoof) {
              curl_setopt($h_curl, CURLOPT_REFERER, base_url());
          }
          if ($options['cookie']) {
              curl_setopt($h_curl, CURLOPT_COOKIE, $options['cookie']);
              // NOTE(review): this echoes the forwarded cookie string in our own
              // response headers -- looks like a debugging aid; confirm it is wanted.
              header('X-Proxy-Cookie: '.$options['cookie']);
          }

          curl_setopt($h_curl, CURLOPT_AUTOREFERER, TRUE);
          curl_setopt($h_curl, CURLOPT_MAXREDIRS, $options['max_redirects']);
          curl_setopt($h_curl, CURLOPT_FOLLOWLOCATION, TRUE);
          curl_setopt($h_curl, CURLOPT_TIMEOUT, $options['timeout']);

          if ($options['debug']) {
              // Include response headers in the output and record the request headers.
              curl_setopt($h_curl, CURLOPT_HEADER, TRUE);
              curl_setopt($h_curl, CURLINFO_HEADER_OUT, TRUE);
          }
          if ($options['auth']) {
              //TODO: http://unitstep.net/blog/2009/05/05/using-curl-in-php-to-access-https-ssltls-protected-sites/
              // NOTE(review): peer verification is switched off while credentials
              // are being sent -- see the TODO above.
              curl_setopt($h_curl, CURLOPT_SSL_VERIFYPEER, FALSE);
              curl_setopt($h_curl, CURLOPT_HTTPAUTH, CURLAUTH_NTLM);
              curl_setopt($h_curl, CURLOPT_USERPWD, $options['auth']);
          }

          $http_proxy = $this->CI->config->item('http_proxy');
          if ($http_proxy) {
              curl_setopt($h_curl, CURLOPT_PROXY, $http_proxy);
          }

          curl_setopt($h_curl, CURLOPT_RETURNTRANSFER, TRUE);
          $response = curl_exec($h_curl);

          $result->info = curl_getinfo($h_curl);
          $result->data = $response;
          $result->_headers = NULL;

          // In debug mode the header block(s) are prepended to the body. Split on
          // the header size reported by cURL instead of guessing from the header
          // order; the body is returned unmodified, as in non-debug mode.
          if ($options['debug'] && is_string($response)) {
              $header_size = isset($result->info['header_size']) ? (int) $result->info['header_size'] : 0;
              if ($header_size > 0) {
                  $result->_headers = substr($response, 0, $header_size);
                  $result->data = (string) substr($response, $header_size);
              }
          }

          if ($errno = curl_errno($h_curl)) {
              // Transport-level failure: log it; the caller sees success === FALSE.
              $this->CI->_log('error', "cURL $errno, ".curl_error($h_curl)." GET $url");
          }

          // cURL reports status 0 when no HTTP response was received.
          $result->http_code = $result->info['http_code'] ? $result->info['http_code'] : 500.01;
          $result->success = ($result->info['http_code'] < 300 && $result->data);
          return $result;
      }

      /**
       * Determine a User-Agent string.
       *
       * Background on crawler detection in Moodle:
       *   http://www.installationwiki.org/Moodle#opentogoogle
       *   https://github.com/moodle/moodle/blob/master/lib/sessionlib.php#L257 : check_user_initialised() $CFG->opentogoogle -- 2012-05-06
       *   https://github.com/moodle/moodle/blob/master/lib/setuplib.php#L1330 : is_web_crawler() -- 2012-07-03
       *   http://xref.schoolsict.net/moodle/2.2/nav.html?lib/setuplib.php.html#is_web_crawler
       *
       * The keywords are matched with strict comparison. A loose switch would
       * let boolean TRUE match the first string case ('googlebot'), because
       * TRUE == 'googlebot' holds in PHP.
       *
       * @param mixed $spoof  'googlebot', 'like bot', 'browser', TRUE/any other
       *                      truthy value, or a falsy value.
       * @return string  UA_GOOGLEBOT, UA_LIKE_BOT, UA_BROWSER (truthy) or
       *                 UA_DEFAULT (falsy).
       */
      protected function _get_user_agent($spoof) {
          if ($spoof === 'googlebot') {
              return self::UA_GOOGLEBOT;
          }
          if ($spoof === 'like bot') {
              return self::UA_LIKE_BOT;
          }
          if ($spoof) {
              // 'browser', TRUE, and any other truthy value.
              return self::UA_BROWSER;
          }
          return self::UA_DEFAULT;
      }
  }