PageRenderTime 43ms CodeModel.GetById 13ms RepoModel.GetById 0ms app.codeStats 0ms

/include/feedonfeeds-0.1.9/magpierss/extlib/Snoopy.class.inc

https://github.com/radicaldesigns/amp
PHP | 998 lines | 802 code | 49 blank | 147 comment | 28 complexity | 19c2b172e733e3fc8d0a4556411da967 MD5 | raw file
Possible License(s): LGPL-2.1, GPL-2.0, BSD-3-Clause, LGPL-2.0, CC-BY-SA-3.0, AGPL-1.0
  1. <?php
  2. /*************************************************
  3. Snoopy - the PHP net client
  4. Author: Monte Ohrt <monte@ispi.net>
  5. Copyright (c): 1999-2000 ispi, all rights reserved
  6. Version: 1.0 (plus - see SJM comments below)
  7. * This library is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU Lesser General Public
  9. * License as published by the Free Software Foundation; either
  10. * version 2.1 of the License, or (at your option) any later version.
  11. *
  12. * This library is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public
  18. * License along with this library; if not, write to the Free Software
  19. * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  20. You may contact the author of Snoopy by e-mail at:
  21. monte@ispi.net
  22. Or, write to:
  23. Monte Ohrt
  24. CTO, ispi
  25. 237 S. 70th suite 220
  26. Lincoln, NE 68510
  27. The latest version of Snoopy can be obtained from:
  28. http://snoopy.sourceforge.com
  29. SJM - alpha-grade changes based on the version of Snoopy released with MagpieRSS 0.7
  30. comments to steve@minutillo.com
  31. Two additions:
  32. 1) If this is PHP 4.3 or greater, and 'openssl' is available,
  33. use the PHP built in SSL support for "https" instead of calling curl externally.
  34. Use of external curl can still be forced by setting $use_curl = true.
  35. ref: http://us2.php.net/fsockopen
  36. 2) HTTP Digest Authentication. If you set a username and password, basic auth
  37. will be tried first. If that fails, and the server sends back a
  38. WWW-Authenticate: Digest header, the request will be retried with the appropriate
  39. digest response. Only qop=auth is supported, with MD5 as the algorithm.
  40. I realize that sending basic auth first, and then following up with a digest
  41. challenge-response kind of defeats the purpose in terms of security.
  42. ref: http://www.faqs.org/rfcs/rfc2617.html
  43. *************************************************/
  class Snoopy
  {
      /**** Public variables ****/

      /* user definable vars */

      var $host = "www.php.net"; // host name we are connecting to
      var $port = 80; // port we are connecting to
      var $proxy_host = ""; // proxy host to use
      var $proxy_port = ""; // proxy port to use
      var $agent = "Snoopy v1.0"; // agent we masquerade as
      var $referer = ""; // referer info to pass
      var $cookies = array(); // array of cookies to pass
                              // $cookies["username"]="joe";
      var $rawheaders = array(); // array of raw headers to send
                                 // $rawheaders["Content-type"]="text/html";

      var $maxredirs = 5; // http redirection depth maximum. 0 = disallow
      var $lastredirectaddr = ""; // contains address of last redirected address
      var $offsiteok = true; // allows redirection off-site
      var $maxframes = 0; // frame content depth maximum. 0 = disallow
      var $expandlinks = true; // expand links to fully qualified URLs.
                               // this only applies to fetchlinks()
                               // or submitlinks()
      var $passcookies = true; // pass set cookies back through redirects
                               // NOTE: this currently does not respect
                               // dates, domains or paths.

      var $user = ""; // user for http authentication
      var $pass = ""; // password for http authentication

      // http accept types
      var $accept = "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, */*";

      var $results = ""; // where the content is put

      var $error = ""; // error messages sent here
      var $response_code = ""; // response code returned from server
      var $headers = array(); // headers returned from server sent here
      var $maxlength = 500000; // max return data length (body)
      var $read_timeout = 0; // timeout on read operations, in seconds
                             // supported only since PHP 4 Beta 4
                             // set to 0 to disallow timeouts
      var $timed_out = false; // if a read operation timed out
      var $status = 0; // http request status

      var $curl_path = "/usr/bin/curl";
                             // Snoopy will use cURL for fetching
                             // SSL content if a full system path to
                             // the cURL binary is supplied here.
                             // set to false if you do not have
                             // cURL installed. See http://curl.haxx.se
                             // for details on installing cURL.
                             // Snoopy does *not* use the cURL
                             // library functions built into php,
                             // as these functions are not stable
                             // as of this Snoopy release.

      // SJM - always use curl for HTTPS requests?
      var $use_curl = false;

      // send Accept-encoding: gzip?
      var $use_gzip = true;

      /**** Private variables ****/

      var $_maxlinelen = 4096; // max line length (headers)

      var $_scheme = "http"; // default scheme
      var $_httpmethod = "GET"; // default http request method
      var $_httpversion = "HTTP/1.0"; // default http request version
      var $_submit_method = "POST"; // default submit method
      var $_submit_type = "application/x-www-form-urlencoded"; // default submit type
      var $_mime_boundary = ""; // MIME boundary for multipart/form-data submit type
      var $_redirectaddr = false; // will be set if page fetched is a redirect
      var $_redirectdepth = 0; // increments on an http redirect
      var $_trieddigest = false; // have we tried Digest auth yet?
      var $_frameurls = array(); // frame src urls
      var $_framedepth = 0; // increments on frame depth

      var $_isproxy = false; // set if using a proxy server
      var $_fp_timeout = 30; // timeout for socket connection

      /*======================================================================*\
          Function:   fetch
          Purpose:    fetch the contents of a web page over http or https,
                      then follow any redirect and fetch any frames the
                      response asked for (subject to maxredirs/maxframes)
          Input:      $URI    the location of the page to fetch
          Output:     false when the scheme is not http/https, when curl is
                      required but not executable, or when the socket
                      connection fails; true once a request was attempted.
                      $this->results  the output text from the fetch
                      $this->error    the failure message, if any
      \*======================================================================*/
      function fetch($URI)
      {
          $URI_PARTS = parse_url($URI);
          // parse_url() returns false for seriously malformed URIs
          if (!is_array($URI_PARTS))
              $URI_PARTS = array();

          if (!empty($URI_PARTS["user"]))
              $this->user = $URI_PARTS["user"];
          if (!empty($URI_PARTS["pass"]))
              $this->pass = $URI_PARTS["pass"];

          $scheme = isset($URI_PARTS["scheme"]) ? $URI_PARTS["scheme"] : "";
          $this->_scheme = $scheme;
          if ($scheme != "http" && $scheme != "https")
          {
              // not a valid protocol
              $this->error = 'Invalid protocol "'.$scheme."\"\n";
              return false;
          }

          $host = isset($URI_PARTS["host"]) ? $URI_PARTS["host"] : "";

          // request target used when no proxy is involved: the path (never
          // empty, so "http://host?x=1" becomes "/?x=1") plus the query string
          $path = (isset($URI_PARTS["path"]) && $URI_PARTS["path"] !== "") ? $URI_PARTS["path"] : "/";
          if (isset($URI_PARTS["query"]))
              $path .= "?".$URI_PARTS["query"];

          // SJM - if they really want curl, or it isn't PHP 4.3 yet, or openssl extension isn't loaded
          if ($scheme == "https" &&
              ($this->use_curl || !function_exists('file_get_contents') || !extension_loaded('openssl')))
          {
              if (!$this->curl_path || !is_executable($this->curl_path))
              {
                  $this->error = "Bad curl ($this->curl_path), can't fetch HTTPS \n";
                  return false;
              }
              $this->host = $host;
              if (!empty($URI_PARTS["port"]))
                  $this->port = $URI_PARTS["port"];

              // using proxy, send entire URI; no proxy, send only the path
              $this->_curlrequest($this->_isproxy ? $URI : $path, $URI, $this->_httpmethod);

              $this->_followredirect();
              $this->_fetchframes();
              return true;
          }

          // SJM - else drop through and treat https as http
          $this->host = $host;
          if (!empty($URI_PARTS["port"]))
              $this->port = $URI_PARTS["port"];
          elseif ($scheme == "https")
              $this->port = 443; // SJM - if it's https, default the port to 443

          $fp = null;
          if (!$this->_connect($fp))
              return false;

          // _isproxy is decided by _connect(), so it must be read afterwards.
          // using proxy, send entire URI; no proxy, send only the path
          $this->_httprequest($this->_isproxy ? $URI : $path, $fp, $URI, $this->_httpmethod);
          $this->_disconnect($fp);

          $this->_followredirect();
          $this->_fetchframes();
          return true;
      }

      /*======================================================================*\
          Function:   _followredirect
          Purpose:    follow the redirect recorded in $this->_redirectaddr by
                      the last request, if the depth limit and the off-site
                      policy allow it
      \*======================================================================*/
      function _followredirect()
      {
          if (!$this->_redirectaddr)
              return;

          /* url was redirected, check if we've hit the max depth */
          if ($this->maxredirs <= $this->_redirectdepth)
              return;

          // only follow redirect if it's on this site, or offsiteok is true.
          // The host must be followed by a port, path, query, fragment or the
          // end of the address so "host.evil.example" is not taken for "host".
          $onsite = preg_match(
              "~^https?://".preg_quote($this->host, "~")."(?:[:/?#]|$)~i",
              $this->_redirectaddr);

          if ($onsite || $this->offsiteok)
          {
              /* follow the redirect */
              $this->_redirectdepth++;
              $this->lastredirectaddr = $this->_redirectaddr;
              $this->fetch($this->_redirectaddr);
          }
      }

      /*======================================================================*\
          Function:   _fetchframes
          Purpose:    fetch the frame URLs collected by the last request,
                      stopping once maxframes frames have been fetched
      \*======================================================================*/
      function _fetchframes()
      {
          if ($this->_framedepth >= $this->maxframes || count($this->_frameurls) == 0)
              return;

          // work on a copy: the nested fetch() calls refill $this->_frameurls
          $frameurls = $this->_frameurls;
          $this->_frameurls = array();

          foreach ($frameurls as $frameurl)
          {
              if ($this->_framedepth >= $this->maxframes)
                  break;
              $this->fetch($frameurl);
              $this->_framedepth++;
          }
      }

      /*======================================================================*\
          Private functions
      \*======================================================================*/

      /*======================================================================*\
          Function:   _striplinks
          Purpose:    strip the hyperlinks from an html document
          Input:      $document   document to strip.
          Output:     $match      an array of the href values found; quoted
                                  hrefs come first, then unquoted ones. Empty
                                  array when the document has no links.
      \*======================================================================*/
      function _striplinks($document)
      {
          // [^>]*? keeps the search inside the opening <a ...> tag; a greedy
          // ".*" with the "s" modifier would run to the last href in the page.
          preg_match_all("'<\s*a\s+[^>]*?href\s*=\s*       # find <a ... href=
                          ([\"\'])?                        # find single or double quote
                          (?(1) (.*?)\\1 | ([^\s\>]+))     # if quote found, match up to next matching
                                                           # quote, otherwise match up to next space
                          'isx",$document,$links);

          // catenate the non-empty matches from the conditional subpattern
          $match = array();
          foreach (array(2, 3) as $group)
          {
              if (!isset($links[$group]))
                  continue;
              foreach ($links[$group] as $val)
              {
                  if ($val !== "")
                      $match[] = $val;
              }
          }

          // return the links
          return $match;
      }

      /*======================================================================*\
          Function:   _stripform
          Purpose:    strip the form elements from an html document
          Input:      $document   document to strip.
          Output:     a string holding every matched form/input/select/
                      textarea/option element, joined with "\r\n"
      \*======================================================================*/
      function _stripform($document)
      {
          $pattern = "'<\/?(FORM|INPUT|SELECT|TEXTAREA|(OPTION))[^<>]*>(?(2)(.*(?=<\/?(option|select)[^<>]*>[\r\n]*)|(?=[\r\n]*))|(?=[\r\n]*))'Usi";
          $elements = array();
          preg_match_all($pattern, $document, $elements);

          // catenate the matches and return them
          return implode("\r\n", $elements[0]);
      }

      /*======================================================================*\
          Function:   _striptext
          Purpose:    strip the text from an html document
          Input:      $document   document to strip.
          Output:     $text       the resulting text
      \*======================================================================*/
      function _striptext($document)
      {
          // I didn't use preg eval (//e) since that is only available in PHP 4.0.
          // so, list your entities one by one here. I included some of the
          // more common ones.
          //
          // &amp; is decoded last so that "&amp;lt;" yields the text "&lt;"
          // rather than being decoded twice into "<".
          $search = array("'<script[^>]*?>.*?</script>'si", // strip out javascript
                          "'<[\/\!]*?[^<>]*?>'si",           // strip out html tags
                          "'([\r\n])[\s]+'",                 // strip out white space
                          "'&(quote?|#34);'i",               // replace html entities
                                                             // (the misspelt "&quote;" is still accepted)
                          "'&(lt|#60);'i",
                          "'&(gt|#62);'i",
                          "'&(nbsp|#160);'i",
                          "'&(iexcl|#161);'i",
                          "'&(cent|#162);'i",
                          "'&(pound|#163);'i",
                          "'&(copy|#169);'i",
                          "'&(amp|#38);'i"
                          );
          $replace = array("",
                           "",
                           "\\1",
                           "\"",
                           "<",
                           ">",
                           " ",
                           chr(161),
                           chr(162),
                           chr(163),
                           chr(169),
                           "&");

          $text = preg_replace($search,$replace,$document);

          return $text;
      }

      /*======================================================================*\
          Function:   _expandlinks
          Purpose:    expand each link into a fully qualified URL
          Input:      $links      the links to qualify (string or array)
                      $URI        the full URI to get the base from
          Output:     $expandedLinks  the expanded links
      \*======================================================================*/
      function _expandlinks($links,$URI)
      {
          // drop the query string
          $noquery = preg_match("/^[^\?]+/", $URI, $match) ? $match[0] : "";

          // split "scheme://authority" from the path so that the trailing
          // "/file.ext" is only ever removed from the path, never from a
          // host name such as "example.com"
          $root = "";
          $path = $noquery;
          if (preg_match("|^([^:/]+://[^/]*)(.*)$|", $noquery, $parts))
          {
              $root = $parts[1];
              $path = $parts[2];
          }
          $base = $root.preg_replace("|/[^\/\.]+\.[^\/\.]+$|", "", $path);

          // the base is used as a replacement string: neutralise "\" and "$"
          // so they cannot be read as back-references
          $base = strtr($base, array("\\" => "\\\\", "\$" => "\\\$"));

          $search = array( "|^http://".preg_quote($this->host, "|")."|i",
                           "|^(?!https?://)(\/)?(?!mailto:)|i",
                           "|/\./|",
                           "|/[^\/]+/\.\./|"
                           );

          $replace = array( "",
                            $base."/",
                            "/",
                            "/"
                            );

          $expandedLinks = preg_replace($search,$replace,$links);

          return $expandedLinks;
      }

      /*======================================================================*\
          Function:   _redirectfromheader
          Purpose:    extract the redirect target from a Location:/URI:
                      response header
          Input:      $header     one raw response header line
                      $scheme     scheme to use when the target has no host
          Output:     the absolute redirect address, or false when the line
                      is not a Location:/URI: header
      \*======================================================================*/
      function _redirectfromheader($header, $scheme)
      {
          // header names are case-insensitive and the space after ":" is optional
          if (!preg_match("/^(Location:|URI:)\s*(.*)/i", rtrim($header), $matches))
              return false;

          $location = $matches[2];

          // look for :// in the Location header to see if hostname is included
          if (preg_match("|\:\/\/|", $location))
              return $location;

          // no host in the path, so prepend
          $address = $scheme."://".$this->host.":".$this->port;
          // eliminate double slash
          if (!preg_match("|^/|", $location))
              $address .= "/";

          return $address.$location;
      }

      /*======================================================================*\
          Function:   _storeresults
          Purpose:    post-process a fetched body: detect a meta-refresh
                      redirect, queue frame sources, and store the body in
                      $this->results (an array once frames are involved)
          Input:      $results    the fetched body
                      $URI        the full URI that was fetched
                      $scheme     scheme used to qualify frame sources
      \*======================================================================*/
      function _storeresults($results, $URI, $scheme)
      {
          // check if there is a a redirect meta tag
          if (preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]+URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",$results,$match))
          {
              $this->_redirectaddr = $this->_expandlinks($match[1],$URI);
          }

          // have we hit our frame depth and is there frame src to fetch?
          if (($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i",$results,$match))
          {
              // "[]" on a string is a fatal error on PHP >= 7.1
              if (!is_array($this->results))
                  $this->results = array();
              $this->results[] = $results;
              foreach ($match[1] as $framesrc)
                  $this->_frameurls[] = $this->_expandlinks($framesrc, $scheme."://".$this->host);
          }
          // have we already fetched framed content?
          elseif (is_array($this->results))
              $this->results[] = $results;
          // no framed content
          else
              $this->results = $results;
      }

      /*======================================================================*\
          Function:   _httprequest
          Purpose:    go get the http data from the server
          Input:      $url            the url to fetch (request target)
                      $fp             the current open file pointer
                      $URI            the full URI
                      $http_method    the request method, e.g. "GET"
                      $content_type   Content-type to announce, if any
                      $body           body contents to send if any (POST)
          Output:     false when a read timed out ($this->status is -100),
                      true otherwise. Fills $this->headers, ->status,
                      ->response_code, ->results and ->_redirectaddr.
      \*======================================================================*/
      function _httprequest($url,$fp,$URI,$http_method,$content_type="",$body="")
      {
          if ($this->passcookies && $this->_redirectaddr)
              $this->setcookies();

          $URI_PARTS = parse_url($URI);
          $scheme = (is_array($URI_PARTS) && isset($URI_PARTS["scheme"])) ? $URI_PARTS["scheme"] : $this->_scheme;
          if (empty($url))
              $url = "/";

          $headers = $http_method." ".$url." ".$this->_httpversion."\r\n";
          if (!empty($this->agent))
              $headers .= "User-Agent: ".$this->agent."\r\n";
          if (!empty($this->host) && !isset($this->rawheaders['Host']))
              $headers .= "Host: ".$this->host."\r\n";
          if (!empty($this->accept))
              $headers .= "Accept: ".$this->accept."\r\n";

          if ($this->use_gzip)
          {
              // make sure PHP was built with --with-zlib
              // and we can handle gzipp'ed data
              if (function_exists('gzinflate'))
              {
                  $headers .= "Accept-encoding: gzip\r\n";
              }
              else
              {
                  trigger_error(
                      "use_gzip is on, but PHP was built without zlib support.".
                      " Requesting file(s) without gzip encoding.",
                      E_USER_NOTICE);
              }
          }

          if (!empty($this->referer))
              $headers .= "Referer: ".$this->referer."\r\n";

          if (!empty($this->cookies))
          {
              if (!is_array($this->cookies))
                  $this->cookies = (array)$this->cookies;

              $cookie_headers = 'Cookie: ';
              foreach ($this->cookies as $cookieKey => $cookieVal)
                  $cookie_headers .= $cookieKey."=".urlencode($cookieVal)."; ";
              // drop the trailing "; "
              $headers .= substr($cookie_headers,0,-2) . "\r\n";
          }

          if (!empty($this->rawheaders))
          {
              if (!is_array($this->rawheaders))
                  $this->rawheaders = (array)$this->rawheaders;
              foreach ($this->rawheaders as $headerKey => $headerVal)
                  $headers .= $headerKey.": ".$headerVal."\r\n";
          }

          if (!empty($content_type))
          {
              $headers .= "Content-type: $content_type";
              if ($content_type == "multipart/form-data")
                  $headers .= "; boundary=".$this->_mime_boundary;
              $headers .= "\r\n";
          }
          if (!empty($body))
              $headers .= "Content-length: ".strlen($body)."\r\n";
          if (!empty($this->user) || !empty($this->pass))
              $headers .= "Authorization: BASIC ".base64_encode($this->user.":".$this->pass)."\r\n";
          $headers .= "\r\n";

          // set the read timeout if needed
          if ($this->read_timeout > 0)
              socket_set_timeout($fp, $this->read_timeout);
          $this->timed_out = false;

          fwrite($fp,$headers.$body,strlen($headers.$body));

          $this->_redirectaddr = false;
          // reset rather than unset(): setcookies() iterates this property
          $this->headers = array();

          // content was returned gzip encoded?
          $is_gzipped = false;

          while (($currentHeader = fgets($fp,$this->_maxlinelen)) !== false)
          {
              if ($this->read_timeout > 0 && $this->_check_timeout($fp))
              {
                  $this->status=-100;
                  return false;
              }

              // a blank line ends the header section
              if (preg_match("/^\r?\n$/", $currentHeader))
                  break;

              if (!$this->_trieddigest && preg_match("/^WWW-Authenticate:\s*Digest\s+(.*)/i", $currentHeader, $matches))
              {
                  // SJM - we got a Digest challenge. Try to respond...
                  $digestheader = $matches[1];

                  $nonce = preg_match("/nonce=\"(.*?)\"/", $digestheader, $part) ? $part[1] : "";
                  $realm = preg_match("/realm=\"(.*?)\"/", $digestheader, $part) ? $part[1] : "";

                  $cnonce = md5(microtime());

                  $ha1 = md5($this->user . ":" . $realm . ":" . $this->pass);
                  $ha2 = md5($http_method . ":" . $url);

                  $response = md5($ha1 . ":" . $nonce . ":00000001:" . $cnonce . ":auth:" . $ha2);

                  $auth = 'Digest username="' . $this->user . '", ';
                  $auth .= 'realm="' . $realm . '", ';
                  $auth .= 'nonce="' . $nonce . '", ';
                  $auth .= 'uri="' . $url . '", ';
                  $auth .= 'response="' . $response . '", ';
                  $auth .= 'algorithm="MD5", ';
                  $auth .= 'cnonce="' . $cnonce . '", ';
                  $auth .= 'nc=00000001, ';
                  $auth .= 'qop="auth"';

                  // SJM - treat Digest challenge as a redirect. set flag so we don't keep retrying.
                  $this->_trieddigest = true;
                  if (!is_array($this->rawheaders))
                      $this->rawheaders = array();
                  $this->rawheaders["Authorization"] = $auth;
                  $this->user = "";
                  $this->pass = "";
                  $this->_redirectaddr = $scheme . '://' . $this->host . $url;
              }

              // if a header begins with Location: or URI:, set the redirect
              $redirect = $this->_redirectfromheader($currentHeader, $scheme);
              if ($redirect !== false)
                  $this->_redirectaddr = $redirect;

              if (preg_match("|^HTTP/|",$currentHeader))
              {
                  if (preg_match("|^HTTP/[^\s]*\s(.*?)\s|",$currentHeader, $status))
                  {
                      $this->status= $status[1];
                  }
                  $this->response_code = $currentHeader;
              }

              if (preg_match("/Content-Encoding: gzip/i", $currentHeader))
              {
                  $is_gzipped = true;
              }

              $this->headers[] = $currentHeader;
          }

          // read the body; stop on EOF/timeout (empty read) or once more
          // than maxlength bytes were collected. Comparing against ""/false
          // keeps a chunk consisting of "0" from ending the loop early.
          $results = "";
          while (true)
          {
              $data = fread($fp, $this->maxlength);
              if ($data === false || $data === "")
                  break;
              $results .= $data;
              if (strlen($results) > $this->maxlength)
                  break;
          }

          // gunzip
          if ($is_gzipped && function_exists('gzinflate'))
          {
              // per http://www.php.net/manual/en/function.gzencode.php
              // (skip the 10 byte gzip header, inflate the rest)
              $results = substr($results, 10);
              $results = gzinflate($results);
          }

          if ($this->read_timeout > 0 && $this->_check_timeout($fp))
          {
              $this->status=-100;
              return false;
          }

          $this->_storeresults($results, $URI, $scheme);

          return true;
      }

      /*======================================================================*\
          Function:   _curlrequest
          Purpose:    go get the https data from the server using curl
          Input:      $url            the url to fetch
                      $URI            the full URI
                      $http_method    the request method (not passed to
                                      curl; curl POSTs when a body is given)
                      $content_type   Content-type to announce, if any
                      $body           body contents to send if any (POST)
          Output:     false when the header temp file cannot be created or
                      curl exits non-zero ($this->error is set), true
                      otherwise. Fills $this->headers, ->status,
                      ->response_code, ->results and ->_redirectaddr.
      \*======================================================================*/
      function _curlrequest($url,$URI,$http_method,$content_type="",$body="")
      {
          if ($this->passcookies && $this->_redirectaddr)
              $this->setcookies();

          $headers = array();

          $URI_PARTS = parse_url($URI);
          $scheme = (is_array($URI_PARTS) && isset($URI_PARTS["scheme"])) ? $URI_PARTS["scheme"] : $this->_scheme;
          if (empty($url))
              $url = "/";
          // GET ... header not needed for curl
          //$headers[] = $http_method." ".$url." ".$this->_httpversion;
          if (!empty($this->agent))
              $headers[] = "User-Agent: ".$this->agent;
          if (!empty($this->host))
              $headers[] = "Host: ".$this->host;
          if (!empty($this->accept))
              $headers[] = "Accept: ".$this->accept;
          if (!empty($this->referer))
              $headers[] = "Referer: ".$this->referer;

          if (!empty($this->cookies))
          {
              if (!is_array($this->cookies))
                  $this->cookies = (array)$this->cookies;

              $cookie_str = 'Cookie: ';
              foreach ($this->cookies as $cookieKey => $cookieVal)
                  $cookie_str .= $cookieKey."=".urlencode($cookieVal)."; ";
              // drop the trailing "; "
              $headers[] = substr($cookie_str,0,-2);
          }

          if (!empty($this->rawheaders))
          {
              if (!is_array($this->rawheaders))
                  $this->rawheaders = (array)$this->rawheaders;
              foreach ($this->rawheaders as $headerKey => $headerVal)
                  $headers[] = $headerKey.": ".$headerVal;
          }

          if (!empty($content_type))
          {
              if ($content_type == "multipart/form-data")
                  $headers[] = "Content-type: $content_type; boundary=".$this->_mime_boundary;
              else
                  $headers[] = "Content-type: $content_type";
          }
          if (!empty($body))
              $headers[] = "Content-length: ".strlen($body);
          if (!empty($this->user) || !empty($this->pass))
              $headers[] = "Authorization: BASIC ".base64_encode($this->user.":".$this->pass);

          // Everything handed to the shell is quoted with escapeshellarg():
          // header values, the body and above all the URI, which may be a
          // redirect target chosen by the remote server.
          $cmdline_params = "";
          foreach ($headers as $header)
              $cmdline_params .= " -H ".escapeshellarg($header);

          if (!empty($body))
              $cmdline_params .= " -d ".escapeshellarg($body);

          if ($this->read_timeout > 0)
              $cmdline_params .= " -m ".escapeshellarg($this->read_timeout);

          // tempnam() creates a uniquely named file instead of guessing a
          // predictable name in /tmp
          $tmpdir = function_exists('sys_get_temp_dir') ? sys_get_temp_dir() : "/tmp";
          $headerfile = tempnam($tmpdir, "snoopy");
          if ($headerfile === false)
          {
              $this->error = "Error: could not create a temporary file for the cURL headers.";
              return false;
          }

          # accept self-signed certs
          # NOTE(review): -k disables certificate verification altogether
          $cmdline_params .= " -k";

          $output = array();
          $return = 0;
          exec($this->curl_path." -D ".escapeshellarg($headerfile).$cmdline_params." ".escapeshellarg($URI),$output,$return);

          if ($return)
          {
              unlink($headerfile);
              $this->error = "Error: cURL could not retrieve the document, error $return.";
              return false;
          }

          $results = implode("\r\n",$output);

          $result_headers = file($headerfile);
          unlink($headerfile);
          if (!is_array($result_headers))
              $result_headers = array();

          $this->_redirectaddr = false;
          // reset rather than unset(): setcookies() iterates this property
          $this->headers = array();

          foreach ($result_headers as $currentHeader)
          {
              // if a header begins with Location: or URI:, set the redirect
              $redirect = $this->_redirectfromheader($currentHeader, $scheme);
              if ($redirect !== false)
                  $this->_redirectaddr = $redirect;

              if (preg_match("|^HTTP/|",$currentHeader))
              {
                  $this->response_code = $currentHeader;
                  if (preg_match("|^HTTP/[^\s]*\s(.*?)\s|",$this->response_code, $match))
                  {
                      $this->status= $match[1];
                  }
              }
              $this->headers[] = $currentHeader;
          }

          $this->_storeresults($results, $URI, $scheme);

          return true;
      }

      /*======================================================================*\
          Function:   setcookies()
          Purpose:    set cookies for a redirection: copy every Set-Cookie
                      name/value pair of the last response into
                      $this->cookies
      \*======================================================================*/
      function setcookies()
      {
          if (empty($this->headers) || !is_array($this->headers))
              return;
          if (!is_array($this->cookies))
              $this->cookies = empty($this->cookies) ? array() : (array)$this->cookies;

          foreach ($this->headers as $header)
          {
              // the value ends at ";" or at the line end (headers are stored
              // with their CRLF); it is url-decoded here because the request
              // builders urlencode() it again when sending
              if (preg_match("/^set-cookie:[\s]+([^=]+)=([^;\r\n]+)/i", $header, $match))
                  $this->cookies[$match[1]] = urldecode($match[2]);
          }
      }

      /*======================================================================*\
          Function:   _check_timeout
          Purpose:    checks whether timeout has occurred
          Input:      $fp file pointer
          Output:     true (and $this->timed_out set) when read_timeout is
                      enabled and the stream reports a timeout, else false
      \*======================================================================*/
      function _check_timeout($fp)
      {
          if (!($this->read_timeout > 0))
              return false;

          $fp_status = socket_get_status($fp);
          if (!$fp_status["timed_out"])
              return false;

          $this->timed_out = true;
          return true;
      }

      /*======================================================================*\
          Function:   _connect
          Purpose:    make a socket connection to the proxy when one is
                      configured, otherwise to $this->host:$this->port
                      (through ssl:// when the scheme is https)
          Input:      $fp file pointer (set by reference)
          Output:     true on success; false on failure, with the error
                      number in $this->status and a message in $this->error
      \*======================================================================*/
      function _connect(&$fp)
      {
          if (!empty($this->proxy_host) && !empty($this->proxy_port))
          {
              $this->_isproxy = true;
              $host = $this->proxy_host;
              $port = $this->proxy_port;
          }
          else
          {
              $host = $this->host;
              $port = $this->port;
          }

          $this->status = 0;

          if ($this->_scheme == "https")
          {
              $host = "ssl://" . $host;
          }

          $fp = fsockopen(
              $host,
              $port,
              $errno,
              $errstr,
              $this->_fp_timeout
              );
          if ($fp)
          {
              // socket connection succeeded
              return true;
          }

          // socket connection failed
          $this->status = $errno;
          switch ($errno)
          {
              case -3:
                  $this->error="socket creation failed (-3)";
                  break;
              case -4:
                  $this->error="dns lookup failure (-4)";
                  break;
              case -5:
                  $this->error="connection refused or timed out (-5)";
                  break;
              default:
                  $this->error="connection failed (".$errno.")";
          }
          return false;
      }

      /*======================================================================*\
          Function:   _disconnect
          Purpose:    disconnect a socket connection
          Input:      $fp file pointer
          Output:     the result of fclose()
      \*======================================================================*/
      function _disconnect($fp)
      {
          $closed = fclose($fp);
          return $closed;
      }

      /*======================================================================*\
          Function:   _prepare_post_body
          Purpose:    Prepare post body according to encoding type
                      ($this->_submit_type); for multipart/form-data a new
                      $this->_mime_boundary is generated
          Input:      $formvars  - form variables
                      $formfiles - form upload files
          Output:     post body; null when both inputs are empty
      \*======================================================================*/
      function _prepare_post_body($formvars, $formfiles)
      {
          settype($formvars, "array");
          settype($formfiles, "array");

          if (count($formvars) == 0 && count($formfiles) == 0)
              return;

          $postdata = "";

          switch ($this->_submit_type)
          {
              case "application/x-www-form-urlencoded":
                  foreach ($formvars as $key => $val)
                  {
                      if (is_array($val) || is_object($val))
                      {
                          foreach ($val as $cur_val)
                              $postdata .= urlencode($key)."[]=".urlencode($cur_val)."&";
                      }
                      else
                          $postdata .= urlencode($key)."=".urlencode($val)."&";
                  }
                  break;

              case "multipart/form-data":
                  $this->_mime_boundary = "Snoopy".md5(uniqid(microtime()));

                  foreach ($formvars as $key => $val)
                  {
                      if (is_array($val) || is_object($val))
                      {
                          foreach ($val as $cur_val)
                          {
                              $postdata .= "--".$this->_mime_boundary."\r\n";
                              // concatenated so the field name is "key[]";
                              // "$key\[\]" would emit the backslashes too
                              $postdata .= "Content-Disposition: form-data; name=\"".$key."[]\"\r\n\r\n";
                              $postdata .= "$cur_val\r\n";
                          }
                      }
                      else
                      {
                          $postdata .= "--".$this->_mime_boundary."\r\n";
                          $postdata .= "Content-Disposition: form-data; name=\"$key\"\r\n\r\n";
                          $postdata .= "$val\r\n";
                      }
                  }

                  foreach ($formfiles as $field_name => $file_names)
                  {
                      settype($file_names, "array");
                      foreach ($file_names as $file_name)
                      {
                          if (!is_readable($file_name)) continue;

                          $fp = fopen($file_name, "rb");
                          if (!$fp) continue;
                          // fread() rejects a zero length, so an empty file
                          // is sent as an empty part
                          $size = filesize($file_name);
                          $file_content = ($size > 0) ? fread($fp, $size) : "";
                          fclose($fp);
                          $base_name = basename($file_name);

                          $postdata .= "--".$this->_mime_boundary."\r\n";
                          $postdata .= "Content-Disposition: form-data; name=\"$field_name\"; filename=\"$base_name\"\r\n\r\n";
                          $postdata .= "$file_content\r\n";
                      }
                  }
                  $postdata .= "--".$this->_mime_boundary."--\r\n";
                  break;
          }

          return $postdata;
      }
  }
  832. ?>