PageRenderTime 72ms CodeModel.GetById 38ms RepoModel.GetById 0ms app.codeStats 1ms

/snoopy.class.php

https://github.com/zhangv/wechat-php-sdk
PHP | 1275 lines | 1021 code | 78 blank | 176 comment | 79 complexity | 5a03b3587eae2184311912f03a0d5cd7 MD5 | raw file
  1. <?php
  2. /*************************************************
  3. Snoopy - the PHP net client
  4. Author: Monte Ohrt <monte@ispi.net>
  5. Copyright (c): 1999-2008 New Digital Group, all rights reserved
  6. Version: 1.2.4
  7. * This library is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU Lesser General Public
  9. * License as published by the Free Software Foundation; either
  10. * version 2.1 of the License, or (at your option) any later version.
  11. *
  12. * This library is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public
  18. * License along with this library; if not, write to the Free Software
  19. * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  20. You may contact the author of Snoopy by e-mail at:
  21. monte@ohrt.com
  22. The latest version of Snoopy can be obtained from:
  23. http://snoopy.sourceforge.net/
  24. *************************************************/
  25. class Snoopy
  26. {
  27. /**** Public variables ****/
  28. /* user definable vars */
  29. var $host = "www.php.net"; // host name we are connecting to
  30. var $port = 80; // port we are connecting to
  31. var $proxy_host = ""; // proxy host to use
  32. var $proxy_port = ""; // proxy port to use
  33. var $proxy_user = ""; // proxy user to use
  34. var $proxy_pass = ""; // proxy password to use
  35. var $agent = "Mozilla/5.0"; // agent we masquerade as
  36. var $referer = ""; // referer info to pass
  37. var $cookies = array(); // array of cookies to pass
  38. // $cookies["username"]="joe";
  39. var $rawheaders = array(); // array of raw headers to send
  40. // $rawheaders["Content-type"]="text/html";
  41. var $maxredirs = 5; // http redirection depth maximum. 0 = disallow
  42. var $lastredirectaddr = ""; // contains address of last redirected address
  43. var $offsiteok = true; // allows redirection off-site
  44. var $maxframes = 0; // frame content depth maximum. 0 = disallow
  45. var $expandlinks = true; // expand links to fully qualified URLs.
  46. // this only applies to fetchlinks()
  47. // submitlinks(), and submittext()
  48. var $passcookies = true; // pass set cookies back through redirects
  49. // NOTE: this currently does not respect
  50. // dates, domains or paths.
  51. var $user = ""; // user for http authentication
  52. var $pass = ""; // password for http authentication
  53. // http accept types
  54. var $accept = "application/json, text/javascript, */*; q=0.01";
  55. var $results = ""; // where the content is put
  56. var $error = ""; // error messages sent here
  57. var $response_code = ""; // response code returned from server
  58. var $headers = array(); // headers returned from server sent here
  59. var $maxlength = 500000; // max return data length (body)
  60. var $read_timeout = 0; // timeout on read operations, in seconds
  61. // supported only since PHP 4 Beta 4
  62. // set to 0 to disallow timeouts
  63. var $timed_out = false; // if a read operation timed out
  64. var $status = 0; // http request status
  65. var $temp_dir = "/tmp"; // temporary directory that the webserver
  66. // has permission to write to.
  67. // under Windows, this should be C:\temp
  68. var $curl_path = "/usr/local/bin/curl";
  69. // Snoopy will use cURL for fetching
  70. // SSL content if a full system path to
  71. // the cURL binary is supplied here.
  72. // set to false if you do not have
  73. // cURL installed. See http://curl.haxx.se
  74. // for details on installing cURL.
  75. // NOTE: this copy of Snoopy looks for
  76. // PHP's built-in cURL functions first
  77. // (curl_init); this path is only checked
  78. // when those functions are unavailable.
  79. /**** Private variables ****/
  80. var $_maxlinelen = 4096; // max line length (headers)
  81. var $_httpmethod = "GET"; // default http request method
  82. var $_httpversion = "HTTP/1.0"; // default http request version
  83. var $_submit_method = "POST"; // default submit method
  84. var $_submit_type = "application/x-www-form-urlencoded"; // default submit type
  85. var $_mime_boundary = ""; // MIME boundary for multipart/form-data submit type
  86. var $_redirectaddr = false; // will be set if page fetched is a redirect
  87. var $_redirectdepth = 0; // increments on an http redirect
  88. var $_frameurls = array(); // frame src urls
  89. var $_framedepth = 0; // increments on frame depth
  90. var $_isproxy = false; // set if using a proxy server
  91. var $_fp_timeout = 30; // timeout for socket connection
  92. /*======================================================================*\
  93. Function: fetch
  94. Purpose: fetch the contents of a web page
  95. (and possibly other protocols in the
  96. future like ftp, nntp, gopher, etc.)
  97. Input: $URI the location of the page to fetch
  98. Output: $this->results the output text from the fetch
  99. \*======================================================================*/
  /**
   * Fetch a page over http or https and store the body in $this->results.
   *
   * Redirects are followed while $this->maxredirs allows it and frame
   * sources while $this->maxframes allows it; both re-enter fetch().
   *
   * @param string $URI absolute URL of the page to fetch
   * @return bool false when the scheme is not http/https, when the http
   *              connection cannot be opened, or when https is requested
   *              and neither curl_init() nor an executable $curl_path is
   *              available; true otherwise (the result of the request
   *              itself is not reflected in the return value)
   */
  function fetch($URI)
  {
      $URI_PARTS = parse_url($URI);
      // parse_url() returns false for seriously malformed URLs
      if (!is_array($URI_PARTS))
          $URI_PARTS = array();

      if (!empty($URI_PARTS["user"]))
          $this->user = $URI_PARTS["user"];
      if (!empty($URI_PARTS["pass"]))
          $this->pass = $URI_PARTS["pass"];

      $query = empty($URI_PARTS["query"]) ? "" : $URI_PARTS["query"];
      $path = empty($URI_PARTS["path"]) ? "" : $URI_PARTS["path"];
      $raw_scheme = isset($URI_PARTS["scheme"]) ? $URI_PARTS["scheme"] : "";
      $scheme = strtolower($raw_scheme);

      if ($scheme != "http" && $scheme != "https") {
          // not a valid protocol
          $this->error = 'Invalid protocol "'.$raw_scheme.'"\n';
          return false;
      }

      if ($scheme == "https" && !function_exists('curl_init')) {
          // without the cURL functions a runnable curl binary is required
          if (!$this->curl_path)
              return false;
          if (function_exists("is_executable") && !is_executable($this->curl_path))
              return false;
      }

      $this->host = isset($URI_PARTS["host"]) ? $URI_PARTS["host"] : "";
      if (!empty($URI_PARTS["port"]))
          $this->port = $URI_PARTS["port"];

      // using a proxy: send the entire URI; otherwise only path and query
      $target = $this->_isproxy ? $URI : $path.($query ? "?".$query : "");

      if ($scheme == "http") {
          if (!$this->_connect($fp))
              return false;
          $this->_httprequest($target, $fp, $URI, $this->_httpmethod);
          $this->_disconnect($fp);
      } else {
          $this->_httpsrequest($target, $URI, $this->_httpmethod);
      }

      /* url was redirected, check if we've hit the max depth */
      if ($this->_redirectaddr && $this->maxredirs > $this->_redirectdepth) {
          // only follow the redirect if it's on this site, or offsiteok is true.
          // The host must be followed by a URL delimiter (or the end) so that
          // "host.evil.example" is not mistaken for "host".
          $onsite = preg_match(
              '~^'.$scheme.'://'.preg_quote($this->host, '~').'(?:[:/?#]|$)~i',
              $this->_redirectaddr
          );
          if ($onsite || $this->offsiteok) {
              /* follow the redirect */
              $this->_redirectdepth++;
              $this->lastredirectaddr = $this->_redirectaddr;
              $this->fetch($this->_redirectaddr);
          }
      }

      if ($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0) {
          // work on a copy: the nested fetch() calls refill $this->_frameurls
          $frameurls = $this->_frameurls;
          $this->_frameurls = array();
          foreach ($frameurls as $frameurl) {
              if ($this->_framedepth >= $this->maxframes)
                  break;
              $this->fetch($frameurl);
              $this->_framedepth++;
          }
      }

      return true;
  }
  231. /*======================================================================*\
  232. Function: submit
  233. Purpose: submit an http form
  234. Input: $URI the location to post the data
  235. $formvars the formvars to use.
  236. format: $formvars["var"] = "val";
  237. $formfiles an array of files to submit
  238. format: $formfiles["var"] = "/dir/filename.ext";
  239. Output: $this->results the text output from the post
  240. \*======================================================================*/
  /**
   * Submit form data to a URL over http or https and store the response
   * body in $this->results.
   *
   * The request uses $this->_submit_method and $this->_submit_type. A
   * redirect whose address contains "?" is followed with fetch(); any
   * other redirect is followed by submitting the same data again.
   *
   * @param string $URI       absolute URL to submit to
   * @param mixed  $formvars  form variables, handed to _prepare_post_body()
   * @param mixed  $formfiles files to upload, handed to _prepare_post_body()
   * @return bool false when the scheme is not http/https, when the http
   *              connection cannot be opened, or when https is requested
   *              and neither curl_init() nor an executable $curl_path is
   *              available; true otherwise
   */
  function submit($URI, $formvars="", $formfiles="")
  {
      $postdata = $this->_prepare_post_body($formvars, $formfiles);

      $URI_PARTS = parse_url($URI);
      // parse_url() returns false for seriously malformed URLs
      if (!is_array($URI_PARTS))
          $URI_PARTS = array();

      if (!empty($URI_PARTS["user"]))
          $this->user = $URI_PARTS["user"];
      if (!empty($URI_PARTS["pass"]))
          $this->pass = $URI_PARTS["pass"];

      $query = empty($URI_PARTS["query"]) ? "" : $URI_PARTS["query"];
      $path = empty($URI_PARTS["path"]) ? "" : $URI_PARTS["path"];
      $raw_scheme = isset($URI_PARTS["scheme"]) ? $URI_PARTS["scheme"] : "";
      $scheme = strtolower($raw_scheme);

      if ($scheme != "http" && $scheme != "https") {
          // not a valid protocol
          $this->error = 'Invalid protocol "'.$raw_scheme.'"\n';
          return false;
      }

      if ($scheme == "https" && !function_exists('curl_init')) {
          // without the cURL functions a runnable curl binary is required
          if (!$this->curl_path)
              return false;
          if (function_exists("is_executable") && !is_executable($this->curl_path))
              return false;
      }

      $this->host = isset($URI_PARTS["host"]) ? $URI_PARTS["host"] : "";
      if (!empty($URI_PARTS["port"]))
          $this->port = $URI_PARTS["port"];

      // using a proxy: send the entire URI; otherwise only path and query
      $target = $this->_isproxy ? $URI : $path.($query ? "?".$query : "");

      if ($scheme == "http") {
          if (!$this->_connect($fp))
              return false;
          $this->_httprequest($target, $fp, $URI, $this->_submit_method, $this->_submit_type, $postdata);
          $this->_disconnect($fp);
      } else {
          $this->_httpsrequest($target, $URI, $this->_submit_method, $this->_submit_type, $postdata);
      }

      /* url was redirected, check if we've hit the max depth */
      if ($this->_redirectaddr && $this->maxredirs > $this->_redirectdepth) {
          // a redirect address without our scheme prefix is resolved against the submitted host
          if (!preg_match("|^".$raw_scheme."://|", $this->_redirectaddr))
              $this->_redirectaddr = $this->_expandlinks($this->_redirectaddr, $raw_scheme."://".$this->host);

          // only follow the redirect if it's on this site, or offsiteok is true.
          // The host must be followed by a URL delimiter (or the end) so that
          // "host.evil.example" is not mistaken for "host".
          $onsite = preg_match(
              '~^'.$scheme.'://'.preg_quote($this->host, '~').'(?:[:/?#]|$)~i',
              $this->_redirectaddr
          );
          if ($onsite || $this->offsiteok) {
              /* follow the redirect */
              $this->_redirectdepth++;
              $this->lastredirectaddr = $this->_redirectaddr;
              if (strpos($this->_redirectaddr, "?") > 0)
                  $this->fetch($this->_redirectaddr); // the redirect has changed the request method from post to get
              else
                  $this->submit($this->_redirectaddr, $formvars, $formfiles);
          }
      }

      if ($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0) {
          // work on a copy: the nested fetch() calls refill $this->_frameurls
          $frameurls = $this->_frameurls;
          $this->_frameurls = array();
          foreach ($frameurls as $frameurl) {
              if ($this->_framedepth >= $this->maxframes)
                  break;
              $this->fetch($frameurl);
              $this->_framedepth++;
          }
      }

      return true;
  }
  383. /*======================================================================*\
  384. Function: fetchlinks
  385. Purpose: fetch the links from a web page
  386. Input: $URI where you are fetching from
  387. Output: $this->results an array of the URLs
  388. \*======================================================================*/
  /**
   * Fetch a page and replace $this->results with the links found in it.
   *
   * When the fetch produced several documents ($this->results is an
   * array), every entry is reduced to its own list of links. Links are
   * expanded against the last redirect address, if any, else against $URI.
   *
   * @param string $URI where you are fetching from
   * @return bool the result of fetch(); $this->results is only rewritten
   *              on success
   */
  function fetchlinks($URI)
  {
      if (!$this->fetch($URI))
          return false;

      if ($this->lastredirectaddr)
          $URI = $this->lastredirectaddr;

      if (is_array($this->results)) {
          // expand per entry: _expandlinks() on the outer array would hand
          // preg_replace() nested arrays, which it cannot process
          foreach (array_keys($this->results) as $x) {
              $this->results[$x] = $this->_striplinks($this->results[$x]);
              if ($this->expandlinks)
                  $this->results[$x] = $this->_expandlinks($this->results[$x], $URI);
          }
      } else {
          $this->results = $this->_striplinks($this->results);
          if ($this->expandlinks)
              $this->results = $this->_expandlinks($this->results, $URI);
      }

      return true;
  }
  409. /*======================================================================*\
  410. Function: fetchform
  411. Purpose: fetch the form elements from a web page
  412. Input: $URI where you are fetching from
  413. Output: $this->results the resulting html form
  414. \*======================================================================*/
  /**
   * Fetch a page and replace $this->results with its form markup.
   *
   * @param string $URI where you are fetching from
   * @return bool the result of fetch(); on success every fetched document
   *              in $this->results has been passed through _stripform()
   */
  function fetchform($URI)
  {
      if (!$this->fetch($URI))
          return false;

      // single document: results is a plain string
      if (!is_array($this->results)) {
          $this->results = $this->_stripform($this->results);
          return true;
      }

      // several documents: reduce each entry in place
      $idx = 0;
      while ($idx < count($this->results)) {
          $this->results[$idx] = $this->_stripform($this->results[$idx]);
          $idx++;
      }
      return true;
  }
  431. /*======================================================================*\
  432. Function: fetchtext
  433. Purpose: fetch the text from a web page, stripping the links
  434. Input: $URI where you are fetching from
  435. Output: $this->results the text from the web page
  436. \*======================================================================*/
  /**
   * Fetch a page and replace $this->results with its text content.
   *
   * @param string $URI where you are fetching from
   * @return bool the result of fetch(); on success every fetched document
   *              in $this->results has been passed through _striptext()
   */
  function fetchtext($URI)
  {
      if (!$this->fetch($URI))
          return false;

      // single document: results is a plain string
      if (!is_array($this->results)) {
          $this->results = $this->_striptext($this->results);
          return true;
      }

      // several documents: reduce each entry in place
      $idx = 0;
      while ($idx < count($this->results)) {
          $this->results[$idx] = $this->_striptext($this->results[$idx]);
          $idx++;
      }
      return true;
  }
  453. /*======================================================================*\
  454. Function: submitlinks
  455. Purpose: grab links from a form submission
  456. Input: $URI where you are submitting from
  457. Output: $this->results an array of the links from the post
  458. \*======================================================================*/
  /**
   * Submit a form and replace $this->results with the links found in the
   * response.
   *
   * Links are expanded (when $this->expandlinks is set) against the last
   * redirect address, if any, else against $URI.
   *
   * @param string $URI       where you are submitting to
   * @param mixed  $formvars  form variables, passed through to submit()
   * @param mixed  $formfiles files to upload, passed through to submit()
   * @return bool the result of submit()
   */
  function submitlinks($URI, $formvars="", $formfiles="")
  {
      if (!$this->submit($URI, $formvars, $formfiles))
          return false;

      if ($this->lastredirectaddr)
          $URI = $this->lastredirectaddr;

      // single document: results is a plain string
      if (!is_array($this->results)) {
          $this->results = $this->_striplinks($this->results);
          if ($this->expandlinks)
              $this->results = $this->_expandlinks($this->results, $URI);
          return true;
      }

      // several documents: strip and expand each entry in place
      $idx = 0;
      while ($idx < count($this->results)) {
          $this->results[$idx] = $this->_striplinks($this->results[$idx]);
          if ($this->expandlinks)
              $this->results[$idx] = $this->_expandlinks($this->results[$idx], $URI);
          $idx++;
      }
      return true;
  }
  485. /*======================================================================*\
  486. Function: submittext
  487. Purpose: grab text from a form submission
  488. Input: $URI where you are submitting from
  489. Output: $this->results the text from the web page
  490. \*======================================================================*/
  /**
   * Submit a form and replace $this->results with the text of the response.
   *
   * When $this->expandlinks is set the stripped text is additionally run
   * through _expandlinks(), using the last redirect address, if any, else
   * $URI as the base.
   *
   * @param string $URI       where you are submitting to
   * @param mixed  $formvars  form variables, passed through to submit()
   * @param mixed  $formfiles files to upload, passed through to submit()
   * @return bool the result of submit()
   */
  function submittext($URI, $formvars = "", $formfiles = "")
  {
      if (!$this->submit($URI, $formvars, $formfiles))
          return false;

      if ($this->lastredirectaddr)
          $URI = $this->lastredirectaddr;

      // single document: results is a plain string
      if (!is_array($this->results)) {
          $this->results = $this->_striptext($this->results);
          if ($this->expandlinks)
              $this->results = $this->_expandlinks($this->results, $URI);
          return true;
      }

      // several documents: strip and expand each entry in place
      $idx = 0;
      while ($idx < count($this->results)) {
          $this->results[$idx] = $this->_striptext($this->results[$idx]);
          if ($this->expandlinks)
              $this->results[$idx] = $this->_expandlinks($this->results[$idx], $URI);
          $idx++;
      }
      return true;
  }
  517. /*======================================================================*\
  518. Function: set_submit_multipart
  519. Purpose: Set the form submission content type to
  520. multipart/form-data
  521. \*======================================================================*/
  /**
   * Switch the content type used for form submissions to
   * multipart/form-data.
   */
  function set_submit_multipart()
  {
      $multipart = "multipart/form-data";
      $this->_submit_type = $multipart;
  }
  526. /*======================================================================*\
  527. Function: set_submit_normal
  528. Purpose: Set the form submission content type to
  529. application/x-www-form-urlencoded
  530. \*======================================================================*/
  /**
   * Switch the content type used for form submissions back to
   * application/x-www-form-urlencoded.
   */
  function set_submit_normal()
  {
      $urlencoded = "application/x-www-form-urlencoded";
      $this->_submit_type = $urlencoded;
  }
  535. /*======================================================================*\
  536. Private functions
  537. \*======================================================================*/
  538. /*======================================================================*\
  539. Function: _striplinks
  540. Purpose: strip the hyperlinks from an html document
  541. Input: $document document to strip.
  542. Output: $match an array of the links
  543. \*======================================================================*/
  /**
   * Collect the href value of every <a> tag in an HTML document.
   *
   * @param string $document document to scan
   * @return array all non-empty quoted href values, followed by all
   *               non-empty unquoted ones; an empty array when the
   *               document contains no links
   */
  function _striplinks($document)
  {
      preg_match_all("'<\s*a\s.*?href\s*=\s*          # find <a href=
                      ([\"\'])?                       # find single or double quote
                      (?(1) (.*?)\\1 | ([^\s\>]+))    # if quote found, match up to next matching
                                                      # quote, otherwise match up to next space
                      'isx", $document, $links);

      // start from an empty list so a link-free document yields array()
      // instead of an undefined variable
      $match = array();

      // catenate the non-empty matches from the conditional subpattern:
      // group 2 holds the quoted values, group 3 the unquoted ones
      foreach (array($links[2], $links[3]) as $group) {
          foreach ($group as $val) {
              if (!empty($val))
                  $match[] = $val;
          }
      }

      // return the links
      return $match;
  }
  565. /*======================================================================*\
  566. Function: _stripform
  567. Purpose: strip the form elements from an html document
  568. Input: $document document to strip.
  569. Output: $match the matched form tags, joined with CRLF into one string
  570. \*======================================================================*/
  /**
   * Reduce an HTML document to its form markup.
   *
   * @param string $document document to strip
   * @return string every matched FORM/INPUT/SELECT/TEXTAREA/OPTION
   *                element, joined with "\r\n"; an empty string when
   *                nothing matches
   */
  function _stripform($document)
  {
      $form_pattern = "'<\/?(FORM|INPUT|SELECT|TEXTAREA|(OPTION))[^<>]*>(?(2)(.*(?=<\/?(option|select)[^<>]*>[\r\n]*)|(?=[\r\n]*))|(?=[\r\n]*))'Usi";
      preg_match_all($form_pattern, $document, $found);

      // index 0 holds the full text of each match
      return implode("\r\n", $found[0]);
  }
  579. /*======================================================================*\
  580. Function: _striptext
  581. Purpose: strip the text from an html document
  582. Input: $document document to strip.
  583. Output: $text the resulting text
  584. \*======================================================================*/
  /**
   * Reduce an HTML document to text: scripts and tags are removed,
   * whitespace following a line break is collapsed, and a fixed list of
   * entities is decoded.
   *
   * Decoded characters are emitted as single bytes via chr(), like the
   * rest of the table (e.g. chr(169) for &copy;), so the output is a
   * single-byte string, not UTF-8.
   *
   * @param string $document document to strip
   * @return string the resulting text
   */
  function _striptext($document)
  {
      // pattern => replacement, applied in this order.
      // Entities are listed one by one; these are some of the more common ones.
      $rules = array(
          "'<script[^>]*?>.*?</script>'si" => "",        // strip out javascript
          "'<[\/\!]*?[^<>]*?>'si"          => "",        // strip out html tags
          "'([\r\n])[\s]+'"                => "\\1",     // strip out white space
          "'&(quot|#34|#034|#x22);'i"      => "\"",      // replace html entities
          "'&(amp|#38|#038|#x26);'i"       => "&",       // added hexadecimal values
          "'&(lt|#60|#060|#x3c);'i"        => "<",
          "'&(gt|#62|#062|#x3e);'i"        => ">",
          "'&(nbsp|#160|#xa0);'i"          => " ",
          "'&(iexcl|#161);'i"              => chr(161),
          "'&(cent|#162);'i"               => chr(162),
          "'&(pound|#163);'i"              => chr(163),
          "'&(copy|#169);'i"               => chr(169),
          "'&(reg|#174);'i"                => chr(174),
          "'&(deg|#176);'i"                => chr(176),
          "'&(#39|#039|#x27);'"            => chr(39),
          "'&(euro|#8364);'i"              => chr(128),  // europe
          // german: written as chr() codes so the table cannot be
          // corrupted when this file is re-encoded
          "'&a(uml|UML);'"                 => chr(228),  // a umlaut
          "'&o(uml|UML);'"                 => chr(246),  // o umlaut
          "'&u(uml|UML);'"                 => chr(252),  // u umlaut
          "'&A(uml|UML);'"                 => chr(196),  // A umlaut
          "'&O(uml|UML);'"                 => chr(214),  // O umlaut
          "'&U(uml|UML);'"                 => chr(220),  // U umlaut
          "'&szlig;'i"                     => chr(223),  // sharp s
      );

      $text = preg_replace(array_keys($rules), array_values($rules), $document);
      return $text;
  }
  641. /*======================================================================*\
  642. Function: _expandlinks
  643. Purpose: expand each link into a fully qualified URL
  644. Input: $links the links to qualify
  645. $URI the full URI to get the base from
  646. Output: $expandedLinks the expanded links
  647. \*======================================================================*/
  /**
   * Expand links into fully qualified URLs relative to a base URI.
   *
   * The base directory is $URI without its query string, without a
   * trailing "name.ext" path segment and without a trailing slash.
   * Links starting with "/" are rooted at the base's scheme and host;
   * links that are not absolute http(s) or mailto links get the base
   * directory prepended; "/./" and "/segment/../" are then collapsed.
   *
   * @param string|array $links the link or links to qualify
   * @param string       $URI   the full URI to get the base from
   * @return string|array the expanded links, same shape as $links
   */
  function _expandlinks($links,$URI)
  {
      // drop the query string from the base URI
      preg_match("/^[^\?]+/", $URI, $base_match);
      $base = isset($base_match[0]) ? $base_match[0] : "";

      // split "scheme://authority" from the path so that the file-name
      // stripping below can never eat a one-dot host such as example.com
      if (preg_match('~^([a-z][a-z0-9+.\-]*://[^/]*)(.*)$~i', $base, $split)) {
          $authority = $split[1];
          $dir = $split[2];
      } else {
          $authority = "";
          $dir = $base;
      }
      $dir = preg_replace("|/[^\/\.]+\.[^\/\.]+$|", "", $dir);
      $dir = preg_replace("|/$|", "", $dir);
      $base = $authority.$dir;

      $base_parts = parse_url($base);
      if (!is_array($base_parts))
          $base_parts = array();
      $match_root =
          (isset($base_parts["scheme"]) ? $base_parts["scheme"] : "")."://".
          (isset($base_parts["host"]) ? $base_parts["host"] : "");

      $search = array(
          // strip our own host, but only when the host name really ends
          // there ("host.tld2" must not be treated as "host")
          '~^http://'.preg_quote($this->host, '~').'(?=/|$)~i',
          "|^(\/)|i",
          // anything that is not already absolute gets the base prepended;
          // https links are absolute too
          '~^(?!https?://)(?!mailto:)~i',
          "|/\./|",
          "|/[^\/]+/\.\./|"
      );
      $replace = array(
          "",
          $match_root."/",
          $base."/",
          "/",
          "/"
      );

      $expandedLinks = preg_replace($search, $replace, $links);
      return $expandedLinks;
  }
  671. /*======================================================================*\
  672. Function: _httprequest
  673. Purpose: go get the http data from the server
  674. Input: $url the url to fetch
  675. $fp the current open file pointer
  676. $URI the full URI
  677. $body body contents to send if any (POST)
  678. Output:
  679. \*======================================================================*/
  /**
   * Send one HTTP request over an open connection and read the response.
   *
   * Fills $this->headers, $this->status and $this->response_code from the
   * response, sets $this->_redirectaddr when a Location:/URI: header or a
   * meta refresh tag is found, queues frame sources in $this->_frameurls
   * while the frame depth allows it, and stores the body in
   * $this->results (appended when results is already an array or the page
   * contains frames).
   *
   * @param string   $url          the request target (path, or full URI when using a proxy)
   * @param resource $fp           the current open file pointer
   * @param string   $URI          the full URI
   * @param string   $http_method  request method, e.g. "GET" or "POST"
   * @param string   $content_type Content-type header value, "" to omit
   * @param string   $body         body contents to send if any (POST)
   * @return bool false when a read timed out ($this->status is then -100),
   *              true otherwise
   */
  function _httprequest($url,$fp,$URI,$http_method,$content_type="",$body="")
  {
      $cookie_headers = '';
      if ($this->passcookies && $this->_redirectaddr)
          $this->setcookies();

      $URI_PARTS = parse_url($URI);
      $scheme = (is_array($URI_PARTS) && isset($URI_PARTS["scheme"])) ? $URI_PARTS["scheme"] : "";
      if (empty($url))
          $url = "/";

      $headers = $http_method." ".$url." ".$this->_httpversion."\r\n";
      if (!empty($this->agent))
          $headers .= "User-Agent: ".$this->agent."\r\n";
      // a caller-supplied raw Host header takes precedence
      if (!empty($this->host) && !isset($this->rawheaders['Host'])) {
          $headers .= "Host: ".$this->host;
          if (!empty($this->port) && $this->port != 80)
              $headers .= ":".$this->port;
          $headers .= "\r\n";
      }
      if (!empty($this->accept))
          $headers .= "Accept: ".$this->accept."\r\n";
      if (!empty($this->referer))
          $headers .= "Referer: ".$this->referer."\r\n";
      if (!empty($this->cookies)) {
          if (!is_array($this->cookies))
              $this->cookies = (array)$this->cookies;
          if (count($this->cookies) > 0) {
              $cookie_headers .= 'Cookie: ';
              foreach ($this->cookies as $cookieKey => $cookieVal) {
                  $cookie_headers .= $cookieKey."=".urlencode($cookieVal)."; ";
              }
              // drop the trailing "; "
              $headers .= substr($cookie_headers, 0, -2)."\r\n";
          }
      }
      if (!empty($this->rawheaders)) {
          if (!is_array($this->rawheaders))
              $this->rawheaders = (array)$this->rawheaders;
          // foreach always starts at the first element, so the raw headers
          // are sent again on every redirect and frame request
          foreach ($this->rawheaders as $headerKey => $headerVal)
              $headers .= $headerKey.": ".$headerVal."\r\n";
      }
      if (!empty($content_type)) {
          $headers .= "Content-type: $content_type";
          if ($content_type == "multipart/form-data")
              $headers .= "; boundary=".$this->_mime_boundary;
          $headers .= "\r\n";
      }
      if (!empty($body))
          $headers .= "Content-length: ".strlen($body)."\r\n";
      if (!empty($this->user) || !empty($this->pass))
          $headers .= "Authorization: Basic ".base64_encode($this->user.":".$this->pass)."\r\n";
      // add proxy auth headers
      if (!empty($this->proxy_user))
          $headers .= 'Proxy-Authorization: ' . 'Basic ' . base64_encode($this->proxy_user . ':' . $this->proxy_pass)."\r\n";
      $headers .= "\r\n";

      // set the read timeout if needed
      if ($this->read_timeout > 0)
          socket_set_timeout($fp, $this->read_timeout);
      $this->timed_out = false;

      fwrite($fp, $headers.$body, strlen($headers.$body));

      $this->_redirectaddr = false;
      // reset rather than unset, so the property stays an array even when
      // the server sends no header lines at all
      $this->headers = array();

      while ($currentHeader = fgets($fp, $this->_maxlinelen)) {
          if ($this->read_timeout > 0 && $this->_check_timeout($fp)) {
              $this->status = -100;
              return false;
          }

          // an empty line ends the header section
          if ($currentHeader == "\r\n")
              break;

          // if a header begins with Location: or URI:, set the redirect
          if (preg_match("/^(Location:|URI:)/i", $currentHeader)) {
              // get URL portion of the redirect
              preg_match("/^(Location:|URI:)[ ]+(.*)/i", chop($currentHeader), $matches);
              // look for :// in the Location header to see if hostname is included
              if (!empty($matches)) {
                  if (!preg_match("|\:\/\/|", $matches[2])) {
                      // no host in the path, so prepend
                      $this->_redirectaddr = $scheme."://".$this->host.":".$this->port;
                      // eliminate double slash
                      if (!preg_match("|^/|", $matches[2]))
                          $this->_redirectaddr .= "/".$matches[2];
                      else
                          $this->_redirectaddr .= $matches[2];
                  }
                  else
                      $this->_redirectaddr = $matches[2];
              }
          }

          if (preg_match("|^HTTP/|", $currentHeader)) {
              if (preg_match("|^HTTP/[^\s]*\s(.*?)\s|", $currentHeader, $status)) {
                  $this->status = $status[1];
              }
              $this->response_code = $currentHeader;
          }

          $this->headers[] = $currentHeader;
      }

      // read the body until fread() delivers nothing more
      $results = '';
      while (true) {
          $_data = fread($fp, $this->maxlength);
          if ($_data === false || strlen($_data) == 0)
              break;
          $results .= $_data;
      }

      if ($this->read_timeout > 0 && $this->_check_timeout($fp)) {
          $this->status = -100;
          return false;
      }

      // check if there is a redirect meta tag
      if (preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]*URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i", $results, $match)) {
          $this->_redirectaddr = $this->_expandlinks($match[1], $URI);
      }

      // have we hit our frame depth and is there frame src to fetch?
      if (($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i", $results, $match)) {
          // $this->results starts out as a string; "[]" on a string is a
          // fatal error, so switch to an array before appending
          if (!is_array($this->results))
              $this->results = array();
          $this->results[] = $results;
          for ($x = 0; $x < count($match[1]); $x++)
              $this->_frameurls[] = $this->_expandlinks($match[1][$x], $scheme."://".$this->host);
      }
      // have we already fetched framed content?
      elseif (is_array($this->results))
          $this->results[] = $results;
      // no framed content
      else
          $this->results = $results;

      return true;
  }
  815. /*======================================================================*\
  816. Function: _httpsrequest
  817. Purpose: go get the https data from the server using curl
  818. Input: $url the url to fetch
  819. $URI the full URI
  820. $body body contents to send if any (POST)
  821. Output:
  822. \*======================================================================*/
  /**
   * Fetch an HTTPS resource and store the response on this object.
   *
   * Uses the PHP cURL extension when curl_init() exists; otherwise shells out
   * to the curl binary at $this->curl_path. In both cases certificate
   * verification is switched off (see the NOTE(review) comments below).
   *
   * Side effects: fills $this->headers, $this->response_code,
   * $this->_redirectaddr, $this->_frameurls and $this->results, and sets
   * $this->error on failure.
   *
   * $url          request path; not used by this method (curl is given $URI)
   * $URI          the full URI to fetch
   * $http_method  not used by this method: a non-empty $body is sent as POST,
   *               anything else as a plain GET
   * $content_type value for the Content-type header, if any
   * $body         body contents to send, if any (POST)
   *
   * Returns true on success, false on failure.
   */
  function _httpsrequest($url,$URI,$http_method,$content_type="",$body="")
  {
      if($this->passcookies && $this->_redirectaddr)
          $this->setcookies();

      $headers = array();
      $URI_PARTS = parse_url($URI);

      // No request line is built here: curl derives it from $URI.
      if(!empty($this->agent))
          $headers[] = "User-Agent: ".$this->agent;
      if(!empty($this->host)) {
          if(!empty($this->port) && $this->port!=80)
              $headers[] = "Host: ".$this->host.":".$this->port;
          else
              $headers[] = "Host: ".$this->host;
      }
      if(!empty($this->accept))
          $headers[] = "Accept: ".$this->accept;
      if(!empty($this->referer))
          $headers[] = "Referer: ".$this->referer;
      if(!empty($this->cookies)) {
          if(!is_array($this->cookies))
              $this->cookies = (array)$this->cookies;
          $cookie_pairs = array();
          foreach($this->cookies as $cookieKey => $cookieVal)
              $cookie_pairs[] = $cookieKey."=".urlencode($cookieVal);
          $headers[] = "Cookie: ".implode("; ", $cookie_pairs);
      }
      if(!empty($this->rawheaders)) {
          if(!is_array($this->rawheaders))
              $this->rawheaders = (array)$this->rawheaders;
          // foreach instead of each(): each() no longer exists in PHP 8.
          foreach($this->rawheaders as $headerKey => $headerVal)
              $headers[] = $headerKey.": ".$headerVal;
      }
      if(!empty($content_type)) {
          if ($content_type == "multipart/form-data")
              $headers[] = "Content-type: $content_type; boundary=".$this->_mime_boundary;
          else
              $headers[] = "Content-type: $content_type";
      }
      if(!empty($body))
          $headers[] = "Content-length: ".strlen($body);
      if(!empty($this->user) || !empty($this->pass))
          $headers[] = "Authorization: BASIC ".base64_encode($this->user.":".$this->pass);

      if (function_exists('curl_init')) {
          $ch = curl_init();
          curl_setopt($ch, CURLOPT_URL, $URI);
          curl_setopt($ch, CURLOPT_HEADER, true);
          // NOTE(review): peer and host verification are disabled, so the
          // server's identity is never checked. Kept as-is because callers
          // may rely on it; consider making this configurable.
          curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, FALSE);
          curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, FALSE);
          curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
          curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
          curl_setopt($ch, CURLOPT_TIMEOUT, $this->read_timeout);
          if(!empty($body)) {
              curl_setopt($ch, CURLOPT_POST, true);
              curl_setopt($ch, CURLOPT_POSTFIELDS, $body);
          }
          $data = curl_exec($ch);
          if ($data === false) {
              $this->error = "Error: Curl error ".curl_error($ch);
              return false;
          }
          // Header block and body are separated by the first blank line.
          $parts = explode("\r\n\r\n",$data,2);
          $result_headers = explode("\r\n",$parts[0]);
          // A response without a body has no second part.
          $results = isset($parts[1]) ? $parts[1] : "";
          unset($parts);
      } else {
          // Command-line fallback. Every value goes through escapeshellarg()
          // so header, body and URL content cannot break out of the command.
          $cmdline_params = "";
          foreach($headers as $header)
              $cmdline_params .= " -H ".escapeshellarg($header);
          if(!empty($body))
              $cmdline_params .= " -d ".escapeshellarg($body);
          if($this->read_timeout > 0)
              $cmdline_params .= " -m ".escapeshellarg((string)$this->read_timeout);

          $headerfile = tempnam(sys_get_temp_dir(), "sno");
          if($headerfile === false)
          {
              $this->error = "Error: could not create a temporary file for the response headers.";
              return false;
          }

          // NOTE(review): -k disables certificate verification, mirroring the
          // cURL-extension branch above.
          exec($this->curl_path." -k -D ".escapeshellarg($headerfile).$cmdline_params." ".escapeshellarg($URI),$results,$return);
          if($return)
          {
              // Do not leave the temp file behind on failure.
              if(is_file($headerfile))
                  unlink($headerfile);
              $this->error = "Error: cURL could not retrieve the document, error $return.";
              return false;
          }
          $results = implode("\r\n",$results);
          $result_headers = file($headerfile);
          if($result_headers === false)
              $result_headers = array();
          if(is_file($headerfile))
              unlink($headerfile);
      }

      $this->_redirectaddr = false;
      // Reset to an empty array so later count()/foreach on it stay valid
      // even when the server sent no headers.
      $this->headers = array();

      foreach($result_headers as $header_line)
      {
          // A Location: or URI: header sets the redirect target. Matching is
          // case-insensitive and allows any amount of whitespace after the colon.
          if(preg_match("/^(?:Location|URI):\s*(\S.*)$/i",chop($header_line),$matches))
          {
              $location = $matches[1];
              // look for :// in the header value to see if a hostname is included
              if(!preg_match("|\:\/\/|",$location))
              {
                  // no host in the path, so prepend scheme and host
                  $this->_redirectaddr = $URI_PARTS["scheme"]."://".$this->host;
                  // eliminate double slash
                  if(!preg_match("|^/|",$location))
                      $this->_redirectaddr .= "/".$location;
                  else
                      $this->_redirectaddr .= $location;
              }
              else
                  $this->_redirectaddr = $location;
          }

          if(preg_match("|^HTTP/|",$header_line))
              $this->response_code = $header_line;

          $this->headers[] = $header_line;
      }

      // check if there is a redirect meta tag
      if(preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]*URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",$results,$match))
      {
          $this->_redirectaddr = $this->_expandlinks($match[1],$URI);
      }

      // have we hit our frame depth and is there frame src to fetch?
      if(($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i",$results,$match))
      {
          $this->results[] = $results;
          foreach($match[1] as $frame_src)
              $this->_frameurls[] = $this->_expandlinks($frame_src,$URI_PARTS["scheme"]."://".$this->host);
      }
      // have we already fetched framed content?
      elseif(is_array($this->results))
          $this->results[] = $results;
      // no framed content
      else
          $this->results = $results;

      return true;
  }
  966. /*======================================================================*\
  967. Function: setcookies()
  968. Purpose: set cookies for a redirection
  969. \*======================================================================*/
  /**
   * Copy cookies from the last response's Set-Cookie headers into
   * $this->cookies so they can be sent along on a redirect.
   *
   * Only the name=value pair is kept; attributes after the first ';' are
   * ignored. Values are URL-decoded before being stored. Does nothing when
   * $this->headers is empty or not an array.
   */
  function setcookies()
  {
      if(empty($this->headers) || !is_array($this->headers))
          return;

      foreach($this->headers as $header)
      {
          // \r and \n are excluded from the value: header lines may still
          // carry their line terminator, which must not end up in the cookie.
          if(preg_match('/^set-cookie:\s*([^=]+)=([^;\r\n]+)/i', $header, $match))
              $this->cookies[$match[1]] = urldecode($match[2]);
      }
  }
  978. /*======================================================================*\
  979. Function: _check_timeout
  980. Purpose: checks whether timeout has occurred
  981. Input: $fp file pointer
  982. \*======================================================================*/
  /**
   * Report whether the given stream has timed out.
   *
   * The check only runs when $this->read_timeout is greater than zero.
   * When the stream reports a timeout, $this->timed_out is set to true.
   *
   * $fp  file pointer of the open connection
   *
   * Returns true if a timeout was detected, false otherwise.
   */
  function _check_timeout($fp)
  {
      // No read timeout configured: nothing to check.
      if (!($this->read_timeout > 0))
          return false;

      $stream_info = socket_get_status($fp);
      if (!$stream_info["timed_out"])
          return false;

      $this->timed_out = true;
      return true;
  }
  994. /*======================================================================*\
  995. Function: _connect
  996. Purpose: make a socket connection
  997. Input: $fp file pointer
  998. \*======================================================================*/
  /**
   * Open a socket connection to the target host, or to the proxy when both
   * $this->proxy_host and $this->proxy_port are set.
   *
   * $fp  (by reference) receives the file pointer returned by fsockopen()
   *
   * Returns true on success. On failure returns false, stores the error
   * number in $this->status and a description in $this->error.
   */
  function _connect(&$fp)
  {
      if(!empty($this->proxy_host) && !empty($this->proxy_port))
      {
          $this->_isproxy = true;
          $host = $this->proxy_host;
          $port = $this->proxy_port;
      }
      else
      {
          $host = $this->host;
          $port = $this->port;
      }

      $this->status = 0;

      $fp = fsockopen($host, $port, $errno, $errstr, $this->_fp_timeout);
      if($fp)
      {
          // socket connection succeeded
          return true;
      }

      // socket connection failed
      $this->status = $errno;
      switch($errno)
      {
          // Each case needs its own break; without it every branch falls
          // through and the message is always overwritten by the default one.
          case -3:
              $this->error="socket creation failed (-3)";
              break;
          case -4:
              $this->error="dns lookup failure (-4)";
              break;
          case -5:
              $this->error="connection refused or timed out (-5)";
              break;
          default:
              $this->error="connection failed (".$errno.")";
      }
      return false;
  }
  1042. /*======================================================================*\
  1043. Function: _disconnect
  1044. Purpose: disconnect a socket connection
  1045. Input: $fp file pointer
  1046. \*======================================================================*/
  /**
   * Close a previously opened connection.
   *
   * $fp  file pointer to close
   *
   * Returns the result of fclose(): true on success, false on failure.
   */
  function _disconnect($fp)
  {
      $closed = fclose($fp);
      return $closed;
  }
  1051. /*======================================================================*\
  1052. Function: _prepare_post_body
  1053. Purpose: Prepare post body according to encoding type
  1054. Input: $formvars - form variables
  1055. $formfiles - form upload files
  1056. Output: post body
  1057. \*======================================================================*/
  /**
   * Build the POST body for the encoding selected in $this->_submit_type.
   *
   * $formvars   form variables. A string is treated as a ready-made body and
   *             returned unchanged; so is the single element of a one-item
   *             list (key 0). Any other value is cast to an array of
   *             name => value pairs; array/object values are sent as name[].
   * $formfiles  form upload files: field name => file path or list of paths
   *             (only used for multipart/form-data)
   *
   * For multipart/form-data a fresh boundary is generated and stored in
   * $this->_mime_boundary.
   *
   * Returns the post body; null when there is nothing to send; an empty
   * string when $this->_submit_type is not a supported encoding.
   */
  function _prepare_post_body($formvars, $formfiles)
  {
      // Raw body: must be tested before the array cast below, which would
      // otherwise hide the fact that a string was passed.
      if (is_string($formvars))
          return $formvars;

      settype($formvars, "array");
      settype($formfiles, "array");
      $postdata = '';

      if (count($formvars) == 0 && count($formfiles) == 0)
          return;

      // A one-element list is also a raw body. Checking for key 0 keeps a
      // single *named* field (e.g. array('q' => 'x')) on the normal encoding
      // path instead of returning an undefined element.
      if (count($formvars) == 1 && array_key_exists(0, $formvars))
          return $formvars[0];

      switch ($this->_submit_type) {
          case "application/x-www-form-urlencoded":
              foreach ($formvars as $key => $val) {
                  if (is_array($val) || is_object($val)) {
                      foreach ($val as $cur_val)
                          $postdata .= urlencode($key)."[]=".urlencode($cur_val)."&";
                  } else
                      $postdata .= urlencode($key)."=".urlencode($val)."&";
              }
              break;

          case "multipart/form-data":
              $this->_mime_boundary = "--------".md5(uniqid(microtime()));

              foreach ($formvars as $key => $val) {
                  if (is_array($val) || is_object($val)) {
                      foreach ($val as $cur_val) {
                          $postdata .= "--".$this->_mime_boundary."\r\n";
                          // Concatenate "[]" explicitly: "\[" is not an escape
                          // sequence and would put backslashes into the name.
                          $postdata .= "Content-Disposition: form-data; name=\"".$key."[]\"\r\n\r\n";
                          $postdata .= "$cur_val\r\n";
                      }
                  } else {
                      $postdata .= "--".$this->_mime_boundary."\r\n";
                      $postdata .= "Content-Disposition: form-data; name=\"$key\"\r\n\r\n";
                      $postdata .= "$val\r\n";
                  }
              }

              foreach ($formfiles as $field_name => $file_names) {
                  settype($file_names, "array");
                  foreach ($file_names as $file_name) {
                      $file_content = file_get_contents($file_name);
                      // Skip unreadable or empty files, but not a file whose
                      // content merely evaluates to false (e.g. "0").
                      if ($file_content === false || $file_content === '') continue;

                      $base_name = basename($file_name);
                      $postdata .= "--".$this->_mime_boundary."\r\n";
                      // NOTE(review): every upload is labelled image/jpeg
                      // regardless of the actual file type.
                      $postdata .= "Content-Disposition: form-data; name=\"$field_name\"; filename=\"$base_name\"\r\nContent-Type: image/jpeg\r\n\r\n";
                      $postdata .= "$file_content\r\n";
                  }
              }
              $postdata .= "--".$this->_mime_boundary."--\r\n";
              break;
      }

      return $postdata;
  }
  1112. }