PageRenderTime 26ms CodeModel.GetById 16ms RepoModel.GetById 1ms app.codeStats 1ms

/wikiRoboter/BulkPageCreator/Snoopy.class.php

https://bitbucket.org/tbayen/smallprojects
PHP | 1250 lines | 994 code | 80 blank | 176 comment | 77 complexity | 93919f8608b8c2dc8b15a11204655253 MD5 | raw file
  1. <?php
  2. /*************************************************
  3. Snoopy - the PHP net client
  4. Author: Monte Ohrt <monte@ispi.net>
  5. Copyright (c): 1999-2008 New Digital Group, all rights reserved
  6. Version: 1.2.4
  7. * This library is free software; you can redistribute it and/or
  8. * modify it under the terms of the GNU Lesser General Public
  9. * License as published by the Free Software Foundation; either
  10. * version 2.1 of the License, or (at your option) any later version.
  11. *
  12. * This library is distributed in the hope that it will be useful,
  13. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15. * Lesser General Public License for more details.
  16. *
  17. * You should have received a copy of the GNU Lesser General Public
  18. * License along with this library; if not, write to the Free Software
  19. * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  20. You may contact the author of Snoopy by e-mail at:
  21. monte@ohrt.com
  22. The latest version of Snoopy can be obtained from:
  23. http://snoopy.sourceforge.net/
  24. *************************************************/
  25. class Snoopy
  26. {
  27. /**** Public variables ****/
  28. /* user definable vars */
  29. var $host = "www.php.net"; // host name we are connecting to
  30. var $port = 80; // port we are connecting to
  31. var $proxy_host = ""; // proxy host to use
  32. var $proxy_port = ""; // proxy port to use
  33. var $proxy_user = ""; // proxy user to use
  34. var $proxy_pass = ""; // proxy password to use
  35. var $agent = "Snoopy v1.2.4"; // agent we masquerade as
  36. var $referer = ""; // referer info to pass
  37. var $cookies = array(); // array of cookies to pass
  38. // $cookies["username"]="joe";
  39. var $rawheaders = array(); // array of raw headers to send
  40. // $rawheaders["Content-type"]="text/html";
  41. var $maxredirs = 5; // http redirection depth maximum. 0 = disallow
  42. var $lastredirectaddr = ""; // contains address of last redirected address
  43. var $offsiteok = true; // allows redirection off-site
  44. var $maxframes = 0; // frame content depth maximum. 0 = disallow
  45. var $expandlinks = true; // expand links to fully qualified URLs.
  46. // this only applies to fetchlinks()
  47. // submitlinks(), and submittext()
  48. var $passcookies = true; // pass set cookies back through redirects
  49. // NOTE: this currently does not respect
  50. // dates, domains or paths.
  51. var $user = ""; // user for http authentication
  52. var $pass = ""; // password for http authentication
  53. // http accept types
  54. var $accept = "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, */*";
  55. var $results = ""; // where the content is put
  56. var $error = ""; // error messages sent here
  57. var $response_code = ""; // response code returned from server
  58. var $headers = array(); // headers returned from server sent here
  59. var $maxlength = 500000; // max return data length (body)
  60. var $read_timeout = 0; // timeout on read operations, in seconds
  61. // supported only since PHP 4 Beta 4
  62. // set to 0 to disallow timeouts
  63. var $timed_out = false; // if a read operation timed out
  64. var $status = 0; // http request status
  65. var $temp_dir = "/tmp"; // temporary directory that the webserver
  66. // has permission to write to.
  67. // under Windows, this should be C:\temp
  68. var $curl_path = "/usr/local/bin/curl";
  69. // Snoopy will use cURL for fetching
  70. // SSL content if a full system path to
  71. // the cURL binary is supplied here.
  72. // set to false if you do not have
  73. // cURL installed. See http://curl.haxx.se
  74. // for details on installing cURL.
  75. // Snoopy does *not* use the cURL
  76. // library functions built into php,
  77. // as these functions are not stable
  78. // as of this Snoopy release.
  79. /**** Private variables ****/
  80. var $_maxlinelen = 4096; // max line length (headers)
  81. var $_httpmethod = "GET"; // default http request method
  82. var $_httpversion = "HTTP/1.0"; // default http request version
  83. var $_submit_method = "POST"; // default submit method
  84. var $_submit_type = "application/x-www-form-urlencoded"; // default submit type
  85. var $_mime_boundary = ""; // MIME boundary for multipart/form-data submit type
  86. var $_redirectaddr = false; // will be set if page fetched is a redirect
  87. var $_redirectdepth = 0; // increments on an http redirect
  88. var $_frameurls = array(); // frame src urls
  89. var $_framedepth = 0; // increments on frame depth
  90. var $_isproxy = false; // set if using a proxy server
  91. var $_fp_timeout = 30; // timeout for socket connection
  92. /*======================================================================*\
  93. Function: fetch
  94. Purpose: fetch the contents of a web page
  95. (and possibly other protocols in the
  96. future like ftp, nntp, gopher, etc.)
  97. Input: $URI the location of the page to fetch
  98. Output: $this->results the output text from the fetch
  99. \*======================================================================*/
  /**
   * Fetch the contents of a web page over http or https.
   *
   * After the request, a pending redirect ($this->_redirectaddr) is followed
   * while $this->maxredirs allows it, and collected frame sources
   * ($this->_frameurls) are fetched while $this->maxframes allows it. Both
   * happen by calling fetch() recursively; the output ends up in
   * $this->results.
   *
   * @param string $URI the location of the page to fetch
   * @return bool false when the scheme is not http/https, when _connect()
   *              reports failure, or when https is requested without an
   *              executable curl binary in $this->curl_path; true otherwise
   */
  function fetch($URI)
  {
      $URI_PARTS = parse_url($URI);
      // parse_url() yields false for seriously malformed URIs
      if (!is_array($URI_PARTS))
          $URI_PARTS = array();
      if (!empty($URI_PARTS["user"]))
          $this->user = $URI_PARTS["user"];
      if (!empty($URI_PARTS["pass"]))
          $this->pass = $URI_PARTS["pass"];
      if (empty($URI_PARTS["query"]))
          $URI_PARTS["query"] = '';
      if (empty($URI_PARTS["path"]))
          $URI_PARTS["path"] = '';
      if (!isset($URI_PARTS["scheme"]))
          $URI_PARTS["scheme"] = '';
      if (!isset($URI_PARTS["host"]))
          $URI_PARTS["host"] = '';

      // request target for origin servers; proxies get the complete URI
      $path = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");

      switch (strtolower($URI_PARTS["scheme"]))
      {
          case "http":
              $this->host = $URI_PARTS["host"];
              if (!empty($URI_PARTS["port"]))
                  $this->port = $URI_PARTS["port"];
              if (!$this->_connect($fp))
                  return false;
              // _isproxy is only consulted after _connect() has run
              $this->_httprequest($this->_isproxy ? $URI : $path, $fp, $URI, $this->_httpmethod);
              $this->_disconnect($fp);
              break;

          case "https":
              if (!$this->curl_path)
                  return false;
              if (function_exists("is_executable") && !is_executable($this->curl_path))
                  return false;
              $this->host = $URI_PARTS["host"];
              if (!empty($URI_PARTS["port"]))
                  $this->port = $URI_PARTS["port"];
              $this->_httpsrequest($this->_isproxy ? $URI : $path, $URI, $this->_httpmethod);
              break;

          default:
              // not a valid protocol
              $this->error = 'Invalid protocol "'.$URI_PARTS["scheme"].'"'."\n";
              return false;
      }

      /* url was redirected, check if we've hit the max depth */
      if ($this->_redirectaddr && $this->maxredirs > $this->_redirectdepth)
      {
          // "same site" = http or https on exactly this host (not host.other.tld)
          $samesite = "~^https?://".preg_quote($this->host, "~").'(?:[:/?#]|$)~i';
          if (preg_match($samesite, $this->_redirectaddr) || $this->offsiteok)
          {
              /* follow the redirect */
              $this->_redirectdepth++;
              $this->lastredirectaddr = $this->_redirectaddr;
              $this->fetch($this->_redirectaddr);
          }
      }

      if ($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
      {
          // work on a copy: the recursive fetches refill $this->_frameurls
          $frameurls = $this->_frameurls;
          $this->_frameurls = array();
          foreach ($frameurls as $frameurl)
          {
              if ($this->_framedepth >= $this->maxframes)
                  break;
              $this->fetch($frameurl);
              $this->_framedepth++;
          }
      }
      return true;
  }
  229. /*======================================================================*\
  230. Function: submit
  231. Purpose: submit an http form
  232. Input: $URI the location to post the data
  233. $formvars the formvars to use.
  234. format: $formvars["var"] = "val";
  235. $formfiles an array of files to submit
  236. format: $formfiles["var"] = "/dir/filename.ext";
  237. Output: $this->results the text output from the post
  238. \*======================================================================*/
  /**
   * Submit an http form.
   *
   * The request body is built by _prepare_post_body() and sent with
   * $this->_submit_method / $this->_submit_type. Redirects are followed while
   * $this->maxredirs allows it: a redirect address containing "?" is fetched
   * with fetch(), any other is re-submitted with the same form data. Frame
   * sources are fetched while $this->maxframes allows it. The output ends up
   * in $this->results.
   *
   * @param string $URI       the location to post the data
   * @param mixed  $formvars  the form variables, $formvars["var"] = "val"
   * @param mixed  $formfiles files to submit, $formfiles["var"] = "/dir/filename.ext"
   * @return bool false when the scheme is not http/https, when _connect()
   *              reports failure, or when https is requested without an
   *              executable curl binary in $this->curl_path; true otherwise
   */
  function submit($URI, $formvars="", $formfiles="")
  {
      $postdata = $this->_prepare_post_body($formvars, $formfiles);

      $URI_PARTS = parse_url($URI);
      // parse_url() yields false for seriously malformed URIs
      if (!is_array($URI_PARTS))
          $URI_PARTS = array();
      if (!empty($URI_PARTS["user"]))
          $this->user = $URI_PARTS["user"];
      if (!empty($URI_PARTS["pass"]))
          $this->pass = $URI_PARTS["pass"];
      if (empty($URI_PARTS["query"]))
          $URI_PARTS["query"] = '';
      if (empty($URI_PARTS["path"]))
          $URI_PARTS["path"] = '';
      if (!isset($URI_PARTS["scheme"]))
          $URI_PARTS["scheme"] = '';
      if (!isset($URI_PARTS["host"]))
          $URI_PARTS["host"] = '';

      // request target for origin servers; proxies get the complete URI
      $path = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");

      switch (strtolower($URI_PARTS["scheme"]))
      {
          case "http":
              $this->host = $URI_PARTS["host"];
              if (!empty($URI_PARTS["port"]))
                  $this->port = $URI_PARTS["port"];
              if (!$this->_connect($fp))
                  return false;
              // _isproxy is only consulted after _connect() has run
              $this->_httprequest($this->_isproxy ? $URI : $path, $fp, $URI, $this->_submit_method, $this->_submit_type, $postdata);
              $this->_disconnect($fp);
              break;

          case "https":
              if (!$this->curl_path)
                  return false;
              if (function_exists("is_executable") && !is_executable($this->curl_path))
                  return false;
              $this->host = $URI_PARTS["host"];
              if (!empty($URI_PARTS["port"]))
                  $this->port = $URI_PARTS["port"];
              $this->_httpsrequest($this->_isproxy ? $URI : $path, $URI, $this->_submit_method, $this->_submit_type, $postdata);
              break;

          default:
              // not a valid protocol
              $this->error = 'Invalid protocol "'.$URI_PARTS["scheme"].'"'."\n";
              return false;
      }

      /* url was redirected, check if we've hit the max depth */
      if ($this->_redirectaddr && $this->maxredirs > $this->_redirectdepth)
      {
          // only qualify redirects that carry no scheme at all; an absolute
          // redirect to a different scheme (http -> https) must stay untouched
          if (!preg_match("|^[a-z][a-z0-9+.\-]*://|i", $this->_redirectaddr))
              $this->_redirectaddr = $this->_expandlinks($this->_redirectaddr, $URI_PARTS["scheme"]."://".$URI_PARTS["host"]);

          // "same site" = http or https on exactly this host (not host.other.tld)
          $samesite = "~^https?://".preg_quote($this->host, "~").'(?:[:/?#]|$)~i';
          if (preg_match($samesite, $this->_redirectaddr) || $this->offsiteok)
          {
              /* follow the redirect */
              $this->_redirectdepth++;
              $this->lastredirectaddr = $this->_redirectaddr;
              if (strpos($this->_redirectaddr, "?") > 0)
                  $this->fetch($this->_redirectaddr); // the redirect has changed the request method from post to get
              else
                  $this->submit($this->_redirectaddr, $formvars, $formfiles);
          }
      }

      if ($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
      {
          // work on a copy: the recursive fetches refill $this->_frameurls
          $frameurls = $this->_frameurls;
          $this->_frameurls = array();
          foreach ($frameurls as $frameurl)
          {
              if ($this->_framedepth >= $this->maxframes)
                  break;
              $this->fetch($frameurl);
              $this->_framedepth++;
          }
      }
      return true;
  }
  379. /*======================================================================*\
  380. Function: fetchlinks
  381. Purpose: fetch the links from a web page
  382. Input: $URI where you are fetching from
  383. Output: $this->results an array of the URLs
  384. \*======================================================================*/
  /**
   * Fetch a page and replace $this->results with the links found in it.
   *
   * When the fetch produced several documents (framed content,
   * $this->results is an array) each document is replaced by its own list of
   * links. With $this->expandlinks set, every list is passed through
   * _expandlinks() relative to the last redirect address, or to $URI when no
   * redirect was followed.
   *
   * @param string $URI where you are fetching from
   * @return bool the result of fetch()
   */
  function fetchlinks($URI)
  {
      if (!$this->fetch($URI))
          return false;

      if ($this->lastredirectaddr)
          $URI = $this->lastredirectaddr;

      if (is_array($this->results))
      {
          // expand per document: _expandlinks() cannot handle nested arrays
          foreach (array_keys($this->results) as $x)
          {
              $links = $this->_striplinks($this->results[$x]);
              if (!is_array($links))
                  $links = array();
              if ($this->expandlinks)
                  $links = $this->_expandlinks($links, $URI);
              $this->results[$x] = $links;
          }
      }
      else
      {
          $links = $this->_striplinks($this->results);
          // a document without links yields an empty list
          if (!is_array($links))
              $links = array();
          if ($this->expandlinks)
              $links = $this->_expandlinks($links, $URI);
          $this->results = $links;
      }
      return true;
  }
  405. /*======================================================================*\
  406. Function: fetchform
  407. Purpose: fetch the form elements from a web page
  408. Input: $URI where you are fetching from
  409. Output: $this->results the resulting html form
  410. \*======================================================================*/
  /**
   * Fetch a page and reduce $this->results to its form elements.
   *
   * Each fetched document (one string, or an array of strings for framed
   * content) is run through _stripform().
   *
   * @param string $URI where you are fetching from
   * @return bool the result of fetch()
   */
  function fetchform($URI)
  {
      if (!$this->fetch($URI))
          return false;

      if (!is_array($this->results))
      {
          $this->results = $this->_stripform($this->results);
          return true;
      }

      // framed content: strip every fetched document in place
      $i = 0;
      while ($i < count($this->results))
      {
          $this->results[$i] = $this->_stripform($this->results[$i]);
          $i++;
      }
      return true;
  }
  427. /*======================================================================*\
  428. Function: fetchtext
  429. Purpose: fetch the text from a web page, stripping the links
  430. Input: $URI where you are fetching from
  431. Output: $this->results the text from the web page
  432. \*======================================================================*/
  /**
   * Fetch a page and reduce $this->results to its text.
   *
   * Each fetched document (one string, or an array of strings for framed
   * content) is run through _striptext().
   *
   * @param string $URI where you are fetching from
   * @return bool the result of fetch()
   */
  function fetchtext($URI)
  {
      if (!$this->fetch($URI))
          return false;

      if (!is_array($this->results))
      {
          $this->results = $this->_striptext($this->results);
          return true;
      }

      // framed content: strip every fetched document in place
      $i = 0;
      while ($i < count($this->results))
      {
          $this->results[$i] = $this->_striptext($this->results[$i]);
          $i++;
      }
      return true;
  }
  449. /*======================================================================*\
  450. Function: submitlinks
  451. Purpose: grab links from a form submission
  452. Input: $URI where you are submitting from
  453. Output: $this->results an array of the links from the post
  454. \*======================================================================*/
  /**
   * Submit a form and replace $this->results with the links of the response.
   *
   * Links are extracted with _striplinks() and, when $this->expandlinks is
   * set, qualified with _expandlinks() against the last redirect address (or
   * $URI when no redirect was followed).
   *
   * @param string $URI       where you are submitting to
   * @param mixed  $formvars  form variables, passed on to submit()
   * @param mixed  $formfiles form files, passed on to submit()
   * @return bool the result of submit()
   */
  function submitlinks($URI, $formvars="", $formfiles="")
  {
      if (!$this->submit($URI, $formvars, $formfiles))
          return false;

      if ($this->lastredirectaddr)
          $URI = $this->lastredirectaddr;

      if (!is_array($this->results))
      {
          $this->results = $this->_striplinks($this->results);
          if ($this->expandlinks)
              $this->results = $this->_expandlinks($this->results, $URI);
          return true;
      }

      // framed content: handle every fetched document on its own
      $i = 0;
      while ($i < count($this->results))
      {
          $this->results[$i] = $this->_striplinks($this->results[$i]);
          if ($this->expandlinks)
              $this->results[$i] = $this->_expandlinks($this->results[$i], $URI);
          $i++;
      }
      return true;
  }
  481. /*======================================================================*\
  482. Function: submittext
  483. Purpose: grab text from a form submission
  484. Input: $URI where you are submitting from
  485. Output: $this->results the text from the web page
  486. \*======================================================================*/
  /**
   * Submit a form and replace $this->results with the text of the response.
   *
   * The response is run through _striptext() and, when $this->expandlinks is
   * set, additionally through _expandlinks() against the last redirect
   * address (or $URI when no redirect was followed).
   *
   * @param string $URI       where you are submitting to
   * @param mixed  $formvars  form variables, passed on to submit()
   * @param mixed  $formfiles form files, passed on to submit()
   * @return bool the result of submit()
   */
  function submittext($URI, $formvars = "", $formfiles = "")
  {
      if (!$this->submit($URI, $formvars, $formfiles))
          return false;

      if ($this->lastredirectaddr)
          $URI = $this->lastredirectaddr;

      if (!is_array($this->results))
      {
          $this->results = $this->_striptext($this->results);
          if ($this->expandlinks)
              $this->results = $this->_expandlinks($this->results, $URI);
          return true;
      }

      // framed content: handle every fetched document on its own
      $i = 0;
      while ($i < count($this->results))
      {
          $this->results[$i] = $this->_striptext($this->results[$i]);
          if ($this->expandlinks)
              $this->results[$i] = $this->_expandlinks($this->results[$i], $URI);
          $i++;
      }
      return true;
  }
  513. /*======================================================================*\
  514. Function: set_submit_multipart
  515. Purpose: Set the form submission content type to
  516. multipart/form-data
  517. \*======================================================================*/
  /**
   * Switch the form submission content type to multipart/form-data.
   */
  function set_submit_multipart()
  {
      $type = "multipart/form-data";
      $this->_submit_type = $type;
  }
  522. /*======================================================================*\
  523. Function: set_submit_normal
  524. Purpose: Set the form submission content type to
  525. application/x-www-form-urlencoded
  526. \*======================================================================*/
  /**
   * Switch the form submission content type back to
   * application/x-www-form-urlencoded.
   */
  function set_submit_normal()
  {
      $type = "application/x-www-form-urlencoded";
      $this->_submit_type = $type;
  }
  531. /*======================================================================*\
  532. Private functions
  533. \*======================================================================*/
  534. /*======================================================================*\
  535. Function: _striplinks
  536. Purpose: strip the hyperlinks from an html document
  537. Input: $document document to strip.
  538. Output: $match an array of the links
  539. \*======================================================================*/
  /**
   * Extract the href targets of all <a> tags from an html document.
   *
   * Quoted href values are returned first (in document order), followed by
   * unquoted ones. Values that are empty() are skipped.
   *
   * @param string $document document to strip
   * @return array list of link targets; an empty array when none are found
   */
  function _striplinks($document)
  {
      preg_match_all("'<\s*a\s.*?href\s*=\s*          # find <a href=
                      ([\"\'])?                        # find single or double quote
                      (?(1) (.*?)\\1 | ([^\s\>]+))     # if quote found, match up to next matching
                                                       # quote, otherwise match up to next space
                      'isx", $document, $links);

      // catenate the non-empty matches from the conditional subpattern:
      // group 2 holds quoted values, group 3 unquoted ones
      $match = array();
      foreach (array(2, 3) as $group)
      {
          foreach ($links[$group] as $val)
          {
              if (!empty($val))
                  $match[] = $val;
          }
      }
      return $match;
  }
  561. /*======================================================================*\
  562. Function: _stripform
  563. Purpose: strip the form elements from an html document
  564. Input: $document document to strip.
  565. Output: $match an array of the links
  566. \*======================================================================*/
  /**
   * Reduce an html document to its form markup.
   *
   * Keeps the FORM, INPUT, SELECT, TEXTAREA and OPTION tags (OPTION tags
   * together with the text up to the next option/select tag) and joins the
   * matches with CRLF.
   *
   * @param string $document document to strip
   * @return string the matched form elements, one per line
   */
  function _stripform($document)
  {
      $form_tags = "'<\/?(FORM|INPUT|SELECT|TEXTAREA|(OPTION))[^<>]*>(?(2)(.*(?=<\/?(option|select)[^<>]*>[\r\n]*)|(?=[\r\n]*))|(?=[\r\n]*))'Usi";
      preg_match_all($form_tags, $document, $found);

      // index 0 holds the complete text of every match
      return implode("\r\n", $found[0]);
  }
  575. /*======================================================================*\
  576. Function: _striptext
  577. Purpose: strip the text from an html document
  578. Input: $document document to strip.
  579. Output: $text the resulting text
  580. \*======================================================================*/
  /**
   * Strip an html document down to its text.
   *
   * Removes script blocks and tags, collapses whitespace that follows a line
   * break and decodes a fixed list of common html entities. The rules are
   * applied one after another in the order listed below.
   *
   * @param string $document document to strip
   * @return string the resulting text
   */
  function _striptext($document)
  {
      // pattern => replacement, applied top to bottom
      $rules = array(
          "'<script[^>]*?>.*?</script>'si" => "",      // strip out javascript
          "'<[\/\!]*?[^<>]*?>'si"          => "",      // strip out html tags
          "'([\r\n])[\s]+'"                => "\\1",   // strip out white space
          "'&(quot|#34|#034|#x22);'i"      => "\"",    // replace html entities
          "'&(lt|#60|#060|#x3c);'i"        => "<",     // (decimal and hexadecimal forms)
          "'&(gt|#62|#062|#x3e);'i"        => ">",
          "'&(nbsp|#160|#xa0);'i"          => " ",
          "'&(iexcl|#161);'i"              => chr(161),
          "'&(cent|#162);'i"               => chr(162),
          "'&(pound|#163);'i"              => chr(163),
          "'&(copy|#169);'i"               => chr(169),
          "'&(reg|#174);'i"                => chr(174),
          "'&(deg|#176);'i"                => chr(176),
          "'&(#39|#039|#x27);'"            => chr(39),
          "'&(euro|#8364);'i"              => chr(128), // europe
          "'&a(uml|UML);'"                 => "ä",     // german
          "'&o(uml|UML);'"                 => "ö",
          "'&u(uml|UML);'"                 => "ü",
          "'&A(uml|UML);'"                 => "Ä",
          "'&O(uml|UML);'"                 => "Ö",
          "'&U(uml|UML);'"                 => "Ü",
          "'&szlig;'i"                     => "ß",
          // &amp; must be decoded last: decoding it earlier turns an escaped
          // entity such as "&amp;lt;" into "&lt;" and then wrongly into "<"
          "'&(amp|#38|#038|#x26);'i"       => "&",
      );

      return preg_replace(array_keys($rules), array_values($rules), $document);
  }
  637. /*======================================================================*\
  638. Function: _expandlinks
  639. Purpose: expand each link into a fully qualified URL
  640. Input: $links the links to qualify
  641. $URI the full URI to get the base from
  642. Output: $expandedLinks the expanded links
  643. \*======================================================================*/
  /**
   * Expand each link into a fully qualified URL.
   *
   * The base is derived from $URI: scheme, host and port form the root; the
   * path, minus a trailing "name.ext" segment and trailing slash, forms the
   * directory. User info, query string and fragment of $URI are not part of
   * the base. Links starting with "/" are resolved against the root, links
   * without an http(s):// or mailto: prefix against the directory, and
   * "/./" and "/dir/../" sequences are collapsed (one pass).
   *
   * @param string|array $links the link(s) to qualify
   * @param string       $URI   the full URI to get the base from
   * @return string|array the expanded link(s), same shape as $links
   */
  function _expandlinks($links,$URI)
  {
      $URI_PARTS = parse_url($URI);
      // parse_url() yields false for seriously malformed URIs
      if (!is_array($URI_PARTS))
          $URI_PARTS = array();

      $match_root = (isset($URI_PARTS["scheme"]) ? $URI_PARTS["scheme"] : "")
                  . "://"
                  . (isset($URI_PARTS["host"]) ? $URI_PARTS["host"] : "");
      // keep a non-default port so root-relative links stay on the same server
      if (!empty($URI_PARTS["port"]))
          $match_root .= ":".$URI_PARTS["port"];

      // strip file name and trailing slash from the PATH only; running these
      // on the whole URI would eat a host such as "example.com"
      $dir = isset($URI_PARTS["path"]) ? $URI_PARTS["path"] : "";
      $dir = preg_replace("|/[^\/\.]+\.[^\/\.]+$|", "", $dir);
      $dir = preg_replace("|/$|", "", $dir);
      $match = $match_root.$dir;

      $search = array(
          // own-host prefix, only when the host name ends there
          "|^http://".preg_quote($this->host, "|")."(?![^/])|i",
          "|^(\/)|i",
          // anything that is not already absolute (http, https) or mailto
          "|^(?!https?://)(?!mailto:)|i",
          "|/\./|",
          "|/[^\/]+/\.\./|",
      );
      $replace = array(
          "",
          $match_root."/",
          $match."/",
          "/",
          "/",
      );

      $expandedLinks = preg_replace($search, $replace, $links);
      return $expandedLinks;
  }
  667. /*======================================================================*\
  668. Function: _httprequest
  669. Purpose: go get the http data from the server
  670. Input: $url the url to fetch
  671. $fp the current open file pointer
  672. $URI the full URI
  673. $body body contents to send if any (POST)
  674. Output:
  675. \*======================================================================*/
  /**
   * Send an http request over an open socket and read the response.
   *
   * Fills $this->headers, $this->status, $this->response_code and
   * $this->results; records a redirect target in $this->_redirectaddr
   * (Location/URI header or meta refresh) and frame sources in
   * $this->_frameurls.
   *
   * @param string   $url          request target (path and query, or the full URI)
   * @param resource $fp           the current open file pointer
   * @param string   $URI          the full URI
   * @param string   $http_method  request method to send
   * @param string   $content_type Content-type header value, if any
   * @param string   $body         body contents to send, if any (POST)
   * @return bool false when a read timed out ($this->status is set to -100),
   *              true otherwise
   */
  function _httprequest($url,$fp,$URI,$http_method,$content_type="",$body="")
  {
      $cookie_headers = '';
      if ($this->passcookies && $this->_redirectaddr)
          $this->setcookies();

      $URI_PARTS = parse_url($URI);
      $scheme = (is_array($URI_PARTS) && isset($URI_PARTS["scheme"])) ? $URI_PARTS["scheme"] : "";
      if (empty($url))
          $url = "/";

      /* ---- build the request head ---- */
      $headers = $http_method." ".$url." ".$this->_httpversion."\r\n";
      if (!empty($this->agent))
          $headers .= "User-Agent: ".$this->agent."\r\n";
      if (!empty($this->host) && !isset($this->rawheaders['Host'])) {
          $headers .= "Host: ".$this->host;
          if (!empty($this->port))
              $headers .= ":".$this->port;
          $headers .= "\r\n";
      }
      if (!empty($this->accept))
          $headers .= "Accept: ".$this->accept."\r\n";
      if (!empty($this->referer))
          $headers .= "Referer: ".$this->referer."\r\n";
      if (!empty($this->cookies))
      {
          if (!is_array($this->cookies))
              $this->cookies = (array)$this->cookies;
          if (count($this->cookies) > 0) {
              $cookie_headers .= 'Cookie: ';
              foreach ($this->cookies as $cookieKey => $cookieVal) {
                  $cookie_headers .= $cookieKey."=".urlencode($cookieVal)."; ";
              }
              // drop the trailing "; "
              $headers .= substr($cookie_headers, 0, -2) . "\r\n";
          }
      }
      if (!empty($this->rawheaders))
      {
          if (!is_array($this->rawheaders))
              $this->rawheaders = (array)$this->rawheaders;
          // foreach always starts at the first element, so the raw headers
          // are sent again on every redirected request
          foreach ($this->rawheaders as $headerKey => $headerVal)
              $headers .= $headerKey.": ".$headerVal."\r\n";
      }
      if (!empty($content_type)) {
          $headers .= "Content-type: $content_type";
          if ($content_type == "multipart/form-data")
              $headers .= "; boundary=".$this->_mime_boundary;
          $headers .= "\r\n";
      }
      if (!empty($body))
          $headers .= "Content-length: ".strlen($body)."\r\n";
      if (!empty($this->user) || !empty($this->pass))
          $headers .= "Authorization: Basic ".base64_encode($this->user.":".$this->pass)."\r\n";
      //add proxy auth headers
      if (!empty($this->proxy_user))
          $headers .= 'Proxy-Authorization: ' . 'Basic ' . base64_encode($this->proxy_user . ':' . $this->proxy_pass)."\r\n";
      $headers .= "\r\n";

      // set the read timeout if needed
      if ($this->read_timeout > 0)
          stream_set_timeout($fp, $this->read_timeout);
      $this->timed_out = false;

      $request = $headers.$body;
      fwrite($fp, $request, strlen($request));

      /* ---- read the response head ---- */
      $this->_redirectaddr = false;
      $this->headers = array();
      while ($currentHeader = fgets($fp, $this->_maxlinelen))
      {
          if ($this->read_timeout > 0 && $this->_check_timeout($fp))
          {
              $this->status = -100;
              return false;
          }
          // an empty line ends the header section
          if ($currentHeader == "\r\n")
              break;

          // if a header begins with Location: or URI:, set the redirect;
          // a header without a value is ignored
          if (preg_match("/^(?:Location|URI):[ \t]*(.+)/i", chop($currentHeader), $matches))
          {
              $location = $matches[1];
              // look for :// in the Location header to see if hostname is included
              if (!preg_match("|\:\/\/|", $location))
              {
                  // no host in the path, so prepend
                  $this->_redirectaddr = $scheme."://".$this->host.":".$this->port;
                  // eliminate double slash
                  if (!preg_match("|^/|", $location))
                      $this->_redirectaddr .= "/".$location;
                  else
                      $this->_redirectaddr .= $location;
              }
              else
                  $this->_redirectaddr = $location;
          }

          if (preg_match("|^HTTP/|", $currentHeader))
          {
              if (preg_match("|^HTTP/[^\s]*\s(.*?)\s|", $currentHeader, $status))
              {
                  $this->status = $status[1];
              }
              $this->response_code = $currentHeader;
          }
          $this->headers[] = $currentHeader;
      }

      /* ---- read the response body ---- */
      $results = '';
      do {
          $_data = fread($fp, $this->maxlength);
          if (strlen($_data) == 0) {
              break;
          }
          $results .= $_data;
      } while (true);

      if ($this->read_timeout > 0 && $this->_check_timeout($fp))
      {
          $this->status = -100;
          return false;
      }

      // check if there is a redirect meta tag
      if (preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]*URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i", $results, $match))
      {
          $this->_redirectaddr = $this->_expandlinks($match[1], $URI);
      }

      // have we hit our frame depth and is there frame src to fetch?
      if (($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i", $results, $match))
      {
          // results may still be a string; [] on a string is a fatal error
          if (!is_array($this->results))
              $this->results = array();
          $this->results[] = $results;
          foreach ($match[1] as $framesrc)
              $this->_frameurls[] = $this->_expandlinks($framesrc, $scheme."://".$this->host);
      }
      // have we already fetched framed content?
      elseif (is_array($this->results))
          $this->results[] = $results;
      // no framed content
      else
          $this->results = $results;

      return true;
  }
  /*======================================================================*\
  Function: _httpsrequest
  Purpose:  fetch a document over HTTPS by shelling out to the curl binary
            named in $this->curl_path, then parse the response headers
            and body into the object's state
  Input:    $url          request path (not used; curl is driven by $URI)
            $URI          the full URI handed to curl
            $http_method  kept for signature parity; it is not passed to
                          curl (curl sends a POST when -d supplies a body)
            $content_type value for the Content-type header, if any
            $body         body contents to send if any (POST)
  Output:   true on success, false on failure (with $this->error set).
            Fills $this->headers, $this->response_code,
            $this->_redirectaddr, $this->_frameurls and $this->results.
  \*======================================================================*/
  function _httpsrequest($url,$URI,$http_method,$content_type="",$body="")
  {
      if($this->passcookies && $this->_redirectaddr)
          $this->setcookies();

      $headers = array();
      $URI_PARTS = parse_url($URI);

      // GET ... header not needed for curl
      if(!empty($this->agent))
          $headers[] = "User-Agent: ".$this->agent;
      if(!empty($this->host))
      {
          if(!empty($this->port))
              $headers[] = "Host: ".$this->host.":".$this->port;
          else
              $headers[] = "Host: ".$this->host;
      }
      if(!empty($this->accept))
          $headers[] = "Accept: ".$this->accept;
      if(!empty($this->referer))
          $headers[] = "Referer: ".$this->referer;
      if(!empty($this->cookies))
      {
          if(!is_array($this->cookies))
              $this->cookies = (array)$this->cookies;
          $cookie_pairs = array();
          foreach($this->cookies as $cookieKey => $cookieVal)
              $cookie_pairs[] = $cookieKey."=".urlencode($cookieVal);
          $headers[] = "Cookie: ".implode("; ",$cookie_pairs);
      }
      if(!empty($this->rawheaders))
      {
          if(!is_array($this->rawheaders))
              $this->rawheaders = (array)$this->rawheaders;
          // foreach instead of each(): each() no longer exists in PHP 8
          foreach($this->rawheaders as $headerKey => $headerVal)
              $headers[] = $headerKey.": ".$headerVal;
      }
      if(!empty($content_type))
      {
          if($content_type == "multipart/form-data")
              $headers[] = "Content-type: $content_type; boundary=".$this->_mime_boundary;
          else
              $headers[] = "Content-type: $content_type";
      }
      if(!empty($body))
          $headers[] = "Content-length: ".strlen($body);
      if(!empty($this->user) || !empty($this->pass))
          $headers[] = "Authorization: BASIC ".base64_encode($this->user.":".$this->pass);

      // Build the curl argument list. Every value is passed through
      // escapeshellarg() so that quotes, backticks, "$" and similar
      // characters in headers, body or URI cannot break out of the
      // argument and run arbitrary shell commands.
      $cmdline_params = "";
      foreach($headers as $header)
          $cmdline_params .= " -H ".escapeshellarg($header);
      if(!empty($body))
          $cmdline_params .= " -d ".escapeshellarg($body);
      if($this->read_timeout > 0)
          $cmdline_params .= " -m ".escapeshellarg((string)$this->read_timeout);

      // curl writes the response headers to a temp file (-D). Use the
      // object's temp_dir setting when it is set, otherwise fall back to
      // the system temp directory (the original read an undefined local).
      $tmp_dir = !empty($this->temp_dir) ? $this->temp_dir : sys_get_temp_dir();
      $headerfile = tempnam($tmp_dir, "sno");
      if($headerfile === false)
      {
          $this->error = "Error: could not create a temporary file for the cURL response headers.";
          return false;
      }

      // NOTE(review): -k turns off TLS certificate verification; kept for
      // backward compatibility, but it means the peer is not authenticated.
      $results = array();
      exec($this->curl_path." -k -D ".escapeshellarg($headerfile).$cmdline_params." ".escapeshellarg($URI),$results,$return);

      if($return)
      {
          // do not leave the header temp file behind on failure
          if(file_exists($headerfile))
              unlink($headerfile);
          $this->error = "Error: cURL could not retrieve the document, error $return.";
          return false;
      }

      $results = implode("\r\n",$results);

      // read the headers, then remove the temp file right away so that no
      // later code path can leak it
      $result_headers = file($headerfile);
      if(file_exists($headerfile))
          unlink($headerfile);
      if($result_headers === false)
          $result_headers = array();

      $this->_redirectaddr = false;
      $this->headers = array();

      foreach($result_headers as $header_line)
      {
          // if a header begins with Location: or URI:, set the redirect.
          // One case-insensitive pattern both detects the header and
          // extracts its value, so "Location: x" and "location: x" work.
          if(preg_match("/^(?:Location|URI):\s*(.*)/i",chop($header_line),$matches))
          {
              $target = $matches[1];
              // look for :// in the Location header to see if hostname is included
              if(!preg_match("|\:\/\/|",$target))
              {
                  // no host in the path, so prepend
                  $this->_redirectaddr = $URI_PARTS["scheme"]."://".$this->host.":".$this->port;
                  // eliminate double slash
                  if(!preg_match("|^/|",$target))
                      $this->_redirectaddr .= "/".$target;
                  else
                      $this->_redirectaddr .= $target;
              }
              else
                  $this->_redirectaddr = $target;
          }

          if(preg_match("|^HTTP/|",$header_line))
              $this->response_code = $header_line;

          $this->headers[] = $header_line;
      }

      // check if there is a redirect meta tag
      if(preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]*URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",$results,$match))
      {
          $this->_redirectaddr = $this->_expandlinks($match[1],$URI);
      }

      // have we hit our frame depth and is there frame src to fetch?
      if(($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i",$results,$match))
      {
          $this->results[] = $results;
          for($x=0; $x<count($match[1]); $x++)
              $this->_frameurls[] = $this->_expandlinks($match[1][$x],$URI_PARTS["scheme"]."://".$this->host);
      }
      // have we already fetched framed content?
      elseif(is_array($this->results))
          $this->results[] = $results;
      // no framed content
      else
          $this->results = $results;

      return true;
  }
  /*======================================================================*\
  Function: setcookies()
  Purpose:  copy the cookies found in the Set-Cookie lines of
            $this->headers into $this->cookies, for use on a redirection
  Input:    none (reads $this->headers)
  Output:   none (updates $this->cookies; values are url-decoded)
  \*======================================================================*/
  function setcookies()
  {
      // nothing to do when no response headers have been collected
      if(empty($this->headers) || !is_array($this->headers))
          return;

      foreach($this->headers as $header)
      {
          // Header lines are stored with their line ending, so the value
          // must stop at CR/LF as well as at ";". Otherwise a cookie
          // without attributes would keep a trailing "\r\n" in its value.
          if(preg_match('/^set-cookie:\s*([^=]+)=([^;\r\n]+)/i',$header,$match))
              $this->cookies[$match[1]] = urldecode(rtrim($match[2]));
      }
  }
  /*======================================================================*\
  Function: _check_timeout
  Purpose:  report whether the given stream has timed out; only checked
            when $this->read_timeout is greater than zero
  Input:    $fp  file pointer
  Output:   true if the stream status says it timed out (also sets
            $this->timed_out to true), false otherwise
  \*======================================================================*/
  function _check_timeout($fp)
  {
      // timeout checking is off unless read_timeout is greater than zero
      if(!($this->read_timeout > 0))
          return false;

      $stream_state = socket_get_status($fp);
      if(!$stream_state["timed_out"])
          return false;

      // remember the timeout on the object before reporting it
      $this->timed_out = true;
      return true;
  }
  /*======================================================================*\
  Function: _connect
  Purpose:  make a socket connection to the target host, or to the proxy
            when both proxy_host and proxy_port are set
  Input:    $fp  file pointer (by reference); receives the socket handle
  Output:   true on success; false on failure, with $this->status set to
            the fsockopen error number and $this->error to a message
  \*======================================================================*/
  function _connect(&$fp)
  {
      if(!empty($this->proxy_host) && !empty($this->proxy_port))
      {
          $this->_isproxy = true;
          $host = $this->proxy_host;
          $port = $this->proxy_port;
      }
      else
      {
          $host = $this->host;
          $port = $this->port;
      }

      $this->status = 0;

      $fp = fsockopen($host,$port,$errno,$errstr,$this->_fp_timeout);
      if($fp)
      {
          // socket connection succeeded
          return true;
      }

      // socket connection failed
      $this->status = $errno;
      // Each case ends in break: without it every case fell through to
      // "default" and the specific message was always overwritten.
      switch($errno)
      {
          case -3:
              $this->error="socket creation failed (-3)";
              break;
          case -4:
              $this->error="dns lookup failure (-4)";
              break;
          case -5:
              $this->error="connection refused or timed out (-5)";
              break;
          default:
              $this->error="connection failed (".$errno.")";
      }
      return false;
  }
  /*======================================================================*\
  Function: _disconnect
  Purpose:  close a socket connection
  Input:    $fp  file pointer
  Output:   the result of fclose() on the pointer
  \*======================================================================*/
  function _disconnect($fp)
  {
      $closed = fclose($fp);
      return $closed;
  }
  /*======================================================================*\
  Function: _prepare_post_body
  Purpose:  Prepare post body according to encoding type
            ($this->_submit_type). For multipart/form-data a fresh
            boundary is generated and stored in $this->_mime_boundary.
  Input:    $formvars  - form variables (name => value, or name => list
                         of values, sent as "name[]")
            $formfiles - form upload files (field name => file name or
                         list of file names); unreadable files are skipped
  Output:   post body; nothing (null) when both inputs are empty; an
            empty string for an unrecognised submit type
  \*======================================================================*/
  function _prepare_post_body($formvars, $formfiles)
  {
      settype($formvars, "array");
      settype($formfiles, "array");
      $postdata = '';

      if (count($formvars) == 0 && count($formfiles) == 0)
          return;

      // foreach is used throughout: each() no longer exists in PHP 8
      switch ($this->_submit_type) {
          case "application/x-www-form-urlencoded":
              foreach ($formvars as $key => $val) {
                  if (is_array($val) || is_object($val)) {
                      foreach ($val as $cur_val) {
                          $postdata .= urlencode($key)."[]=".urlencode($cur_val)."&";
                      }
                  } else
                      $postdata .= urlencode($key)."=".urlencode($val)."&";
              }
              break;

          case "multipart/form-data":
              $this->_mime_boundary = "Snoopy".md5(uniqid(microtime()));

              foreach ($formvars as $key => $val) {
                  if (is_array($val) || is_object($val)) {
                      foreach ($val as $cur_val) {
                          $postdata .= "--".$this->_mime_boundary."\r\n";
                          // {$key}[] yields name="key[]"; the former
                          // "$key\[\]" left literal backslashes in the name
                          $postdata .= "Content-Disposition: form-data; name=\"{$key}[]\"\r\n\r\n";
                          $postdata .= "$cur_val\r\n";
                      }
                  } else {
                      $postdata .= "--".$this->_mime_boundary."\r\n";
                      $postdata .= "Content-Disposition: form-data; name=\"$key\"\r\n\r\n";
                      $postdata .= "$val\r\n";
                  }
              }

              foreach ($formfiles as $field_name => $file_names) {
                  settype($file_names, "array");
                  foreach ($file_names as $file_name) {
                      if (!is_file($file_name) || !is_readable($file_name)) continue;

                      // binary-safe read that also copes with empty files
                      $file_content = file_get_contents($file_name);
                      if ($file_content === false) continue;
                      $base_name = basename($file_name);

                      $postdata .= "--".$this->_mime_boundary."\r\n";
                      $postdata .= "Content-Disposition: form-data; name=\"$field_name\"; filename=\"$base_name\"\r\n\r\n";
                      $postdata .= "$file_content\r\n";
                  }
              }
              $postdata .= "--".$this->_mime_boundary."--\r\n";
              break;
      }

      return $postdata;
  }
  1079. }
  1080. ?>