PageRenderTime 54ms CodeModel.GetById 20ms RepoModel.GetById 0ms app.codeStats 0ms

/administrator/components/com_biblestudy/Snoopy.class.php

https://bitbucket.org/ericrlarson/com_biblestudy
PHP | 1297 lines | 1029 code | 80 blank | 188 comment | 77 complexity | f12d0afe87478c99b07b6a867bdbb1ed MD5 | raw file
Possible License(s): LGPL-2.1, Apache-2.0, BSD-3-Clause
  1. <?php
  2. /*************************************************
  3. * $Id: Snoopy.class.php 361 2006-08-07 09:30:31Z beat $
  4. Snoopy - the PHP net client
  5. Author: Monte Ohrt <monte@ispi.net>
  6. Copyright (c): 1999-2000 ispi, all rights reserved
  7. Version: 1.2.3 + 2 fixes marked "//BB" (bugs 1482144 and 1192125 on sourceforge)
  8. * This library is free software; you can redistribute it and/or
  9. * modify it under the terms of the GNU Lesser General Public
  10. * License as published by the Free Software Foundation; either
  11. * version 2.1 of the License, or (at your option) any later version.
  12. *
  13. * This library is distributed in the hope that it will be useful,
  14. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  16. * Lesser General Public License for more details.
  17. *
  18. * You should have received a copy of the GNU Lesser General Public
  19. * License along with this library; if not, write to the Free Software
  20. * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  21. You may contact the author of Snoopy by e-mail at:
  22. monte@ispi.net
  23. Or, write to:
  24. Monte Ohrt
  25. CTO, ispi
  26. 237 S. 70th suite 220
  27. Lincoln, NE 68510
  28. The latest version of Snoopy can be obtained from:
  29. http://snoopy.sourceforge.net/
  30. *************************************************/
  31. class Snoopy
  32. {
  33. /**** Public variables ****/
  34. /* user definable vars */
  35. var $host = "www.php.net"; // host name we are connecting to
  36. var $port = 80; // port we are connecting to
  37. var $proxy_host = ""; // proxy host to use
  38. var $proxy_port = ""; // proxy port to use
  39. var $proxy_user = ""; // proxy user to use
  40. var $proxy_pass = ""; // proxy password to use
  41. var $agent = "Snoopy v1.2.3"; // agent we masquerade as
  42. var $referer = ""; // referer info to pass
  43. var $cookies = array(); // array of cookies to pass
  44. // $cookies["username"]="joe";
  45. var $rawheaders = array(); // array of raw headers to send
  46. // $rawheaders["Content-type"]="text/html";
  47. var $maxredirs = 5; // http redirection depth maximum. 0 = disallow
  48. var $lastredirectaddr = ""; // contains address of last redirected address
  49. var $offsiteok = true; // allows redirection off-site
  50. var $maxframes = 0; // frame content depth maximum. 0 = disallow
  51. var $expandlinks = true; // expand links to fully qualified URLs.
  52. // this only applies to fetchlinks()
  53. // submitlinks(), and submittext()
  54. var $passcookies = true; // pass set cookies back through redirects
  55. // NOTE: this currently does not respect
  56. // dates, domains or paths.
  57. var $user = ""; // user for http authentication
  58. var $pass = ""; // password for http authentication
  59. // http accept types
  60. var $accept = "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, */*";
  61. var $results = ""; // where the content is put
  62. var $error = ""; // error messages sent here
  63. var $response_code = ""; // response code returned from server
  64. var $headers = array(); // headers returned from server sent here
  65. var $maxlength = 500000; // max return data length (body)
  66. var $read_timeout = 0; // timeout on read operations, in seconds
  67. // supported only since PHP 4 Beta 4
  68. // set to 0 to disallow timeouts
  69. var $timed_out = false; // if a read operation timed out
  70. var $status = 0; // http request status
  71. var $temp_dir = "/tmp"; // temporary directory that the webserver
  72. // has permission to write to.
  73. // under Windows, this should be C:\temp
  74. var $curl_path = "/usr/local/bin/curl";
  75. // Snoopy will use cURL for fetching
  76. // SSL content if a full system path to
  77. // the cURL binary is supplied here.
  78. // set to false if you do not have
  79. // cURL installed. See http://curl.haxx.se
  80. // for details on installing cURL.
  81. // Snoopy does *not* use the cURL
  82. // library functions built into php,
  83. // as these functions are not stable
  84. // as of this Snoopy release.
  85. /**** Private variables ****/
  86. var $_maxlinelen = 4096; // max line length (headers)
  87. var $_httpmethod = "GET"; // default http request method
  88. var $_httpversion = "HTTP/1.0"; // default http request version
  89. var $_submit_method = "POST"; // default submit method
  90. var $_submit_type = "application/x-www-form-urlencoded"; // default submit type
  91. var $_mime_boundary = ""; // MIME boundary for multipart/form-data submit type
  92. var $_redirectaddr = false; // will be set if page fetched is a redirect
  93. var $_redirectdepth = 0; // increments on an http redirect
  94. var $_frameurls = array(); // frame src urls
  95. var $_framedepth = 0; // increments on frame depth
  96. var $_isproxy = false; // set if using a proxy server
  97. var $_fp_timeout = 30; // timeout for socket connection
  98. /*======================================================================*\
  99. Function: fetch
  100. Purpose: fetch the contents of a web page
  101. (and possibly other protocols in the
  102. future like ftp, nntp, gopher, etc.)
  103. Input: $URI the location of the page to fetch
  104. Output: $this->results the output text from the fetch
  105. \*======================================================================*/
  /**
   * Fetch a page over http (plain socket) or https (external cURL binary),
   * then follow redirects and frames up to the configured limits.
   *
   * The response is stored by the request helpers in $this->results; the
   * return value of the request helper itself is not inspected here.
   *
   * @param string $URI absolute URI to fetch
   * @return bool false when the socket connection fails, when cURL is not
   *              configured/executable for https, or when the scheme is not
   *              http/https (in which case $this->error is set); true otherwise
   */
  function fetch($URI)
  {
      $URI_PARTS = parse_url($URI);
      // parse_url() returns false for seriously malformed URIs
      if (!is_array($URI_PARTS))
          $URI_PARTS = array();

      if (!empty($URI_PARTS["user"]))
          $this->user = $URI_PARTS["user"];
      if (!empty($URI_PARTS["pass"]))
          $this->pass = $URI_PARTS["pass"];
      if (empty($URI_PARTS["query"]))
          $URI_PARTS["query"] = '';
      if (empty($URI_PARTS["path"]))
          $URI_PARTS["path"] = '';

      $scheme = isset($URI_PARTS["scheme"]) ? $URI_PARTS["scheme"] : '';
      $host   = isset($URI_PARTS["host"]) ? $URI_PARTS["host"] : '';
      // request target used when no proxy is involved: path plus query string
      $path   = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");

      switch (strtolower($scheme))
      {
          case "http":
              $this->host = $host;
              if (!empty($URI_PARTS["port"]))
                  $this->port = $URI_PARTS["port"];
              $fp = null;
              if (!$this->_connect($fp))
                  return false;
              // _isproxy is deliberately read only after _connect(), as in the
              // original code (presumably _connect() decides whether a proxy is used).
              // Through a proxy the entire URI is sent, otherwise only the path.
              $this->_httprequest($this->_isproxy ? $URI : $path, $fp, $URI, $this->_httpmethod);
              $this->_disconnect($fp);
              break;

          case "https":
              if (!$this->curl_path)
                  return false;
              if (function_exists("is_executable") && !is_executable($this->curl_path))
                  return false;
              $this->host = $host;
              if (!empty($URI_PARTS["port"]))
                  $this->port = $URI_PARTS["port"];
              $this->_httpsrequest($this->_isproxy ? $URI : $path, $URI, $this->_httpmethod);
              break;

          default:
              // not a valid protocol
              $this->error = 'Invalid protocol "'.$scheme.'"'."\n";
              return false;
      }

      /* url was redirected: follow it unless the max depth has been reached */
      if ($this->_redirectaddr && $this->maxredirs > $this->_redirectdepth)
      {
          // Same-site test accepts http and https and requires the host name to
          // end at a URL boundary, so "example.com.evil.org" is not "example.com".
          $samesite = preg_match(
              '~^https?://'.preg_quote($this->host, '~').'(?:[:/?#]|$)~i',
              $this->_redirectaddr
          );
          // only follow redirect if it's on this site, or offsiteok is true
          if ($samesite || $this->offsiteok)
          {
              ++$this->_redirectdepth;
              $this->lastredirectaddr = $this->_redirectaddr;
              $this->fetch($this->_redirectaddr);
          }
      }

      /* fetch the frame sources collected by the request, within maxframes */
      if ($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
      {
          $frameurls = $this->_frameurls;
          $this->_frameurls = array();
          foreach ($frameurls as $frameurl)
          {
              if ($this->_framedepth >= $this->maxframes)
                  break;
              $this->fetch($frameurl);
              ++$this->_framedepth;
          }
      }

      return true;
  }
  238. /*======================================================================*\
  239. Function: submit
  240. Purpose: submit an http form
  241. Input: $URI the location to post the data
  242. $formvars the formvars to use.
  243. format: $formvars["var"] = "val";
  244. $formfiles an array of files to submit
  245. format: $formfiles["var"] = "/dir/filename.ext";
  246. Output: $this->results the text output from the post
  247. \*======================================================================*/
  /**
   * Submit a form over http (plain socket) or https (external cURL binary),
   * then follow redirects and frames up to the configured limits.
   *
   * A redirect whose address contains a "?" is followed with fetch() (treated
   * as a switch from POST to GET); any other redirect is re-submitted with the
   * same form data.
   *
   * @param string $URI       absolute URI to post the data to
   * @param mixed  $formvars  form variables, format: $formvars["var"] = "val"
   * @param mixed  $formfiles files to submit, format: $formfiles["var"] = "/dir/filename.ext"
   * @return bool false when the socket connection fails, when cURL is not
   *              configured/executable for https, or when the scheme is not
   *              http/https (in which case $this->error is set); true otherwise
   */
  function submit($URI, $formvars="", $formfiles="")
  {
      $postdata = $this->_prepare_post_body($formvars, $formfiles);

      $URI_PARTS = parse_url($URI);
      // parse_url() returns false for seriously malformed URIs
      if (!is_array($URI_PARTS))
          $URI_PARTS = array();

      if (!empty($URI_PARTS["user"]))
          $this->user = $URI_PARTS["user"];
      if (!empty($URI_PARTS["pass"]))
          $this->pass = $URI_PARTS["pass"];
      if (empty($URI_PARTS["query"]))
          $URI_PARTS["query"] = '';
      if (empty($URI_PARTS["path"]))
          $URI_PARTS["path"] = '';

      $scheme = isset($URI_PARTS["scheme"]) ? $URI_PARTS["scheme"] : '';
      $host   = isset($URI_PARTS["host"]) ? $URI_PARTS["host"] : '';
      // request target used when no proxy is involved: path plus query string
      $path   = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");

      switch (strtolower($scheme))
      {
          case "http":
              $this->host = $host;
              if (!empty($URI_PARTS["port"]))
                  $this->port = $URI_PARTS["port"];
              $fp = null;
              if (!$this->_connect($fp))
                  return false;
              // _isproxy is deliberately read only after _connect(), as in the
              // original code (presumably _connect() decides whether a proxy is used).
              // Through a proxy the entire URI is sent, otherwise only the path.
              $this->_httprequest($this->_isproxy ? $URI : $path, $fp, $URI, $this->_submit_method, $this->_submit_type, $postdata);
              $this->_disconnect($fp);
              break;

          case "https":
              if (!$this->curl_path)
                  return false;
              if (function_exists("is_executable") && !is_executable($this->curl_path))
                  return false;
              $this->host = $host;
              if (!empty($URI_PARTS["port"]))
                  $this->port = $URI_PARTS["port"];
              $this->_httpsrequest($this->_isproxy ? $URI : $path, $URI, $this->_submit_method, $this->_submit_type, $postdata);
              break;

          default:
              // not a valid protocol
              $this->error = 'Invalid protocol "'.$scheme.'"'."\n";
              return false;
      }

      /* url was redirected: follow it unless the max depth has been reached */
      if ($this->_redirectaddr && $this->maxredirs > $this->_redirectdepth)
      {
          // Only qualify the redirect when it is not already an absolute
          // http(s) URL; testing against the request's own scheme would treat
          // an https redirect of an http submit as a relative link.
          if (!preg_match("|^https?://|i", $this->_redirectaddr))
              $this->_redirectaddr = $this->_expandlinks($this->_redirectaddr, $scheme."://".$host);

          // Same-site test accepts http and https and requires the host name to
          // end at a URL boundary, so "example.com.evil.org" is not "example.com".
          $samesite = preg_match(
              '~^https?://'.preg_quote($this->host, '~').'(?:[:/?#]|$)~i',
              $this->_redirectaddr
          );
          // only follow redirect if it's on this site, or offsiteok is true
          if ($samesite || $this->offsiteok)
          {
              ++$this->_redirectdepth;
              $this->lastredirectaddr = $this->_redirectaddr;
              if (strpos($this->_redirectaddr, "?") > 0)
                  $this->fetch($this->_redirectaddr); // the redirect has changed the request method from post to get
              else
                  $this->submit($this->_redirectaddr, $formvars, $formfiles);
          }
      }

      /* fetch the frame sources collected by the request, within maxframes */
      if ($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
      {
          $frameurls = $this->_frameurls;
          $this->_frameurls = array();
          foreach ($frameurls as $frameurl)
          {
              if ($this->_framedepth >= $this->maxframes)
                  break;
              $this->fetch($frameurl);
              ++$this->_framedepth;
          }
      }

      return true;
  }
  389. /*======================================================================*\
  390. Function: fetchlinks
  391. Purpose: fetch the links from a web page
  392. Input: $URI where you are fetching from
  393. Output: $this->results an array of the URLs
  394. \*======================================================================*/
  /**
   * Fetch a page and replace $this->results with the links found in it.
   *
   * When the fetch produced several documents (framed content, $this->results
   * is an array) each document is stripped and expanded on its own, the same
   * way submitlinks() does it; expanding the array of link arrays in one call
   * would hand nested arrays to preg_replace().
   *
   * @param string $URI where you are fetching from
   * @return bool false when fetch() failed, true otherwise
   */
  function fetchlinks($URI)
  {
      if (!$this->fetch($URI))
          return false;

      // links are qualified against the final address after redirects
      if ($this->lastredirectaddr)
          $URI = $this->lastredirectaddr;

      if (is_array($this->results))
      {
          for ($x = 0; $x < count($this->results); $x++)
          {
              $this->results[$x] = $this->_striplinks($this->results[$x]);
              if ($this->expandlinks)
                  $this->results[$x] = $this->_expandlinks($this->results[$x], $URI);
          }
      }
      else
      {
          $this->results = $this->_striplinks($this->results);
          if ($this->expandlinks)
              $this->results = $this->_expandlinks($this->results, $URI);
      }
      return true;
  }
  415. /*======================================================================*\
  416. Function: fetchform
  417. Purpose: fetch the form elements from a web page
  418. Input: $URI where you are fetching from
  419. Output: $this->results the resulting html form
  420. \*======================================================================*/
  /**
   * Fetch a page and reduce $this->results to its form markup.
   *
   * @param string $URI where you are fetching from
   * @return bool false when fetch() failed, true otherwise
   */
  function fetchform($URI)
  {
      if (!$this->fetch($URI))
          return false;

      if (!is_array($this->results))
      {
          // single document
          $this->results = $this->_stripform($this->results);
          return true;
      }

      // several documents (framed content): strip each one in place
      $idx = 0;
      while ($idx < count($this->results))
      {
          $this->results[$idx] = $this->_stripform($this->results[$idx]);
          $idx++;
      }
      return true;
  }
  437. /*======================================================================*\
  438. Function: fetchtext
  439. Purpose: fetch the text from a web page, stripping the links
  440. Input: $URI where you are fetching from
  441. Output: $this->results the text from the web page
  442. \*======================================================================*/
  /**
   * Fetch a page and reduce $this->results to its plain text.
   *
   * @param string $URI where you are fetching from
   * @return bool false when fetch() failed, true otherwise
   */
  function fetchtext($URI)
  {
      if (!$this->fetch($URI))
          return false;

      if (!is_array($this->results))
      {
          // single document
          $this->results = $this->_striptext($this->results);
          return true;
      }

      // several documents (framed content): strip each one in place
      $idx = 0;
      while ($idx < count($this->results))
      {
          $this->results[$idx] = $this->_striptext($this->results[$idx]);
          $idx++;
      }
      return true;
  }
  459. /*======================================================================*\
  460. Function: submitlinks
  461. Purpose: grab links from a form submission
  462. Input: $URI where you are submitting from
  463. Output: $this->results an array of the links from the post
  464. \*======================================================================*/
  /**
   * Submit a form and replace $this->results with the links of the response.
   *
   * @param string $URI       where you are submitting to
   * @param mixed  $formvars  form variables, passed through to submit()
   * @param mixed  $formfiles form files, passed through to submit()
   * @return bool false when submit() failed, true otherwise
   */
  function submitlinks($URI, $formvars="", $formfiles="")
  {
      if (!$this->submit($URI, $formvars, $formfiles))
          return false;

      // links are qualified against the final address after redirects
      if ($this->lastredirectaddr)
          $URI = $this->lastredirectaddr;

      if (!is_array($this->results))
      {
          // single document
          $this->results = $this->_striplinks($this->results);
          if ($this->expandlinks)
              $this->results = $this->_expandlinks($this->results, $URI);
          return true;
      }

      // several documents (framed content): process each one in place
      $idx = 0;
      while ($idx < count($this->results))
      {
          $this->results[$idx] = $this->_striplinks($this->results[$idx]);
          if ($this->expandlinks)
              $this->results[$idx] = $this->_expandlinks($this->results[$idx], $URI);
          $idx++;
      }
      return true;
  }
  491. /*======================================================================*\
  492. Function: submittext
  493. Purpose: grab text from a form submission
  494. Input: $URI where you are submitting from
  495. Output: $this->results the text from the web page
  496. \*======================================================================*/
  /**
   * Submit a form and reduce $this->results to the plain text of the response.
   *
   * When $this->expandlinks is set, the stripped text is additionally passed
   * through _expandlinks(), exactly as the original implementation did.
   *
   * @param string $URI       where you are submitting to
   * @param mixed  $formvars  form variables, passed through to submit()
   * @param mixed  $formfiles form files, passed through to submit()
   * @return bool false when submit() failed, true otherwise
   */
  function submittext($URI, $formvars = "", $formfiles = "")
  {
      if (!$this->submit($URI, $formvars, $formfiles))
          return false;

      // expansion is done against the final address after redirects
      if ($this->lastredirectaddr)
          $URI = $this->lastredirectaddr;

      if (!is_array($this->results))
      {
          // single document
          $this->results = $this->_striptext($this->results);
          if ($this->expandlinks)
              $this->results = $this->_expandlinks($this->results, $URI);
          return true;
      }

      // several documents (framed content): process each one in place
      $idx = 0;
      while ($idx < count($this->results))
      {
          $this->results[$idx] = $this->_striptext($this->results[$idx]);
          if ($this->expandlinks)
              $this->results[$idx] = $this->_expandlinks($this->results[$idx], $URI);
          $idx++;
      }
      return true;
  }
  523. /*======================================================================*\
  524. Function: set_submit_multipart
  525. Purpose: Set the form submission content type to
  526. multipart/form-data
  527. \*======================================================================*/
  /**
   * Select multipart/form-data as the content type used for form submissions.
   *
   * @return void
   */
  function set_submit_multipart()
  {
      $type = "multipart/form-data";
      $this->_submit_type = $type;
  }
  532. /*======================================================================*\
  533. Function: set_submit_normal
  534. Purpose: Set the form submission content type to
  535. application/x-www-form-urlencoded
  536. \*======================================================================*/
  /**
   * Select application/x-www-form-urlencoded as the content type used for
   * form submissions.
   *
   * @return void
   */
  function set_submit_normal()
  {
      $type = "application/x-www-form-urlencoded";
      $this->_submit_type = $type;
  }
  541. /*======================================================================*\
  542. Function: set_submit_xml
  543. Purpose: Set the form submission content type to
  544. text/xml
  545. \*======================================================================*/
  /**
   * Select text/xml as the content type used for form submissions.
   * (BB: function added.)
   *
   * @return void
   */
  function set_submit_xml()
  {
      $type = "text/xml";
      $this->_submit_type = $type;
  }
  550. /*======================================================================*\
  551. Private functions
  552. \*======================================================================*/
  553. /*======================================================================*\
  554. Function: _striplinks
  555. Purpose: strip the hyperlinks from an html document
  556. Input: $document document to strip.
  557. Output: $match an array of the links
  558. \*======================================================================*/
  /**
   * Extract the href targets of all <a> tags in an HTML document.
   *
   * Quoted href values are returned first, followed by unquoted ones (the two
   * alternatives of the pattern are captured in separate groups).
   *
   * @param string $document document to strip
   * @return array list of link targets; an empty array when none were found
   */
  function _striplinks($document)
  {
      $links = array();
      preg_match_all("'<\\s*a\\s.*?href\\s*=\\s*        # find <a href=
                      ([\"\\'])?                        # find single or double quote
                      (?(1) (.*?)\\1 | ([^\\s\\>]+))    # if quote found, match up to next matching
                                                        # quote, otherwise match up to next space
                      'isx",$document,$links);

      // catenate the non-empty matches from the conditional subpattern:
      // group 2 holds quoted values, group 3 unquoted ones
      $match = array();
      foreach (array(2, 3) as $group)
      {
          if (empty($links[$group]))
              continue;
          foreach ($links[$group] as $val)
          {
              // compare against "" rather than empty() so a link of "0" is kept
              if ($val !== "" && $val !== null)
                  $match[] = $val;
          }
      }

      // return the links
      return $match;
  }
  581. /*======================================================================*\
  582. Function: _stripform
  583. Purpose: strip the form elements from an html document
  584. Input: $document document to strip.
  585. Output: $match the matched form elements, joined with CRLF
  586. \*======================================================================*/
  /**
   * Reduce an HTML document to its form markup.
   *
   * Collects the form, input, select, textarea and option tags matched by the
   * pattern below and joins them with CRLF.
   *
   * @param string $document document to strip
   * @return string the matched form elements, one per line
   */
  function _stripform($document)
  {
      $pattern = "'<\\/?(FORM|INPUT|SELECT|TEXTAREA|(OPTION))[^<>]*>(?(2)(.*(?=<\\/?(option|select)[^<>]*>[\r\n]*)|(?=[\r\n]*))|(?=[\r\n]*))'Usi";

      $found = null;
      preg_match_all($pattern, $document, $found);

      // index 0 holds the full text of every match
      return implode("\r\n", $found[0]);
  }
  596. /*======================================================================*\
  597. Function: _striptext
  598. Purpose: strip the text from an html document
  599. Input: $document document to strip.
  600. Output: $text the resulting text
  601. \*======================================================================*/
  /**
   * Reduce an HTML document to plain text.
   *
   * Removes script blocks and tags, collapses whitespace that follows a line
   * break, and decodes a fixed list of common HTML entities. The rules are
   * applied in the order listed, each one on the result of the previous.
   *
   * @param string $document document to strip
   * @return string the resulting text
   */
  function _striptext($document)
  {
      // each rule is array(pattern, replacement); entities are listed one by one
      $rules = array(
          array("'<script[^>]*?>.*?</script>'si", ""),   // strip out javascript
          array("'<[\\/\\!]*?[^<>]*?>'si", ""),          // strip out html tags
          array("'([\r\n])[\\s]+'", "\\1"),              // strip out white space
          array("'&(quot|#34|#034|#x22);'i", "\""),      // replace html entities
          array("'&(amp|#38|#038|#x26);'i", "&"),        // added hexadecimal values
          array("'&(lt|#60|#060|#x3c);'i", "<"),
          array("'&(gt|#62|#062|#x3e);'i", ">"),
          array("'&(nbsp|#160|#xa0);'i", " "),
          array("'&(iexcl|#161);'i", chr(161)),
          array("'&(cent|#162);'i", chr(162)),
          array("'&(pound|#163);'i", chr(163)),
          array("'&(copy|#169);'i", chr(169)),
          array("'&(reg|#174);'i", chr(174)),
          array("'&(deg|#176);'i", chr(176)),
          array("'&(#39|#039|#x27);'", chr(39)),
          array("'&(euro|#8364);'i", chr(128)),          // europe
          array("'&a(uml|UML);'", "ä"),                  // german
          array("'&o(uml|UML);'", "ö"),
          array("'&u(uml|UML);'", "ü"),
          array("'&A(uml|UML);'", "Ä"),
          array("'&O(uml|UML);'", "Ö"),
          array("'&U(uml|UML);'", "Ü"),
          array("'&szlig;'i", "ß"),
      );

      // split the rule table into the parallel arrays preg_replace() expects
      $patterns = array();
      $substitutes = array();
      foreach ($rules as $rule)
      {
          $patterns[] = $rule[0];
          $substitutes[] = $rule[1];
      }

      return preg_replace($patterns, $substitutes, $document);
  }
  658. /*======================================================================*\
  659. Function: _expandlinks
  660. Purpose: expand each link into a fully qualified URL
  661. Input: $links the links to qualify
  662. $URI the full URI to get the base from
  663. Output: $expandedLinks the expanded links
  664. \*======================================================================*/
  /**
   * Expand links into fully qualified URLs.
   *
   * The base is $URI without its query string and without a trailing
   * "file.ext" path segment. Links on $this->host have their "http://host"
   * prefix removed and are then re-rooted; links starting with "/" are
   * prefixed with scheme://host of the base; every other link that is not an
   * absolute http(s) URL or a mailto: link is prefixed with the base; finally
   * one level of "/./" and "/dir/../" is collapsed.
   *
   * @param string|array $links the link(s) to qualify
   * @param string       $URI   the full URI to get the base from
   * @return string|array the expanded link(s), same shape as $links
   */
  function _expandlinks($links,$URI)
  {
      // base URI = everything before the query string
      $match = array();
      preg_match("/^[^\\?]+/",$URI,$match);
      $base = isset($match[0]) ? $match[0] : "";

      // Split off "scheme://authority" so the file-name pattern below is only
      // applied to the path. Applied to the whole URI it would swallow a
      // two-label host: "http://example.com" became "http:".
      $split = array();
      if (preg_match('~^([a-z][a-z0-9+.\\-]*://[^/]*)(.*)$~is', $base, $split))
      {
          $authority = $split[1];
          $path = $split[2];
      }
      else
      {
          $authority = "";
          $path = $base;
      }
      $path = preg_replace("|/[^\\/\\.]+\\.[^\\/\\.]+$|","",$path);   // drop trailing "/file.ext"
      $base = preg_replace("|/$|","",$authority.$path);               // drop trailing slash

      // root used for links that start with "/"
      $base_parts = parse_url($base);
      if (!is_array($base_parts))
          $base_parts = array();
      $base_root =
          (isset($base_parts["scheme"]) ? $base_parts["scheme"] : "")."://".
          (isset($base_parts["host"]) ? $base_parts["host"] : "");

      $search = array( "|^http://".preg_quote($this->host, "|")."|i",
                       "|^(\\/)|i",
                       // https:// links are absolute too and must not be prefixed
                       "|^(?!https?://)(?!mailto:)|i",
                       "|/\\./|",
                       "|/[^\\/]+/\\.\\./|"
                     );

      $replace = array( "",
                        $base_root."/",
                        $base."/",
                        "/",
                        "/"
                      );

      $expandedLinks = preg_replace($search,$replace,$links);

      return $expandedLinks;
  }
  689. /*======================================================================*\
  690. Function: _httprequest
  691. Purpose: go get the http data from the server
  692. Input: $url the url to fetch
  693. $fp the current open file pointer
  694. $URI the full URI
  695. $body body contents to send if any (POST)
  696. Output:
  697. \*======================================================================*/
  /**
   * Send an HTTP request over an already open socket and read the response.
   *
   * Side effects on the object:
   *  - headers        response header lines (reset for every request)
   *  - status         status code from the status line, or -100 when the
   *                   headers were incomplete or a read timed out
   *  - response_code  the raw status line
   *  - _redirectaddr  redirect target from a Location:/URI: header or a meta
   *                   refresh tag, false when there is none
   *  - _frameurls     frame sources found while _framedepth < maxframes
   *  - results        the body; stored as an array element when frames are
   *                   found or results already is an array
   *
   * @param string   $url          request target (path, or full URI via a proxy)
   * @param resource $fp           the current open file pointer
   * @param string   $URI          the full URI
   * @param string   $http_method  request method, e.g. GET or POST
   * @param string   $content_type value for the Content-type header, if any
   * @param string   $body         body contents to send if any (POST)
   * @return bool false when the headers were incomplete or a read timed out,
   *              true otherwise
   */
  function _httprequest($url,$fp,$URI,$http_method,$content_type="",$body="")
  {
      if ($this->passcookies && $this->_redirectaddr)
          $this->setcookies();

      $URI_PARTS = parse_url($URI);
      // used to qualify relative redirects and frame sources; this method
      // speaks plain HTTP, so fall back to "http" when the URI has no scheme
      $scheme = (is_array($URI_PARTS) && !empty($URI_PARTS["scheme"])) ? $URI_PARTS["scheme"] : "http";

      if (empty($url))
          $url = "/";

      /* ---- build the request head ---- */
      $headers = $http_method." ".$url." ".$this->_httpversion."\r\n";
      if (!empty($this->agent))
          $headers .= "User-Agent: ".$this->agent."\r\n";
      // a Host entry in rawheaders overrides the generated one
      if (!empty($this->host) && !isset($this->rawheaders['Host']))
      {
          $headers .= "Host: ".$this->host;
          if (!empty($this->port))
              $headers .= ":".$this->port;
          $headers .= "\r\n";
      }
      if (!empty($this->accept))
          $headers .= "Accept: ".$this->accept."\r\n";
      if (!empty($this->referer))
          $headers .= "Referer: ".$this->referer."\r\n";
      if (!empty($this->cookies))
      {
          if (!is_array($this->cookies))
              $this->cookies = (array)$this->cookies;
          $cookie_pairs = array();
          foreach ($this->cookies as $cookieKey => $cookieVal)
              $cookie_pairs[] = $cookieKey."=".urlencode($cookieVal);
          $headers .= "Cookie: ".implode("; ", $cookie_pairs)."\r\n";
      }
      if (!empty($this->rawheaders))
      {
          if (!is_array($this->rawheaders))
              $this->rawheaders = (array)$this->rawheaders;
          // foreach instead of each(): each() no longer exists in PHP 8
          foreach ($this->rawheaders as $headerKey => $headerVal)
              $headers .= $headerKey.": ".$headerVal."\r\n";
      }
      if (!empty($content_type))
      {
          $headers .= "Content-type: $content_type";
          if ($content_type == "multipart/form-data")
              $headers .= "; boundary=".$this->_mime_boundary;
          $headers .= "\r\n";
      }
      // strlen() rather than empty(): a body of "0" is still a body
      if (strlen((string)$body) > 0)
          $headers .= "Content-length: ".strlen($body)."\r\n";
      if (!empty($this->user) || !empty($this->pass))
          $headers .= "Authorization: Basic ".base64_encode($this->user.":".$this->pass)."\r\n";

      //add proxy auth headers
      if (!empty($this->proxy_user))
          $headers .= 'Proxy-Authorization: ' . 'Basic ' . base64_encode($this->proxy_user . ':' . $this->proxy_pass)."\r\n";

      $headers .= "\r\n";

      /* ---- send ---- */
      // set the read timeout if needed
      if ($this->read_timeout > 0)
          socket_set_timeout($fp, $this->read_timeout);
      $this->timed_out = false;

      fwrite($fp,$headers.$body,strlen($headers.$body));

      /* ---- read the response headers ---- */
      $this->_redirectaddr = false;
      // reset to an empty array so the property stays defined even when no
      // header line is received
      $this->headers = array();

      // strict comparison: only a real read failure/EOF ends the loop
      while (false !== ($currentHeader = fgets($fp,$this->_maxlinelen)))
      {
          if ($this->read_timeout > 0 && $this->_check_timeout($fp))
          {
              $this->status=-100;
              return false;
          }

          //BB fix according to sourceforge artf 1192125:
          if ($currentHeader == "\r\n" || $currentHeader == "\r" || $currentHeader == "\n")
              break;

          // if a header begins with Location: or URI:, set the redirect.
          // Whitespace after the colon is optional, and an empty target is ignored.
          $matches = array();
          if (preg_match("/^(Location:|URI:)\\s*(.*)/i",chop($currentHeader),$matches) && $matches[2] !== "")
          {
              // look for :// in the Location header to see if hostname is included
              if (!preg_match("|\\:\\/\\/|",$matches[2]))
              {
                  // no host in the path, so prepend
                  $this->_redirectaddr = $scheme."://".$this->host.":".$this->port;
                  // eliminate double slash
                  if (!preg_match("|^/|",$matches[2]))
                      $this->_redirectaddr .= "/".$matches[2];
                  else
                      $this->_redirectaddr .= $matches[2];
              }
              else
                  $this->_redirectaddr = $matches[2];
          }

          if (preg_match("|^HTTP/|",$currentHeader))
          {
              $status = null;
              if (preg_match("|^HTTP/[^\\s]*\\s(.*?)\\s|",$currentHeader, $status))
              {
                  $this->status= $status[1];
              }
              $this->response_code = $currentHeader;
          }

          $this->headers[] = $currentHeader;
      }

      //BB fix for bug 1482144 on sourceforge: the header section ended without
      // its terminating blank line (e.g. on timeout), so do not read on and
      // return header data as the body
      if ($currentHeader === false)
      {
          $this->status=-100;
          return false;
      }

      /* ---- read the body, in chunks of maxlength, until nothing is returned ---- */
      $results = '';
      while (true)
      {
          $_data = fread($fp, $this->maxlength);
          if ($_data === false || strlen($_data) == 0)
              break;
          $results .= $_data;
      }

      if ($this->read_timeout > 0 && $this->_check_timeout($fp))
      {
          $this->status=-100;
          return false;
      }

      // check if there is a redirect meta tag
      $match = array();
      if (preg_match("'<meta[\\s]*http-equiv[^>]*?content[\\s]*=[\\s]*[\"\\']?\\d+;[\\s]*URL[\\s]*=[\\s]*([^\"\\']*?)[\"\\']?>'i",$results,$match))
      {
          $this->_redirectaddr = $this->_expandlinks($match[1],$URI);
      }

      // have we hit our frame depth and is there frame src to fetch?
      if (($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\\s+.*src[\\s]*=[\\'\"]?([^\\'\"\\>]+)'i",$results,$match))
      {
          $this->results[] = $results;
          for ($x=0; $x<count($match[1]); $x++)
              $this->_frameurls[] = $this->_expandlinks($match[1][$x],$scheme."://".$this->host);
      }
      // have we already fetched framed content?
      elseif (is_array($this->results))
          $this->results[] = $results;
      // no framed content
      else
          $this->results = $results;

      return true;
  }
  840. /*======================================================================*\
  841. Function: _httpsrequest
  842. Purpose: go get the https data from the server using curl
  843. Input: $url the url to fetch
  844. $URI the full URI
  845. $body body contents to send if any (POST)
  846. Output:
  847. \*======================================================================*/
  848. function _httpsrequest($url,$URI,$http_method,$content_type="",$body="")
  849. {
  850. if($this->passcookies && $this->_redirectaddr)
  851. $this->setcookies();
  852. $headers = array();
  853. $URI_PARTS = parse_url($URI);
  854. if(empty($url))
  855. $url = "/";
  856. // GET ... header not needed for curl
  857. //$headers[] = $http_method." ".$url." ".$this->_httpversion;
  858. if(!empty($this->agent))
  859. $headers[] = "User-Agent: ".$this->agent;
  860. if(!empty($this->host)) {
  861. if(!empty($this->port)) {
  862. $headers[] = "Host: ".$this->host.":".$this->port;
  863. } else {
  864. $headers[] = "Host: ".$this->host;
  865. }
  866. }
  867. if(!empty($this->accept))
  868. $headers[] = "Accept: ".$this->accept;
  869. if(!empty($this->referer))
  870. $headers[] = "Referer: ".$this->referer;
  871. if(!empty($this->cookies))
  872. {
  873. if(!is_array($this->cookies))
  874. $this->cookies = (array)$this->cookies;
  875. reset($this->cookies);
  876. if ( count($this->cookies) > 0 ) {
  877. $cookie_str = 'Cookie: ';
  878. foreach ( $this->cookies as $cookieKey => $cookieVal ) {
  879. $cookie_str .= $cookieKey."=".urlencode($cookieVal)."; ";
  880. }
  881. $headers[] = substr($cookie_str,0,-2);
  882. }
  883. }
  884. if(!empty($this->rawheaders))
  885. {
  886. if(!is_array($this->rawheaders))
  887. $this->rawheaders = (array)$this->rawheaders;
  888. while(list($headerKey,$headerVal) = each($this->rawheaders))
  889. $headers[] = $headerKey.": ".$headerVal;
  890. }
  891. if(!empty($content_type)) {
  892. if ($content_type == "multipart/form-data")
  893. $headers[] = "Content-type: $content_type; boundary=".$this->_mime_boundary;
  894. else
  895. $headers[] = "Content-type: $content_type";
  896. }
  897. if(!empty($body))
  898. $headers[] = "Content-length: ".strlen($body);
  899. if(!empty($this->user) || !empty($this->pass))
  900. $headers[] = "Authorization: BASIC ".base64_encode($this->user.":".$this->pass);
  901. $cmdline_params = ''; //BB added to fix
  902. for($curr_header = 0; $curr_header < count($headers); $curr_header++) {
  903. $safer_header = strtr( $headers[$curr_header], "\"", " " );
  904. $cmdline_params .= " -H \"".$safer_header."\"";
  905. }
  906. if(!empty($body))
  907. $cmdline_params .= ' -d "' . str_replace( '"', "\\\"", $body ) . '"';
  908. if($this->read_timeout > 0)
  909. $cmdline_params .= " -m ".$this->read_timeout;
  910. $headerfile = tempnam($this->temp_dir, "sno"); //BB bug corrected: was $temp_dir
  911. $safer_URI = strtr( $URI, "\"", " " ); // strip quotes from the URI to avoid shell access
  912. $results = null;
  913. $return = null;
  914. exec($this->curl_path." -D \"$headerfile\"".$cmdline_params." \"".$safer_URI."\"",$results,$return); // add -k for non-certified
  915. if($return)
  916. {
  917. $this->error = "Error: cURL could not retrieve the document, error $return.";
  918. return false;
  919. }
  920. $results = implode("\r\n",$results);
  921. $result_headers = file("$headerfile");
  922. $this->_redirectaddr = false;
  923. unset($this->headers);
  924. for($currentHeader = 0; $currentHeader < count($result_headers); $currentHeader++)
  925. {
  926. // if a header begins with Location: or URI:, set the redirect
  927. if(preg_match("/^(Location: |URI: )/i",$result_headers[$currentHeader]))
  928. {
  929. // get URL portion of the redirect
  930. $matches = null;
  931. preg_match("/^(Location: |URI:)\\s+(.*)/",chop($result_headers[$currentHeader]),$matches);
  932. // look for :// in the Location header to see if hostname is included
  933. if(!preg_match("|\\:\\/\\/|",$matches[2]))
  934. {
  935. // no host in the path, so prepend
  936. $this->_redirectaddr = $URI_PARTS["scheme"]."://".$this->host.":".$this->port;
  937. // eliminate double slash
  938. if(!preg_match("|^/|",$matches[2]))
  939. $this->_redirectaddr .= "/".$matches[2];
  940. else
  941. $this->_redirectaddr .= $matches[2];
  942. }
  943. else
  944. $this->_redirectaddr = $matches[2];
  945. }
  946. if ( preg_match("|^HTTP/|",$result_headers[$currentHeader]) ) {
  947. $status = null; //BB: added and fixed status return for https with this and next 2 code lines from http
  948. if(preg_match("|^HTTP/[^\\s]*\\s(.*?)\\s|",$result_headers[$currentHeader], $status))
  949. {
  950. $this->status= $status[1];
  951. }
  952. $this->response_code = $result_headers[$currentHeader];
  953. }
  954. $this->headers[] = $result_headers[$currentHeader];
  955. }
  956. // check if there is a a redirect meta tag
  957. $match = null;
  958. if(preg_match("'<meta[\\s]*http-equiv[^>]*?content[\\s]*=[\\s]*[\"\\']?\\d+;[\\s]*URL[\\s]*=[\\s]*([^\"\\']*?)[\"\\']?>'i",$results,$match))
  959. {
  960. $this->_redirectaddr = $this->_expandlinks($match[1],$URI);
  961. }
  962. // have we hit our frame depth and is there frame src to fetch?
  963. if(($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\\s+.*src[\\s]*=[\\'\"]?([^\\'\"\\>]+)'i",$results,$match))
  964. {
  965. $this->results[] = $results;
  966. for($x=0; $x<count($match[1]); $x++)
  967. $this->_frameurls[] = $this->_expandlinks($match[1][$x],$URI_PARTS["scheme"]."://".$this->host);
  968. }
  969. // have we already fetched framed content?
  970. elseif(is_array($this->results))
  971. $this->results[] = $results;
  972. // no framed content
  973. else
  974. $this->results = $results;
  975. unlink("$headerfile");
  976. return true;
  977. }
  978. /*======================================================================*\
  979. Function: setcookies()
  980. Purpose: set cookies for a redirection
  981. \*======================================================================*/
  982. function setcookies()
  983. {
  984. for($x=0; $x<count($this->headers); $x++)
  985. {
  986. $match = null;
  987. if(preg_match('/^set-cookie:[\s]+([^=]+)=([^;]+)/i', $this->headers[$x],$match))
  988. $this->cookies[$match[1]] = urldecode($match[2]);
  989. }
  990. }
  991. /*======================================================================*\
  992. Function: _check_timeout
  993. Purpose: checks whether timeout has occurred
  994. Input: $fp file pointer
  995. \*======================================================================*/
  996. function _check_timeout($fp)
  997. {
  998. if ($this->read_timeout > 0) {
  999. $fp_status = socket_get_status($fp);
  1000. if ($fp_status["timed_out"]) {
  1001. $this->timed_out = true;
  1002. return true;
  1003. }
  1004. }
  1005. return false;
  1006. }
  1007. /*======================================================================*\
  1008. Function: _connect
  1009. Purpose: make a socket connection
  1010. Input: $fp file pointer
  1011. \*======================================================================*/
  1012. function _connect(&$fp)
  1013. {
  1014. if(!empty($this->proxy_host) && !empty($this->proxy_port))
  1015. {
  1016. $this->_isproxy = true;
  1017. $host = $this->proxy_host;
  1018. $port = $this->proxy_port;
  1019. }
  1020. else
  1021. {
  1022. $host = $this->host;
  1023. $port = $this->port;
  1024. }
  1025. $this->status = 0;
  1026. $errno = null;
  1027. $errstr = null;
  1028. if( false != ( $fp = fsockopen(
  1029. $host,
  1030. $port,
  1031. $errno,
  1032. $errstr,
  1033. $this->_fp_timeout
  1034. )) )
  1035. {
  1036. // socket connection succeeded
  1037. return true;
  1038. }
  1039. else
  1040. {
  1041. // socket connection failed
  1042. $this->status = $errno;
  1043. switch($errno)
  1044. {
  1045. case -3:
  1046. $this->error="socket creation failed (-3)";
  1047. case -4:
  1048. $this->error="dns lookup failure (-4)";
  1049. case -5:
  1050. $this->error="connection refused or timed out (-5)";
  1051. default:
  1052. $this->error="connection failed (".$errno.")";
  1053. }
  1054. return false;
  1055. }
  1056. }
  1057. /*======================================================================*\
  1058. Function: _disconnect
  1059. Purpose: disconnect a socket connection
  1060. Input: $fp file pointer
  1061. \*======================================================================*/
  1062. function _disconnect($fp)
  1063. {
  1064. return(fclose($fp));
  1065. }
  1066. /*======================================================================*\
  1067. Function: _prepare_post_body
  1068. Purpose: Prepare post body according to encoding type
  1069. Input: $formvars - form variables
  1070. $formfiles - form upload files
  1071. Output: post body
  1072. \*======================================================================*/
  1073. function _prepare_post_body($formvars, $formfiles)
  1074. {
  1075. settype($formvars, "array");
  1076. settype($formfiles, "array");
  1077. $postdata = '';
  1078. if (count($formvars) == 0 && count($formfiles) == 0)
  1079. return null;
  1080. switch ($this->_submit_type) {
  1081. case "application/x-www-form-urlencoded":
  1082. reset($formvars);
  1083. while(list($key,$val) = each($formvars)) {
  1084. if (is_array($val) || is_object($val)) {
  1085. while (list($cur_key, $cur_val) = each($val)) {
  1086. $postdata .= urlencode($key)."[]=".urlencode($cur_val)."&";
  1087. }
  1088. } else
  1089. $postdata .= urlencode($key)."=".urlencode($val)."&";
  1090. }
  1091. break;
  1092. case "multipart/form-data":
  1093. $this->_mime_boundary = "Snoopy".md5(uniqid(microtime()));
  1094. reset($formvars);
  1095. while(list($key,$val) = each($formvars)) {
  1096. if (is_array($val) || is_object($val)) {
  1097. while (list($cur_key, $cur_val) = each($val)) {
  1098. $postdata .= "--".$this->_mime_boundary."\r\n";
  1099. $postdata .= "Content-Disposition: form-data; name=\"$key\[\]\"\r\n\r\n";
  1100. $postdata .= "$cur_val\r\n";
  1101. }
  1102. } else {
  1103. $postdata .= "--".$this->_mime_boundary."\r\n";
  1104. $postdata .= "Content-Disposition: form-data; name=\"$key\"\r\n\r\n";
  1105. $postdata .= "$val\r\n";
  1106. }
  1107. }
  1108. reset($formfiles);
  1109. while (list($field_name, $file_names) = each($formfiles)) {
  1110. settype($file_names, "array");
  1111. while (list(, $file_name) = each($file_names)) {
  1112. if (!is_readable($file_name)) continue;
  1113. $fp = fopen($file_name, "r");
  1114. $file_content = fread($fp, filesize($file_name));
  1115. fclose($fp);
  1116. $base_name = basename($file_name);
  1117. $postdata .= "--".$this->_mime_boundary."\r\n";
  1118. $postdata .= "Content-Disposition: form-data; name=\"$field_name\"; filename=\"$base_name\"\r\n\r\n";
  1119. $postdata .= "$file_content\r\n";
  1120. }
  1121. }
  1122. $postdata .= "--".$this->_mime_boundary."--\r\n";
  1123. break;
  1124. case "text/xml": //BB case added
  1125. $postdata = $formvars['xml'];
  1126. break;
  1127. }
  1128. return $postdata;
  1129. }
  1130. }
  1131. ?>