PageRenderTime 4ms CodeModel.GetById 13ms app.highlight 52ms RepoModel.GetById 1ms app.codeStats 0ms

/snoopy.class.php

https://github.com/zhangv/wechat-php-sdk
PHP | 1275 lines | 1021 code | 78 blank | 176 comment | 79 complexity | 5a03b3587eae2184311912f03a0d5cd7 MD5 | raw file
   1<?php
   2/*************************************************
   3
   4Snoopy - the PHP net client
   5Author: Monte Ohrt <monte@ispi.net>
   6Copyright (c): 1999-2008 New Digital Group, all rights reserved
   7Version: 1.2.4
   8
   9* This library is free software; you can redistribute it and/or
  10* modify it under the terms of the GNU Lesser General Public
  11* License as published by the Free Software Foundation; either
  12* version 2.1 of the License, or (at your option) any later version.
  13*
  14* This library is distributed in the hope that it will be useful,
  15* but WITHOUT ANY WARRANTY; without even the implied warranty of
  16* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17* Lesser General Public License for more details.
  18*
  19* You should have received a copy of the GNU Lesser General Public
  20* License along with this library; if not, write to the Free Software
  21* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  22
  23You may contact the author of Snoopy by e-mail at:
  24monte@ohrt.com
  25
  26The latest version of Snoopy can be obtained from:
  27http://snoopy.sourceforge.net/
  28
  29*************************************************/
  30
  31class Snoopy
  32{
  33	/**** Public variables ****/
  34
  35	/* user definable vars */
  36
  37	var $host			=	"www.php.net";		// host name we are connecting to
  38	var $port			=	80;					// port we are connecting to
  39	var $proxy_host		=	"";					// proxy host to use
  40	var $proxy_port		=	"";					// proxy port to use
  41	var $proxy_user		=	"";					// proxy user to use
  42	var $proxy_pass		=	"";					// proxy password to use
  43
  44	var $agent			=	"Mozilla/5.0";	// agent we masquerade as
  45	var	$referer		=	"";					// referer info to pass
  46	var $cookies		=	array();			// array of cookies to pass
  47	// $cookies["username"]="joe";
  48	var	$rawheaders		=	array();			// array of raw headers to send
  49	// $rawheaders["Content-type"]="text/html";
  50
  51	var $maxredirs		=	5;					// http redirection depth maximum. 0 = disallow
  52	var $lastredirectaddr	=	"";				// contains address of last redirected address
  53	var	$offsiteok		=	true;				// allows redirection off-site
  54	var $maxframes		=	0;					// frame content depth maximum. 0 = disallow
  55	var $expandlinks	=	true;				// expand links to fully qualified URLs.
  56	// this only applies to fetchlinks()
  57	// submitlinks(), and submittext()
  58	var $passcookies	=	true;				// pass set cookies back through redirects
  59	// NOTE: this currently does not respect
  60	// dates, domains or paths.
  61
  62	var	$user			=	"";					// user for http authentication
  63	var	$pass			=	"";					// password for http authentication
  64
  65	// http accept types
  66	var $accept			=	"application/json, text/javascript, */*; q=0.01";
  67
  68	var $results		=	"";					// where the content is put
  69
  70	var $error			=	"";					// error messages sent here
  71	var	$response_code	=	"";					// response code returned from server
  72	var	$headers		=	array();			// headers returned from server sent here
  73	var	$maxlength		=	500000;				// max return data length (body)
  74	var $read_timeout	=	0;					// timeout on read operations, in seconds
  75	// supported only since PHP 4 Beta 4
  76	// set to 0 to disallow timeouts
  77	var $timed_out		=	false;				// if a read operation timed out
  78	var	$status			=	0;					// http request status
  79
  80	var $temp_dir		=	"/tmp";				// temporary directory that the webserver
  81	// has permission to write to.
  82	// under Windows, this should be C:\temp
  83
  84	var	$curl_path		=	"/usr/local/bin/curl";
  85	// Snoopy will use cURL for fetching
  86	// SSL content if a full system path to
  87	// the cURL binary is supplied here.
  88	// set to false if you do not have
  89	// cURL installed. See http://curl.haxx.se
  90	// for details on installing cURL.
  91	// Snoopy does *not* use the cURL
  92	// library functions built into php,
  93	// as these functions are not stable
  94	// as of this Snoopy release.
  95
  96	/**** Private variables ****/
  97
  98	var	$_maxlinelen	=	4096;				// max line length (headers)
  99
 100	var $_httpmethod	=	"GET";				// default http request method
 101	var $_httpversion	=	"HTTP/1.0";			// default http request version
 102	var $_submit_method	=	"POST";				// default submit method
 103	var $_submit_type	=	"application/x-www-form-urlencoded";	// default submit type
 104	var $_mime_boundary	=   "";					// MIME boundary for multipart/form-data submit type
 105	var $_redirectaddr	=	false;				// will be set if page fetched is a redirect
 106	var $_redirectdepth	=	0;					// increments on an http redirect
 107	var $_frameurls		= 	array();			// frame src urls
 108	var $_framedepth	=	0;					// increments on frame depth
 109
 110	var $_isproxy		=	false;				// set if using a proxy server
 111	var $_fp_timeout	=	30;					// timeout for socket connection
 112
 113	/*======================================================================*\
 114	 Function:	fetch
 115	Purpose:	fetch the contents of a web page
 116	(and possibly other protocols in the
 117			future like ftp, nntp, gopher, etc.)
 118	Input:		$URI	the location of the page to fetch
 119	Output:		$this->results	the output text from the fetch
 120	\*======================================================================*/
 121
	/**
	 * Fetch a page over http or https and store its body in $this->results.
	 *
	 * After the request, a redirect reported through $this->_redirectaddr is
	 * followed while $this->_redirectdepth is below $this->maxredirs (targets on
	 * another host only when $this->offsiteok is set), and frame URLs collected
	 * in $this->_frameurls are fetched while $this->_framedepth is below
	 * $this->maxframes.
	 *
	 * Input:  $URI  absolute URL of the page to fetch
	 * Output: false when the scheme is neither http nor https, when https is
	 *         requested but neither the curl extension nor an executable
	 *         $this->curl_path is available, or when the http connection
	 *         cannot be opened; true otherwise.
	 */
	function fetch($URI)
	{
		$URI_PARTS = parse_url($URI);
		// parse_url() yields false for seriously malformed URLs
		if(!is_array($URI_PARTS))
			$URI_PARTS = array();

		if(!empty($URI_PARTS["user"]))
			$this->user = $URI_PARTS["user"];
		if(!empty($URI_PARTS["pass"]))
			$this->pass = $URI_PARTS["pass"];

		// make sure every component read below exists
		foreach(array("scheme", "host", "path", "query") as $part)
		{
			if(empty($URI_PARTS[$part]))
				$URI_PARTS[$part] = '';
		}

		$scheme = strtolower($URI_PARTS["scheme"]);
		if($scheme != "http" && $scheme != "https")
		{
			// not a valid protocol
			$this->error = 'Invalid protocol "'.$URI_PARTS["scheme"]."\"\n";
			return false;
		}

		// https needs either the curl extension or a usable curl binary
		if($scheme == "https" && !function_exists('curl_init'))
		{
			if(!$this->curl_path)
				return false;
			if(function_exists("is_executable") && !is_executable($this->curl_path))
				return false;
		}

		$this->host = $URI_PARTS["host"];
		if(!empty($URI_PARTS["port"]))
			$this->port = $URI_PARTS["port"];

		// a proxy wants the entire URI, a direct connection only path + query
		if($this->_isproxy)
			$target = $URI;
		else
			$target = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");

		if($scheme == "http")
		{
			if(!$this->_connect($fp))
				return false;
			$this->_httprequest($target, $fp, $URI, $this->_httpmethod);
			$this->_disconnect($fp);
		}
		else
		{
			$this->_httpsrequest($target, $URI, $this->_httpmethod);
		}

		/* url was redirected, check if we've hit the max depth */
		if($this->_redirectaddr && $this->maxredirs > $this->_redirectdepth)
		{
			// only follow redirect if it's on this site, or offsiteok is true
			$onsite = "|^".$scheme."://".preg_quote($this->host, "|")."|i";
			if(preg_match($onsite, $this->_redirectaddr) || $this->offsiteok)
			{
				$this->_redirectdepth++;
				$this->lastredirectaddr = $this->_redirectaddr;
				$this->fetch($this->_redirectaddr);
			}
		}

		if($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
		{
			// work on a copy: the nested fetches refill $this->_frameurls
			$frameurls = $this->_frameurls;
			$this->_frameurls = array();

			foreach($frameurls as $frameurl)
			{
				if($this->_framedepth >= $this->maxframes)
					break;
				$this->fetch($frameurl);
				$this->_framedepth++;
			}
		}

		return true;
	}
 262
 263	/*======================================================================*\
 264	 Function:	submit
 265	Purpose:	submit an http form
 266	Input:		$URI	the location to post the data
 267	$formvars	the formvars to use.
 268	format: $formvars["var"] = "val";
 269	$formfiles  an array of files to submit
 270	format: $formfiles["var"] = "/dir/filename.ext";
 271	Output:		$this->results	the text output from the post
 272	\*======================================================================*/
 273
	/**
	 * Submit form data to a URL over http or https.
	 *
	 * The request body is produced by $this->_prepare_post_body() and sent with
	 * $this->_submit_method / $this->_submit_type. A redirect is followed while
	 * $this->_redirectdepth is below $this->maxredirs: a target containing "?"
	 * (not at position 0) is re-requested with fetch(), any other target is
	 * submitted again with the same form data. Frame URLs are then fetched as
	 * in fetch().
	 *
	 * Input:  $URI        the location to post the data to
	 *         $formvars   form variables, $formvars["var"] = "val"
	 *         $formfiles  files to submit, $formfiles["var"] = "/dir/filename.ext"
	 * Output: false when the scheme is neither http nor https, when https is
	 *         requested but neither the curl extension nor an executable
	 *         $this->curl_path is available, or when the http connection
	 *         cannot be opened; true otherwise.
	 */
	function submit($URI, $formvars="", $formfiles="")
	{
		$postdata = $this->_prepare_post_body($formvars, $formfiles);

		$URI_PARTS = parse_url($URI);
		// parse_url() yields false for seriously malformed URLs
		if(!is_array($URI_PARTS))
			$URI_PARTS = array();

		if(!empty($URI_PARTS["user"]))
			$this->user = $URI_PARTS["user"];
		if(!empty($URI_PARTS["pass"]))
			$this->pass = $URI_PARTS["pass"];

		// make sure every component read below exists
		foreach(array("scheme", "host", "path", "query") as $part)
		{
			if(empty($URI_PARTS[$part]))
				$URI_PARTS[$part] = '';
		}

		$scheme = strtolower($URI_PARTS["scheme"]);
		if($scheme != "http" && $scheme != "https")
		{
			// not a valid protocol
			$this->error = 'Invalid protocol "'.$URI_PARTS["scheme"]."\"\n";
			return false;
		}

		// https needs either the curl extension or a usable curl binary
		if($scheme == "https" && !function_exists('curl_init'))
		{
			if(!$this->curl_path)
				return false;
			if(function_exists("is_executable") && !is_executable($this->curl_path))
				return false;
		}

		$this->host = $URI_PARTS["host"];
		if(!empty($URI_PARTS["port"]))
			$this->port = $URI_PARTS["port"];

		// a proxy wants the entire URI, a direct connection only path + query
		if($this->_isproxy)
			$target = $URI;
		else
			$target = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");

		if($scheme == "http")
		{
			if(!$this->_connect($fp))
				return false;
			$this->_httprequest($target, $fp, $URI, $this->_submit_method, $this->_submit_type, $postdata);
			$this->_disconnect($fp);
		}
		else
		{
			$this->_httpsrequest($target, $URI, $this->_submit_method, $this->_submit_type, $postdata);
		}

		/* url was redirected, check if we've hit the max depth */
		if($this->_redirectaddr && $this->maxredirs > $this->_redirectdepth)
		{
			// a target that does not start with our scheme is expanded against scheme://host
			if(!preg_match("|^".$URI_PARTS["scheme"]."://|", $this->_redirectaddr))
				$this->_redirectaddr = $this->_expandlinks($this->_redirectaddr, $URI_PARTS["scheme"]."://".$URI_PARTS["host"]);

			// only follow redirect if it's on this site, or offsiteok is true
			$onsite = "|^".$scheme."://".preg_quote($this->host, "|")."|i";
			if(preg_match($onsite, $this->_redirectaddr) || $this->offsiteok)
			{
				$this->_redirectdepth++;
				$this->lastredirectaddr = $this->_redirectaddr;
				if(strpos($this->_redirectaddr, "?") > 0)
					$this->fetch($this->_redirectaddr); // the redirect has changed the request method from post to get
				else
					$this->submit($this->_redirectaddr, $formvars, $formfiles);
			}
		}

		if($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
		{
			// work on a copy: the nested fetches refill $this->_frameurls
			$frameurls = $this->_frameurls;
			$this->_frameurls = array();

			foreach($frameurls as $frameurl)
			{
				if($this->_framedepth >= $this->maxframes)
					break;
				$this->fetch($frameurl);
				$this->_framedepth++;
			}
		}

		return true;
	}
 429
 430	/*======================================================================*\
 431	 Function:	fetchlinks
 432	Purpose:	fetch the links from a web page
 433	Input:		$URI	where you are fetching from
 434	Output:		$this->results	an array of the URLs
 435	\*======================================================================*/
 436
	/**
	 * Fetch a page and replace $this->results with the links found in it.
	 *
	 * Links are extracted with _striplinks(); when $this->expandlinks is set
	 * they are passed through _expandlinks() using the last redirect address
	 * (if any) or $URI as the base.
	 *
	 * Input:  $URI  where you are fetching from
	 * Output: true when the fetch succeeded, false otherwise
	 */
	function fetchlinks($URI)
	{
		if (!$this->fetch($URI)) {
			return false;
		}

		// after a redirect, the final address is the base for expansion
		$base = $this->lastredirectaddr ? $this->lastredirectaddr : $URI;

		if (!is_array($this->results)) {
			$this->results = $this->_striplinks($this->results);
		} else {
			$i = 0;
			while ($i < count($this->results)) {
				$this->results[$i] = $this->_striplinks($this->results[$i]);
				$i++;
			}
		}

		if ($this->expandlinks) {
			$this->results = $this->_expandlinks($this->results, $base);
		}

		return true;
	}
 458
 459	/*======================================================================*\
 460	 Function:	fetchform
 461	Purpose:	fetch the form elements from a web page
 462	Input:		$URI	where you are fetching from
 463	Output:		$this->results	the resulting html form
 464	\*======================================================================*/
 465
	/**
	 * Fetch a page and reduce $this->results to its form markup.
	 *
	 * Each fetched document (one string, or one array entry per frame) is
	 * replaced by the output of _stripform().
	 *
	 * Input:  $URI  where you are fetching from
	 * Output: true when the fetch succeeded, false otherwise
	 */
	function fetchform($URI)
	{
		if (!$this->fetch($URI)) {
			return false;
		}

		if (!is_array($this->results)) {
			$this->results = $this->_stripform($this->results);
			return true;
		}

		$i = 0;
		while ($i < count($this->results)) {
			$this->results[$i] = $this->_stripform($this->results[$i]);
			$i++;
		}

		return true;
	}
 485
 486
 487	/*======================================================================*\
 488	 Function:	fetchtext
 489	Purpose:	fetch the text from a web page, stripping the links
 490	Input:		$URI	where you are fetching from
 491	Output:		$this->results	the text from the web page
 492	\*======================================================================*/
 493
	/**
	 * Fetch a page and reduce $this->results to plain text.
	 *
	 * Each fetched document (one string, or one array entry per frame) is
	 * replaced by the output of _striptext().
	 *
	 * Input:  $URI  where you are fetching from
	 * Output: true when the fetch succeeded, false otherwise
	 */
	function fetchtext($URI)
	{
		if (!$this->fetch($URI)) {
			return false;
		}

		if (!is_array($this->results)) {
			$this->results = $this->_striptext($this->results);
			return true;
		}

		$i = 0;
		while ($i < count($this->results)) {
			$this->results[$i] = $this->_striptext($this->results[$i]);
			$i++;
		}

		return true;
	}
 510
 511	/*======================================================================*\
 512	 Function:	submitlinks
 513	Purpose:	grab links from a form submission
 514	Input:		$URI	where you are submitting from
 515	Output:		$this->results	an array of the links from the post
 516	\*======================================================================*/
 517
	/**
	 * Submit a form and replace $this->results with the links of the reply.
	 *
	 * Links are extracted with _striplinks(); when $this->expandlinks is set
	 * each result is passed through _expandlinks() using the last redirect
	 * address (if any) or $URI as the base.
	 *
	 * Input:  $URI        where you are submitting to
	 *         $formvars   form variables handed to submit()
	 *         $formfiles  form files handed to submit()
	 * Output: true when the submit succeeded, false otherwise
	 */
	function submitlinks($URI, $formvars="", $formfiles="")
	{
		if (!$this->submit($URI, $formvars, $formfiles)) {
			return false;
		}

		// after a redirect, the final address is the base for expansion
		$base = $this->lastredirectaddr ? $this->lastredirectaddr : $URI;

		if (!is_array($this->results)) {
			$found = $this->_striplinks($this->results);
			if ($this->expandlinks) {
				$found = $this->_expandlinks($found, $base);
			}
			$this->results = $found;
			return true;
		}

		$i = 0;
		while ($i < count($this->results)) {
			$found = $this->_striplinks($this->results[$i]);
			if ($this->expandlinks) {
				$found = $this->_expandlinks($found, $base);
			}
			$this->results[$i] = $found;
			$i++;
		}

		return true;
	}
 544
 545	/*======================================================================*\
 546	 Function:	submittext
 547	Purpose:	grab text from a form submission
 548	Input:		$URI	where you are submitting from
 549	Output:		$this->results	the text from the web page
 550	\*======================================================================*/
 551
	/**
	 * Submit a form and reduce $this->results to the plain text of the reply.
	 *
	 * Text is produced by _striptext(); when $this->expandlinks is set each
	 * result is additionally passed through _expandlinks() using the last
	 * redirect address (if any) or $URI as the base.
	 *
	 * Input:  $URI        where you are submitting to
	 *         $formvars   form variables handed to submit()
	 *         $formfiles  form files handed to submit()
	 * Output: true when the submit succeeded, false otherwise
	 */
	function submittext($URI, $formvars = "", $formfiles = "")
	{
		if (!$this->submit($URI, $formvars, $formfiles)) {
			return false;
		}

		// after a redirect, the final address is the base for expansion
		$base = $this->lastredirectaddr ? $this->lastredirectaddr : $URI;

		if (!is_array($this->results)) {
			$plain = $this->_striptext($this->results);
			if ($this->expandlinks) {
				$plain = $this->_expandlinks($plain, $base);
			}
			$this->results = $plain;
			return true;
		}

		$i = 0;
		while ($i < count($this->results)) {
			$plain = $this->_striptext($this->results[$i]);
			if ($this->expandlinks) {
				$plain = $this->_expandlinks($plain, $base);
			}
			$this->results[$i] = $plain;
			$i++;
		}

		return true;
	}
 578
 579
 580
 581	/*======================================================================*\
 582	 Function:	set_submit_multipart
 583	Purpose:	Set the form submission content type to
 584	multipart/form-data
 585	\*======================================================================*/
	/**
	 * Switch the content type used by later submissions to multipart/form-data.
	 */
	function set_submit_multipart()
	{
		$multipart = "multipart/form-data";
		$this->_submit_type = $multipart;
	}
 590
 591
 592	/*======================================================================*\
 593	 Function:	set_submit_normal
 594	Purpose:	Set the form submission content type to
 595	application/x-www-form-urlencoded
 596	\*======================================================================*/
	/**
	 * Switch the content type used by later submissions back to
	 * application/x-www-form-urlencoded.
	 */
	function set_submit_normal()
	{
		$urlencoded = "application/x-www-form-urlencoded";
		$this->_submit_type = $urlencoded;
	}
 601
 602
 603
 604
 605	/*======================================================================*\
 606	 Private functions
 607	\*======================================================================*/
 608
 609
 610	/*======================================================================*\
 611	 Function:	_striplinks
 612	Purpose:	strip the hyperlinks from an html document
 613	Input:		$document	document to strip.
 614	Output:		$match		an array of the links
 615	\*======================================================================*/
 616
	/**
	 * Extract the href targets of all anchor tags in an html document.
	 *
	 * Quoted href values are returned first, followed by unquoted ones; values
	 * that are empty() (including "0") are skipped.
	 *
	 * Input:  $document  document to scan
	 * Output: array of link targets; an empty array when none are found
	 */
	function _striplinks($document)
	{
		preg_match_all("'<\s*a\s.*?href\s*=\s*			# find <a href=
						([\"\'])?					# find single or double quote
						(?(1) (.*?)\\1 | ([^\s\>]+))		# if quote found, match up to next matching
													# quote, otherwise match up to next space
						'isx",$document,$links);

		// always hand back an array, even when nothing matched
		$match = array();

		// catenate the non-empty matches from the conditional subpattern:
		// group 2 holds quoted values, group 3 unquoted ones
		foreach(array(2, 3) as $group)
		{
			if(empty($links[$group]))
				continue;
			foreach($links[$group] as $val)
			{
				if(!empty($val))
					$match[] = $val;
			}
		}

		// return the links
		return $match;
	}
 643
 644	/*======================================================================*\
 645	 Function:	_stripform
 646	Purpose:	strip the form elements from an html document
 647	Input:		$document	document to strip.
 648	Output:		$match		an array of the links
 649	\*======================================================================*/
 650
	/**
	 * Reduce an html document to its form-related tags.
	 *
	 * Input:  $document  document to scan
	 * Output: a single string holding every matched form, input, select,
	 *         textarea and option element, joined with CRLF
	 */
	function _stripform($document)
	{
		$form_tags = "'<\/?(FORM|INPUT|SELECT|TEXTAREA|(OPTION))[^<>]*>(?(2)(.*(?=<\/?(option|select)[^<>]*>[\r\n]*)|(?=[\r\n]*))|(?=[\r\n]*))'Usi";

		preg_match_all($form_tags, $document, $found);

		// index 0 holds the complete matches
		return implode("\r\n", $found[0]);
	}
 661
 662
 663
 664	/*======================================================================*\
 665	 Function:	_striptext
 666	Purpose:	strip the text from an html document
 667	Input:		$document	document to strip.
 668	Output:		$text		the resulting text
 669	\*======================================================================*/
 670
	/**
	 * Reduce an html document to text.
	 *
	 * Script blocks and tags are removed, whitespace following a line break is
	 * collapsed onto that line break, and a fixed list of entities is decoded.
	 * Non-ASCII replacements are emitted as single bytes via chr() (for the
	 * umlauts and sharp s these are the ISO-8859-1 code points).
	 *
	 * Input:  $document  document to strip
	 * Output: the resulting text
	 */
	function _striptext($document)
	{
		// Pattern => replacement, applied in this order. Keeping each pair on
		// one line prevents the two lists from drifting out of step.
		$rules = array(
			"'<script[^>]*?>.*?</script>'si"	=> "",			// strip out javascript
			"'<[\/\!]*?[^<>]*?>'si"				=> "",			// strip out html tags
			"'([\r\n])[\s]+'"					=> "\\1",		// strip out white space
			"'&(quot|#34|#034|#x22);'i"			=> "\"",		// replace html entities
			"'&(amp|#38|#038|#x26);'i"			=> "&",			// added hexadecimal values
			"'&(lt|#60|#060|#x3c);'i"			=> "<",
			"'&(gt|#62|#062|#x3e);'i"			=> ">",
			"'&(nbsp|#160|#xa0);'i"				=> " ",
			"'&(iexcl|#161);'i"					=> chr(161),
			"'&(cent|#162);'i"					=> chr(162),
			"'&(pound|#163);'i"					=> chr(163),
			"'&(copy|#169);'i"					=> chr(169),
			"'&(reg|#174);'i"					=> chr(174),
			"'&(deg|#176);'i"					=> chr(176),
			"'&(#39|#039|#x27);'"				=> chr(39),
			"'&(euro|#8364);'i"					=> chr(128),	// europe
			// german: decode to the character instead of deleting the entity
			"'&a(uml|UML);'"					=> chr(228),
			"'&o(uml|UML);'"					=> chr(246),
			"'&u(uml|UML);'"					=> chr(252),
			"'&A(uml|UML);'"					=> chr(196),
			"'&O(uml|UML);'"					=> chr(214),
			"'&U(uml|UML);'"					=> chr(220),
			"'&szlig;'i"						=> chr(223),
		);

		return preg_replace(array_keys($rules), array_values($rules), $document);
	}
 731
 732	/*======================================================================*\
 733	 Function:	_expandlinks
 734	Purpose:	expand each link into a fully qualified URL
 735	Input:		$links			the links to qualify
 736	$URI			the full URI to get the base from
 737	Output:		$expandedLinks	the expanded links
 738	\*======================================================================*/
 739
	/**
	 * Expand links into fully qualified URLs.
	 *
	 * The base is $URI without its query string, without a trailing
	 * "name.ext" path segment and without a trailing slash. Links starting
	 * with "/" are rooted at scheme://host of that base; links that start with
	 * neither http://, https:// nor mailto: get the base prepended. "/./" and
	 * "/segment/../" sequences are then collapsed once.
	 *
	 * Input:  $links  a link or an array of links to qualify
	 *         $URI    the full URI to take the base from
	 * Output: the expanded link(s), in the same shape as $links
	 */
	function _expandlinks($links,$URI)
	{
		// drop the query string; an empty URI leaves an empty base
		$base = "";
		if(preg_match("/^[^\?]+/",$URI,$found))
			$base = $found[0];

		// Drop a trailing "name.ext" segment. The (?<!/) guard keeps the host
		// of a path-less URL such as http://example.com from being mistaken
		// for a file name.
		$base = preg_replace("|(?<!/)/[^\/\.]+\.[^\/\.]+$|","",$base);
		$base = preg_replace("|/$|","",$base);

		$base_parts = parse_url($base);
		if(!is_array($base_parts))
			$base_parts = array();
		$base_scheme = isset($base_parts["scheme"]) ? $base_parts["scheme"] : "";
		$base_host = isset($base_parts["host"]) ? $base_parts["host"] : "";
		$base_root = $base_scheme."://".$base_host;

		$search = array(
				// strip our own host so the link is re-rooted below
				"|^http://".preg_quote($this->host, "|")."|i",
				"|^(\/)|i",
				// https links are absolute too and must not get the base prepended
				"|^(?!https?://)(?!mailto:)|i",
				"|/\./|",
				"|/[^\/]+/\.\./|"
		);

		$replace = array(
				"",
				$base_root."/",
				$base."/",
				"/",
				"/"
		);

		return preg_replace($search,$replace,$links);
	}
 769
 770	/*======================================================================*\
 771	 Function:	_httprequest
 772	Purpose:	go get the http data from the server
 773	Input:		$url		the url to fetch
 774	$fp			the current open file pointer
 775	$URI		the full URI
 776	$body		body contents to send if any (POST)
 777	Output:
 778	\*======================================================================*/
 779
	/**
	 * Send one HTTP request over an open socket and read the response.
	 *
	 * Builds the request head from the object's settings (agent, host, accept,
	 * referer, cookies, raw headers, content type, basic and proxy auth),
	 * writes head and body to $fp, then reads the response headers into
	 * $this->headers and the body into $this->results. A Location:/URI:
	 * header or a meta refresh tag sets $this->_redirectaddr; frame sources
	 * are queued in $this->_frameurls while the frame depth allows it.
	 *
	 * Input:  $url           request target for the request line ("/" if empty)
	 *         $fp            the current open file pointer
	 *         $URI           the full URI
	 *         $http_method   request method for the request line
	 *         $content_type  value for the Content-type header, if any
	 *         $body          body contents to send, if any (POST)
	 * Output: false (with $this->status set to -100) when a read timed out,
	 *         true otherwise
	 */
	function _httprequest($url,$fp,$URI,$http_method,$content_type="",$body="")
	{
		if($this->passcookies && $this->_redirectaddr)
			$this->setcookies();

		$URI_PARTS = parse_url($URI);
		$uri_scheme = (is_array($URI_PARTS) && isset($URI_PARTS["scheme"])) ? $URI_PARTS["scheme"] : "";
		if(empty($url))
			$url = "/";

		$headers = $http_method." ".$url." ".$this->_httpversion."\r\n";
		if(!empty($this->agent))
			$headers .= "User-Agent: ".$this->agent."\r\n";
		// a Host entry in rawheaders takes precedence over the generated one
		if(!empty($this->host) && !isset($this->rawheaders['Host'])) {
			$headers .= "Host: ".$this->host;
			if(!empty($this->port) && $this->port!=80)
				$headers .= ":".$this->port;
			$headers .= "\r\n";
		}
		if(!empty($this->accept))
			$headers .= "Accept: ".$this->accept."\r\n";
		if(!empty($this->referer))
			$headers .= "Referer: ".$this->referer."\r\n";
		if(!empty($this->cookies))
		{
			if(!is_array($this->cookies))
				$this->cookies = (array)$this->cookies;

			$cookie_pairs = array();
			foreach($this->cookies as $cookieKey => $cookieVal)
				$cookie_pairs[] = $cookieKey."=".urlencode($cookieVal);
			$headers .= "Cookie: ".implode("; ", $cookie_pairs)."\r\n";
		}
		if(!empty($this->rawheaders))
		{
			if(!is_array($this->rawheaders))
				$this->rawheaders = (array)$this->rawheaders;
			// foreach always starts at the first element, so the raw headers
			// are also sent on follow-up requests (redirects, frames)
			foreach($this->rawheaders as $headerKey => $headerVal)
				$headers .= $headerKey.": ".$headerVal."\r\n";
		}
		if(!empty($content_type)) {
			$headers .= "Content-type: $content_type";
			if ($content_type == "multipart/form-data")
				$headers .= "; boundary=".$this->_mime_boundary;
			$headers .= "\r\n";
		}
		if(!empty($body))
			$headers .= "Content-length: ".strlen($body)."\r\n";
		if(!empty($this->user) || !empty($this->pass))
			$headers .= "Authorization: Basic ".base64_encode($this->user.":".$this->pass)."\r\n";

		//add proxy auth headers
		if(!empty($this->proxy_user))
			$headers .= 'Proxy-Authorization: ' . 'Basic ' . base64_encode($this->proxy_user . ':' . $this->proxy_pass)."\r\n";

		// blank line ends the request head
		$headers .= "\r\n";

		// set the read timeout if needed
		if ($this->read_timeout > 0)
			socket_set_timeout($fp, $this->read_timeout);
		$this->timed_out = false;

		$request = $headers.$body;
		fwrite($fp,$request,strlen($request));

		$this->_redirectaddr = false;
		// start with an empty list so the property stays defined even when
		// the server sends nothing
		$this->headers = array();

		while($currentHeader = fgets($fp,$this->_maxlinelen))
		{
			if ($this->read_timeout > 0 && $this->_check_timeout($fp))
			{
				$this->status=-100;
				return false;
			}

			// an empty line separates headers from the body
			if($currentHeader == "\r\n")
				break;

			// if a header begins with Location: or URI:, set the redirect
			if(preg_match("/^(Location:|URI:)/i",$currentHeader))
			{
				// get URL portion of the redirect
				preg_match("/^(Location:|URI:)[ ]+(.*)/i",chop($currentHeader),$matches);
				// look for :// in the Location header to see if hostname is included
				if (!empty($matches)) {
					if(!preg_match("|\:\/\/|",$matches[2]))
					{
						// no host in the path, so prepend
						$this->_redirectaddr = $uri_scheme."://".$this->host.":".$this->port;
						// eliminate double slash
						if(!preg_match("|^/|",$matches[2]))
							$this->_redirectaddr .= "/".$matches[2];
						else
							$this->_redirectaddr .= $matches[2];
					}
					else
						$this->_redirectaddr = $matches[2];
				}
			}

			if(preg_match("|^HTTP/|",$currentHeader))
			{
				// second token of the status line is the status code
				if(preg_match("|^HTTP/[^\s]*\s(.*?)\s|",$currentHeader, $status))
				{
					$this->status= $status[1];
				}
				$this->response_code = $currentHeader;
			}

			$this->headers[] = $currentHeader;
		}

		// read the body until fread() returns nothing more
		$results = '';
		while(true)
		{
			$_data = fread($fp, $this->maxlength);
			if ($_data === false || strlen($_data) == 0)
				break;
			$results .= $_data;
		}

		if ($this->read_timeout > 0 && $this->_check_timeout($fp))
		{
			$this->status=-100;
			return false;
		}

		// check if there is a a redirect meta tag
		if(preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]*URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",$results,$match))
		{
			$this->_redirectaddr = $this->_expandlinks($match[1],$URI);
		}

		// have we hit our frame depth and is there frame src to fetch?
		if(($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i",$results,$match))
		{
			$this->results[] = $results;
			for($x=0; $x<count($match[1]); $x++)
				$this->_frameurls[] = $this->_expandlinks($match[1][$x],$uri_scheme."://".$this->host);
		}
		// have we already fetched framed content?
		elseif(is_array($this->results))
			$this->results[] = $results;
		// no framed content
		else
			$this->results = $results;

		return true;
	}
 935
 936	/*======================================================================*\
 937	 Function:	_httpsrequest
 938	Purpose:	go get the https data from the server using curl
 939	Input:		$url		the url to fetch
 940	$URI		the full URI
 941	$body		body contents to send if any (POST)
 942	Output:
 943	\*======================================================================*/
 944
	function _httpsrequest($url,$URI,$http_method,$content_type="",$body="")
	{
		// Fetch $URI over HTTPS, using the curl extension when it is loaded
		// and the external curl binary at $this->curl_path otherwise.
		//
		// $url           not used by this transport (curl is handed the full $URI)
		// $URI           the full URI to fetch
		// $http_method   not used: a non-empty $body makes curl send a POST,
		//                otherwise it sends a GET
		// $content_type  value for the Content-type header, if any
		// $body          body contents to send, if any (POST)
		//
		// Returns true on success. The response body is stored in (or appended
		// to) $this->results, the header lines go to $this->headers, and
		// $this->response_code, $this->status and $this->_redirectaddr are
		// filled in from the response. Returns false with $this->error set
		// when the transfer itself fails.

		if($this->passcookies && $this->_redirectaddr)
			$this->setcookies();

		$headers = array();
		$URI_PARTS = parse_url($URI);

		// the request line ("GET ... HTTP/1.x") is generated by curl itself
		if(!empty($this->agent))
			$headers[] = "User-Agent: ".$this->agent;
		if(!empty($this->host))
		{
			if(!empty($this->port) && $this->port!=80)
				$headers[] = "Host: ".$this->host.":".$this->port;
			else
				$headers[] = "Host: ".$this->host;
		}
		if(!empty($this->accept))
			$headers[] = "Accept: ".$this->accept;
		if(!empty($this->referer))
			$headers[] = "Referer: ".$this->referer;
		if(!empty($this->cookies))
		{
			if(!is_array($this->cookies))
				$this->cookies = (array)$this->cookies;

			$cookie_str = 'Cookie: ';
			foreach ( $this->cookies as $cookieKey => $cookieVal )
				$cookie_str .= $cookieKey."=".urlencode($cookieVal)."; ";
			// drop the trailing "; "
			$headers[] = substr($cookie_str,0,-2);
		}
		if(!empty($this->rawheaders))
		{
			if(!is_array($this->rawheaders))
				$this->rawheaders = (array)$this->rawheaders;
			// foreach instead of each(): each() was removed in PHP 8 and also
			// depended on the array's internal pointer being at the start
			foreach($this->rawheaders as $headerKey => $headerVal)
				$headers[] = $headerKey.": ".$headerVal;
		}
		if(!empty($content_type)) {
			if ($content_type == "multipart/form-data")
				$headers[] = "Content-type: $content_type; boundary=".$this->_mime_boundary;
			else
				$headers[] = "Content-type: $content_type";
		}
		if(!empty($body))
			$headers[] = "Content-length: ".strlen($body);
		if(!empty($this->user) || !empty($this->pass))
			$headers[] = "Authorization: BASIC ".base64_encode($this->user.":".$this->pass);

		if (function_exists('curl_init')) {
			$ch = curl_init();
			curl_setopt($ch, CURLOPT_URL, $URI);
			curl_setopt($ch, CURLOPT_HEADER, true);
			// NOTE(review): peer and host verification are switched off (the
			// command-line branch uses -k as well), which permits
			// man-in-the-middle attacks. Left unchanged because existing
			// callers may rely on it.
			curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, FALSE);
			curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, FALSE);
			curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
			curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
			curl_setopt($ch, CURLOPT_TIMEOUT, $this->read_timeout);
			if(!empty($body)) {
				curl_setopt($ch, CURLOPT_POST, true);
				curl_setopt($ch, CURLOPT_POSTFIELDS, $body);
			}
			$data = curl_exec($ch);
			if ($data === false) {
				$this->error = "Error: Curl error  ".curl_error($ch);
				return false;
			}
			// Split headers from body by the header size curl reports rather
			// than at the first blank line: an interim "100 Continue" block
			// would otherwise leave the real headers inside the body.
			$header_size = curl_getinfo($ch, CURLINFO_HEADER_SIZE);
			$result_headers = explode("\r\n", (string)substr($data, 0, $header_size));
			$results = (string)substr($data, $header_size);
			unset($data);
		} else {
			// Every value is passed through escapeshellarg() so that header
			// values, the body and the URI cannot break out of the command.
			$cmdline_params = "";
			foreach($headers as $header)
				$cmdline_params .= " -H ".escapeshellarg($header);

			if(!empty($body))
				$cmdline_params .= " -d ".escapeshellarg($body);

			if($this->read_timeout > 0)
				$cmdline_params .= " -m ".escapeshellarg((string)$this->read_timeout);

			// The original read an undefined local $temp_dir here; presumably
			// the class-level temp_dir setting was intended (TODO confirm), so
			// use it when set and fall back to the system temp directory.
			$temp_dir = !empty($this->temp_dir) ? $this->temp_dir : sys_get_temp_dir();
			$headerfile = tempnam($temp_dir, "sno");
			if($headerfile === false)
			{
				$this->error = "Error: could not create a temporary file for the response headers.";
				return false;
			}

			$output = array();
			exec($this->curl_path." -k -D ".escapeshellarg($headerfile).$cmdline_params." ".escapeshellarg($URI),$output,$return);

			if($return)
			{
				// do not leave the temp file behind on failure
				unlink($headerfile);
				$this->error = "Error: cURL could not retrieve the document, error $return.";
				return false;
			}

			$results = implode("\r\n",$output);

			$result_headers = file($headerfile);
			if($result_headers === false)
				$result_headers = array();
			unlink($headerfile);
		}

		$this->_redirectaddr = false;
		// reset to an empty list so the property stays defined even when the
		// response carried no header lines
		$this->headers = array();

		foreach($result_headers as $header_line)
		{
			// skip the blank separator lines between/after header blocks
			if(chop($header_line) === '')
				continue;

			// if a header begins with Location: or URI:, set the redirect.
			// The original pattern "(Location: |URI:)\s+" demanded two
			// whitespace characters after "Location:" and was case-sensitive,
			// so ordinary redirects were never picked up.
			if(preg_match("/^(Location:|URI:)\s+(.*)/i",chop($header_line),$matches))
			{
				// look for :// in the value to see if a hostname is included
				if(!preg_match("|\:\/\/|",$matches[2]))
				{
					// no host in the path, so prepend
					$this->_redirectaddr = $URI_PARTS["scheme"]."://".$this->host;
					// eliminate double slash
					if(!preg_match("|^/|",$matches[2]))
						$this->_redirectaddr .= "/".$matches[2];
					else
						$this->_redirectaddr .= $matches[2];
				}
				else
					$this->_redirectaddr = $matches[2];
			}

			if(preg_match("|^HTTP/|",$header_line))
			{
				// record the numeric status code, as the plain-HTTP path does
				if(preg_match("|^HTTP/\S*\s+(\d+)|",$header_line,$status))
					$this->status = $status[1];
				$this->response_code = $header_line;
			}

			$this->headers[] = $header_line;
		}

		// check if there is a redirect meta tag
		if(preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]*URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",$results,$match))
		{
			$this->_redirectaddr = $this->_expandlinks($match[1],$URI);
		}

		// have we hit our frame depth and is there frame src to fetch?
		if(($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i",$results,$match))
		{
			$this->results[] = $results;
			for($x=0; $x<count($match[1]); $x++)
				$this->_frameurls[] = $this->_expandlinks($match[1][$x],$URI_PARTS["scheme"]."://".$this->host);
		}
		// have we already fetched framed content?
		elseif(is_array($this->results))
			$this->results[] = $results;
		// no framed content
		else
			$this->results = $results;

		return true;
	}
1107
1108	/*======================================================================*\
1109	 Function:	setcookies()
1110	Purpose:	set cookies for a redirection
1111	\*======================================================================*/
1112
1113	function setcookies()
1114	{
1115		for($x=0; $x<count($this->headers); $x++)
1116		{
1117			if(preg_match('/^set-cookie:[\s]+([^=]+)=([^;]+)/i', $this->headers[$x],$match))
1118				$this->cookies[$match[1]] = urldecode($match[2]);
1119		}
1120	}
1121
1122
1123	/*======================================================================*\
1124	 Function:	_check_timeout
1125	Purpose:	checks whether timeout has occurred
1126	Input:		$fp	file pointer
1127	\*======================================================================*/
1128
1129	function _check_timeout($fp)
1130	{
1131		if ($this->read_timeout > 0) {
1132			$fp_status = socket_get_status($fp);
1133			if ($fp_status["timed_out"]) {
1134				$this->timed_out = true;
1135				return true;
1136			}
1137		}
1138		return false;
1139	}
1140
1141	/*======================================================================*\
1142	 Function:	_connect
1143	Purpose:	make a socket connection
1144	Input:		$fp	file pointer
1145	\*======================================================================*/
1146
1147	function _connect(&$fp)
1148	{
1149		if(!empty($this->proxy_host) && !empty($this->proxy_port))
1150		{
1151			$this->_isproxy = true;
1152
1153			$host = $this->proxy_host;
1154			$port = $this->proxy_port;
1155		}
1156		else
1157		{
1158			$host = $this->host;
1159			$port = $this->port;
1160		}
1161
1162		$this->status = 0;
1163
1164		if($fp = fsockopen(
1165				$host,
1166				$port,
1167				$errno,
1168				$errstr,
1169				$this->_fp_timeout
1170		))
1171		{
1172			// socket connection succeeded
1173
1174			return true;
1175		}
1176		else
1177		{
1178			// socket connection failed
1179			$this->status = $errno;
1180			switch($errno)
1181			{
1182				case -3:
1183					$this->error="socket creation failed (-3)";
1184				case -4:
1185					$this->error="dns lookup failure (-4)";
1186				case -5:
1187					$this->error="connection refused or timed out (-5)";
1188				default:
1189					$this->error="connection failed (".$errno.")";
1190			}
1191			return false;
1192		}
1193	}
1194	/*======================================================================*\
1195	 Function:	_disconnect
1196	Purpose:	disconnect a socket connection
1197	Input:		$fp	file pointer
1198	\*======================================================================*/
1199
1200	function _disconnect($fp)
1201	{
1202		return(fclose($fp));
1203	}
1204
1205
1206	/*======================================================================*\
1207	 Function:	_prepare_post_body
1208	Purpose:	Prepare post body according to encoding type
1209	Input:		$formvars  - form variables
1210	$formfiles - form upload files
1211	Output:		post body
1212	\*======================================================================*/
1213
1214	function _prepare_post_body($formvars, $formfiles)
1215	{
1216		settype($formvars, "array");
1217		settype($formfiles, "array");
1218		$postdata = '';
1219
1220		if (count($formvars) == 0 && count($formfiles) == 0)
1221			return;
1222		if (is_string($formvars)) return $formvars;
1223		if(count($formvars) == 1) return $formvars[0];
1224		switch ($this->_submit_type) {
1225			case "application/x-www-form-urlencoded":
1226				reset($formvars);
1227				while(list($key,$val) = each($formvars)) {
1228					if (is_array($val) || is_object($val)) {
1229						while (list($cur_key, $cur_val) = each($val)) {
1230							$postdata .= urlencode($key)."[]=".urlencode($cur_val)."&";
1231						}
1232					} else
1233						$postdata .= urlencode($key)."=".urlencode($val)."&";
1234				}
1235				break;
1236
1237			case "multipart/form-data":
1238				$this->_mime_boundary = "--------".md5(uniqid(microtime()));
1239
1240				reset($formvars);
1241				while(list($key,$val) = each($formvars)) {
1242					if (is_array($val) || is_object($val)) {
1243						while (list($cur_key, $cur_val) = each($val)) {
1244							$postdata .= "--".$this->_mime_boundary."\r\n";
1245							$postdata .= "Content-Disposition: form-data; name=\"$key\[\]\"\r\n\r\n";
1246							$postdata .= "$cur_val\r\n";
1247						}
1248					} else {
1249						$postdata .= "--".$this->_mime_boundary."\r\n";
1250						$postdata .= "Content-Disposition: form-data; name=\"$key\"\r\n\r\n";
1251						$postdata .= "$val\r\n";
1252					}
1253				}
1254
1255				reset($formfiles);
1256				while (list($field_name, $file_names) = each($formfiles)) {
1257					settype($file_names, "array");
1258					while (list(, $file_name) = each($file_names)) {
1259						$file_content = file_get_contents($file_name);
1260						if (!$file_content) continue;
1261
1262						$base_name = basename($file_name);
1263
1264						$postdata .= "--".$this->_mime_boundary."\r\n";
1265						$postdata .= "Content-Disposition: form-data; name=\"$field_name\"; filename=\"$base_name\"\r\nContent-Type: image/jpeg\r\n\r\n";
1266						$postdata .= "$file_content\r\n";
1267					}
1268				}
1269				$postdata .= "--".$this->_mime_boundary."--\r\n";
1270				break;
1271		}
1272
1273		return $postdata;
1274	}
1275}