PageRenderTime 394ms CodeModel.GetById 81ms app.highlight 242ms RepoModel.GetById 60ms app.codeStats 0ms

/wp-includes/class-snoopy.php

https://bitbucket.org/aqge/deptandashboard
PHP | 1256 lines | 996 code | 81 blank | 179 comment | 78 complexity | 1ea3224bb940f17228b7db290fbb01b1 MD5 | raw file
   1<?php
   2
   3/**
   4 * Deprecated. Use WP_HTTP (http.php, class-http.php) instead.
   5 */
   6_deprecated_file( basename( __FILE__ ), '3.0', WPINC . '/http.php' );
   7
   8if ( !class_exists( 'Snoopy' ) ) :
   9/*************************************************
  10
  11Snoopy - the PHP net client
  12Author: Monte Ohrt <monte@ispi.net>
  13Copyright (c): 1999-2008 New Digital Group, all rights reserved
  14Version: 1.2.4
  15
  16 * This library is free software; you can redistribute it and/or
  17 * modify it under the terms of the GNU Lesser General Public
  18 * License as published by the Free Software Foundation; either
  19 * version 2.1 of the License, or (at your option) any later version.
  20 *
  21 * This library is distributed in the hope that it will be useful,
  22 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  23 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  24 * Lesser General Public License for more details.
  25 *
  26 * You should have received a copy of the GNU Lesser General Public
  27 * License along with this library; if not, write to the Free Software
  28 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  29
  30You may contact the author of Snoopy by e-mail at:
  31monte@ohrt.com
  32
  33The latest version of Snoopy can be obtained from:
  34http://snoopy.sourceforge.net/
  35
  36*************************************************/
  37
  38class Snoopy
  39{
  40	/**** Public variables ****/
  41
  42	/* user definable vars */
  43
  44	var $host			=	"www.php.net";		// host name we are connecting to
  45	var $port			=	80;					// port we are connecting to
  46	var $proxy_host		=	"";					// proxy host to use
  47	var $proxy_port		=	"";					// proxy port to use
  48	var $proxy_user		=	"";					// proxy user to use
  49	var $proxy_pass		=	"";					// proxy password to use
  50
  51	var $agent			=	"Snoopy v1.2.4";	// agent we masquerade as
  52	var	$referer		=	"";					// referer info to pass
  53	var $cookies		=	array();			// array of cookies to pass
  54												// $cookies["username"]="joe";
  55	var	$rawheaders		=	array();			// array of raw headers to send
  56												// $rawheaders["Content-type"]="text/html";
  57
  58	var $maxredirs		=	5;					// http redirection depth maximum. 0 = disallow
  59	var $lastredirectaddr	=	"";				// contains address of last redirected address
  60	var	$offsiteok		=	true;				// allows redirection off-site
  61	var $maxframes		=	0;					// frame content depth maximum. 0 = disallow
  62	var $expandlinks	=	true;				// expand links to fully qualified URLs.
  63												// this only applies to fetchlinks()
  64												// submitlinks(), and submittext()
  65	var $passcookies	=	true;				// pass set cookies back through redirects
  66												// NOTE: this currently does not respect
  67												// dates, domains or paths.
  68
  69	var	$user			=	"";					// user for http authentication
  70	var	$pass			=	"";					// password for http authentication
  71
  72	// http accept types
  73	var $accept			=	"image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, */*";
  74
  75	var $results		=	"";					// where the content is put
  76
  77	var $error			=	"";					// error messages sent here
  78	var	$response_code	=	"";					// response code returned from server
  79	var	$headers		=	array();			// headers returned from server sent here
  80	var	$maxlength		=	500000;				// max return data length (body)
  81	var $read_timeout	=	0;					// timeout on read operations, in seconds
  82												// supported only since PHP 4 Beta 4
  83												// set to 0 to disallow timeouts
  84	var $timed_out		=	false;				// if a read operation timed out
  85	var	$status			=	0;					// http request status
  86
  87	var $temp_dir		=	"/tmp";				// temporary directory that the webserver
  88												// has permission to write to.
  89												// under Windows, this should be C:\temp
  90
  91	var	$curl_path		=	"/usr/local/bin/curl";
  92												// Snoopy will use cURL for fetching
  93												// SSL content if a full system path to
  94												// the cURL binary is supplied here.
  95												// set to false if you do not have
  96												// cURL installed. See http://curl.haxx.se
  97												// for details on installing cURL.
  98												// Snoopy does *not* use the cURL
  99												// library functions built into php,
 100												// as these functions are not stable
 101												// as of this Snoopy release.
 102
 103	/**** Private variables ****/
 104
 105	var	$_maxlinelen	=	4096;				// max line length (headers)
 106
 107	var $_httpmethod	=	"GET";				// default http request method
 108	var $_httpversion	=	"HTTP/1.0";			// default http request version
 109	var $_submit_method	=	"POST";				// default submit method
 110	var $_submit_type	=	"application/x-www-form-urlencoded";	// default submit type
 111	var $_mime_boundary	=   "";					// MIME boundary for multipart/form-data submit type
 112	var $_redirectaddr	=	false;				// will be set if page fetched is a redirect
 113	var $_redirectdepth	=	0;					// increments on an http redirect
 114	var $_frameurls		= 	array();			// frame src urls
 115	var $_framedepth	=	0;					// increments on frame depth
 116
 117	var $_isproxy		=	false;				// set if using a proxy server
 118	var $_fp_timeout	=	30;					// timeout for socket connection
 119
 120/*======================================================================*\
 121	Function:	fetch
 122	Purpose:	fetch the contents of a web page
 123				(and possibly other protocols in the
 124				future like ftp, nntp, gopher, etc.)
 125	Input:		$URI	the location of the page to fetch
 126	Output:		$this->results	the output text from the fetch
 127\*======================================================================*/
 128
	/**
	 * Fetch a page over http or https.
	 *
	 * Issues the request, then follows a pending redirect (limited by
	 * $this->maxredirs and, for targets not starting with "http://<host>",
	 * by $this->offsiteok) and finally fetches any frame URLs queued in
	 * $this->_frameurls (limited by $this->maxframes).
	 *
	 * @param string $URI the location of the page to fetch
	 * @return bool false when the scheme is missing or unsupported, when
	 *              $this->_connect() fails (http), or when no executable cURL
	 *              binary is configured (https); true otherwise
	 */
	function fetch($URI)
	{
		$URI_PARTS = parse_url($URI);
		// parse_url() returns false for seriously malformed URLs; use an empty
		// array so such input ends up in the "invalid protocol" branch below.
		if (!is_array($URI_PARTS))
			$URI_PARTS = array();
		if (!empty($URI_PARTS["user"]))
			$this->user = $URI_PARTS["user"];
		if (!empty($URI_PARTS["pass"]))
			$this->pass = $URI_PARTS["pass"];
		if (empty($URI_PARTS["query"]))
			$URI_PARTS["query"] = '';
		if (empty($URI_PARTS["path"]))
			$URI_PARTS["path"] = '';
		// default the remaining components so no lookup hits a missing key
		if (!isset($URI_PARTS["scheme"]))
			$URI_PARTS["scheme"] = '';
		if (!isset($URI_PARTS["host"]))
			$URI_PARTS["host"] = '';

		// without a proxy only path + query go on the request line
		$path = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");

		switch(strtolower($URI_PARTS["scheme"]))
		{
			case "http":
				$this->host = $URI_PARTS["host"];
				if(!empty($URI_PARTS["port"]))
					$this->port = $URI_PARTS["port"];
				if(!$this->_connect($fp))
					return false;

				// using a proxy: send the entire URI; otherwise only the path
				$this->_httprequest($this->_isproxy ? $URI : $path, $fp, $URI, $this->_httpmethod);
				$this->_disconnect($fp);
				break;

			case "https":
				// https is delegated to an external cURL binary
				if(!$this->curl_path)
					return false;
				if(function_exists("is_executable") && !is_executable($this->curl_path))
					return false;
				$this->host = $URI_PARTS["host"];
				if(!empty($URI_PARTS["port"]))
					$this->port = $URI_PARTS["port"];

				// using a proxy: send the entire URI; otherwise only the path
				$this->_httpsrequest($this->_isproxy ? $URI : $path, $URI, $this->_httpmethod);
				break;

			default:
				// not a valid protocol
				$this->error	=	'Invalid protocol "'.$URI_PARTS["scheme"].'"'."\n";
				return false;
		}

		/* url was redirected, check if we've hit the max depth */
		if($this->_redirectaddr && $this->maxredirs > $this->_redirectdepth)
		{
			// only follow redirect if it's on this site, or offsiteok is true
			if(preg_match("|^http://".preg_quote($this->host, "|")."|i",$this->_redirectaddr) || $this->offsiteok)
			{
				/* follow the redirect */
				$this->_redirectdepth++;
				$this->lastredirectaddr=$this->_redirectaddr;
				$this->fetch($this->_redirectaddr);
			}
		}

		if($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
		{
			// work on a copy: the nested fetch() calls may queue further frames
			$frameurls = $this->_frameurls;
			$this->_frameurls = array();

			foreach($frameurls as $frameurl)
			{
				if($this->_framedepth >= $this->maxframes)
					break;
				$this->fetch($frameurl);
				$this->_framedepth++;
			}
		}

		return true;
	}
 267
 268/*======================================================================*\
 269	Function:	submit
 270	Purpose:	submit an http form
 271	Input:		$URI	the location to post the data
 272				$formvars	the formvars to use.
 273					format: $formvars["var"] = "val";
 274				$formfiles  an array of files to submit
 275					format: $formfiles["var"] = "/dir/filename.ext";
 276	Output:		$this->results	the text output from the post
 277\*======================================================================*/
 278
	/**
	 * Submit an http form.
	 *
	 * Sends the prepared body with $this->_submit_method, then follows a
	 * pending redirect (re-submitting the form unless the redirect address
	 * contains a "?", in which case it is fetched with fetch()) and finally
	 * fetches any frame URLs queued in $this->_frameurls.
	 *
	 * @param string $URI       the location to post the data
	 * @param mixed  $formvars  form variables, format: $formvars["var"] = "val";
	 * @param mixed  $formfiles files to submit, format: $formfiles["var"] = "/dir/filename.ext";
	 * @return bool false when the scheme is missing or unsupported, when
	 *              $this->_connect() fails (http), or when no executable cURL
	 *              binary is configured (https); true otherwise
	 */
	function submit($URI, $formvars="", $formfiles="")
	{
		$postdata = $this->_prepare_post_body($formvars, $formfiles);

		$URI_PARTS = parse_url($URI);
		// parse_url() returns false for seriously malformed URLs; use an empty
		// array so such input ends up in the "invalid protocol" branch below.
		if (!is_array($URI_PARTS))
			$URI_PARTS = array();
		if (!empty($URI_PARTS["user"]))
			$this->user = $URI_PARTS["user"];
		if (!empty($URI_PARTS["pass"]))
			$this->pass = $URI_PARTS["pass"];
		if (empty($URI_PARTS["query"]))
			$URI_PARTS["query"] = '';
		if (empty($URI_PARTS["path"]))
			$URI_PARTS["path"] = '';
		// default the remaining components so no lookup hits a missing key
		if (!isset($URI_PARTS["scheme"]))
			$URI_PARTS["scheme"] = '';
		if (!isset($URI_PARTS["host"]))
			$URI_PARTS["host"] = '';

		// without a proxy only path + query go on the request line
		$path = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");

		switch(strtolower($URI_PARTS["scheme"]))
		{
			case "http":
				$this->host = $URI_PARTS["host"];
				if(!empty($URI_PARTS["port"]))
					$this->port = $URI_PARTS["port"];
				if(!$this->_connect($fp))
					return false;

				// using a proxy: send the entire URI; otherwise only the path
				$this->_httprequest($this->_isproxy ? $URI : $path, $fp, $URI, $this->_submit_method, $this->_submit_type, $postdata);
				$this->_disconnect($fp);
				break;

			case "https":
				// https is delegated to an external cURL binary
				if(!$this->curl_path)
					return false;
				if(function_exists("is_executable") && !is_executable($this->curl_path))
					return false;
				$this->host = $URI_PARTS["host"];
				if(!empty($URI_PARTS["port"]))
					$this->port = $URI_PARTS["port"];

				// using a proxy: send the entire URI; otherwise only the path
				$this->_httpsrequest($this->_isproxy ? $URI : $path, $URI, $this->_submit_method, $this->_submit_type, $postdata);
				break;

			default:
				// not a valid protocol
				$this->error	=	'Invalid protocol "'.$URI_PARTS["scheme"].'"'."\n";
				return false;
		}

		/* url was redirected, check if we've hit the max depth */
		if($this->_redirectaddr && $this->maxredirs > $this->_redirectdepth)
		{
			// a redirect address without our scheme is treated as relative to this host
			if(!preg_match("|^".$URI_PARTS["scheme"]."://|", $this->_redirectaddr))
				$this->_redirectaddr = $this->_expandlinks($this->_redirectaddr,$URI_PARTS["scheme"]."://".$URI_PARTS["host"]);

			// only follow redirect if it's on this site, or offsiteok is true
			if(preg_match("|^http://".preg_quote($this->host, "|")."|i",$this->_redirectaddr) || $this->offsiteok)
			{
				/* follow the redirect */
				$this->_redirectdepth++;
				$this->lastredirectaddr=$this->_redirectaddr;
				if( strpos( $this->_redirectaddr, "?" ) > 0 )
					$this->fetch($this->_redirectaddr); // the redirect has changed the request method from post to get
				else
					$this->submit($this->_redirectaddr,$formvars, $formfiles);
			}
		}

		if($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
		{
			// work on a copy: the nested fetch() calls may queue further frames
			$frameurls = $this->_frameurls;
			$this->_frameurls = array();

			foreach($frameurls as $frameurl)
			{
				if($this->_framedepth >= $this->maxframes)
					break;
				$this->fetch($frameurl);
				$this->_framedepth++;
			}
		}

		return true;
	}
 433
 434/*======================================================================*\
 435	Function:	fetchlinks
 436	Purpose:	fetch the links from a web page
 437	Input:		$URI	where you are fetching from
 438	Output:		$this->results	an array of the URLs
 439\*======================================================================*/
 440
	/**
	 * Fetch a page and replace $this->results with the links found in it.
	 *
	 * When $this->results is an array (one entry per fetched document), each
	 * entry is stripped and expanded on its own, the same way submitlinks()
	 * does it; handing the whole array of link lists to _expandlinks() at once
	 * would pass nested arrays to preg_replace().
	 *
	 * @param string $URI where you are fetching from
	 * @return bool true on success, false when fetch() fails
	 */
	function fetchlinks($URI)
	{
		if (!$this->fetch($URI))
			return false;

		// relative links are resolved against the last redirect target, if any
		if($this->lastredirectaddr)
			$URI = $this->lastredirectaddr;

		if(is_array($this->results))
		{
			for($x=0;$x<count($this->results);$x++)
			{
				$this->results[$x] = $this->_striplinks($this->results[$x]);
				if($this->expandlinks)
					$this->results[$x] = $this->_expandlinks($this->results[$x],$URI);
			}
		}
		else
		{
			$this->results = $this->_striplinks($this->results);
			if($this->expandlinks)
				$this->results = $this->_expandlinks($this->results,$URI);
		}

		return true;
	}
 462
 463/*======================================================================*\
 464	Function:	fetchform
 465	Purpose:	fetch the form elements from a web page
 466	Input:		$URI	where you are fetching from
 467	Output:		$this->results	the resulting html form
 468\*======================================================================*/
 469
	/**
	 * Fetch a page and reduce $this->results to its form markup.
	 *
	 * @param string $URI where you are fetching from
	 * @return bool true on success, false when fetch() fails
	 */
	function fetchform($URI)
	{
		// nothing to strip if the page could not be fetched
		if (!$this->fetch($URI))
			return false;

		// single document: strip it in place
		if (!is_array($this->results))
		{
			$this->results = $this->_stripform($this->results);
			return true;
		}

		// several documents: strip each entry
		$index = 0;
		while ($index < count($this->results))
		{
			$this->results[$index] = $this->_stripform($this->results[$index]);
			$index++;
		}

		return true;
	}
 489
 490
 491/*======================================================================*\
 492	Function:	fetchtext
 493	Purpose:	fetch the text from a web page, stripping the links
 494	Input:		$URI	where you are fetching from
 495	Output:		$this->results	the text from the web page
 496\*======================================================================*/
 497
	/**
	 * Fetch a page and reduce $this->results to its text content.
	 *
	 * @param string $URI where you are fetching from
	 * @return bool true on success, false when fetch() fails
	 */
	function fetchtext($URI)
	{
		// nothing to strip if the page could not be fetched
		if (!$this->fetch($URI))
			return false;

		// single document: strip it in place
		if (!is_array($this->results))
		{
			$this->results = $this->_striptext($this->results);
			return true;
		}

		// several documents: strip each entry
		$index = 0;
		while ($index < count($this->results))
		{
			$this->results[$index] = $this->_striptext($this->results[$index]);
			$index++;
		}

		return true;
	}
 514
 515/*======================================================================*\
 516	Function:	submitlinks
 517	Purpose:	grab links from a form submission
 518	Input:		$URI	where you are submitting from
 519	Output:		$this->results	an array of the links from the post
 520\*======================================================================*/
 521
	/**
	 * Submit a form and replace $this->results with the links of the response.
	 *
	 * @param string $URI       where you are submitting from
	 * @param mixed  $formvars  form variables passed on to submit()
	 * @param mixed  $formfiles form files passed on to submit()
	 * @return bool true on success, false when submit() fails
	 */
	function submitlinks($URI, $formvars="", $formfiles="")
	{
		if (!$this->submit($URI, $formvars, $formfiles))
			return false;

		// links are resolved against the last redirect target when there is one
		$base = $this->lastredirectaddr ? $this->lastredirectaddr : $URI;

		if (!is_array($this->results))
		{
			$links = $this->_striplinks($this->results);
			if ($this->expandlinks)
				$links = $this->_expandlinks($links, $base);
			$this->results = $links;
			return true;
		}

		// one entry per fetched document
		$index = 0;
		while ($index < count($this->results))
		{
			$links = $this->_striplinks($this->results[$index]);
			if ($this->expandlinks)
				$links = $this->_expandlinks($links, $base);
			$this->results[$index] = $links;
			$index++;
		}

		return true;
	}
 548
 549/*======================================================================*\
 550	Function:	submittext
 551	Purpose:	grab text from a form submission
 552	Input:		$URI	where you are submitting from
 553	Output:		$this->results	the text from the web page
 554\*======================================================================*/
 555
	/**
	 * Submit a form and replace $this->results with the text of the response.
	 *
	 * @param string $URI       where you are submitting from
	 * @param mixed  $formvars  form variables passed on to submit()
	 * @param mixed  $formfiles form files passed on to submit()
	 * @return bool true on success, false when submit() fails
	 */
	function submittext($URI, $formvars = "", $formfiles = "")
	{
		if (!$this->submit($URI, $formvars, $formfiles))
			return false;

		// the expansion step uses the last redirect target as base when there is one
		$base = $this->lastredirectaddr ? $this->lastredirectaddr : $URI;

		if (!is_array($this->results))
		{
			$text = $this->_striptext($this->results);
			if ($this->expandlinks)
				$text = $this->_expandlinks($text, $base);
			$this->results = $text;
			return true;
		}

		// one entry per fetched document
		$index = 0;
		while ($index < count($this->results))
		{
			$text = $this->_striptext($this->results[$index]);
			if ($this->expandlinks)
				$text = $this->_expandlinks($text, $base);
			$this->results[$index] = $text;
			$index++;
		}

		return true;
	}
 582
 583
 584
 585/*======================================================================*\
 586	Function:	set_submit_multipart
 587	Purpose:	Set the form submission content type to
 588				multipart/form-data
 589\*======================================================================*/
	function set_submit_multipart()
	{
		// content type handed to the request functions by later submit() calls
		$submit_type = "multipart/form-data";
		$this->_submit_type = $submit_type;
	}
 594
 595
 596/*======================================================================*\
 597	Function:	set_submit_normal
 598	Purpose:	Set the form submission content type to
 599				application/x-www-form-urlencoded
 600\*======================================================================*/
	function set_submit_normal()
	{
		// content type handed to the request functions by later submit() calls
		$submit_type = "application/x-www-form-urlencoded";
		$this->_submit_type = $submit_type;
	}
 605
 606
 607
 608
 609/*======================================================================*\
 610	Private functions
 611\*======================================================================*/
 612
 613
 614/*======================================================================*\
 615	Function:	_striplinks
 616	Purpose:	strip the hyperlinks from an html document
 617	Input:		$document	document to strip.
 618	Output:		$match		an array of the links
 619\*======================================================================*/
 620
	/**
	 * Extract the href targets of all <a> tags in an html document.
	 *
	 * Quoted hrefs are returned first, followed by unquoted ones, so the
	 * result is not necessarily in document order.
	 *
	 * @param string $document document to strip
	 * @return array the non-empty link targets; an empty array when none are found
	 */
	function _striplinks($document)
	{
		preg_match_all("'<\s*a\s.*?href\s*=\s*			# find <a href=
						([\"\'])?					# find single or double quote
						(?(1) (.*?)\\1 | ([^\s\>]+))		# if quote found, match up to next matching
													# quote, otherwise match up to next space
						'isx",$document,$links);

		// always hand back an array, even when the document has no links
		$match = array();

		// catenate the non-empty matches from the conditional subpattern:
		// group 2 holds quoted hrefs, group 3 unquoted ones
		foreach(array(2, 3) as $group)
		{
			foreach($links[$group] as $val)
			{
				if(!empty($val))
					$match[] = $val;
			}
		}

		// return the links
		return $match;
	}
 647
 648/*======================================================================*\
 649	Function:	_stripform
 650	Purpose:	strip the form elements from an html document
 651	Input:		$document	document to strip.
 652	Output:		$match		an array of the links
 653\*======================================================================*/
 654
	/**
	 * Reduce an html document to its form-related tags.
	 *
	 * @param string $document document to strip
	 * @return string the matched form, input, select, textarea and option
	 *                elements, joined with "\r\n"
	 */
	function _stripform($document)
	{
		// opening/closing form tags; an option tag also captures its trailing text
		$pattern = "'<\/?(FORM|INPUT|SELECT|TEXTAREA|(OPTION))[^<>]*>(?(2)(.*(?=<\/?(option|select)[^<>]*>[\r\n]*)|(?=[\r\n]*))|(?=[\r\n]*))'Usi";

		preg_match_all($pattern, $document, $found);

		// one matched element per line
		return implode("\r\n", $found[0]);
	}
 665
 666
 667
 668/*======================================================================*\
 669	Function:	_striptext
 670	Purpose:	strip the text from an html document
 671	Input:		$document	document to strip.
 672	Output:		$text		the resulting text
 673\*======================================================================*/
 674
	/**
	 * Reduce an html document to plain text.
	 *
	 * Removes script blocks and tags, collapses whitespace that follows a line
	 * break, and replaces a fixed list of html entities with single characters.
	 *
	 * @param string $document document to strip
	 * @return string the resulting text
	 */
	function _striptext($document)
	{
		// pattern => replacement; preg_replace() applies them in this order
		$rules = array(
			"'<script[^>]*?>.*?</script>'si"	=> "",			// strip out javascript
			"'<[\/\!]*?[^<>]*?>'si"				=> "",			// strip out html tags
			"'([\r\n])[\s]+'"					=> "\\1",		// strip out white space
			"'&(quot|#34|#034|#x22);'i"			=> "\"",		// replace html entities
			"'&(amp|#38|#038|#x26);'i"			=> "&",			// added hexadecimal values
			"'&(lt|#60|#060|#x3c);'i"			=> "<",
			"'&(gt|#62|#062|#x3e);'i"			=> ">",
			"'&(nbsp|#160|#xa0);'i"				=> " ",
			"'&(iexcl|#161);'i"					=> chr(161),
			"'&(cent|#162);'i"					=> chr(162),
			"'&(pound|#163);'i"					=> chr(163),
			"'&(copy|#169);'i"					=> chr(169),
			"'&(reg|#174);'i"					=> chr(174),
			"'&(deg|#176);'i"					=> chr(176),
			"'&(#39|#039|#x27);'"				=> chr(39),
			"'&(euro|#8364);'i"					=> chr(128),	// europe
			"'&a(uml|UML);'"					=> chr(0xE4),	// ANSI &auml;
			"'&o(uml|UML);'"					=> chr(0xF6),	// ANSI &ouml;
			"'&u(uml|UML);'"					=> chr(0xFC),	// ANSI &uuml;
			"'&A(uml|UML);'"					=> chr(0xC4),	// ANSI &Auml;
			"'&O(uml|UML);'"					=> chr(0xD6),	// ANSI &Ouml;
			"'&U(uml|UML);'"					=> chr(0xDC),	// ANSI &Uuml;
			"'&szlig;'i"						=> chr(0xDF),	// ANSI &szlig;
		);

		return preg_replace(array_keys($rules), array_values($rules), $document);
	}
 735
 736/*======================================================================*\
 737	Function:	_expandlinks
 738	Purpose:	expand each link into a fully qualified URL
 739	Input:		$links			the links to qualify
 740				$URI			the full URI to get the base from
 741	Output:		$expandedLinks	the expanded links
 742\*======================================================================*/
 743
	/**
	 * Expand each link into a fully qualified URL.
	 *
	 * @param mixed  $links the link or links to qualify (passed to preg_replace() as subject)
	 * @param string $URI   the full URI to get the base from
	 * @return mixed the expanded links, as returned by preg_replace()
	 */
	function _expandlinks($links,$URI)
	{
		// everything in front of the query string
		preg_match("/^[^\?]+/",$URI,$uri_head);

		// drop a trailing "/name.ext" component, then a trailing slash
		$base = preg_replace("|/[^\/\.]+\.[^\/\.]+$|","",$uri_head[0]);
		$base = preg_replace("|/$|","",$base);

		$base_parts = parse_url($base);
		$root = $base_parts["scheme"]."://".$base_parts["host"];

		// (pattern, replacement) pairs, applied in this order
		$rules = array(
			array("|^http://".preg_quote($this->host)."|i", ""),	// strip "http://<host>"
			array("|^(\/)|i", $root."/"),							// leading slash: prepend scheme and host
			array("|^(?!http://)(?!mailto:)|i", $base."/"),		// anything else without http:// or mailto:
			array("|/\./|", "/"),									// collapse "/./"
			array("|/[^\/]+/\.\./|", "/")							// collapse "/dir/../"
		);

		$search = array();
		$replace = array();
		foreach ($rules as $rule)
		{
			$search[] = $rule[0];
			$replace[] = $rule[1];
		}

		return preg_replace($search,$replace,$links);
	}
 773
 774/*======================================================================*\
 775	Function:	_httprequest
 776	Purpose:	go get the http data from the server
 777	Input:		$url		the url to fetch
 778				$fp			the current open file pointer
 779				$URI		the full URI
 780				$body		body contents to send if any (POST)
 781	Output:
 782\*======================================================================*/
 783
	/**
	 * Send one http request over an open socket and read the response.
	 *
	 * Fills $this->headers, $this->status and $this->response_code from the
	 * response head, stores the body in $this->results (appending when results
	 * are collected as an array), records a redirect target in
	 * $this->_redirectaddr and queues frame sources in $this->_frameurls.
	 *
	 * @param string   $url          the request target written on the request line
	 * @param resource $fp           the current open file pointer
	 * @param string   $URI          the full URI
	 * @param string   $http_method  the request method
	 * @param string   $content_type value for the Content-type header, if any
	 * @param string   $body         body contents to send if any (POST)
	 * @return bool false when a read timed out (status is set to -100), true otherwise
	 */
	function _httprequest($url,$fp,$URI,$http_method,$content_type="",$body="")
	{
		$cookie_headers = '';
		if($this->passcookies && $this->_redirectaddr)
			$this->setcookies();

		$URI_PARTS = parse_url($URI);
		if(empty($url))
			$url = "/";
		$headers = $http_method." ".$url." ".$this->_httpversion."\r\n";
		if(!empty($this->agent))
			$headers .= "User-Agent: ".$this->agent."\r\n";
		// a Host entry in rawheaders takes precedence over the generated one
		if(!empty($this->host) && !isset($this->rawheaders['Host'])) {
			$headers .= "Host: ".$this->host;
			if(!empty($this->port) && $this->port != 80)
				$headers .= ":".$this->port;
			$headers .= "\r\n";
		}
		if(!empty($this->accept))
			$headers .= "Accept: ".$this->accept."\r\n";
		if(!empty($this->referer))
			$headers .= "Referer: ".$this->referer."\r\n";
		if(!empty($this->cookies))
		{
			if(!is_array($this->cookies))
				$this->cookies = (array)$this->cookies;

			reset($this->cookies);
			if ( count($this->cookies) > 0 ) {
				$cookie_headers .= 'Cookie: ';
				foreach ( $this->cookies as $cookieKey => $cookieVal ) {
					$cookie_headers .= $cookieKey."=".urlencode($cookieVal)."; ";
				}
				// drop the trailing "; "
				$headers .= substr($cookie_headers,0,-2) . "\r\n";
			}
		}
		if(!empty($this->rawheaders))
		{
			if(!is_array($this->rawheaders))
				$this->rawheaders = (array)$this->rawheaders;
			// foreach does not depend on the array's internal pointer, so the raw
			// headers are sent on every request, including redirects and frames
			foreach($this->rawheaders as $headerKey => $headerVal)
				$headers .= $headerKey.": ".$headerVal."\r\n";
		}
		if(!empty($content_type)) {
			$headers .= "Content-type: $content_type";
			if ($content_type == "multipart/form-data")
				$headers .= "; boundary=".$this->_mime_boundary;
			$headers .= "\r\n";
		}
		if(!empty($body))
			$headers .= "Content-length: ".strlen($body)."\r\n";
		if(!empty($this->user) || !empty($this->pass))
			$headers .= "Authorization: Basic ".base64_encode($this->user.":".$this->pass)."\r\n";

		//add proxy auth headers
		if(!empty($this->proxy_user))
			$headers .= 'Proxy-Authorization: ' . 'Basic ' . base64_encode($this->proxy_user . ':' . $this->proxy_pass)."\r\n";

		// blank line ends the request head
		$headers .= "\r\n";

		// set the read timeout if needed
		if ($this->read_timeout > 0)
			socket_set_timeout($fp, $this->read_timeout);
		$this->timed_out = false;

		fwrite($fp,$headers.$body,strlen($headers.$body));

		$this->_redirectaddr = false;
		// start from an empty list so the property stays defined even when
		// the server sends no header lines
		$this->headers = array();

		while($currentHeader = fgets($fp,$this->_maxlinelen))
		{
			if ($this->read_timeout > 0 && $this->_check_timeout($fp))
			{
				$this->status=-100;
				return false;
			}

			// an empty line separates the response head from the body
			if($currentHeader == "\r\n")
				break;

			// if a header begins with Location: or URI:, set the redirect;
			// whitespace after the colon is optional
			if(preg_match("/^(Location:|URI:)[ ]*(.*)/i",chop($currentHeader),$matches))
			{
				// look for :// in the Location header to see if hostname is included
				if(!preg_match("|\:\/\/|",$matches[2]))
				{
					// no host in the path, so prepend
					$this->_redirectaddr = $URI_PARTS["scheme"]."://".$this->host.":".$this->port;
					// eliminate double slash
					if(!preg_match("|^/|",$matches[2]))
						$this->_redirectaddr .= "/".$matches[2];
					else
						$this->_redirectaddr .= $matches[2];
				}
				else
					$this->_redirectaddr = $matches[2];
			}

			if(preg_match("|^HTTP/|",$currentHeader))
			{
				// second token of the status line is the numeric status
				if(preg_match("|^HTTP/[^\s]*\s(.*?)\s|",$currentHeader, $status))
				{
					$this->status= $status[1];
				}
				$this->response_code = $currentHeader;
			}

			$this->headers[] = $currentHeader;
		}

		// read the body in chunks of $this->maxlength until nothing more arrives
		$results = '';
		do {
			$_data = fread($fp, $this->maxlength);
			if (strlen($_data) == 0) {
				break;
			}
			$results .= $_data;
		} while(true);

		if ($this->read_timeout > 0 && $this->_check_timeout($fp))
		{
			$this->status=-100;
			return false;
		}

		// check if there is a a redirect meta tag

		if(preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]*URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",$results,$match))
		{
			$this->_redirectaddr = $this->_expandlinks($match[1],$URI);
		}

		// have we hit our frame depth and is there frame src to fetch?
		if(($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i",$results,$match))
		{
			// $this->results starts out as a string; "[]" on a string is a fatal
			// error since PHP 7.1, so switch to an array before appending
			if(!is_array($this->results))
				$this->results = array();
			$this->results[] = $results;
			for($x=0; $x<count($match[1]); $x++)
				$this->_frameurls[] = $this->_expandlinks($match[1][$x],$URI_PARTS["scheme"]."://".$this->host);
		}
		// have we already fetched framed content?
		elseif(is_array($this->results))
			$this->results[] = $results;
		// no framed content
		else
			$this->results = $results;

		return true;
	}
 937
 938/*======================================================================*\
 939	Function:	_httpsrequest
 940	Purpose:	go get the https data from the server using curl
 941	Input:		$url		the url to fetch
 942				$URI		the full URI
 943				$body		body contents to send if any (POST)
 944	Output:		true on success, false if the fetch fails (see $this->error)
 945\*======================================================================*/
 946
	function _httpsrequest($url,$URI,$http_method,$content_type="",$body="")
	{
		// Fetches $URI by running the external curl binary ($this->curl_path).
		// Request headers are built from the object's settings; the response
		// headers are dumped by curl into a temporary file and parsed here.
		// On success the headers, response code, status, redirect target and
		// body are stored on the object and true is returned; on failure
		// $this->error is set and false is returned.
		//
		// $url and $http_method are accepted for signature parity with the
		// plain-http request method but are not needed: curl is given the full
		// $URI and chooses GET, or POST when a body is passed with -d.

		if($this->passcookies && $this->_redirectaddr)
			$this->setcookies();

		$headers = array();

		$URI_PARTS = parse_url($URI);
		// parse_url() may return false or omit the scheme; this is the https path
		$scheme = (is_array($URI_PARTS) && isset($URI_PARTS["scheme"])) ? $URI_PARTS["scheme"] : "https";

		// GET ... header not needed for curl
		if(!empty($this->agent))
			$headers[] = "User-Agent: ".$this->agent;
		if(!empty($this->host))
		{
			if(!empty($this->port))
				$headers[] = "Host: ".$this->host.":".$this->port;
			else
				$headers[] = "Host: ".$this->host;
		}
		if(!empty($this->accept))
			$headers[] = "Accept: ".$this->accept;
		if(!empty($this->referer))
			$headers[] = "Referer: ".$this->referer;
		if(!empty($this->cookies))
		{
			if(!is_array($this->cookies))
				$this->cookies = (array)$this->cookies;

			if(count($this->cookies) > 0)
			{
				$cookie_pairs = array();
				foreach($this->cookies as $cookieKey => $cookieVal)
					$cookie_pairs[] = $cookieKey."=".urlencode($cookieVal);
				$headers[] = "Cookie: ".implode("; ",$cookie_pairs);
			}
		}
		if(!empty($this->rawheaders))
		{
			if(!is_array($this->rawheaders))
				$this->rawheaders = (array)$this->rawheaders;
			// foreach instead of each(): each() no longer exists in PHP 8
			foreach($this->rawheaders as $headerKey => $headerVal)
				$headers[] = $headerKey.": ".$headerVal;
		}
		if(!empty($content_type)) {
			if ($content_type == "multipart/form-data")
				$headers[] = "Content-type: $content_type; boundary=".$this->_mime_boundary;
			else
				$headers[] = "Content-type: $content_type";
		}
		if(!empty($body))
			$headers[] = "Content-length: ".strlen($body);
		if(!empty($this->user) || !empty($this->pass))
			$headers[] = "Authorization: BASIC ".base64_encode($this->user.":".$this->pass);

		// curl writes the response headers to this file (-D)
		$headerfile = tempnam(isset($this->temp_dir) ? $this->temp_dir : "", "sno");
		if($headerfile === false)
		{
			$this->error = "Error: could not create a temporary file for the cURL response headers.";
			return false;
		}

		// Every value that reaches the shell is passed through escapeshellarg()
		// so that header, body or URI content cannot break out of its argument.
		$cmdline_params = " -k -D ".escapeshellarg($headerfile);
		foreach($headers as $header)
			$cmdline_params .= " -H ".escapeshellarg($header);

		if(!empty($body))
			$cmdline_params .= " -d ".escapeshellarg($body);

		if($this->read_timeout > 0)
			$cmdline_params .= " -m ".escapeshellarg((string)$this->read_timeout);

		$results = array();
		exec($this->curl_path.$cmdline_params." ".escapeshellarg($URI),$results,$return);

		if($return)
		{
			// do not leave the temporary header file behind on failure
			unlink($headerfile);
			$this->error = "Error: cURL could not retrieve the document, error $return.";
			return false;
		}

		$results = implode("\r\n",$results);

		$result_headers = file($headerfile);
		unlink($headerfile);
		if(!is_array($result_headers))
			$result_headers = array();

		$this->_redirectaddr = false;
		$this->headers = array();

		foreach($result_headers as $header_line)
		{
			// if a header begins with Location: or URI:, set the redirect
			if(preg_match('/^(?:Location|URI):\s*(.*)$/i',chop($header_line),$matches))
			{
				$location = $matches[1];
				// look for :// in the Location header to see if hostname is included
				if(!preg_match("|\:\/\/|",$location))
				{
					// no host in the path, so prepend
					$this->_redirectaddr = $scheme."://".$this->host.":".$this->port;
					// eliminate double slash
					if(!preg_match("|^/|",$location))
						$this->_redirectaddr .= "/".$location;
					else
						$this->_redirectaddr .= $location;
				}
				else
					$this->_redirectaddr = $location;
			}

			if(preg_match("|^HTTP/|",$header_line))
			{
				// record the numeric status the same way the plain-http path does
				if(preg_match("|^HTTP/[^\s]*\s(.*?)\s|",$header_line,$status))
					$this->status = $status[1];
				$this->response_code = $header_line;
			}

			$this->headers[] = $header_line;
		}

		// check if there is a redirect meta tag
		if(preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]*URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",$results,$match))
		{
			$this->_redirectaddr = $this->_expandlinks($match[1],$URI);
		}

		// have we hit our frame depth and is there frame src to fetch?
		if(($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i",$results,$match))
		{
			// results may still hold a string; [] on a string is a fatal error
			if(!is_array($this->results))
				$this->results = array();
			$this->results[] = $results;
			foreach($match[1] as $frame_src)
				$this->_frameurls[] = $this->_expandlinks($frame_src,$scheme."://".$this->host);
		}
		// have we already fetched framed content?
		elseif(is_array($this->results))
			$this->results[] = $results;
		// no framed content
		else
			$this->results = $results;

		return true;
	}
1085
1086/*======================================================================*\
1087	Function:	setcookies()
1088	Purpose:	set cookies for a redirection
1089\*======================================================================*/
1090
1091	function setcookies()
1092	{
1093		for($x=0; $x<count($this->headers); $x++)
1094		{
1095		if(preg_match('/^set-cookie:[\s]+([^=]+)=([^;]+)/i', $this->headers[$x],$match))
1096			$this->cookies[$match[1]] = urldecode($match[2]);
1097		}
1098	}
1099
1100
1101/*======================================================================*\
1102	Function:	_check_timeout
1103	Purpose:	checks whether timeout has occurred
1104	Input:		$fp	file pointer
1105\*======================================================================*/
1106
1107	function _check_timeout($fp)
1108	{
1109		if ($this->read_timeout > 0) {
1110			$fp_status = socket_get_status($fp);
1111			if ($fp_status["timed_out"]) {
1112				$this->timed_out = true;
1113				return true;
1114			}
1115		}
1116		return false;
1117	}
1118
1119/*======================================================================*\
1120	Function:	_connect
1121	Purpose:	make a socket connection
1122	Input:		$fp	file pointer
1123\*======================================================================*/
1124
1125	function _connect(&$fp)
1126	{
1127		if(!empty($this->proxy_host) && !empty($this->proxy_port))
1128			{
1129				$this->_isproxy = true;
1130
1131				$host = $this->proxy_host;
1132				$port = $this->proxy_port;
1133			}
1134		else
1135		{
1136			$host = $this->host;
1137			$port = $this->port;
1138		}
1139
1140		$this->status = 0;
1141
1142		if($fp = fsockopen(
1143					$host,
1144					$port,
1145					$errno,
1146					$errstr,
1147					$this->_fp_timeout
1148					))
1149		{
1150			// socket connection succeeded
1151
1152			return true;
1153		}
1154		else
1155		{
1156			// socket connection failed
1157			$this->status = $errno;
1158			switch($errno)
1159			{
1160				case -3:
1161					$this->error="socket creation failed (-3)";
1162				case -4:
1163					$this->error="dns lookup failure (-4)";
1164				case -5:
1165					$this->error="connection refused or timed out (-5)";
1166				default:
1167					$this->error="connection failed (".$errno.")";
1168			}
1169			return false;
1170		}
1171	}
1172/*======================================================================*\
1173	Function:	_disconnect
1174	Purpose:	disconnect a socket connection
1175	Input:		$fp	file pointer
1176\*======================================================================*/
1177
1178	function _disconnect($fp)
1179	{
1180		return(fclose($fp));
1181	}
1182
1183
1184/*======================================================================*\
1185	Function:	_prepare_post_body
1186	Purpose:	Prepare post body according to encoding type
1187	Input:		$formvars  - form variables
1188				$formfiles - form upload files
1189	Output:		post body
1190\*======================================================================*/
1191
1192	function _prepare_post_body($formvars, $formfiles)
1193	{
1194		settype($formvars, "array");
1195		settype($formfiles, "array");
1196		$postdata = '';
1197
1198		if (count($formvars) == 0 && count($formfiles) == 0)
1199			return;
1200
1201		switch ($this->_submit_type) {
1202			case "application/x-www-form-urlencoded":
1203				reset($formvars);
1204				while(list($key,$val) = each($formvars)) {
1205					if (is_array($val) || is_object($val)) {
1206						while (list($cur_key, $cur_val) = each($val)) {
1207							$postdata .= urlencode($key)."[]=".urlencode($cur_val)."&";
1208						}
1209					} else
1210						$postdata .= urlencode($key)."=".urlencode($val)."&";
1211				}
1212				break;
1213
1214			case "multipart/form-data":
1215				$this->_mime_boundary = "Snoopy".md5(uniqid(microtime()));
1216
1217				reset($formvars);
1218				while(list($key,$val) = each($formvars)) {
1219					if (is_array($val) || is_object($val)) {
1220						while (list($cur_key, $cur_val) = each($val)) {
1221							$postdata .= "--".$this->_mime_boundary."\r\n";
1222							$postdata .= "Content-Disposition: form-data; name=\"$key\[\]\"\r\n\r\n";
1223							$postdata .= "$cur_val\r\n";
1224						}
1225					} else {
1226						$postdata .= "--".$this->_mime_boundary."\r\n";
1227						$postdata .= "Content-Disposition: form-data; name=\"$key\"\r\n\r\n";
1228						$postdata .= "$val\r\n";
1229					}
1230				}
1231
1232				reset($formfiles);
1233				while (list($field_name, $file_names) = each($formfiles)) {
1234					settype($file_names, "array");
1235					while (list(, $file_name) = each($file_names)) {
1236						if (!is_readable($file_name)) continue;
1237
1238						$fp = fopen($file_name, "r");
1239						$file_content = fread($fp, filesize($file_name));
1240						fclose($fp);
1241						$base_name = basename($file_name);
1242
1243						$postdata .= "--".$this->_mime_boundary."\r\n";
1244						$postdata .= "Content-Disposition: form-data; name=\"$field_name\"; filename=\"$base_name\"\r\n\r\n";
1245						$postdata .= "$file_content\r\n";
1246					}
1247				}
1248				$postdata .= "--".$this->_mime_boundary."--\r\n";
1249				break;
1250		}
1251
1252		return $postdata;
1253	}
1254}
1255endif;
1256?>