PageRenderTime 52ms CodeModel.GetById 9ms app.highlight 33ms RepoModel.GetById 1ms app.codeStats 0ms

/wikiRoboter/BulkPageCreator/Snoopy.class.php

https://bitbucket.org/tbayen/smallprojects
PHP | 1250 lines | 994 code | 80 blank | 176 comment | 77 complexity | 93919f8608b8c2dc8b15a11204655253 MD5 | raw file
   1<?php
   2
   3/*************************************************
   4
   5Snoopy - the PHP net client
   6Author: Monte Ohrt <monte@ispi.net>
   7Copyright (c): 1999-2008 New Digital Group, all rights reserved
   8Version: 1.2.4
   9
  10 * This library is free software; you can redistribute it and/or
  11 * modify it under the terms of the GNU Lesser General Public
  12 * License as published by the Free Software Foundation; either
  13 * version 2.1 of the License, or (at your option) any later version.
  14 *
  15 * This library is distributed in the hope that it will be useful,
  16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18 * Lesser General Public License for more details.
  19 *
  20 * You should have received a copy of the GNU Lesser General Public
  21 * License along with this library; if not, write to the Free Software
  22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
  23
  24You may contact the author of Snoopy by e-mail at:
  25monte@ohrt.com
  26
  27The latest version of Snoopy can be obtained from:
  28http://snoopy.sourceforge.net/
  29
  30*************************************************/
  31
  32class Snoopy
  33{
  34	/**** Public variables ****/
  35	
  36	/* user definable vars */
  37
  38	var $host			=	"www.php.net";		// host name we are connecting to
  39	var $port			=	80;					// port we are connecting to
  40	var $proxy_host		=	"";					// proxy host to use
  41	var $proxy_port		=	"";					// proxy port to use
  42	var $proxy_user		=	"";					// proxy user to use
  43	var $proxy_pass		=	"";					// proxy password to use
  44	
  45	var $agent			=	"Snoopy v1.2.4";	// agent we masquerade as
  46	var	$referer		=	"";					// referer info to pass
  47	var $cookies		=	array();			// array of cookies to pass
  48												// $cookies["username"]="joe";
  49	var	$rawheaders		=	array();			// array of raw headers to send
  50												// $rawheaders["Content-type"]="text/html";
  51
  52	var $maxredirs		=	5;					// http redirection depth maximum. 0 = disallow
  53	var $lastredirectaddr	=	"";				// contains address of last redirected address
  54	var	$offsiteok		=	true;				// allows redirection off-site
  55	var $maxframes		=	0;					// frame content depth maximum. 0 = disallow
  56	var $expandlinks	=	true;				// expand links to fully qualified URLs.
  57												// this only applies to fetchlinks()
  58												// submitlinks(), and submittext()
  59	var $passcookies	=	true;				// pass set cookies back through redirects
  60												// NOTE: this currently does not respect
  61												// dates, domains or paths.
  62	
  63	var	$user			=	"";					// user for http authentication
  64	var	$pass			=	"";					// password for http authentication
  65	
  66	// http accept types
  67	var $accept			=	"image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, */*";
  68	
  69	var $results		=	"";					// where the content is put
  70		
  71	var $error			=	"";					// error messages sent here
  72	var	$response_code	=	"";					// response code returned from server
  73	var	$headers		=	array();			// headers returned from server sent here
  74	var	$maxlength		=	500000;				// max return data length (body)
  75	var $read_timeout	=	0;					// timeout on read operations, in seconds
  76												// supported only since PHP 4 Beta 4
  77												// set to 0 to disallow timeouts
  78	var $timed_out		=	false;				// if a read operation timed out
  79	var	$status			=	0;					// http request status
  80
  81	var $temp_dir		=	"/tmp";				// temporary directory that the webserver
  82												// has permission to write to.
  83												// under Windows, this should be C:\temp
  84
  85	var	$curl_path		=	"/usr/local/bin/curl";
  86												// Snoopy will use cURL for fetching
  87												// SSL content if a full system path to
  88												// the cURL binary is supplied here.
  89												// set to false if you do not have
  90												// cURL installed. See http://curl.haxx.se
  91												// for details on installing cURL.
  92												// Snoopy does *not* use the cURL
  93												// library functions built into php,
  94												// as these functions are not stable
  95												// as of this Snoopy release.
  96	
  97	/**** Private variables ****/	
  98	
  99	var	$_maxlinelen	=	4096;				// max line length (headers)
 100	
 101	var $_httpmethod	=	"GET";				// default http request method
 102	var $_httpversion	=	"HTTP/1.0";			// default http request version
 103	var $_submit_method	=	"POST";				// default submit method
 104	var $_submit_type	=	"application/x-www-form-urlencoded";	// default submit type
 105	var $_mime_boundary	=   "";					// MIME boundary for multipart/form-data submit type
 106	var $_redirectaddr	=	false;				// will be set if page fetched is a redirect
 107	var $_redirectdepth	=	0;					// increments on an http redirect
 108	var $_frameurls		= 	array();			// frame src urls
 109	var $_framedepth	=	0;					// increments on frame depth
 110	
 111	var $_isproxy		=	false;				// set if using a proxy server
 112	var $_fp_timeout	=	30;					// timeout for socket connection
 113
 114/*======================================================================*\
 115	Function:	fetch
 116	Purpose:	fetch the contents of a web page
 117				(and possibly other protocols in the
 118				future like ftp, nntp, gopher, etc.)
 119	Input:		$URI	the location of the page to fetch
 120	Output:		$this->results	the output text from the fetch
 121\*======================================================================*/
 122
	function fetch($URI)
	{
		/*
		 * Fetch $URI over http (socket) or https (external cURL binary), then
		 * follow a pending redirect and fetch collected frame URLs, bounded by
		 * $maxredirs / $offsiteok / $maxframes.
		 *
		 * Input:	$URI	the location of the page to fetch
		 * Output:	$this->results holds the fetched content.
		 * Returns false when the scheme is not http/https, the connection
		 * cannot be opened, or no usable cURL binary is configured for https;
		 * true otherwise.
		 */
		$URI_PARTS = parse_url($URI);
		if (!is_array($URI_PARTS))
			$URI_PARTS = array();	// parse_url() returns false on malformed input

		if (!empty($URI_PARTS["user"]))
			$this->user = $URI_PARTS["user"];
		if (!empty($URI_PARTS["pass"]))
			$this->pass = $URI_PARTS["pass"];

		// missing components become "" instead of raising undefined-index errors
		$scheme = isset($URI_PARTS["scheme"]) ? $URI_PARTS["scheme"] : "";
		$host = isset($URI_PARTS["host"]) ? $URI_PARTS["host"] : "";
		$path = empty($URI_PARTS["path"]) ? "" : $URI_PARTS["path"];
		if (!empty($URI_PARTS["query"]))
			$path .= "?".$URI_PARTS["query"];

		switch (strtolower($scheme))
		{
			case "http":
				$this->host = $host;
				if (!empty($URI_PARTS["port"]))
					$this->port = $URI_PARTS["port"];
				if (!$this->_connect($fp))
					return false;
				// using a proxy: send the entire URI; otherwise only path + query
				$this->_httprequest($this->_isproxy ? $URI : $path, $fp, $URI, $this->_httpmethod);
				$this->_disconnect($fp);
				break;

			case "https":
				if (!$this->curl_path)
					return false;
				if (function_exists("is_executable") && !is_executable($this->curl_path))
					return false;
				$this->host = $host;
				if (!empty($URI_PARTS["port"]))
					$this->port = $URI_PARTS["port"];
				// using a proxy: send the entire URI; otherwise only path + query
				$this->_httpsrequest($this->_isproxy ? $URI : $path, $URI, $this->_httpmethod);
				break;

			default:
				// not a valid protocol; "\n" must be double-quoted to be a newline
				$this->error = 'Invalid protocol "'.$scheme.'"'."\n";
				return false;
		}

		/* url was redirected, check if we've hit the max depth */
		if ($this->_redirectaddr && $this->maxredirs > $this->_redirectdepth)
		{
			// On-site means "same host", whatever the scheme. Comparing the
			// parsed host avoids prefix matches such as host.example.evil.tld.
			$target_parts = parse_url($this->_redirectaddr);
			$same_site = is_array($target_parts)
				&& isset($target_parts["host"])
				&& strcasecmp($target_parts["host"], $this->host) == 0;

			// only follow redirect if it's on this site, or offsiteok is true
			if ($same_site || $this->offsiteok)
			{
				$this->_redirectdepth++;
				$this->lastredirectaddr = $this->_redirectaddr;
				$this->fetch($this->_redirectaddr);
			}
		}

		if ($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
		{
			// take a private copy; nested fetches refill $this->_frameurls
			$frameurls = $this->_frameurls;
			$this->_frameurls = array();

			foreach ($frameurls as $frameurl)
			{
				if ($this->_framedepth >= $this->maxframes)
					break;
				$this->fetch($frameurl);
				$this->_framedepth++;
			}
		}

		return true;
	}
 261
 262/*======================================================================*\
 263	Function:	submit
 264	Purpose:	submit an http form
 265	Input:		$URI	the location to post the data
 266				$formvars	the formvars to use.
 267					format: $formvars["var"] = "val";
 268				$formfiles  an array of files to submit
 269					format: $formfiles["var"] = "/dir/filename.ext";
 270	Output:		$this->results	the text output from the post
 271\*======================================================================*/
 272
	function submit($URI, $formvars="", $formfiles="")
	{
		/*
		 * Submit an http form to $URI using $this->_submit_method and
		 * $this->_submit_type, then follow a pending redirect and fetch
		 * collected frame URLs.
		 *
		 * Input:	$URI		the location to post the data
		 *			$formvars	the formvars to use, $formvars["var"] = "val";
		 *			$formfiles	files to submit, $formfiles["var"] = "/dir/filename.ext";
		 * Output:	$this->results holds the text output from the post.
		 * Returns false when the scheme is not http/https, the connection
		 * cannot be opened, or no usable cURL binary is configured for https;
		 * true otherwise.
		 */
		$postdata = $this->_prepare_post_body($formvars, $formfiles);

		$URI_PARTS = parse_url($URI);
		if (!is_array($URI_PARTS))
			$URI_PARTS = array();	// parse_url() returns false on malformed input

		if (!empty($URI_PARTS["user"]))
			$this->user = $URI_PARTS["user"];
		if (!empty($URI_PARTS["pass"]))
			$this->pass = $URI_PARTS["pass"];

		// missing components become "" instead of raising undefined-index errors
		$scheme = isset($URI_PARTS["scheme"]) ? $URI_PARTS["scheme"] : "";
		$host = isset($URI_PARTS["host"]) ? $URI_PARTS["host"] : "";
		$path = empty($URI_PARTS["path"]) ? "" : $URI_PARTS["path"];
		if (!empty($URI_PARTS["query"]))
			$path .= "?".$URI_PARTS["query"];

		switch (strtolower($scheme))
		{
			case "http":
				$this->host = $host;
				if (!empty($URI_PARTS["port"]))
					$this->port = $URI_PARTS["port"];
				if (!$this->_connect($fp))
					return false;
				// using a proxy: send the entire URI; otherwise only path + query
				$this->_httprequest($this->_isproxy ? $URI : $path, $fp, $URI, $this->_submit_method, $this->_submit_type, $postdata);
				$this->_disconnect($fp);
				break;

			case "https":
				if (!$this->curl_path)
					return false;
				if (function_exists("is_executable") && !is_executable($this->curl_path))
					return false;
				$this->host = $host;
				if (!empty($URI_PARTS["port"]))
					$this->port = $URI_PARTS["port"];
				// using a proxy: send the entire URI; otherwise only path + query
				$this->_httpsrequest($this->_isproxy ? $URI : $path, $URI, $this->_submit_method, $this->_submit_type, $postdata);
				break;

			default:
				// not a valid protocol; "\n" must be double-quoted to be a newline
				$this->error = 'Invalid protocol "'.$scheme.'"'."\n";
				return false;
		}

		/* url was redirected, check if we've hit the max depth */
		if ($this->_redirectaddr && $this->maxredirs > $this->_redirectdepth)
		{
			// qualify a redirect that does not start with this request's scheme
			if (!preg_match("|^".$scheme."://|", $this->_redirectaddr))
				$this->_redirectaddr = $this->_expandlinks($this->_redirectaddr, $scheme."://".$host);

			// On-site means "same host", whatever the scheme. Comparing the
			// parsed host avoids prefix matches such as host.example.evil.tld.
			$target_parts = parse_url($this->_redirectaddr);
			$same_site = is_array($target_parts)
				&& isset($target_parts["host"])
				&& strcasecmp($target_parts["host"], $this->host) == 0;

			// only follow redirect if it's on this site, or offsiteok is true
			if ($same_site || $this->offsiteok)
			{
				$this->_redirectdepth++;
				$this->lastredirectaddr = $this->_redirectaddr;
				if (strpos($this->_redirectaddr, "?") > 0)
					$this->fetch($this->_redirectaddr); // the redirect has changed the request method from post to get
				else
					$this->submit($this->_redirectaddr, $formvars, $formfiles);
			}
		}

		if ($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
		{
			// take a private copy; nested fetches refill $this->_frameurls
			$frameurls = $this->_frameurls;
			$this->_frameurls = array();

			foreach ($frameurls as $frameurl)
			{
				if ($this->_framedepth >= $this->maxframes)
					break;
				$this->fetch($frameurl);
				$this->_framedepth++;
			}
		}

		return true;
	}
 427
 428/*======================================================================*\
 429	Function:	fetchlinks
 430	Purpose:	fetch the links from a web page
 431	Input:		$URI	where you are fetching from
 432	Output:		$this->results	an array of the URLs
 433\*======================================================================*/
 434
	function fetchlinks($URI)
	{
		/*
		 * Fetch $URI and replace $this->results with the links found in it.
		 * With framed content (results is an array) every frame is reduced
		 * to its own array of links. Links are qualified against the last
		 * redirect address when there was one, otherwise against $URI, and
		 * only when $this->expandlinks is set.
		 * Returns the result of fetch().
		 */
		if (!$this->fetch($URI))
			return false;

		if ($this->lastredirectaddr)
			$URI = $this->lastredirectaddr;

		if (is_array($this->results))
		{
			// expand per frame: _expandlinks() cannot take an array of arrays
			$frames = count($this->results);
			for ($x = 0; $x < $frames; $x++)
			{
				$links = $this->_striplinks($this->results[$x]);
				if (!is_array($links))
					$links = array();	// no links found
				if ($this->expandlinks)
					$links = $this->_expandlinks($links, $URI);
				$this->results[$x] = $links;
			}
		}
		else
		{
			$links = $this->_striplinks($this->results);
			if (!is_array($links))
				$links = array();	// no links found
			if ($this->expandlinks)
				$links = $this->_expandlinks($links, $URI);
			$this->results = $links;
		}

		return true;
	}
 456
 457/*======================================================================*\
 458	Function:	fetchform
 459	Purpose:	fetch the form elements from a web page
 460	Input:		$URI	where you are fetching from
 461	Output:		$this->results	the resulting html form
 462\*======================================================================*/
 463
	function fetchform($URI)
	{
		// Fetch $URI and reduce $this->results (each entry, when it is an
		// array) to the output of _stripform(). Returns the result of fetch().
		if (!$this->fetch($URI))
			return false;

		if (!is_array($this->results))
		{
			$this->results = $this->_stripform($this->results);
			return true;
		}

		for ($idx = 0; $idx < count($this->results); $idx++)
			$this->results[$idx] = $this->_stripform($this->results[$idx]);

		return true;
	}
 483	
 484	
 485/*======================================================================*\
 486	Function:	fetchtext
 487	Purpose:	fetch the text from a web page, stripping the links
 488	Input:		$URI	where you are fetching from
 489	Output:		$this->results	the text from the web page
 490\*======================================================================*/
 491
	function fetchtext($URI)
	{
		// Fetch $URI and reduce $this->results (each entry, when it is an
		// array) to the output of _striptext(). Returns the result of fetch().
		if (!$this->fetch($URI))
			return false;

		if (!is_array($this->results))
		{
			$this->results = $this->_striptext($this->results);
			return true;
		}

		for ($idx = 0; $idx < count($this->results); $idx++)
			$this->results[$idx] = $this->_striptext($this->results[$idx]);

		return true;
	}
 508
 509/*======================================================================*\
 510	Function:	submitlinks
 511	Purpose:	grab links from a form submission
 512	Input:		$URI	where you are submitting from
 513	Output:		$this->results	an array of the links from the post
 514\*======================================================================*/
 515
	function submitlinks($URI, $formvars="", $formfiles="")
	{
		// Submit the form to $URI and replace $this->results (each entry,
		// when it is an array) with the links found in the response. Links
		// are qualified against the last redirect address if there is one,
		// otherwise $URI, when $this->expandlinks is set.
		// Returns the result of submit().
		if (!$this->submit($URI, $formvars, $formfiles))
			return false;

		$base = $this->lastredirectaddr ? $this->lastredirectaddr : $URI;

		if (!is_array($this->results))
		{
			$this->results = $this->_striplinks($this->results);
			if ($this->expandlinks)
				$this->results = $this->_expandlinks($this->results, $base);
			return true;
		}

		for ($idx = 0; $idx < count($this->results); $idx++)
		{
			$this->results[$idx] = $this->_striplinks($this->results[$idx]);
			if ($this->expandlinks)
				$this->results[$idx] = $this->_expandlinks($this->results[$idx], $base);
		}

		return true;
	}
 542
 543/*======================================================================*\
 544	Function:	submittext
 545	Purpose:	grab text from a form submission
 546	Input:		$URI	where you are submitting from
 547	Output:		$this->results	the text from the web page
 548\*======================================================================*/
 549
	function submittext($URI, $formvars = "", $formfiles = "")
	{
		// Submit the form to $URI and replace $this->results (each entry,
		// when it is an array) with the output of _striptext(). When
		// $this->expandlinks is set the stripped text is additionally passed
		// through _expandlinks(), based on the last redirect address if there
		// is one, otherwise $URI. Returns the result of submit().
		if (!$this->submit($URI, $formvars, $formfiles))
			return false;

		$base = $this->lastredirectaddr ? $this->lastredirectaddr : $URI;

		if (!is_array($this->results))
		{
			$this->results = $this->_striptext($this->results);
			if ($this->expandlinks)
				$this->results = $this->_expandlinks($this->results, $base);
			return true;
		}

		for ($idx = 0; $idx < count($this->results); $idx++)
		{
			$this->results[$idx] = $this->_striptext($this->results[$idx]);
			if ($this->expandlinks)
				$this->results[$idx] = $this->_expandlinks($this->results[$idx], $base);
		}

		return true;
	}
 576
 577	
 578
 579/*======================================================================*\
 580	Function:	set_submit_multipart
 581	Purpose:	Set the form submission content type to
 582				multipart/form-data
 583\*======================================================================*/
	function set_submit_multipart()
	{
		// later submits are sent with the multipart/form-data content type
		$type = "multipart/form-data";
		$this->_submit_type = $type;
	}
 588
 589	
 590/*======================================================================*\
 591	Function:	set_submit_normal
 592	Purpose:	Set the form submission content type to
 593				application/x-www-form-urlencoded
 594\*======================================================================*/
	function set_submit_normal()
	{
		// later submits are sent with the urlencoded form content type
		$type = "application/x-www-form-urlencoded";
		$this->_submit_type = $type;
	}
 599
 600	
 601	
 602
 603/*======================================================================*\
 604	Private functions
 605\*======================================================================*/
 606	
 607	
 608/*======================================================================*\
 609	Function:	_striplinks
 610	Purpose:	strip the hyperlinks from an html document
 611	Input:		$document	document to strip.
 612	Output:		$match		an array of the links
 613\*======================================================================*/
 614
	function _striplinks($document)
	{
		/*
		 * Collect the href values of the <a> tags in $document.
		 *
		 * Input:	$document	document to strip.
		 * Output:	array of link targets; quoted hrefs come first, then
		 *			unquoted ones. Always an array, empty when nothing matched.
		 */
		preg_match_all("'<\s*a\s.*?href\s*=\s*			# find <a href=
						([\"\'])?					# find single or double quote
						(?(1) (.*?)\\1 | ([^\s\>]+))		# if quote found, match up to next matching
													# quote, otherwise match up to next space
						'isx",$document,$links);

		// catenate the non-empty matches from the conditional subpattern:
		// group 2 holds quoted values, group 3 unquoted ones
		$match = array();
		foreach (array(2, 3) as $group)
		{
			foreach ($links[$group] as $val)
			{
				if (!empty($val))
					$match[] = $val;
			}
		}

		// return the links
		return $match;
	}
 641
 642/*======================================================================*\
 643	Function:	_stripform
 644	Purpose:	strip the form elements from an html document
 645	Input:		$document	document to strip.
 646	Output:		$match		the matched form elements, joined with CRLF
 647\*======================================================================*/
 648
	function _stripform($document)
	{
		// Pick out the FORM, INPUT, SELECT, TEXTAREA and OPTION tags (an
		// OPTION match also takes the text up to the next option/select tag)
		// and hand them back as one string, one match per CRLF-separated line.
		$pattern = "'<\/?(FORM|INPUT|SELECT|TEXTAREA|(OPTION))[^<>]*>(?(2)(.*(?=<\/?(option|select)[^<>]*>[\r\n]*)|(?=[\r\n]*))|(?=[\r\n]*))'Usi";
		preg_match_all($pattern, $document, $tags);

		return implode("\r\n", $tags[0]);
	}
 659
 660	
 661	
 662/*======================================================================*\
 663	Function:	_striptext
 664	Purpose:	strip the text from an html document
 665	Input:		$document	document to strip.
 666	Output:		$text		the resulting text
 667\*======================================================================*/
 668
	function _striptext($document)
	{
		/*
		 * Strip scripts and tags from an html document and decode a fixed
		 * list of common entities.
		 *
		 * Input:	$document	document to strip.
		 * Output:	the resulting text
		 *
		 * The rules run in the order listed. "&amp;" is decoded last so that
		 * an escaped entity such as "&amp;lt;" ends up as the text "&lt;"
		 * instead of being decoded a second time.
		 */
		$rules = array(
			"'<script[^>]*?>.*?</script>'si"	=> "",			// strip out javascript
			"'<[\/\!]*?[^<>]*?>'si"				=> "",			// strip out html tags
			"'([\r\n])[\s]+'"					=> "\\1",		// strip out white space
			"'&(quot|#34|#034|#x22);'i"			=> "\"",		// replace html entities
			"'&(lt|#60|#060|#x3c);'i"			=> "<",
			"'&(gt|#62|#062|#x3e);'i"			=> ">",
			"'&(nbsp|#160|#xa0);'i"				=> " ",
			"'&(iexcl|#161);'i"					=> chr(161),
			"'&(cent|#162);'i"					=> chr(162),
			"'&(pound|#163);'i"					=> chr(163),
			"'&(copy|#169);'i"					=> chr(169),
			"'&(reg|#174);'i"					=> chr(174),
			"'&(deg|#176);'i"					=> chr(176),
			"'&(#39|#039|#x27);'"				=> chr(39),
			"'&(euro|#8364);'i"					=> chr(128),	// europe
			"'&a(uml|UML);'"					=> "ä",			// german
			"'&o(uml|UML);'"					=> "ö",
			"'&u(uml|UML);'"					=> "ü",
			"'&A(uml|UML);'"					=> "Ä",
			"'&O(uml|UML);'"					=> "Ö",
			"'&U(uml|UML);'"					=> "Ü",
			"'&szlig;'i"						=> "ß",
			"'&(amp|#38|#038|#x26);'i"			=> "&",			// must stay last
		);

		return preg_replace(array_keys($rules), array_values($rules), $document);
	}
 729
 730/*======================================================================*\
 731	Function:	_expandlinks
 732	Purpose:	expand each link into a fully qualified URL
 733	Input:		$links			the links to qualify
 734				$URI			the full URI to get the base from
 735	Output:		$expandedLinks	the expanded links
 736\*======================================================================*/
 737
	function _expandlinks($links,$URI)
	{
		/*
		 * Expand each link into a fully qualified URL.
		 *
		 * Input:	$links	the links to qualify (string or array of strings)
		 *			$URI	the full URI to get the base from
		 * Output:	the expanded links, same shape as $links
		 *
		 * Links starting with "/" are placed under scheme://host of $URI,
		 * other links without an http://, https:// or mailto: prefix under
		 * the base directory of $URI; "/./" and "/dir/../" are collapsed.
		 */

		// base = $URI without its query string ("" if nothing precedes a "?")
		$base = preg_match("/^[^\?]+/", $URI, $found) ? $found[0] : "";
		// drop a trailing "/name.ext" segment, then a trailing slash
		// NOTE(review): this runs on the whole URI, so a bare single-dot
		// host such as "http://example.com" loses its host here.
		$base = preg_replace("|/[^\/\.]+\.[^\/\.]+$|", "", $base);
		$base = preg_replace("|/$|", "", $base);

		$base_parts = parse_url($base);
		if (!is_array($base_parts))
			$base_parts = array();
		$root = (isset($base_parts["scheme"]) ? $base_parts["scheme"] : "")
			."://"
			.(isset($base_parts["host"]) ? $base_parts["host"] : "");

		$search = array(
			"|^http://".preg_quote($this->host, "|")."|i",	// strip our own host
			"|^(\/)|i",										// root-relative link
			"|^(?!https?://)(?!mailto:)|i",					// still not absolute
			"|/\./|",
			"|/[^\/]+/\.\./|"
		);

		$replace = array(
			"",
			$root."/",
			$base."/",
			"/",
			"/"
		);

		return preg_replace($search, $replace, $links);
	}
 767
 768/*======================================================================*\
 769	Function:	_httprequest
 770	Purpose:	go get the http data from the server
 771	Input:		$url		the url to fetch
 772				$fp			the current open file pointer
 773				$URI		the full URI
 774				$body		body contents to send if any (POST)
 775	Output:		
 776\*======================================================================*/
 777	
	function _httprequest($url,$fp,$URI,$http_method,$content_type="",$body="")
	{
		/*
		 * Send one http request over the open handle $fp and read the reply.
		 *
		 * Input:	$url			the url for the request line ("/" when empty)
		 *			$fp				the current open file pointer
		 *			$URI			the full URI, used to qualify redirects and frames
		 *			$http_method	request method for the request line
		 *			$content_type	Content-type value, omitted when empty
		 *			$body			body contents to send if any (POST)
		 * Output:	sets $this->headers, status, response_code, _redirectaddr,
		 *			_frameurls and results. Returns false with status -100
		 *			when a read timed out, true otherwise.
		 */
		if ($this->passcookies && $this->_redirectaddr)
			$this->setcookies();

		$URI_PARTS = parse_url($URI);
		if (empty($url))
			$url = "/";

		/* build the request head */
		$headers = $http_method." ".$url." ".$this->_httpversion."\r\n";
		if (!empty($this->agent))
			$headers .= "User-Agent: ".$this->agent."\r\n";
		if (!empty($this->host) && !isset($this->rawheaders['Host']))
		{
			$headers .= "Host: ".$this->host;
			if (!empty($this->port))
				$headers .= ":".$this->port;
			$headers .= "\r\n";
		}
		if (!empty($this->accept))
			$headers .= "Accept: ".$this->accept."\r\n";
		if (!empty($this->referer))
			$headers .= "Referer: ".$this->referer."\r\n";
		if (!empty($this->cookies))
		{
			if (!is_array($this->cookies))
				$this->cookies = (array)$this->cookies;

			$pairs = array();
			foreach ($this->cookies as $cookieKey => $cookieVal)
				$pairs[] = $cookieKey."=".urlencode($cookieVal);
			$headers .= "Cookie: ".implode("; ", $pairs)."\r\n";
		}
		if (!empty($this->rawheaders))
		{
			if (!is_array($this->rawheaders))
				$this->rawheaders = (array)$this->rawheaders;
			// foreach always starts at the first element, so the raw headers
			// are sent on every request, including redirects and frames
			foreach ($this->rawheaders as $headerKey => $headerVal)
				$headers .= $headerKey.": ".$headerVal."\r\n";
		}
		if (!empty($content_type))
		{
			$headers .= "Content-type: $content_type";
			if ($content_type == "multipart/form-data")
				$headers .= "; boundary=".$this->_mime_boundary;
			$headers .= "\r\n";
		}
		if (!empty($body))
			$headers .= "Content-length: ".strlen($body)."\r\n";
		if (!empty($this->user) || !empty($this->pass))
			$headers .= "Authorization: Basic ".base64_encode($this->user.":".$this->pass)."\r\n";

		//add proxy auth headers
		if (!empty($this->proxy_user))
			$headers .= 'Proxy-Authorization: ' . 'Basic ' . base64_encode($this->proxy_user . ':' . $this->proxy_pass)."\r\n";

		$headers .= "\r\n";

		// set the read timeout if needed
		if ($this->read_timeout > 0)
			stream_set_timeout($fp, $this->read_timeout);
		$this->timed_out = false;

		$request = $headers.$body;
		fwrite($fp, $request, strlen($request));

		$this->_redirectaddr = false;
		$this->headers = array();

		/* read the response head, one line at a time */
		while ($currentHeader = fgets($fp, $this->_maxlinelen))
		{
			if ($this->read_timeout > 0 && $this->_check_timeout($fp))
			{
				$this->status = -100;
				return false;
			}

			if ($currentHeader == "\r\n")
				break;

			// if a header begins with Location: or URI:, set the redirect;
			// the value may follow the colon with or without blanks
			if (preg_match("/^(Location:|URI:)[ \t]*(.*)/i", chop($currentHeader), $matches))
			{
				// look for :// in the Location header to see if hostname is included
				if (!preg_match("|\:\/\/|", $matches[2]))
				{
					// no host in the path, so prepend
					$this->_redirectaddr = $URI_PARTS["scheme"]."://".$this->host.":".$this->port;
					// eliminate double slash
					if (!preg_match("|^/|", $matches[2]))
						$this->_redirectaddr .= "/".$matches[2];
					else
						$this->_redirectaddr .= $matches[2];
				}
				else
					$this->_redirectaddr = $matches[2];
			}

			if (preg_match("|^HTTP/|", $currentHeader))
			{
				if (preg_match("|^HTTP/[^\s]*\s(.*?)\s|", $currentHeader, $status))
					$this->status = $status[1];
				$this->response_code = $currentHeader;
			}

			$this->headers[] = $currentHeader;
		}

		/* read the body until fread() returns nothing more */
		$results = '';
		while (true)
		{
			$chunk = fread($fp, $this->maxlength);
			if (strlen($chunk) == 0)
				break;
			$results .= $chunk;
		}

		if ($this->read_timeout > 0 && $this->_check_timeout($fp))
		{
			$this->status = -100;
			return false;
		}

		// check if there is a redirect meta tag
		if (preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]*URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i", $results, $match))
			$this->_redirectaddr = $this->_expandlinks($match[1], $URI);

		// have we hit our frame depth and is there frame src to fetch?
		if (($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i", $results, $match))
		{
			// results starts out as a string; "[]" on a string is a fatal
			// error on PHP 7.1+, so switch to an array first
			if (!is_array($this->results))
				$this->results = array();
			$this->results[] = $results;
			for ($x = 0; $x < count($match[1]); $x++)
				$this->_frameurls[] = $this->_expandlinks($match[1][$x], $URI_PARTS["scheme"]."://".$this->host);
		}
		// have we already fetched framed content?
		elseif (is_array($this->results))
			$this->results[] = $results;
		// no framed content
		else
			$this->results = $results;

		return true;
	}
 931
/*======================================================================*\
	Function:	_httpsrequest
	Purpose:	go get the https data from the server using curl
	Input:		$url			the url to fetch
				$URI			the full URI
				$http_method	the HTTP method (accepted but not passed to curl)
				$content_type	the content type of the body, if any
				$body			body contents to send if any (POST)
	Output:		true on success, false if curl reports an error
\*======================================================================*/
 940	
	function _httpsrequest($url,$URI,$http_method,$content_type="",$body="")
	{
		// Fetch $URI by running the external curl binary ($this->curl_path),
		// then parse the dumped response headers and the returned body.
		//
		// $url and $http_method are accepted for signature compatibility but
		// are not used: curl builds the request line itself from $URI.
		// Returns true on success; on failure sets $this->error and returns
		// false.
		if($this->passcookies && $this->_redirectaddr)
			$this->setcookies();

		$headers = array();

		$URI_PARTS = parse_url($URI);
		// parse_url() may fail or omit the scheme; this method only handles
		// https requests, so fall back to that when building redirects.
		$scheme = (is_array($URI_PARTS) && isset($URI_PARTS["scheme"])) ? $URI_PARTS["scheme"] : "https";

		// empty() would treat a body of "0" as missing, so test the string itself
		$has_body = ($body !== null && (string)$body !== "");

		if(!empty($this->agent))
			$headers[] = "User-Agent: ".$this->agent;
		if(!empty($this->host))
		{
			if(!empty($this->port))
				$headers[] = "Host: ".$this->host.":".$this->port;
			else
				$headers[] = "Host: ".$this->host;
		}
		if(!empty($this->accept))
			$headers[] = "Accept: ".$this->accept;
		if(!empty($this->referer))
			$headers[] = "Referer: ".$this->referer;
		if(!empty($this->cookies))
		{
			if(!is_array($this->cookies))
				$this->cookies = (array)$this->cookies;

			$cookie_pairs = array();
			foreach($this->cookies as $cookieKey => $cookieVal)
				$cookie_pairs[] = $cookieKey."=".urlencode($cookieVal);
			$headers[] = "Cookie: ".implode("; ",$cookie_pairs);
		}
		if(!empty($this->rawheaders))
		{
			if(!is_array($this->rawheaders))
				$this->rawheaders = (array)$this->rawheaders;
			// foreach always starts at the first element, so the raw headers
			// are sent on every request, not just the first one
			foreach($this->rawheaders as $headerKey => $headerVal)
				$headers[] = $headerKey.": ".$headerVal;
		}
		if(!empty($content_type)) {
			if ($content_type == "multipart/form-data")
				$headers[] = "Content-type: $content_type; boundary=".$this->_mime_boundary;
			else
				$headers[] = "Content-type: $content_type";
		}
		if($has_body)
			$headers[] = "Content-length: ".strlen($body);
		if(!empty($this->user) || !empty($this->pass))
			$headers[] = "Authorization: BASIC ".base64_encode($this->user.":".$this->pass);

		// Every value that reaches the shell is quoted with escapeshellarg()
		// so headers, body and URI cannot break out of their argument.
		$cmdline_params = "";
		foreach($headers as $header)
			$cmdline_params .= " -H ".escapeshellarg($header);

		if($has_body)
			$cmdline_params .= " -d ".escapeshellarg($body);

		if($this->read_timeout > 0)
			$cmdline_params .= " -m ".escapeshellarg($this->read_timeout);

		// use the configured temp dir when the class provides one,
		// otherwise the system default
		$temp_dir = !empty($this->temp_dir) ? $this->temp_dir : sys_get_temp_dir();
		$headerfile = tempnam($temp_dir, "sno");
		if($headerfile === false)
		{
			$this->error = "Error: could not create a temporary file for the cURL headers.";
			return false;
		}

		// NOTE(review): -k disables TLS certificate verification; it is kept
		// here only because existing callers may rely on it.
		$output = array();
		exec($this->curl_path." -k -D ".escapeshellarg($headerfile).$cmdline_params." ".escapeshellarg($URI),$output,$return);

		if($return)
		{
			// do not leave the header dump behind on failure
			if(file_exists($headerfile))
				unlink($headerfile);
			$this->error = "Error: cURL could not retrieve the document, error $return.";
			return false;
		}

		$results = implode("\r\n",$output);

		// file() keeps the line endings, so stored headers end in CR/LF
		$result_headers = file($headerfile);
		unlink($headerfile);
		if($result_headers === false)
			$result_headers = array();

		$this->_redirectaddr = false;
		$this->headers = array();

		foreach($result_headers as $header_line)
		{
			// if a header begins with Location: or URI:, set the redirect
			if(preg_match("/^(Location:|URI:)\s*(.+)/i",chop($header_line),$matches))
			{
				$location = $matches[2];
				// look for :// in the Location header to see if hostname is included
				if(!preg_match("|\:\/\/|",$location))
				{
					// no host in the path, so prepend
					$this->_redirectaddr = $scheme."://".$this->host.":".$this->port;
					// eliminate double slash
					if(!preg_match("|^/|",$location))
						$this->_redirectaddr .= "/";
					$this->_redirectaddr .= $location;
				}
				else
					$this->_redirectaddr = $location;
			}

			if(preg_match("|^HTTP/|",$header_line))
			{
				// record the numeric status the same way the plain-http
				// request path does
				if(preg_match("|^HTTP/[^\s]*\s(.*?)\s|",$header_line,$status))
					$this->status = $status[1];
				$this->response_code = $header_line;
			}

			$this->headers[] = $header_line;
		}

		// check if there is a redirect meta tag
		if(preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]*URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",$results,$match))
		{
			$this->_redirectaddr = $this->_expandlinks($match[1],$URI);
		}

		// have we hit our frame depth and is there frame src to fetch?
		if(($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i",$results,$match))
		{
			$this->results[] = $results;
			foreach($match[1] as $frame_src)
				$this->_frameurls[] = $this->_expandlinks($frame_src,$scheme."://".$this->host);
		}
		// have we already fetched framed content?
		elseif(is_array($this->results))
			$this->results[] = $results;
		// no framed content
		else
			$this->results = $results;

		return true;
	}
1079
1080/*======================================================================*\
1081	Function:	setcookies()
1082	Purpose:	set cookies for a redirection
1083\*======================================================================*/
1084	
1085	function setcookies()
1086	{
1087		for($x=0; $x<count($this->headers); $x++)
1088		{
1089		if(preg_match('/^set-cookie:[\s]+([^=]+)=([^;]+)/i', $this->headers[$x],$match))
1090			$this->cookies[$match[1]] = urldecode($match[2]);
1091		}
1092	}
1093
1094	
1095/*======================================================================*\
1096	Function:	_check_timeout
1097	Purpose:	checks whether timeout has occurred
1098	Input:		$fp	file pointer
1099\*======================================================================*/
1100
1101	function _check_timeout($fp)
1102	{
1103		if ($this->read_timeout > 0) {
1104			$fp_status = socket_get_status($fp);
1105			if ($fp_status["timed_out"]) {
1106				$this->timed_out = true;
1107				return true;
1108			}
1109		}
1110		return false;
1111	}
1112
1113/*======================================================================*\
1114	Function:	_connect
1115	Purpose:	make a socket connection
1116	Input:		$fp	file pointer
1117\*======================================================================*/
1118	
1119	function _connect(&$fp)
1120	{
1121		if(!empty($this->proxy_host) && !empty($this->proxy_port))
1122			{
1123				$this->_isproxy = true;
1124				
1125				$host = $this->proxy_host;
1126				$port = $this->proxy_port;
1127			}
1128		else
1129		{
1130			$host = $this->host;
1131			$port = $this->port;
1132		}
1133	
1134		$this->status = 0;
1135		
1136		if($fp = fsockopen(
1137					$host,
1138					$port,
1139					$errno,
1140					$errstr,
1141					$this->_fp_timeout
1142					))
1143		{
1144			// socket connection succeeded
1145
1146			return true;
1147		}
1148		else
1149		{
1150			// socket connection failed
1151			$this->status = $errno;
1152			switch($errno)
1153			{
1154				case -3:
1155					$this->error="socket creation failed (-3)";
1156				case -4:
1157					$this->error="dns lookup failure (-4)";
1158				case -5:
1159					$this->error="connection refused or timed out (-5)";
1160				default:
1161					$this->error="connection failed (".$errno.")";
1162			}
1163			return false;
1164		}
1165	}
1166/*======================================================================*\
1167	Function:	_disconnect
1168	Purpose:	disconnect a socket connection
1169	Input:		$fp	file pointer
1170\*======================================================================*/
1171	
1172	function _disconnect($fp)
1173	{
1174		return(fclose($fp));
1175	}
1176
1177	
1178/*======================================================================*\
1179	Function:	_prepare_post_body
1180	Purpose:	Prepare post body according to encoding type
1181	Input:		$formvars  - form variables
1182				$formfiles - form upload files
1183	Output:		post body
1184\*======================================================================*/
1185	
1186	function _prepare_post_body($formvars, $formfiles)
1187	{
1188		settype($formvars, "array");
1189		settype($formfiles, "array");
1190		$postdata = '';
1191
1192		if (count($formvars) == 0 && count($formfiles) == 0)
1193			return;
1194		
1195		switch ($this->_submit_type) {
1196			case "application/x-www-form-urlencoded":
1197				reset($formvars);
1198				while(list($key,$val) = each($formvars)) {
1199					if (is_array($val) || is_object($val)) {
1200						while (list($cur_key, $cur_val) = each($val)) {
1201							$postdata .= urlencode($key)."[]=".urlencode($cur_val)."&";
1202						}
1203					} else
1204						$postdata .= urlencode($key)."=".urlencode($val)."&";
1205				}
1206				break;
1207
1208			case "multipart/form-data":
1209				$this->_mime_boundary = "Snoopy".md5(uniqid(microtime()));
1210				
1211				reset($formvars);
1212				while(list($key,$val) = each($formvars)) {
1213					if (is_array($val) || is_object($val)) {
1214						while (list($cur_key, $cur_val) = each($val)) {
1215							$postdata .= "--".$this->_mime_boundary."\r\n";
1216							$postdata .= "Content-Disposition: form-data; name=\"$key\[\]\"\r\n\r\n";
1217							$postdata .= "$cur_val\r\n";
1218						}
1219					} else {
1220						$postdata .= "--".$this->_mime_boundary."\r\n";
1221						$postdata .= "Content-Disposition: form-data; name=\"$key\"\r\n\r\n";
1222						$postdata .= "$val\r\n";
1223					}
1224				}
1225				
1226				reset($formfiles);
1227				while (list($field_name, $file_names) = each($formfiles)) {
1228					settype($file_names, "array");
1229					while (list(, $file_name) = each($file_names)) {
1230						if (!is_readable($file_name)) continue;
1231
1232						$fp = fopen($file_name, "r");
1233						$file_content = fread($fp, filesize($file_name));
1234						fclose($fp);
1235						$base_name = basename($file_name);
1236
1237						$postdata .= "--".$this->_mime_boundary."\r\n";
1238						$postdata .= "Content-Disposition: form-data; name=\"$field_name\"; filename=\"$base_name\"\r\n\r\n";
1239						$postdata .= "$file_content\r\n";
1240					}
1241				}
1242				$postdata .= "--".$this->_mime_boundary."--\r\n";
1243				break;
1244		}
1245
1246		return $postdata;
1247	}
1248}
1249
1250?>