/common/libraries/plugin/htmlpurifier/library/HTMLPurifier/URI.php
PHP | 240 lines | 142 code | 21 blank | 77 comment | 28 complexity | 0484b58e045ec01b9c96dcf8c5930176 MD5 | raw file
Possible License(s): BSD-3-Clause, LGPL-2.1, LGPL-3.0, GPL-3.0, MIT, GPL-2.0
- <?php
-
- /**
- * HTML Purifier's internal representation of a URI.
- * @note
- * Internal data-structures are completely escaped. If the data needs
- * to be used in a non-URI context (which is very unlikely), be sure
- * to decode it first. The URI may not necessarily be well-formed until
- * validate() is called.
- */
- class HTMLPurifier_URI
- {
-
- public $scheme, $userinfo, $host, $port, $path, $query, $fragment;
-
- /**
- * @note Automatically normalizes scheme and port
- */
- public function __construct($scheme, $userinfo, $host, $port, $path, $query, $fragment)
- {
- $this->scheme = is_null($scheme) || ctype_lower($scheme) ? $scheme : strtolower($scheme);
- $this->userinfo = $userinfo;
- $this->host = $host;
- $this->port = is_null($port) ? $port : (int) $port;
- $this->path = $path;
- $this->query = $query;
- $this->fragment = $fragment;
- }
-
- /**
- * Retrieves a scheme object corresponding to the URI's scheme/default
- * @param $config Instance of HTMLPurifier_Config
- * @param $context Instance of HTMLPurifier_Context
- * @return Scheme object appropriate for validating this URI
- */
- public function getSchemeObj($config, $context)
- {
- $registry = HTMLPurifier_URISchemeRegistry :: instance();
- if ($this->scheme !== null)
- {
- $scheme_obj = $registry->getScheme($this->scheme, $config, $context);
- if (! $scheme_obj)
- return false; // invalid scheme, clean it out
- }
- else
- {
- // no scheme: retrieve the default one
- $def = $config->getDefinition('URI');
- $scheme_obj = $registry->getScheme($def->defaultScheme, $config, $context);
- if (! $scheme_obj)
- {
- // something funky happened to the default scheme object
- trigger_error('Default scheme object "' . $def->defaultScheme . '" was not readable', E_USER_WARNING);
- return false;
- }
- }
- return $scheme_obj;
- }
-
- /**
- * Generic validation method applicable for all schemes. May modify
- * this URI in order to get it into a compliant form.
- * @param $config Instance of HTMLPurifier_Config
- * @param $context Instance of HTMLPurifier_Context
- * @return True if validation/filtering succeeds, false if failure
- */
- public function validate($config, $context)
- {
-
- // ABNF definitions from RFC 3986
- $chars_sub_delims = '!$&\'()*+,;=';
- $chars_gen_delims = ':/?#[]@';
- $chars_pchar = $chars_sub_delims . ':@';
-
- // validate host
- if (! is_null($this->host))
- {
- $host_def = new HTMLPurifier_AttrDef_URI_Host();
- $this->host = $host_def->validate($this->host, $config, $context);
- if ($this->host === false)
- $this->host = null;
- }
-
- // validate scheme
- // NOTE: It's not appropriate to check whether or not this
- // scheme is in our registry, since a URIFilter may convert a
- // URI that we don't allow into one we do. So instead, we just
- // check if the scheme can be dropped because there is no host
- // and it is our default scheme.
- if (! is_null($this->scheme) && is_null($this->host) || $this->host === '')
- {
- // support for relative paths is pretty abysmal when the
- // scheme is present, so axe it when possible
- $def = $config->getDefinition('URI');
- if ($def->defaultScheme === $this->scheme)
- {
- $this->scheme = null;
- }
- }
-
- // validate username
- if (! is_null($this->userinfo))
- {
- $encoder = new HTMLPurifier_PercentEncoder($chars_sub_delims . ':');
- $this->userinfo = $encoder->encode($this->userinfo);
- }
-
- // validate port
- if (! is_null($this->port))
- {
- if ($this->port < 1 || $this->port > 65535)
- $this->port = null;
- }
-
- // validate path
- $path_parts = array();
- $segments_encoder = new HTMLPurifier_PercentEncoder($chars_pchar . '/');
- if (! is_null($this->host))
- { // this catches $this->host === ''
- // path-abempty (hier and relative)
- // http://www.example.com/my/path
- // //www.example.com/my/path (looks odd, but works, and
- // recognized by most browsers)
- // (this set is valid or invalid on a scheme by scheme
- // basis, so we'll deal with it later)
- // file:///my/path
- // ///my/path
- $this->path = $segments_encoder->encode($this->path);
- }
- elseif ($this->path !== '')
- {
- if ($this->path[0] === '/')
- {
- // path-absolute (hier and relative)
- // http:/my/path
- // /my/path
- if (strlen($this->path) >= 2 && $this->path[1] === '/')
- {
- // This could happen if both the host gets stripped
- // out
- // http://my/path
- // //my/path
- $this->path = '';
- }
- else
- {
- $this->path = $segments_encoder->encode($this->path);
- }
- }
- elseif (! is_null($this->scheme))
- {
- // path-rootless (hier)
- // http:my/path
- // Short circuit evaluation means we don't need to check nz
- $this->path = $segments_encoder->encode($this->path);
- }
- else
- {
- // path-noscheme (relative)
- // my/path
- // (once again, not checking nz)
- $segment_nc_encoder = new HTMLPurifier_PercentEncoder($chars_sub_delims . '@');
- $c = strpos($this->path, '/');
- if ($c !== false)
- {
- $this->path = $segment_nc_encoder->encode(substr($this->path, 0, $c)) . $segments_encoder->encode(substr($this->path, $c));
- }
- else
- {
- $this->path = $segment_nc_encoder->encode($this->path);
- }
- }
- }
- else
- {
- // path-empty (hier and relative)
- $this->path = ''; // just to be safe
- }
-
- // qf = query and fragment
- $qf_encoder = new HTMLPurifier_PercentEncoder($chars_pchar . '/?');
-
- if (! is_null($this->query))
- {
- $this->query = $qf_encoder->encode($this->query);
- }
-
- if (! is_null($this->fragment))
- {
- $this->fragment = $qf_encoder->encode($this->fragment);
- }
-
- return true;
-
- }
-
- /**
- * Convert URI back to string
- * @return String URI appropriate for output
- */
- public function toString()
- {
- // reconstruct authority
- $authority = null;
- // there is a rendering difference between a null authority
- // (http:foo-bar) and an empty string authority
- // (http:///foo-bar).
- if (! is_null($this->host))
- {
- $authority = '';
- if (! is_null($this->userinfo))
- $authority .= $this->userinfo . '@';
- $authority .= $this->host;
- if (! is_null($this->port))
- $authority .= ':' . $this->port;
- }
-
- // Reconstruct the result
- // One might wonder about parsing quirks from browsers after
- // this reconstruction. Unfortunately, parsing behavior depends
- // on what *scheme* was employed (file:///foo is handled *very*
- // differently than http:///foo), so unfortunately we have to
- // defer to the schemes to do the right thing.
- $result = '';
- if (! is_null($this->scheme))
- $result .= $this->scheme . ':';
- if (! is_null($authority))
- $result .= '//' . $authority;
- $result .= $this->path;
- if (! is_null($this->query))
- $result .= '?' . $this->query;
- if (! is_null($this->fragment))
- $result .= '#' . $this->fragment;
-
- return $result;
- }
-
- }
-
- // vim: et sw=4 sts=4