/lib/external/idna_convert/src/IdnaConvert.php
PHP | 405 lines | 254 code | 33 blank | 118 comment | 45 complexity | a9473cc50392d9419e40121529a3e21e MD5 | raw file
Possible License(s): GPL-2.0, MIT, LGPL-2.1, BSD-3-Clause, AGPL-3.0, Apache-2.0
- <?php
- // {{{ license
- /* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4 foldmethod=marker: */
- //
- // +----------------------------------------------------------------------+
- // | This library is free software; you can redistribute it and/or modify |
- // | it under the terms of the GNU Lesser General Public License as |
- // | published by the Free Software Foundation; either version 2.1 of the |
- // | License, or (at your option) any later version. |
- // | |
- // | This library is distributed in the hope that it will be useful, but |
- // | WITHOUT ANY WARRANTY; without even the implied warranty of |
- // | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
- // | Lesser General Public License for more details. |
- // | |
- // | You should have received a copy of the GNU Lesser General Public |
- // | License along with this library; if not, write to the Free Software |
- // | Foundation, Inc., 51 Franklin St, Boston, MA 02110, United States |
- // +----------------------------------------------------------------------+
- //
- // }}}
- /**
- * Encode/decode Internationalized Domain Names.
- *
- * The class allows to convert internationalized domain names
- * (see RFC 3490 for details) as they can be used with various registries worldwide
- * to be translated between their original (localized) form and their encoded form
- * as it will be used in the DNS (Domain Name System).
- *
- * The class provides two public methods, encode() and decode(), which do exactly
- * what you would expect them to do. You are allowed to use complete domain names,
- * simple strings and complete email addresses as well. That means, that you might
- * use any of the following notations:
- *
- * - www.nörgler.com
- * - xn--nrgler-wxa
- * - xn--brse-5qa.xn--knrz-1ra.info
- *
- * Unicode input might be given as either UTF-8 string, UCS-4 string or UCS-4 array.
- * Unicode output is available in the same formats.
- * You can select your preferred format via {@link setEncoding()}.
- *
- * ACE input and output is always expected to be ASCII.
- *
- * @author Matthias Sommerfeld <mso@phlylabs.de>
- * @copyright 2004-2016 phlyLabs Berlin, http://phlylabs.de
- * @version 1.0.1-dev 2016-01-12
- */
- namespace Mso\IdnaConvert;
/**
 * Converts Internationalized Domain Names between their Unicode form and
 * their ASCII-compatible (Punycode) form.
 *
 * The actual Punycode work is delegated to a Punycode object obtained from
 * punycodeFactory(); charset conversion is delegated to UnicodeTranscoder.
 */
class IdnaConvert
{
    const Version = '1.1.0';
    const SubVersion = 'main';

    // Internal settings, do not touch!

    /** @var string Charset of Unicode input/output: 'utf8' (default), 'ucs4_string' or 'ucs4_array' */
    protected $encoding = 'utf8';

    /** @var bool When true, only single domain labels are accepted (no URLs, no email addresses) */
    protected $strictMode = false;

    /** @var string IDNA flavour, either '2003' (old) or '2008' (default) */
    protected $idnVersion = '2008';

    /** @var object|null Nameprep data set matching $idnVersion; created by setIdnVersion() */
    protected $NamePrepData = null;

    /** @var object|null UnicodeTranscoder instance created in the constructor */
    protected $UnicodeTranscoder = null;

    /**
     * the constructor
     *
     * @param array|null $params Optional settings; recognised keys are
     *                           'encoding', 'idn_version' and 'strict_mode'
     * @throws \InvalidArgumentException If a given setting is rejected by its setter
     * @since 0.5.2
     */
    public function __construct($params = null)
    {
        $this->UnicodeTranscoder = new UnicodeTranscoder();

        // Kept for backwards compatibility. Consider using the setter methods instead.
        if (!empty($params) && is_array($params)) {
            if (isset($params['encoding'])) {
                $this->setEncoding($params['encoding']);
            }
            if (isset($params['idn_version'])) {
                $this->setIdnVersion($params['idn_version']);
            }
            if (isset($params['strict_mode'])) {
                $this->setStrictMode($params['strict_mode']);
            }
        }

        // Makes sure the nameprep data set exists even if no 'idn_version' was passed.
        $this->setIdnVersion($this->idnVersion);
    }

    /**
     * @return string Version and sub version joined by a dash, e.g. "1.1.0-main"
     */
    public function getClassVersion()
    {
        return self::Version . '-' . self::SubVersion;
    }

    /**
     * @return string
     */
    public function getEncoding()
    {
        return $this->encoding;
    }

    /**
     * @param string $encoding One of 'utf8', 'ucs4_string', 'ucs4_array'
     * @throws \InvalidArgumentException For any other value
     */
    public function setEncoding($encoding)
    {
        switch ($encoding) {
            case 'utf8':
            case 'ucs4_string':
            case 'ucs4_array':
                $this->encoding = $encoding;
                break;
            default:
                throw new \InvalidArgumentException(sprintf('Invalid encoding %s', $encoding));
        }
    }

    /**
     * @return boolean
     */
    public function isStrictMode()
    {
        return $this->strictMode;
    }

    /**
     * @param boolean $strictMode Any value; it is reduced to its truthiness
     */
    public function setStrictMode($strictMode)
    {
        $this->strictMode = (bool) $strictMode;
    }

    /**
     * @return string Either '2003' or '2008'
     */
    public function getIdnVersion()
    {
        return $this->idnVersion;
    }

    /**
     * Select the IDNA version and load the matching nameprep data set.
     *
     * @param int|string $idnVersion 2003 or 2008, as integer or numeric string
     * @throws \InvalidArgumentException For any other value
     */
    public function setIdnVersion($idnVersion)
    {
        // Booleans must be rejected explicitly: with a loose comparison
        // true equals '2003', so it used to slip through the validation.
        $normalized = (is_scalar($idnVersion) && !is_bool($idnVersion)) ? (string) $idnVersion : null;

        if ($normalized === null || !in_array($normalized, array('2003', '2008'), true)) {
            throw new \InvalidArgumentException(sprintf(
                'Invalid IDN version %s',
                is_scalar($idnVersion) ? var_export($idnVersion, true) : gettype($idnVersion)
            ));
        }

        // (Re-)instantiate the data set only when none is loaded yet or the version changes.
        if ($this->NamePrepData === null || $normalized !== (string) $this->idnVersion) {
            $this->NamePrepData = ($normalized === '2003')
                ? new NamePrepData2003()
                : new NamePrepData();
        }
        $this->idnVersion = $normalized;
    }

    /**
     * Decode a given ACE domain name
     *
     * Accepts a single label, a dotted domain name, an email address or a URL.
     * In strict mode only input without '@', ':', '.' and '/' is accepted.
     *
     * @param string      $input             Domain name (ACE string)
     * @param string|null $one_time_encoding Desired output encoding for this call only;
     *                                       a falsy value selects the object's encoding
     * @return mixed Decoded domain name: the UTF-8 string, or whatever
     *               UnicodeTranscoder::convert() yields for the UCS-4 formats
     * @throws \InvalidArgumentException On an unknown encoding, or on compound input in strict mode
     */
    public function decode($input, $one_time_encoding = null)
    {
        $punyCode = $this->punycodeFactory();

        // Optionally set
        if ($one_time_encoding) {
            switch ($one_time_encoding) {
                case 'utf8':
                case 'ucs4_string':
                case 'ucs4_array':
                    break;
                default:
                    throw new \InvalidArgumentException(sprintf('Invalid encoding %s', $one_time_encoding));
            }
        }

        // Make sure to drop any newline characters around
        $input = trim($input);

        // Negotiate input and try to determine, whether it is a plain string,
        // an email address or something like a complete URL.
        // An '@' at offset 0 is deliberately not treated as an email address.
        $atPos = strpos($input, '@');
        $looksLikeEmail = ($atPos !== false && $atPos > 0);
        $looksLikeUrl = !$looksLikeEmail && preg_match('![:\./]!', $input);

        // No no in strict mode
        if ($this->strictMode && ($looksLikeEmail || $looksLikeUrl)) {
            throw new \InvalidArgumentException('Only individual domain name parts can be handled in strict mode');
        }

        if ($looksLikeEmail) {
            list($localPart, $domain) = explode('@', $input, 2);
            $domain = $this->decodeLabels($domain, $punyCode);
            $localPart = $this->decodeLabels($localPart, $punyCode);
            $return = $localPart . '@' . $domain;
        } elseif ($looksLikeUrl) {
            // A complete domain name (with or without paths / parameters)
            $parsed = parse_url($input);
            if (is_array($parsed) && isset($parsed['host'])) {
                $parsed['host'] = $this->decodeLabels($parsed['host'], $punyCode);
                $return = $this->buildUrl($parsed);
            } else {
                // parse_url seems to have failed, try without it
                $return = $this->decodeLabels($input, $punyCode);
            }
        } else {
            // Otherwise we consider it being a pure domain name string;
            // it contains no dot, so this handles exactly one label.
            $return = $this->decodeLabels($input, $punyCode);
        }

        // The output is UTF-8 by default, other output formats need conversion here
        // If one time encoding is given, use this, else the objects property
        $outputEncoding = ($one_time_encoding) ? $one_time_encoding : $this->encoding;
        switch ($outputEncoding) {
            case 'utf8':
                return $return;
            case 'ucs4_string':
                return $this->UnicodeTranscoder->convert($return, 'utf8', 'ucs4');
            case 'ucs4_array':
                return $this->UnicodeTranscoder->convert($return, 'utf8', 'ucs4array');
            default:
                throw new \InvalidArgumentException(sprintf('Unsupported output encoding %s', $outputEncoding));
        }
    }

    /**
     * Encode a given UTF-8 domain name
     *
     * The input is split at '.', '/', ':', '?' and '@' (ideographic and
     * fullwidth dots are turned into plain dots first); every part between
     * two separators is encoded on its own and the separators are kept.
     *
     * @param string|array $decoded           Domain name (UTF-8 or UCS-4)
     * @param string|false $one_time_encoding Input encoding for this call only;
     *                                        a falsy value selects the object's encoding
     * @return string Encoded Domain name (ACE string)
     * @throws \InvalidArgumentException On an unknown encoding, or on a separator in strict mode
     */
    public function encode($decoded, $one_time_encoding = false)
    {
        // Forcing conversion of input to UCS4 array
        // If one time encoding is given, use this, else the objects property
        $inputEncoding = $one_time_encoding ? $one_time_encoding : $this->encoding;
        switch ($inputEncoding) {
            case 'utf8':
                $decoded = $this->UnicodeTranscoder->convert($decoded, 'utf8', 'ucs4array');
                break;
            case 'ucs4_string':
                $decoded = $this->UnicodeTranscoder->convert($decoded, 'ucs4', 'ucs4array');
                break;
            case 'ucs4_array':
                break;
            default:
                throw new \InvalidArgumentException(sprintf('Unsupported input encoding %s', $inputEncoding));
        }

        // No input, no output, what else did you expect?
        if (empty($decoded)) {
            return '';
        }

        $punyCode = $this->punycodeFactory();

        // Offset of the first code point after the most recent separator
        $last_begin = 0;
        // Output string
        $output = '';

        foreach ($decoded as $k => $v) {
            switch ($v) {
                // Make sure to use just the plain dot
                case 0x3002:
                case 0xFF0E:
                case 0xFF61:
                    $decoded[$k] = 0x2E;
                    // Right, no break here, the above are converted to dots anyway
                // Stumbling across an anchoring character
                case 0x2E:
                case 0x2F:
                case 0x3A:
                case 0x3F:
                case 0x40:
                    // Neither email addresses nor URLs allowed in strict mode
                    if ($this->strictMode) {
                        throw new \InvalidArgumentException('Neither email addresses nor URLs are allowed in strict mode.');
                    }
                    // Nothing precedes a separator at offset 0, so there is nothing to encode ...
                    if ($k) {
                        $output .= $this->encodeSegment(
                            array_slice($decoded, $last_begin, $k - $last_begin),
                            $punyCode
                        );
                    }
                    // ... but the separator itself is always kept; it used to be
                    // dropped when it was the very first character of the input.
                    $output .= chr($decoded[$k]);
                    $last_begin = $k + 1;
            }
        }

        // Catch the rest of the string
        if ($last_begin) {
            return $output . $this->encodeSegment(array_slice($decoded, $last_begin), $punyCode);
        }

        // No separator at all: encode the input as a whole
        if (false !== ($output = $punyCode->encode($decoded))) {
            return $output;
        }
        return $this->UnicodeTranscoder->convert($decoded, 'ucs4array', 'utf8');
    }

    /**
     * Mitigates a weakness of encode(), which cannot properly handle URIs but instead encodes their
     * path or query components, too.
     *
     * @param string $uri Expects the URI as a UTF-8 (or ASCII) string
     * @return string The URI encoded to Punycode, everything but the host component is left alone
     * @throws \InvalidArgumentException If parse_url() finds no host in the given string
     * @since 0.6.4
     */
    public function encodeUri($uri)
    {
        $parsed = parse_url($uri);
        if (!is_array($parsed) || !isset($parsed['host'])) {
            throw new \InvalidArgumentException('The given string does not look like a URI');
        }

        $labels = explode('.', $parsed['host']);
        foreach ($labels as $idx => $label) {
            $conv = $this->encode($label, 'utf8');
            if ($conv) {
                $labels[$idx] = $conv;
            }
        }
        $parsed['host'] = join('.', $labels);

        return $this->buildUrl($parsed);
    }

    /**
     * The actual punycode class is rather costly, as well as passing the huge nameprep database around.
     * This factory method allows to ease the burden when dealing with multiple IDN versions.
     *
     * Note: $instances is a static variable of this method, so the cache is
     * not per object. A cached Punycode instance keeps the NamePrepData and
     * UnicodeTranscoder of the object that created it first.
     *
     * @return \Mso\IdnaConvert\Punycode
     */
    protected function punycodeFactory()
    {
        static $instances = array();

        if (!isset($instances[$this->idnVersion])) {
            $instances[$this->idnVersion] = new Punycode($this->NamePrepData, $this->UnicodeTranscoder);
        }
        return $instances[$this->idnVersion];
    }

    /**
     * Decode every dot separated label of a string, keeping labels the decoder returns nothing for.
     *
     * @param string $dotted   String to split at '.'
     * @param object $punyCode Decoder as returned by punycodeFactory()
     * @return string The labels joined by '.' again
     */
    private function decodeLabels($dotted, $punyCode)
    {
        $labels = explode('.', $dotted);
        foreach ($labels as $idx => $label) {
            $conv = $punyCode->decode($label);
            if ($conv) {
                $labels[$idx] = $conv;
            }
        }
        return join('.', $labels);
    }

    /**
     * Encode one run of code points, falling back to its plain UTF-8 form
     * when the encoder returns a falsy result.
     *
     * @param array  $codePoints UCS-4 code points of one segment
     * @param object $punyCode   Encoder as returned by punycodeFactory()
     * @return string
     */
    private function encodeSegment(array $codePoints, $punyCode)
    {
        $encoded = $punyCode->encode($codePoints);
        if ($encoded) {
            return $encoded;
        }
        return $this->UnicodeTranscoder->convert($codePoints, 'ucs4array', 'utf8');
    }

    /**
     * Tell whether a parse_url() result holds a non-empty value for a component.
     *
     * empty() is not usable here: it treats the legitimate value "0" as missing.
     *
     * @param array  $parsed Result of parse_url()
     * @param string $key    Component name, e.g. 'query'
     * @return bool
     */
    private function hasComponent(array $parsed, $key)
    {
        return isset($parsed[$key]) && (string) $parsed[$key] !== '';
    }

    /**
     * Put a URL back together from its parse_url() components.
     *
     * @param array $parsed Result of parse_url(); the 'host' key must be set
     * @return string
     */
    private function buildUrl(array $parsed)
    {
        $url = '';

        if ($this->hasComponent($parsed, 'scheme')) {
            // "mailto" is the one scheme written without the double slash here
            $url .= $parsed['scheme'] . (strtolower($parsed['scheme']) == 'mailto' ? ':' : '://');
        }
        if ($this->hasComponent($parsed, 'user')) {
            $url .= $parsed['user'];
            if ($this->hasComponent($parsed, 'pass')) {
                $url .= ':' . $parsed['pass'];
            }
            $url .= '@';
        }

        $url .= $parsed['host'];

        if ($this->hasComponent($parsed, 'port')) {
            $url .= ':' . $parsed['port'];
        }
        if ($this->hasComponent($parsed, 'path')) {
            $url .= $parsed['path'];
        }
        if ($this->hasComponent($parsed, 'query')) {
            $url .= '?' . $parsed['query'];
        }
        if ($this->hasComponent($parsed, 'fragment')) {
            $url .= '#' . $parsed['fragment'];
        }

        return $url;
    }
}