PageRenderTime 58ms CodeModel.GetById 21ms RepoModel.GetById 0ms app.codeStats 1ms

/yii/framework/vendors/idna_convert/idna_convert.class.php

https://bitbucket.org/syed_webt/yii_syed
PHP | 1605 lines | 1266 code | 42 blank | 297 comment | 173 complexity | cb01aa804140e362ff6e72e466369917 MD5 | raw file
Possible License(s): GPL-2.0, LGPL-2.1, BSD-3-Clause, BSD-2-Clause

Large files are truncated, but you can click here to view the full file

  1. <?php
  2. // {{{ license
  3. /* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4 foldmethod=marker: */
  4. //
  5. // +----------------------------------------------------------------------+
  6. // | This library is free software; you can redistribute it and/or modify |
  7. // | it under the terms of the GNU Lesser General Public License as |
  8. // | published by the Free Software Foundation; either version 2.1 of the |
  9. // | License, or (at your option) any later version. |
  10. // | |
  11. // | This library is distributed in the hope that it will be useful, but |
  12. // | WITHOUT ANY WARRANTY; without even the implied warranty of |
  13. // | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
  14. // | Lesser General Public License for more details. |
  15. // | |
  16. // | You should have received a copy of the GNU Lesser General Public |
  17. // | License along with this library; if not, write to the Free Software |
  18. // | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 |
  19. // | USA. |
  20. // +----------------------------------------------------------------------+
  21. //
  22. // }}}
  23. /**
  24. * Encode/decode Internationalized Domain Names.
  25. *
  26. * The class allows to convert internationalized domain names
  27. * (see RFC 3490 for details) as they can be used with various registries worldwide
  28. * to be translated between their original (localized) form and their encoded form
  29. * as it will be used in the DNS (Domain Name System).
  30. *
  31. * The class provides two public methods, encode() and decode(), which do exactly
  32. * what you would expect them to do. You are allowed to use complete domain names,
  33. * simple strings and complete email addresses as well. That means, that you might
  34. * use any of the following notations:
  35. *
  36. * - www.nörgler.com
  37. * - xn--nrgler-wxa
  38. * - xn--brse-5qa.xn--knrz-1ra.info
  39. *
  40. * Unicode input might be given as either UTF-8 string, UCS-4 string or UCS-4 array.
  41. * Unicode output is available in the same formats.
  42. * You can select your preferred format via {@link set_parameter()}.
  43. *
  44. * ACE input and output is always expected to be ASCII.
  45. *
  46. * @author Matthias Sommerfeld <mso@phlylabs.de>
  47. * @copyright 2004-2011 phlyLabs Berlin, http://phlylabs.de
  48. * @version 0.8.0 2011-03-11
  49. */
  50. class idna_convert
  51. {
  52. // NP See below
  53. // Internal settings, do not mess with them
  54. protected $_punycode_prefix = 'xn--';
  55. protected $_invalid_ucs = 0x80000000;
  56. protected $_max_ucs = 0x10FFFF;
  57. protected $_base = 36;
  58. protected $_tmin = 1;
  59. protected $_tmax = 26;
  60. protected $_skew = 38;
  61. protected $_damp = 700;
  62. protected $_initial_bias = 72;
  63. protected $_initial_n = 0x80;
  64. protected $_sbase = 0xAC00;
  65. protected $_lbase = 0x1100;
  66. protected $_vbase = 0x1161;
  67. protected $_tbase = 0x11A7;
  68. protected $_lcount = 19;
  69. protected $_vcount = 21;
  70. protected $_tcount = 28;
  71. protected $_ncount = 588; // _vcount * _tcount
  72. protected $_scount = 11172; // _lcount * _tcount * _vcount
  73. protected $_error = false;
  74. protected static $_mb_string_overload = null;
  75. // See {@link set_parameter()} for details of how to change the following
  76. // settings from within your script / application
  77. protected $_api_encoding = 'utf8'; // Default input charset is UTF-8
  78. protected $_allow_overlong = false; // Overlong UTF-8 encodings are forbidden
  79. protected $_strict_mode = false; // Behave strict or not
  80. protected $_idn_version = 2003; // Can be either 2003 (old, default) or 2008
  /**
   * Sets up the converter and applies any initial options.
   *
   * @param array|false $options Optional Option => Value pairs that are handed
   *                             to set_parameter(); any non-array value is ignored
   * @since 0.5.2
   */
  public function __construct($options = false)
  {
      // _sbase + _lcount * _vcount * _tcount, i.e. _sbase + _scount
      // NOTE(review): "slast" is not among the properties declared above this
      // constructor in the visible part of the class - confirm it is declared
      // (or actually needed) elsewhere.
      $this->slast = $this->_sbase + $this->_lcount * $this->_vcount * $this->_tcount;
      // If parameters are given, pass these to the respective method
      if (is_array($options)) {
          $this->set_parameter($options);
      }
      // Populate the (class wide) mbstring overloading cache if not set yet
      if (self::$_mb_string_overload === null) {
          // ini_get() yields a string, or false for an unknown directive.
          // Cast explicitly so the bitmask test is a plain integer operation
          // and cannot fail on an empty / non-numeric setting.
          self::$_mb_string_overload = (extension_loaded('mbstring')
              && (((int) ini_get('mbstring.func_overload')) & 0x02) === 0x02);
      }
  }
  /**
   * Sets one or more option values. Available options and values:
   *  [encoding    - 'utf8', 'ucs4_string' or 'ucs4_array'; stored as the API
   *                 encoding used by encode() and decode() when no one-time
   *                 encoding is passed]
   *  [overlong    - Unicode does not allow unnecessarily long encodings of chars;
   *                 to allow this, set this parameter to true, else to false;
   *                 default is false]
   *  [strict      - true: strict mode, good for registration purposes - causes
   *                 errors on failures; false: loose mode, ideal for "wildlife"
   *                 applications by silently ignoring errors and returning the
   *                 original input instead]
   *  [idn_version - 2003 or 2008]
   *  [encode_german_sz - deprecated; a false value maps U+00DF to "ss", a true
   *                 value removes that mapping]
   *
   * Processing stops at the first invalid option or value; options that were
   * handled before that point stay applied.
   *
   * @param mixed  $option Parameter to set (string: single parameter; array of
   *                       Parameter => Value pairs)
   * @param mixed  $value  Value to use (if parameter 1 is a string)
   * @return boolean true on success, false otherwise (see get_last_error())
   */
  public function set_parameter($option, $value = false)
  {
      if (!is_array($option)) {
          $option = array($option => $value);
      }
      foreach ($option as $k => $v) {
          switch ($k) {
              case 'encoding':
                  switch ($v) {
                      case 'utf8':
                      case 'ucs4_string':
                      case 'ucs4_array':
                          $this->_api_encoding = $v;
                          break;
                      default:
                          $this->_error('Set Parameter: Unknown parameter '.$v.' for option '.$k);
                          return false;
                  }
                  break;
              case 'overlong':
                  $this->_allow_overlong = ($v) ? true : false;
                  break;
              case 'strict':
                  $this->_strict_mode = ($v) ? true : false;
                  break;
              case 'idn_version':
                  // Loose comparison on purpose: both 2003 and '2003' are accepted
                  if (!in_array($v, array('2003', '2008'))) {
                      $this->_error('Set Parameter: Unknown parameter '.$v.' for option '.$k);
                      // Report the failure like every other invalid value does
                      return false;
                  }
                  $this->_idn_version = $v;
                  break;
              case 'encode_german_sz': // Deprecated
                  if (!$v) {
                      self::$NP['replacemaps'][0xDF] = array(0x73, 0x73);
                  } else {
                      unset(self::$NP['replacemaps'][0xDF]);
                  }
                  break;
              default:
                  $this->_error('Set Parameter: Unknown option '.$k);
                  return false;
          }
      }
      return true;
  }
  /**
   * Decode a given ACE domain name, email address or URL.
   *
   * Labels that cannot be decoded are left as they are. In strict mode only a
   * single label is accepted; email addresses and URLs are rejected.
   *
   * @param string       $input             Domain name (ACE string)
   * @param string|false $one_time_encoding Desired output encoding for this call
   *                                        only, see {@link set_parameter}
   * @return mixed Decoded domain name: UTF-8 string, UCS-4 string or UCS-4 array
   *               depending on the encoding; false on error
   */
  public function decode($input, $one_time_encoding = false)
  {
      // Validate the optional one-time encoding before doing any work
      if ($one_time_encoding) {
          switch ($one_time_encoding) {
              case 'utf8':
              case 'ucs4_string':
              case 'ucs4_array':
                  break;
              default:
                  $this->_error('Unknown encoding '.$one_time_encoding);
                  return false;
          }
      }
      // Make sure to drop any newline characters around
      $input = trim($input);
      $prefix_pattern = '!^'.preg_quote($this->_punycode_prefix, '!').'!';

      // Negotiate input and try to determine, whether it is a plain string,
      // an email address or something like a complete URL.
      // strpos() is tested for truthiness, so an "@" at position 0 does not
      // count as an email address.
      if (strpos($input, '@')) {
          if ($this->_strict_mode) {
              $this->_error('Only simple domain name parts can be handled in strict mode');
              return false;
          }
          list ($email_pref, $input) = explode('@', $input, 2);
          // Domain part
          $arr = explode('.', $input);
          foreach ($arr as $k => $v) {
              if (preg_match($prefix_pattern, $v)) {
                  $conv = $this->_decode($v);
                  if ($conv) $arr[$k] = $conv;
              }
          }
          $input = join('.', $arr);
          // Local part
          $arr = explode('.', $email_pref);
          foreach ($arr as $k => $v) {
              if (preg_match($prefix_pattern, $v)) {
                  $conv = $this->_decode($v);
                  if ($conv) $arr[$k] = $conv;
              }
          }
          $email_pref = join('.', $arr);
          $return = $email_pref . '@' . $input;
      } elseif (preg_match('![:\./]!', $input)) {
          // A complete domain name (with or without paths / parameters)
          if ($this->_strict_mode) {
              $this->_error('Only simple domain name parts can be handled in strict mode');
              return false;
          }
          $parsed = parse_url($input);
          if (isset($parsed['host'])) {
              $arr = explode('.', $parsed['host']);
              foreach ($arr as $k => $v) {
                  $conv = $this->_decode($v);
                  if ($conv) $arr[$k] = $conv;
              }
              $parsed['host'] = join('.', $arr);
              // Normalise the optional components to strings. Only missing or
              // empty components are skipped below; a literal "0" (query,
              // fragment, user, password) or port 0 must survive the round trip.
              $part = array();
              foreach (array('scheme', 'user', 'pass', 'port', 'path', 'query', 'fragment') as $name) {
                  $part[$name] = isset($parsed[$name]) ? (string) $parsed[$name] : '';
              }
              $return =
                  ($part['scheme'] === '' ? '' : $part['scheme'].(strtolower($part['scheme']) == 'mailto' ? ':' : '://'))
                  .($part['user'] === '' ? '' : $part['user'].($part['pass'] === '' ? '' : ':'.$part['pass']).'@')
                  .$parsed['host']
                  .($part['port'] === '' ? '' : ':'.$part['port'])
                  .$part['path']
                  .($part['query'] === '' ? '' : '?'.$part['query'])
                  .($part['fragment'] === '' ? '' : '#'.$part['fragment']);
          } else {
              // parse_url() found no host, treat the input as dot separated labels
              $arr = explode('.', $input);
              foreach ($arr as $k => $v) {
                  $conv = $this->_decode($v);
                  $arr[$k] = ($conv) ? $conv : $v;
              }
              $return = join('.', $arr);
          }
      } else {
          // Otherwise we consider it being a pure domain name string
          $return = $this->_decode($input);
          if (!$return) $return = $input;
      }

      // The result is UTF-8 at this point, other output formats need conversion.
      // If one time encoding is given, use this, else the object's property
      switch (($one_time_encoding) ? $one_time_encoding : $this->_api_encoding) {
          case 'utf8':
              return $return;
          case 'ucs4_string':
              return $this->_ucs4_to_ucs4_string($this->_utf8_to_ucs4($return));
          case 'ucs4_array':
              return $this->_utf8_to_ucs4($return);
          default:
              $this->_error('Unsupported output format');
              return false;
      }
  }
  /**
   * Encode a given domain name (or email address / URL like string) to ACE.
   *
   * The input is split at ".", "/", ":", "?" and "@" (ideographic, fullwidth
   * and halfwidth full stops are turned into a plain dot first). Every segment
   * is run through _encode(); segments that _encode() refuses are emitted as
   * UTF-8 instead.
   *
   * @param mixed        $decoded           Domain name (UTF-8 or UCS-4)
   * @param string|false $one_time_encoding Input encoding for this call only,
   *                                        see {@link set_parameter}
   * @return string|false Encoded domain name (ACE string), '' for empty input,
   *                      false on error
   */
  public function encode($decoded, $one_time_encoding = false)
  {
      // Force the input into a UCS-4 array; a one time encoding wins over the
      // object's property
      $format = $one_time_encoding ? $one_time_encoding : $this->_api_encoding;
      switch ($format) {
          case 'utf8':
              $decoded = $this->_utf8_to_ucs4($decoded);
              break;
          case 'ucs4_string':
              $decoded = $this->_ucs4_string_to_ucs4($decoded);
              break;
          case 'ucs4_array':
              break;
          default:
              $this->_error('Unsupported input format: '.$format);
              return false;
      }

      // No input, no output
      if (empty($decoded)) {
          return '';
      }

      $segment_start = 0; // Offset of the first code point of the current segment
      $result = '';

      foreach ($decoded as $pos => $code) {
          if ($code == 0x3002 || $code == 0xFF0E || $code == 0xFF61) {
              // Make sure to use just the plain dot
              $decoded[$pos] = 0x2E;
          } elseif (!in_array($code, array(0x2E, 0x2F, 0x3A, 0x3F, 0x40))) {
              // Not an anchoring character, keep collecting the segment
              continue;
          }

          // Neither email addresses nor URLs allowed in strict mode
          if ($this->_strict_mode) {
              $this->_error('Neither email addresses nor URLs are allowed in strict mode.');
              return false;
          }

          // A separator at the very first position produces no output at all
          if ($pos) {
              $chunk = array_slice($decoded, $segment_start, $pos - $segment_start);
              $ace = $this->_encode($chunk);
              $result .= $ace ? $ace : $this->_ucs4_to_utf8($chunk);
              $result .= chr($decoded[$pos]);
          }
          $segment_start = $pos + 1;
      }

      if ($segment_start) {
          // At least one separator was seen: catch the rest of the string
          $tail = array_slice($decoded, $segment_start, sizeof($decoded) - $segment_start);
          $ace = $this->_encode($tail);
          return $result . ($ace ? $ace : $this->_ucs4_to_utf8($tail));
      }

      // No separator at all: the whole input is one label
      $ace = $this->_encode($decoded);
      if ($ace) {
          return $ace;
      }
      return $this->_ucs4_to_utf8($decoded);
  }
  /**
   * Removes a weakness of encode(), which cannot properly handle URIs but
   * instead encodes their path or query components, too.
   *
   * Only the host component is converted (label by label); all other
   * components found by parse_url() are put back unchanged.
   *
   * @param string $uri Expects the URI as a UTF-8 (or ASCII) string
   * @return string|false The URI with its host encoded to Punycode; false if
   *                      parse_url() finds no host in the given string
   * @since 0.6.4
   */
  public function encode_uri($uri)
  {
      $parsed = parse_url($uri);
      if (!isset($parsed['host'])) {
          $this->_error('The given string does not look like a URI');
          return false;
      }
      $arr = explode('.', $parsed['host']);
      foreach ($arr as $k => $v) {
          $conv = $this->encode($v, 'utf8');
          if ($conv) $arr[$k] = $conv;
      }
      $parsed['host'] = join('.', $arr);
      // Normalise the optional components to strings. Only missing or empty
      // components are skipped below; a literal "0" (query, fragment, user,
      // password) or port 0 must not get lost.
      $part = array();
      foreach (array('scheme', 'user', 'pass', 'port', 'path', 'query', 'fragment') as $name) {
          $part[$name] = isset($parsed[$name]) ? (string) $parsed[$name] : '';
      }
      $return =
          ($part['scheme'] === '' ? '' : $part['scheme'].(strtolower($part['scheme']) == 'mailto' ? ':' : '://'))
          .($part['user'] === '' ? '' : $part['user'].($part['pass'] === '' ? '' : ':'.$part['pass']).'@')
          .$parsed['host']
          .($part['port'] === '' ? '' : ':'.$part['port'])
          .$part['path']
          .($part['query'] === '' ? '' : '?'.$part['query'])
          .($part['fragment'] === '' ? '' : '#'.$part['fragment']);
      return $return;
  }
  /**
   * Returns the most recently recorded error.
   *
   * @return string|false The message last stored via _error(); false as long
   *                      as no error has been recorded
   */
  public function get_last_error()
  {
      $last_error = $this->_error;
      return $last_error;
  }
  /**
   * The actual decoding algorithm (Punycode, RFC 3492).
   *
   * @param string $encoded A single ACE label including the Punycode prefix
   * @return string|false The decoded label as UTF-8 string; false if the input
   *                      is not a (well-formed) Punycode label
   */
  protected function _decode($encoded)
  {
      $decoded = array();
      $prefix_pattern = '!^'.preg_quote($this->_punycode_prefix, '!').'!';
      // find the Punycode prefix
      if (!preg_match($prefix_pattern, $encoded)) {
          $this->_error('This is not a punycode string');
          return false;
      }
      $encode_test = preg_replace($prefix_pattern, '', $encoded);
      // If nothing left after removing the prefix, it is hopeless
      if (!$encode_test) {
          $this->_error('The given encoded string was empty');
          return false;
      }
      $prefix_len = self::byteLength($this->_punycode_prefix);
      // Find last occurrence of the delimiter; everything between the prefix
      // and that delimiter consists of literal basic code points
      $delim_pos = strrpos($encoded, '-');
      if ($delim_pos > $prefix_len) {
          for ($k = $prefix_len; $k < $delim_pos; ++$k) {
              $decoded[] = ord($encoded[$k]);
          }
      }
      $deco_len = count($decoded);
      $enco_len = self::byteLength($encoded);

      // Wandering through the strings; init
      $is_first = true;
      $bias = $this->_initial_bias;
      $idx = 0;
      $char = $this->_initial_n;

      // Each pass of the outer loop decodes one generalized variable-length
      // integer and inserts one code point, hence the ++$deco_len
      for ($enco_idx = ($delim_pos) ? ($delim_pos + 1) : 0; $enco_idx < $enco_len; ++$deco_len) {
          for ($old_idx = $idx, $w = 1, $k = $this->_base; 1; $k += $this->_base) {
              // A delta must be terminated by a digit below the threshold;
              // running out of input before that means the label is truncated
              if ($enco_idx >= $enco_len) {
                  $this->_error('The punycode string ends in the middle of a delta');
                  return false;
              }
              $symbol = $encoded[$enco_idx++];
              $byte = ord($symbol);
              // Only [0-9A-Za-z] are Punycode digits
              $is_digit = ($byte >= 0x30 && $byte <= 0x39)
                  || ($byte >= 0x41 && $byte <= 0x5A)
                  || ($byte >= 0x61 && $byte <= 0x7A);
              if (!$is_digit) {
                  $this->_error('The punycode string contains an invalid digit');
                  return false;
              }
              $digit = $this->_decode_digit($symbol);
              $idx += $digit * $w;
              $t = ($k <= $bias) ? $this->_tmin :
                      (($k >= $bias + $this->_tmax) ? $this->_tmax : ($k - $bias));
              if ($digit < $t) break;
              $w = (int) ($w * ($this->_base - $t));
          }
          $bias = $this->_adapt($idx - $old_idx, $deco_len + 1, $is_first);
          $is_first = false;
          $char += (int) ($idx / ($deco_len + 1));
          $idx %= ($deco_len + 1);
          if ($deco_len > 0) {
              // Make room for the decoded char
              for ($i = $deco_len; $i > $idx; $i--) $decoded[$i] = $decoded[($i - 1)];
          }
          $decoded[$idx++] = $char;
      }
      return $this->_ucs4_to_utf8($decoded);
  }
  /**
   * The actual encoding algorithm (Nameprep followed by Punycode, RFC 3492).
   *
   * @param array $decoded A single label as array of UCS-4 code points
   * @return string|false The ACE label; false if the label already carries the
   *                      Punycode prefix, contains nothing that needs encoding,
   *                      fails Nameprep or holds a code point that cannot be
   *                      encoded
   */
  protected function _encode($decoded)
  {
      // We cannot encode a domain name containing the Punycode prefix
      $extract = self::byteLength($this->_punycode_prefix);
      $check_pref = $this->_utf8_to_ucs4($this->_punycode_prefix);
      $check_deco = array_slice($decoded, 0, $extract);
      if ($check_pref == $check_deco) {
          $this->_error('This is already a punycode string');
          return false;
      }
      // We will not try to encode strings consisting of basic code points only
      $encodable = false;
      foreach ($decoded as $v) {
          if ($v > 0x7a) {
              $encodable = true;
              break;
          }
      }
      if (!$encodable) {
          $this->_error('The given string does not contain encodable chars');
          return false;
      }
      // Do NAMEPREP
      $decoded = $this->_nameprep($decoded);
      if (!$decoded || !is_array($decoded)) return false; // NAMEPREP failed
      $deco_len = count($decoded);
      if (!$deco_len) return false; // Empty array
      $codecount = 0; // How many chars have been consumed
      $encoded = '';
      // Copy all basic code points to output. Every code point below
      // _initial_n (0x80) is a basic one. Leaving any of them out would stall
      // the main loop below, because it only ever picks up code points
      // >= _initial_n and therefore could never consume the whole input.
      for ($i = 0; $i < $deco_len; ++$i) {
          $test = $decoded[$i];
          if (0 <= $test && $test < $this->_initial_n) {
              $encoded .= chr($test);
              $codecount++;
          }
      }
      if ($codecount == $deco_len) return $encoded; // All codepoints were basic ones
      // Start with the prefix; copy it to output
      $encoded = $this->_punycode_prefix.$encoded;
      // If we have basic code points in output, add an hyphen to the end
      if ($codecount) $encoded .= '-';
      // Now find and encode all non-basic code points
      $is_first = true;
      $cur_code = $this->_initial_n;
      $bias = $this->_initial_bias;
      $delta = 0;
      while ($codecount < $deco_len) {
          // Find the smallest code point >= the current code point
          $found = false;
          for ($i = 0, $next_code = $this->_max_ucs; $i < $deco_len; $i++) {
              if ($decoded[$i] >= $cur_code && $decoded[$i] <= $next_code) {
                  $next_code = $decoded[$i];
                  $found = true;
              }
          }
          if (!$found) {
              // Whatever is left lies outside 0 .. _max_ucs and can never be
              // consumed; bail out instead of looping forever
              $this->_error('The given string contains a code point that cannot be encoded');
              return false;
          }
          $delta += ($next_code - $cur_code) * ($codecount + 1);
          $cur_code = $next_code;
          // Scan input again and encode all characters whose code point is $cur_code
          for ($i = 0; $i < $deco_len; $i++) {
              if ($decoded[$i] < $cur_code) {
                  $delta++;
              } elseif ($decoded[$i] == $cur_code) {
                  // Emit $delta as generalized variable-length integer
                  for ($q = $delta, $k = $this->_base; 1; $k += $this->_base) {
                      $t = ($k <= $bias) ? $this->_tmin :
                              (($k >= $bias + $this->_tmax) ? $this->_tmax : $k - $bias);
                      if ($q < $t) break;
                      $encoded .= $this->_encode_digit(intval($t + (($q - $t) % ($this->_base - $t)))); //v0.4.5 Changed from ceil() to intval()
                      $q = (int) (($q - $t) / ($this->_base - $t));
                  }
                  $encoded .= $this->_encode_digit($q);
                  $bias = $this->_adapt($delta, $codecount+1, $is_first);
                  $codecount++;
                  $delta = 0;
                  $is_first = false;
              }
          }
          $delta++;
          $cur_code++;
      }
      return $encoded;
  }
  /**
   * Computes the new bias after a delta has been encoded or decoded.
   *
   * @param int  $delta    The delta that has just been processed
   * @param int  $npoints  Number of code points handled so far (including the
   *                       current one)
   * @param bool $is_first Whether this is the first delta of the label; the
   *                       first one is scaled by _damp, all others are halved
   * @return int The adapted bias
   */
  protected function _adapt($delta, $npoints, $is_first)
  {
      $span = $this->_base - $this->_tmin;
      $limit = ($span * $this->_tmax) / 2;
      $divisor = $is_first ? $this->_damp : 2;

      $delta = intval($delta / $divisor);
      $delta += intval($delta / $npoints);

      $k = 0;
      while ($delta > $limit) {
          $delta = intval($delta / $span);
          $k += $this->_base;
      }
      return intval($k + ($span + 1) * $delta / ($delta + $this->_skew));
  }
  /**
   * Turns a digit value into its Punycode character.
   * Values below 26 are shifted by 97 (0 => "a"), all other values by 22
   * (26 => "0").
   *
   * @param int $d Digit value
   * @return string The single character representing the digit
   */
  protected function _encode_digit($d)
  {
      $shift = ($d < 26) ? 97 : 22;
      return chr($d + $shift);
  }
  /**
   * Turns a Punycode character into its digit value.
   * "0".."9" yield 26..35, "A".."Z" and "a".."z" yield 0..25.
   *
   * NOTE(review): the range tests only check the upper bound, so bytes below
   * "0" (and those between the three ranges) yield a value other than _base.
   * This is kept as it is - confirm what the caller expects for such input
   * before tightening it.
   *
   * @param string $cp The character to decode (only its first byte is used)
   * @return int The digit value; _base if none of the tests matches
   */
  protected function _decode_digit($cp)
  {
      $cp = ord($cp);
      if ($cp - 48 < 10) {
          return $cp - 22;
      }
      if ($cp - 65 < 26) {
          return $cp - 65;
      }
      if ($cp - 97 < 26) {
          return $cp - 97;
      }
      return $this->_base;
  }
  /**
   * Internal error handling method: remembers the given message so that it
   * can be fetched through get_last_error().
   *
   * @param string $error The error message to store
   */
  protected function _error($error = '')
  {
      $message = $error;
      $this->_error = $message;
  }
  /**
   * Do Nameprep according to RFC3491 and RFC3454
   *
   * @param array $input Unicode characters (UCS-4 code points)
   * @return array|false The code points after Nameprep; false if the input
   *                     contains a prohibited code point
   */
  protected function _nameprep($input)
  {
      $output = array();
      //
      // Mapping
      // Walking through the input array, performing the required steps on each of
      // the input chars and putting the result into the output array
      // While mapping required chars we apply the canonical ordering
      foreach ($input as $v) {
          // Map to nothing == skip that code point
          if (in_array($v, self::$NP['map_nothing'])) continue;
          // Try to find prohibited input
          if (in_array($v, self::$NP['prohibit']) || in_array($v, self::$NP['general_prohibited'])) {
              $this->_error('NAMEPREP: Prohibited input U+'.sprintf('%08X', $v));
              return false;
          }
          foreach (self::$NP['prohibit_ranges'] as $range) {
              if ($range[0] <= $v && $v <= $range[1]) {
                  $this->_error('NAMEPREP: Prohibited input U+'.sprintf('%08X', $v));
                  return false;
              }
          }
          if (0xAC00 <= $v && $v <= 0xD7AF) {
              // Hangul syllable decomposition
              foreach ($this->_hangul_decompose($v) as $out) {
                  $output[] = (int) $out;
              }
          } elseif (($this->_idn_version == '2003') && isset(self::$NP['replacemaps'][$v])) {
              // There's a decomposition mapping for that code point
              // Decompositions only in version 2003 (original) of IDNA
              foreach ($this->_apply_cannonical_ordering(self::$NP['replacemaps'][$v]) as $out) {
                  $output[] = (int) $out;
              }
          } else {
              $output[] = (int) $v;
          }
      }
      // Before applying any Combining, try to rearrange any Hangul syllables
      $output = $this->_hangul_compose($output);
      //
      // Combine code points
      //
      $last_class = 0;
      $last_starter = 0;
      $out_len = count($output);
      for ($i = 0; $i < $out_len; ++$i) {
          $class = $this->_get_combining_class($output[$i]);
          if ((!$last_class || $last_class > $class) && $class) {
              // Try to match
              $seq_len = $i - $last_starter;
              $out = $this->_combine(array_slice($output, $last_starter, $seq_len));
              // On match: Replace the last starter with the composed character and remove
              // the now redundant non-starter(s)
              if ($out) {
                  $output[$last_starter] = $out;
                  // $out is a single code point (a key of the replacement map),
                  // so its length is 1. This used to be spelled count($out),
                  // which is a TypeError for integers as of PHP 8.
                  if (1 != $seq_len) {
                      for ($j = $i+1; $j < $out_len; ++$j) $output[$j-1] = $output[$j];
                      // NOTE(review): after the shift the stale copy sits at
                      // index $out_len - 1, so this unset() looks off by one -
                      // left untouched, confirm against the intended algorithm.
                      unset($output[$out_len]);
                  }
                  // Rewind the for loop by one, since there can be more possible compositions
                  $i--;
                  $out_len--;
                  $last_class = ($i == $last_starter) ? 0 : $this->_get_combining_class($output[$i-1]);
                  continue;
              }
          }
          // The current class is 0
          if (!$class) $last_starter = $i;
          $last_class = $class;
      }
      return $output;
  }
  /**
   * Decomposes a Hangul syllable
   * (see http://www.unicode.org/unicode/reports/tr15/#Hangul)
   *
   * @param integer $char 32bit UCS4 code point
   * @return array Either the Hangul syllable decomposed into its leading
   *               consonant, vowel and (if present) trailing consonant, or the
   *               original 32bit value as one value array. All decomposed
   *               values are integers.
   */
  protected function _hangul_decompose($char)
  {
      $sindex = (int) $char - $this->_sbase;
      // Not a precomposed syllable: hand the code point back untouched
      if ($sindex < 0 || $sindex >= $this->_scount) return array($char);
      $result = array();
      // Truncate the quotients themselves. Casting only the base (as in
      // "(int) $base + $a / $b") would leave a float with a fractional part.
      $result[] = $this->_lbase + (int) ($sindex / $this->_ncount);
      $result[] = $this->_vbase + (int) (($sindex % $this->_ncount) / $this->_tcount);
      $T = intval($this->_tbase + $sindex % $this->_tcount);
      // A trailing index of 0 means "no trailing consonant"
      if ($T != $this->_tbase) $result[] = $T;
      return $result;
  }
  /**
   * Composes Hangul syllables
   * (see http://www.unicode.org/unicode/reports/tr15/#Hangul)
   *
   * @param array $input Decomposed UCS4 sequence
   * @return array UCS4 sequence with syllables composed
   */
  protected function _hangul_compose($input)
  {
      $inp_len = count($input);
      if (!$inp_len) return array();
      $result = array();
      $last = (int) $input[0];
      $result[] = $last; // copy first char from input to output
      for ($i = 1; $i < $inp_len; ++$i) {
          $char = (int) $input[$i];
          $sindex = $last - $this->_sbase;
          $lindex = $last - $this->_lbase;
          $vindex = $char - $this->_vbase;
          $tindex = $char - $this->_tbase;
          // Find out, whether two current characters are LV and T.
          // The trailing index must lie strictly between 0 and _tcount:
          // _tbase itself stands for "no trailing consonant", and an index of
          // _tcount would spill over into the next LV syllable.
          if (0 <= $sindex && $sindex < $this->_scount && ($sindex % $this->_tcount == 0)
                  && 0 < $tindex && $tindex < $this->_tcount) {
              // create syllable of form LVT
              $last += $tindex;
              $result[(count($result) - 1)] = $last; // reset last
              continue; // discard char
          }
          // Find out, whether two current characters form L and V
          if (0 <= $lindex && $lindex < $this->_lcount && 0 <= $vindex && $vindex < $this->_vcount) {
              // create syllable of form LV
              $last = (int) $this->_sbase + ($lindex * $this->_vcount + $vindex) * $this->_tcount;
              $result[(count($result) - 1)] = $last; // reset last
              continue; // discard char
          }
          // if neither case was true, just add the character
          $last = $char;
          $result[] = $char;
      }
      return $result;
  }
  /**
   * Looks up the combining class of a wide char in the 'norm_combcls' table.
   *
   * @param integer $char Wide char to check (32bit integer)
   * @return integer Combining class if the table has an entry, else 0
   */
  protected function _get_combining_class($char)
  {
      if (isset(self::$NP['norm_combcls'][$char])) {
          return self::$NP['norm_combcls'][$char];
      }
      return 0;
  }
  /**
   * Applies the canonical ordering to a decomposed UCS4 sequence: a code point
   * with a non-zero combining class is moved leftward past every predecessor
   * that has a higher combining class. Passes are repeated until one of them
   * moves nothing.
   *
   * @param array $input Decomposed UCS4 sequence
   * @return array Ordered UCS4 sequence
   */
  protected function _apply_cannonical_ordering($input)
  {
      $count = count($input);
      do {
          $moved = false;
          $prev_class = $this->_get_combining_class(intval($input[0]));
          for ($pos = 0; $pos < $count - 1; ++$pos) {
              $cur_class = $this->_get_combining_class(intval($input[$pos + 1]));
              if ($cur_class != 0 && $prev_class > $cur_class) {
                  // Move item leftward until it fits
                  $slot = $pos + 1;
                  while ($slot > 0
                          && $this->_get_combining_class(intval($input[$slot - 1])) > $cur_class) {
                      $tmp = intval($input[$slot]);
                      $input[$slot] = intval($input[$slot - 1]);
                      $input[$slot - 1] = $tmp;
                      $moved = true;
                      --$slot;
                  }
                  // Reentering the loop looking at the old character again
                  $cur_class = $prev_class;
              }
              $prev_class = $cur_class;
          }
      } while ($moved);
      return $input;
  }
  /**
   * Do composition of a sequence of starter and non-starter: searches the
   * replacement map for an entry whose target sequence equals the input.
   *
   * @param array $input UCS4 decomposed sequence
   * @return integer|false The key (source code point) of the first matching
   *                       map entry, false if there is none
   */
  protected function _combine($input)
  {
      $length = count($input);
      foreach (self::$NP['replacemaps'] as $composed => $sequence) {
          // Cheap rejects first: different first element or different length
          if ($sequence[0] != $input[0] || count($sequence) != $length) {
              continue;
          }
          $matches = false;
          foreach ($input as $idx => $code) {
              $matches = ($code == $sequence[$idx]);
              if (!$matches) {
                  break;
              }
          }
          if ($matches) {
              return $composed;
          }
      }
      return false;
  }
  /**
   * This converts an UTF-8 encoded string to its UCS-4 representation
   * By talking about UCS-4 "strings" we mean arrays of 32bit integers representing
   * each of the "chars". This is due to PHP not being able to handle strings with
   * bit depth different from 8. This applies to the reverse method _ucs4_to_utf8(), too.
   * The following UTF-8 encodings are supported:
   * bytes bits representation
   * 1      7   0xxxxxxx
   * 2     11   110xxxxx 10xxxxxx
   * 3     16   1110xxxx 10xxxxxx 10xxxxxx
   * 4     21   11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
   * 5     26   111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
   * 6     31   1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
   * Each x represents a bit that can be used to store character data.
   * The five and six byte sequences are part of Annex D of ISO/IEC 10646-1:2000
   *
   * @param string $input UTF-8 encoded string
   * @return array|false Array of code points; false on malformed input
   */
  protected function _utf8_to_ucs4($input)
  {
      $output = array();
      $out_len = 0;
      $inp_len = self::byteLength($input);
      $mode = 'next'; // 'next': expecting a start byte, 'add': expecting continuation bytes
      $test = 'none'; // 'range': the next continuation byte still needs the range check
      for ($k = 0; $k < $inp_len; ++$k) {
          $v = ord($input[$k]); // Extract byte from input string
          if ($v < 128) { // We found an ASCII char - put into string as is
              $output[$out_len] = $v;
              ++$out_len;
              // An ASCII byte in the middle of a multi byte sequence is an error
              if ('add' == $mode) {
                  $this->_error('Conversion from UTF-8 to UCS-4 failed: malformed input at byte '.$k);
                  return false;
              }
              continue;
          }
          if ('next' == $mode) { // Try to find the next start byte; determine the width of the Unicode char
              $start_byte = $v;
              $mode = 'add';
              $test = 'range';
              if ($v >> 5 == 6) { // &110xxxxx 10xxxxxx
                  $next_byte = 0; // Tells, how many times subsequent bitmasks must rotate 6bits to the left
                  $v = ($v - 192) << 6;
              } elseif ($v >> 4 == 14) { // &1110xxxx 10xxxxxx 10xxxxxx
                  $next_byte = 1;
                  $v = ($v - 224) << 12;
              } elseif ($v >> 3 == 30) { // &11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
                  $next_byte = 2;
                  $v = ($v - 240) << 18;
              } elseif ($v >> 2 == 62) { // &111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
                  $next_byte = 3;
                  $v = ($v - 248) << 24;
              } elseif ($v >> 1 == 126) { // &1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
                  $next_byte = 4;
                  $v = ($v - 252) << 30;
              } else {
                  $this->_error('This might be UTF-8, but I don\'t understand it at byte '.$k);
                  return false;
              }
              // Store the bits of the start byte; the continuation bytes are
              // added to this slot on the following iterations
              if ('add' == $mode) {
                  $output[$out_len] = (int) $v;
                  ++$out_len;
                  continue;
              }
          }
          if ('add' == $mode) {
              // The range check looks at the first continuation byte only
              if (!$this->_allow_overlong && $test == 'range') {
                  $test = 'none';
                  if (($v < 0xA0 && $start_byte == 0xE0) || ($v < 0x90 && $start_byte == 0xF0) || ($v > 0x8F && $start_byte == 0xF4)) {
                      $this->_error('Bogus UTF-8 character detected (out of legal range) at byte '.$k);
                      return false;
                  }
              }
              if ($v >> 6 == 2) { // Bit mask must be 10xxxxxx
                  $v = ($v - 128) << ($next_byte * 6);
                  $output[($out_len - 1)] += $v;
                  --$next_byte;
              } else {
                  $this->_error('Conversion from UTF-8 to UCS-4 failed: malformed input at byte '.$k);
                  return false;
              }
              // All continuation bytes consumed, look for the next start byte
              if ($next_byte < 0) {
                  $mode = 'next';
              }
          }
      } // for
      return $output;
  }
  /**
   * Encode a list of UCS-4 code points as a UTF-8 byte string
   *
   * Values below 2^21 are written as one to four bytes. Any larger value is
   * replaced by self::$safe_char when self::$safe_mode is set; otherwise the
   * problem is reported through _error() and false is returned.
   * See _utf8_to_ucs4() for details
   *
   * @param array $input code points, processed in iteration order
   * @return string|false
   */
  protected function _ucs4_to_utf8($input)
  {
      $utf8 = '';
      foreach ($input as $pos => $cp) {
          if ($cp < 0x80) {
              // 7 bit values are copied through unchanged
              $utf8 .= chr($cp);
              continue;
          }
          if ($cp < 0x800) {
              // 2 bytes: 110xxxxx 10xxxxxx
              $utf8 .= chr(0xC0 + ($cp >> 6))
                  .chr(0x80 + ($cp & 0x3F));
              continue;
          }
          if ($cp < 0x10000) {
              // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
              $utf8 .= chr(0xE0 + ($cp >> 12))
                  .chr(0x80 + (($cp >> 6) & 0x3F))
                  .chr(0x80 + ($cp & 0x3F));
              continue;
          }
          if ($cp < 0x200000) {
              // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
              $utf8 .= chr(0xF0 + ($cp >> 18))
                  .chr(0x80 + (($cp >> 12) & 0x3F))
                  .chr(0x80 + (($cp >> 6) & 0x3F))
                  .chr(0x80 + ($cp & 0x3F));
              continue;
          }
          if (self::$safe_mode) {
              $utf8 .= self::$safe_char;
              continue;
          }
          $this->_error('Conversion from UCS-4 to UTF-8 failed: malformed input at byte '.$pos);
          return false;
      }
      return $utf8;
  }
  /**
   * Serialize a UCS-4 array into a UCS-4 string
   *
   * Each value contributes exactly four bytes, most significant byte first.
   *
   * @param array $input
   * @return string
   */
  protected function _ucs4_to_ucs4_string($input)
  {
      $packed = '';
      foreach ($input as $codepoint) {
          // Walk from the highest byte down to the lowest; 0xFF keeps one byte per step
          foreach (array(24, 16, 8, 0) as $shift) {
              $packed .= chr(($codepoint >> $shift) & 0xFF);
          }
      }
      return $packed;
  }
  /**
   * Convert UCS-4 string into UCS-4 array
   *
   * Every group of four input bytes is combined, most significant byte first,
   * into one integer of the result.
   *
   * @param string $input
   * @return array|false list of integers (empty for empty input); false, after
   *                     reporting through _error(), when the byte length is not
   *                     a multiple of four
   */
  protected function _ucs4_string_to_ucs4($input)
  {
      $output = array();
      $inp_len = self::byteLength($input);
      // Input length must be divisible by 4
      if ($inp_len % 4) {
          $this->_error('Input UCS4 string is broken');
          return false;
      }
      // One iteration per 4-byte group; an empty input never enters the loop.
      // Offsets use [] because the $str{$i} form is a parse error since PHP 8.0.
      for ($i = 0; $i < $inp_len; $i += 4) {
          $output[] = (ord($input[$i]) << 24)
              + (ord($input[$i + 1]) << 16)
              + (ord($input[$i + 2]) << 8)
              + ord($input[$i + 3]);
      }
      return $output;
  }
  /**
   * Gets the length of a string in bytes even if mbstring function
   * overloading is turned on
   *
   * When self::$_mb_string_overload is set, mb_strlen() with the '8bit'
   * encoding is used; otherwise the value is cast to string and measured
   * with strlen().
   *
   * @param string $string the string for which to get the length.
   * @return integer the length of the string in bytes.
   */
  protected static function byteLength($string)
  {
      if (self::$_mb_string_overload) {
          return mb_strlen($string, '8bit');
      }
      // (string) instead of (binary): the two casts are aliases, but the
      // (binary) spelling is deprecated as of PHP 8.5
      return strlen((string) $string);
  }
  /**
   * Attempts to return a concrete IDNA instance.
   *
   * Declared static so that it can be invoked as idna_convert::getInstance();
   * calling a non-static method statically throws an Error as of PHP 8.0.
   * Static methods may still be called on an object, so $obj->getInstance()
   * keeps working.
   *
   * @param array $params Set of parameters handed to the constructor
   * @return idna_convert
   * @access public
   */
  public static function getInstance($params = array())
  {
      return new idna_convert($params);
  }
  /**
   * Returns a shared IDNA instance, only creating a new one if no instance
   * with the same parameters currently exists.
   *
   * Instances are cached per serialize($params) signature for the lifetime of
   * the request. The method is static so that idna_convert::singleton() is a
   * valid call on PHP 8, and it instantiates the class directly so that it
   * does not rely on any other method being callable statically.
   *
   * @param array $params Set of parameters handed to the constructor
   *
   * @return object idna_convert
   * @access public
   */
  public static function singleton($params = array())
  {
      // Cache shared by all calls, keyed by the serialized parameter set
      static $instances = array();
      $signature = serialize($params);
      if (!isset($instances[$signature])) {
          $instances[$signature] = new idna_convert($params);
      }
      return $instances[$signature];
  }
  970. /**
  971. * Holds all relevant mapping tables
  972. * See RFC3454 for details
  973. *
  974. * @private array
  975. * @since 0.5.2
  976. */
  977. protected static $NP = array
  978. ('map_nothing' => array(0xAD, 0x34F, 0x1806, 0x180B, 0x180C, 0x180D, 0x200B, 0x200C
  979. ,0x200D, 0x2060, 0xFE00, 0xFE01, 0xFE02, 0xFE03, 0xFE04, 0xFE05, 0xFE06, 0xFE07
  980. ,0xFE08, 0xFE09, 0xFE0A, 0xFE0B, 0xFE0C, 0xFE0D, 0xFE0E, 0xFE0F, 0xFEFF
  981. )
  982. ,'general_prohibited' => array(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19
  983. ,20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32 ,33, 34, 35, 36, 37, 38, 39, 40, 41, 42
  984. ,43, 44, 47, 59, 60, 61, 62, 63, 64, 91, 92, 93, 94, 95, 96, 123, 124, 125, 126, 127, 0x3002
  985. )
  986. ,'prohibit' => array(0xA0, 0x340, 0x341, 0x6DD, 0x70F, 0x1680, 0x180E, 0x2000, 0x2001, 0x2002, 0x2003
  987. ,0x2004, 0x2005, 0x2006, 0x2007, 0x2008, 0x2009, 0x200A, 0x200B, 0x200C, 0x200D, 0x200E, 0x200F
  988. ,0x2028, 0x2029, 0x202A, 0x202B, 0x202C, 0x202D, 0x202E, 0x202F, 0x205F, 0x206A, 0x206B, 0x206C
  989. ,0x206D, 0x206E, 0x206F, 0x3000, 0xFEFF, 0xFFF9, 0xFFFA, 0xFFFB, 0xFFFC, 0xFFFD, 0xFFFE, 0xFFFF
  990. ,0x1FFFE, 0x1FFFF, 0x2FFFE, 0x2FFFF, 0x3FFFE, 0x3FFFF, 0x4FFFE, 0x4FFFF, 0x5FFFE, 0x5FFFF, 0x6FFFE
  991. ,0x6FFFF, 0x7FFFE, 0x7FFFF, 0x8FFFE, 0x8FFFF, 0x9FFFE, 0x9FFFF, 0xAFFFE, 0xAFFFF, 0xBFFFE, 0xBFFFF
  992. ,0xCFFFE, 0xCFFFF, 0xDFFFE, 0xDFFFF, 0xE0001, 0xEFFFE, 0xEFFFF, 0xFFFFE, 0xFFFFF, 0x10FFFE, 0x10FFFF
  993. )
  994. ,'prohibit_ranges' => array(array(0x80, 0x9F), array(0x2060, 0x206F), array(0x1D173, 0x1D17A)
  995. ,array(0xE000, 0xF8FF) ,array(0xF0000, 0xFFFFD), array(0x100000, 0x10FFFD)
  996. ,array(0xFDD0, 0xFDEF), array(0xD800, 0xDFFF), array(0x2FF0, 0x2FFB), array(0xE0020, 0xE007F)
  997. )
  998. ,'replacemaps' => array(0x41 => array(0x61), 0x42 => array(0x62), 0x43 => array(0x63)
  999. ,0x44 => array(0x64), 0x45 => array(0x65), 0x46 => array(0x66), 0x47 => array(0x67)
  1000. ,0x48 => array(0x68), 0x49 => array(0x69), 0x4A => array(0x6A), 0x4B => array(0x6B)
  1001. ,0x4C => array(0x6C), 0x4D => array(0x6D), 0x4E => array(0x6E), 0x4F => array(0x6F)
  1002. ,0x50 => array(0x70), 0x51 => array(0x71), 0x52 => array(0x72), 0x53 => array(0x73)
  1003. ,0x54 => array(0x74), 0x55 => array(0x75), 0x56 => array(0x76), 0x57 => array(0x77)
  1004. ,0x58 => array(0x78), 0x59 => array(0x79), 0x5A => array(0x7A), 0xB5 => array(0x3BC)
  1005. ,0xC0 => array(0xE0), 0xC1 => array(0xE1), 0xC2 => array(0xE2), 0xC3 => array(0xE3)
  1006. ,0xC4 => array(0xE4), 0xC5 => array(0xE5), 0xC6 => array(0xE6), 0xC7 => array(0xE7)
  1007. ,0xC8 => array(0xE8), 0xC9 => array(0xE9), 0xCA => array(0xEA), 0xCB => array(0xEB)
  1008. ,0xCC => array(0xEC), 0xCD => array(0xED), 0xCE => array(0xEE), 0xCF => array(0xEF)
  1009. ,0xD0 => array(0xF0), 0xD1 => array(0xF1), 0xD2 => array(0xF2), 0xD3 => array(0xF3)
  1010. ,0xD4 => array(0xF4), 0xD5 => array(0xF5), 0xD6 => array(0xF6), 0xD8 => array(0xF8)
  1011. ,0xD9 => array(0xF9), 0xDA => array(0xFA), 0xDB => array(0xFB), 0xDC => array(0xFC)
  1012. ,0xDD => array(0xFD), 0xDE => array(0xFE), 0xDF => array(0x73, 0x73)
  1013. ,0x100 => array(0x101), 0x102 => array(0x103), 0x104 => array(0x105)
  1014. ,0x106 => array(0x107), 0x108 => array(0x109), 0x10A => array(0x10B)
  1015. ,0x10C => array(0x10D), 0x10E => array(0x10F), 0x110 => array(0x111)
  1016. ,0x112 => array(0x113), 0x114 => array(0x115), 0x116 => array(0x117)
  1017. ,0x118 => array(0x119), 0x11A => array(0x11B), 0x11C => array(0x11D)
  1018. ,0x11E => array(0x11F), 0x120 => array(0x121), 0x122 => array(0x123)
  1019. ,0x124 => array(0x125), 0x126 => array(0x127), 0x128 => array(0x129)
  1020. ,0x12A => array(0x12B), 0x12C => array(0x12D), 0x12E => array(0x12F)
  1021. ,0x130 => array(0x69, 0x307), 0x132 => array(0x133), 0x134 => array(0x135)
  1022. ,0x136 => array(0x137), 0x139 => array(0x13A), 0x13B => array(0x13C)
  1023. ,0x13D => array(0x13E), 0x13F => array(0x140), 0x141 => array(0x142)
  1024. ,0x143 => array(0x144), 0x145 => array(0x146), 0x147 => array(0x148)
  1025. ,0x149 => array(0x2BC, 0x6E), 0x14A => array(0x14B), 0x14C => array(0x14D)
  1026. ,0x14E => array(0x14F), 0x150 => array(0x151), 0x152 => array(0x153)
  1027. ,0x154 => array(0x155), 0x156 => array(0x157), 0x158 => array(0x159)
  1028. ,0x15A => array(0x15B), 0x15C => array(0x15D), 0x15E => array(0x15F)
  1029. ,0x160 => array(0x161), 0x162 => array(0x163), 0x164 => array(0x165)
  1030. ,0x166 => array(0x167), 0x168 => array(0x169), 0x16A => array(0x16B)
  1031. ,0x16C => array(0x16D), 0x16E => array(0x16F), 0x170 => array(0x171)
  1032. ,0x172 => array(0x173), 0x174 => array(0x175), 0x176 => array(0x177)
  1033. ,0x178 => array(0xFF), 0x179 => array(0x17A), 0x17B => array(0x17C)
  1034. ,0x17D => array(0x17E), 0x17F => array(0x73), 0x181 => array(0x253)
  1035. ,0x182 => array(0x183), 0x184 => array(0x185), 0x186 => array(0x254)
  1036. ,0x187 => array(0x188), 0x189 => array(0x256), 0x18A => array(0x257)
  1037. ,0x18B => array(0x18C), 0x18E => array(0x1DD), 0x18F => array(0x259)
  1038. ,0x190 => array(0x25B), 0x191 => array(0x192), 0x193 => array(0x260)
  1039. ,0x194 => array(0x263), 0x196 => array(0x269), 0x197 => array(0x268)
  1040. ,0x198 => array(0x199), 0x19C => array(0x26F), 0x19D => array(0x272)
  1041. ,0x19F => array(0x275), 0x1A0 => array(0x1A1), 0x1A2 => array(0x1A3)
  1042. ,0x1A4 => array(0x1A5), 0x1A6 => array(0x280), 0x1A7 => array(0x1A8)
  1043. ,0x1A9 => array(0x283), 0x1AC => array(0x1AD), 0x1AE => array(0x288)
  1044. ,0x1AF => array(0x1B0), 0x1B1 => array(0x28A), 0x1B2 => array(0x28B)
  1045. ,0x1B3 => array(0x1B4), 0x1B5 => array(0x1B6), 0x1B7 => array(0x292)
  1046. ,0x1B8 => array(0x1B9), 0x1BC => array(0x1BD), 0x1C4 => array(0x1C6)
  1047. ,0x1C5 => array(0x1C6), 0x1C7 => array(0x1C9), 0x1C8 => array(0x1C9)
  1048. ,0x1CA => array(0x1CC), 0x1CB => array(0x1CC), 0x1CD => array(0x1CE)
  1049. ,0x1CF => array(0x1D0), 0x1D1 => array(0x1D2), 0x1D3 => array(0x1D4)
  1050. ,0x1D5 => array(0x1D6), 0x1D7 => array(0x1D8), 0x1D9 => array(0x1DA)
  1051. ,0x1DB => array(0x1DC), 0x1DE => array(0x1DF), 0x1E0 => array(0x1E1)
  1052. ,0x1E2 => array(0x1E3), 0x1E4 => array(0x1E5), 0x1E6 => array(0x1E7)
  1053. ,0x1E8 => array(0x1E9), 0x1EA => array(0x1EB), 0x1EC => array(0x1ED)
  1054. ,0x1EE => array(0x1EF), 0x1F0 => array(0x6A, 0x30C), 0x1F1 => array(0x1F3)
  1055. ,0x1F2 => array(0x1F3), 0x1F4 => array(0x1F5), 0x1F6 => array(0x195)
  1056. ,0x1F7 => array(0x1BF), 0x1F8 => array(0x1F9), 0x1FA => array(0x1FB)
  1057. ,0x1FC => array(0x1FD), 0x1FE => array(0x1FF), 0x200 => array(0x201)
  1058. ,0x202 => array(0x203), 0x204 => array(0x205), 0x206 => array(0x207)
  1059. ,0x208 => array(0x209), 0x20A => array(0x20B), 0x20C => array(0x20D)
  1060. ,0x20E => array(0x20F), 0x210 => array(0x211), 0x212 => array(0x213)
  1061. ,0x214 => array(0x215), 0x216 => array(0x217), 0x218 => array(0x219)
  1062. ,0x21A => array(0x21B), 0x21C => array(0x21D), 0x21E => array(0x21F)
  1063. ,0x220 => array(0x19E), 0x222 => array(0x223), 0x224 => array(0x225)
  1064. ,0x226 => array(0x227), 0x228 => array(0x229), 0x22A => array(0x22B)
  1065. ,0x22C => array(0x22D), 0x22E => array(0x22F), 0x230 => array(0x231)
  1066. ,0x232 => array(0x233), 0x345 => array(0x3B9), 0x37A => array(0x20, 0x3B9)
  1067. ,0x386 => array(0x3AC), 0x388 => array(0x3AD), 0x389 => array(0x3AE)
  1068. ,0x38A => array(0x3AF), 0x38C => array(0x3CC), 0x38E => array(0x3CD)
  1069. ,0x38F => array(0x3CE), 0x390 => array(0x3B9, 0x308, 0x301)
  1070. ,0x391 => array(0x3B1), 0x392 => array(0x3B2), 0x393 => array(0x3B3)
  1071. ,0x394 => array(0x3B4), 0x395 => array(0x3B5), 0x396 => array(0x3B6)
  1072. ,0x397 => array(0x3B7), 0x398 => array(0x3B8), 0x399 => array(0x3B9)
  1073. ,0x39A => array(0x3BA), 0x39B => array(0x3BB), 0x39C => array(0x3BC)
  1074. ,0x39D => array(0x3BD), 0x39E => array(0x3BE), 0x39F => array(0x3BF)
  1075. ,0x3A0 => array(0x3C0), 0x3A1 => array(0x3C1), 0x3A3 => array(0x3C3)
  1076. ,0x3A4 => array(0x3C4), 0x3A5 => array(0x3C5), 0x3A6 => array(0x3C6)
  1077. ,0x3A7 => array(0x3C7), 0x3A8 => array(0x3C8), 0x3A9 => array(0x3C9)
  1078. ,0x3AA => array(0x3CA), 0x3AB => array(0x3CB), 0x3B0 => array(0x3C5, 0x308, 0x301)
  1079. ,0x3C2 => array(0x3C3), 0x3D0 => array(0x3B2), 0x3D1 => array(0x3B8)
  1080. ,0x3D2 => array(0x3C5), 0x3D3 => array(0x3CD), 0x3D4 => array(0x3CB)
  1081. ,0x3D5 => array(0

Large files files are truncated, but you can click here to view the full file