PageRenderTime 44ms CodeModel.GetById 19ms RepoModel.GetById 1ms app.codeStats 0ms

/src/main/php/org/ietf/PunyCode.class.php

https://github.com/xp-forge/punycode
PHP | 364 lines | 335 code | 4 blank | 25 comment | 0 complexity | 5772a452f34db721f559f01a90850584 MD5 | raw file
  1. <?php
  2. /* This class is part of the XP framework
  3. *
  4. * $Id$
  5. */
  6. uses(
  7. 'lang.IllegalArgumentException',
  8. 'lang.SystemException'
  9. );
  // Bootstring parameters for Punycode (values as given in RFC 3492, section 5)
  // number of distinct digit values (a-z plus 0-9)
  define('PUNYCODE_BASE', 36);
  // lower clamp of the per-position threshold t used by decode()/encode()
  define('PUNYCODE_TMIN', 1);
  // upper clamp of the per-position threshold t used by decode()/encode()
  define('PUNYCODE_TMAX', 26);
  // added to delta in the denominator of the bias formula in _adapt()
  define('PUNYCODE_SKEW', 38);
  // divisor _adapt() applies to delta on the first adaptation
  define('PUNYCODE_DAMP', 700);
  // bias value decode() and encode() start with
  define('PUNYCODE_INITIAL_BIAS', 72);
  // starting value of n; code points below 0x80 are the basic (ASCII) ones
  define('PUNYCODE_INITIAL_N', 0x80);
  // code point of '-', which separates the literal basic part from the digits
  define('PUNYCODE_DELIMITER', 0x2d);
  19. /**
  20. * Implemented in PHP using punycode.c from RFC 3492
  21. * http://rfc-editor.org/rfc/rfc3492.txt
  22. *
  23. * punycode.c:
  24. * http://www.nicemice.net/idn/
  25. * Adam M. Costello
  26. * http://www.nicemice.net/amc/
  27. *
  28. * This is PHP code implementing Punycode (RFC 3492).
  29. *
  30. * @ext iconv
  31. * @purpose Punycode encoding/decoding
  32. */
  33. class PunyCode extends Object {
  /**
   * Builds the 128 byte lookup string decode() validates its input against.
   *
   * The string is laid out like the ASCII table: the printable characters
   * 0x20..0x7E appear at their own position, while each control position
   * (0x00..0x1F and 0x7F) is filled with a "\n" placeholder.
   *
   * @return  string
   * @see     rfc://3492
   */
  public function getASCII() {
    $sixteen= "\n\n\n\n"."\n\n\n\n"."\n\n\n\n"."\n\n\n\n";
    $controls= $sixteen.$sixteen;

    $symbols= ' !"#$%&\'()*+,-./';
    $digits= '0123456789';
    $upper= 'ABCDEFGHIJKLMNOPQRSTUVWXYZ';
    $lower= 'abcdefghijklmnopqrstuvwxyz';

    return
      $controls.
      $symbols.
      $digits.':;<=>?@'.
      $upper.'[\\]^_`'.
      $lower."{|}~\n"
    ;
  }
  /**
   * Bias adaptation function: computes the bias to use for the next
   * variable-length integer from the delta that was just processed.
   *
   * @param   int delta The delta just decoded or encoded
   * @param   int numpoints Number of code points handled so far, including the current one
   * @param   bool firsttime Whether this is the first adaptation
   * @return  int The new bias
   * @see     rfc://3492#6.1
   */
  protected function _adapt($delta, $numpoints, $firsttime) {
    $span= PUNYCODE_BASE - PUNYCODE_TMIN;
    $limit= ($span * PUNYCODE_TMAX) / 2;

    // The very first delta is damped, every later one is halved
    if ($firsttime) {
      $delta= (int)($delta / PUNYCODE_DAMP);
    } else {
      $delta= $delta >> 1;
    }
    $delta+= (int)($delta / $numpoints);

    // Scale delta down until it is within the limit, counting BASE per step
    $k= 0;
    while ($delta > $limit) {
      $delta= (int)($delta / $span);
      $k+= PUNYCODE_BASE;
    }

    return (int)($k + ($span + 1) * $delta / ($delta + PUNYCODE_SKEW));
  }
  /**
   * Returns the numeric value of a basic code point used as a Punycode
   * digit: 'A'..'Z' and 'a'..'z' yield 0..25, '0'..'9' yield 26..35.
   * Any other code point yields PUNYCODE_BASE, which callers treat as
   * "not a digit".
   *
   * The reference C code relies on unsigned wrap-around (cp - 48 < 10);
   * PHP integers are signed, so the ranges are tested explicitly here.
   *
   * @param   int cp The code point to look up
   * @return  int The digit value 0..35, or PUNYCODE_BASE if cp is no digit
   * @see     rfc://3492#5
   */
  protected function _decode_digit($cp) {
    if ($cp >= 48 && $cp <= 57) return $cp - 22;    // '0'..'9' => 26..35
    if ($cp >= 65 && $cp <= 90) return $cp - 65;    // 'A'..'Z' => 0..25
    if ($cp >= 97 && $cp <= 122) return $cp - 97;   // 'a'..'z' => 0..25
    return PUNYCODE_BASE;
  }
  /**
   * Returns the code point of the basic character representing digit d:
   * <pre>
   *   0..25  map to ASCII a..z (A..Z if flag is set)
   *   26..35 map to ASCII 0..9
   * </pre>
   *
   * A set flag always subtracts 32 from the result, so it only makes
   * sense together with d in 0..25.
   *
   * @param   int d The digit value, 0..35
   * @param   bool flag Whether the uppercase form is wanted
   * @return  int
   */
  protected function _encode_digit($d, $flag) {
    $code= $d < 26 ? $d + 97 : $d + 22;
    return $flag != 0 ? $code - 32 : $code;
  }
  /**
   * Forces the case of a basic code point: letters become uppercase if
   * flag is set and lowercase otherwise. Code points that are not ASCII
   * letters are returned unchanged.
   *
   * The reference C code relies on unsigned wrap-around (bcp - 97 < 26);
   * PHP integers are signed, so the letter ranges are tested explicitly.
   *
   * @param   int bcp The basic code point
   * @param   bool flag TRUE for uppercase, FALSE for lowercase
   * @return  int
   * @see     rfc://3492#5
   */
  protected function _encode_basic($bcp, $flag) {
    $lower= $bcp >= 97 && $bcp <= 122;
    $upper= $bcp >= 65 && $bcp <= 90;

    if ($flag) {
      return $lower ? $bcp - 32 : $bcp;
    }
    return $upper ? $bcp + 32 : $bcp;
  }
  /**
   * Tests whether a basic code point is flagged, that is, whether it is
   * an uppercase ASCII letter (A..Z).
   *
   * The reference C code relies on unsigned wrap-around (bcp - 65 < 26);
   * PHP integers are signed, so both bounds are tested explicitly.
   *
   * @param   string bcp A single character
   * @return  bool
   */
  protected function _flagged($bcp) {
    $code= ord($bcp);
    return $code >= 65 && $code <= 90;
  }
  /**
   * Decodes a Punycode string.
   *
   * On success, $result holds the decoded code points as a UCS-4 string
   * (four bytes per code point, most significant byte first) and $flags
   * holds one boolean per decoded code point. For copied basic code points
   * the flag is _flagged() of the character itself, for decoded code points
   * it is _flagged() of the last digit consumed for them.
   *
   * Both $result and $flags are passed by reference and are overwritten.
   *
   * @param   string input The punycode string
   * @param   &string result Receives the decoded UCS-4 string
   * @param   &array flags Receives the flags for each code point (see _flagged() function)
   * @return  bool TRUE on success
   * @throws  lang.IllegalArgumentException in case $input is not a punycode string
   * @throws  lang.SystemException in case there's an integer overflow
   */
  public function decode($input, &$result, &$flags) {
    $in_len= strlen($input);
    $n= PUNYCODE_INITIAL_N;
    $out= $i= 0;
    $bias= PUNYCODE_INITIAL_BIAS;
    $output= $flags= array();
    $result= NULL;

    // Reject every character getASCII() does not list
    $ascii= $this->getASCII();
    for ($j= 0; $j < $in_len; $j++) {
      if (strpos($ascii, $input[$j]) === FALSE) {
        throw new IllegalArgumentException('Input is not valid punycode');
      }
    }

    // Handle the basic code points: Let b be the number of input code
    // points before the last delimiter, or 0 if there is none, then
    // copy the first b code points to the output.
    for ($b= $j= 0; $j < $in_len; ++$j) {
      if (PUNYCODE_DELIMITER == ord($input[$j])) $b= $j;
    }
    for ($j= 0; $j < $b; ++$j) {
      if (ord($input[$j]) >= 0x80) {
        throw new IllegalArgumentException('Input is not valid punycode');
      }
      $flags[$out]= $this->_flagged($input[$j]);
      $output[$out++]= ord($input[$j]);
    }

    // Main decoding loop: Start just after the last delimiter if any
    // basic code points were copied; start at the beginning otherwise.
    // in is the index of the next character to be consumed, and
    // out is the number of code points in the output array.
    for ($in= $b > 0 ? $b + 1 : 0; $in < $in_len; ++$out) {

      // Decode a generalized variable-length integer into delta,
      // which gets added to i. The overflow checking is easier
      // if we increase i as we go, then subtract off its starting
      // value at the end to obtain delta.
      for ($oldi= $i, $w= 1, $k= PUNYCODE_BASE; ; $k+= PUNYCODE_BASE) {
        if ($in >= $in_len) {
          throw new IllegalArgumentException('Input is not valid punycode');
        }
        $digit= $this->_decode_digit(ord($input[$in++]));
        if ($digit >= PUNYCODE_BASE) {
          throw new IllegalArgumentException('Input is not valid punycode');
        }
        if ($digit > (LONG_MAX - $i) / $w) {
          throw new SystemException('Integer overflow');
        }
        $i+= $digit * $w;

        // Threshold for this digit position, clamped to TMIN..TMAX
        $t=
          $k <= $bias ? PUNYCODE_TMIN :
          ($k >= $bias + PUNYCODE_TMAX ? PUNYCODE_TMAX : $k - $bias);
        if ($digit < $t) break;

        if ($w > LONG_MAX / (PUNYCODE_BASE - $t)) {
          throw new SystemException('Integer overflow');
        }
        $w*= (PUNYCODE_BASE - $t);
      }
      $bias= $this->_adapt($i - $oldi, $out + 1, $oldi == 0);

      // i was supposed to wrap around from out+1 to 0,
      // incrementing n each time, so we'll fix that now:
      if ($i / ($out + 1) > LONG_MAX - $n) {
        throw new SystemException('Integer overflow');
      }
      $n+= (int)($i / ($out + 1));
      $i%= ($out + 1);

      // Insert n at position i of the output: move everything from
      // position i onwards one slot up, starting at the end.
      for ($x= $out; $x > $i; $x--) {
        $flags[$x]= $flags[$x - 1];
        $output[$x]= $output[$x - 1];
      }
      $flags[$i]= $this->_flagged($input[$in - 1]);
      $output[$i]= $n;
      $i++;
    }

    // Transform it to UCS-4 string
    $result= '';
    foreach ($output as $v) {
      $result.= chr(($v >> 24) & 255);
      $result.= chr(($v >> 16) & 255);
      $result.= chr(($v >> 8) & 255);
      $result.= chr($v & 255);
    }
    return TRUE;
  }
  /**
   * Encodes a string to Punycode.
   *
   * Every byte of $input is treated as one code point, so only code points
   * 0..255 can be expressed. Bytes below 0x80 are copied to the output
   * (passed through _encode_basic() when a flag exists for their position),
   * all other bytes are encoded as Punycode digits.
   *
   * $result is passed by reference and is overwritten.
   *
   * @param   string input The string to encode, one code point per byte
   * @param   &string result Receives the punycode string
   * @param   array flags The flags for each character (see _flagged() function)
   * @return  bool TRUE on success
   * @throws  lang.SystemException in case there's an integer overflow
   */
  public function encode($input, &$result, $flags) {
    $in_len= strlen($input);
    $n= PUNYCODE_INITIAL_N;
    $delta= $out= 0;
    $bias= PUNYCODE_INITIAL_BIAS;
    $output= array();
    $result= NULL;

    // Handle the basic code points: with a flag the case is forced via
    // _encode_basic(), without one the character is copied unchanged.
    for ($j= 0; $j < $in_len; ++$j) {
      if (ord($input[$j]) < 0x80) {
        $output[$out++]= isset($flags[$j])
          ? chr($this->_encode_basic(ord($input[$j]), $flags[$j]))
          : $input[$j]
        ;
      }
    }

    // h is the number of code points that have been handled, b is the
    // number of basic code points, and out is the number of characters
    // that have been output.
    $h= $b= $out;
    if ($b > 0) $output[$out++]= chr(PUNYCODE_DELIMITER);

    // Main encoding loop:
    while ($h < $in_len) {

      // All non-basic code points < n have been
      // handled already. Find the next larger one:
      for ($m= LONG_MAX, $j= 0; $j < $in_len; ++$j) {
        $c= ord($input[$j]);
        if ($c >= $n && $c < $m) $m= $c;
      }

      // Increase delta enough to advance the decoder's
      // <n,i> state to <m,0>, but guard against overflow:
      if ($m - $n > (LONG_MAX - $delta) / ($h + 1)) {
        throw new SystemException('Integer overflow');
      }
      $delta+= ($m - $n) * ($h + 1);
      $n= $m;

      for ($j= 0; $j < $in_len; ++$j) {
        $c= ord($input[$j]);

        // Punycode does not need to check whether input[j] is basic:
        if ($c < $n) {

          // PHP integers do not wrap around to zero, so test before incrementing
          if ($delta >= LONG_MAX) {
            throw new SystemException('Integer overflow');
          }
          ++$delta;
        }

        if ($c == $n) {

          // Represent delta as a generalized variable-length integer:
          for ($q= $delta, $k= PUNYCODE_BASE; ; $k+= PUNYCODE_BASE) {
            $t=
              $k <= $bias ? PUNYCODE_TMIN :
              ($k >= $bias + PUNYCODE_TMAX ? PUNYCODE_TMAX : $k - $bias);
            if ($q < $t) break;

            $output[$out++]= chr($this->_encode_digit($t + ($q - $t) % (PUNYCODE_BASE - $t), 0));

            // Cast the quotient, not the dividend, so q stays an integer
            $q= (int)(($q - $t) / (PUNYCODE_BASE - $t));
          }
          $output[$out++]= chr($this->_encode_digit($q, isset($flags[$j]) && $flags[$j]));
          $bias= $this->_adapt($delta, $h + 1, $h == $b);
          $delta= 0;
          ++$h;
        }
      }
      ++$delta;
      ++$n;
    }

    $result= implode('', $output);
    return TRUE;
  }
  /**
   * Decodes a Punycode string and returns it in the requested charset.
   *
   * A fresh PunyCode instance's decode() is handed a local variable to
   * fill; that value is expected to be UCS-4 and is converted with iconv()
   * unless the requested charset is 'UCS-4' itself.
   *
   * @param   string str The Punycode string
   * @param   string charset default 'ISO-8859-1' The charset to return the string in
   * @return  string The decoded string
   * @throws  lang.XPException if iconv() cannot convert to $charset
   */
  public function decodeString($str, $charset= 'ISO-8859-1') {
    $decoded= '';
    $caseFlags= array();
    $coder= new PunyCode();
    $coder->decode($str, $decoded, $caseFlags);

    // No conversion needed if UCS-4 was asked for
    if ('UCS-4' == $charset) return $decoded;

    $converted= iconv('UCS-4', $charset, $decoded);
    if (FALSE === $converted) {
      throw new XPException('Can not convert string to requested encoding('.$charset.')');
    }
    return $converted;
  }
  /**
   * Encodes a string to Punycode and returns the result.
   *
   * A fresh PunyCode instance's encode() is handed a local variable to
   * fill, together with one FALSE flag per byte of the input.
   *
   * @param   string str The string to encode
   * @return  string The value encode() left in its result argument
   */
  public function encodeString($str) {
    $length= strlen($str);

    // One FALSE per input byte; an empty input gets an empty flag list
    $caseFlags= $length > 0 ? array_fill(0, $length, FALSE) : array();

    $encoded= '';
    $coder= new PunyCode();
    $coder->encode($str, $encoded, $caseFlags);
    return $encoded;
  }
  324. }
  325. ?>