PageRenderTime 46ms CodeModel.GetById 19ms RepoModel.GetById 0ms app.codeStats 0ms

/include/utf8/ord.php

https://bitbucket.org/gencer/fluxbb
PHP | 78 lines | 51 code | 14 blank | 13 comment | 19 complexity | 7c069cacd55436bef46a0868e07d6ac8 MD5 | raw file
Possible License(s): GPL-2.0
  1. <?php
  2. /**
  3. * @version $Id: ord.php,v 1.4 2006/09/11 15:22:54 harryf Exp $
  4. * @package utf8
  5. * @subpackage strings
  6. */
  7. /**
  8. * UTF-8 aware alternative to ord
  9. * Returns the unicode ordinal for a character
  10. * @param string UTF-8 encoded character
  11. * @return int unicode ordinal for the character
  12. * @see http://www.php.net/ord
  13. * @see http://www.php.net/manual/en/function.ord.php#46267
  14. */
  15. function utf8_ord($chr)
  16. {
  17. $ord0 = ord($chr);
  18. if ($ord0 >= 0 && $ord0 <= 127)
  19. return $ord0;
  20. if (!isset($chr{1}))
  21. {
  22. trigger_error('Short sequence - at least 2 bytes expected, only 1 seen');
  23. return false;
  24. }
  25. $ord1 = ord($chr{1});
  26. if ($ord0 >= 192 && $ord0 <= 223)
  27. return ($ord0 - 192) * 64 + ($ord1 - 128);
  28. if (!isset($chr{2}))
  29. {
  30. trigger_error('Short sequence - at least 3 bytes expected, only 2 seen');
  31. return false;
  32. }
  33. $ord2 = ord($chr{2});
  34. if ($ord0 >= 224 && $ord0 <= 239)
  35. return ($ord0-224)*4096 + ($ord1-128)*64 + ($ord2-128);
  36. if (!isset($chr{3}))
  37. {
  38. trigger_error('Short sequence - at least 4 bytes expected, only 3 seen');
  39. return false;
  40. }
  41. $ord3 = ord($chr{3});
  42. if ($ord0>=240 && $ord0<=247)
  43. return ($ord0-240)*262144 + ($ord1-128)*4096 + ($ord2-128)*64 + ($ord3-128);
  44. if (!isset($chr{4}))
  45. {
  46. trigger_error('Short sequence - at least 5 bytes expected, only 4 seen');
  47. return false;
  48. }
  49. $ord4 = ord($chr{4});
  50. if ($ord0>=248 && $ord0<=251)
  51. return ($ord0-248)*16777216 + ($ord1-128)*262144 + ($ord2-128)*4096 + ($ord3-128)*64 + ($ord4-128);
  52. if (!isset($chr{5}))
  53. {
  54. trigger_error('Short sequence - at least 6 bytes expected, only 5 seen');
  55. return false;
  56. }
  57. if ($ord0>=252 && $ord0<=253)
  58. return ($ord0-252) * 1073741824 + ($ord1-128)*16777216 + ($ord2-128)*262144 + ($ord3-128)*4096 + ($ord4-128)*64 + (ord($c{5})-128);
  59. if ($ord0 >= 254 && $ord0 <= 255)
  60. {
  61. trigger_error('Invalid UTF-8 with surrogate ordinal '.$ord0);
  62. return false;
  63. }
  64. }