PageRenderTime 61ms CodeModel.GetById 23ms RepoModel.GetById 0ms app.codeStats 1ms

/classes/fUTF8.php

https://bitbucket.org/dsqmoore/flourish
PHP | 1640 lines | 1010 code | 187 blank | 443 comment | 110 complexity | c9f6b48fb2a19f8390197d74b4538bcb MD5 | raw file

Large files are truncated, but you can click here to view the full file

  1. <?php
  2. /**
  3. * Provides string functions for UTF-8 strings
  4. *
  5. * This class is implemented to provide a UTF-8 version of almost every built-in
  6. * PHP string function. For more information about UTF-8, please visit
  7. * http://flourishlib.com/docs/UTF-8.
  8. *
  9. * @copyright Copyright (c) 2008-2011 Will Bond
  10. * @author Will Bond [wb] <will@flourishlib.com>
  11. * @license http://flourishlib.com/license
  12. *
  13. * @package Flourish
  14. * @link http://flourishlib.com/fUTF8
  15. *
  16. * @version 1.0.0b15
  17. * @changes 1.0.0b15 Fixed a bug with using IBM's iconv implementation on AIX [wb, 2011-07-29]
  18. * @changes 1.0.0b14 Added a workaround for iconv having issues in MAMP 1.9.4+ [wb, 2011-07-26]
  19. * @changes 1.0.0b13 Fixed notices from being thrown when invalid data is sent to ::clean() [wb, 2011-06-10]
  20. * @changes 1.0.0b12 Fixed a variable name typo in ::sub() [wb, 2011-05-09]
  21. * @changes 1.0.0b11 Updated the class to not using phpinfo() to determine the iconv implementation [wb, 2010-11-04]
  22. * @changes 1.0.0b10 Fixed a bug with capitalizing a lowercase i resulting in a dotted upper-case I [wb, 2010-11-01]
  23. * @changes 1.0.0b9 Updated class to use fCore::startErrorCapture() instead of `error_reporting()` [wb, 2010-08-09]
  24. * @changes 1.0.0b8 Removed `e` flag from preg_replace() calls [wb, 2010-06-08]
  25. * @changes 1.0.0b7 Added the methods ::trim(), ::rtrim() and ::ltrim() [wb, 2010-05-11]
  26. * @changes 1.0.0b6 Fixed ::clean() to work with PHP installs that use an iconv library that doesn't support //IGNORE [wb, 2010-03-02]
  27. * @changes 1.0.0b5 Changed ::ucwords() to also uppercase words right after various punctuation [wb, 2009-09-18]
  28. * @changes 1.0.0b4 Changed replacement values in preg_replace() calls to be properly escaped [wb, 2009-06-11]
  29. * @changes 1.0.0b3 Fixed a parameter name in ::rpos() from `$search` to `$needle` [wb, 2009-02-06]
  30. * @changes 1.0.0b2 Fixed a bug in ::explode() with newlines and zero-length delimiters [wb, 2009-02-05]
  31. * @changes 1.0.0b The initial implementation [wb, 2008-06-01]
  32. */
  33. class fUTF8
  34. {
  35. // Each constant holds the 'fUTF8::name' callback string for the static method of the same name, so a callback can be written as fUTF8::len instead of as a quoted string
  36. const ascii = 'fUTF8::ascii';
  37. const chr = 'fUTF8::chr';
  38. const clean = 'fUTF8::clean';
  39. const cmp = 'fUTF8::cmp';
  40. const explode = 'fUTF8::explode';
  41. const icmp = 'fUTF8::icmp';
  42. const inatcmp = 'fUTF8::inatcmp';
  43. const ipos = 'fUTF8::ipos';
  44. const ireplace = 'fUTF8::ireplace';
  45. const irpos = 'fUTF8::irpos';
  46. const istr = 'fUTF8::istr';
  47. const len = 'fUTF8::len';
  48. const lower = 'fUTF8::lower';
  49. const ltrim = 'fUTF8::ltrim';
  50. const natcmp = 'fUTF8::natcmp';
  51. const ord = 'fUTF8::ord';
  52. const pad = 'fUTF8::pad';
  53. const pos = 'fUTF8::pos';
  54. const replace = 'fUTF8::replace';
  55. const reset = 'fUTF8::reset';
  56. const rev = 'fUTF8::rev';
  57. const rpos = 'fUTF8::rpos';
  58. const rtrim = 'fUTF8::rtrim';
  59. const str = 'fUTF8::str';
  60. const sub = 'fUTF8::sub';
  61. const trim = 'fUTF8::trim';
  62. const ucfirst = 'fUTF8::ucfirst';
  63. const ucwords = 'fUTF8::ucwords';
  64. const upper = 'fUTF8::upper';
  65. const wordwrap = 'fUTF8::wordwrap';
  66. /**
  67. * Whether iconv() supports the //IGNORE suffix - depending on how things are compiled, NetBSD and Solaris don't support it
  68. *
  69. * If //IGNORE support is not provided, strings with invalid characters will be truncated
  70. *
  71. * @var boolean Starts as NULL; presumably filled in once support has been detected (the detection code is not in this part of the file - confirm)
  72. */
  73. static private $can_ignore_invalid = NULL;
  74. /**
  75. * All lowercase UTF-8 characters mapped to uppercase characters
  76. * A key must never be given two different values because PHP keeps only the last one; the digraphs 'dž', 'lj' and 'nj' are therefore mapped only to their uppercase forms, not to the titlecase 'Dž', 'Lj' and 'Nj'
  77. * @var array NOTE(review): the closing 'a'-'z' run repeats the opening entries as rendered here (same values, so harmless); the upstream file may use fullwidth letters there - confirm before deduplicating
  78. */
  79. static private $lower_to_upper = array(
  80. 'a' => 'A', 'b' => 'B', 'c' => 'C', 'd' => 'D', 'e' => 'E', 'f' => 'F',
  81. 'g' => 'G', 'h' => 'H', 'i' => 'I', 'j' => 'J', 'k' => 'K', 'l' => 'L',
  82. 'm' => 'M', 'n' => 'N', 'o' => 'O', 'p' => 'P', 'q' => 'Q', 'r' => 'R',
  83. 's' => 'S', 't' => 'T', 'u' => 'U', 'v' => 'V', 'w' => 'W', 'x' => 'X',
  84. 'y' => 'Y', 'z' => 'Z', 'à' => 'À', 'á' => 'Á', 'â' => 'Â', 'ã' => 'Ã',
  85. 'ä' => 'Ä', 'å' => 'Å', 'æ' => 'Æ', 'ç' => 'Ç', 'è' => 'È', 'é' => 'É',
  86. 'ê' => 'Ê', 'ë' => 'Ë', 'ì' => 'Ì', 'í' => 'Í', 'î' => 'Î', 'ï' => 'Ï',
  87. 'ð' => 'Ð', 'ñ' => 'Ñ', 'ò' => 'Ò', 'ó' => 'Ó', 'ô' => 'Ô', 'õ' => 'Õ',
  88. 'ö' => 'Ö', 'ø' => 'Ø', 'ù' => 'Ù', 'ú' => 'Ú', 'û' => 'Û', 'ü' => 'Ü',
  89. 'ý' => 'Ý', 'þ' => 'Þ', 'ā' => 'Ā', 'ă' => 'Ă', 'ą' => 'Ą', 'ć' => 'Ć',
  90. 'ĉ' => 'Ĉ', 'ċ' => 'Ċ', 'č' => 'Č', 'ď' => 'Ď', 'đ' => 'Đ', 'ē' => 'Ē',
  91. 'ĕ' => 'Ĕ', 'ė' => 'Ė', 'ę' => 'Ę', 'ě' => 'Ě', 'ĝ' => 'Ĝ', 'ğ' => 'Ğ',
  92. 'ġ' => 'Ġ', 'ģ' => 'Ģ', 'ĥ' => 'Ĥ', 'ħ' => 'Ħ', 'ĩ' => 'Ĩ', 'ī' => 'Ī',
  93. 'ĭ' => 'Ĭ', 'į' => 'Į', 'ij' => 'IJ', 'ĵ' => 'Ĵ', 'ķ' => 'Ķ', 'ĺ' => 'Ĺ',
  94. 'ļ' => 'Ļ', 'ľ' => 'Ľ', 'ŀ' => 'Ŀ', 'ł' => 'Ł', 'ń' => 'Ń', 'ņ' => 'Ņ',
  95. 'ň' => 'Ň', 'ŋ' => 'Ŋ', 'ō' => 'Ō', 'ŏ' => 'Ŏ', 'ő' => 'Ő', 'œ' => 'Œ',
  96. 'ŕ' => 'Ŕ', 'ŗ' => 'Ŗ', 'ř' => 'Ř', 'ś' => 'Ś', 'ŝ' => 'Ŝ', 'ş' => 'Ş',
  97. 'š' => 'Š', 'ţ' => 'Ţ', 'ť' => 'Ť', 'ŧ' => 'Ŧ', 'ũ' => 'Ũ', 'ū' => 'Ū',
  98. 'ŭ' => 'Ŭ', 'ů' => 'Ů', 'ű' => 'Ű', 'ų' => 'Ų', 'ŵ' => 'Ŵ', 'ŷ' => 'Ŷ',
  99. 'ÿ' => 'Ÿ', 'ź' => 'Ź', 'ż' => 'Ż', 'ž' => 'Ž', 'ɓ' => 'Ɓ', 'ƃ' => 'Ƃ',
  100. 'ƅ' => 'Ƅ', 'ɔ' => 'Ɔ', 'ƈ' => 'Ƈ', 'ɗ' => 'Ɗ', 'ƌ' => 'Ƌ', 'ɘ' => 'Ǝ',
  101. 'ə' => 'Ə', 'ɛ' => 'Ɛ', 'ƒ' => 'Ƒ', 'ɠ' => 'Ɠ', 'ɣ' => 'Ɣ', 'ɩ' => 'Ɩ',
  102. 'ɨ' => 'Ɨ', 'ƙ' => 'Ƙ', 'ɯ' => 'Ɯ', 'ɲ' => 'Ɲ', 'ɵ' => 'Ɵ', 'ơ' => 'Ơ',
  103. 'ƣ' => 'Ƣ', 'ƥ' => 'Ƥ', 'ƨ' => 'Ƨ', 'ʃ' => 'Ʃ', 'ƭ' => 'Ƭ', 'ʈ' => 'Ʈ',
  104. 'ư' => 'Ư', 'ʊ' => 'Ʊ', 'ʋ' => 'Ʋ', 'ƴ' => 'Ƴ', 'ƶ' => 'Ƶ', 'ʒ' => 'Ʒ',
  105. 'ƹ' => 'Ƹ', 'ƽ' => 'Ƽ', 'dž' => 'DŽ', 'lj' => 'LJ',
  106. 'nj' => 'NJ', 'ǎ' => 'Ǎ', 'ǐ' => 'Ǐ', 'ǒ' => 'Ǒ', 'ǔ' => 'Ǔ',
  107. 'ǖ' => 'Ǖ', 'ǘ' => 'Ǘ', 'ǚ' => 'Ǚ', 'ǜ' => 'Ǜ', 'ǟ' => 'Ǟ', 'ǡ' => 'Ǡ',
  108. 'ǣ' => 'Ǣ', 'ǥ' => 'Ǥ', 'ǧ' => 'Ǧ', 'ǩ' => 'Ǩ', 'ǫ' => 'Ǫ', 'ǭ' => 'Ǭ',
  109. 'ǯ' => 'Ǯ', 'dz' => 'DZ', 'ǵ' => 'Ǵ', 'ǻ' => 'Ǻ', 'ǽ' => 'Ǽ', 'ǿ' => 'Ǿ',
  110. 'ȁ' => 'Ȁ', 'ȃ' => 'Ȃ', 'ȅ' => 'Ȅ', 'ȇ' => 'Ȇ', 'ȉ' => 'Ȉ', 'ȋ' => 'Ȋ',
  111. 'ȍ' => 'Ȍ', 'ȏ' => 'Ȏ', 'ȑ' => 'Ȑ', 'ȓ' => 'Ȓ', 'ȕ' => 'Ȕ', 'ȗ' => 'Ȗ',
  112. 'ά' => 'Ά', 'έ' => 'Έ', 'ή' => 'Ή', 'ί' => 'Ί', 'ό' => 'Ό', 'ύ' => 'Ύ',
  113. 'ώ' => 'Ώ', 'α' => 'Α', 'β' => 'Β', 'γ' => 'Γ', 'δ' => 'Δ', 'ε' => 'Ε',
  114. 'ζ' => 'Ζ', 'η' => 'Η', 'θ' => 'Θ', 'ι' => 'Ι', 'κ' => 'Κ', 'λ' => 'Λ',
  115. 'μ' => 'Μ', 'ν' => 'Ν', 'ξ' => 'Ξ', 'ο' => 'Ο', 'π' => 'Π', 'ρ' => 'Ρ',
  116. 'σ' => 'Σ', 'τ' => 'Τ', 'υ' => 'Υ', 'φ' => 'Φ', 'χ' => 'Χ', 'ψ' => 'Ψ',
  117. 'ω' => 'Ω', 'ϊ' => 'Ϊ', 'ϋ' => 'Ϋ', 'ϣ' => 'Ϣ', 'ϥ' => 'Ϥ', 'ϧ' => 'Ϧ',
  118. 'ϩ' => 'Ϩ', 'ϫ' => 'Ϫ', 'ϭ' => 'Ϭ', 'ϯ' => 'Ϯ', 'ё' => 'Ё', 'ђ' => 'Ђ',
  119. 'ѓ' => 'Ѓ', 'є' => 'Є', 'ѕ' => 'Ѕ', 'і' => 'І', 'ї' => 'Ї', 'ј' => 'Ј',
  120. 'љ' => 'Љ', 'њ' => 'Њ', 'ћ' => 'Ћ', 'ќ' => 'Ќ', 'ў' => 'Ў', 'џ' => 'Џ',
  121. 'а' => 'А', 'б' => 'Б', 'в' => 'В', 'г' => 'Г', 'д' => 'Д', 'е' => 'Е',
  122. 'ж' => 'Ж', 'з' => 'З', 'и' => 'И', 'й' => 'Й', 'к' => 'К', 'л' => 'Л',
  123. 'м' => 'М', 'н' => 'Н', 'о' => 'О', 'п' => 'П', 'р' => 'Р', 'с' => 'С',
  124. 'т' => 'Т', 'у' => 'У', 'ф' => 'Ф', 'х' => 'Х', 'ц' => 'Ц', 'ч' => 'Ч',
  125. 'ш' => 'Ш', 'щ' => 'Щ', 'ъ' => 'Ъ', 'ы' => 'Ы', 'ь' => 'Ь', 'э' => 'Э',
  126. 'ю' => 'Ю', 'я' => 'Я', 'ѡ' => 'Ѡ', 'ѣ' => 'Ѣ', 'ѥ' => 'Ѥ', 'ѧ' => 'Ѧ',
  127. 'ѩ' => 'Ѩ', 'ѫ' => 'Ѫ', 'ѭ' => 'Ѭ', 'ѯ' => 'Ѯ', 'ѱ' => 'Ѱ', 'ѳ' => 'Ѳ',
  128. 'ѵ' => 'Ѵ', 'ѷ' => 'Ѷ', 'ѹ' => 'Ѹ', 'ѻ' => 'Ѻ', 'ѽ' => 'Ѽ', 'ѿ' => 'Ѿ',
  129. 'ҁ' => 'Ҁ', 'ґ' => 'Ґ', 'ғ' => 'Ғ', 'ҕ' => 'Ҕ', 'җ' => 'Җ', 'ҙ' => 'Ҙ',
  130. 'қ' => 'Қ', 'ҝ' => 'Ҝ', 'ҟ' => 'Ҟ', 'ҡ' => 'Ҡ', 'ң' => 'Ң', 'ҥ' => 'Ҥ',
  131. 'ҧ' => 'Ҧ', 'ҩ' => 'Ҩ', 'ҫ' => 'Ҫ', 'ҭ' => 'Ҭ', 'ү' => 'Ү', 'ұ' => 'Ұ',
  132. 'ҳ' => 'Ҳ', 'ҵ' => 'Ҵ', 'ҷ' => 'Ҷ', 'ҹ' => 'Ҹ', 'һ' => 'Һ', 'ҽ' => 'Ҽ',
  133. 'ҿ' => 'Ҿ', 'ӂ' => 'Ӂ', 'ӄ' => 'Ӄ', 'ӈ' => 'Ӈ', 'ӌ' => 'Ӌ', 'ӑ' => 'Ӑ',
  134. 'ӓ' => 'Ӓ', 'ӕ' => 'Ӕ', 'ӗ' => 'Ӗ', 'ә' => 'Ә', 'ӛ' => 'Ӛ', 'ӝ' => 'Ӝ',
  135. 'ӟ' => 'Ӟ', 'ӡ' => 'Ӡ', 'ӣ' => 'Ӣ', 'ӥ' => 'Ӥ', 'ӧ' => 'Ӧ', 'ө' => 'Ө',
  136. 'ӫ' => 'Ӫ', 'ӯ' => 'Ӯ', 'ӱ' => 'Ӱ', 'ӳ' => 'Ӳ', 'ӵ' => 'Ӵ', 'ӹ' => 'Ӹ',
  137. 'ա' => 'Ա', 'բ' => 'Բ', 'գ' => 'Գ', 'դ' => 'Դ', 'ե' => 'Ե', 'զ' => 'Զ',
  138. 'է' => 'Է', 'ը' => 'Ը', 'թ' => 'Թ', 'ժ' => 'Ժ', 'ի' => 'Ի', 'լ' => 'Լ',
  139. 'խ' => 'Խ', 'ծ' => 'Ծ', 'կ' => 'Կ', 'հ' => 'Հ', 'ձ' => 'Ձ', 'ղ' => 'Ղ',
  140. 'ճ' => 'Ճ', 'մ' => 'Մ', 'յ' => 'Յ', 'ն' => 'Ն', 'շ' => 'Շ', 'ո' => 'Ո',
  141. 'չ' => 'Չ', 'պ' => 'Պ', 'ջ' => 'Ջ', 'ռ' => 'Ռ', 'ս' => 'Ս', 'վ' => 'Վ',
  142. 'տ' => 'Տ', 'ր' => 'Ր', 'ց' => 'Ց', 'ւ' => 'Ւ', 'փ' => 'Փ', 'ք' => 'Ք',
  143. 'օ' => 'Օ', 'ֆ' => 'Ֆ', 'ა' => 'Ⴀ', 'ბ' => 'Ⴁ', 'გ' => 'Ⴂ', 'დ' => 'Ⴃ',
  144. 'ე' => 'Ⴄ', 'ვ' => 'Ⴅ', 'ზ' => 'Ⴆ', 'თ' => 'Ⴇ', 'ი' => 'Ⴈ', 'კ' => 'Ⴉ',
  145. 'ლ' => 'Ⴊ', 'მ' => 'Ⴋ', 'ნ' => 'Ⴌ', 'ო' => 'Ⴍ', 'პ' => 'Ⴎ', 'ჟ' => 'Ⴏ',
  146. 'რ' => 'Ⴐ', 'ს' => 'Ⴑ', 'ტ' => 'Ⴒ', 'უ' => 'Ⴓ', 'ფ' => 'Ⴔ', 'ქ' => 'Ⴕ',
  147. 'ღ' => 'Ⴖ', 'ყ' => 'Ⴗ', 'შ' => 'Ⴘ', 'ჩ' => 'Ⴙ', 'ც' => 'Ⴚ', 'ძ' => 'Ⴛ',
  148. 'წ' => 'Ⴜ', 'ჭ' => 'Ⴝ', 'ხ' => 'Ⴞ', 'ჯ' => 'Ⴟ', 'ჰ' => 'Ⴠ', 'ჱ' => 'Ⴡ',
  149. 'ჲ' => 'Ⴢ', 'ჳ' => 'Ⴣ', 'ჴ' => 'Ⴤ', 'ჵ' => 'Ⴥ', 'ḁ' => 'Ḁ', 'ḃ' => 'Ḃ',
  150. 'ḅ' => 'Ḅ', 'ḇ' => 'Ḇ', 'ḉ' => 'Ḉ', 'ḋ' => 'Ḋ', 'ḍ' => 'Ḍ', 'ḏ' => 'Ḏ',
  151. 'ḑ' => 'Ḑ', 'ḓ' => 'Ḓ', 'ḕ' => 'Ḕ', 'ḗ' => 'Ḗ', 'ḙ' => 'Ḙ', 'ḛ' => 'Ḛ',
  152. 'ḝ' => 'Ḝ', 'ḟ' => 'Ḟ', 'ḡ' => 'Ḡ', 'ḣ' => 'Ḣ', 'ḥ' => 'Ḥ', 'ḧ' => 'Ḧ',
  153. 'ḩ' => 'Ḩ', 'ḫ' => 'Ḫ', 'ḭ' => 'Ḭ', 'ḯ' => 'Ḯ', 'ḱ' => 'Ḱ', 'ḳ' => 'Ḳ',
  154. 'ḵ' => 'Ḵ', 'ḷ' => 'Ḷ', 'ḹ' => 'Ḹ', 'ḻ' => 'Ḻ', 'ḽ' => 'Ḽ', 'ḿ' => 'Ḿ',
  155. 'ṁ' => 'Ṁ', 'ṃ' => 'Ṃ', 'ṅ' => 'Ṅ', 'ṇ' => 'Ṇ', 'ṉ' => 'Ṉ', 'ṋ' => 'Ṋ',
  156. 'ṍ' => 'Ṍ', 'ṏ' => 'Ṏ', 'ṑ' => 'Ṑ', 'ṓ' => 'Ṓ', 'ṕ' => 'Ṕ', 'ṗ' => 'Ṗ',
  157. 'ṙ' => 'Ṙ', 'ṛ' => 'Ṛ', 'ṝ' => 'Ṝ', 'ṟ' => 'Ṟ', 'ṡ' => 'Ṡ', 'ṣ' => 'Ṣ',
  158. 'ṥ' => 'Ṥ', 'ṧ' => 'Ṧ', 'ṩ' => 'Ṩ', 'ṫ' => 'Ṫ', 'ṭ' => 'Ṭ', 'ṯ' => 'Ṯ',
  159. 'ṱ' => 'Ṱ', 'ṳ' => 'Ṳ', 'ṵ' => 'Ṵ', 'ṷ' => 'Ṷ', 'ṹ' => 'Ṹ', 'ṻ' => 'Ṻ',
  160. 'ṽ' => 'Ṽ', 'ṿ' => 'Ṿ', 'ẁ' => 'Ẁ', 'ẃ' => 'Ẃ', 'ẅ' => 'Ẅ', 'ẇ' => 'Ẇ',
  161. 'ẉ' => 'Ẉ', 'ẋ' => 'Ẋ', 'ẍ' => 'Ẍ', 'ẏ' => 'Ẏ', 'ẑ' => 'Ẑ', 'ẓ' => 'Ẓ',
  162. 'ẕ' => 'Ẕ', 'ạ' => 'Ạ', 'ả' => 'Ả', 'ấ' => 'Ấ', 'ầ' => 'Ầ', 'ẩ' => 'Ẩ',
  163. 'ẫ' => 'Ẫ', 'ậ' => 'Ậ', 'ắ' => 'Ắ', 'ằ' => 'Ằ', 'ẳ' => 'Ẳ', 'ẵ' => 'Ẵ',
  164. 'ặ' => 'Ặ', 'ẹ' => 'Ẹ', 'ẻ' => 'Ẻ', 'ẽ' => 'Ẽ', 'ế' => 'Ế', 'ề' => 'Ề',
  165. 'ể' => 'Ể', 'ễ' => 'Ễ', 'ệ' => 'Ệ', 'ỉ' => 'Ỉ', 'ị' => 'Ị', 'ọ' => 'Ọ',
  166. 'ỏ' => 'Ỏ', 'ố' => 'Ố', 'ồ' => 'Ồ', 'ổ' => 'Ổ', 'ỗ' => 'Ỗ', 'ộ' => 'Ộ',
  167. 'ớ' => 'Ớ', 'ờ' => 'Ờ', 'ở' => 'Ở', 'ỡ' => 'Ỡ', 'ợ' => 'Ợ', 'ụ' => 'Ụ',
  168. 'ủ' => 'Ủ', 'ứ' => 'Ứ', 'ừ' => 'Ừ', 'ử' => 'Ử', 'ữ' => 'Ữ', 'ự' => 'Ự',
  169. 'ỳ' => 'Ỳ', 'ỵ' => 'Ỵ', 'ỷ' => 'Ỷ', 'ỹ' => 'Ỹ', 'ἀ' => 'Ἀ', 'ἁ' => 'Ἁ',
  170. 'ἂ' => 'Ἂ', 'ἃ' => 'Ἃ', 'ἄ' => 'Ἄ', 'ἅ' => 'Ἅ', 'ἆ' => 'Ἆ', 'ἇ' => 'Ἇ',
  171. 'ἐ' => 'Ἐ', 'ἑ' => 'Ἑ', 'ἒ' => 'Ἒ', 'ἓ' => 'Ἓ', 'ἔ' => 'Ἔ', 'ἕ' => 'Ἕ',
  172. 'ἠ' => 'Ἠ', 'ἡ' => 'Ἡ', 'ἢ' => 'Ἢ', 'ἣ' => 'Ἣ', 'ἤ' => 'Ἤ', 'ἥ' => 'Ἥ',
  173. 'ἦ' => 'Ἦ', 'ἧ' => 'Ἧ', 'ἰ' => 'Ἰ', 'ἱ' => 'Ἱ', 'ἲ' => 'Ἲ', 'ἳ' => 'Ἳ',
  174. 'ἴ' => 'Ἴ', 'ἵ' => 'Ἵ', 'ἶ' => 'Ἶ', 'ἷ' => 'Ἷ', 'ὀ' => 'Ὀ', 'ὁ' => 'Ὁ',
  175. 'ὂ' => 'Ὂ', 'ὃ' => 'Ὃ', 'ὄ' => 'Ὄ', 'ὅ' => 'Ὅ', 'ὑ' => 'Ὑ', 'ὓ' => 'Ὓ',
  176. 'ὕ' => 'Ὕ', 'ὗ' => 'Ὗ', 'ὠ' => 'Ὠ', 'ὡ' => 'Ὡ', 'ὢ' => 'Ὢ', 'ὣ' => 'Ὣ',
  177. 'ὤ' => 'Ὤ', 'ὥ' => 'Ὥ', 'ὦ' => 'Ὦ', 'ὧ' => 'Ὧ', 'ᾀ' => 'ᾈ', 'ᾁ' => 'ᾉ',
  178. 'ᾂ' => 'ᾊ', 'ᾃ' => 'ᾋ', 'ᾄ' => 'ᾌ', 'ᾅ' => 'ᾍ', 'ᾆ' => 'ᾎ', 'ᾇ' => 'ᾏ',
  179. 'ᾐ' => 'ᾘ', 'ᾑ' => 'ᾙ', 'ᾒ' => 'ᾚ', 'ᾓ' => 'ᾛ', 'ᾔ' => 'ᾜ', 'ᾕ' => 'ᾝ',
  180. 'ᾖ' => 'ᾞ', 'ᾗ' => 'ᾟ', 'ᾠ' => 'ᾨ', 'ᾡ' => 'ᾩ', 'ᾢ' => 'ᾪ', 'ᾣ' => 'ᾫ',
  181. 'ᾤ' => 'ᾬ', 'ᾥ' => 'ᾭ', 'ᾦ' => 'ᾮ', 'ᾧ' => 'ᾯ', 'ᾰ' => 'Ᾰ', 'ᾱ' => 'Ᾱ',
  182. 'ῐ' => 'Ῐ', 'ῑ' => 'Ῑ', 'ῠ' => 'Ῠ', 'ῡ' => 'Ῡ', 'ⓐ' => 'Ⓐ', 'ⓑ' => 'Ⓑ',
  183. 'ⓒ' => 'Ⓒ', 'ⓓ' => 'Ⓓ', 'ⓔ' => 'Ⓔ', 'ⓕ' => 'Ⓕ', 'ⓖ' => 'Ⓖ', 'ⓗ' => 'Ⓗ',
  184. 'ⓘ' => 'Ⓘ', 'ⓙ' => 'Ⓙ', 'ⓚ' => 'Ⓚ', 'ⓛ' => 'Ⓛ', 'ⓜ' => 'Ⓜ', 'ⓝ' => 'Ⓝ',
  185. 'ⓞ' => 'Ⓞ', 'ⓟ' => 'Ⓟ', 'ⓠ' => 'Ⓠ', 'ⓡ' => 'Ⓡ', 'ⓢ' => 'Ⓢ', 'ⓣ' => 'Ⓣ',
  186. 'ⓤ' => 'Ⓤ', 'ⓥ' => 'Ⓥ', 'ⓦ' => 'Ⓦ', 'ⓧ' => 'Ⓧ', 'ⓨ' => 'Ⓨ', 'ⓩ' => 'Ⓩ',
  187. 'a' => 'A', 'b' => 'B', 'c' => 'C', 'd' => 'D', 'e' => 'E', 'f' => 'F',
  188. 'g' => 'G', 'h' => 'H', 'i' => 'I', 'j' => 'J', 'k' => 'K', 'l' => 'L',
  189. 'm' => 'M', 'n' => 'N', 'o' => 'O', 'p' => 'P', 'q' => 'Q', 'r' => 'R',
  190. 's' => 'S', 't' => 'T', 'u' => 'U', 'v' => 'V', 'w' => 'W', 'x' => 'X',
  191. 'y' => 'Y', 'z' => 'Z'
  192. );
  193. /**
  194. * Lowercase UTF-8 characters that [http://php.net/mb_strtoupper mb_strtoupper()] does not handle properly, each mapped to the uppercase character to use instead
  195. *
  196. * @var array NOTE(review): the key 'Dz' is a titlecase digraph rather than a lowercase character - presumably it upgrades a titlecase result to full uppercase; confirm against ::upper()
  197. */
  198. static private $mb_lower_to_upper_fix = array(
  199. 'ɘ' => 'Ǝ', 'Dz' => 'DZ', 'ა' => 'Ⴀ', 'ბ' => 'Ⴁ', 'გ' => 'Ⴂ', 'დ' => 'Ⴃ',
  200. 'ე' => 'Ⴄ', 'ვ' => 'Ⴅ', 'ზ' => 'Ⴆ', 'თ' => 'Ⴇ', 'ი' => 'Ⴈ', 'კ' => 'Ⴉ',
  201. 'ლ' => 'Ⴊ', 'მ' => 'Ⴋ', 'ნ' => 'Ⴌ', 'ო' => 'Ⴍ', 'პ' => 'Ⴎ', 'ჟ' => 'Ⴏ',
  202. 'რ' => 'Ⴐ', 'ს' => 'Ⴑ', 'ტ' => 'Ⴒ', 'უ' => 'Ⴓ', 'ფ' => 'Ⴔ', 'ქ' => 'Ⴕ',
  203. 'ღ' => 'Ⴖ', 'ყ' => 'Ⴗ', 'შ' => 'Ⴘ', 'ჩ' => 'Ⴙ', 'ც' => 'Ⴚ', 'ძ' => 'Ⴛ',
  204. 'წ' => 'Ⴜ', 'ჭ' => 'Ⴝ', 'ხ' => 'Ⴞ', 'ჯ' => 'Ⴟ', 'ჰ' => 'Ⴠ', 'ჱ' => 'Ⴡ',
  205. 'ჲ' => 'Ⴢ', 'ჳ' => 'Ⴣ', 'ჴ' => 'Ⴤ', 'ჵ' => 'Ⴥ', 'ⓐ' => 'Ⓐ', 'ⓑ' => 'Ⓑ',
  206. 'ⓒ' => 'Ⓒ', 'ⓓ' => 'Ⓓ', 'ⓔ' => 'Ⓔ', 'ⓕ' => 'Ⓕ', 'ⓖ' => 'Ⓖ', 'ⓗ' => 'Ⓗ',
  207. 'ⓘ' => 'Ⓘ', 'ⓙ' => 'Ⓙ', 'ⓚ' => 'Ⓚ', 'ⓛ' => 'Ⓛ', 'ⓜ' => 'Ⓜ', 'ⓝ' => 'Ⓝ',
  208. 'ⓞ' => 'Ⓞ', 'ⓟ' => 'Ⓟ', 'ⓠ' => 'Ⓠ', 'ⓡ' => 'Ⓡ', 'ⓢ' => 'Ⓢ', 'ⓣ' => 'Ⓣ',
  209. 'ⓤ' => 'Ⓤ', 'ⓥ' => 'Ⓥ', 'ⓦ' => 'Ⓦ', 'ⓧ' => 'Ⓧ', 'ⓨ' => 'Ⓨ', 'ⓩ' => 'Ⓩ'
  210. );
  211. /**
  212. * Uppercase UTF-8 characters that [http://php.net/mb_strtolower mb_strtolower()] does not handle properly, each mapped to the lowercase character to use instead
  213. *
  214. * @var array NOTE(review): the first key, 'ǝ', is already a lowercase letter - presumably it rewrites mb_strtolower() output so that it agrees with the 'Ǝ' => 'ɘ' entry in $upper_to_lower; confirm
  215. */
  216. static private $mb_upper_to_lower_fix = array(
  217. 'ǝ' => 'ɘ', 'Dž' => 'dž', 'Lj' => 'lj', 'Nj' => 'nj', 'Ⴀ' => 'ა', 'Ⴁ' => 'ბ',
  218. 'Ⴂ' => 'გ', 'Ⴃ' => 'დ', 'Ⴄ' => 'ე', 'Ⴅ' => 'ვ', 'Ⴆ' => 'ზ', 'Ⴇ' => 'თ',
  219. 'Ⴈ' => 'ი', 'Ⴉ' => 'კ', 'Ⴊ' => 'ლ', 'Ⴋ' => 'მ', 'Ⴌ' => 'ნ', 'Ⴍ' => 'ო',
  220. 'Ⴎ' => 'პ', 'Ⴏ' => 'ჟ', 'Ⴐ' => 'რ', 'Ⴑ' => 'ს', 'Ⴒ' => 'ტ', 'Ⴓ' => 'უ',
  221. 'Ⴔ' => 'ფ', 'Ⴕ' => 'ქ', 'Ⴖ' => 'ღ', 'Ⴗ' => 'ყ', 'Ⴘ' => 'შ', 'Ⴙ' => 'ჩ',
  222. 'Ⴚ' => 'ც', 'Ⴛ' => 'ძ', 'Ⴜ' => 'წ', 'Ⴝ' => 'ჭ', 'Ⴞ' => 'ხ', 'Ⴟ' => 'ჯ',
  223. 'Ⴠ' => 'ჰ', 'Ⴡ' => 'ჱ', 'Ⴢ' => 'ჲ', 'Ⴣ' => 'ჳ', 'Ⴤ' => 'ჴ', 'Ⴥ' => 'ჵ',
  224. 'ᾈ' => 'ᾀ', 'ᾉ' => 'ᾁ', 'ᾊ' => 'ᾂ', 'ᾋ' => 'ᾃ', 'ᾌ' => 'ᾄ', 'ᾍ' => 'ᾅ',
  225. 'ᾎ' => 'ᾆ', 'ᾏ' => 'ᾇ', 'ᾘ' => 'ᾐ', 'ᾙ' => 'ᾑ', 'ᾚ' => 'ᾒ', 'ᾛ' => 'ᾓ',
  226. 'ᾜ' => 'ᾔ', 'ᾝ' => 'ᾕ', 'ᾞ' => 'ᾖ', 'ᾟ' => 'ᾗ', 'ᾨ' => 'ᾠ', 'ᾩ' => 'ᾡ',
  227. 'ᾪ' => 'ᾢ', 'ᾫ' => 'ᾣ', 'ᾬ' => 'ᾤ', 'ᾭ' => 'ᾥ', 'ᾮ' => 'ᾦ', 'ᾯ' => 'ᾧ',
  228. 'Ⓐ' => 'ⓐ', 'Ⓑ' => 'ⓑ', 'Ⓒ' => 'ⓒ', 'Ⓓ' => 'ⓓ', 'Ⓔ' => 'ⓔ', 'Ⓕ' => 'ⓕ',
  229. 'Ⓖ' => 'ⓖ', 'Ⓗ' => 'ⓗ', 'Ⓘ' => 'ⓘ', 'Ⓙ' => 'ⓙ', 'Ⓚ' => 'ⓚ', 'Ⓛ' => 'ⓛ',
  230. 'Ⓜ' => 'ⓜ', 'Ⓝ' => 'ⓝ', 'Ⓞ' => 'ⓞ', 'Ⓟ' => 'ⓟ', 'Ⓠ' => 'ⓠ', 'Ⓡ' => 'ⓡ',
  231. 'Ⓢ' => 'ⓢ', 'Ⓣ' => 'ⓣ', 'Ⓤ' => 'ⓤ', 'Ⓥ' => 'ⓥ', 'Ⓦ' => 'ⓦ', 'Ⓧ' => 'ⓧ',
  232. 'Ⓨ' => 'ⓨ', 'Ⓩ' => 'ⓩ'
  233. );
  234. /**
  235. * All uppercase UTF-8 characters mapped to lowercase characters
  236. * The uppercase and titlecase digraphs ('DŽ'/'Dž', 'LJ'/'Lj', 'NJ'/'Nj') both map to the same lowercase digraph, and the dotted capital 'İ' maps to a plain 'i'
  237. * @var array NOTE(review): the closing 'A'-'Z' run repeats the opening entries as rendered here (same values, so harmless); the upstream file may use fullwidth letters there - confirm
  238. */
  239. static private $upper_to_lower = array(
  240. 'A' => 'a', 'B' => 'b', 'C' => 'c', 'D' => 'd', 'E' => 'e', 'F' => 'f',
  241. 'G' => 'g', 'H' => 'h', 'I' => 'i', 'J' => 'j', 'K' => 'k', 'L' => 'l',
  242. 'M' => 'm', 'N' => 'n', 'O' => 'o', 'P' => 'p', 'Q' => 'q', 'R' => 'r',
  243. 'S' => 's', 'T' => 't', 'U' => 'u', 'V' => 'v', 'W' => 'w', 'X' => 'x',
  244. 'Y' => 'y', 'Z' => 'z', 'À' => 'à', 'Á' => 'á', 'Â' => 'â', 'Ã' => 'ã',
  245. 'Ä' => 'ä', 'Å' => 'å', 'Æ' => 'æ', 'Ç' => 'ç', 'È' => 'è', 'É' => 'é',
  246. 'Ê' => 'ê', 'Ë' => 'ë', 'Ì' => 'ì', 'Í' => 'í', 'Î' => 'î', 'Ï' => 'ï',
  247. 'Ð' => 'ð', 'Ñ' => 'ñ', 'Ò' => 'ò', 'Ó' => 'ó', 'Ô' => 'ô', 'Õ' => 'õ',
  248. 'Ö' => 'ö', 'Ø' => 'ø', 'Ù' => 'ù', 'Ú' => 'ú', 'Û' => 'û', 'Ü' => 'ü',
  249. 'Ý' => 'ý', 'Þ' => 'þ', 'Ā' => 'ā', 'Ă' => 'ă', 'Ą' => 'ą', 'Ć' => 'ć',
  250. 'Ĉ' => 'ĉ', 'Ċ' => 'ċ', 'Č' => 'č', 'Ď' => 'ď', 'Đ' => 'đ', 'Ē' => 'ē',
  251. 'Ĕ' => 'ĕ', 'Ė' => 'ė', 'Ę' => 'ę', 'Ě' => 'ě', 'Ĝ' => 'ĝ', 'Ğ' => 'ğ',
  252. 'Ġ' => 'ġ', 'Ģ' => 'ģ', 'Ĥ' => 'ĥ', 'Ħ' => 'ħ', 'Ĩ' => 'ĩ', 'Ī' => 'ī',
  253. 'Ĭ' => 'ĭ', 'Į' => 'į', 'İ' => 'i', 'IJ' => 'ij', 'Ĵ' => 'ĵ', 'Ķ' => 'ķ',
  254. 'Ĺ' => 'ĺ', 'Ļ' => 'ļ', 'Ľ' => 'ľ', 'Ŀ' => 'ŀ', 'Ł' => 'ł', 'Ń' => 'ń',
  255. 'Ņ' => 'ņ', 'Ň' => 'ň', 'Ŋ' => 'ŋ', 'Ō' => 'ō', 'Ŏ' => 'ŏ', 'Ő' => 'ő',
  256. 'Œ' => 'œ', 'Ŕ' => 'ŕ', 'Ŗ' => 'ŗ', 'Ř' => 'ř', 'Ś' => 'ś', 'Ŝ' => 'ŝ',
  257. 'Ş' => 'ş', 'Š' => 'š', 'Ţ' => 'ţ', 'Ť' => 'ť', 'Ŧ' => 'ŧ', 'Ũ' => 'ũ',
  258. 'Ū' => 'ū', 'Ŭ' => 'ŭ', 'Ů' => 'ů', 'Ű' => 'ű', 'Ų' => 'ų', 'Ŵ' => 'ŵ',
  259. 'Ŷ' => 'ŷ', 'Ÿ' => 'ÿ', 'Ź' => 'ź', 'Ż' => 'ż', 'Ž' => 'ž', 'Ɓ' => 'ɓ',
  260. 'Ƃ' => 'ƃ', 'Ƅ' => 'ƅ', 'Ɔ' => 'ɔ', 'Ƈ' => 'ƈ', 'Ɗ' => 'ɗ', 'Ƌ' => 'ƌ',
  261. 'Ǝ' => 'ɘ', 'Ə' => 'ə', 'Ɛ' => 'ɛ', 'Ƒ' => 'ƒ', 'Ɠ' => 'ɠ', 'Ɣ' => 'ɣ',
  262. 'Ɩ' => 'ɩ', 'Ɨ' => 'ɨ', 'Ƙ' => 'ƙ', 'Ɯ' => 'ɯ', 'Ɲ' => 'ɲ', 'Ɵ' => 'ɵ',
  263. 'Ơ' => 'ơ', 'Ƣ' => 'ƣ', 'Ƥ' => 'ƥ', 'Ƨ' => 'ƨ', 'Ʃ' => 'ʃ', 'Ƭ' => 'ƭ',
  264. 'Ʈ' => 'ʈ', 'Ư' => 'ư', 'Ʊ' => 'ʊ', 'Ʋ' => 'ʋ', 'Ƴ' => 'ƴ', 'Ƶ' => 'ƶ',
  265. 'Ʒ' => 'ʒ', 'Ƹ' => 'ƹ', 'Ƽ' => 'ƽ', 'DŽ' => 'dž', 'Dž' => 'dž', 'LJ' => 'lj',
  266. 'Lj' => 'lj', 'NJ' => 'nj', 'Nj' => 'nj', 'Ǎ' => 'ǎ', 'Ǐ' => 'ǐ', 'Ǒ' => 'ǒ',
  267. 'Ǔ' => 'ǔ', 'Ǖ' => 'ǖ', 'Ǘ' => 'ǘ', 'Ǚ' => 'ǚ', 'Ǜ' => 'ǜ', 'Ǟ' => 'ǟ',
  268. 'Ǡ' => 'ǡ', 'Ǣ' => 'ǣ', 'Ǥ' => 'ǥ', 'Ǧ' => 'ǧ', 'Ǩ' => 'ǩ', 'Ǫ' => 'ǫ',
  269. 'Ǭ' => 'ǭ', 'Ǯ' => 'ǯ', 'DZ' => 'dz', 'Ǵ' => 'ǵ', 'Ǻ' => 'ǻ', 'Ǽ' => 'ǽ',
  270. 'Ǿ' => 'ǿ', 'Ȁ' => 'ȁ', 'Ȃ' => 'ȃ', 'Ȅ' => 'ȅ', 'Ȇ' => 'ȇ', 'Ȉ' => 'ȉ',
  271. 'Ȋ' => 'ȋ', 'Ȍ' => 'ȍ', 'Ȏ' => 'ȏ', 'Ȑ' => 'ȑ', 'Ȓ' => 'ȓ', 'Ȕ' => 'ȕ',
  272. 'Ȗ' => 'ȗ', 'Ά' => 'ά', 'Έ' => 'έ', 'Ή' => 'ή', 'Ί' => 'ί', 'Ό' => 'ό',
  273. 'Ύ' => 'ύ', 'Ώ' => 'ώ', 'Α' => 'α', 'Β' => 'β', 'Γ' => 'γ', 'Δ' => 'δ',
  274. 'Ε' => 'ε', 'Ζ' => 'ζ', 'Η' => 'η', 'Θ' => 'θ', 'Ι' => 'ι', 'Κ' => 'κ',
  275. 'Λ' => 'λ', 'Μ' => 'μ', 'Ν' => 'ν', 'Ξ' => 'ξ', 'Ο' => 'ο', 'Π' => 'π',
  276. 'Ρ' => 'ρ', 'Σ' => 'σ', 'Τ' => 'τ', 'Υ' => 'υ', 'Φ' => 'φ', 'Χ' => 'χ',
  277. 'Ψ' => 'ψ', 'Ω' => 'ω', 'Ϊ' => 'ϊ', 'Ϋ' => 'ϋ', 'Ϣ' => 'ϣ', 'Ϥ' => 'ϥ',
  278. 'Ϧ' => 'ϧ', 'Ϩ' => 'ϩ', 'Ϫ' => 'ϫ', 'Ϭ' => 'ϭ', 'Ϯ' => 'ϯ', 'Ё' => 'ё',
  279. 'Ђ' => 'ђ', 'Ѓ' => 'ѓ', 'Є' => 'є', 'Ѕ' => 'ѕ', 'І' => 'і', 'Ї' => 'ї',
  280. 'Ј' => 'ј', 'Љ' => 'љ', 'Њ' => 'њ', 'Ћ' => 'ћ', 'Ќ' => 'ќ', 'Ў' => 'ў',
  281. 'Џ' => 'џ', 'А' => 'а', 'Б' => 'б', 'В' => 'в', 'Г' => 'г', 'Д' => 'д',
  282. 'Е' => 'е', 'Ж' => 'ж', 'З' => 'з', 'И' => 'и', 'Й' => 'й', 'К' => 'к',
  283. 'Л' => 'л', 'М' => 'м', 'Н' => 'н', 'О' => 'о', 'П' => 'п', 'Р' => 'р',
  284. 'С' => 'с', 'Т' => 'т', 'У' => 'у', 'Ф' => 'ф', 'Х' => 'х', 'Ц' => 'ц',
  285. 'Ч' => 'ч', 'Ш' => 'ш', 'Щ' => 'щ', 'Ъ' => 'ъ', 'Ы' => 'ы', 'Ь' => 'ь',
  286. 'Э' => 'э', 'Ю' => 'ю', 'Я' => 'я', 'Ѡ' => 'ѡ', 'Ѣ' => 'ѣ', 'Ѥ' => 'ѥ',
  287. 'Ѧ' => 'ѧ', 'Ѩ' => 'ѩ', 'Ѫ' => 'ѫ', 'Ѭ' => 'ѭ', 'Ѯ' => 'ѯ', 'Ѱ' => 'ѱ',
  288. 'Ѳ' => 'ѳ', 'Ѵ' => 'ѵ', 'Ѷ' => 'ѷ', 'Ѹ' => 'ѹ', 'Ѻ' => 'ѻ', 'Ѽ' => 'ѽ',
  289. 'Ѿ' => 'ѿ', 'Ҁ' => 'ҁ', 'Ґ' => 'ґ', 'Ғ' => 'ғ', 'Ҕ' => 'ҕ', 'Җ' => 'җ',
  290. 'Ҙ' => 'ҙ', 'Қ' => 'қ', 'Ҝ' => 'ҝ', 'Ҟ' => 'ҟ', 'Ҡ' => 'ҡ', 'Ң' => 'ң',
  291. 'Ҥ' => 'ҥ', 'Ҧ' => 'ҧ', 'Ҩ' => 'ҩ', 'Ҫ' => 'ҫ', 'Ҭ' => 'ҭ', 'Ү' => 'ү',
  292. 'Ұ' => 'ұ', 'Ҳ' => 'ҳ', 'Ҵ' => 'ҵ', 'Ҷ' => 'ҷ', 'Ҹ' => 'ҹ', 'Һ' => 'һ',
  293. 'Ҽ' => 'ҽ', 'Ҿ' => 'ҿ', 'Ӂ' => 'ӂ', 'Ӄ' => 'ӄ', 'Ӈ' => 'ӈ', 'Ӌ' => 'ӌ',
  294. 'Ӑ' => 'ӑ', 'Ӓ' => 'ӓ', 'Ӕ' => 'ӕ', 'Ӗ' => 'ӗ', 'Ә' => 'ә', 'Ӛ' => 'ӛ',
  295. 'Ӝ' => 'ӝ', 'Ӟ' => 'ӟ', 'Ӡ' => 'ӡ', 'Ӣ' => 'ӣ', 'Ӥ' => 'ӥ', 'Ӧ' => 'ӧ',
  296. 'Ө' => 'ө', 'Ӫ' => 'ӫ', 'Ӯ' => 'ӯ', 'Ӱ' => 'ӱ', 'Ӳ' => 'ӳ', 'Ӵ' => 'ӵ',
  297. 'Ӹ' => 'ӹ', 'Ա' => 'ա', 'Բ' => 'բ', 'Գ' => 'գ', 'Դ' => 'դ', 'Ե' => 'ե',
  298. 'Զ' => 'զ', 'Է' => 'է', 'Ը' => 'ը', 'Թ' => 'թ', 'Ժ' => 'ժ', 'Ի' => 'ի',
  299. 'Լ' => 'լ', 'Խ' => 'խ', 'Ծ' => 'ծ', 'Կ' => 'կ', 'Հ' => 'հ', 'Ձ' => 'ձ',
  300. 'Ղ' => 'ղ', 'Ճ' => 'ճ', 'Մ' => 'մ', 'Յ' => 'յ', 'Ն' => 'ն', 'Շ' => 'շ',
  301. 'Ո' => 'ո', 'Չ' => 'չ', 'Պ' => 'պ', 'Ջ' => 'ջ', 'Ռ' => 'ռ', 'Ս' => 'ս',
  302. 'Վ' => 'վ', 'Տ' => 'տ', 'Ր' => 'ր', 'Ց' => 'ց', 'Ւ' => 'ւ', 'Փ' => 'փ',
  303. 'Ք' => 'ք', 'Օ' => 'օ', 'Ֆ' => 'ֆ', 'Ⴀ' => 'ა', 'Ⴁ' => 'ბ', 'Ⴂ' => 'გ',
  304. 'Ⴃ' => 'დ', 'Ⴄ' => 'ე', 'Ⴅ' => 'ვ', 'Ⴆ' => 'ზ', 'Ⴇ' => 'თ', 'Ⴈ' => 'ი',
  305. 'Ⴉ' => 'კ', 'Ⴊ' => 'ლ', 'Ⴋ' => 'მ', 'Ⴌ' => 'ნ', 'Ⴍ' => 'ო', 'Ⴎ' => 'პ',
  306. 'Ⴏ' => 'ჟ', 'Ⴐ' => 'რ', 'Ⴑ' => 'ს', 'Ⴒ' => 'ტ', 'Ⴓ' => 'უ', 'Ⴔ' => 'ფ',
  307. 'Ⴕ' => 'ქ', 'Ⴖ' => 'ღ', 'Ⴗ' => 'ყ', 'Ⴘ' => 'შ', 'Ⴙ' => 'ჩ', 'Ⴚ' => 'ც',
  308. 'Ⴛ' => 'ძ', 'Ⴜ' => 'წ', 'Ⴝ' => 'ჭ', 'Ⴞ' => 'ხ', 'Ⴟ' => 'ჯ', 'Ⴠ' => 'ჰ',
  309. 'Ⴡ' => 'ჱ', 'Ⴢ' => 'ჲ', 'Ⴣ' => 'ჳ', 'Ⴤ' => 'ჴ', 'Ⴥ' => 'ჵ', 'Ḁ' => 'ḁ',
  310. 'Ḃ' => 'ḃ', 'Ḅ' => 'ḅ', 'Ḇ' => 'ḇ', 'Ḉ' => 'ḉ', 'Ḋ' => 'ḋ', 'Ḍ' => 'ḍ',
  311. 'Ḏ' => 'ḏ', 'Ḑ' => 'ḑ', 'Ḓ' => 'ḓ', 'Ḕ' => 'ḕ', 'Ḗ' => 'ḗ', 'Ḙ' => 'ḙ',
  312. 'Ḛ' => 'ḛ', 'Ḝ' => 'ḝ', 'Ḟ' => 'ḟ', 'Ḡ' => 'ḡ', 'Ḣ' => 'ḣ', 'Ḥ' => 'ḥ',
  313. 'Ḧ' => 'ḧ', 'Ḩ' => 'ḩ', 'Ḫ' => 'ḫ', 'Ḭ' => 'ḭ', 'Ḯ' => 'ḯ', 'Ḱ' => 'ḱ',
  314. 'Ḳ' => 'ḳ', 'Ḵ' => 'ḵ', 'Ḷ' => 'ḷ', 'Ḹ' => 'ḹ', 'Ḻ' => 'ḻ', 'Ḽ' => 'ḽ',
  315. 'Ḿ' => 'ḿ', 'Ṁ' => 'ṁ', 'Ṃ' => 'ṃ', 'Ṅ' => 'ṅ', 'Ṇ' => 'ṇ', 'Ṉ' => 'ṉ',
  316. 'Ṋ' => 'ṋ', 'Ṍ' => 'ṍ', 'Ṏ' => 'ṏ', 'Ṑ' => 'ṑ', 'Ṓ' => 'ṓ', 'Ṕ' => 'ṕ',
  317. 'Ṗ' => 'ṗ', 'Ṙ' => 'ṙ', 'Ṛ' => 'ṛ', 'Ṝ' => 'ṝ', 'Ṟ' => 'ṟ', 'Ṡ' => 'ṡ',
  318. 'Ṣ' => 'ṣ', 'Ṥ' => 'ṥ', 'Ṧ' => 'ṧ', 'Ṩ' => 'ṩ', 'Ṫ' => 'ṫ', 'Ṭ' => 'ṭ',
  319. 'Ṯ' => 'ṯ', 'Ṱ' => 'ṱ', 'Ṳ' => 'ṳ', 'Ṵ' => 'ṵ', 'Ṷ' => 'ṷ', 'Ṹ' => 'ṹ',
  320. 'Ṻ' => 'ṻ', 'Ṽ' => 'ṽ', 'Ṿ' => 'ṿ', 'Ẁ' => 'ẁ', 'Ẃ' => 'ẃ', 'Ẅ' => 'ẅ',
  321. 'Ẇ' => 'ẇ', 'Ẉ' => 'ẉ', 'Ẋ' => 'ẋ', 'Ẍ' => 'ẍ', 'Ẏ' => 'ẏ', 'Ẑ' => 'ẑ',
  322. 'Ẓ' => 'ẓ', 'Ẕ' => 'ẕ', 'Ạ' => 'ạ', 'Ả' => 'ả', 'Ấ' => 'ấ', 'Ầ' => 'ầ',
  323. 'Ẩ' => 'ẩ', 'Ẫ' => 'ẫ', 'Ậ' => 'ậ', 'Ắ' => 'ắ', 'Ằ' => 'ằ', 'Ẳ' => 'ẳ',
  324. 'Ẵ' => 'ẵ', 'Ặ' => 'ặ', 'Ẹ' => 'ẹ', 'Ẻ' => 'ẻ', 'Ẽ' => 'ẽ', 'Ế' => 'ế',
  325. 'Ề' => 'ề', 'Ể' => 'ể', 'Ễ' => 'ễ', 'Ệ' => 'ệ', 'Ỉ' => 'ỉ', 'Ị' => 'ị',
  326. 'Ọ' => 'ọ', 'Ỏ' => 'ỏ', 'Ố' => 'ố', 'Ồ' => 'ồ', 'Ổ' => 'ổ', 'Ỗ' => 'ỗ',
  327. 'Ộ' => 'ộ', 'Ớ' => 'ớ', 'Ờ' => 'ờ', 'Ở' => 'ở', 'Ỡ' => 'ỡ', 'Ợ' => 'ợ',
  328. 'Ụ' => 'ụ', 'Ủ' => 'ủ', 'Ứ' => 'ứ', 'Ừ' => 'ừ', 'Ử' => 'ử', 'Ữ' => 'ữ',
  329. 'Ự' => 'ự', 'Ỳ' => 'ỳ', 'Ỵ' => 'ỵ', 'Ỷ' => 'ỷ', 'Ỹ' => 'ỹ', 'Ἀ' => 'ἀ',
  330. 'Ἁ' => 'ἁ', 'Ἂ' => 'ἂ', 'Ἃ' => 'ἃ', 'Ἄ' => 'ἄ', 'Ἅ' => 'ἅ', 'Ἆ' => 'ἆ',
  331. 'Ἇ' => 'ἇ', 'Ἐ' => 'ἐ', 'Ἑ' => 'ἑ', 'Ἒ' => 'ἒ', 'Ἓ' => 'ἓ', 'Ἔ' => 'ἔ',
  332. 'Ἕ' => 'ἕ', 'Ἠ' => 'ἠ', 'Ἡ' => 'ἡ', 'Ἢ' => 'ἢ', 'Ἣ' => 'ἣ', 'Ἤ' => 'ἤ',
  333. 'Ἥ' => 'ἥ', 'Ἦ' => 'ἦ', 'Ἧ' => 'ἧ', 'Ἰ' => 'ἰ', 'Ἱ' => 'ἱ', 'Ἲ' => 'ἲ',
  334. 'Ἳ' => 'ἳ', 'Ἴ' => 'ἴ', 'Ἵ' => 'ἵ', 'Ἶ' => 'ἶ', 'Ἷ' => 'ἷ', 'Ὀ' => 'ὀ',
  335. 'Ὁ' => 'ὁ', 'Ὂ' => 'ὂ', 'Ὃ' => 'ὃ', 'Ὄ' => 'ὄ', 'Ὅ' => 'ὅ', 'Ὑ' => 'ὑ',
  336. 'Ὓ' => 'ὓ', 'Ὕ' => 'ὕ', 'Ὗ' => 'ὗ', 'Ὠ' => 'ὠ', 'Ὡ' => 'ὡ', 'Ὢ' => 'ὢ',
  337. 'Ὣ' => 'ὣ', 'Ὤ' => 'ὤ', 'Ὥ' => 'ὥ', 'Ὦ' => 'ὦ', 'Ὧ' => 'ὧ', 'ᾈ' => 'ᾀ',
  338. 'ᾉ' => 'ᾁ', 'ᾊ' => 'ᾂ', 'ᾋ' => 'ᾃ', 'ᾌ' => 'ᾄ', 'ᾍ' => 'ᾅ', 'ᾎ' => 'ᾆ',
  339. 'ᾏ' => 'ᾇ', 'ᾘ' => 'ᾐ', 'ᾙ' => 'ᾑ', 'ᾚ' => 'ᾒ', 'ᾛ' => 'ᾓ', 'ᾜ' => 'ᾔ',
  340. 'ᾝ' => 'ᾕ', 'ᾞ' => 'ᾖ', 'ᾟ' => 'ᾗ', 'ᾨ' => 'ᾠ', 'ᾩ' => 'ᾡ', 'ᾪ' => 'ᾢ',
  341. 'ᾫ' => 'ᾣ', 'ᾬ' => 'ᾤ', 'ᾭ' => 'ᾥ', 'ᾮ' => 'ᾦ', 'ᾯ' => 'ᾧ', 'Ᾰ' => 'ᾰ',
  342. 'Ᾱ' => 'ᾱ', 'Ῐ' => 'ῐ', 'Ῑ' => 'ῑ', 'Ῠ' => 'ῠ', 'Ῡ' => 'ῡ', 'Ⓐ' => 'ⓐ',
  343. 'Ⓑ' => 'ⓑ', 'Ⓒ' => 'ⓒ', 'Ⓓ' => 'ⓓ', 'Ⓔ' => 'ⓔ', 'Ⓕ' => 'ⓕ', 'Ⓖ' => 'ⓖ',
  344. 'Ⓗ' => 'ⓗ', 'Ⓘ' => 'ⓘ', 'Ⓙ' => 'ⓙ', 'Ⓚ' => 'ⓚ', 'Ⓛ' => 'ⓛ', 'Ⓜ' => 'ⓜ',
  345. 'Ⓝ' => 'ⓝ', 'Ⓞ' => 'ⓞ', 'Ⓟ' => 'ⓟ', 'Ⓠ' => 'ⓠ', 'Ⓡ' => 'ⓡ', 'Ⓢ' => 'ⓢ',
  346. 'Ⓣ' => 'ⓣ', 'Ⓤ' => 'ⓤ', 'Ⓥ' => 'ⓥ', 'Ⓦ' => 'ⓦ', 'Ⓧ' => 'ⓧ', 'Ⓨ' => 'ⓨ',
  347. 'Ⓩ' => 'ⓩ', 'A' => 'a', 'B' => 'b', 'C' => 'c', 'D' => 'd', 'E' => 'e',
  348. 'F' => 'f', 'G' => 'g', 'H' => 'h', 'I' => 'i', 'J' => 'j', 'K' => 'k',
  349. 'L' => 'l', 'M' => 'm', 'N' => 'n', 'O' => 'o', 'P' => 'p', 'Q' => 'q',
  350. 'R' => 'r', 'S' => 's', 'T' => 't', 'U' => 'u', 'V' => 'v', 'W' => 'w',
  351. 'X' => 'x', 'Y' => 'y', 'Z' => 'z'
  352. );
  353. /**
  354. * A mapping of all ASCII-based latin characters, punctuation, symbols and number forms to ASCII.
  355. *
  356. * Includes elements from the following Unicode blocks:
  357. *
  358. * - Latin-1 Supplement
  359. * - Latin Extended-A
  360. * - Latin Extended-B
  361. * - IPA Extensions
  362. * - Latin Extended Additional
  363. * - General Punctuation
  364. * - Letterlike symbols
  365. * - Number Forms
  366. *
  367. * @var array
  368. */
  369. static private $utf8_to_ascii = array(
  370. // Latin-1 Supplement
  371. '©' => '(c)', '«' => '<<', '®' => '(R)', '»' => '>>', '¼' => '1/4',
  372. '½' => '1/2', '¾' => '3/4', 'À' => 'A', 'Á' => 'A', 'Â' => 'A',
  373. 'Ã' => 'A', 'Ä' => 'A', 'Å' => 'A', 'Æ' => 'AE', 'Ç' => 'C',
  374. 'È' => 'E', 'É' => 'E', 'Ê' => 'E', 'Ë' => 'E', 'Ì' => 'I',
  375. 'Í' => 'I', 'Î' => 'I', 'Ï' => 'I', 'Ñ' => 'N', 'Ò' => 'O',
  376. 'Ó' => 'O', 'Ô' => 'O', 'Õ' => 'O', 'Ö' => 'O', 'Ø' => 'O',
  377. 'Ù' => 'U', 'Ú' => 'U', 'Û' => 'U', 'Ü' => 'U', 'Ý' => 'Y',
  378. 'à' => 'a', 'á' => 'a', 'â' => 'a', 'ã' => 'a', 'ä' => 'a',
  379. 'å' => 'a', 'æ' => 'ae', 'ç' => 'c', 'è' => 'e', 'é' => 'e',
  380. 'ê' => 'e', 'ë' => 'e', 'ì' => 'i', 'í' => 'i', 'î' => 'i',
  381. 'ï' => 'i', 'ñ' => 'n', 'ò' => 'o', 'ó' => 'o', 'ô' => 'o',
  382. 'õ' => 'o', 'ö' => 'o', 'ø' => 'o', 'ù' => 'u', 'ú' => 'u',
  383. 'û' => 'u', 'ü' => 'u', 'ý' => 'y', 'ÿ' => 'y',
  384. // Latin Extended-A
  385. 'Ā' => 'A', 'ā' => 'a', 'Ă' => 'A', 'ă' => 'a', 'Ą' => 'A',
  386. 'ą' => 'a', 'Ć' => 'C', 'ć' => 'c', 'Ĉ' => 'C', 'ĉ' => 'c',
  387. 'Ċ' => 'C', 'ċ' => 'c', 'Č' => 'C', 'č' => 'c', 'Ď' => 'D',
  388. 'ď' => 'd', 'Đ' => 'D', 'đ' => 'd', 'Ē' => 'E', 'ē' => 'e',
  389. 'Ĕ' => 'E', 'ĕ' => 'e', 'Ė' => 'E', 'ė' => 'e', 'Ę' => 'E',
  390. 'ę' => 'e', 'Ě' => 'E', 'ě' => 'e', 'Ĝ' => 'G', 'ĝ' => 'g',
  391. 'Ğ' => 'G', 'ğ' => 'g', 'Ġ' => 'G', 'ġ' => 'g', 'Ģ' => 'G',
  392. 'ģ' => 'g', 'Ĥ' => 'H', 'ĥ' => 'h', 'Ħ' => 'H', 'ħ' => 'h',
  393. 'Ĩ' => 'I', 'ĩ' => 'i', 'Ī' => 'I', 'ī' => 'i', 'Ĭ' => 'I',
  394. 'ĭ' => 'i', 'Į' => 'I', 'į' => 'i', 'İ' => 'I', 'ı' => 'i',
  395. 'IJ' => 'IJ', 'ij' => 'ij', 'Ĵ' => 'J', 'ĵ' => 'j', 'Ķ' => 'K',
  396. 'ķ' => 'k', 'Ĺ' => 'L', 'ĺ' => 'l', 'Ļ' => 'L', 'ļ' => 'l',
  397. 'Ľ' => 'L', 'ľ' => 'l', 'Ŀ' => 'L', 'ŀ' => 'l', 'Ł' => 'L',
  398. 'ł' => 'l', 'Ń' => 'N', 'ń' => 'n', 'Ņ' => 'N', 'ņ' => 'n',
  399. 'Ň' => 'N', 'ň' => 'n', 'ʼn' => "'n", 'Ŋ' => 'N', 'ŋ' => 'n',
  400. 'Ō' => 'O', 'ō' => 'o', 'Ŏ' => 'O', 'ŏ' => 'o', 'Ő' => 'O',
  401. 'ő' => 'o', 'Œ' => 'OE', 'œ' => 'oe', 'Ŕ' => 'R', 'ŕ' => 'r',
  402. 'Ŗ' => 'R', 'ŗ' => 'r', 'Ř' => 'R', 'ř' => 'r', 'Ś' => 'S',
  403. 'ś' => 's', 'Ŝ' => 'S', 'ŝ' => 's', 'Ş' => 'S', 'ş' => 's',
  404. 'Š' => 'S', 'š' => 's', 'Ţ' => 'T', 'ţ' => 't', 'Ť' => 'T',
  405. 'ť' => 't', 'Ŧ' => 'T', 'ŧ' => 't', 'Ũ' => 'U', 'ũ' => 'u',
  406. 'Ū' => 'U', 'ū' => 'u', 'Ŭ' => 'U', 'ŭ' => 'u', 'Ů' => 'U',
  407. 'ů' => 'u', 'Ű' => 'U', 'ű' => 'u', 'Ų' => 'U', 'ų' => 'u',
  408. 'Ŵ' => 'W', 'ŵ' => 'w', 'Ŷ' => 'Y', 'ŷ' => 'y', 'Ÿ' => 'Y',
  409. 'Ź' => 'Z', 'ź' => 'z', 'Ż' => 'Z', 'ż' => 'z', 'Ž' => 'Z',
  410. 'ž' => 'z',
  411. // Latin Extended-B
  412. 'ƀ' => 'b', 'Ɓ' => 'B', 'Ƃ' => 'B', 'ƃ' => 'b', 'Ɔ' => 'O',
  413. 'Ƈ' => 'C', 'ƈ' => 'c', 'Ɖ' => 'D', 'Ɗ' => 'D', 'Ƌ' => 'D',
  414. 'ƌ' => 'd', 'Ǝ' => 'E', 'Ɛ' => 'E', 'Ƒ' => 'F', 'ƒ' => 'f',
  415. 'Ɠ' => 'G', 'Ɨ' => 'I', 'Ƙ' => 'K', 'ƙ' => 'k', 'ƚ' => 'l',
  416. 'Ɯ' => 'M', 'Ɲ' => 'N', 'ƞ' => 'n', 'Ɵ' => 'O', 'Ơ' => 'O',
  417. 'ơ' => 'o', 'Ƣ' => 'OI', 'ƣ' => 'oi', 'Ƥ' => 'P', 'ƥ' => 'p',
  418. 'ƫ' => 't', 'Ƭ' => 'T', 'ƭ' => 't', 'Ʈ' => 'T', 'Ư' => 'U',
  419. 'ư' => 'u', 'Ʋ' => 'V', 'Ƴ' => 'Y', 'ƴ' => 'y', 'Ƶ' => 'Z',
  420. 'ƶ' => 'z', 'ƻ' => '2', 'DŽ' => 'DZ', 'Dž' => 'Dz', 'dž' => 'dz',
  421. 'LJ' => 'LJ', 'Lj' => 'Lj', 'lj' => 'lj', 'NJ' => 'Nj', 'Nj' => 'Nj',
  422. 'nj' => 'nj', 'Ǎ' => 'A', 'ǎ' => 'a', 'Ǐ' => 'I', 'ǐ' => 'i',
  423. 'Ǒ' => 'O', 'ǒ' => 'o', 'Ǔ' => 'U', 'ǔ' => 'u', 'Ǖ' => 'U',
  424. 'ǖ' => 'u', 'Ǘ' => 'U', 'ǘ' => 'u', 'Ǚ' => 'U', 'ǚ' => 'u',
  425. 'Ǜ' => 'U', 'ǜ' => 'u', 'ǝ' => 'e', 'Ǟ' => 'A', 'ǟ' => 'a',
  426. 'Ǡ' => 'A', 'ǡ' => 'a', 'Ǣ' => 'AE', 'ǣ' => 'ae', 'Ǥ' => 'G',
  427. 'ǥ' => 'g', 'Ǧ' => 'G', 'ǧ' => 'g', 'Ǩ' => 'K', 'ǩ' => 'k',
  428. 'Ǫ' => 'O', 'ǫ' => 'o', 'Ǭ' => 'O', 'ǭ' => 'o', 'ǰ' => 'j',
  429. 'DZ' => 'DZ', 'Dz' => 'Dz', 'dz' => 'dz', 'Ǵ' => 'G', 'ǵ' => 'g',
  430. 'Ǹ' => 'N', 'ǹ' => 'n', 'Ǻ' => 'A', 'ǻ' => 'a', 'Ǽ' => 'AE',
  431. 'ǽ' => 'ae', 'Ǿ' => 'O', 'ǿ' => 'o', 'Ȁ' => 'A', 'ȁ' => 'a',
  432. 'Ȃ' => 'A', 'ȃ' => 'a', 'Ȅ' => 'E', 'ȅ' => 'e', 'Ȇ' => 'E',
  433. 'ȇ' => 'e', 'Ȉ' => 'I', 'ȉ' => 'i', 'Ȋ' => 'I', 'ȋ' => 'i',
  434. 'Ȍ' => 'O', 'ȍ' => 'o', 'Ȏ' => 'O', 'ȏ' => 'o', 'Ȑ' => 'R',
  435. 'ȑ' => 'r', 'Ȓ' => 'R', 'ȓ' => 'r', 'Ȕ' => 'U', 'ȕ' => 'u',
  436. 'Ȗ' => 'U', 'ȗ' => 'u', 'Ș' => 'S', 'ș' => 's', 'Ț' => 'T',
  437. 'ț' => 't', 'Ȟ' => 'H', 'ȟ' => 'h', 'Ƞ' => 'N', 'ȡ' => 'd',
  438. 'Ȥ' => 'Z', 'ȥ' => 'z', 'Ȧ' => 'A', 'ȧ' => 'a', 'Ȩ' => 'E',
  439. 'ȩ' => 'e', 'Ȫ' => 'O', 'ȫ' => 'o', 'Ȭ' => 'O', 'ȭ' => 'o',
  440. 'Ȯ' => 'O', 'ȯ' => 'o', 'Ȱ' => 'O', 'ȱ' => 'o', 'Ȳ' => 'Y',
  441. 'ȳ' => 'y', 'ȴ' => 'l', 'ȵ' => 'n', 'ȶ' => 't', 'ȷ' => 'j',
  442. 'ȸ' => 'db', 'ȹ' => 'qp', 'Ⱥ' => 'A', 'Ȼ' => 'C', 'ȼ' => 'c',
  443. 'Ƚ' => 'L', 'Ⱦ' => 'T', 'ȿ' => 's', 'ɀ' => 'z', 'Ƀ' => 'B',
  444. 'Ʉ' => 'U', 'Ʌ' => 'V', 'Ɇ' => 'E', 'ɇ' => 'e', 'Ɉ' => 'J',
  445. 'ɉ' => 'j', 'Ɋ' => 'Q', 'ɋ' => 'q', 'Ɍ' => 'R', 'ɍ' => 'r',
  446. 'Ɏ' => 'Y', 'ɏ' => 'y',
  447. // IPA Extensions
  448. 'ɐ' => 'a', 'ɓ' => 'b', 'ɔ' => 'o', 'ɕ' => 'c', 'ɖ' => 'd',
  449. 'ɗ' => 'd', 'ɘ' => 'e', 'ɛ' => 'e', 'ɜ' => 'e', 'ɝ' => 'e',
  450. 'ɞ' => 'e', 'ɟ' => 'j', 'ɠ' => 'g', 'ɡ' => 'g', 'ɢ' => 'G',
  451. 'ɥ' => 'h', 'ɦ' => 'h', 'ɨ' => 'i', 'ɪ' => 'I', 'ɫ' => 'l',
  452. 'ɬ' => 'l', 'ɭ' => 'l', 'ɯ' => 'm', 'ɰ' => 'm', 'ɱ' => 'm',
  453. 'ɲ' => 'n', 'ɳ' => 'n', 'ɴ' => 'N', 'ɵ' => 'o', 'ɶ' => 'OE',
  454. 'ɹ' => 'r', 'ɺ' => 'r', 'ɻ' => 'r', 'ɼ' => 'r', 'ɽ' => 'r',
  455. 'ɾ' => 'r', 'ɿ' => 'r', 'ʀ' => 'R', 'ʁ' => 'R', 'ʂ' => 's',
  456. 'ʇ' => 't', 'ʈ' => 't', 'ʉ' => 'u', 'ʋ' => 'v', 'ʌ' => 'v',
  457. 'ʍ' => 'w', 'ʎ' => 'y', 'ʏ' => 'Y', 'ʐ' => 'z', 'ʑ' => 'z',
  458. 'ʗ' => 'C', 'ʙ' => 'B', 'ʚ' => 'e', 'ʛ' => 'G', 'ʜ' => 'H',
  459. 'ʝ' => 'j', 'ʞ' => 'k', 'ʟ' => 'L', 'ʠ' => 'q', 'ʣ' => 'dz',
  460. 'ʥ' => 'dz', 'ʦ' => 'ts', 'ʨ' => 'tc', 'ʪ' => 'ls', 'ʫ' => 'lz',
  461. 'ʮ' => 'h', 'ʯ' => 'h',
  462. // Latin Extended Additional
  463. 'Ḁ' => 'A', 'ḁ' => 'a', 'Ḃ' => 'B', 'ḃ' => 'b', 'Ḅ' => 'B',
  464. 'ḅ' => 'b', 'Ḇ' => 'B', 'ḇ' => 'b', 'Ḉ' => 'C', 'ḉ' => 'c',
  465. 'Ḋ' => 'D', 'ḋ' => 'd', 'Ḍ' => 'D', 'ḍ' => 'd', 'Ḏ' => 'D',
  466. 'ḏ' => 'd', 'Ḑ' => 'D', 'ḑ' => 'd', 'Ḓ' => 'D', 'ḓ' => 'd',
  467. 'Ḕ' => 'E', 'ḕ' => 'e', 'Ḗ' => 'E', 'ḗ' => 'e', 'Ḙ' => 'E',
  468. 'ḙ' => 'e', 'Ḛ' => 'E', 'ḛ' => 'e', 'Ḝ' => 'E', 'ḝ' => 'e',
  469. 'Ḟ' => 'F', 'ḟ' => 'f', 'Ḡ' => 'G', 'ḡ' => 'g', 'Ḣ' => 'H',
  470. 'ḣ' => 'h', 'Ḥ' => 'H', 'ḥ' => 'h', 'Ḧ' => 'H', 'ḧ' => 'h',
  471. 'Ḩ' => 'H', 'ḩ' => 'h', 'Ḫ' => 'H', 'ḫ' => 'h', 'Ḭ' => 'I',
  472. 'ḭ' => 'i', 'Ḯ' => 'I', 'ḯ' => 'i', 'Ḱ' => 'K', 'ḱ' => 'k',
  473. 'Ḳ' => 'K', 'ḳ' => 'k', 'Ḵ' => 'K', 'ḵ' => 'k', 'Ḷ' => 'L',
  474. 'ḷ' => 'l', 'Ḹ' => 'L', 'ḹ' => 'l', 'Ḻ' => 'L', 'ḻ' => 'l',
  475. 'Ḽ' => 'L', 'ḽ' => 'l', 'Ḿ' => 'M', 'ḿ' => 'm', 'Ṁ' => 'M',
  476. 'ṁ' => 'm', 'Ṃ' => 'M', 'ṃ' => 'm', 'Ṅ' => 'N', 'ṅ' => 'n',
  477. 'Ṇ' => 'N', 'ṇ' => 'n', 'Ṉ' => 'N', 'ṉ' => 'n', 'Ṋ' => 'N',
  478. 'ṋ' => 'n', 'Ṍ' => 'O', 'ṍ' => 'o', 'Ṏ' => 'O', 'ṏ' => 'o',
  479. 'Ṑ' => 'O', 'ṑ' => 'o', 'Ṓ' => 'O', 'ṓ' => 'o', 'Ṕ' => 'P',
  480. 'ṕ' => 'p', 'Ṗ' => 'P', 'ṗ' => 'p', 'Ṙ' => 'R', 'ṙ' => 'r',
  481. 'Ṛ' => 'R', 'ṛ' => 'r', 'Ṝ' => 'R', 'ṝ' => 'r', 'Ṟ' => 'R',
  482. 'ṟ' => 'r', 'Ṡ' => 'S', 'ṡ' => 's', 'Ṣ' => 'S', 'ṣ' => 's',
  483. 'Ṥ' => 'S', 'ṥ' => 's', 'Ṧ' => 'S', 'ṧ' => 's', 'Ṩ' => 'S',
  484. 'ṩ' => 's', 'Ṫ' => 'T', 'ṫ' => 't', 'Ṭ' => 'T', 'ṭ' => 't',
  485. 'Ṯ' => 'T', 'ṯ' => 't', 'Ṱ' => 'T', 'ṱ' => 't', 'Ṳ' => 'U',
  486. 'ṳ' => 'u', 'Ṵ' => 'U', 'ṵ' => 'u', 'Ṷ' => 'U', 'ṷ' => 'u',
  487. 'Ṹ' => 'U', 'ṹ' => 'u', 'Ṻ' => 'U', 'ṻ' => 'u', 'Ṽ' => 'V',
  488. 'ṽ' => 'v', 'Ṿ' => 'V', 'ṿ' => 'v', 'Ẁ' => 'W', 'ẁ' => 'w',
  489. 'Ẃ' => 'W', 'ẃ' => 'w', 'Ẅ' => 'W', 'ẅ' => 'w', 'Ẇ' => 'W',
  490. 'ẇ' => 'w', 'Ẉ' => 'W', 'ẉ' => 'w', 'Ẋ' => 'X', 'ẋ' => 'x',
  491. 'Ẍ' => 'X', 'ẍ' => 'x', 'Ẏ' => 'Y', 'ẏ' => 'y', 'Ẑ' => 'Z',
  492. 'ẑ' => 'z', 'Ẓ' => 'Z', 'ẓ' => 'z', 'Ẕ' => 'Z', 'ẕ' => 'z',
  493. 'ẖ' => 'h', 'ẗ' => 't', 'ẘ' => 'w', 'ẙ' => 'y', 'ẚ' => 'a',
  494. 'Ạ' => 'A', 'ạ' => 'a', 'Ả' => 'A', 'ả' => 'a', 'Ấ' => 'A',
  495. 'ấ' => 'a', 'Ầ' => 'A', 'ầ' => 'a', 'Ẩ' => 'A', 'ẩ' => 'a',
  496. 'Ẫ' => 'A', 'ẫ' => 'a', 'Ậ' => 'A', 'ậ' => 'a', 'Ắ' => 'A',
  497. 'ắ' => 'a', 'Ằ' => 'A', 'ằ' => 'a', 'Ẳ' => 'A', 'ẳ' => 'a',
  498. 'Ẵ' => 'A', 'ẵ' => 'a', 'Ặ' => 'A', 'ặ' => 'a', 'Ẹ' => 'E',
  499. 'ẹ' => 'e', 'Ẻ' => 'E', 'ẻ' => 'e', 'Ẽ' => 'E', 'ẽ' => 'e',
  500. 'Ế' => 'E', 'ế' => 'e', 'Ề' => 'E', 'ề' => 'e', 'Ể' => 'E',
  501. 'ể' => 'e', 'Ễ' => 'E', 'ễ' => 'e', 'Ệ' => 'E', 'ệ' => 'e',
  502. 'Ỉ' => 'I', 'ỉ' => 'i', 'Ị' => 'I', 'ị' => 'i', 'Ọ' => 'O',
  503. 'ọ' => 'o', 'Ỏ' => 'O', 'ỏ' => 'o', 'Ố' => 'O', 'ố' => 'o',
  504. 'Ồ' => 'O', 'ồ' => 'o', 'Ổ' => 'O', 'ổ' => 'o', 'Ỗ' => 'O',
  505. 'ỗ' => 'o', 'Ộ' => 'O', 'ộ' => 'o', 'Ớ' => 'O', 'ớ' => 'o',
  506. 'Ờ' => 'O', 'ờ' => 'o', 'Ở' => 'O', 'ở' => 'o', 'Ỡ' => 'O',
  507. 'ỡ' => 'o', 'Ợ' => 'O', 'ợ' => 'o', 'Ụ' => 'U', 'ụ' => 'u',
  508. 'Ủ' => 'U', 'ủ' => 'u', 'Ứ' => 'U', 'ứ' => 'u', 'Ừ' => 'U',
  509. 'ừ' => 'u', 'Ử' => 'U', 'ử' => 'u', 'Ữ' => 'U', 'ữ' => 'u',
  510. 'Ự' => 'U', 'ự' => 'u', 'Ỳ' => 'Y', 'ỳ' => 'y', 'Ỵ' => 'Y',
  511. 'ỵ' => 'y', 'Ỷ' => 'Y', 'ỷ' => 'y', 'Ỹ' => 'Y', 'ỹ' => 'y',
  512. // General Punctuation
  513. ' ' => ' ', ' ' => ' ', ' ' => ' ', ' ' => ' ', ' ' => ' ',
  514. ' ' => ' ', ' ' => ' ', ' ' => ' ', ' ' => ' ', ' ' => ' ',
  515. ' ' => ' ', '​' => '', '‌' => '', '‍' => '', '‐' => '-',
  516. '‑' => '-', '‒' => '-', '–' => '-', '—' => '-', '―' => '-',
  517. '‖' => '||', '‘' => "'", '’' => "'", '‚' => ',', '‛' => "'",
  518. '“' => '"', '”' => '"', '‟' => '"', '․' => '.', '‥' => '..',
  519. '…' => '...', ' ' => ' ', '′' => "'", '″' => '"', '‴' => '\'"',
  520. '‵' => "'", '‶' => '"', '‷' => '"\'', '‹' => '<', '›' => '>',
  521. '‼' => '!!', '‽' => '?!', '⁄' => '/', '⁇' => '?/', '⁈' => '?!',
  522. '⁉' => '!?',
  523. // Letterlike Symbols
  524. '℠' => 'SM', '™' => 'TM',
  525. // Number Forms
  526. '⅓' => '1/3', '⅔' => '2/3', '⅕' => '1/5', '⅖' => '2/5', '⅗' => '3/5',
  527. '⅘' => '4/5', '⅙' => '1/6', '⅚' => '5/6', '⅛' => '1/8', '⅜' => '3/8',
  528. '⅝' => '5/8', '⅞' => '7/8', 'Ⅰ' => 'I', 'Ⅱ' => 'II', 'Ⅲ' => 'III',
  529. 'Ⅳ' => 'IV', 'Ⅴ' => 'V', 'Ⅵ' => 'Vi', 'Ⅶ' => 'VII', 'Ⅷ' => 'VIII',
  530. 'Ⅸ' => 'IX', 'Ⅹ' => 'X', 'Ⅺ' => 'XI', 'Ⅻ' => 'XII', 'Ⅼ' => 'L',
  531. 'Ⅽ' => 'C', 'Ⅾ' => 'D', 'Ⅿ' => 'M', 'ⅰ' => 'i', 'ⅱ' => 'ii',
  532. 'ⅲ' => 'iii', 'ⅳ' => 'iv', 'ⅴ' => 'v', 'ⅵ' => 'vi', 'ⅶ' => 'vii',
  533. 'ⅷ' => 'viii','ⅸ' => 'ix', 'ⅹ' => 'x', 'ⅺ' => 'xi', 'ⅻ' => 'xii',
  534. 'ⅼ' => 'l', 'ⅽ' => 'c', 'ⅾ' => 'd', 'ⅿ' => 'm'
  535. );
  /**
   * If the [http://php.net/mbstring mbstring] extension is available
   *
   * Starts out as `NULL`, meaning the check has not been performed yet.
   * ::checkMbString() replaces it with the boolean result of
   * `extension_loaded('mbstring')`.
   *
   * @var boolean
   */
  static private $mbstring_available = NULL;
  /**
   * Maps UTF-8 ASCII-based latin characters, punctuation, symbols and number forms to ASCII
   *
   * Every character found in the class' UTF-8 to ASCII map is swapped for
   * its ASCII replacement. Any bytes that are still outside of the ASCII
   * range after the mapping are removed.
   *
   * This function is most useful for situations that only allow ASCII,
   * such as in URLs.
   *
   * Translates elements from the following unicode blocks:
   *
   *  - Latin-1 Supplement
   *  - Latin Extended-A
   *  - Latin Extended-B
   *  - IPA Extensions
   *  - Latin Extended Additional
   *  - General Punctuation
   *  - Letterlike symbols
   *  - Number Forms
   *
   * @internal
   *
   * @param  string $string  The string to convert
   * @return string  The input string in pure ASCII
   */
  static public function ascii($string)
  {
      // Only strings containing bytes outside of the ASCII range need any work
      if (self::detect($string)) {
          $mapped = strtr($string, self::$utf8_to_ascii);

          // Whatever the map could not translate is dropped
          return preg_replace('#[^\x00-\x7F]#', '', $mapped);
      }

      return $string;
  }
  /**
   * Records whether the [http://php.net/mbstring mbstring] extension is loaded
   *
   * The result is stored in the static `$mbstring_available` property so
   * callers can test that property instead of repeating the lookup.
   *
   * @return void
   */
  static private function checkMbString()
  {
      $is_loaded = extension_loaded('mbstring');
      self::$mbstring_available = $is_loaded;
  }
  /**
   * Converts a unicode value into a UTF-8 character
   *
   * Only Unicode scalar values are accepted: `U+0000` through `U+10FFFF`,
   * excluding the UTF-16 surrogate range `U+D800` through `U+DFFF`. Anything
   * else can not be represented in well-formed UTF-8 and is rejected.
   *
   * @throws fProgrammerException  When the code point is negative, greater than `U+10FFFF` or a surrogate
   *
   * @param  mixed $unicode_code_point  The character to create, either the `U+hex` or decimal code point
   * @return string  The UTF-8 character
   */
  static public function chr($unicode_code_point)
  {
      if (is_string($unicode_code_point) && substr($unicode_code_point, 0, 2) == 'U+') {
          $unicode_code_point = hexdec(substr($unicode_code_point, 2));
      }

      // The range is checked before casting because hexdec() returns a float
      // for very large values, and such a float must not be truncated to an int
      $out_of_range = $unicode_code_point < 0 || $unicode_code_point > 0x10FFFF;
      $code         = $out_of_range ? 0 : (int) $unicode_code_point;

      // U+D800 through U+DFFF are reserved for UTF-16 surrogates
      $is_surrogate = $code >= 0xD800 && $code <= 0xDFFF;

      if ($out_of_range || $is_surrogate) {
          throw new fProgrammerException(
              'The code point specified, %s, is invalid.',
              $unicode_code_point
          );
      }

      // One byte characters: 0xxxxxxx
      if ($code < 0x80) {
          return chr($code);
      }

      // Two byte characters: 110xxxxx 10xxxxxx
      if ($code < 0x800) {
          return chr(0xC0 | ($code >> 6))
              . chr(0x80 | ($code & 0x3F));
      }

      // Three byte characters: 1110xxxx 10xxxxxx 10xxxxxx
      if ($code < 0x10000) {
          return chr(0xE0 | ($code >> 12))
              . chr(0x80 | (($code >> 6) & 0x3F))
              . chr(0x80 | ($code & 0x3F));
      }

      // Four byte characters: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
      return chr(0xF0 | ($code >> 18))
          . chr(0x80 | (($code >> 12) & 0x3F))
          . chr(0x80 | (($code >> 6) & 0x3F))
          . chr(0x80 | ($code & 0x3F));
  }
  /**
   * Removes any invalid UTF-8 characters from a string or array of strings
   *
   * Arrays are processed recursively with their keys left untouched. Any
   * other value is cast to a string and passed through iconv() from UTF-8
   * to UTF-8. The `//IGNORE` flag is added unless `ICONV_IMPL` reports
   * `unknown` or `IBM iconv`.
   *
   * @param  array|string $value  The string or array of strings to clean
   * @return array|string  The cleaned string, or the array with every element cleaned
   */
  static public function clean($value)
  {
      if (is_array($value)) {
          foreach (array_keys($value) as $key) {
              $value[$key] = self::clean($value[$key]);
          }
          return $value;
      }

      // The iconv implementation is only inspected once per request
      if (self::$can_ignore_invalid === NULL) {
          $without_ignore = array('unknown', 'ibm iconv');
          self::$can_ignore_invalid = !in_array(strtolower(ICONV_IMPL), $without_ignore);
      }

      $out_charset = 'UTF-8';
      if (self::$can_ignore_invalid) {
          $out_charset .= '//IGNORE';
      }

      // E_NOTICE errors raised during the conversion are captured by fCore
      fCore::startErrorCapture(E_NOTICE);
      $value = self::iconv('UTF-8', $out_charset, (string) $value);
      fCore::stopErrorCapture();

      return $value;
  }
  /**
   * Compares strings, with the resulting order having latin characters that are based on ASCII letters placed after the relative ASCII characters
   *
   * The strings are first compared using their ASCII translations. Only
   * when those are identical does a byte comparison of the original
   * strings decide the order.
   *
   * Please note that this function sorts based on English language sorting
   * rules only. Locale-specific sorting is done by
   * [http://php.net/strcoll strcoll()], however there are technical
   * limitations.
   *
   * @param  string $str1  The first string to compare
   * @param  string $str2  The second string to compare
   * @return integer  < 0 if $str1 < $str2, 0 if they are equal, > 0 if $str1 > $str2
   */
  static public function cmp($str1, $str2)
  {
      $ascii_order = strcmp(
          strtr($str1, self::$utf8_to_ascii),
          strtr($str2, self::$utf8_to_ascii)
      );

      if ($ascii_order !== 0) {
          return $ascii_order;
      }

      // The ASCII representations are the same, so the UTF-8 representations break the tie
      return strcmp($str1, $str2);
  }
  /**
   * Converts an offset in characters to an offset in bytes so that we can use the built-in functions for some operations
   *
   * A positive offset is counted from the start of the string and yields
   * the byte position at which the character with that index begins. A
   * negative offset is counted from the end and yields a negative byte
   * offset. Offsets past either end of the string are clamped to the byte
   * length of the string.
   *
   * The result always falls on a character boundary, never between the
   * lead byte and the continuation bytes of a multi-byte character.
   *
   * @param  string  $string  The string to base the offset on
   * @param  integer $offset  The character offset to convert to bytes
   * @return integer  The converted offset
   */
  static private function convertOffsetToBytes($string, $offset)
  {
      if ($offset == 0) {
          return 0;
      }

      $len        = strlen($string);
      $chars_seen = 0;

      if ($offset > 0) {
          for ($i = 0; $i < $len; $i++) {
              // Bytes of the form 10xxxxxx continue the previous character
              if ((ord($string[$i]) & 0xC0) == 0x80) {
                  continue;
              }
              // This byte starts a character - stop once enough characters are behind us
              if ($chars_seen >= $offset) {
                  return $i;
              }
              ++$chars_seen;
          }
          return $len;
      }

      // Negative offsets are resolved by walking backwards from the end of the string
      $chars_wanted = abs($offset);
      for ($i = $len - 1; $i >= 0; $i--) {
          if ((ord($string[$i]) & 0xC0) == 0x80) {
              continue;
          }
          if (++$chars_seen >= $chars_wanted) {
              return $i - $len;
          }
      }

      return -$len;
  }
  /**
   * Detects if a UTF-8 string contains any non-ASCII characters
   *
   * The check is done at the byte level: any byte outside of the range
   * `0x00` to `0x7F` counts as non-ASCII.
   *
   * @param  string $string  The string to check
   * @return boolean  If the string contains any non-ASCII characters
   */
  static private function detect($string)
  {
      $found_high_byte = preg_match('#[^\x00-\x7F]#', $string);
      return (boolean) $found_high_byte;
  }
  /**
   * Explodes a string on a delimiter
   *
   * If no delimiter is provided, the string will be exploded with each
   * character being an element in the array. A delimiter of `0` or `'0'`
   * is treated as a real delimiter, even though PHP considers it empty.
   *
   * @param  string $string     The string to explode
   * @param  string $delimiter  The string to explode on. If `NULL` or `''` this method will return one character per array index.
   * @return array  The exploded string
   */
  static public function explode($string, $delimiter=NULL)
  {
      $split_into_characters = !$delimiter && !is_numeric($delimiter);

      if ($split_into_characters) {
          // The second alternative lets an empty string produce a single empty element
          preg_match_all('#.|^\z#us', $string, $matches);
          return $matches[0];
      }

      // A usable delimiter was passed, so the built-in function does the work
      return explode($delimiter, $string);
  }
  /**
   * Calls the built-in iconv() function and returns its result directly
   *
   * This works around a bug in MAMP 1.9.4+ and PHP 5.3 where iconv()
   * does not seem to properly assign the return value to a variable, but
   * does work when returning the value.
   *
   * NOTE(review): whatever the built-in function returns is passed through
   * unchanged, including any failure value - confirm that callers handle it.
   *
   * @param  string $in_charset   The incoming character encoding
   * @param  string $out_charset  The outgoing character encoding
   * @param  string $string       The string to convert
   * @return string  The converted string
   */
  static private function iconv($in_charset, $out_charset, $string)
  {
      // Keep this as a bare return - storing the result in a local variable
      // first would reintroduce the problem this wrapper exists to avoid
      return iconv($in_charset, $out_charset, $string);
  }
  /**
   * Compares strings in a case-insensitive manner, with the resulting order having characters that are based on ASCII letters placed after the relative ASCII characters
   *
   * Both strings are lowercased with ::lower() and then handed to ::cmp().
   *
   * Please note that this function sorts based on English language sorting
   * rules only. Locale-specific sorting is done by
   * [http://php.net/strcoll strcoll()], however there are technical
   * limitations.
   *
   * @param  string $str1  The first string to compare
   * @param  string $str2  The second string to compare
   * @return integer  < 0 if $str1 < $str2, 0 if they are equal, > 0 if $str1 > $str2
   */
  static public function icmp($str1, $str2)
  {
      return self::cmp(self::lower($str1), self::lower($str2));
  }
  /**
   * Compares strings using a natural order algorithm in a case-insensitive manner, with the resulting order having latin characters that are based on ASCII letters placed after the relative ASCII characters
   *
   * Both strings are lowercased with ::lower() and then handed to ::natcmp().
   *
   * Please note that this function sorts based on English language sorting
   * rules only. Locale-specific sorting is done by
   * [http://php.net/strcoll strcoll()], however there are technical
   * limitations.
   *
   * @param  string $str1  The first string to compare
   * @param  string $str2  The second string to compare
   * @return integer  `< 0` if `$str1 < $str2`, `0` if they are equal, `> 0` if `$str1 > $str2`
   */
  static public function inatcmp($str1, $str2)
  {
      return self::natcmp(self::lower($str1), self::lower($str2));
  }
  /**
   * Finds the first position (in characters) of the search value in the string - case is ignored when performing a match
   *
   * Three strategies are tried in order: the built-in stripos() when the
   * haystack contains only ASCII, mb_stripos() when the mbstring extension
   * provides it, and finally lowercasing both strings and calling ::pos().
   *
   * @param  string  $haystack  The string to search in
   * @param  string  $needle    The string to search for. This match will be done in a case-insensitive manner.
   * @param  integer $offset    The character position to start searching from
   * @return mixed  The integer character position of the first occurrence of the needle or `FALSE` if no match
   */
  static public function ipos($haystack, $needle, $offset=0)
  {
      if (self::detect($haystack)) {
          if (self::$mbstring_available === NULL) {
              self::checkMbString();
          }

          $use_mbstring = self::$mbstring_available && function_exists('mb_stripos');
          if ($use_mbstring) {
              return mb_stripos($haystack, $needle, $offset, 'UTF-8');
          }

          // No native support, so fold the case ourselves and do a normal search
          return self::pos(self::lower($haystack), self::lower($needle), $offset);
      }

      // We get better performance falling back for ASCII strings
      return stripos($haystack, $needle, $offset);
  }
  817. /**
  818. * Replaces matching parts of the string, with matches being done in a case-insensitive manner
  819. *
  820. * If `$search` and `$replace` are both arrays and `$replace` is shorter,
  821. * the extra `$search` string will be replaced with an empty string. If
  822. * `$search` is an array and `$replace` is a string, all `$search` values
  823. * will be replaced with the string specified.
  824. *
  825. * @param string $string The string to perform the replacements on
  826. * @param mixed $search The string (or array of strings) to search for - see method description for details
  827. * @param …

Large files files are truncated, but you can click here to view the full file