PageRenderTime 66ms CodeModel.GetById 23ms RepoModel.GetById 0ms app.codeStats 1ms

/classes/fUTF8.php

https://bitbucket.org/wbond/flourish/
PHP | 1628 lines | 1010 code | 187 blank | 431 comment | 110 complexity | c9f6b48fb2a19f8390197d74b4538bcb MD5 | raw file

Large files are truncated, but you can click here to view the full file

  1. <?php
  2. /**
  3. * Provides string functions for UTF-8 strings
  4. *
  5. * This class is implemented to provide a UTF-8 version of almost every built-in
  6. * PHP string function. For more information about UTF-8, please visit
  7. * http://flourishlib.com/docs/UTF-8.
  8. *
  9. * @copyright Copyright (c) 2008-2011 Will Bond
  10. * @author Will Bond [wb] <will@flourishlib.com>
  11. * @license http://flourishlib.com/license
  12. *
  13. * @package Flourish
  14. * @link http://flourishlib.com/fUTF8
  15. *
  16. * @version 1.0.0b15
  17. * @changes 1.0.0b15 Fixed a bug with using IBM's iconv implementation on AIX [wb, 2011-07-29]
  18. * @changes 1.0.0b14 Added a workaround for iconv having issues in MAMP 1.9.4+ [wb, 2011-07-26]
  19. * @changes 1.0.0b13 Fixed notices from being thrown when invalid data is sent to ::clean() [wb, 2011-06-10]
  20. * @changes 1.0.0b12 Fixed a variable name typo in ::sub() [wb, 2011-05-09]
  21. * @changes 1.0.0b11 Updated the class to not use phpinfo() to determine the iconv implementation [wb, 2010-11-04]
  22. * @changes 1.0.0b10 Fixed a bug with capitalizing a lowercase i resulting in a dotted upper-case I [wb, 2010-11-01]
  23. * @changes 1.0.0b9 Updated class to use fCore::startErrorCapture() instead of `error_reporting()` [wb, 2010-08-09]
  24. * @changes 1.0.0b8 Removed `e` flag from preg_replace() calls [wb, 2010-06-08]
  25. * @changes 1.0.0b7 Added the methods ::trim(), ::rtrim() and ::ltrim() [wb, 2010-05-11]
  26. * @changes 1.0.0b6 Fixed ::clean() to work with PHP installs that use an iconv library that doesn't support //IGNORE [wb, 2010-03-02]
  27. * @changes 1.0.0b5 Changed ::ucwords() to also uppercase words right after various punctuation [wb, 2009-09-18]
  28. * @changes 1.0.0b4 Changed replacement values in preg_replace() calls to be properly escaped [wb, 2009-06-11]
  29. * @changes 1.0.0b3 Fixed a parameter name in ::rpos() from `$search` to `$needle` [wb, 2009-02-06]
  30. * @changes 1.0.0b2 Fixed a bug in ::explode() with newlines and zero-length delimiters [wb, 2009-02-05]
  31. * @changes 1.0.0b The initial implementation [wb, 2008-06-01]
  32. */
  33. class fUTF8
  34. {
  35. // The following constants allow for nice looking callbacks to static methods
  36. const ascii = 'fUTF8::ascii';
  37. const chr = 'fUTF8::chr';
  38. const clean = 'fUTF8::clean';
  39. const cmp = 'fUTF8::cmp';
  40. const explode = 'fUTF8::explode';
  41. const icmp = 'fUTF8::icmp';
  42. const inatcmp = 'fUTF8::inatcmp';
  43. const ipos = 'fUTF8::ipos';
  44. const ireplace = 'fUTF8::ireplace';
  45. const irpos = 'fUTF8::irpos';
  46. const istr = 'fUTF8::istr';
  47. const len = 'fUTF8::len';
  48. const lower = 'fUTF8::lower';
  49. const ltrim = 'fUTF8::ltrim';
  50. const natcmp = 'fUTF8::natcmp';
  51. const ord = 'fUTF8::ord';
  52. const pad = 'fUTF8::pad';
  53. const pos = 'fUTF8::pos';
  54. const replace = 'fUTF8::replace';
  55. const reset = 'fUTF8::reset';
  56. const rev = 'fUTF8::rev';
  57. const rpos = 'fUTF8::rpos';
  58. const rtrim = 'fUTF8::rtrim';
  59. const str = 'fUTF8::str';
  60. const sub = 'fUTF8::sub';
  61. const trim = 'fUTF8::trim';
  62. const ucfirst = 'fUTF8::ucfirst';
  63. const ucwords = 'fUTF8::ucwords';
  64. const upper = 'fUTF8::upper';
  65. const wordwrap = 'fUTF8::wordwrap';
  66. /**
  67. * Depending on how things are compiled, NetBSD and Solaris don't support //IGNORE in iconv()
  68. *
  69. * If //IGNORE support is not provided, strings with invalid characters will be truncated
  70. *
  71. * @var boolean
  72. */
  73. static private $can_ignore_invalid = NULL;
  74. /**
  75. * All lowercase UTF-8 characters mapped to uppercase characters
  76. *
  77. * @var array
  78. */
  79. static private $lower_to_upper = array(
  80. 'a' => 'A', 'b' => 'B', 'c' => 'C', 'd' => 'D', 'e' => 'E', 'f' => 'F',
  81. 'g' => 'G', 'h' => 'H', 'i' => 'I', 'j' => 'J', 'k' => 'K', 'l' => 'L',
  82. 'm' => 'M', 'n' => 'N', 'o' => 'O', 'p' => 'P', 'q' => 'Q', 'r' => 'R',
  83. 's' => 'S', 't' => 'T', 'u' => 'U', 'v' => 'V', 'w' => 'W', 'x' => 'X',
  84. 'y' => 'Y', 'z' => 'Z', 'ŕ' => 'Ŕ', 'á' => 'Á', 'â' => 'Â', 'ă' => 'Ă',
  85. 'ä' => 'Ä', 'ĺ' => 'Ĺ', 'ć' => 'Ć', 'ç' => 'Ç', 'č' => 'Č', 'é' => 'É',
  86. 'ę' => 'Ę', 'ë' => 'Ë', 'ě' => 'Ě', 'í' => 'Í', 'î' => 'Î', 'ď' => 'Ď',
  87. 'đ' => 'Đ', 'ń' => 'Ń', 'ň' => 'Ň', 'ó' => 'Ó', 'ô' => 'Ô', 'ő' => 'Ő',
  88. 'ö' => 'Ö', 'ř' => 'Ř', 'ů' => 'Ů', 'ú' => 'Ú', 'ű' => 'Ű', 'ü' => 'Ü',
  89. 'ý' => 'Ý', 'ţ' => 'Ţ', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  90. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  91. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  92. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  93. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  94. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  95. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', 'œ' => 'Œ',
  96. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  97. 'š' => 'Š', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  98. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  99. '˙' => 'Ÿ', '?' => '?', '?' => '?', 'ž' => 'Ž', '?' => '?', '?' => '?',
  100. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  101. '?' => '?', '?' => '?', 'ƒ' => '?', '?' => '?', '?' => '?', '?' => '?',
  102. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  103. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  104. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  105. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  106. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  107. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  108. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  109. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  110. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  111. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  112. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  113. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  114. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  115. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  116. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  117. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  118. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  119. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  120. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  121. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  122. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  123. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  124. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  125. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  126. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  127. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  128. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  129. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  130. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  131. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  132. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  133. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  134. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  135. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  136. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  137. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  138. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  139. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  140. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  141. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  142. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  143. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  144. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  145. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  146. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  147. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  148. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  149. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  150. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  151. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  152. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  153. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  154. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  155. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  156. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  157. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  158. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  159. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  160. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  161. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  162. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  163. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  164. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  165. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  166. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  167. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  168. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  169. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  170. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  171. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  172. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  173. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  174. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  175. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  176. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  177. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  178. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  179. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  180. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  181. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  182. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  183. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  184. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  185. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  186. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  187. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  188. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  189. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  190. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  191. '?' => '?', '?' => '?'
  192. );
  193. /**
  194. * All lowercase UTF-8 characters not properly handled by [http://php.net/mb_strtoupper mb_strtoupper()] mapped to uppercase characters
  195. *
  196. * @var array
  197. */
  198. static private $mb_lower_to_upper_fix = array(
  199. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  200. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  201. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  202. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  203. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  204. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  205. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  206. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  207. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  208. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  209. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?'
  210. );
  211. /**
  212. * All uppercase UTF-8 characters not properly handled by [http://php.net/mb_strtolower mb_strtolower()] mapped to lowercase characters
  213. *
  214. * @var array
  215. */
  216. static private $mb_upper_to_lower_fix = array(
  217. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  218. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  219. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  220. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  221. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  222. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  223. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  224. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  225. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  226. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  227. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  228. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  229. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  230. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  231. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  232. '?' => '?', '?' => '?'
  233. );
  234. /**
  235. * All uppercase UTF-8 characters mapped to lowercase characters
  236. *
  237. * @var array
  238. */
  239. static private $upper_to_lower = array(
  240. 'A' => 'a', 'B' => 'b', 'C' => 'c', 'D' => 'd', 'E' => 'e', 'F' => 'f',
  241. 'G' => 'g', 'H' => 'h', 'I' => 'i', 'J' => 'j', 'K' => 'k', 'L' => 'l',
  242. 'M' => 'm', 'N' => 'n', 'O' => 'o', 'P' => 'p', 'Q' => 'q', 'R' => 'r',
  243. 'S' => 's', 'T' => 't', 'U' => 'u', 'V' => 'v', 'W' => 'w', 'X' => 'x',
  244. 'Y' => 'y', 'Z' => 'z', 'Ŕ' => 'ŕ', 'Á' => 'á', 'Â' => 'â', 'Ă' => 'ă',
  245. 'Ä' => 'ä', 'Ĺ' => 'ĺ', 'Ć' => 'ć', 'Ç' => 'ç', 'Č' => 'č', 'É' => 'é',
  246. 'Ę' => 'ę', 'Ë' => 'ë', 'Ě' => 'ě', 'Í' => 'í', 'Î' => 'î', 'Ď' => 'ď',
  247. 'Đ' => 'đ', 'Ń' => 'ń', 'Ň' => 'ň', 'Ó' => 'ó', 'Ô' => 'ô', 'Ő' => 'ő',
  248. 'Ö' => 'ö', 'Ř' => 'ř', 'Ů' => 'ů', 'Ú' => 'ú', 'Ű' => 'ű', 'Ü' => 'ü',
  249. 'Ý' => 'ý', 'Ţ' => 'ţ', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  250. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  251. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  252. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  253. '?' => '?', '?' => '?', '?' => 'i', '?' => '?', '?' => '?', '?' => '?',
  254. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  255. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  256. 'Œ' => 'œ', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  257. '?' => '?', 'Š' => 'š', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  258. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  259. '?' => '?', 'Ÿ' => '˙', '?' => '?', '?' => '?', 'Ž' => 'ž', '?' => '?',
  260. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  261. '?' => '?', '?' => '?', '?' => '?', '?' => 'ƒ', '?' => '?', '?' => '?',
  262. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  263. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  264. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  265. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  266. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  267. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  268. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  269. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  270. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  271. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  272. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  273. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  274. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  275. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  276. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  277. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  278. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  279. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  280. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  281. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  282. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  283. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  284. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  285. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  286. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  287. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  288. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  289. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  290. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  291. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  292. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  293. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  294. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  295. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  296. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  297. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  298. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  299. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  300. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  301. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  302. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  303. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  304. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  305. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  306. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  307. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  308. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  309. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  310. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  311. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  312. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  313. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  314. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  315. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  316. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  317. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  318. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  319. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  320. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  321. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  322. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  323. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  324. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  325. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  326. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  327. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  328. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  329. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  330. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  331. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  332. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  333. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  334. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  335. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  336. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  337. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  338. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  339. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  340. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  341. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  342. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  343. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  344. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  345. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  346. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  347. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  348. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  349. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  350. '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?', '?' => '?',
  351. '?' => '?', '?' => '?', '?' => '?'
  352. );
  353. /**
  354. * A mapping of all ASCII-based Latin characters, punctuation, symbols and number forms to ASCII.
  355. *
  356. * Includes elements from the following Unicode blocks:
  357. *
  358. * - Latin-1 Supplement
  359. * - Latin Extended-A
  360. * - Latin Extended-B
  361. * - IPA Extensions
  362. * - Latin Extended Additional
  363. * - General Punctuation
  364. * - Letterlike symbols
  365. * - Number Forms
  366. *
  367. * @var array
  368. */
  369. static private $utf8_to_ascii = array(
  370. // Latin-1 Supplement
  371. 'Š' => '(c)', 'Ť' => '<<', 'Ž' => '(R)', 'ť' => '>>', 'ź' => '1/4',
  372. '˝' => '1/2', 'ž' => '3/4', 'Ŕ' => 'A', 'Á' => 'A', 'Â' => 'A',
  373. 'Ă' => 'A', 'Ä' => 'A', 'Ĺ' => 'A', 'Ć' => 'AE', 'Ç' => 'C',
  374. 'Č' => 'E', 'É' => 'E', 'Ę' => 'E', 'Ë' => 'E', 'Ě' => 'I',
  375. 'Í' => 'I', 'Î' => 'I', 'Ď' => 'I', 'Ń' => 'N', 'Ň' => 'O',
  376. 'Ó' => 'O', 'Ô' => 'O', 'Ő' => 'O', 'Ö' => 'O', 'Ř' => 'O',
  377. 'Ů' => 'U', 'Ú' => 'U', 'Ű' => 'U', 'Ü' => 'U', 'Ý' => 'Y',
  378. 'ŕ' => 'a', 'á' => 'a', 'â' => 'a', 'ă' => 'a', 'ä' => 'a',
  379. 'ĺ' => 'a', 'ć' => 'ae', 'ç' => 'c', 'č' => 'e', 'é' => 'e',
  380. 'ę' => 'e', 'ë' => 'e', 'ě' => 'i', 'í' => 'i', 'î' => 'i',
  381. 'ď' => 'i', 'ń' => 'n', 'ň' => 'o', 'ó' => 'o', 'ô' => 'o',
  382. 'ő' => 'o', 'ö' => 'o', 'ř' => 'o', 'ů' => 'u', 'ú' => 'u',
  383. 'ű' => 'u', 'ü' => 'u', 'ý' => 'y', '˙' => 'y',
  384. // Latin Extended-A
  385. '?' => 'A', '?' => 'a', '?' => 'A', '?' => 'a', '?' => 'A',
  386. '?' => 'a', '?' => 'C', '?' => 'c', '?' => 'C', '?' => 'c',
  387. '?' => 'C', '?' => 'c', '?' => 'C', '?' => 'c', '?' => 'D',
  388. '?' => 'd', '?' => 'D', '?' => 'd', '?' => 'E', '?' => 'e',
  389. '?' => 'E', '?' => 'e', '?' => 'E', '?' => 'e', '?' => 'E',
  390. '?' => 'e', '?' => 'E', '?' => 'e', '?' => 'G', '?' => 'g',
  391. '?' => 'G', '?' => 'g', '?' => 'G', '?' => 'g', '?' => 'G',
  392. '?' => 'g', '?' => 'H', '?' => 'h', '?' => 'H', '?' => 'h',
  393. '?' => 'I', '?' => 'i', '?' => 'I', '?' => 'i', '?' => 'I',
  394. '?' => 'i', '?' => 'I', '?' => 'i', '?' => 'I', '?' => 'i',
  395. '?' => 'IJ', '?' => 'ij', '?' => 'J', '?' => 'j', '?' => 'K',
  396. '?' => 'k', '?' => 'L', '?' => 'l', '?' => 'L', '?' => 'l',
  397. '?' => 'L', '?' => 'l', '?' => 'L', '?' => 'l', '?' => 'L',
  398. '?' => 'l', '?' => 'N', '?' => 'n', '?' => 'N', '?' => 'n',
  399. '?' => 'N', '?' => 'n', '?' => "'n", '?' => 'N', '?' => 'n',
  400. '?' => 'O', '?' => 'o', '?' => 'O', '?' => 'o', '?' => 'O',
  401. '?' => 'o', 'Œ' => 'OE', 'œ' => 'oe', '?' => 'R', '?' => 'r',
  402. '?' => 'R', '?' => 'r', '?' => 'R', '?' => 'r', '?' => 'S',
  403. '?' => 's', '?' => 'S', '?' => 's', '?' => 'S', '?' => 's',
  404. 'Š' => 'S', 'š' => 's', '?' => 'T', '?' => 't', '?' => 'T',
  405. '?' => 't', '?' => 'T', '?' => 't', '?' => 'U', '?' => 'u',
  406. '?' => 'U', '?' => 'u', '?' => 'U', '?' => 'u', '?' => 'U',
  407. '?' => 'u', '?' => 'U', '?' => 'u', '?' => 'U', '?' => 'u',
  408. '?' => 'W', '?' => 'w', '?' => 'Y', '?' => 'y', 'Ÿ' => 'Y',
  409. '?' => 'Z', '?' => 'z', '?' => 'Z', '?' => 'z', 'Ž' => 'Z',
  410. 'ž' => 'z',
  411. // Latin Extended-B
  412. '?' => 'b', '?' => 'B', '?' => 'B', '?' => 'b', '?' => 'O',
  413. '?' => 'C', '?' => 'c', '?' => 'D', '?' => 'D', '?' => 'D',
  414. '?' => 'd', '?' => 'E', '?' => 'E', '?' => 'F', 'ƒ' => 'f',
  415. '?' => 'G', '?' => 'I', '?' => 'K', '?' => 'k', '?' => 'l',
  416. '?' => 'M', '?' => 'N', '?' => 'n', '?' => 'O', '?' => 'O',
  417. '?' => 'o', '?' => 'OI', '?' => 'oi', '?' => 'P', '?' => 'p',
  418. '?' => 't', '?' => 'T', '?' => 't', '?' => 'T', '?' => 'U',
  419. '?' => 'u', '?' => 'V', '?' => 'Y', '?' => 'y', '?' => 'Z',
  420. '?' => 'z', '?' => '2', '?' => 'DZ', '?' => 'Dz', '?' => 'dz',
  421. '?' => 'LJ', '?' => 'Lj', '?' => 'lj', '?' => 'Nj', '?' => 'Nj',
  422. '?' => 'nj', '?' => 'A', '?' => 'a', '?' => 'I', '?' => 'i',
  423. '?' => 'O', '?' => 'o', '?' => 'U', '?' => 'u', '?' => 'U',
  424. '?' => 'u', '?' => 'U', '?' => 'u', '?' => 'U', '?' => 'u',
  425. '?' => 'U', '?' => 'u', '?' => 'e', '?' => 'A', '?' => 'a',
  426. '?' => 'A', '?' => 'a', '?' => 'AE', '?' => 'ae', '?' => 'G',
  427. '?' => 'g', '?' => 'G', '?' => 'g', '?' => 'K', '?' => 'k',
  428. '?' => 'O', '?' => 'o', '?' => 'O', '?' => 'o', '?' => 'j',
  429. '?' => 'DZ', '?' => 'Dz', '?' => 'dz', '?' => 'G', '?' => 'g',
  430. '?' => 'N', '?' => 'n', '?' => 'A', '?' => 'a', '?' => 'AE',
  431. '?' => 'ae', '?' => 'O', '?' => 'o', '?' => 'A', '?' => 'a',
  432. '?' => 'A', '?' => 'a', '?' => 'E', '?' => 'e', '?' => 'E',
  433. '?' => 'e', '?' => 'I', '?' => 'i', '?' => 'I', '?' => 'i',
  434. '?' => 'O', '?' => 'o', '?' => 'O', '?' => 'o', '?' => 'R',
  435. '?' => 'r', '?' => 'R', '?' => 'r', '?' => 'U', '?' => 'u',
  436. '?' => 'U', '?' => 'u', '?' => 'S', '?' => 's', '?' => 'T',
  437. '?' => 't', '?' => 'H', '?' => 'h', '?' => 'N', '?' => 'd',
  438. '?' => 'Z', '?' => 'z', '?' => 'A', '?' => 'a', '?' => 'E',
  439. '?' => 'e', '?' => 'O', '?' => 'o', '?' => 'O', '?' => 'o',
  440. '?' => 'O', '?' => 'o', '?' => 'O', '?' => 'o', '?' => 'Y',
  441. '?' => 'y', '?' => 'l', '?' => 'n', '?' => 't', '?' => 'j',
  442. '?' => 'db', '?' => 'qp', '?' => 'A', '?' => 'C', '?' => 'c',
  443. '?' => 'L', '?' => 'T', '?' => 's', '?' => 'z', '?' => 'B',
  444. '?' => 'U', '?' => 'V', '?' => 'E', '?' => 'e', '?' => 'J',
  445. '?' => 'j', '?' => 'Q', '?' => 'q', '?' => 'R', '?' => 'r',
  446. '?' => 'Y', '?' => 'y',
  447. // IPA Extensions
  448. '?' => 'a', '?' => 'b', '?' => 'o', '?' => 'c', '?' => 'd',
  449. '?' => 'd', '?' => 'e', '?' => 'e', '?' => 'e', '?' => 'e',
  450. '?' => 'e', '?' => 'j', '?' => 'g', '?' => 'g', '?' => 'G',
  451. '?' => 'h', '?' => 'h', '?' => 'i', '?' => 'I', '?' => 'l',
  452. '?' => 'l', '?' => 'l', '?' => 'm', '?' => 'm', '?' => 'm',
  453. '?' => 'n', '?' => 'n', '?' => 'N', '?' => 'o', '?' => 'OE',
  454. '?' => 'r', '?' => 'r', '?' => 'r', '?' => 'r', '?' => 'r',
  455. '?' => 'r', '?' => 'r', '?' => 'R', '?' => 'R', '?' => 's',
  456. '?' => 't', '?' => 't', '?' => 'u', '?' => 'v', '?' => 'v',
  457. '?' => 'w', '?' => 'y', '?' => 'Y', '?' => 'z', '?' => 'z',
  458. '?' => 'C', '?' => 'B', '?' => 'e', '?' => 'G', '?' => 'H',
  459. '?' => 'j', '?' => 'k', '?' => 'L', '?' => 'q', '?' => 'dz',
  460. '?' => 'dz', '?' => 'ts', '?' => 'tc', '?' => 'ls', '?' => 'lz',
  461. '?' => 'h', '?' => 'h',
  462. // Latin Extended Additional
  463. '?' => 'A', '?' => 'a', '?' => 'B', '?' => 'b', '?' => 'B',
  464. '?' => 'b', '?' => 'B', '?' => 'b', '?' => 'C', '?' => 'c',
  465. '?' => 'D', '?' => 'd', '?' => 'D', '?' => 'd', '?' => 'D',
  466. '?' => 'd', '?' => 'D', '?' => 'd', '?' => 'D', '?' => 'd',
  467. '?' => 'E', '?' => 'e', '?' => 'E', '?' => 'e', '?' => 'E',
  468. '?' => 'e', '?' => 'E', '?' => 'e', '?' => 'E', '?' => 'e',
  469. '?' => 'F', '?' => 'f', '?' => 'G', '?' => 'g', '?' => 'H',
  470. '?' => 'h', '?' => 'H', '?' => 'h', '?' => 'H', '?' => 'h',
  471. '?' => 'H', '?' => 'h', '?' => 'H', '?' => 'h', '?' => 'I',
  472. '?' => 'i', '?' => 'I', '?' => 'i', '?' => 'K', '?' => 'k',
  473. '?' => 'K', '?' => 'k', '?' => 'K', '?' => 'k', '?' => 'L',
  474. '?' => 'l', '?' => 'L', '?' => 'l', '?' => 'L', '?' => 'l',
  475. '?' => 'L', '?' => 'l', '?' => 'M', '?' => 'm', '?' => 'M',
  476. '?' => 'm', '?' => 'M', '?' => 'm', '?' => 'N', '?' => 'n',
  477. '?' => 'N', '?' => 'n', '?' => 'N', '?' => 'n', '?' => 'N',
  478. '?' => 'n', '?' => 'O', '?' => 'o', '?' => 'O', '?' => 'o',
  479. '?' => 'O', '?' => 'o', '?' => 'O', '?' => 'o', '?' => 'P',
  480. '?' => 'p', '?' => 'P', '?' => 'p', '?' => 'R', '?' => 'r',
  481. '?' => 'R', '?' => 'r', '?' => 'R', '?' => 'r', '?' => 'R',
  482. '?' => 'r', '?' => 'S', '?' => 's', '?' => 'S', '?' => 's',
  483. '?' => 'S', '?' => 's', '?' => 'S', '?' => 's', '?' => 'S',
  484. '?' => 's', '?' => 'T', '?' => 't', '?' => 'T', '?' => 't',
  485. '?' => 'T', '?' => 't', '?' => 'T', '?' => 't', '?' => 'U',
  486. '?' => 'u', '?' => 'U', '?' => 'u', '?' => 'U', '?' => 'u',
  487. '?' => 'U', '?' => 'u', '?' => 'U', '?' => 'u', '?' => 'V',
  488. '?' => 'v', '?' => 'V', '?' => 'v', '?' => 'W', '?' => 'w',
  489. '?' => 'W', '?' => 'w', '?' => 'W', '?' => 'w', '?' => 'W',
  490. '?' => 'w', '?' => 'W', '?' => 'w', '?' => 'X', '?' => 'x',
  491. '?' => 'X', '?' => 'x', '?' => 'Y', '?' => 'y', '?' => 'Z',
  492. '?' => 'z', '?' => 'Z', '?' => 'z', '?' => 'Z', '?' => 'z',
  493. '?' => 'h', '?' => 't', '?' => 'w', '?' => 'y', '?' => 'a',
  494. '?' => 'A', '?' => 'a', '?' => 'A', '?' => 'a', '?' => 'A',
  495. '?' => 'a', '?' => 'A', '?' => 'a', '?' => 'A', '?' => 'a',
  496. '?' => 'A', '?' => 'a', '?' => 'A', '?' => 'a', '?' => 'A',
  497. '?' => 'a', '?' => 'A', '?' => 'a', '?' => 'A', '?' => 'a',
  498. '?' => 'A', '?' => 'a', '?' => 'A', '?' => 'a', '?' => 'E',
  499. '?' => 'e', '?' => 'E', '?' => 'e', '?' => 'E', '?' => 'e',
  500. '?' => 'E', '?' => 'e', '?' => 'E', '?' => 'e', '?' => 'E',
  501. '?' => 'e', '?' => 'E', '?' => 'e', '?' => 'E', '?' => 'e',
  502. '?' => 'I', '?' => 'i', '?' => 'I', '?' => 'i', '?' => 'O',
  503. '?' => 'o', '?' => 'O', '?' => 'o', '?' => 'O', '?' => 'o',
  504. '?' => 'O', '?' => 'o', '?' => 'O', '?' => 'o', '?' => 'O',
  505. '?' => 'o', '?' => 'O', '?' => 'o', '?' => 'O', '?' => 'o',
  506. '?' => 'O', '?' => 'o', '?' => 'O', '?' => 'o', '?' => 'O',
  507. '?' => 'o', '?' => 'O', '?' => 'o', '?' => 'U', '?' => 'u',
  508. '?' => 'U', '?' => 'u', '?' => 'U', '?' => 'u', '?' => 'U',
  509. '?' => 'u', '?' => 'U', '?' => 'u', '?' => 'U', '?' => 'u',
  510. '?' => 'U', '?' => 'u', '?' => 'Y', '?' => 'y', '?' => 'Y',
  511. '?' => 'y', '?' => 'Y', '?' => 'y', '?' => 'Y', '?' => 'y',
  512. // General Punctuation
  513. '?' => ' ', '?' => ' ', '?' => ' ', '?' => ' ', '?' => ' ',
  514. '?' => ' ', '?' => ' ', '?' => ' ', '?' => ' ', '?' => ' ',
  515. '?' => ' ', '?' => '', '?' => '', '?' => '', '?' => '-',
  516. '?' => '-', '?' => '-', '–' => '-', '—' => '-', '?' => '-',
  517. '?' => '||', '‘' => "'", '’' => "'", '‚' => ',', '?' => "'",
  518. '“' => '"', '”' => '"', '?' => '"', '?' => '.', '?' => '..',
  519. '…' => '...', '?' => ' ', '?' => "'", '?' => '"', '?' => '\'"',
  520. '?' => "'", '?' => '"', '?' => '"\'', '‹' => '<', '›' => '>',
  521. '?' => '!!', '?' => '?!', '?' => '/', '?' => '?/', '?' => '?!',
  522. '?' => '!?',
  523. // Letterlike Symbols
  524. '?' => 'SM', '™' => 'TM',
  525. // Number Forms
  526. '?' => '1/3', '?' => '2/3', '?' => '1/5', '?' => '2/5', '?' => '3/5',
  527. '?' => '4/5', '?' => '1/6', '?' => '5/6', '?' => '1/8', '?' => '3/8',
  528. '?' => '5/8', '?' => '7/8', '?' => 'I', '?' => 'II', '?' => 'III',
  529. '?' => 'IV', '?' => 'V', '?' => 'Vi', '?' => 'VII', '?' => 'VIII',
  530. '?' => 'IX', '?' => 'X', '?' => 'XI', '?' => 'XII', '?' => 'L',
  531. '?' => 'C', '?' => 'D', '?' => 'M', '?' => 'i', '?' => 'ii',
  532. '?' => 'iii', '?' => 'iv', '?' => 'v', '?' => 'vi', '?' => 'vii',
  533. '?' => 'viii','?' => 'ix', '?' => 'x', '?' => 'xi', '?' => 'xii',
  534. '?' => 'l', '?' => 'c', '?' => 'd', '?' => 'm'
  535. );
  536. /**
  537. * If the [http://php.net/mbstring mbstring] extension is available - stays `NULL` until ::checkMbString() has stored the result
  538. *
  539. * @var boolean
  540. */
  541. static private $mbstring_available = NULL;
  /**
   * Transliterates UTF-8 latin characters, punctuation, symbols and number forms to ASCII
   *
   * Every character found in the class's UTF-8 to ASCII map is swapped for
   * its ASCII replacement. Any non-ASCII byte that is still present after the
   * translation is removed.
   *
   * This is most useful in situations that only allow ASCII, such as URLs.
   *
   * Translates elements from the following unicode blocks:
   *
   *  - Latin-1 Supplement
   *  - Latin Extended-A
   *  - Latin Extended-B
   *  - IPA Extensions
   *  - Latin Extended Additional
   *  - General Punctuation
   *  - Letterlike symbols
   *  - Number Forms
   *
   * @internal
   *
   * @param  string $string  The string to convert
   * @return string  The input string in pure ASCII
   */
  static public function ascii($string)
  {
      $has_non_ascii = self::detect($string);

      if ($has_non_ascii) {
          // Translate what the map knows about, then drop whatever is left outside of the ASCII range
          $translated = strtr($string, self::$utf8_to_ascii);
          return preg_replace('#[^\x00-\x7F]#', '', $translated);
      }

      // Strings that are already pure ASCII are handed back untouched
      return $string;
  }
  /**
   * Records whether the [http://php.net/mbstring mbstring] extension is loaded
   *
   * The result of `extension_loaded('mbstring')` is stored in
   * `self::$mbstring_available`.
   *
   * @return void
   */
  static private function checkMbString()
  {
      $is_loaded = extension_loaded('mbstring');
      self::$mbstring_available = $is_loaded;
  }
  /**
   * Converts a unicode code point into a UTF-8 character
   *
   * Only valid Unicode scalar values are accepted: `0` through `0x10FFFF`,
   * excluding the surrogate range `0xD800` through `0xDFFF`, since neither
   * surrogates nor values above `0x10FFFF` can be represented in valid UTF-8.
   *
   * @throws fProgrammerException  When the code point is not numeric, is negative, is a surrogate or is above `0x10FFFF`
   *
   * @param  mixed $unicode_code_point  The character to create, either the `U+hex` or decimal code point
   * @return string  The UTF-8 character
   */
  static public function chr($unicode_code_point)
  {
      // The U+hex notation is converted to a decimal code point first
      if (is_string($unicode_code_point) && substr($unicode_code_point, 0, 2) == 'U+') {
          $unicode_code_point = hexdec(substr($unicode_code_point, 2));
      }

      // The range is validated before casting so that huge values can not overflow into something that looks valid
      $is_valid = is_numeric($unicode_code_point)
          && $unicode_code_point >= 0
          && $unicode_code_point <= 0x10FFFF
          && ($unicode_code_point < 0xD800 || $unicode_code_point > 0xDFFF);

      if (!$is_valid) {
          throw new fProgrammerException(
              'The code point specified, %s, is invalid.',
              $unicode_code_point
          );
      }

      $code_point = (int) $unicode_code_point;

      // One byte characters: 0xxxxxxx
      if ($code_point < 0x80) {
          return chr($code_point);
      }

      // Two byte characters: 110xxxxx 10xxxxxx
      if ($code_point < 0x800) {
          return chr(0xC0 | ($code_point >> 6)) .
                 chr(0x80 | ($code_point & 0x3F));
      }

      // Three byte characters: 1110xxxx 10xxxxxx 10xxxxxx
      if ($code_point < 0x10000) {
          return chr(0xE0 | ($code_point >> 12)) .
                 chr(0x80 | (($code_point >> 6) & 0x3F)) .
                 chr(0x80 | ($code_point & 0x3F));
      }

      // Four byte characters: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
      return chr(0xF0 | ($code_point >> 18)) .
             chr(0x80 | (($code_point >> 12) & 0x3F)) .
             chr(0x80 | (($code_point >> 6) & 0x3F)) .
             chr(0x80 | ($code_point & 0x3F));
  }
  /**
   * Removes any invalid UTF-8 characters from a string or array of strings
   *
   * Arrays are processed element by element (recursively) with their keys
   * left in place. Any other value is cast to a string and run through a
   * UTF-8 to UTF-8 iconv conversion, with `//IGNORE` appended to the target
   * charset unless the iconv implementation reports itself as `unknown` or
   * `ibm iconv`.
   *
   * @param  array|string $value  The string or array of strings to clean
   * @return array|string  The cleaned string, or the array with each element cleaned
   */
  static public function clean($value)
  {
      if (is_array($value)) {
          foreach (array_keys($value) as $key) {
              $value[$key] = self::clean($value[$key]);
          }
          return $value;
      }

      // The iconv implementation only needs to be inspected once per request
      if (self::$can_ignore_invalid === NULL) {
          $implementation = strtolower(ICONV_IMPL);
          self::$can_ignore_invalid = !in_array($implementation, array('unknown', 'ibm iconv'));
      }

      $ignore_flag = self::$can_ignore_invalid ? '//IGNORE' : '';

      // E_NOTICE errors raised during the conversion are captured instead of being shown
      fCore::startErrorCapture(E_NOTICE);
      $value = self::iconv('UTF-8', 'UTF-8' . $ignore_flag, (string) $value);
      fCore::stopErrorCapture();

      return $value;
  }
  /**
   * Compares strings, with the resulting order having latin characters that are based on ASCII letters placed after the relative ASCII characters
   *
   * Both strings are first translated with the class's UTF-8 to ASCII map and
   * compared with `strcmp()`. Only if those translations are identical are
   * the original strings compared with `strcmp()`.
   *
   * Please note that this function sorts based on English language sorting
   * rules only. Locale-specific sorting is done by
   * [http://php.net/strcoll strcoll()], however there are technical
   * limitations.
   *
   * @param  string $str1  The first string to compare
   * @param  string $str2  The second string to compare
   * @return integer  < 0 if $str1 < $str2, 0 if they are equal, > 0 if $str1 > $str2
   */
  static public function cmp($str1, $str2)
  {
      $ascii_result = strcmp(
          strtr($str1, self::$utf8_to_ascii),
          strtr($str2, self::$utf8_to_ascii)
      );

      if ($ascii_result !== 0) {
          return $ascii_result;
      }

      // The ASCII representations tie, so the UTF-8 representations decide the order
      return strcmp($str1, $str2);
  }
  /**
   * Converts an offset in characters to an offset in bytes so that we can use the built-in functions for some operations
   *
   * A positive offset is measured from the start of the string and the result
   * always falls on a character boundary (just past the last counted
   * character). A negative offset is measured from the end of the string and
   * the result is negative. If the string has fewer characters than
   * requested, the full byte length (with the matching sign) is returned.
   *
   * @param  string  $string  The string to base the offset on
   * @param  integer $offset  The character offset to convert to bytes
   * @return integer  The converted offset
   */
  static private function convertOffsetToBytes($string, $offset)
  {
      if ($offset == 0) {
          return 0;
      }

      $len             = strlen($string);
      $byte_offset     = 0;
      $measured_offset = 0;
      $sign            = 1;

      // Negative offsets are measured by walking the reversed bytes
      if ($offset < 0) {
          $string = strrev($string);
          $sign   = -1;
          $offset = abs($offset);
      }

      for ($i = 0; $i < $len && $measured_offset < $offset; $i++) {
          $byte = ord($string[$i]);
          ++$byte_offset;

          // ASCII bytes and multi-byte lead bytes (0xC0 and up) each mark one character, continuation bytes (0x80-0xBF) do not
          if ($byte < 0x80 || $byte >= 0xC0) {
              ++$measured_offset;
          }
      }

      // Walking forwards, the loop stops right after the lead byte of the last
      // counted character, so its continuation bytes still have to be consumed
      // to land on a character boundary. Walking backwards the lead byte is
      // the last byte seen for a character, so no adjustment is needed.
      if ($sign === 1) {
          while ($byte_offset < $len && (ord($string[$byte_offset]) & 0xC0) === 0x80) {
              ++$byte_offset;
          }
      }

      return $byte_offset * $sign;
  }
  /**
   * Detects if a string contains any bytes outside of the ASCII range
   *
   * @param  string $string  The string to check
   * @return boolean  If the string contains any non-ASCII characters
   */
  static private function detect($string)
  {
      // Anything other than exactly one match (no match or an error) counts as "no non-ASCII content"
      $matched = preg_match('#[^\x00-\x7F]#', $string);
      return $matched === 1;
  }
  /**
   * Explodes a string on a delimiter
   *
   * If no delimiter is provided, the string will be exploded with each
   * character being an element in the array. Numeric delimiters such as
   * `0` or `'0'` are treated as real delimiters even though PHP considers
   * them empty.
   *
   * @param  string $string     The string to explode
   * @param  string $delimiter  The string to explode on. If `NULL` or `''` this method will return one character per array index.
   * @return array  The exploded string
   */
  static public function explode($string, $delimiter=NULL)
  {
      $has_delimiter = $delimiter || is_numeric($delimiter);

      if (!$has_delimiter) {
          // One match per UTF-8 character, or a single empty match for an empty string
          preg_match_all('#.|^\z#us', $string, $characters);
          return $characters[0];
      }

      return explode($delimiter, $string);
  }
  /**
   * Thin wrapper around the native `iconv()` function
   *
   * This works around a bug in MAMP 1.9.4+ and PHP 5.3 where iconv()
   * does not seem to properly assign the return value to a variable, but
   * does work when returning the value. For that reason the result must be
   * returned directly and never stored in a local variable first.
   *
   * @param  string $in_charset   The incoming character encoding
   * @param  string $out_charset  The outgoing character encoding
   * @param  string $string       The string to convert
   * @return string  The converted string
   */
  static private function iconv($in_charset, $out_charset, $string)
  {
      return iconv(
          $in_charset,
          $out_charset,
          $string
      );
  }
  /**
   * Compares strings in a case-insensitive manner, with the resulting order having characters that are based on ASCII letters placed after the relative ASCII characters
   *
   * Both strings are lowercased with ::lower() and then handed to ::cmp().
   *
   * Please note that this function sorts based on English language sorting
   * rules only. Locale-specific sorting is done by
   * [http://php.net/strcoll strcoll()], however there are technical
   * limitations.
   *
   * @param  string $str1  The first string to compare
   * @param  string $str2  The second string to compare
   * @return integer  < 0 if $str1 < $str2, 0 if they are equal, > 0 if $str1 > $str2
   */
  static public function icmp($str1, $str2)
  {
      return self::cmp(
          self::lower($str1),
          self::lower($str2)
      );
  }
  /**
   * Compares strings using a natural order algorithm in a case-insensitive manner, with the resulting order having latin characters that are based on ASCII letters placed after the relative ASCII characters
   *
   * Both strings are lowercased with ::lower() and then handed to ::natcmp().
   *
   * Please note that this function sorts based on English language sorting
   * rules only. Locale-specific sorting is done by
   * [http://php.net/strcoll strcoll()], however there are technical
   * limitations.
   *
   * @param  string $str1  The first string to compare
   * @param  string $str2  The second string to compare
   * @return integer  `< 0` if `$str1 < $str2`, `0` if they are equal, `> 0` if `$str1 > $str2`
   */
  static public function inatcmp($str1, $str2)
  {
      return self::natcmp(
          self::lower($str1),
          self::lower($str2)
      );
  }
  /**
   * Finds the first position (in characters) of the search value in the string - case is ignored when performing a match
   *
   * Pure ASCII haystacks are passed to `stripos()`. Otherwise `mb_stripos()`
   * is used when it is available, and if not both strings are lowercased
   * with ::lower() and searched with ::pos().
   *
   * @param  string  $haystack  The string to search in
   * @param  string  $needle    The string to search for. This match will be done in a case-insensitive manner.
   * @param  integer $offset    The character position to start searching from
   * @return mixed  The integer character position of the first occurrence of the needle or `FALSE` if no match
   */
  static public function ipos($haystack, $needle, $offset=0)
  {
      $is_ascii = !self::detect($haystack);

      // We get better performance falling back for ASCII strings
      if ($is_ascii) {
          return stripos($haystack, $needle, $offset);
      }

      if (self::$mbstring_available === NULL) {
          self::checkMbString();
      }

      $can_use_mbstring = self::$mbstring_available && function_exists('mb_stripos');
      if (!$can_use_mbstring) {
          $lower_haystack = self::lower($haystack);
          $lower_needle   = self::lower($needle);
          return self::pos($lower_haystack, $lower_needle, $offset);
      }

      return mb_stripos($haystack, $needle, $offset, 'UTF-8');
  }
  /**
   * Replaces matching parts of the string, with matches being done in a case-insensitive manner
   *
   * If `$search` and `$replace` are both arrays and `$replace` is shorter,
   * the extra `$search` string will be replaced with an empty string. If
   * `$search` is an array and `$replace` is a string, all `$search` values
   * will be replaced with the string specified.
   *
   * Search values are always matched literally and replacement values are
   * always inserted literally - regular expression syntax and `$1`/`\1`
   * style back-references have no special meaning in either.
   *
   * @param  string $string   The string to perform the replacements on
   * @param  mixed  $search   The string (or array of strings) to search for - see method description for details
   * @param  mixed  $replace  The string (or array of strings) to replace with - see method description for details
   * @return string  The input string with the specified replacements
   */
  static public function ireplace($string, $search, $replace)
  {
      // Each search value becomes a literal, case-insensitive, UTF-8 aware pattern
      if (is_array($search)) {
          $patterns = array();
          foreach ($search as $key => $needle) {
              $patterns[$key] = '#' . preg_quote($needle, '#') . '#ui';
          }
      } else {
          $patterns = '#' . preg_quote($search, '#') . '#ui';
      }

      // Backslashes and dollar signs are escaped so preg_replace() does not treat them as back-references
      $escapes = array('\\' => '\\\\', '$' => '\\$');

      // strtr() only accepts strings, so an array of replacements has to be escaped one value at a time
      if (is_array($replace)) {
          $replacements = array();
          foreach ($replace as $key => $replacement) {
              $replacements[$key] = strtr($replacement, $escapes);
          }
      } else {
          $replacements = strtr($replace, $escapes);
      }

      return preg_replace($patterns, $replacements, $string);
  }
  /**
   * Finds the last position (in characters) of the search value in the string - case is ignored when performing a match
   *
   * Pure ASCII haystacks are passed to `strripos()`. Otherwise
   * `mb_strripos()` is used when it is available, and if not both strings
   * are lowercased with ::lower() and searched with ::rpos().
   *
   * @param  string  $haystack  The string to search in
   * @param  string  $needle    The string to search for. This match will be done in a case-insensitive manner.
   * @param  integer $offset    The character position to start searching from. A negative value will stop looking that many characters from the end of the string
   * @return mixed  The integer character position of the last occurrence of the needle or `FALSE` if no match
   */
  static public function irpos($haystack, $needle, $offset=0)
  {
      $is_ascii = !self::detect($haystack);

      // We get better performance falling back for ASCII strings
      if ($is_ascii) {
          return strripos($haystack, $needle, $offset);
      }

      if (self::$mbstring_available === NULL) {
          self::checkMbString();
      }

      $can_use_mbstring = self::$mbstring_available && function_exists('mb_strripos');
      if (!$can_use_mbstring) {
          $lower_haystack = self::lower($haystack);
          $lower_needle   = self::lower($needle);
          return self::rpos($lower_haystack, $lower_needle, $offset);
      }

      return mb_strripos($haystack, $needle, $offset, 'UTF-8');
  }
  /**
   * Matches a string needle in the string haystack, returning a substring from the beginning of the needle to the end of the haystack
   *
   * Can optionally return the part of the haystack before the needle. Matching
   * is done in a case-insensitive manner.
   *
   * @param  string  $haystack       The string to search in
   * @param  string  $needle         The string to search for. This match will be done in a case-insensitive manner.
   * @param  boolean $before_needle  If a substring of the haystack before the needle should be returned instead of the substring from the needle to the end of the haystack
   * @return mixed  The specified part of the haystack, or `FALSE` if the needle was not found
   */
  static public function istr($haystack, $needle, $before_needle=FALSE)
  {
      // We get better performance falling back for ASCII strings. This
      // shortcut is only taken when the part from the needle onwards is wanted.
      if ($before_needle == FALSE && !self::detect($haystack)) {
          return stristr($haystack, $needle);
      }

      if (self::$mbstring_available === NULL) {
          self::checkMbString();
      }

      if (self::$mbstring_available && function_exists('mb_stristr')) {
          return mb_stristr($haystack, $needle, $before_needle, 'UTF-8');
      }

      // NOTE(review): the byte position is found in the lowercased copies and
      // then applied to the original haystack, which assumes lowercasing never
      // changes the byte length of a character - confirm against the case maps
      $pos = strpos(self::lower($haystack), self::lower($needle));

      // Without this check a missing needle would feed FALSE to substr(),
      // returning an empty string or the whole haystack instead of FALSE
      if ($pos === FALSE) {
          return FALSE;
      }

      if ($before_needle) {
          return substr($haystack, 0, $pos);
      }

      return substr($haystack, $pos);
  }
  /**
   * Determines the length (in characters) of a string
   *
   * Uses `mb_strlen()` when the mbstring extension is available. Otherwise
   * the string is passed through `utf8_decode()` and the resulting byte
   * count is used.
   *
   * @param  string $string  The string to measure
   * @return integer  The number of characters in the string
   */
  static public function len($string)
  {
      if (self::$mbstring_available === NULL) {
          self::checkMbString();
      }

      if (!self::$mbstring_available) {
          return strlen(utf8_decode($string));
      }

      return mb_strlen($string, 'UTF-8');
  }
  /**
   * Converts all uppercase characters to lowercase
   *
   * Pure ASCII strings are passed to `strtolower()`. Otherwise
   * `mb_strtolower()` is used when mbstring is available, followed by a
   * translation with the class's fix-up map. Without mbstring the class's own
   * upper to lower map is used instead.
   *
   * @param  string $string  The string to convert
   * @return string  The input string with all uppercase characters in lowercase
   */
  static public function lower($string)
  {
      $is_ascii = !self::detect($string);

      // We get better performance falling back for ASCII strings
      if ($is_ascii) {
          return strtolower($string);
      }

      if (self::$mbstring_available === NULL) {
          self::checkMbString();
      }

      if (!self::$mbstring_available) {
          return strtr($string, self::$upper_to_lower);
      }

      // For some reason mb_strtolower misses some characters, so those are fixed afterwards
      $lowered = mb_strtolower($string, 'utf-8');
      return strtr($lowered, self::$mb_upper_to_lower_fix);
  }
  /**
   * Trims whitespace, or any specified characters, from the beginning of a string
   *
   * When no characters are specified the native `ltrim()` is used. Otherwise
   * the characters are turned into a UTF-8 aware character class, where two
   * characters separated by `..` (such as `a..z`) form a range and a hyphen
   * always stands for a literal hyphen.
   *
   * @param  string $string    The string to trim
   * @param  string $charlist  The characters to trim
   * @return string  The trimmed string
   */
  static public function ltrim($string, $charlist=NULL)
  {
      // NULL is checked explicitly since passing it to strlen() is deprecated in newer versions of PHP
      if ($charlist === NULL || strlen($charlist) === 0) {
          return ltrim($string);
      }

      $search = preg_quote($charlist, '#');

      // Hyphens must be escaped to be literal inside of a character class.
      // preg_quote() only does this itself as of PHP 5.3, and escaping a
      // second time would turn `\-` into an escaped backslash followed by a
      // bare hyphen, which silently creates a range.
      if (preg_quote('-') === '-') {
          $search = str_replace('-', '\\-', $search);
      }

      // The `..` range syntax (quoted to `\.\.` above) becomes a character class range
      $search = str_replace('\.\.', '-', $search);

      return preg_replace('#^[' . $search . ']+#Du', '', $string);
  }
  955. /**
  956. * Compares strings using a natural order algorithm, with the resulting order having latin characters that are based on ASCII letters placed after the relative ASCII characters
  957. *
  958. * Please note that this function sorts based on English language sorting
  959. * rules only. Locale-sepcific sorting is done by
  960. * [http://php.net/strcoll strcoll()], however there are technical
  961. * limitations.
  962. *
  963. * @param …

Large files are truncated, but you can click here to view the full file