PageRenderTime 88ms CodeModel.GetById 11ms RepoModel.GetById 1ms app.codeStats 0ms

/vendor/patchwork/utf8/class/Patchwork/PHP/Shim/Mbstring.php

https://gitlab.com/xolotsoft/pumasruiz
PHP | 442 lines | 314 code | 74 blank | 54 comment | 75 complexity | 70301ad0d06b41e2efdd93a528d5b6cb MD5 | raw file
  1. <?php // vi: set fenc=utf-8 ts=4 sw=4 et:
  2. /*
  3. * Copyright (C) 2013 Nicolas Grekas - p@tchwork.com
  4. *
  5. * This library is free software; you can redistribute it and/or modify it
  6. * under the terms of the (at your option):
  7. * Apache License v2.0 (http://apache.org/licenses/LICENSE-2.0.txt), or
  8. * GNU General Public License v2.0 (http://gnu.org/licenses/gpl-2.0.txt).
  9. */
  10. namespace Patchwork\PHP\Shim;
  11. /**
  12. * Partial mbstring implementation in PHP, iconv based, UTF-8 centric.
  13. *
  14. * Implemented:
  15. * - mb_convert_encoding - Convert character encoding
  16. * - mb_decode_mimeheader - Decode string in MIME header field
  17. * - mb_encode_mimeheader - Encode string for MIME header XXX NATIVE IMPLEMENTATION IS REALLY BUGGED
  18. * - mb_convert_case - Perform case folding on a string
  19. * - mb_internal_encoding - Set/Get internal character encoding
  20. * - mb_list_encodings - Returns an array of all supported encodings
  21. * - mb_strlen - Get string length
  22. * - mb_strpos - Find position of first occurrence of string in a string
  23. * - mb_strrpos - Find position of last occurrence of a string in a string
  24. * - mb_strtolower - Make a string lowercase
  25. * - mb_strtoupper - Make a string uppercase
  26. * - mb_substitute_character - Set/Get substitution character
  27. * - mb_substr - Get part of string
  28. * - mb_stripos - Finds position of first occurrence of a string within another, case insensitive
  29. * - mb_stristr - Finds first occurrence of a string within another, case insensitive
  30. * - mb_strrchr - Finds the last occurrence of a character in a string within another
  31. * - mb_strrichr - Finds the last occurrence of a character in a string within another, case insensitive
  32. * - mb_strripos - Finds position of last occurrence of a string within another, case insensitive
  33. * - mb_strstr - Finds first occurrence of a string within another
  34. *
  35. * Not implemented:
  36. * - mb_convert_kana - Convert "kana" one from another ("zen-kaku", "han-kaku" and more)
  37. * - mb_convert_variables - Convert character code in variable(s)
  38. * - mb_decode_numericentity - Decode HTML numeric string reference to character
  39. * - mb_encode_numericentity - Encode character to HTML numeric string reference
  40. * - mb_ereg* - Regular expression with multibyte support
  41. * - mb_get_info - Get internal settings of mbstring
  42. * - mb_http_input - Detect HTTP input character encoding
  43. * - mb_http_output - Set/Get HTTP output character encoding
  44. * - mb_list_mime_names - Returns an array or string of all supported mime names
  45. * - mb_output_handler - Callback function converts character encoding in output buffer
  46. * - mb_parse_str - Parse GET/POST/COOKIE data and set global variable
  47. * - mb_preferred_mime_name - Get MIME charset string
  48. * - mb_regex_encoding - Returns current encoding for multibyte regex as string
  49. * - mb_regex_set_options - Set/Get the default options for mbregex functions
  50. * - mb_send_mail - Send encoded mail
  51. * - mb_split - Split multibyte string using regular expression
  52. * - mb_strcut - Get part of string
  53. * - mb_strimwidth - Get truncated string with specified width
  54. * - mb_strwidth - Return width of string
  55. * - mb_substr_count - Count the number of substring occurrences
  56. */
  57. class Mbstring
  58. {
  59. const MB_CASE_FOLD = PHP_INT_MAX;
  60. protected static
  61. $encoding_list = array('ASCII', 'UTF-8'),
  62. $language = 'neutral',
  63. $internal_encoding = 'UTF-8',
  64. $caseFold = array(
  65. array('µ','ſ',"\xCD\x85",'ς',"\xCF\x90","\xCF\x91","\xCF\x95","\xCF\x96","\xCF\xB0","\xCF\xB1","\xCF\xB5","\xE1\xBA\x9B","\xE1\xBE\xBE"),
  66. array('μ','s','ι', 'σ','β', 'θ', 'φ', 'π', 'κ', 'ρ', 'ε', "\xE1\xB9\xA1",'ι' )
  67. );
  68. static function mb_convert_encoding($s, $to_encoding, $from_encoding = INF)
  69. {
  70. INF === $from_encoding && $from_encoding = self::$internal_encoding;
  71. $from_encoding = strtolower($from_encoding);
  72. $to_encoding = strtolower($to_encoding);
  73. if ('base64' === $from_encoding)
  74. {
  75. $s = base64_decode($s);
  76. $from_encoding = $to_encoding;
  77. }
  78. if ('base64' === $to_encoding) return base64_encode($s);
  79. if ('html-entities' === $to_encoding)
  80. {
  81. 'html-entities' === $from_encoding && $from_encoding = 'Windows-1252';
  82. 'utf-8' === $from_encoding
  83. || 'utf8' === $from_encoding
  84. || $s = iconv($from_encoding, 'UTF-8//IGNORE', $s);
  85. return preg_replace_callback('/[\x80-\xFF]+/', array(__CLASS__, 'html_encoding_callback'), $s);
  86. }
  87. if ('html-entities' === $from_encoding)
  88. {
  89. $s = html_entity_decode($s, ENT_COMPAT, 'UTF-8');
  90. $from_encoding = 'UTF-8';
  91. }
  92. return iconv($from_encoding, $to_encoding . '//IGNORE', $s);
  93. }
  94. static function mb_decode_mimeheader($s)
  95. {
  96. return iconv_mime_decode($s, 2, self::$internal_encoding . '//IGNORE');
  97. }
  98. static function mb_encode_mimeheader($s, $charset = INF, $transfer_encoding = INF, $linefeed = INF, $indent = INF)
  99. {
  100. user_error('mb_encode_mimeheader() is bugged. Please use iconv_mime_encode() instead', E_USER_WARNING);
  101. }
  102. static function mb_convert_case($s, $mode, $encoding = INF)
  103. {
  104. if ('' === $s .= '') return '';
  105. if (INF === $encoding) $encoding = self::$internal_encoding;
  106. else $encoding = strtoupper($encoding);
  107. if ('UTF-8' === $encoding || 'UTF8' === $encoding) $encoding = INF;
  108. else $s = iconv($encoding, 'UTF-8//IGNORE', $s);
  109. if (MB_CASE_TITLE == $mode)
  110. {
  111. $s = preg_replace_callback('/\b\p{Ll}/u', array(__CLASS__, 'title_case_upper'), $s);
  112. $s = preg_replace_callback('/\B[\p{Lu}\p{Lt}]+/u', array(__CLASS__, 'title_case_lower'), $s);
  113. }
  114. else
  115. {
  116. if (MB_CASE_UPPER == $mode)
  117. {
  118. static $upper;
  119. isset($upper) || $upper = static::getData('upperCase');
  120. $map = $upper;
  121. }
  122. else
  123. {
  124. if (self::MB_CASE_FOLD === $mode) $s = str_replace(self::$caseFold[0], self::$caseFold[1], $s);
  125. static $lower;
  126. isset($lower) || $lower = static::getData('lowerCase');
  127. $map = $lower;
  128. }
  129. static $ulen_mask = array("\xC0" => 2, "\xD0" => 2, "\xE0" => 3, "\xF0" => 4);
  130. $i = 0;
  131. $len = strlen($s);
  132. while ($i < $len)
  133. {
  134. $ulen = $s[$i] < "\x80" ? 1 : $ulen_mask[$s[$i] & "\xF0"];
  135. $uchr = substr($s, $i, $ulen);
  136. $i += $ulen;
  137. if (isset($map[$uchr]))
  138. {
  139. $uchr = $map[$uchr];
  140. $nlen = strlen($uchr);
  141. if ($nlen == $ulen)
  142. {
  143. $nlen = $i;
  144. do $s[--$nlen] = $uchr[--$ulen];
  145. while ($ulen);
  146. }
  147. else
  148. {
  149. $s = substr_replace($s, $uchr, $i - $ulen, $ulen);
  150. $len += $nlen - $ulen;
  151. $i += $nlen - $ulen;
  152. }
  153. }
  154. }
  155. }
  156. if (INF === $encoding) return $s;
  157. else return iconv('UTF-8', $encoding, $s);
  158. }
  159. static function mb_internal_encoding($encoding = INF)
  160. {
  161. if (INF === $encoding) return self::$internal_encoding;
  162. else $encoding = strtoupper($encoding);
  163. if ('UTF-8' === $encoding || 'UTF8' === $encoding || false !== @iconv($encoding, $encoding, ' '))
  164. {
  165. self::$internal_encoding = 'UTF8' === $encoding ? 'UTF-8' : $encoding;
  166. return true;
  167. }
  168. return false;
  169. }
  170. static function mb_language($lang = INF)
  171. {
  172. if (INF === $lang) return self::$language;
  173. switch ($lang = strtolower($lang))
  174. {
  175. case 'uni':
  176. case 'neutral':
  177. self::$language = $lang;
  178. return true;
  179. }
  180. return false;
  181. }
  182. static function mb_list_encodings()
  183. {
  184. return array('UTF-8');
  185. }
  186. static function mb_encoding_aliases($encoding)
  187. {
  188. switch (strtolower($encoding))
  189. {
  190. case 'utf8':
  191. case 'utf-8': return array('utf8');
  192. }
  193. return false;
  194. }
  195. static function mb_check_encoding($var = INF, $encoding = INF)
  196. {
  197. if (INF === $encoding)
  198. {
  199. if (INF === $var) return false;
  200. $encoding = self::$internal_encoding;
  201. }
  202. return false !== mb_detect_encoding($var, array($encoding), true);
  203. }
  204. static function mb_detect_encoding($str, $encoding_list = INF, $strict = false)
  205. {
  206. if (INF === $encoding_list) $encoding_list = self::$encoding_list;
  207. else
  208. {
  209. if (! is_array($encoding_list)) $encoding_list = array_map('trim', explode(',', $encoding_list));
  210. $encoding_list = array_map('strtoupper', $encoding_list);
  211. }
  212. foreach ($encoding_list as $enc)
  213. {
  214. switch ($enc)
  215. {
  216. case 'ASCII':
  217. if (! preg_match('/[\x80-\xFF]/', $str)) return $enc;
  218. break;
  219. case 'UTF8':
  220. case 'UTF-8':
  221. if (preg_match('//u', $str)) return $enc;
  222. break;
  223. default:
  224. return strncmp($enc, 'ISO-8859-', 9) ? false : $enc;
  225. }
  226. }
  227. return false;
  228. }
  229. static function mb_detect_order($encoding_list = INF)
  230. {
  231. if (INF === $encoding_list) return self::$encoding_list;
  232. if (! is_array($encoding_list)) $encoding_list = array_map('trim', explode(',', $encoding_list));
  233. $encoding_list = array_map('strtoupper', $encoding_list);
  234. foreach ($encoding_list as $enc)
  235. {
  236. switch ($enc)
  237. {
  238. default: if (strncmp($enc, 'ISO-8859-', 9)) return false;
  239. case 'ASCII':
  240. case 'UTF8':
  241. case 'UTF-8':
  242. }
  243. }
  244. self::$encoding_list = $encoding_list;
  245. return true;
  246. }
  247. static function mb_strlen($s, $encoding = INF)
  248. {
  249. INF === $encoding && $encoding = self::$internal_encoding;
  250. return iconv_strlen($s, $encoding . '//IGNORE');
  251. }
  252. static function mb_strpos ($haystack, $needle, $offset = 0, $encoding = INF)
  253. {
  254. INF === $encoding && $encoding = self::$internal_encoding;
  255. if ('' === $needle .= '')
  256. {
  257. user_error(__METHOD__ . ': Empty delimiter', E_USER_WARNING);
  258. return false;
  259. }
  260. else return iconv_strpos($haystack, $needle, $offset, $encoding . '//IGNORE');
  261. }
  262. static function mb_strrpos($haystack, $needle, $offset = 0, $encoding = INF)
  263. {
  264. INF === $encoding && $encoding = self::$internal_encoding;
  265. if ($offset != (int) $offset)
  266. {
  267. $offset = 0;
  268. }
  269. else if ($offset = (int) $offset)
  270. {
  271. $haystack = self::mb_substr($haystack, $offset, 2147483647, $encoding);
  272. }
  273. $pos = iconv_strrpos($haystack, $needle, $encoding . '//IGNORE');
  274. return false !== $pos ? $offset + $pos : false;
  275. }
  276. static function mb_strtolower($s, $encoding = INF)
  277. {
  278. return self::mb_convert_case($s, MB_CASE_LOWER, $encoding);
  279. }
  280. static function mb_strtoupper($s, $encoding = INF)
  281. {
  282. return self::mb_convert_case($s, MB_CASE_UPPER, $encoding);
  283. }
  284. static function mb_substitute_character($c = INF)
  285. {
  286. return INF !== $c ? false : 'none';
  287. }
  288. static function mb_substr($s, $start, $length = null, $encoding = INF)
  289. {
  290. INF === $encoding && $encoding = self::$internal_encoding;
  291. if ($start < 0)
  292. {
  293. $start = iconv_strlen($s, $encoding . '//IGNORE') + $start;
  294. if ($start < 0) $start = 0;
  295. }
  296. if (null === $length) $length = 2147483647;
  297. else if ($length < 0)
  298. {
  299. $length = iconv_strlen($s, $encoding . '//IGNORE') + $length - $start;
  300. if ($length < 0) return '';
  301. }
  302. return iconv_substr($s, $start, $length, $encoding . '//IGNORE') . '';
  303. }
  304. static function mb_stripos($haystack, $needle, $offset = 0, $encoding = INF)
  305. {
  306. INF === $encoding && $encoding = self::$internal_encoding;
  307. $haystack = self::mb_convert_case($haystack, self::MB_CASE_FOLD, $encoding);
  308. $needle = self::mb_convert_case($needle, self::MB_CASE_FOLD, $encoding);
  309. return self::mb_strpos($haystack, $needle, $offset, $encoding);
  310. }
  311. static function mb_stristr($haystack, $needle, $part = false, $encoding = INF)
  312. {
  313. $pos = self::mb_stripos($haystack, $needle, 0, $encoding);
  314. return self::getSubpart($pos, $part, $haystack, $encoding);
  315. }
  316. static function mb_strrchr($haystack, $needle, $part = false, $encoding = INF)
  317. {
  318. INF === $encoding && $encoding = self::$internal_encoding;
  319. $needle = self::mb_substr($needle, 0, 1, $encoding);
  320. $pos = iconv_strrpos($haystack, $needle, $encoding);
  321. return self::getSubpart($pos, $part, $haystack, $encoding);
  322. }
  323. static function mb_strrichr($haystack, $needle, $part = false, $encoding = INF)
  324. {
  325. $needle = self::mb_substr($needle, 0, 1, $encoding);
  326. $pos = self::mb_strripos($haystack, $needle, $encoding);
  327. return self::getSubpart($pos, $part, $haystack, $encoding);
  328. }
  329. static function mb_strripos($haystack, $needle, $offset = 0, $encoding = INF)
  330. {
  331. INF === $encoding && $encoding = self::$internal_encoding;
  332. $haystack = self::mb_convert_case($haystack, self::MB_CASE_FOLD, $encoding);
  333. $needle = self::mb_convert_case($needle, self::MB_CASE_FOLD, $encoding);
  334. return self::mb_strrpos($haystack, $needle, $offset, $encoding);
  335. }
  336. static function mb_strstr($haystack, $needle, $part = false, $encoding = INF)
  337. {
  338. $pos = strpos($haystack, $needle);
  339. if (false === $pos) return false;
  340. if ($part) return substr($haystack, 0, $pos);
  341. else return substr($haystack, $pos);
  342. }
  343. protected static function getSubpart($pos, $part, $haystack, $encoding)
  344. {
  345. INF === $encoding && $encoding = self::$internal_encoding;
  346. if (false === $pos) return false;
  347. if ($part) return self::mb_substr($haystack, 0, $pos, $encoding);
  348. else return self::mb_substr($haystack, $pos, null, $encoding);
  349. }
  350. protected static function html_encoding_callback($m)
  351. {
  352. return htmlentities($m[0], ENT_COMPAT, 'UTF-8');
  353. }
  354. protected static function title_case_lower($s)
  355. {
  356. return self::mb_convert_case($s[0], MB_CASE_LOWER, 'UTF-8');
  357. }
  358. protected static function title_case_upper($s)
  359. {
  360. return self::mb_convert_case($s[0], MB_CASE_UPPER, 'UTF-8');
  361. }
  362. protected static function getData($file)
  363. {
  364. $file = __DIR__ . '/unidata/' . $file . '.ser';
  365. if (file_exists($file)) return unserialize(file_get_contents($file));
  366. else return false;
  367. }
  368. }