PageRenderTime 46ms CodeModel.GetById 15ms RepoModel.GetById 1ms app.codeStats 0ms

/vendor/patchwork/utf8/src/Patchwork/Utf8.php

https://gitlab.com/xolotsoft/pumasruiz
PHP | 773 lines | 649 code | 101 blank | 23 comment | 81 complexity | d5c98edafe7b5229a7f2798e0d58dba6 MD5 | raw file
  1. <?php
  2. /*
  3. * Copyright (C) 2013 Nicolas Grekas - p@tchwork.com
  4. *
  5. * This library is free software; you can redistribute it and/or modify it
  6. * under the terms of the (at your option):
  7. * Apache License v2.0 (http://apache.org/licenses/LICENSE-2.0.txt), or
  8. * GNU General Public License v2.0 (http://gnu.org/licenses/gpl-2.0.txt).
  9. */
  10. namespace Patchwork;
  11. use Normalizer as n;
  12. /**
  13. * UTF-8 Grapheme Cluster aware string manipulations implementing the quasi complete
  14. * set of native PHP string functions that need UTF-8 awareness and more.
  15. * Missing are printf-family functions.
  16. */
  class Utf8
  {
      /** @var string|null Stream prefix lazily computed by wrapPath(). */
      protected static $pathPrefix;

      /** @var array Pair of (search, replace) lists applied first by strtocasefold(). */
      protected static $commonCaseFold = array(
          array('µ','ſ',"\xCD\x85",'ς',"\xCF\x90","\xCF\x91","\xCF\x95","\xCF\x96","\xCF\xB0","\xCF\xB1","\xCF\xB5","\xE1\xBA\x9B","\xE1\xBE\xBE"),
          array('μ','s','ι', 'σ','β', 'θ', 'φ', 'π', 'κ', 'ρ', 'ε', "\xE1\xB9\xA1",'ι'),
      );

      /**
       * @var array UTF-8 encodings of the C1 code points U+0080..U+009F that the native
       *            utf8_encode() yields for Windows-1252 bytes 0x80..0x9F (unassigned bytes omitted).
       *            Written as escapes so the bytes survive any editor or re-encoding;
       *            entries line up index-by-index with self::$utf8.
       */
      protected static $cp1252 = array(
          "\xC2\x80", "\xC2\x82", "\xC2\x83", "\xC2\x84", "\xC2\x85", "\xC2\x86", "\xC2\x87", "\xC2\x88", "\xC2\x89",
          "\xC2\x8A", "\xC2\x8B", "\xC2\x8C", "\xC2\x8E", "\xC2\x91", "\xC2\x92", "\xC2\x93", "\xC2\x94", "\xC2\x95",
          "\xC2\x96", "\xC2\x97", "\xC2\x98", "\xC2\x99", "\xC2\x9A", "\xC2\x9B", "\xC2\x9C", "\xC2\x9E", "\xC2\x9F",
      );

      /** @var array The characters Windows-1252 actually assigns to the bytes listed in self::$cp1252. */
      protected static $utf8 = array('€','‚','ƒ','„','…','†','‡','ˆ','‰','Š','‹','Œ','Ž','‘','’','“','”','•','–','—','˜','™','š','›','œ','ž','Ÿ');

      /**
       * Tells whether $s is valid UTF-8, using PCRE's own validation of the subject.
       *
       * @param string $s
       *
       * @return bool
       */
      public static function isUtf8($s)
      {
          return (bool) preg_match('//u', $s); // Since PHP 5.2.5, this also excludes invalid five and six bytes sequences
      }

      /**
       * Generic UTF-8 to ASCII transliteration.
       *
       * Pure-ASCII input is returned untouched. Otherwise the string is NFKC-normalized
       * and each multi-byte character is transliterated with iconv, then with the
       * 'translit_extra' data table, then with the first byte of its NFD form when that
       * byte is ASCII, and finally replaced by $subst_chr.
       *
       * @param string $s
       * @param string $subst_chr Replacement for characters that cannot be transliterated.
       *
       * @return string
       */
      public static function toAscii($s, $subst_chr = '?')
      {
          if (preg_match("/[\x80-\xFF]/", $s)) {
              static $translitExtra = array();
              $translitExtra or $translitExtra = static::getData('translit_extra');

              $s = n::normalize($s, n::NFKC);
              $glibc = 'glibc' === ICONV_IMPL;

              preg_match_all('/./u', $s, $s);

              foreach ($s[0] as &$c) {
                  // Single-byte (ASCII) characters need no work.
                  if (!isset($c[1])) {
                      continue;
                  }

                  if ($glibc) {
                      $t = iconv('UTF-8', 'ASCII//TRANSLIT', $c);
                  } else {
                      $t = iconv('UTF-8', 'ASCII//IGNORE//TRANSLIT', $c);

                      if (!isset($t[0])) {
                          $t = '?';
                      } elseif (isset($t[1])) {
                          // Drop the ASCII accent markers some iconv builds prepend.
                          $t = ltrim($t, '\'`"^~');
                      }
                  }

                  if ('?' === $t) {
                      if (isset($translitExtra[$c])) {
                          $t = $translitExtra[$c];
                      } else {
                          $t = n::normalize($c, n::NFD);

                          if ($t[0] < "\x80") {
                              $t = $t[0];
                          } else {
                              $t = $subst_chr;
                          }
                      }
                  }

                  $c = $t;
              }
              // Break the reference so later writes to $c cannot alter the last element.
              unset($c);

              $s = implode('', $s[0]);
          }

          return $s;
      }

      /**
       * Prefixes $path with a stream wrapper scheme chosen once per process.
       *
       * 'wfio://' is used when the wfio extension is loaded; on Windows with the COM class
       * available, a randomly named wrapper backed by Patchwork\Utf8\WindowsStreamWrapper
       * is registered; otherwise 'file://' is used (with a notice on Windows).
       *
       * @param string $path
       *
       * @return string
       */
      public static function wrapPath($path = '')
      {
          if (null === static::$pathPrefix) {
              static $hasWfio;
              isset($hasWfio) or $hasWfio = extension_loaded('wfio');

              if ($hasWfio) {
                  static::$pathPrefix = 'wfio://';
              } elseif ('\\' === DIRECTORY_SEPARATOR && class_exists('COM', false)) {
                  static::$pathPrefix = 'utf8'.mt_rand();
                  stream_wrapper_register(static::$pathPrefix, 'Patchwork\Utf8\WindowsStreamWrapper');
                  static::$pathPrefix .= '://';
              } else {
                  if ('\\' === DIRECTORY_SEPARATOR) {
                      trigger_error('The `wfio` or `com_dotnet` extension is required to handle UTF-8 filesystem access on Windows');
                  }
                  static::$pathPrefix = 'file://';
              }
          }

          return static::$pathPrefix.$path;
      }

      /**
       * Recursively normalizes the strings found in $var.
       *
       * Arrays and objects are walked; strings get their line endings converted to "\n",
       * are normalized to $normalization_form (falling back to self::utf8_encode() when
       * normalization yields nothing), and get $leading_combining prepended when they
       * start with a combining mark. Other types are returned as is.
       *
       * @param mixed  $var
       * @param int    $normalization_form Normalizer form constant.
       * @param string $leading_combining  Prefix for strings starting with a \p{Mn} character.
       *
       * @return mixed
       */
      public static function filter($var, $normalization_form = 4 /* n::NFC */, $leading_combining = '◌')
      {
          switch (gettype($var)) {
              case 'array':
                  foreach ($var as $k => $v) {
                      $var[$k] = static::filter($v, $normalization_form, $leading_combining);
                  }
                  break;

              case 'object':
                  foreach ($var as $k => $v) {
                      $var->$k = static::filter($v, $normalization_form, $leading_combining);
                  }
                  break;

              case 'string':
                  if (false !== strpos($var, "\r")) {
                      // Workaround https://bugs.php.net/65732
                      $var = str_replace("\r\n", "\n", $var);
                      $var = strtr($var, "\r", "\n");
                  }

                  if (preg_match('/[\x80-\xFF]/', $var)) {
                      if (n::isNormalized($var, $normalization_form)) {
                          // Any non-empty marker: tells the check below the input was valid.
                          $n = '-';
                      } else {
                          $n = n::normalize($var, $normalization_form);

                          if (isset($n[0])) {
                              $var = $n;
                          } else {
                              $var = static::utf8_encode($var);
                          }
                      }

                      if ($var[0] >= "\x80" && isset($n[0], $leading_combining[0]) && preg_match('/^\p{Mn}/u', $var)) {
                          // Prevent leading combining chars
                          // for NFC-safe concatenations.
                          $var = $leading_combining.$var;
                      }
                  }
                  break;
          }

          return $var;
      }

      /**
       * Unicode transformation for caseless matching.
       *
       * @see http://unicode.org/reports/tr21/tr21-5.html
       *
       * @param string $s
       * @param bool   $full Also apply the 'caseFolding_full' data table.
       *
       * @return string
       */
      public static function strtocasefold($s, $full = true)
      {
          $s = str_replace(self::$commonCaseFold[0], self::$commonCaseFold[1], $s);

          if ($full) {
              static $fullCaseFold = false;
              $fullCaseFold or $fullCaseFold = static::getData('caseFolding_full');

              // getData() returns false when the table is missing: skip the full folding then.
              if (is_array($fullCaseFold)) {
                  $s = str_replace($fullCaseFold[0], $fullCaseFold[1], $s);
              }
          }

          return static::strtolower($s);
      }

      /**
       * Generic case sensitive collation support for self::strnatcmp():
       * decomposes $s (NFD) and strips the non-spacing marks.
       *
       * @param string $s
       *
       * @return string
       */
      public static function strtonatfold($s)
      {
          $s = n::normalize($s, n::NFD);

          return preg_replace('/\p{Mn}+/u', '', $s);
      }

      // PHP string functions that need UTF-8 awareness

      /**
       * Native filter_input() whose result is passed through self::filter().
       * $option is forwarded only when four arguments were actually given.
       */
      public static function filter_input($type, $var, $filter = FILTER_DEFAULT, $option = null)
      {
          if (4 > func_num_args()) {
              $var = filter_input($type, $var, $filter);
          } else {
              $var = filter_input($type, $var, $filter, $option);
          }

          return static::filter($var);
      }

      /**
       * Native filter_input_array() whose result is passed through self::filter().
       * $def and $add_empty are forwarded only when at least two arguments were given.
       */
      public static function filter_input_array($type, $def = null, $add_empty = true)
      {
          if (2 > func_num_args()) {
              $a = filter_input_array($type);
          } else {
              $a = filter_input_array($type, $def, $add_empty);
          }

          return static::filter($a);
      }

      /**
       * Native json_decode() whose result is passed through self::filter().
       * $options is ignored before PHP 5.4.
       */
      public static function json_decode($json, $assoc = false, $depth = 512, $options = 0)
      {
          if (PHP_VERSION_ID < 50400) {
              $json = json_decode($json, $assoc, $depth);
          } else {
              $json = json_decode($json, $assoc, $depth, $options);
          }

          return static::filter($json);
      }

      /**
       * Grapheme-cluster aware substr().
       * A one-time probe selects the shim workaround when the loaded intl shows PHP bug 62759.
       */
      public static function substr($s, $start, $len = 2147483647)
      {
          static $bug62759;
          isset($bug62759) or $bug62759 = extension_loaded('intl') && 'à' === grapheme_substr('éà', 1, -2);

          if ($bug62759) {
              return PHP\Shim\Intl::grapheme_substr_workaround62759($s, $start, $len);
          }

          return grapheme_substr($s, $start, $len);
      }

      /** Length of $s in grapheme clusters. */
      public static function strlen($s)
      {
          return grapheme_strlen($s);
      }

      /** Grapheme-cluster aware strpos(). */
      public static function strpos($s, $needle, $offset = 0)
      {
          return grapheme_strpos($s, $needle, $offset);
      }

      /** Grapheme-cluster aware strrpos(). */
      public static function strrpos($s, $needle, $offset = 0)
      {
          return grapheme_strrpos($s, $needle, $offset);
      }

      /**
       * Grapheme-cluster aware stripos().
       * On PHP < 5.4.18 and on 5.5.0 the position is computed with mb_stripos() and
       * converted from code points to grapheme clusters.
       */
      public static function stripos($s, $needle, $offset = 0)
      {
          if (50418 > PHP_VERSION_ID || 50500 == PHP_VERSION_ID) {
              // Don't use grapheme_stripos because of https://bugs.php.net/61860
              if (!preg_match('//u', $s .= '')) {
                  return false;
              }
              if ($offset < 0) {
                  $offset = 0;
              }
              // Both "not found" (false) and "found at 0" need no conversion.
              if (!$needle = mb_stripos($s, $needle .= '', $offset, 'UTF-8')) {
                  return $needle;
              }

              return grapheme_strlen(iconv_substr($s, 0, $needle, 'UTF-8'));
          }

          return grapheme_stripos($s, $needle, $offset);
      }

      /**
       * Grapheme-cluster aware strripos().
       * Uses the same legacy path as self::stripos() on the affected PHP versions.
       */
      public static function strripos($s, $needle, $offset = 0)
      {
          if (50418 > PHP_VERSION_ID || 50500 == PHP_VERSION_ID) {
              // Don't use grapheme_strripos because of https://bugs.php.net/61860
              if (!preg_match('//u', $s .= '')) {
                  return false;
              }
              if ($offset < 0) {
                  $offset = 0;
              }
              // Both "not found" (false) and "found at 0" need no conversion.
              if (!$needle = mb_strripos($s, $needle .= '', $offset, 'UTF-8')) {
                  return $needle;
              }

              return grapheme_strlen(iconv_substr($s, 0, $needle, 'UTF-8'));
          }

          return grapheme_strripos($s, $needle, $offset);
      }

      /** UTF-8 stristr(); an empty needle yields false. */
      public static function stristr($s, $needle, $before_needle = false)
      {
          if ('' === $needle .= '') {
              return false;
          }

          return mb_stristr($s, $needle, $before_needle, 'UTF-8');
      }

      /** Grapheme-cluster aware strstr(). */
      public static function strstr($s, $needle, $before_needle = false)
      {
          return grapheme_strstr($s, $needle, $before_needle);
      }

      /** UTF-8 strrchr(), delegated to mbstring. */
      public static function strrchr($s, $needle, $before_needle = false)
      {
          return mb_strrchr($s, $needle, $before_needle, 'UTF-8');
      }

      /** Case-insensitive UTF-8 strrchr(), delegated to mbstring. */
      public static function strrichr($s, $needle, $before_needle = false)
      {
          return mb_strrichr($s, $needle, $before_needle, 'UTF-8');
      }

      /** UTF-8 strtolower(), delegated to mbstring. */
      public static function strtolower($s)
      {
          return mb_strtolower($s, 'UTF-8');
      }

      /** UTF-8 strtoupper(), delegated to mbstring. */
      public static function strtoupper($s)
      {
          return mb_strtoupper($s, 'UTF-8');
      }

      /**
       * Grapheme-cluster aware wordwrap().
       *
       * The input is mirrored into an ASCII "shadow" string holding one byte per grapheme
       * cluster (' ' for a space, '?' for anything else, '#' for an existing $break). The
       * native wordwrap() wraps that shadow, and the resulting '#' positions are mapped
       * back onto the real clusters.
       *
       * @return string|false False when the native wordwrap() rejects the arguments.
       */
      public static function wordwrap($s, $width = 75, $break = "\n", $cut = false)
      {
          // Let the native function validate the arguments.
          if (false === wordwrap('-', $width, $break, $cut)) {
              return false;
          }

          is_string($break) or $break = (string) $break;

          $w = '';
          $s = explode($break, $s);
          $iLen = count($s);
          $chars = array();

          if (1 === $iLen && '' === $s[0]) {
              return '';
          }

          for ($i = 0; $i < $iLen; ++$i) {
              if ($i) {
                  $chars[] = $break;
                  $w .= '#';
              }

              $c = $s[$i];
              unset($s[$i]);

              foreach (self::str_split($c) as $c) {
                  $chars[] = $c;
                  $w .= ' ' === $c ? ' ' : '?';
              }
          }

          $s = '';
          $j = 0;
          $b = $i = -1;
          $w = wordwrap($w, $width, '#', $cut);

          while (false !== $b = strpos($w, '#', $b + 1)) {
              // Copy the clusters standing before this break.
              for (++$i; $i < $b; ++$i) {
                  $s .= $chars[$j];
                  unset($chars[$j++]);
              }

              // The break replaces an existing break or a space; a "cut" break consumes nothing.
              if ($break === $chars[$j] || ' ' === $chars[$j]) {
                  unset($chars[$j++]);
              }

              $s .= $break;
          }

          return $s.implode('', $chars);
      }

      /**
       * Returns the UTF-8 encoding of code point $c (taken modulo 0x200000).
       *
       * @param int $c
       *
       * @return string
       */
      public static function chr($c)
      {
          if (0x80 > $c %= 0x200000) {
              return chr($c);
          }
          if (0x800 > $c) {
              return chr(0xC0 | $c >> 6).chr(0x80 | $c & 0x3F);
          }
          if (0x10000 > $c) {
              return chr(0xE0 | $c >> 12).chr(0x80 | $c >> 6 & 0x3F).chr(0x80 | $c & 0x3F);
          }

          return chr(0xF0 | $c >> 18).chr(0x80 | $c >> 12 & 0x3F).chr(0x80 | $c >> 6 & 0x3F).chr(0x80 | $c & 0x3F);
      }

      /**
       * Counts the occurrences of each grapheme cluster of $s.
       * A warning is raised for any $mode other than 1, but the count is still returned.
       *
       * @return array Cluster => number of occurrences.
       */
      public static function count_chars($s, $mode = 0)
      {
          if (1 != $mode) {
              user_error(__METHOD__.'(): the only allowed $mode is 1', E_USER_WARNING);
          }
          $s = self::str_split($s);

          return array_count_values($s);
      }

      /**
       * UTF-8 aware ltrim().
       *
       * @param string       $s
       * @param string|float $charlist Characters to strip; INF (the default) means \s.
       *
       * @return string
       */
      public static function ltrim($s, $charlist = INF)
      {
          if (INF === $charlist) {
              $charlist = '\s';
          } elseif ('' === $charlist .= '') {
              // Nothing to strip; an empty class would not even be a valid regex.
              return $s;
          } else {
              $charlist = self::rxClass($charlist);
          }

          return preg_replace("/^{$charlist}+/u", '', $s);
      }

      /**
       * Returns the code point of the first UTF-8 character of $s, 0 for an empty string.
       * The lead byte alone decides how many bytes are decoded; no validation is done.
       *
       * @param string $s
       *
       * @return int
       */
      public static function ord($s)
      {
          $a = ($s = unpack('C*', substr($s, 0, 4))) ? $s[1] : 0;

          if (0xF0 <= $a) {
              return (($a - 0xF0) << 18) + (($s[2] - 0x80) << 12) + (($s[3] - 0x80) << 6) + $s[4] - 0x80;
          }
          if (0xE0 <= $a) {
              return (($a - 0xE0) << 12) + (($s[2] - 0x80) << 6) + $s[3] - 0x80;
          }
          if (0xC0 <= $a) {
              return (($a - 0xC0) << 6) + $s[2] - 0x80;
          }

          return $a;
      }

      /**
       * UTF-8 aware rtrim().
       *
       * @param string       $s
       * @param string|float $charlist Characters to strip; INF (the default) means \s.
       *
       * @return string
       */
      public static function rtrim($s, $charlist = INF)
      {
          if (INF === $charlist) {
              $charlist = '\s';
          } elseif ('' === $charlist .= '') {
              // Nothing to strip; an empty class would not even be a valid regex.
              return $s;
          } else {
              $charlist = self::rxClass($charlist);
          }

          // "D": "$" must match at the very end only, never before a trailing newline,
          // otherwise characters standing before a final "\n" would be stripped.
          return preg_replace("/{$charlist}+$/uD", '', $s);
      }

      /** UTF-8 aware trim(): self::ltrim() then self::rtrim() with the same $charlist. */
      public static function trim($s, $charlist = INF)
      {
          return self::rtrim(self::ltrim($s, $charlist), $charlist);
      }

      /**
       * UTF-8 aware str_ireplace(), implemented with case-insensitive regular expressions.
       *
       * @param string|array $search
       * @param string|array $replace Literal replacement text(s).
       * @param string|array $subject
       * @param int|null     $count   Receives the number of replacements done.
       *
       * @return string|array
       */
      public static function str_ireplace($search, $replace, $subject, &$count = null)
      {
          $search = (array) $search;

          foreach ($search as $i => $s) {
              if ('' === $s .= '') {
                  // A pattern that can never match: an empty needle replaces nothing.
                  $s = '/^(?<=.)$/';
              } else {
                  $s = '/'.preg_quote($s, '/').'/ui';
              }

              $search[$i] = $s;
          }

          // Replacements are literal text: neutralize "$n" / "\n" back-reference syntax.
          $escape = array('\\' => '\\\\', '$' => '\\$');

          if (is_array($replace)) {
              foreach ($replace as $i => $r) {
                  $replace[$i] = strtr((string) $r, $escape);
              }
          } else {
              $replace = strtr((string) $replace, $escape);
          }

          return preg_replace($search, $replace, $subject, -1, $count);
      }

      /**
       * Grapheme-cluster aware str_pad().
       *
       * @param string $s
       * @param int    $len  Target length, in grapheme clusters.
       * @param string $pad
       * @param int    $type STR_PAD_RIGHT, STR_PAD_LEFT or STR_PAD_BOTH.
       *
       * @return string|null Null (after a warning) for an empty $pad or an unknown $type.
       */
      public static function str_pad($s, $len, $pad = ' ', $type = STR_PAD_RIGHT)
      {
          $slen = grapheme_strlen($s);

          if ($len <= $slen) {
              return $s;
          }

          $padlen = grapheme_strlen($pad);

          if (!$padlen) {
              // Guards the divisions below against a zero divisor.
              user_error(__METHOD__.'(): Padding string cannot be empty', E_USER_WARNING);

              return;
          }

          $freelen = (int) ($len - $slen);

          if (STR_PAD_RIGHT == $type) {
              return $s.self::padding($pad, $padlen, $freelen);
          }
          if (STR_PAD_LEFT == $type) {
              return self::padding($pad, $padlen, $freelen).$s;
          }
          if (STR_PAD_BOTH == $type) {
              // The right side gets the extra cluster when the free length is odd.
              $left = (int) floor($freelen / 2);

              return self::padding($pad, $padlen, $left).$s.self::padding($pad, $padlen, $freelen - $left);
          }

          user_error(__METHOD__.'(): Padding type has to be STR_PAD_LEFT, STR_PAD_RIGHT, or STR_PAD_BOTH', E_USER_WARNING);
      }

      /**
       * Builds $count grapheme clusters of padding by repeating $pad, truncating the last repetition.
       *
       * @param string $pad
       * @param int    $padlen Length of $pad in grapheme clusters, > 0.
       * @param int    $count
       *
       * @return string
       */
      protected static function padding($pad, $padlen, $count)
      {
          $rest = $count % $padlen;

          return str_repeat($pad, (int) ($count / $padlen)).($rest ? grapheme_substr($pad, 0, $rest) : '');
      }

      /** Randomly shuffles the grapheme clusters of $s. */
      public static function str_shuffle($s)
      {
          $s = self::str_split($s);
          shuffle($s);

          return implode('', $s);
      }

      /**
       * Splits $s into chunks of $len grapheme clusters.
       *
       * @param string $s
       * @param int    $len Chunk size; values below 1 are handed to the native str_split()
       *                    so that it reports the error.
       *
       * @return array|false
       */
      public static function str_split($s, $len = 1)
      {
          if (1 > $len = (int) $len) {
              $len = func_get_arg(1);

              return str_split($s, $len);
          }

          static $hasIntl;
          isset($hasIntl) or $hasIntl = extension_loaded('intl');

          if ($hasIntl) {
              $a = array();
              $p = 0;
              $l = strlen($s);

              // grapheme_extract() moves $p to the byte offset of the next cluster.
              while ($p < $l) {
                  $a[] = grapheme_extract($s, 1, GRAPHEME_EXTR_COUNT, $p, $p);
              }
          } else {
              preg_match_all('/'.GRAPHEME_CLUSTER_RX.'/u', $s, $a);
              $a = $a[0];
          }

          if (1 == $len) {
              return $a;
          }

          $chunks = array();

          foreach (array_chunk($a, $len) as $chunk) {
              $chunks[] = implode('', $chunk);
          }

          return $chunks;
      }

      /**
       * UTF-8 aware str_word_count(): words are runs of letters (plus $charlist), optionally
       * joined by a dash or an apostrophe.
       *
       * @param string $s
       * @param int    $format   0: number of words, 1: list of words,
       *                         2: words keyed by their offset in grapheme clusters.
       * @param string $charlist Additional characters considered part of a word.
       *
       * @return int|array
       */
      public static function str_word_count($s, $format = 0, $charlist = '')
      {
          $charlist = self::rxClass($charlist, '\pL');
          // With the delimiter captured, words sit at the odd indexes of the result.
          $s = preg_split("/({$charlist}+(?:[\p{Pd}’']{$charlist}+)*)/u", $s, -1, PREG_SPLIT_DELIM_CAPTURE);

          $charlist = array();
          $len = count($s);

          if (1 == $format) {
              for ($i = 1; $i < $len; $i += 2) {
                  $charlist[] = $s[$i];
              }
          } elseif (2 == $format) {
              $offset = grapheme_strlen($s[0]);

              for ($i = 1; $i < $len; $i += 2) {
                  $charlist[$offset] = $s[$i];
                  $offset += grapheme_strlen($s[$i]) + grapheme_strlen($s[$i + 1]);
              }
          } else {
              $charlist = ($len - 1) / 2;
          }

          return $charlist;
      }

      /** strcmp() on the NFD forms of $a and $b; identical strings short-circuit to 0. */
      public static function strcmp($a, $b)
      {
          return $a.'' === $b.'' ? 0 : strcmp(n::normalize($a, n::NFD), n::normalize($b, n::NFD));
      }

      /** strnatcmp() on the self::strtonatfold() forms of $a and $b. */
      public static function strnatcmp($a, $b)
      {
          return $a.'' === $b.'' ? 0 : strnatcmp(self::strtonatfold($a), self::strtonatfold($b));
      }

      /** self::strcmp() after case folding both operands. */
      public static function strcasecmp($a, $b)
      {
          return self::strcmp(static::strtocasefold($a), static::strtocasefold($b));
      }

      /** self::strnatcmp() after case folding both operands. */
      public static function strnatcasecmp($a, $b)
      {
          return self::strnatcmp(static::strtocasefold($a), static::strtocasefold($b));
      }

      /** self::strncmp() after case folding both operands. */
      public static function strncasecmp($a, $b, $len)
      {
          return self::strncmp(static::strtocasefold($a), static::strtocasefold($b), $len);
      }

      /** self::strcmp() on the first $len grapheme clusters of each operand. */
      public static function strncmp($a, $b, $len)
      {
          return self::strcmp(self::substr($a, 0, $len), self::substr($b, 0, $len));
      }

      /**
       * Grapheme-cluster aware strcspn().
       *
       * @return int|null Length of the leading segment free of any $charlist character;
       *                  null when $charlist is empty.
       */
      public static function strcspn($s, $charlist, $start = 0, $len = 2147483647)
      {
          if ('' === $charlist .= '') {
              return null;
          }

          if ($start || 2147483647 != $len) {
              $s = self::substr($s, $start, $len);
          }

          return preg_match('/^(.*?)'.self::rxClass($charlist).'/us', $s, $len) ? grapheme_strlen($len[1]) : grapheme_strlen($s);
      }

      /** UTF-8 aware strpbrk(): the tail of $s from the first $charlist character, or false. */
      public static function strpbrk($s, $charlist)
      {
          if (preg_match('/'.self::rxClass($charlist).'/us', $s, $m)) {
              return substr($s, strpos($s, $m[0]));
          }

          return false;
      }

      /** Reverses the order of the grapheme clusters of $s. */
      public static function strrev($s)
      {
          $s = self::str_split($s);

          return implode('', array_reverse($s));
      }

      /** Grapheme-cluster aware strspn(): length of the leading segment made of $mask characters. */
      public static function strspn($s, $mask, $start = 0, $len = 2147483647)
      {
          if ($start || 2147483647 != $len) {
              $s = self::substr($s, $start, $len);
          }

          return preg_match('/^'.self::rxClass($mask).'+/u', $s, $s) ? grapheme_strlen($s[0]) : 0;
      }

      /**
       * Grapheme-cluster aware strtr().
       *
       * @param string       $s
       * @param string|array $from Characters to translate, or a replacement map when $to is omitted.
       * @param string|float $to   Replacement characters; INF (the default) means "$from is a map".
       *
       * @return string
       */
      public static function strtr($s, $from, $to = INF)
      {
          if (INF !== $to) {
              $from = self::str_split($from);
              $to = self::str_split($to);

              $a = count($from);
              $b = count($to);

              // Like the native function, ignore the excess of the longer list.
              if ($a > $b) {
                  $from = array_slice($from, 0, $b);
              } elseif ($a < $b) {
                  $to = array_slice($to, 0, $a);
              }

              $from = array_combine($from, $to);
          }

          return strtr($s, $from);
      }

      /** Compares $b with the part of $a selected by $offset/$len; caseless when $i is truthy. */
      public static function substr_compare($a, $b, $offset, $len = 2147483647, $i = 0)
      {
          $a = self::substr($a, $offset, $len);

          return $i ? static::strcasecmp($a, $b) : self::strcmp($a, $b);
      }

      /** substr_count() where $offset and $len are expressed in grapheme clusters. */
      public static function substr_count($s, $needle, $offset = 0, $len = 2147483647)
      {
          return substr_count(self::substr($s, $offset, $len), $needle);
      }

      /** substr_replace() where $start and $len are expressed in grapheme clusters. */
      public static function substr_replace($s, $replace, $start, $len = 2147483647)
      {
          $s = self::str_split($s);
          $replace = self::str_split($replace);
          array_splice($s, $start, $len, $replace);

          return implode('', $s);
      }

      /** Title-cases the first UTF-8 character of $s. */
      public static function ucfirst($s)
      {
          $c = iconv_substr($s, 0, 1, 'UTF-8');

          return static::ucwords($c).substr($s, strlen($c));
      }

      /** Lower-cases the first UTF-8 character of $s. */
      public static function lcfirst($s)
      {
          $c = iconv_substr($s, 0, 1, 'UTF-8');

          return static::strtolower($c).substr($s, strlen($c));
      }

      /** Title-cases the character following each word boundary of $s. */
      public static function ucwords($s)
      {
          return preg_replace_callback(
              "/\b(.)/u",
              function ($matches) {
                  return mb_convert_case($matches[1], MB_CASE_TITLE, 'UTF-8');
              },
              $s
          );
      }

      /**
       * number_format() accepting multi-byte separators.
       * Before PHP 5.4, such separators are substituted after formatting with '.' and ','.
       */
      public static function number_format($number, $decimals = 0, $dec_point = '.', $thousands_sep = ',')
      {
          if (PHP_VERSION_ID < 50400) {
              if (isset($thousands_sep[1]) || isset($dec_point[1])) {
                  return str_replace(
                      array('.', ','),
                      array($dec_point, $thousands_sep),
                      number_format($number, $decimals, '.', ',')
                  );
              }
          }

          return number_format($number, $decimals, $dec_point, $thousands_sep);
      }

      /**
       * utf8_encode() that treats bytes 0x80..0x9F as Windows-1252 rather than as C1 controls.
       *
       * @param string $s
       *
       * @return string
       */
      public static function utf8_encode($s)
      {
          $s = utf8_encode($s);

          // Every C1 code point is encoded with a 0xC2 lead byte: no such byte, nothing to remap.
          if (false === strpos($s, "\xC2")) {
              return $s;
          }

          return str_replace(self::$cp1252, self::$utf8, $s);
      }

      /**
       * utf8_decode() that first maps the self::$utf8 characters back to their C1 code
       * points, so that they decode to the matching 0x80..0x9F bytes.
       *
       * @param string $s
       *
       * @return string
       */
      public static function utf8_decode($s)
      {
          $s = str_replace(self::$utf8, self::$cp1252, $s);

          return utf8_decode($s);
      }

      /**
       * Returns the width of the widest line of $s.
       *
       * Per line: SGR escape sequences and zero-width characters are removed, characters
       * in the listed wide ranges count twice, each soft hyphen adds one and each
       * backspace subtracts one.
       *
       * @param string $s
       *
       * @return int
       */
      public static function strwidth($s)
      {
          if (false !== strpos($s, "\r")) {
              $s = str_replace("\r\n", "\n", $s);
              $s = strtr($s, "\r", "\n");
          }

          $width = 0;

          foreach (explode("\n", $s) as $s) {
              $s = preg_replace('/\x1B\[[\d;]*m/', '', $s);
              // Count soft hyphens by their full UTF-8 sequence: the lone byte 0xAD is also
              // a continuation byte of unrelated characters (e.g. U+00ED is C3 AD).
              $c = substr_count($s, "\xC2\xAD") - substr_count($s, "\x08");
              $s = preg_replace('/[\x00\x05\x07\p{Mn}\p{Me}\p{Cf}\x{1160}-\x{11FF}\x{200B}]+/u', '', $s);
              // Only the number of matches matters here; it is returned through $wide.
              preg_replace('/[\x{1100}-\x{115F}\x{2329}\x{232A}\x{2E80}-\x{303E}\x{3040}-\x{A4CF}\x{AC00}-\x{D7A3}\x{F900}-\x{FAFF}\x{FE10}-\x{FE19}\x{FE30}-\x{FE6F}\x{FF00}-\x{FF60}\x{FFE0}-\x{FFE6}\x{20000}-\x{2FFFD}\x{30000}-\x{3FFFD}]/u', '', $s, -1, $wide);

              if ($width < $c = iconv_strlen($s, 'UTF-8') + $wide + $c) {
                  $width = $c;
              }
          }

          return $width;
      }

      /**
       * Builds a regex fragment matching any one grapheme cluster of $s.
       *
       * Single code points go into a bracketed character class seeded with $class;
       * clusters made of several code points cannot live in a class and become
       * alternatives of a non-capturing group instead.
       *
       * @param string $s
       * @param string $class Initial content of the character class.
       *
       * @return string
       */
      protected static function rxClass($s, $class = '')
      {
          $class = array($class);

          foreach (self::str_split($s) as $s) {
              if ('-' === $s) {
                  // Put at the very start of the class, where a dash is always literal.
                  $class[0] = '-'.$class[0];
              } elseif (!isset($s[2])) {
                  $class[0] .= preg_quote($s, '/');
              } elseif (1 === iconv_strlen($s, 'UTF-8')) {
                  $class[0] .= $s;
              } else {
                  // May start with an ASCII metacharacter followed by combining marks.
                  $class[] = preg_quote($s, '/');
              }
          }

          $class[0] = '['.$class[0].']';

          if (1 === count($class)) {
              return $class[0];
          }

          return '(?:'.implode('|', $class).')';
      }

      /**
       * Loads a serialized data table from the Utf8/data directory next to this file.
       *
       * @param string $file Base name of the table, without the ".ser" extension.
       *
       * @return mixed The unserialized data, or false when the file does not exist.
       */
      protected static function getData($file)
      {
          $file = __DIR__.'/Utf8/data/'.$file.'.ser';

          if (file_exists($file)) {
              return unserialize(file_get_contents($file));
          }

          return false;
      }
  }