PageRenderTime 50ms CodeModel.GetById 21ms RepoModel.GetById 0ms app.codeStats 0ms

/Scorpio/Sco/Text/Helper.php

https://github.com/bluelovers/Scophp
PHP | 560 lines | 456 code | 26 blank | 78 comment | 26 complexity | 39e9ccab52c583e522d27f3608a1db8d MD5 | raw file
  1. <?php
  /**
   * Stateless text helpers: line-ending normalisation, hex encoding,
   * full-width/half-width conversion, ASCII clean-up, accent transliteration
   * and decoding of JavaScript-style "%XX" / "%uXXXX" escapes.
   *
   * @author bluelovers
   * @copyright 2012
   */
  class Sco_Text_Helper
  {
      /**
       * Half-width => full-width replacement pairs used by str_f2h().
       *
       * Filled lazily on the first str_f2h() call. A caller may assign its own
       * map before that call to override the default pairs.
       *
       * @var array|null
       */
      public static $map_h2f;

      /**
       * Normalises line endings, but only when $str contains $search.
       *
       * Every bare CR run, bare LF run and LF+CR sequence is first rewritten to
       * CRLF and then CRLF is replaced by $eol. Note that a run of several bare
       * CRs (or several bare LFs) is collapsed into ONE line break, because the
       * patterns match `\r+` / `\n+`.
       *
       * @see http://www.php.net/manual/en/function.preg-replace.php#87816
       *
       * @param string $str text to normalise
       * @param string|null|false $eol target line ending; null/false selects NL
       * @param string|null|false $search marker whose presence triggers the
       *        normalisation; null/false selects CR
       * @return string
       */
      public static function lf($str, $eol = NL, $search = CR)
      {
          if ($search === null || $search === false)
          {
              $search = CR;
          }

          // Nothing to do when the trigger sequence does not occur at all.
          if (strpos($str, $search) === false)
          {
              return $str;
          }

          // preg_replace() applies an array of patterns one after another, each
          // on the result of the previous one.
          $str = preg_replace(array(
              "/(?<!\\n)\\r+(?!\\n)/",
              "/(?<!\\r)\\n+(?!\\r)/",
              "/(?<!\\r)\\n\\r+(?!\\n)/",
          ), CRLF, $str);

          if ($eol === null || $eol === false)
          {
              $eol = NL;
          }

          if ($eol != CRLF)
          {
              $str = str_replace(CRLF, $eol, $str);
          }

          return $str;
      }

      /**
       * Encodes every byte of $string as two lowercase hexadecimal digits.
       *
       * Bytes below 0x10 are zero-padded ("\n" => "0a") so that the result can
       * always be reversed with hex2str().
       *
       * @param string $string
       * @return string
       */
      public static function str2hex($string)
      {
          return bin2hex((string )$string);
      }

      /**
       * Decodes a string of hexadecimal digit pairs back into bytes.
       *
       * A trailing unpaired digit is ignored.
       *
       * @param string $hex
       * @return string
       */
      public static function hex2str($hex)
      {
          $string = '';
          $last = strlen($hex) - 1;

          for ($i = 0; $i < $last; $i += 2)
          {
              $string .= chr(hexdec($hex[$i] . $hex[$i + 1]));
          }

          return $string;
      }

      /**
       * Converts between full-width and half-width (ASCII) characters.
       *
       * @param string $str text to convert
       * @param mixed $case falsy (default): full-width => half-width;
       *        truthy: half-width => full-width
       * @return string
       */
      public static function str_f2h($str, $case = 0)
      {
          if (!isset(self::$map_h2f))
          {
              self::$map_h2f = self::build_map_h2f();
          }

          $pairs = $case ? self::$map_h2f : array_flip(self::$map_h2f);

          return strtr((string )$str, $pairs);
      }

      /**
       * Builds the default half-width => full-width map for printable ASCII.
       *
       * The regular entries are computed instead of typed so that the source
       * stays correct even if an editor or tool normalises full-width
       * characters back to ASCII.
       *
       * @return array
       */
      private static function build_map_h2f()
      {
          // U+0020 SPACE <=> U+3000 IDEOGRAPHIC SPACE
          $map = array(' ' => "\xE3\x80\x80");

          // U+0021..U+007E <=> U+FF01..U+FF5E (Halfwidth and Fullwidth Forms),
          // written out as the three UTF-8 bytes of each full-width code point.
          for ($code = 0x21; $code <= 0x7E; $code++)
          {
              $offset = $code - 0x20;
              $map[chr($code)] = "\xEF" . chr(0xBC + ($offset >> 6)) . chr(0x80 + ($offset & 0x3F));
          }

          // Entries that deliberately do not use the regular full-width form.
          $map['^'] = '︿';
          $map['`'] = '‘';
          $map['['] = '〔';
          $map[']'] = '〕';
          $map['"'] = '”';
          $map['\''] = '’';

          return $map;
      }

      /**
       * Splits a UTF-8 string into chunks of $l characters (not bytes) and
       * appends $e after every chunk, including the last one.
       *
       * @param string $str UTF-8 text
       * @param integer $l characters per chunk
       * @param string $e separator appended after each chunk
       * @return string
       */
      public static function chunk_split_unicode($str, $l = 76, $e = NL)
      {
          $chars = preg_split('//u', $str, -1, PREG_SPLIT_NO_EMPTY);
          $out = '';

          foreach (array_chunk($chars, $l) as $chunk)
          {
              $out .= implode('', $chunk) . $e;
          }

          return $out;
      }

      /**
       * Tests whether a string contains only 7bit ASCII bytes. This is used to
       * determine when to use native functions or UTF-8 functions.
       *
       * @see http://sourceforge.net/projects/phputf8/
       * @copyright (c) 2007-2009 Kohana Team
       * @copyright (c) 2005 Harry Fuecks
       * @license http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt
       *
       * @param string string to check
       * @return bool false for non-string input
       */
      public static function is_ascii($str)
      {
          if (!is_string($str))
          {
              return false;
          }

          return !preg_match('/[^\x00-\x7F]/S', $str);
      }

      /**
       * Strips out device control codes in the ASCII range.
       *
       * Tab (0x09), line feed (0x0A) and carriage return (0x0D) are kept.
       *
       * @see http://sourceforge.net/projects/phputf8/
       * @copyright (c) 2007-2009 Kohana Team
       * @copyright (c) 2005 Harry Fuecks
       * @license http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt
       *
       * @param string string to clean
       * @return string
       */
      public static function strip_ascii_ctrl($str)
      {
          return preg_replace('/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S', '', $str);
      }

      /**
       * Strips out all non-7bit ASCII bytes.
       *
       * @see http://sourceforge.net/projects/phputf8/
       * @copyright (c) 2007-2009 Kohana Team
       * @copyright (c) 2005 Harry Fuecks
       * @license http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt
       *
       * @param string string to clean
       * @return string
       */
      public static function strip_non_ascii($str)
      {
          return preg_replace('/[^\x00-\x7F]+/S', '', $str);
      }

      /**
       * Replaces special/accented UTF-8 characters by ASCII-7 'equivalents'.
       *
       * @author Andreas Gohr <andi@splitbrain.org>
       * @see http://sourceforge.net/projects/phputf8/
       * @copyright (c) 2007-2009 Kohana Team
       * @copyright (c) 2005 Harry Fuecks
       * @license http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt
       *
       * @param string string to transliterate
       * @param integer -1 lowercase only, +1 uppercase only, 0 both cases
       * @return string
       */
      public static function transliterate_to_ascii($str, $case = 0)
      {
          // Both tables are built once per request and reused afterwards.
          static $UTF8_LOWER_ACCENTS = null;
          static $UTF8_UPPER_ACCENTS = null;

          if ($case <= 0)
          {
              if ($UTF8_LOWER_ACCENTS === null)
              {
                  $UTF8_LOWER_ACCENTS = array(
                      'à' => 'a', 'ô' => 'o', 'ď' => 'd', 'ḟ' => 'f', 'ë' => 'e',
                      'š' => 's', 'ơ' => 'o', 'ß' => 'ss', 'ă' => 'a', 'ř' => 'r',
                      'ț' => 't', 'ň' => 'n', 'ā' => 'a', 'ķ' => 'k', 'ŝ' => 's',
                      'ỳ' => 'y', 'ņ' => 'n', 'ĺ' => 'l', 'ħ' => 'h', 'ṗ' => 'p',
                      'ó' => 'o', 'ú' => 'u', 'ě' => 'e', 'é' => 'e', 'ç' => 'c',
                      'ẁ' => 'w', 'ċ' => 'c', 'õ' => 'o', 'ṡ' => 's', 'ø' => 'o',
                      'ģ' => 'g', 'ŧ' => 't', 'ș' => 's', 'ė' => 'e', 'ĉ' => 'c',
                      'ś' => 's', 'î' => 'i', 'ű' => 'u', 'ć' => 'c', 'ę' => 'e',
                      'ŵ' => 'w', 'ṫ' => 't', 'ū' => 'u', 'č' => 'c', 'ö' => 'o',
                      'è' => 'e', 'ŷ' => 'y', 'ą' => 'a', 'ł' => 'l', 'ų' => 'u',
                      'ů' => 'u', 'ş' => 's', 'ğ' => 'g', 'ļ' => 'l', 'ƒ' => 'f',
                      'ž' => 'z', 'ẃ' => 'w', 'ḃ' => 'b', 'å' => 'a', 'ì' => 'i',
                      'ï' => 'i', 'ḋ' => 'd', 'ť' => 't', 'ŗ' => 'r', 'ä' => 'a',
                      'í' => 'i', 'ŕ' => 'r', 'ê' => 'e', 'ü' => 'u', 'ò' => 'o',
                      'ē' => 'e', 'ñ' => 'n', 'ń' => 'n', 'ĥ' => 'h', 'ĝ' => 'g',
                      'đ' => 'd', 'ĵ' => 'j', 'ÿ' => 'y', 'ũ' => 'u', 'ŭ' => 'u',
                      'ư' => 'u', 'ţ' => 't', 'ý' => 'y', 'ő' => 'o', 'â' => 'a',
                      'ľ' => 'l', 'ẅ' => 'w', 'ż' => 'z', 'ī' => 'i', 'ã' => 'a',
                      'ġ' => 'g', 'ṁ' => 'm', 'ō' => 'o', 'ĩ' => 'i', 'ù' => 'u',
                      'į' => 'i', 'ź' => 'z', 'á' => 'a', 'û' => 'u', 'þ' => 'th',
                      'ð' => 'dh', 'æ' => 'ae', 'µ' => 'u', 'ĕ' => 'e', 'ı' => 'i',
                  );
              }

              $str = strtr((string )$str, $UTF8_LOWER_ACCENTS);
          }

          if ($case >= 0)
          {
              if ($UTF8_UPPER_ACCENTS === null)
              {
                  $UTF8_UPPER_ACCENTS = array(
                      'À' => 'A', 'Ô' => 'O', 'Ď' => 'D', 'Ḟ' => 'F', 'Ë' => 'E',
                      'Š' => 'S', 'Ơ' => 'O', 'Ă' => 'A', 'Ř' => 'R', 'Ț' => 'T',
                      'Ň' => 'N', 'Ā' => 'A', 'Ķ' => 'K', 'Ĕ' => 'E', 'Ŝ' => 'S',
                      'Ỳ' => 'Y', 'Ņ' => 'N', 'Ĺ' => 'L', 'Ħ' => 'H', 'Ṗ' => 'P',
                      'Ó' => 'O', 'Ú' => 'U', 'Ě' => 'E', 'É' => 'E', 'Ç' => 'C',
                      'Ẁ' => 'W', 'Ċ' => 'C', 'Õ' => 'O', 'Ṡ' => 'S', 'Ø' => 'O',
                      'Ģ' => 'G', 'Ŧ' => 'T', 'Ș' => 'S', 'Ė' => 'E', 'Ĉ' => 'C',
                      'Ś' => 'S', 'Î' => 'I', 'Ű' => 'U', 'Ć' => 'C', 'Ę' => 'E',
                      'Ŵ' => 'W', 'Ṫ' => 'T', 'Ū' => 'U', 'Č' => 'C', 'Ö' => 'O',
                      'È' => 'E', 'Ŷ' => 'Y', 'Ą' => 'A', 'Ł' => 'L', 'Ų' => 'U',
                      'Ů' => 'U', 'Ş' => 'S', 'Ğ' => 'G', 'Ļ' => 'L', 'Ƒ' => 'F',
                      'Ž' => 'Z', 'Ẃ' => 'W', 'Ḃ' => 'B', 'Å' => 'A', 'Ì' => 'I',
                      'Ï' => 'I', 'Ḋ' => 'D', 'Ť' => 'T', 'Ŗ' => 'R', 'Ä' => 'A',
                      'Í' => 'I', 'Ŕ' => 'R', 'Ê' => 'E', 'Ü' => 'U', 'Ò' => 'O',
                      'Ē' => 'E', 'Ñ' => 'N', 'Ń' => 'N', 'Ĥ' => 'H', 'Ĝ' => 'G',
                      'Đ' => 'D', 'Ĵ' => 'J', 'Ÿ' => 'Y', 'Ũ' => 'U', 'Ŭ' => 'U',
                      'Ư' => 'U', 'Ţ' => 'T', 'Ý' => 'Y', 'Ő' => 'O', 'Â' => 'A',
                      'Ľ' => 'L', 'Ẅ' => 'W', 'Ż' => 'Z', 'Ī' => 'I', 'Ã' => 'A',
                      'Ġ' => 'G', 'Ṁ' => 'M', 'Ō' => 'O', 'Ĩ' => 'I', 'Ù' => 'U',
                      'Į' => 'I', 'Ź' => 'Z', 'Á' => 'A', 'Û' => 'U', 'Þ' => 'Th',
                      'Ð' => 'Dh', 'Æ' => 'Ae', 'İ' => 'I',
                  );
              }

              $str = strtr((string )$str, $UTF8_UPPER_ACCENTS);
          }

          return $str;
      }

      /**
       * Encodes a Unicode code point as a UTF-8 byte sequence.
       *
       * Uses the standard UTF-8 range limits (1 byte below U+0080, 2 bytes
       * below U+0800, 3 bytes below U+10000, 4 bytes up to U+10FFFF) so that
       * no overlong sequences are produced.
       *
       * @see http://tw2.php.net/manual/en/function.urldecode.php#62707
       *
       * @param integer $num code point
       * @return string UTF-8 bytes, or '' when $num is negative, above
       *         U+10FFFF or a UTF-16 surrogate (U+D800..U+DFFF)
       */
      public static function code2utf($num)
      {
          $num = (int)$num;

          if ($num < 0 || $num > 0x10FFFF || ($num >= 0xD800 && $num <= 0xDFFF))
          {
              return '';
          }

          if ($num < 0x80)
          {
              return chr($num);
          }

          if ($num < 0x800)
          {
              return chr(0xC0 | ($num >> 6))
                  . chr(0x80 | ($num & 0x3F));
          }

          if ($num < 0x10000)
          {
              return chr(0xE0 | ($num >> 12))
                  . chr(0x80 | (($num >> 6) & 0x3F))
                  . chr(0x80 | ($num & 0x3F));
          }

          return chr(0xF0 | ($num >> 18))
              . chr(0x80 | (($num >> 12) & 0x3F))
              . chr(0x80 | (($num >> 6) & 0x3F))
              . chr(0x80 | ($num & 0x3F));
      }

      /**
       * Decodes JavaScript escape()-style sequences.
       *
       * "%XX" is read as code point U+00XX and "%uXXXX" as a UTF-16 code unit;
       * a high/low surrogate pair written as two consecutive "%uXXXX" escapes
       * is combined into one code point. Sequences that are not followed by
       * the required number of hexadecimal digits are left untouched.
       *
       * @see http://tw2.php.net/manual/en/function.urldecode.php#62707
       *
       * @param string $strIn escaped text
       * @param string $iconv_to target charset; the UTF-8 result is passed
       *        through iconv() when this is not "UTF-8"
       * @return string
       */
      public static function unescape($strIn, $iconv_to = 'UTF-8')
      {
          $strOut = preg_replace_callback(
              '/%u(d[89ab][0-9a-f]{2})%u(d[c-f][0-9a-f]{2})|%u([0-9a-f]{4})|%([0-9a-f]{2})/i',
              array(__CLASS__, 'unescape_match'),
              (string )$strIn
          );

          if ($iconv_to != "UTF-8")
          {
              $strOut = iconv("UTF-8", $iconv_to, $strOut);
          }

          return $strOut;
      }

      /**
       * preg_replace_callback() handler for unescape(): turns one matched
       * escape (or surrogate pair) into UTF-8.
       *
       * @param array $m match groups: 1+2 surrogate pair, 3 "%uXXXX", 4 "%XX"
       * @return string
       */
      private static function unescape_match($m)
      {
          // Trailing groups that did not participate are absent from $m, so
          // the highest index present tells which alternative matched.
          if (isset($m[4]))
          {
              return self::code2utf(hexdec($m[4]));
          }

          if (isset($m[3]))
          {
              return self::code2utf(hexdec($m[3]));
          }

          $high = hexdec($m[1]) - 0xD800;
          $low = hexdec($m[2]) - 0xDC00;

          return self::code2utf(0x10000 + ($high << 10) + $low);
      }

      /**
       * Rewrites percent escapes as HTML hexadecimal character references.
       *
       * For compatibility of new and old browsers:
       * %u0xxx -> &#xxxx;
       * %xx -> &#xxx;
       *
       * Only "%u" escapes whose first digit is 0 are handled by the first
       * rule; the second rule then applies to any remaining "%" followed by
       * two alphanumeric characters.
       *
       * @see http://tw2.php.net/manual/en/function.urldecode.php#29272
       *
       * @param string $txt
       * @return string
       */
      public static function unicode_decode($txt)
      {
          // preg_replace() instead of ereg_replace(), which was removed in PHP 7.
          $txt = preg_replace('/%u0([[:alnum:]]{3})/', '&#x${1};', $txt);
          $txt = preg_replace('/%([[:alnum:]]{2})/', '&#x${1};', $txt);

          return $txt;
      }
  }