PageRenderTime 50ms CodeModel.GetById 18ms RepoModel.GetById 0ms app.codeStats 0ms

/xandra.org/www/system/classes/Kohana/UTF8.php

https://bitbucket.org/ekkl/tanora
PHP | 765 lines | 280 code | 92 blank | 393 comment | 35 complexity | 66c4b02b1f12d8022790ab4d56c09b29 MD5 | raw file
Possible License(s): GPL-2.0, BSD-3-Clause
  1. <?php defined('SYSPATH') OR die('No direct script access.');
  2. /**
  3. * A port of [phputf8](http://phputf8.sourceforge.net/) to a unified set
  4. * of files. Provides multi-byte aware replacement string functions.
  5. *
  6. * For UTF-8 support to work correctly, the following requirements must be met:
  7. *
  8. * - PCRE needs to be compiled with UTF-8 support (--enable-utf8)
  9. * - Support for [Unicode properties](http://php.net/manual/reference.pcre.pattern.modifiers.php)
  10. * is highly recommended (--enable-unicode-properties)
  11. * - The [mbstring extension](http://php.net/mbstring) is highly recommended,
  12. * but must not be overloading string functions
  13. *
  14. * [!!] This file is licensed differently from the rest of Kohana. As a port of
  15. * [phputf8](http://phputf8.sourceforge.net/), this file is released under the LGPL.
  16. *
  17. * @package Kohana
  18. * @category Base
  19. * @author Kohana Team
  20. * @copyright (c) 2007-2012 Kohana Team
  21. * @copyright (c) 2005 Harry Fuecks
  22. * @license http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt
  23. */
  24. class Kohana_UTF8 {
  25. /**
  26. * @var boolean Does the server support UTF-8 natively?
  27. */
  28. public static $server_utf8 = NULL;
  29. /**
  30. * @var array List of called methods that have had their required file included.
  31. */
  32. public static $called = array();
  33. /**
  34. * Recursively cleans arrays, objects, and strings. Removes ASCII control
  35. * codes and converts to the requested charset while silently discarding
  36. * incompatible characters.
  37. *
  38. * UTF8::clean($_GET); // Clean GET data
  39. *
  40. * @param mixed $var variable to clean
  41. * @param string $charset character set, defaults to Kohana::$charset
  42. * @return mixed
  43. * @uses UTF8::clean
  44. * @uses UTF8::strip_ascii_ctrl
  45. * @uses UTF8::is_ascii
  46. */
  47. public static function clean($var, $charset = NULL)
  48. {
  49. if ( ! $charset)
  50. {
  51. // Use the application character set
  52. $charset = Kohana::$charset;
  53. }
  54. if (is_array($var) OR is_object($var))
  55. {
  56. foreach ($var as $key => $val)
  57. {
  58. // Recursion!
  59. $var[UTF8::clean($key)] = UTF8::clean($val);
  60. }
  61. }
  62. elseif (is_string($var) AND $var !== '')
  63. {
  64. // Remove control characters
  65. $var = UTF8::strip_ascii_ctrl($var);
  66. if ( ! UTF8::is_ascii($var))
  67. {
  68. // Disable notices
  69. $error_reporting = error_reporting(~E_NOTICE);
  70. $var = mb_convert_encoding($var, $charset, $charset);
  71. // Turn notices back on
  72. error_reporting($error_reporting);
  73. }
  74. }
  75. return $var;
  76. }
  77. /**
  78. * Tests whether a string contains only 7-bit ASCII bytes. This is used to
  79. * determine when to use native functions or UTF-8 functions.
  80. *
  81. * $ascii = UTF8::is_ascii($str);
  82. *
  83. * @param mixed $str string or array of strings to check
  84. * @return boolean
  85. */
  86. public static function is_ascii($str)
  87. {
  88. if (is_array($str))
  89. {
  90. $str = implode($str);
  91. }
  92. return ! preg_match('/[^\x00-\x7F]/S', $str);
  93. }
  94. /**
  95. * Strips out device control codes in the ASCII range.
  96. *
  97. * $str = UTF8::strip_ascii_ctrl($str);
  98. *
  99. * @param string $str string to clean
  100. * @return string
  101. */
  102. public static function strip_ascii_ctrl($str)
  103. {
  104. return preg_replace('/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S', '', $str);
  105. }
  106. /**
  107. * Strips out all non-7bit ASCII bytes.
  108. *
  109. * $str = UTF8::strip_non_ascii($str);
  110. *
  111. * @param string $str string to clean
  112. * @return string
  113. */
  114. public static function strip_non_ascii($str)
  115. {
  116. return preg_replace('/[^\x00-\x7F]+/S', '', $str);
  117. }
  118. /**
  119. * Replaces special/accented UTF-8 characters by ASCII-7 "equivalents".
  120. *
  121. * $ascii = UTF8::transliterate_to_ascii($utf8);
  122. *
  123. * @author Andreas Gohr <andi@splitbrain.org>
  124. * @param string $str string to transliterate
  125. * @param integer $case -1 lowercase only, +1 uppercase only, 0 both cases
  126. * @return string
  127. */
  128. public static function transliterate_to_ascii($str, $case = 0)
  129. {
  130. if ( ! isset(UTF8::$called[__FUNCTION__]))
  131. {
  132. require Kohana::find_file('utf8', __FUNCTION__);
  133. // Function has been called
  134. UTF8::$called[__FUNCTION__] = TRUE;
  135. }
  136. return _transliterate_to_ascii($str, $case);
  137. }
  138. /**
  139. * Returns the length of the given string. This is a UTF8-aware version
  140. * of [strlen](http://php.net/strlen).
  141. *
  142. * $length = UTF8::strlen($str);
  143. *
  144. * @param string $str string being measured for length
  145. * @return integer
  146. * @uses UTF8::$server_utf8
  147. * @uses Kohana::$charset
  148. */
  149. public static function strlen($str)
  150. {
  151. if (UTF8::$server_utf8)
  152. return mb_strlen($str, Kohana::$charset);
  153. if ( ! isset(UTF8::$called[__FUNCTION__]))
  154. {
  155. require Kohana::find_file('utf8', __FUNCTION__);
  156. // Function has been called
  157. UTF8::$called[__FUNCTION__] = TRUE;
  158. }
  159. return _strlen($str);
  160. }
  161. /**
  162. * Finds position of first occurrence of a UTF-8 string. This is a
  163. * UTF8-aware version of [strpos](http://php.net/strpos).
  164. *
  165. * $position = UTF8::strpos($str, $search);
  166. *
  167. * @author Harry Fuecks <hfuecks@gmail.com>
  168. * @param string $str haystack
  169. * @param string $search needle
  170. * @param integer $offset offset from which character in haystack to start searching
  171. * @return integer position of needle
  172. * @return boolean FALSE if the needle is not found
  173. * @uses UTF8::$server_utf8
  174. * @uses Kohana::$charset
  175. */
  176. public static function strpos($str, $search, $offset = 0)
  177. {
  178. if (UTF8::$server_utf8)
  179. return mb_strpos($str, $search, $offset, Kohana::$charset);
  180. if ( ! isset(UTF8::$called[__FUNCTION__]))
  181. {
  182. require Kohana::find_file('utf8', __FUNCTION__);
  183. // Function has been called
  184. UTF8::$called[__FUNCTION__] = TRUE;
  185. }
  186. return _strpos($str, $search, $offset);
  187. }
  188. /**
  189. * Finds position of last occurrence of a char in a UTF-8 string. This is
  190. * a UTF8-aware version of [strrpos](http://php.net/strrpos).
  191. *
  192. * $position = UTF8::strrpos($str, $search);
  193. *
  194. * @author Harry Fuecks <hfuecks@gmail.com>
  195. * @param string $str haystack
  196. * @param string $search needle
  197. * @param integer $offset offset from which character in haystack to start searching
  198. * @return integer position of needle
  199. * @return boolean FALSE if the needle is not found
  200. * @uses UTF8::$server_utf8
  201. */
  202. public static function strrpos($str, $search, $offset = 0)
  203. {
  204. if (UTF8::$server_utf8)
  205. return mb_strrpos($str, $search, $offset, Kohana::$charset);
  206. if ( ! isset(UTF8::$called[__FUNCTION__]))
  207. {
  208. require Kohana::find_file('utf8', __FUNCTION__);
  209. // Function has been called
  210. UTF8::$called[__FUNCTION__] = TRUE;
  211. }
  212. return _strrpos($str, $search, $offset);
  213. }
  214. /**
  215. * Returns part of a UTF-8 string. This is a UTF8-aware version
  216. * of [substr](http://php.net/substr).
  217. *
  218. * $sub = UTF8::substr($str, $offset);
  219. *
  220. * @author Chris Smith <chris@jalakai.co.uk>
  221. * @param string $str input string
  222. * @param integer $offset offset
  223. * @param integer $length length limit
  224. * @return string
  225. * @uses UTF8::$server_utf8
  226. * @uses Kohana::$charset
  227. */
  228. public static function substr($str, $offset, $length = NULL)
  229. {
  230. if (UTF8::$server_utf8)
  231. return ($length === NULL)
  232. ? mb_substr($str, $offset, mb_strlen($str), Kohana::$charset)
  233. : mb_substr($str, $offset, $length, Kohana::$charset);
  234. if ( ! isset(UTF8::$called[__FUNCTION__]))
  235. {
  236. require Kohana::find_file('utf8', __FUNCTION__);
  237. // Function has been called
  238. UTF8::$called[__FUNCTION__] = TRUE;
  239. }
  240. return _substr($str, $offset, $length);
  241. }
  242. /**
  243. * Replaces text within a portion of a UTF-8 string. This is a UTF8-aware
  244. * version of [substr_replace](http://php.net/substr_replace).
  245. *
  246. * $str = UTF8::substr_replace($str, $replacement, $offset);
  247. *
  248. * @author Harry Fuecks <hfuecks@gmail.com>
  249. * @param string $str input string
  250. * @param string $replacement replacement string
  251. * @param integer $offset offset
  252. * @return string
  253. */
  254. public static function substr_replace($str, $replacement, $offset, $length = NULL)
  255. {
  256. if ( ! isset(UTF8::$called[__FUNCTION__]))
  257. {
  258. require Kohana::find_file('utf8', __FUNCTION__);
  259. // Function has been called
  260. UTF8::$called[__FUNCTION__] = TRUE;
  261. }
  262. return _substr_replace($str, $replacement, $offset, $length);
  263. }
  264. /**
  265. * Makes a UTF-8 string lowercase. This is a UTF8-aware version
  266. * of [strtolower](http://php.net/strtolower).
  267. *
  268. * $str = UTF8::strtolower($str);
  269. *
  270. * @author Andreas Gohr <andi@splitbrain.org>
  271. * @param string $str mixed case string
  272. * @return string
  273. * @uses UTF8::$server_utf8
  274. * @uses Kohana::$charset
  275. */
  276. public static function strtolower($str)
  277. {
  278. if (UTF8::$server_utf8)
  279. return mb_strtolower($str, Kohana::$charset);
  280. if ( ! isset(UTF8::$called[__FUNCTION__]))
  281. {
  282. require Kohana::find_file('utf8', __FUNCTION__);
  283. // Function has been called
  284. UTF8::$called[__FUNCTION__] = TRUE;
  285. }
  286. return _strtolower($str);
  287. }
  288. /**
  289. * Makes a UTF-8 string uppercase. This is a UTF8-aware version
  290. * of [strtoupper](http://php.net/strtoupper).
  291. *
  292. * @author Andreas Gohr <andi@splitbrain.org>
  293. * @param string $str mixed case string
  294. * @return string
  295. * @uses UTF8::$server_utf8
  296. * @uses Kohana::$charset
  297. */
  298. public static function strtoupper($str)
  299. {
  300. if (UTF8::$server_utf8)
  301. return mb_strtoupper($str, Kohana::$charset);
  302. if ( ! isset(UTF8::$called[__FUNCTION__]))
  303. {
  304. require Kohana::find_file('utf8', __FUNCTION__);
  305. // Function has been called
  306. UTF8::$called[__FUNCTION__] = TRUE;
  307. }
  308. return _strtoupper($str);
  309. }
  310. /**
  311. * Makes a UTF-8 string's first character uppercase. This is a UTF8-aware
  312. * version of [ucfirst](http://php.net/ucfirst).
  313. *
  314. * $str = UTF8::ucfirst($str);
  315. *
  316. * @author Harry Fuecks <hfuecks@gmail.com>
  317. * @param string $str mixed case string
  318. * @return string
  319. */
  320. public static function ucfirst($str)
  321. {
  322. if ( ! isset(UTF8::$called[__FUNCTION__]))
  323. {
  324. require Kohana::find_file('utf8', __FUNCTION__);
  325. // Function has been called
  326. UTF8::$called[__FUNCTION__] = TRUE;
  327. }
  328. return _ucfirst($str);
  329. }
  330. /**
  331. * Makes the first character of every word in a UTF-8 string uppercase.
  332. * This is a UTF8-aware version of [ucwords](http://php.net/ucwords).
  333. *
  334. * $str = UTF8::ucwords($str);
  335. *
  336. * @author Harry Fuecks <hfuecks@gmail.com>
  337. * @param string $str mixed case string
  338. * @return string
  339. */
  340. public static function ucwords($str)
  341. {
  342. if ( ! isset(UTF8::$called[__FUNCTION__]))
  343. {
  344. require Kohana::find_file('utf8', __FUNCTION__);
  345. // Function has been called
  346. UTF8::$called[__FUNCTION__] = TRUE;
  347. }
  348. return _ucwords($str);
  349. }
  350. /**
  351. * Case-insensitive UTF-8 string comparison. This is a UTF8-aware version
  352. * of [strcasecmp](http://php.net/strcasecmp).
  353. *
  354. * $compare = UTF8::strcasecmp($str1, $str2);
  355. *
  356. * @author Harry Fuecks <hfuecks@gmail.com>
  357. * @param string $str1 string to compare
  358. * @param string $str2 string to compare
  359. * @return integer less than 0 if str1 is less than str2
  360. * @return integer greater than 0 if str1 is greater than str2
  361. * @return integer 0 if they are equal
  362. */
  363. public static function strcasecmp($str1, $str2)
  364. {
  365. if ( ! isset(UTF8::$called[__FUNCTION__]))
  366. {
  367. require Kohana::find_file('utf8', __FUNCTION__);
  368. // Function has been called
  369. UTF8::$called[__FUNCTION__] = TRUE;
  370. }
  371. return _strcasecmp($str1, $str2);
  372. }
  373. /**
  374. * Returns a string or an array with all occurrences of search in subject
  375. * (ignoring case) and replaced with the given replace value. This is a
  376. * UTF8-aware version of [str_ireplace](http://php.net/str_ireplace).
  377. *
  378. * [!!] This function is very slow compared to the native version. Avoid
  379. * using it when possible.
  380. *
  381. * @author Harry Fuecks <hfuecks@gmail.com
  382. * @param string|array $search text to replace
  383. * @param string|array $replace replacement text
  384. * @param string|array $str subject text
  385. * @param integer $count number of matched and replaced needles will be returned via this parameter which is passed by reference
  386. * @return string if the input was a string
  387. * @return array if the input was an array
  388. */
  389. public static function str_ireplace($search, $replace, $str, & $count = NULL)
  390. {
  391. if ( ! isset(UTF8::$called[__FUNCTION__]))
  392. {
  393. require Kohana::find_file('utf8', __FUNCTION__);
  394. // Function has been called
  395. UTF8::$called[__FUNCTION__] = TRUE;
  396. }
  397. return _str_ireplace($search, $replace, $str, $count);
  398. }
  399. /**
  400. * Case-insensitive UTF-8 version of strstr. Returns all of input string
  401. * from the first occurrence of needle to the end. This is a UTF8-aware
  402. * version of [stristr](http://php.net/stristr).
  403. *
  404. * $found = UTF8::stristr($str, $search);
  405. *
  406. * @author Harry Fuecks <hfuecks@gmail.com>
  407. * @param string $str input string
  408. * @param string $search needle
  409. * @return string matched substring if found
  410. * @return FALSE if the substring was not found
  411. */
  412. public static function stristr($str, $search)
  413. {
  414. if ( ! isset(UTF8::$called[__FUNCTION__]))
  415. {
  416. require Kohana::find_file('utf8', __FUNCTION__);
  417. // Function has been called
  418. UTF8::$called[__FUNCTION__] = TRUE;
  419. }
  420. return _stristr($str, $search);
  421. }
  422. /**
  423. * Finds the length of the initial segment matching mask. This is a
  424. * UTF8-aware version of [strspn](http://php.net/strspn).
  425. *
  426. * $found = UTF8::strspn($str, $mask);
  427. *
  428. * @author Harry Fuecks <hfuecks@gmail.com>
  429. * @param string $str input string
  430. * @param string $mask mask for search
  431. * @param integer $offset start position of the string to examine
  432. * @param integer $length length of the string to examine
  433. * @return integer length of the initial segment that contains characters in the mask
  434. */
  435. public static function strspn($str, $mask, $offset = NULL, $length = NULL)
  436. {
  437. if ( ! isset(UTF8::$called[__FUNCTION__]))
  438. {
  439. require Kohana::find_file('utf8', __FUNCTION__);
  440. // Function has been called
  441. UTF8::$called[__FUNCTION__] = TRUE;
  442. }
  443. return _strspn($str, $mask, $offset, $length);
  444. }
  445. /**
  446. * Finds the length of the initial segment not matching mask. This is a
  447. * UTF8-aware version of [strcspn](http://php.net/strcspn).
  448. *
  449. * $found = UTF8::strcspn($str, $mask);
  450. *
  451. * @author Harry Fuecks <hfuecks@gmail.com>
  452. * @param string $str input string
  453. * @param string $mask mask for search
  454. * @param integer $offset start position of the string to examine
  455. * @param integer $length length of the string to examine
  456. * @return integer length of the initial segment that contains characters not in the mask
  457. */
  458. public static function strcspn($str, $mask, $offset = NULL, $length = NULL)
  459. {
  460. if ( ! isset(UTF8::$called[__FUNCTION__]))
  461. {
  462. require Kohana::find_file('utf8', __FUNCTION__);
  463. // Function has been called
  464. UTF8::$called[__FUNCTION__] = TRUE;
  465. }
  466. return _strcspn($str, $mask, $offset, $length);
  467. }
  468. /**
  469. * Pads a UTF-8 string to a certain length with another string. This is a
  470. * UTF8-aware version of [str_pad](http://php.net/str_pad).
  471. *
  472. * $str = UTF8::str_pad($str, $length);
  473. *
  474. * @author Harry Fuecks <hfuecks@gmail.com>
  475. * @param string $str input string
  476. * @param integer $final_str_length desired string length after padding
  477. * @param string $pad_str string to use as padding
  478. * @param string $pad_type padding type: STR_PAD_RIGHT, STR_PAD_LEFT, or STR_PAD_BOTH
  479. * @return string
  480. */
  481. public static function str_pad($str, $final_str_length, $pad_str = ' ', $pad_type = STR_PAD_RIGHT)
  482. {
  483. if ( ! isset(UTF8::$called[__FUNCTION__]))
  484. {
  485. require Kohana::find_file('utf8', __FUNCTION__);
  486. // Function has been called
  487. UTF8::$called[__FUNCTION__] = TRUE;
  488. }
  489. return _str_pad($str, $final_str_length, $pad_str, $pad_type);
  490. }
  491. /**
  492. * Converts a UTF-8 string to an array. This is a UTF8-aware version of
  493. * [str_split](http://php.net/str_split).
  494. *
  495. * $array = UTF8::str_split($str);
  496. *
  497. * @author Harry Fuecks <hfuecks@gmail.com>
  498. * @param string $str input string
  499. * @param integer $split_length maximum length of each chunk
  500. * @return array
  501. */
  502. public static function str_split($str, $split_length = 1)
  503. {
  504. if ( ! isset(UTF8::$called[__FUNCTION__]))
  505. {
  506. require Kohana::find_file('utf8', __FUNCTION__);
  507. // Function has been called
  508. UTF8::$called[__FUNCTION__] = TRUE;
  509. }
  510. return _str_split($str, $split_length);
  511. }
  512. /**
  513. * Reverses a UTF-8 string. This is a UTF8-aware version of [strrev](http://php.net/strrev).
  514. *
  515. * $str = UTF8::strrev($str);
  516. *
  517. * @author Harry Fuecks <hfuecks@gmail.com>
  518. * @param string $str string to be reversed
  519. * @return string
  520. */
  521. public static function strrev($str)
  522. {
  523. if ( ! isset(UTF8::$called[__FUNCTION__]))
  524. {
  525. require Kohana::find_file('utf8', __FUNCTION__);
  526. // Function has been called
  527. UTF8::$called[__FUNCTION__] = TRUE;
  528. }
  529. return _strrev($str);
  530. }
  531. /**
  532. * Strips whitespace (or other UTF-8 characters) from the beginning and
  533. * end of a string. This is a UTF8-aware version of [trim](http://php.net/trim).
  534. *
  535. * $str = UTF8::trim($str);
  536. *
  537. * @author Andreas Gohr <andi@splitbrain.org>
  538. * @param string $str input string
  539. * @param string $charlist string of characters to remove
  540. * @return string
  541. */
  542. public static function trim($str, $charlist = NULL)
  543. {
  544. if ( ! isset(UTF8::$called[__FUNCTION__]))
  545. {
  546. require Kohana::find_file('utf8', __FUNCTION__);
  547. // Function has been called
  548. UTF8::$called[__FUNCTION__] = TRUE;
  549. }
  550. return _trim($str, $charlist);
  551. }
  552. /**
  553. * Strips whitespace (or other UTF-8 characters) from the beginning of
  554. * a string. This is a UTF8-aware version of [ltrim](http://php.net/ltrim).
  555. *
  556. * $str = UTF8::ltrim($str);
  557. *
  558. * @author Andreas Gohr <andi@splitbrain.org>
  559. * @param string $str input string
  560. * @param string $charlist string of characters to remove
  561. * @return string
  562. */
  563. public static function ltrim($str, $charlist = NULL)
  564. {
  565. if ( ! isset(UTF8::$called[__FUNCTION__]))
  566. {
  567. require Kohana::find_file('utf8', __FUNCTION__);
  568. // Function has been called
  569. UTF8::$called[__FUNCTION__] = TRUE;
  570. }
  571. return _ltrim($str, $charlist);
  572. }
  573. /**
  574. * Strips whitespace (or other UTF-8 characters) from the end of a string.
  575. * This is a UTF8-aware version of [rtrim](http://php.net/rtrim).
  576. *
  577. * $str = UTF8::rtrim($str);
  578. *
  579. * @author Andreas Gohr <andi@splitbrain.org>
  580. * @param string $str input string
  581. * @param string $charlist string of characters to remove
  582. * @return string
  583. */
  584. public static function rtrim($str, $charlist = NULL)
  585. {
  586. if ( ! isset(UTF8::$called[__FUNCTION__]))
  587. {
  588. require Kohana::find_file('utf8', __FUNCTION__);
  589. // Function has been called
  590. UTF8::$called[__FUNCTION__] = TRUE;
  591. }
  592. return _rtrim($str, $charlist);
  593. }
  594. /**
  595. * Returns the unicode ordinal for a character. This is a UTF8-aware
  596. * version of [ord](http://php.net/ord).
  597. *
  598. * $digit = UTF8::ord($character);
  599. *
  600. * @author Harry Fuecks <hfuecks@gmail.com>
  601. * @param string $chr UTF-8 encoded character
  602. * @return integer
  603. */
  604. public static function ord($chr)
  605. {
  606. if ( ! isset(UTF8::$called[__FUNCTION__]))
  607. {
  608. require Kohana::find_file('utf8', __FUNCTION__);
  609. // Function has been called
  610. UTF8::$called[__FUNCTION__] = TRUE;
  611. }
  612. return _ord($chr);
  613. }
  614. /**
  615. * Takes an UTF-8 string and returns an array of ints representing the Unicode characters.
  616. * Astral planes are supported i.e. the ints in the output can be > 0xFFFF.
  617. * Occurrences of the BOM are ignored. Surrogates are not allowed.
  618. *
  619. * $array = UTF8::to_unicode($str);
  620. *
  621. * The Original Code is Mozilla Communicator client code.
  622. * The Initial Developer of the Original Code is Netscape Communications Corporation.
  623. * Portions created by the Initial Developer are Copyright (C) 1998 the Initial Developer.
  624. * Ported to PHP by Henri Sivonen <hsivonen@iki.fi>, see <http://hsivonen.iki.fi/php-utf8/>
  625. * Slight modifications to fit with phputf8 library by Harry Fuecks <hfuecks@gmail.com>
  626. *
  627. * @param string $str UTF-8 encoded string
  628. * @return array unicode code points
  629. * @return FALSE if the string is invalid
  630. */
  631. public static function to_unicode($str)
  632. {
  633. if ( ! isset(UTF8::$called[__FUNCTION__]))
  634. {
  635. require Kohana::find_file('utf8', __FUNCTION__);
  636. // Function has been called
  637. UTF8::$called[__FUNCTION__] = TRUE;
  638. }
  639. return _to_unicode($str);
  640. }
  641. /**
  642. * Takes an array of ints representing the Unicode characters and returns a UTF-8 string.
  643. * Astral planes are supported i.e. the ints in the input can be > 0xFFFF.
  644. * Occurrences of the BOM are ignored. Surrogates are not allowed.
  645. *
  646. * $str = UTF8::to_unicode($array);
  647. *
  648. * The Original Code is Mozilla Communicator client code.
  649. * The Initial Developer of the Original Code is Netscape Communications Corporation.
  650. * Portions created by the Initial Developer are Copyright (C) 1998 the Initial Developer.
  651. * Ported to PHP by Henri Sivonen <hsivonen@iki.fi>, see http://hsivonen.iki.fi/php-utf8/
  652. * Slight modifications to fit with phputf8 library by Harry Fuecks <hfuecks@gmail.com>.
  653. *
  654. * @param array $str unicode code points representing a string
  655. * @return string utf8 string of characters
  656. * @return boolean FALSE if a code point cannot be found
  657. */
  658. public static function from_unicode($arr)
  659. {
  660. if ( ! isset(UTF8::$called[__FUNCTION__]))
  661. {
  662. require Kohana::find_file('utf8', __FUNCTION__);
  663. // Function has been called
  664. UTF8::$called[__FUNCTION__] = TRUE;
  665. }
  666. return _from_unicode($arr);
  667. }
  668. }
  669. if (Kohana_UTF8::$server_utf8 === NULL)
  670. {
  671. // Determine if this server supports UTF-8 natively
  672. Kohana_UTF8::$server_utf8 = extension_loaded('mbstring');
  673. }