PageRenderTime 38ms CodeModel.GetById 11ms RepoModel.GetById 0ms app.codeStats 1ms

/system/classes/kohana/utf8.php

https://bitbucket.org/seyar/parshin.local
PHP | 767 lines | 280 code | 92 blank | 395 comment | 35 complexity | 1966a1dbfe25d024e2aef09b1ec2ba82 MD5 | raw file
Possible License(s): BSD-3-Clause, LGPL-2.1
  1. <?php defined('SYSPATH') or die('No direct script access.');
  2. /**
  3. * A port of [phputf8](http://phputf8.sourceforge.net/) to a unified set
  4. * of files. Provides multi-byte aware replacement string functions.
  5. *
  6. * For UTF-8 support to work correctly, the following requirements must be met:
  7. *
  8. * - PCRE needs to be compiled with UTF-8 support (--enable-utf8)
  9. * - Support for [Unicode properties](http://php.net/manual/reference.pcre.pattern.modifiers.php)
  10. * is highly recommended (--enable-unicode-properties)
  11. * - UTF-8 conversion will be much more reliable if the
  12. * [iconv extension](http://php.net/iconv) is loaded
  13. * - The [mbstring extension](http://php.net/mbstring) is highly recommended,
  14. * but must not be overloading string functions
  15. *
  16. * [!!] This file is licensed differently from the rest of Kohana. As a port of
  17. * [phputf8](http://phputf8.sourceforge.net/), this file is released under the LGPL.
  18. *
  19. * @package Kohana
  20. * @category Base
  21. * @author Kohana Team
  22. * @copyright (c) 2007-2010 Kohana Team
  23. * @copyright (c) 2005 Harry Fuecks
  24. * @license http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt
  25. */
  class Kohana_UTF8 {

      /**
       * @var  boolean  Does the server support UTF-8 natively?
       */
      public static $server_utf8 = NULL;

      /**
       * @var  array  List of called methods that have had their required file included.
       */
      public static $called = array();

      /**
       * Requires the file that implements one of the UTF-8 helper functions,
       * the first time that helper is needed, and records it in [UTF8::$called]
       * so the file is not required again.
       *
       * @param   string  method name; SYSPATH/utf8/<name>EXT is the file required
       * @return  void
       */
      private static function _load($function)
      {
          if ( ! isset(self::$called[$function]))
          {
              require SYSPATH.'utf8'.DIRECTORY_SEPARATOR.$function.EXT;

              // Function has been called
              self::$called[$function] = TRUE;
          }
      }

      /**
       * Recursively cleans arrays, objects, and strings. Removes ASCII control
       * codes and converts to the requested charset while silently discarding
       * incompatible characters.
       *
       *     UTF8::clean($_GET); // Clean GET data
       *
       * Arrays are rebuilt, so a key that changes when cleaned replaces the
       * original key instead of being added next to it. Objects are modified
       * in place: objects implementing ArrayAccess are written to with array
       * syntax, all other objects have their iterable properties reassigned.
       * Values that are neither arrays, objects nor strings are returned as-is.
       *
       * [!!] This method requires [Iconv](http://php.net/iconv)
       *
       * @param   mixed   variable to clean
       * @param   string  character set, defaults to Kohana::$charset
       * @return  mixed
       * @uses    UTF8::strip_ascii_ctrl
       * @uses    UTF8::is_ascii
       */
      public static function clean($var, $charset = NULL)
      {
          if ( ! $charset)
          {
              // Use the application character set
              $charset = Kohana::$charset;
          }

          if (is_array($var))
          {
              // Build a fresh array so that dirty keys (and the uncleaned
              // values stored under them) do not survive in the result
              $cleaned = array();

              foreach ($var as $key => $val)
              {
                  // Recursion! The charset is passed down so nested values
                  // are converted to the charset the caller asked for
                  $cleaned[self::clean($key, $charset)] = self::clean($val, $charset);
              }

              $var = $cleaned;
          }
          elseif (is_object($var))
          {
              if ($var instanceof ArrayAccess)
              {
                  foreach ($var as $key => $val)
                  {
                      // Array syntax is only valid on ArrayAccess objects
                      $var[self::clean($key, $charset)] = self::clean($val, $charset);
                  }
              }
              else
              {
                  foreach ($var as $key => $val)
                  {
                      // Plain objects do not support array syntax, so the
                      // property is reassigned under its existing name
                      $var->$key = self::clean($val, $charset);
                  }
              }
          }
          elseif (is_string($var) AND $var !== '')
          {
              // Remove control characters
              $var = self::strip_ascii_ctrl($var);

              if ( ! self::is_ascii($var))
              {
                  // Disable notices
                  $error_reporting = error_reporting(~E_NOTICE);

                  // iconv is expensive, so it is only used when needed
                  $var = iconv($charset, $charset.'//IGNORE', $var);

                  // Turn notices back on
                  error_reporting($error_reporting);
              }
          }

          return $var;
      }

      /**
       * Tests whether a string contains only 7-bit ASCII bytes. This is used to
       * determine when to use native functions or UTF-8 functions.
       *
       *     $ascii = UTF8::is_ascii($str);
       *
       * @param   mixed    string or array of strings to check
       * @return  boolean
       */
      public static function is_ascii($str)
      {
          if (is_array($str))
          {
              // An array is checked as one concatenated string
              $str = implode($str);
          }

          return ! preg_match('/[^\x00-\x7F]/S', $str);
      }

      /**
       * Strips out device control codes in the ASCII range.
       *
       *     $str = UTF8::strip_ascii_ctrl($str);
       *
       * @param   string  string to clean
       * @return  string
       */
      public static function strip_ascii_ctrl($str)
      {
          // Tab (\x09), line feed (\x0A) and carriage return (\x0D) are kept
          return preg_replace('/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S', '', $str);
      }

      /**
       * Strips out all non-7bit ASCII bytes.
       *
       *     $str = UTF8::strip_non_ascii($str);
       *
       * @param   string  string to clean
       * @return  string
       */
      public static function strip_non_ascii($str)
      {
          return preg_replace('/[^\x00-\x7F]+/S', '', $str);
      }

      /**
       * Replaces special/accented UTF-8 characters by ASCII-7 "equivalents".
       *
       *     $ascii = UTF8::transliterate_to_ascii($utf8);
       *
       * @author  Andreas Gohr <andi@splitbrain.org>
       * @param   string   string to transliterate
       * @param   integer  -1 lowercase only, +1 uppercase only, 0 both cases
       * @return  string
       */
      public static function transliterate_to_ascii($str, $case = 0)
      {
          self::_load(__FUNCTION__);

          return _transliterate_to_ascii($str, $case);
      }

      /**
       * Returns the length of the given string. This is a UTF8-aware version
       * of [strlen](http://php.net/strlen).
       *
       *     $length = UTF8::strlen($str);
       *
       * @param   string   string being measured for length
       * @return  integer
       * @uses    UTF8::$server_utf8
       * @uses    Kohana::$charset
       */
      public static function strlen($str)
      {
          if (UTF8::$server_utf8)
              return mb_strlen($str, Kohana::$charset);

          self::_load(__FUNCTION__);

          return _strlen($str);
      }

      /**
       * Finds position of first occurrence of a UTF-8 string. This is a
       * UTF8-aware version of [strpos](http://php.net/strpos).
       *
       *     $position = UTF8::strpos($str, $search);
       *
       * @author  Harry Fuecks <hfuecks@gmail.com>
       * @param   string   haystack
       * @param   string   needle
       * @param   integer  offset from which character in haystack to start searching
       * @return  integer  position of needle
       * @return  boolean  FALSE if the needle is not found
       * @uses    UTF8::$server_utf8
       * @uses    Kohana::$charset
       */
      public static function strpos($str, $search, $offset = 0)
      {
          if (UTF8::$server_utf8)
              return mb_strpos($str, $search, $offset, Kohana::$charset);

          self::_load(__FUNCTION__);

          return _strpos($str, $search, $offset);
      }

      /**
       * Finds position of last occurrence of a char in a UTF-8 string. This is
       * a UTF8-aware version of [strrpos](http://php.net/strrpos).
       *
       *     $position = UTF8::strrpos($str, $search);
       *
       * @author  Harry Fuecks <hfuecks@gmail.com>
       * @param   string   haystack
       * @param   string   needle
       * @param   integer  offset from which character in haystack to start searching
       * @return  integer  position of needle
       * @return  boolean  FALSE if the needle is not found
       * @uses    UTF8::$server_utf8
       * @uses    Kohana::$charset
       */
      public static function strrpos($str, $search, $offset = 0)
      {
          if (UTF8::$server_utf8)
              return mb_strrpos($str, $search, $offset, Kohana::$charset);

          self::_load(__FUNCTION__);

          return _strrpos($str, $search, $offset);
      }

      /**
       * Returns part of a UTF-8 string. This is a UTF8-aware version
       * of [substr](http://php.net/substr).
       *
       *     $sub = UTF8::substr($str, $offset);
       *
       * @author  Chris Smith <chris@jalakai.co.uk>
       * @param   string   input string
       * @param   integer  offset
       * @param   integer  length limit
       * @return  string
       * @uses    UTF8::$server_utf8
       * @uses    Kohana::$charset
       */
      public static function substr($str, $offset, $length = NULL)
      {
          if (UTF8::$server_utf8)
          {
              if ($length === NULL)
              {
                  // No limit requested: use the full character count, measured
                  // in the same charset that mb_substr() slices with
                  $length = mb_strlen($str, Kohana::$charset);
              }

              return mb_substr($str, $offset, $length, Kohana::$charset);
          }

          self::_load(__FUNCTION__);

          return _substr($str, $offset, $length);
      }

      /**
       * Replaces text within a portion of a UTF-8 string. This is a UTF8-aware
       * version of [substr_replace](http://php.net/substr_replace).
       *
       *     $str = UTF8::substr_replace($str, $replacement, $offset);
       *
       * @author  Harry Fuecks <hfuecks@gmail.com>
       * @param   string   input string
       * @param   string   replacement string
       * @param   integer  offset
       * @param   integer  length, passed through unchanged to the helper (NULL when omitted)
       * @return  string
       */
      public static function substr_replace($str, $replacement, $offset, $length = NULL)
      {
          self::_load(__FUNCTION__);

          return _substr_replace($str, $replacement, $offset, $length);
      }

      /**
       * Makes a UTF-8 string lowercase. This is a UTF8-aware version
       * of [strtolower](http://php.net/strtolower).
       *
       *     $str = UTF8::strtolower($str);
       *
       * @author  Andreas Gohr <andi@splitbrain.org>
       * @param   string  mixed case string
       * @return  string
       * @uses    UTF8::$server_utf8
       * @uses    Kohana::$charset
       */
      public static function strtolower($str)
      {
          if (UTF8::$server_utf8)
              return mb_strtolower($str, Kohana::$charset);

          self::_load(__FUNCTION__);

          return _strtolower($str);
      }

      /**
       * Makes a UTF-8 string uppercase. This is a UTF8-aware version
       * of [strtoupper](http://php.net/strtoupper).
       *
       *     $str = UTF8::strtoupper($str);
       *
       * @author  Andreas Gohr <andi@splitbrain.org>
       * @param   string  mixed case string
       * @return  string
       * @uses    UTF8::$server_utf8
       * @uses    Kohana::$charset
       */
      public static function strtoupper($str)
      {
          if (UTF8::$server_utf8)
              return mb_strtoupper($str, Kohana::$charset);

          self::_load(__FUNCTION__);

          return _strtoupper($str);
      }

      /**
       * Makes a UTF-8 string's first character uppercase. This is a UTF8-aware
       * version of [ucfirst](http://php.net/ucfirst).
       *
       *     $str = UTF8::ucfirst($str);
       *
       * @author  Harry Fuecks <hfuecks@gmail.com>
       * @param   string  mixed case string
       * @return  string
       */
      public static function ucfirst($str)
      {
          self::_load(__FUNCTION__);

          return _ucfirst($str);
      }

      /**
       * Makes the first character of every word in a UTF-8 string uppercase.
       * This is a UTF8-aware version of [ucwords](http://php.net/ucwords).
       *
       *     $str = UTF8::ucwords($str);
       *
       * @author  Harry Fuecks <hfuecks@gmail.com>
       * @param   string  mixed case string
       * @return  string
       */
      public static function ucwords($str)
      {
          self::_load(__FUNCTION__);

          return _ucwords($str);
      }

      /**
       * Case-insensitive UTF-8 string comparison. This is a UTF8-aware version
       * of [strcasecmp](http://php.net/strcasecmp).
       *
       *     $compare = UTF8::strcasecmp($str1, $str2);
       *
       * @author  Harry Fuecks <hfuecks@gmail.com>
       * @param   string   string to compare
       * @param   string   string to compare
       * @return  integer  less than 0 if str1 is less than str2
       * @return  integer  greater than 0 if str1 is greater than str2
       * @return  integer  0 if they are equal
       */
      public static function strcasecmp($str1, $str2)
      {
          self::_load(__FUNCTION__);

          return _strcasecmp($str1, $str2);
      }

      /**
       * Returns a string or an array with all occurrences of search in subject
       * (ignoring case) and replaced with the given replace value. This is a
       * UTF8-aware version of [str_ireplace](http://php.net/str_ireplace).
       *
       * [!!] This function is very slow compared to the native version. Avoid
       * using it when possible.
       *
       * @author  Harry Fuecks <hfuecks@gmail.com>
       * @param   string|array  text to replace
       * @param   string|array  replacement text
       * @param   string|array  subject text
       * @param   integer       number of matched and replaced needles will be returned via this parameter which is passed by reference
       * @return  string        if the input was a string
       * @return  array         if the input was an array
       */
      public static function str_ireplace($search, $replace, $str, & $count = NULL)
      {
          self::_load(__FUNCTION__);

          return _str_ireplace($search, $replace, $str, $count);
      }

      /**
       * Case-insensitive UTF-8 version of strstr. Returns all of input string
       * from the first occurrence of needle to the end. This is a UTF8-aware
       * version of [stristr](http://php.net/stristr).
       *
       *     $found = UTF8::stristr($str, $search);
       *
       * @author  Harry Fuecks <hfuecks@gmail.com>
       * @param   string  input string
       * @param   string  needle
       * @return  string  matched substring if found
       * @return  FALSE   if the substring was not found
       */
      public static function stristr($str, $search)
      {
          self::_load(__FUNCTION__);

          return _stristr($str, $search);
      }

      /**
       * Finds the length of the initial segment matching mask. This is a
       * UTF8-aware version of [strspn](http://php.net/strspn).
       *
       *     $found = UTF8::strspn($str, $mask);
       *
       * @author  Harry Fuecks <hfuecks@gmail.com>
       * @param   string   input string
       * @param   string   mask for search
       * @param   integer  start position of the string to examine
       * @param   integer  length of the string to examine
       * @return  integer  length of the initial segment that contains characters in the mask
       */
      public static function strspn($str, $mask, $offset = NULL, $length = NULL)
      {
          self::_load(__FUNCTION__);

          return _strspn($str, $mask, $offset, $length);
      }

      /**
       * Finds the length of the initial segment not matching mask. This is a
       * UTF8-aware version of [strcspn](http://php.net/strcspn).
       *
       *     $found = UTF8::strcspn($str, $mask);
       *
       * @author  Harry Fuecks <hfuecks@gmail.com>
       * @param   string   input string
       * @param   string   mask for search
       * @param   integer  start position of the string to examine
       * @param   integer  length of the string to examine
       * @return  integer  length of the initial segment that contains characters not in the mask
       */
      public static function strcspn($str, $mask, $offset = NULL, $length = NULL)
      {
          self::_load(__FUNCTION__);

          return _strcspn($str, $mask, $offset, $length);
      }

      /**
       * Pads a UTF-8 string to a certain length with another string. This is a
       * UTF8-aware version of [str_pad](http://php.net/str_pad).
       *
       *     $str = UTF8::str_pad($str, $length);
       *
       * @author  Harry Fuecks <hfuecks@gmail.com>
       * @param   string   input string
       * @param   integer  desired string length after padding
       * @param   string   string to use as padding
       * @param   string   padding type: STR_PAD_RIGHT, STR_PAD_LEFT, or STR_PAD_BOTH
       * @return  string
       */
      public static function str_pad($str, $final_str_length, $pad_str = ' ', $pad_type = STR_PAD_RIGHT)
      {
          self::_load(__FUNCTION__);

          return _str_pad($str, $final_str_length, $pad_str, $pad_type);
      }

      /**
       * Converts a UTF-8 string to an array. This is a UTF8-aware version of
       * [str_split](http://php.net/str_split).
       *
       *     $array = UTF8::str_split($str);
       *
       * @author  Harry Fuecks <hfuecks@gmail.com>
       * @param   string   input string
       * @param   integer  maximum length of each chunk
       * @return  array
       */
      public static function str_split($str, $split_length = 1)
      {
          self::_load(__FUNCTION__);

          return _str_split($str, $split_length);
      }

      /**
       * Reverses a UTF-8 string. This is a UTF8-aware version of [strrev](http://php.net/strrev).
       *
       *     $str = UTF8::strrev($str);
       *
       * @author  Harry Fuecks <hfuecks@gmail.com>
       * @param   string  string to be reversed
       * @return  string
       */
      public static function strrev($str)
      {
          self::_load(__FUNCTION__);

          return _strrev($str);
      }

      /**
       * Strips whitespace (or other UTF-8 characters) from the beginning and
       * end of a string. This is a UTF8-aware version of [trim](http://php.net/trim).
       *
       *     $str = UTF8::trim($str);
       *
       * @author  Andreas Gohr <andi@splitbrain.org>
       * @param   string  input string
       * @param   string  string of characters to remove
       * @return  string
       */
      public static function trim($str, $charlist = NULL)
      {
          self::_load(__FUNCTION__);

          return _trim($str, $charlist);
      }

      /**
       * Strips whitespace (or other UTF-8 characters) from the beginning of
       * a string. This is a UTF8-aware version of [ltrim](http://php.net/ltrim).
       *
       *     $str = UTF8::ltrim($str);
       *
       * @author  Andreas Gohr <andi@splitbrain.org>
       * @param   string  input string
       * @param   string  string of characters to remove
       * @return  string
       */
      public static function ltrim($str, $charlist = NULL)
      {
          self::_load(__FUNCTION__);

          return _ltrim($str, $charlist);
      }

      /**
       * Strips whitespace (or other UTF-8 characters) from the end of a string.
       * This is a UTF8-aware version of [rtrim](http://php.net/rtrim).
       *
       *     $str = UTF8::rtrim($str);
       *
       * @author  Andreas Gohr <andi@splitbrain.org>
       * @param   string  input string
       * @param   string  string of characters to remove
       * @return  string
       */
      public static function rtrim($str, $charlist = NULL)
      {
          self::_load(__FUNCTION__);

          return _rtrim($str, $charlist);
      }

      /**
       * Returns the unicode ordinal for a character. This is a UTF8-aware
       * version of [ord](http://php.net/ord).
       *
       *     $digit = UTF8::ord($character);
       *
       * @author  Harry Fuecks <hfuecks@gmail.com>
       * @param   string   UTF-8 encoded character
       * @return  integer
       */
      public static function ord($chr)
      {
          self::_load(__FUNCTION__);

          return _ord($chr);
      }

      /**
       * Takes an UTF-8 string and returns an array of ints representing the Unicode characters.
       * Astral planes are supported i.e. the ints in the output can be > 0xFFFF.
       * Occurrences of the BOM are ignored. Surrogates are not allowed.
       *
       *     $array = UTF8::to_unicode($str);
       *
       * The Original Code is Mozilla Communicator client code.
       * The Initial Developer of the Original Code is Netscape Communications Corporation.
       * Portions created by the Initial Developer are Copyright (C) 1998 the Initial Developer.
       * Ported to PHP by Henri Sivonen <hsivonen@iki.fi>, see <http://hsivonen.iki.fi/php-utf8/>
       * Slight modifications to fit with phputf8 library by Harry Fuecks <hfuecks@gmail.com>
       *
       * @param   string  UTF-8 encoded string
       * @return  array   unicode code points
       * @return  FALSE   if the string is invalid
       */
      public static function to_unicode($str)
      {
          self::_load(__FUNCTION__);

          return _to_unicode($str);
      }

      /**
       * Takes an array of ints representing the Unicode characters and returns a UTF-8 string.
       * Astral planes are supported i.e. the ints in the input can be > 0xFFFF.
       * Occurrences of the BOM are ignored. Surrogates are not allowed.
       *
       *     $str = UTF8::from_unicode($array);
       *
       * The Original Code is Mozilla Communicator client code.
       * The Initial Developer of the Original Code is Netscape Communications Corporation.
       * Portions created by the Initial Developer are Copyright (C) 1998 the Initial Developer.
       * Ported to PHP by Henri Sivonen <hsivonen@iki.fi>, see <http://hsivonen.iki.fi/php-utf8/>
       * Slight modifications to fit with phputf8 library by Harry Fuecks <hfuecks@gmail.com>
       *
       * @param   array    unicode code points representing a string
       * @return  string   utf8 string of characters
       * @return  boolean  FALSE if a code point cannot be found
       */
      public static function from_unicode($arr)
      {
          self::_load(__FUNCTION__);

          return _from_unicode($arr);
      }

  } // End UTF8
  // Decide whether native multi-byte support is available, unless something
  // has already assigned a value to the flag before this file was loaded
  if (is_null(Kohana_UTF8::$server_utf8))
  {
      // The mbstring extension is what provides the native mb_* functions
      Kohana_UTF8::$server_utf8 = extension_loaded('mbstring');
  }