PageRenderTime 58ms CodeModel.GetById 26ms RepoModel.GetById 1ms app.codeStats 0ms

/framework/web/helpers/CJSON.php

http://github.com/yiisoft/yii
PHP | 717 lines | 435 code | 87 blank | 195 comment | 93 complexity | 47bbeac9067b5cedfbb01f68ffce5a48 MD5 | raw file
Possible License(s): BSD-3-Clause
  1. <?php
  2. /**
  3. * JSON (JavaScript Object Notation) is a lightweight data-interchange
  4. * format. It is easy for humans to read and write. It is easy for machines
  5. * to parse and generate. It is based on a subset of the JavaScript
  6. * Programming Language, Standard ECMA-262 3rd Edition - December 1999.
  7. * This feature can also be found in Python. JSON is a text format that is
  8. * completely language independent but uses conventions that are familiar
  9. * to programmers of the C-family of languages, including C, C++, C#, Java,
  10. * JavaScript, Perl, TCL, and many others. These properties make JSON an
  11. * ideal data-interchange language.
  12. *
  13. * This package provides a simple encoder and decoder for JSON notation. It
  14. * is intended for use with client-side JavaScript applications that make
  15. * use of HTTPRequest to perform server communication functions - data can
  16. * be encoded into JSON notation for use in client-side JavaScript, or
  17. * decoded from incoming JavaScript requests. JSON format is native to
  18. * JavaScript, and can be directly eval()'ed with no further parsing
  19. * overhead.
  20. *
  21. * All strings should be in ASCII or UTF-8 format!
  22. *
  23. * LICENSE: Redistribution and use in source and binary forms, with or
  24. * without modification, are permitted provided that the following
  25. * conditions are met: Redistributions of source code must retain the
  26. * above copyright notice, this list of conditions and the following
  27. * disclaimer. Redistributions in binary form must reproduce the above
  28. * copyright notice, this list of conditions and the following disclaimer
  29. * in the documentation and/or other materials provided with the
  30. * distribution.
  31. *
  32. * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
  33. * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
  34. * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
  35. * NO EVENT SHALL CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  36. * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  37. * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
  38. * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  39. * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
  40. * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
  41. * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
  42. * DAMAGE.
  43. *
  44. * @author Michal Migurski <mike-json@teczno.com>
  45. * @author Matt Knapp <mdknapp[at]gmail[dot]com>
  46. * @author Brett Stimmerman <brettstimmerman[at]gmail[dot]com>
  47. * @copyright 2005 Michal Migurski
  48. * @license http://www.opensource.org/licenses/bsd-license.php
  49. * @link http://pear.php.net/pepr/pepr-proposal-show.php?id=198
  50. */
  /**
   * CJSON converts PHP data to and from JSON format.
   *
   * encode() and decode() use the native json_encode()/json_decode() functions
   * when they exist. A pure-PHP implementation is used otherwise, and decode()
   * also falls back to it whenever json_decode() rejects the input. The pure-PHP
   * decoder is lenient: it accepts single-quoted strings, unquoted property
   * names and C-style comments.
   *
   * @author Michal Migurski <mike-json@teczno.com>
   * @author Matt Knapp <mdknapp[at]gmail[dot]com>
   * @author Brett Stimmerman <brettstimmerman[at]gmail[dot]com>
   * @package system.web.helpers
   * @since 1.0
   */
  class CJSON
  {
      /**
       * Stack marker for CJSON::decode(): scanning a top-level slice, i.e. one
       * array element or one name/value pair of the container being decoded.
       */
      const JSON_SLICE = 1;
      /**
       * Stack marker for CJSON::decode(): scanning inside a quoted string.
       */
      const JSON_IN_STR = 2;
      /**
       * Stack marker for CJSON::decode(): scanning inside a nested array.
       */
      const JSON_IN_ARR = 4;
      /**
       * Stack marker for CJSON::decode(): scanning inside a nested object.
       */
      const JSON_IN_OBJ = 8;
      /**
       * Stack marker for CJSON::decode(): scanning inside a block comment.
       */
      const JSON_IN_CMT = 16;

      /**
       * Encodes an arbitrary variable into JSON format.
       *
       * Arrays whose keys are exactly 0..n-1 become JSON arrays; any other
       * non-empty array becomes a JSON object. Objects are encoded through
       * JsonSerializable::jsonSerialize() when available, by iteration when
       * Traversable, and from their accessible properties otherwise.
       *
       * @param mixed $var any number, boolean, string, array, or object to be encoded.
       * If var is a string, it will be converted to UTF-8 format first before being encoded.
       * @return string JSON representation of the input (an integer input is
       * returned as a PHP integer, as it always has been); an empty string for
       * unsupported types such as resources.
       */
      public static function encode($var)
      {
          switch (gettype($var)) {
              case 'boolean':
                  return $var ? 'true' : 'false';

              case 'NULL':
                  return 'null';

              case 'integer':
                  return (int) $var;

              case 'double':
              case 'float':
                  // Replace a locale-specific decimal comma with a dot.
                  return str_replace(',', '.', (float) $var);

              case 'string':
                  return self::encodeString($var);

              case 'array':
                  // Any key that breaks the 0..n-1 sequence forces object
                  // notation, so sparse or associative keys are not lost.
                  if (count($var) && array_keys($var) !== range(0, count($var) - 1)) {
                      return self::encodeMap($var);
                  }
                  return '[' . join(',', array_map(array(__CLASS__, 'encode'), $var)) . ']';

              case 'object':
                  // interface_exists() guards PHP versions without JsonSerializable;
                  // calling jsonSerialize() directly lets userland define the
                  // interface on old versions.
                  if (interface_exists('JsonSerializable', false) && $var instanceof JsonSerializable) {
                      return self::encode($var->jsonSerialize());
                  }
                  if ($var instanceof Traversable) {
                      $vars = array();
                      foreach ($var as $k => $v) {
                          $vars[$k] = $v;
                      }
                  } else {
                      $vars = get_object_vars($var);
                  }
                  return self::encodeMap($vars);

              default:
                  return '';
          }
      }

      /**
       * Encodes a PHP string as a JSON string literal.
       *
       * The string is first converted from the application charset to UTF-8.
       * json_encode() is used when available; otherwise the string is escaped
       * byte by byte, writing every non-ASCII character as \uXXXX escapes.
       *
       * @param string $var the string to encode
       * @return string the quoted JSON string
       */
      protected static function encodeString($var)
      {
          if (($enc = strtoupper(Yii::app()->charset)) !== 'UTF-8') {
              $var = iconv($enc, 'UTF-8', $var);
          }
          if (function_exists('json_encode')) {
              return json_encode($var);
          }

          // STRINGS ARE EXPECTED TO BE IN ASCII OR UTF-8 FORMAT
          $ascii = '';
          $length = strlen($var);
          for ($c = 0; $c < $length; ++$c) {
              $byte = ord($var[$c]);
              switch (true) {
                  case $byte == 0x08:
                      $ascii .= '\b';
                      break;
                  case $byte == 0x09:
                      $ascii .= '\t';
                      break;
                  case $byte == 0x0A:
                      $ascii .= '\n';
                      break;
                  case $byte == 0x0C:
                      $ascii .= '\f';
                      break;
                  case $byte == 0x0D:
                      $ascii .= '\r';
                      break;
                  case $byte == 0x22:
                  case $byte == 0x2F:
                  case $byte == 0x5C:
                      // double quote, slash, backslash
                      $ascii .= '\\' . $var[$c];
                      break;
                  case $byte < 0x20:
                      // Remaining control characters have no short escape and
                      // must not appear raw in JSON.
                      $ascii .= sprintf('\u%04x', $byte);
                      break;
                  case $byte <= 0x7F:
                      $ascii .= $var[$c];
                      break;
                  default:
                      $len = self::utf8SequenceLength($byte);
                      if ($len > 0) {
                          // substr() cannot read past the end of a truncated sequence.
                          $char = substr($var, $c, $len);
                          $c += $len - 1;
                          $utf16 = self::utf8ToUTF16BE($char);
                          if ($utf16 !== '') {
                              // One \uXXXX per UTF-16 unit, so characters above
                              // U+FFFF come out as a surrogate pair.
                              foreach (str_split(bin2hex($utf16), 4) as $unit) {
                                  $ascii .= '\u' . str_pad($unit, 4, '0', STR_PAD_LEFT);
                              }
                          }
                      }
                      // Bytes that cannot start a sequence are skipped.
                      break;
              }
          }
          return '"' . $ascii . '"';
      }

      /**
       * Encodes an associative array as a JSON object.
       *
       * @param array $pairs property names mapped to values
       * @return string JSON object notation, like '{"name":value,...}'
       */
      protected static function encodeMap(array $pairs)
      {
          $members = array_map(
              array(__CLASS__, 'nameValue'),
              array_keys($pairs),
              array_values($pairs)
          );
          return '{' . join(',', $members) . '}';
      }

      /**
       * Returns the length in bytes of the UTF-8 sequence introduced by a lead byte.
       *
       * @param integer $lead ordinal value of the first byte of the sequence
       * @return integer a length from 2 to 6, or 0 when the byte does not start
       * a multi-byte sequence
       */
      protected static function utf8SequenceLength($lead)
      {
          if (($lead & 0xE0) == 0xC0) {
              return 2; // 110XXXXX
          }
          if (($lead & 0xF0) == 0xE0) {
              return 3; // 1110XXXX
          }
          if (($lead & 0xF8) == 0xF0) {
              return 4; // 11110XXX
          }
          if (($lead & 0xFC) == 0xF8) {
              return 5; // 111110XX
          }
          if (($lead & 0xFE) == 0xFC) {
              return 6; // 1111110X
          }
          return 0;
      }

      /**
       * Array-walking callback that builds one JSON name/value pair.
       *
       * @param string $name name of key to use
       * @param mixed $value the value to be encoded
       *
       * @return string JSON-formatted name-value pair, like '"name":value'
       * @access private
       */
      protected static function nameValue($name, $value)
      {
          return self::encode(strval($name)) . ':' . self::encode($value);
      }

      /**
       * Reduces a string by removing leading and trailing comments and whitespace.
       *
       * @param string $str string value to strip of comments and whitespace
       *
       * @return string string value stripped of comments and whitespace
       * @access private
       */
      protected static function reduceString($str)
      {
          $patterns = array(
              // single line comments in '// ...' form
              '#^\s*//(.+)$#m',
              // multi-line comments in '/* ... */' form, at start of string
              '#^\s*/\*(.+)\*/#Us',
              // multi-line comments in '/* ... */' form, at end of string
              '#/\*(.+)\*/\s*$#Us',
          );
          return trim(preg_replace($patterns, '', $str));
      }

      /**
       * Decodes a JSON string into the appropriate PHP value.
       *
       * json_decode() is tried first. When it is missing or returns null, the
       * lenient pure-PHP parser takes over; input it cannot interpret decodes
       * to null.
       *
       * @param string $str JSON-formatted string
       * @param boolean $useArray whether to use associative array to represent object data
       * @return mixed number, boolean, string, array, or object corresponding to given JSON input string.
       * Note that decode() always returns strings in ASCII or UTF-8 format!
       * @access public
       */
      public static function decode($str, $useArray = true)
      {
          if (function_exists('json_decode')) {
              $json = json_decode($str, $useArray);
              // json_decode() returns null for some input that the lenient
              // parser below can still handle.
              // see: http://gggeek.altervista.org/sw/article_20070425.html
              if ($json !== null) {
                  return $json;
              }
          }

          $str = self::reduceString($str);

          switch (strtolower($str)) {
              case 'true':
                  return true;
              case 'false':
                  return false;
              case 'null':
                  return null;
          }

          if (is_numeric($str)) {
              // Return an integer when the value has no fractional part.
              return ((float) $str == (int) $str) ? (int) $str : (float) $str;
          }

          // '.*' rather than '.+', so an empty quoted string decodes to ''.
          if (preg_match('/^("|\').*\1$/s', $str)) {
              return self::decodeString($str);
          }

          if (preg_match('/^\[.*\]$/s', $str) || preg_match('/^\{.*\}$/s', $str)) {
              return self::decodeContainer($str, $useArray);
          }

          return null;
      }

      /**
       * Decodes a quoted JSON string literal (single or double quoted) to UTF-8.
       *
       * Bytes that are neither printable ASCII, part of a recognised escape,
       * nor the start of a UTF-8 sequence are dropped.
       *
       * @param string $str the literal, including its surrounding quotes
       * @return string the decoded UTF-8 string
       */
      protected static function decodeString($str)
      {
          static $simpleEscapes = array(
              '\b' => "\x08",
              '\t' => "\x09",
              '\n' => "\x0A",
              '\f' => "\x0C",
              '\r' => "\x0D",
          );

          $delim = $str[0];
          $chrs = (string) substr($str, 1, -1);
          $utf8 = '';
          $length = strlen($chrs);

          for ($c = 0; $c < $length; ++$c) {
              $pair = substr($chrs, $c, 2);
              $byte = ord($chrs[$c]);

              switch (true) {
                  case isset($simpleEscapes[$pair]):
                      $utf8 .= $simpleEscapes[$pair];
                      ++$c;
                      break;

                  case $pair === '\\"':
                  case $pair === '\\\'':
                  case $pair === '\\\\':
                  case $pair === '\\/':
                      // An escaped quote of the other kind keeps its quote but
                      // loses the backslash on the next iteration.
                      if (($delim == '"' && $pair != '\\\'') ||
                          ($delim == "'" && $pair != '\\"')) {
                          $utf8 .= $chrs[++$c];
                      }
                      break;

                  case preg_match('/^\\\\u[0-9A-F]{4}/i', substr($chrs, $c, 6)):
                      $unit = hexdec(substr($chrs, $c + 2, 4));
                      $c += 5;
                      // A high surrogate followed by an escaped low surrogate
                      // is one character above U+FFFF; decode both together.
                      if ($unit >= 0xD800 && $unit <= 0xDBFF &&
                          preg_match('/^\\\\u(D[C-F][0-9A-F]{2})/i', substr($chrs, $c + 1, 6), $low)) {
                          $utf16 = pack('n2', $unit, hexdec($low[1]));
                          $c += 6;
                      } else {
                          $utf16 = pack('n', $unit);
                      }
                      $utf8 .= self::utf16beToUTF8($utf16);
                      break;

                  case ($byte >= 0x20) && ($byte <= 0x7F):
                      $utf8 .= $chrs[$c];
                      break;

                  default:
                      // Copy a raw multi-byte UTF-8 sequence through unchanged.
                      $len = self::utf8SequenceLength($byte);
                      if ($len > 0) {
                          $utf8 .= substr($chrs, $c, $len);
                          $c += $len - 1;
                      }
                      break;
              }
          }

          return $utf8;
      }

      /**
       * Decodes array ('[...]') or object ('{...}') notation.
       *
       * The content between the outer brackets is scanned once while a stack
       * tracks whether the scanner is inside a string, nested array, nested
       * object or comment. Each top-level comma, and the end of input, closes
       * a slice that is decoded recursively.
       *
       * @param string $str the notation, including its outer brackets
       * @param boolean $useArray whether objects are returned as associative arrays
       * @return array|stdClass the decoded container
       */
      protected static function decodeContainer($str, $useArray)
      {
          $isList = ($str[0] == '[');
          $result = ($isList || $useArray) ? array() : new stdClass();

          $chrs = self::reduceString((string) substr($str, 1, -1));
          if ($chrs === '') {
              return $result;
          }

          // Slice entries are never popped, so the stack is never empty.
          $stk = array(array('what' => self::JSON_SLICE, 'where' => 0, 'delim' => false));
          $containers = array(self::JSON_SLICE, self::JSON_IN_ARR, self::JSON_IN_OBJ);
          $length = strlen($chrs);

          for ($c = 0; $c <= $length; ++$c) {
              $top = end($stk);
              $atEnd = ($c == $length);
              $chr = $atEnd ? '' : $chrs[$c];
              $pair = substr($chrs, $c, 2);
              $inContainer = in_array($top['what'], $containers, true);

              if ($atEnd || ($chr === ',' && $top['what'] == self::JSON_SLICE)) {
                  // A comma outside any string, array, object or comment, or
                  // the end of the input: decode the finished slice.
                  $slice = substr($chrs, $top['where'], $c - $top['where']);
                  $stk[] = array('what' => self::JSON_SLICE, 'where' => $c + 1, 'delim' => false);

                  if ($isList) {
                      $result[] = self::decode($slice, $useArray);
                  } else {
                      $member = self::decodePair($slice, $useArray);
                      if ($member !== null) {
                          list($key, $val) = $member;
                          if ($useArray) {
                              $result[$key] = $val;
                          } else {
                              $result->$key = $val;
                          }
                      }
                  }
              } elseif (($chr === '"' || $chr === "'") && $inContainer) {
                  // A quote outside strings and comments opens a string. An
                  // apostrophe inside a comment must not.
                  $stk[] = array('what' => self::JSON_IN_STR, 'where' => $c, 'delim' => $chr);
              } elseif ($top['what'] == self::JSON_IN_STR &&
                  $chr === $top['delim'] &&
                  !self::isEscaped($chrs, $c)) {
                  // matching, unescaped quote: the string ends here
                  array_pop($stk);
              } elseif ($chr === '[' && $inContainer) {
                  $stk[] = array('what' => self::JSON_IN_ARR, 'where' => $c, 'delim' => false);
              } elseif ($chr === ']' && $top['what'] == self::JSON_IN_ARR) {
                  array_pop($stk);
              } elseif ($chr === '{' && $inContainer) {
                  $stk[] = array('what' => self::JSON_IN_OBJ, 'where' => $c, 'delim' => false);
              } elseif ($chr === '}' && $top['what'] == self::JSON_IN_OBJ) {
                  array_pop($stk);
              } elseif ($pair === '/*' && $inContainer) {
                  $stk[] = array('what' => self::JSON_IN_CMT, 'where' => $c, 'delim' => false);
                  $c++;
              } elseif ($pair === '*/' && $top['what'] == self::JSON_IN_CMT) {
                  array_pop($stk);
                  $c++;
                  // Blank out the comment so the enclosing slice decodes
                  // cleanly; the length, and so all offsets, stay the same.
                  $span = $c - $top['where'] + 1;
                  $chrs = substr_replace($chrs, str_repeat(' ', $span), $top['where'], $span);
              }
          }

          return $result;
      }

      /**
       * Splits one object member ('"name":value' or 'name:value') and decodes it.
       *
       * @param string $slice the text of a single object member
       * @param boolean $useArray passed through to decode() for nested values
       * @return array|null array(key, value), or null when the slice is not a
       * recognisable name/value pair
       */
      protected static function decodePair($slice, $useArray)
      {
          // "name":value pair with a quoted name
          if (preg_match('/^\s*(["\'].*[^\\\]["\'])\s*:\s*(\S.*),?$/Uis', $slice, $parts)) {
              return array(
                  self::decode($parts[1], $useArray),
                  self::decode($parts[2], $useArray),
              );
          }
          // name:value pair, where name is unquoted
          if (preg_match('/^\s*(\w+)\s*:\s*(\S.*),?$/Uis', $slice, $parts)) {
              return array($parts[1], self::decode($parts[2], $useArray));
          }
          return null;
      }

      /**
       * Tells whether the character at an offset is escaped by a backslash.
       *
       * Only an odd number of directly preceding backslashes escapes it; an
       * even run consists entirely of escaped backslashes.
       *
       * @param string $chrs the text being scanned
       * @param integer $pos offset of the character to test
       * @return boolean whether the character is escaped
       */
      protected static function isEscaped($chrs, $pos)
      {
          $backslashes = 0;
          for ($i = $pos - 1; $i >= 0 && $chrs[$i] === '\\'; --$i) {
              ++$backslashes;
          }
          return ($backslashes % 2) == 1;
      }

      /**
       * Converts UTF-8 encoded text to a list of Unicode code points.
       *
       * Sequences of up to four bytes are decoded. The input is assumed to be
       * well formed; an incomplete trailing sequence is ignored.
       *
       * @param string $str string to convert
       * @return array list of integer code points
       * @author Scott Michael Reynen <scott@randomchaos.com>
       * @link http://www.randomchaos.com/document.php?source=php_and_unicode
       * @see unicodeToUTF8()
       */
      protected static function utf8ToUnicode(&$str)
      {
          $unicode = array();
          $values = array();
          $lookingFor = 1;
          $length = strlen($str);

          for ($i = 0; $i < $length; $i++) {
              $thisValue = ord($str[$i]);
              if ($thisValue < 128) {
                  $unicode[] = $thisValue;
                  continue;
              }

              if (count($values) == 0) {
                  // The lead byte determines how many bytes make up the character.
                  $lookingFor = ($thisValue < 224) ? 2 : (($thisValue < 240) ? 3 : 4);
              }
              $values[] = $thisValue;

              if (count($values) == $lookingFor) {
                  if ($lookingFor == 4) {
                      $number = (($values[0] % 8) * 262144) + (($values[1] % 64) * 4096)
                          + (($values[2] % 64) * 64) + ($values[3] % 64);
                  } elseif ($lookingFor == 3) {
                      $number = (($values[0] % 16) * 4096) + (($values[1] % 64) * 64) + ($values[2] % 64);
                  } else {
                      $number = (($values[0] % 32) * 64) + ($values[1] % 64);
                  }
                  $unicode[] = $number;
                  $values = array();
                  $lookingFor = 1;
              }
          }

          return $unicode;
      }

      /**
       * Converts a list of Unicode code points back to its UTF-8 representation.
       *
       * @param array $str list of integer code points
       * @return string the UTF-8 encoded string
       * @author Scott Michael Reynen <scott@randomchaos.com>
       * @link http://www.randomchaos.com/document.php?source=php_and_unicode
       * @see utf8ToUnicode()
       */
      protected static function unicodeToUTF8(&$str)
      {
          $utf8 = '';
          foreach ($str as $unicode) {
              if ($unicode < 0x80) {
                  $utf8 .= chr($unicode);
              } elseif ($unicode < 0x800) {
                  $utf8 .= chr(0xC0 | ($unicode >> 6))
                      . chr(0x80 | ($unicode & 0x3F));
              } elseif ($unicode < 0x10000) {
                  $utf8 .= chr(0xE0 | ($unicode >> 12))
                      . chr(0x80 | (($unicode >> 6) & 0x3F))
                      . chr(0x80 | ($unicode & 0x3F));
              } else {
                  $utf8 .= chr(0xF0 | ($unicode >> 18))
                      . chr(0x80 | (($unicode >> 12) & 0x3F))
                      . chr(0x80 | (($unicode >> 6) & 0x3F))
                      . chr(0x80 | ($unicode & 0x3F));
              }
          }
          return $utf8;
      }

      /**
       * UTF-8 to UTF-16BE conversion.
       *
       * Uses mb_convert_encoding() when available, otherwise a pure-PHP
       * conversion that writes code points above U+FFFF as surrogate pairs.
       *
       * @param string $str string to convert
       * @param boolean $bom whether to output BOM header
       * @return string the UTF-16BE encoded string
       */
      protected static function utf8ToUTF16BE(&$str, $bom = false)
      {
          $out = $bom ? "\xFE\xFF" : '';
          if (function_exists('mb_convert_encoding')) {
              return $out . mb_convert_encoding($str, 'UTF-16BE', 'UTF-8');
          }

          foreach (self::utf8ToUnicode($str) as $cp) {
              if ($cp >= 0x10000) {
                  $cp -= 0x10000;
                  $out .= pack('n2', 0xD800 | ($cp >> 10), 0xDC00 | ($cp & 0x3FF));
              } else {
                  $out .= pack('n', $cp);
              }
          }
          return $out;
      }

      /**
       * UTF-16BE to UTF-8 conversion.
       *
       * A high surrogate immediately followed by a low surrogate is combined
       * into a single code point; an unpaired surrogate is converted as is.
       *
       * @param string $str UTF-16BE encoded string to convert
       * @return string the UTF-8 encoded string
       */
      protected static function utf16beToUTF8(&$str)
      {
          if ($str === '') {
              return '';
          }

          // unpack() returns a 1-based array; reindex for look-ahead.
          $units = array_values(unpack('n*', $str));
          $count = count($units);
          $codePoints = array();

          for ($i = 0; $i < $count; ++$i) {
              $unit = $units[$i];
              if ($unit >= 0xD800 && $unit <= 0xDBFF && $i + 1 < $count &&
                  $units[$i + 1] >= 0xDC00 && $units[$i + 1] <= 0xDFFF) {
                  $unit = 0x10000 + (($unit - 0xD800) << 10) + ($units[++$i] - 0xDC00);
              }
              $codePoints[] = $unit;
          }

          return self::unicodeToUTF8($codePoints);
      }
  }