PageRenderTime 32ms CodeModel.GetById 31ms RepoModel.GetById 0ms app.codeStats 1ms

/includes/CJSON.php

http://mapleleaf.googlecode.com/
PHP | 707 lines | 427 code | 86 blank | 194 comment | 90 complexity | 1528b4bb56888b20025fdea893041f53 MD5 | raw file
  1. <?php
  2. /**
  3. * JSON (JavaScript Object Notation) is a lightweight data-interchange
  4. * format. It is easy for humans to read and write. It is easy for machines
  5. * to parse and generate. It is based on a subset of the JavaScript
  6. * Programming Language, Standard ECMA-262 3rd Edition - December 1999.
  7. * This feature can also be found in Python. JSON is a text format that is
  8. * completely language independent but uses conventions that are familiar
  9. * to programmers of the C-family of languages, including C, C++, C#, Java,
  10. * JavaScript, Perl, TCL, and many others. These properties make JSON an
  11. * ideal data-interchange language.
  12. *
  13. * This package provides a simple encoder and decoder for JSON notation. It
  14. * is intended for use with client-side Javascript applications that make
  15. * use of HTTPRequest to perform server communication functions - data can
  16. * be encoded into JSON notation for use in a client-side javascript, or
  17. * decoded from incoming Javascript requests. JSON format is native to
  18. * Javascript, and can be directly eval()'ed with no further parsing
  19. * overhead.
  20. *
  21. * All strings should be in ASCII or UTF-8 format!
  22. *
  23. * LICENSE: Redistribution and use in source and binary forms, with or
  24. * without modification, are permitted provided that the following
  25. * conditions are met: Redistributions of source code must retain the
  26. * above copyright notice, this list of conditions and the following
  27. * disclaimer. Redistributions in binary form must reproduce the above
  28. * copyright notice, this list of conditions and the following disclaimer
  29. * in the documentation and/or other materials provided with the
  30. * distribution.
  31. *
  32. * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
  33. * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
  34. * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
  35. * NO EVENT SHALL CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
  36. * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
  37. * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
  38. * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
  39. * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
  40. * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
  41. * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
  42. * DAMAGE.
  43. *
  44. * @author Michal Migurski <mike-json@teczno.com>
  45. * @author Matt Knapp <mdknapp[at]gmail[dot]com>
  46. * @author Brett Stimmerman <brettstimmerman[at]gmail[dot]com>
  47. * @copyright 2005 Michal Migurski
  48. * @license http://www.opensource.org/licenses/bsd-license.php
  49. * @link http://pear.php.net/pepr/pepr-proposal-show.php?id=198
  50. */
  51. /**
  52. * CJSON converts PHP data to and from JSON format.
  53. *
  54. * @author Michal Migurski <mike-json@teczno.com>
  55. * @author Matt Knapp <mdknapp[at]gmail[dot]com>
  56. * @author Brett Stimmerman <brettstimmerman[at]gmail[dot]com>
  57. * @version $Id: CJSON.php 2569 2010-10-25 18:50:08Z qiang.xue $
  58. * @package system.web.helpers
  59. * @since 1.0
  60. */
  61. class CJSON
  62. {
  63. /**
  64. * Marker constant for JSON::decode(), used to flag stack state
  65. */
  66. const JSON_SLICE = 1;
  67. /**
  68. * Marker constant for JSON::decode(), used to flag stack state
  69. */
  70. const JSON_IN_STR = 2;
  71. /**
  72. * Marker constant for JSON::decode(), used to flag stack state
  73. */
  74. const JSON_IN_ARR = 4;
  75. /**
  76. * Marker constant for JSON::decode(), used to flag stack state
  77. */
  78. const JSON_IN_OBJ = 8;
  79. /**
  80. * Marker constant for JSON::decode(), used to flag stack state
  81. */
  82. const JSON_IN_CMT = 16;
  83. /**
  84. * Encodes an arbitrary variable into JSON format
  85. *
  86. * @param mixed $var any number, boolean, string, array, or object to be encoded.
  87. * see argument 1 to JSON() above for array-parsing behavior.
  88. * if var is a strng, note that encode() always expects it
  89. * to be in ASCII or UTF-8 format!
  90. *
  91. * @return string JSON string representation of input var
  92. */
  93. public static function encode($var)
  94. {
  95. switch (gettype($var)) {
  96. case 'boolean':
  97. return $var ? 'true' : 'false';
  98. case 'NULL':
  99. return 'null';
  100. case 'integer':
  101. return (int) $var;
  102. case 'double':
  103. case 'float':
  104. return rtrim(sprintf('%.16F',$var),'0'); // locale-independent representation
  105. case 'string':
  106. if(function_exists('json_encode'))
  107. return json_encode($var);
  108. #if (($enc=strtoupper(Yii::app()->charset))!=='UTF-8')
  109. # $var=iconv($enc, 'UTF-8', $var);
  110. // STRINGS ARE EXPECTED TO BE IN ASCII OR UTF-8 FORMAT
  111. $ascii = '';
  112. $strlen_var = strlen($var);
  113. /*
  114. * Iterate over every character in the string,
  115. * escaping with a slash or encoding to UTF-8 where necessary
  116. */
  117. for ($c = 0; $c < $strlen_var; ++$c) {
  118. $ord_var_c = ord($var{$c});
  119. switch (true) {
  120. case $ord_var_c == 0x08:
  121. $ascii .= '\b';
  122. break;
  123. case $ord_var_c == 0x09:
  124. $ascii .= '\t';
  125. break;
  126. case $ord_var_c == 0x0A:
  127. $ascii .= '\n';
  128. break;
  129. case $ord_var_c == 0x0C:
  130. $ascii .= '\f';
  131. break;
  132. case $ord_var_c == 0x0D:
  133. $ascii .= '\r';
  134. break;
  135. case $ord_var_c == 0x22:
  136. case $ord_var_c == 0x2F:
  137. case $ord_var_c == 0x5C:
  138. // double quote, slash, slosh
  139. $ascii .= '\\'.$var{$c};
  140. break;
  141. case (($ord_var_c >= 0x20) && ($ord_var_c <= 0x7F)):
  142. // characters U-00000000 - U-0000007F (same as ASCII)
  143. $ascii .= $var{$c};
  144. break;
  145. case (($ord_var_c & 0xE0) == 0xC0):
  146. // characters U-00000080 - U-000007FF, mask 110XXXXX
  147. // see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
  148. $char = pack('C*', $ord_var_c, ord($var{$c+1}));
  149. $c+=1;
  150. $utf16 = self::utf8ToUTF16BE($char);
  151. $ascii .= sprintf('\u%04s', bin2hex($utf16));
  152. break;
  153. case (($ord_var_c & 0xF0) == 0xE0):
  154. // characters U-00000800 - U-0000FFFF, mask 1110XXXX
  155. // see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
  156. $char = pack('C*', $ord_var_c,
  157. ord($var{$c+1}),
  158. ord($var{$c+2}));
  159. $c+=2;
  160. $utf16 = self::utf8ToUTF16BE($char);
  161. $ascii .= sprintf('\u%04s', bin2hex($utf16));
  162. break;
  163. case (($ord_var_c & 0xF8) == 0xF0):
  164. // characters U-00010000 - U-001FFFFF, mask 11110XXX
  165. // see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
  166. $char = pack('C*', $ord_var_c,
  167. ord($var{$c+1}),
  168. ord($var{$c+2}),
  169. ord($var{$c+3}));
  170. $c+=3;
  171. $utf16 = self::utf8ToUTF16BE($char);
  172. $ascii .= sprintf('\u%04s', bin2hex($utf16));
  173. break;
  174. case (($ord_var_c & 0xFC) == 0xF8):
  175. // characters U-00200000 - U-03FFFFFF, mask 111110XX
  176. // see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
  177. $char = pack('C*', $ord_var_c,
  178. ord($var{$c+1}),
  179. ord($var{$c+2}),
  180. ord($var{$c+3}),
  181. ord($var{$c+4}));
  182. $c+=4;
  183. $utf16 = self::utf8ToUTF16BE($char);
  184. $ascii .= sprintf('\u%04s', bin2hex($utf16));
  185. break;
  186. case (($ord_var_c & 0xFE) == 0xFC):
  187. // characters U-04000000 - U-7FFFFFFF, mask 1111110X
  188. // see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
  189. $char = pack('C*', $ord_var_c,
  190. ord($var{$c+1}),
  191. ord($var{$c+2}),
  192. ord($var{$c+3}),
  193. ord($var{$c+4}),
  194. ord($var{$c+5}));
  195. $c+=5;
  196. $utf16 = self::utf8ToUTF16BE($char);
  197. $ascii .= sprintf('\u%04s', bin2hex($utf16));
  198. break;
  199. }
  200. }
  201. return '"'.$ascii.'"';
  202. case 'array':
  203. /*
  204. * As per JSON spec if any array key is not an integer
  205. * we must treat the the whole array as an object. We
  206. * also try to catch a sparsely populated associative
  207. * array with numeric keys here because some JS engines
  208. * will create an array with empty indexes up to
  209. * max_index which can cause memory issues and because
  210. * the keys, which may be relevant, will be remapped
  211. * otherwise.
  212. *
  213. * As per the ECMA and JSON specification an object may
  214. * have any string as a property. Unfortunately due to
  215. * a hole in the ECMA specification if the key is a
  216. * ECMA reserved word or starts with a digit the
  217. * parameter is only accessible using ECMAScript's
  218. * bracket notation.
  219. */
  220. // treat as a JSON object
  221. if (is_array($var) && count($var) && (array_keys($var) !== range(0, sizeof($var) - 1))) {
  222. return '{' .
  223. join(',', array_map(array('CJSON', 'nameValue'),
  224. array_keys($var),
  225. array_values($var)))
  226. . '}';
  227. }
  228. // treat it like a regular array
  229. return '[' . join(',', array_map(array('CJSON', 'encode'), $var)) . ']';
  230. case 'object':
  231. if ($var instanceof Traversable)
  232. {
  233. $vars = array();
  234. foreach ($var as $k=>$v)
  235. $vars[$k] = $v;
  236. }
  237. else
  238. $vars = get_object_vars($var);
  239. return '{' .
  240. join(',', array_map(array('CJSON', 'nameValue'),
  241. array_keys($vars),
  242. array_values($vars)))
  243. . '}';
  244. default:
  245. return '';
  246. }
  247. }
  248. /**
  249. * array-walking function for use in generating JSON-formatted name-value pairs
  250. *
  251. * @param string $name name of key to use
  252. * @param mixed $value reference to an array element to be encoded
  253. *
  254. * @return string JSON-formatted name-value pair, like '"name":value'
  255. * @access private
  256. */
  257. protected static function nameValue($name, $value)
  258. {
  259. return self::encode(strval($name)) . ':' . self::encode($value);
  260. }
  261. /**
  262. * reduce a string by removing leading and trailing comments and whitespace
  263. *
  264. * @param string $str string value to strip of comments and whitespace
  265. *
  266. * @return string string value stripped of comments and whitespace
  267. * @access private
  268. */
  269. protected static function reduceString($str)
  270. {
  271. $str = preg_replace(array(
  272. // eliminate single line comments in '// ...' form
  273. '#^\s*//(.+)$#m',
  274. // eliminate multi-line comments in '/* ... */' form, at start of string
  275. '#^\s*/\*(.+)\*/#Us',
  276. // eliminate multi-line comments in '/* ... */' form, at end of string
  277. '#/\*(.+)\*/\s*$#Us'
  278. ), '', $str);
  279. // eliminate extraneous space
  280. return trim($str);
  281. }
  282. /**
  283. * decodes a JSON string into appropriate variable
  284. *
  285. * @param string $str JSON-formatted string
  286. * @param boolean $useArray whether to use associative array to represent object data
  287. *
  288. * @return mixed number, boolean, string, array, or object
  289. * corresponding to given JSON input string.
  290. * See argument 1 to JSON() above for object-output behavior.
  291. * Note that decode() always returns strings
  292. * in ASCII or UTF-8 format!
  293. * @access public
  294. */
  295. public static function decode($str, $useArray=true)
  296. {
  297. if(function_exists('json_decode'))
  298. return json_decode($str,$useArray);
  299. $str = self::reduceString($str);
  300. switch (strtolower($str)) {
  301. case 'true':
  302. return true;
  303. case 'false':
  304. return false;
  305. case 'null':
  306. return null;
  307. default:
  308. if (is_numeric($str)) {
  309. // Lookie-loo, it's a number
  310. // This would work on its own, but I'm trying to be
  311. // good about returning integers where appropriate:
  312. // return (float)$str;
  313. // Return float or int, as appropriate
  314. return ((float)$str == (integer)$str)
  315. ? (integer)$str
  316. : (float)$str;
  317. } elseif (preg_match('/^("|\').+(\1)$/s', $str, $m) && $m[1] == $m[2]) {
  318. // STRINGS RETURNED IN UTF-8 FORMAT
  319. $delim = substr($str, 0, 1);
  320. $chrs = substr($str, 1, -1);
  321. $utf8 = '';
  322. $strlen_chrs = strlen($chrs);
  323. for ($c = 0; $c < $strlen_chrs; ++$c) {
  324. $substr_chrs_c_2 = substr($chrs, $c, 2);
  325. $ord_chrs_c = ord($chrs{$c});
  326. switch (true) {
  327. case $substr_chrs_c_2 == '\b':
  328. $utf8 .= chr(0x08);
  329. ++$c;
  330. break;
  331. case $substr_chrs_c_2 == '\t':
  332. $utf8 .= chr(0x09);
  333. ++$c;
  334. break;
  335. case $substr_chrs_c_2 == '\n':
  336. $utf8 .= chr(0x0A);
  337. ++$c;
  338. break;
  339. case $substr_chrs_c_2 == '\f':
  340. $utf8 .= chr(0x0C);
  341. ++$c;
  342. break;
  343. case $substr_chrs_c_2 == '\r':
  344. $utf8 .= chr(0x0D);
  345. ++$c;
  346. break;
  347. case $substr_chrs_c_2 == '\\"':
  348. case $substr_chrs_c_2 == '\\\'':
  349. case $substr_chrs_c_2 == '\\\\':
  350. case $substr_chrs_c_2 == '\\/':
  351. if (($delim == '"' && $substr_chrs_c_2 != '\\\'') ||
  352. ($delim == "'" && $substr_chrs_c_2 != '\\"')) {
  353. $utf8 .= $chrs{++$c};
  354. }
  355. break;
  356. case preg_match('/\\\u[0-9A-F]{4}/i', substr($chrs, $c, 6)):
  357. // single, escaped unicode character
  358. $utf16 = chr(hexdec(substr($chrs, ($c+2), 2)))
  359. . chr(hexdec(substr($chrs, ($c+4), 2)));
  360. $utf8 .= self::utf16beToUTF8($utf16);
  361. $c+=5;
  362. break;
  363. case ($ord_chrs_c >= 0x20) && ($ord_chrs_c <= 0x7F):
  364. $utf8 .= $chrs{$c};
  365. break;
  366. case ($ord_chrs_c & 0xE0) == 0xC0:
  367. // characters U-00000080 - U-000007FF, mask 110XXXXX
  368. //see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
  369. $utf8 .= substr($chrs, $c, 2);
  370. ++$c;
  371. break;
  372. case ($ord_chrs_c & 0xF0) == 0xE0:
  373. // characters U-00000800 - U-0000FFFF, mask 1110XXXX
  374. // see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
  375. $utf8 .= substr($chrs, $c, 3);
  376. $c += 2;
  377. break;
  378. case ($ord_chrs_c & 0xF8) == 0xF0:
  379. // characters U-00010000 - U-001FFFFF, mask 11110XXX
  380. // see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
  381. $utf8 .= substr($chrs, $c, 4);
  382. $c += 3;
  383. break;
  384. case ($ord_chrs_c & 0xFC) == 0xF8:
  385. // characters U-00200000 - U-03FFFFFF, mask 111110XX
  386. // see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
  387. $utf8 .= substr($chrs, $c, 5);
  388. $c += 4;
  389. break;
  390. case ($ord_chrs_c & 0xFE) == 0xFC:
  391. // characters U-04000000 - U-7FFFFFFF, mask 1111110X
  392. // see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
  393. $utf8 .= substr($chrs, $c, 6);
  394. $c += 5;
  395. break;
  396. }
  397. }
  398. return $utf8;
  399. } elseif (preg_match('/^\[.*\]$/s', $str) || preg_match('/^\{.*\}$/s', $str)) {
  400. // array, or object notation
  401. if ($str{0} == '[') {
  402. $stk = array(self::JSON_IN_ARR);
  403. $arr = array();
  404. } else {
  405. if ($useArray) {
  406. $stk = array(self::JSON_IN_OBJ);
  407. $obj = array();
  408. } else {
  409. $stk = array(self::JSON_IN_OBJ);
  410. $obj = new stdClass();
  411. }
  412. }
  413. array_push($stk, array('what' => self::JSON_SLICE,
  414. 'where' => 0,
  415. 'delim' => false));
  416. $chrs = substr($str, 1, -1);
  417. $chrs = self::reduceString($chrs);
  418. if ($chrs == '') {
  419. if (reset($stk) == self::JSON_IN_ARR) {
  420. return $arr;
  421. } else {
  422. return $obj;
  423. }
  424. }
  425. //print("\nparsing {$chrs}\n");
  426. $strlen_chrs = strlen($chrs);
  427. for ($c = 0; $c <= $strlen_chrs; ++$c) {
  428. $top = end($stk);
  429. $substr_chrs_c_2 = substr($chrs, $c, 2);
  430. if (($c == $strlen_chrs) || (($chrs{$c} == ',') && ($top['what'] == self::JSON_SLICE))) {
  431. // found a comma that is not inside a string, array, etc.,
  432. // OR we've reached the end of the character list
  433. $slice = substr($chrs, $top['where'], ($c - $top['where']));
  434. array_push($stk, array('what' => self::JSON_SLICE, 'where' => ($c + 1), 'delim' => false));
  435. //print("Found split at {$c}: ".substr($chrs, $top['where'], (1 + $c - $top['where']))."\n");
  436. if (reset($stk) == self::JSON_IN_ARR) {
  437. // we are in an array, so just push an element onto the stack
  438. array_push($arr, self::decode($slice,$useArray));
  439. } elseif (reset($stk) == self::JSON_IN_OBJ) {
  440. // we are in an object, so figure
  441. // out the property name and set an
  442. // element in an associative array,
  443. // for now
  444. if (preg_match('/^\s*(["\'].*[^\\\]["\'])\s*:\s*(\S.*),?$/Uis', $slice, $parts)) {
  445. // "name":value pair
  446. $key = self::decode($parts[1],$useArray);
  447. $val = self::decode($parts[2],$useArray);
  448. if ($useArray) {
  449. $obj[$key] = $val;
  450. } else {
  451. $obj->$key = $val;
  452. }
  453. } elseif (preg_match('/^\s*(\w+)\s*:\s*(\S.*),?$/Uis', $slice, $parts)) {
  454. // name:value pair, where name is unquoted
  455. $key = $parts[1];
  456. $val = self::decode($parts[2],$useArray);
  457. if ($useArray) {
  458. $obj[$key] = $val;
  459. } else {
  460. $obj->$key = $val;
  461. }
  462. }
  463. }
  464. } elseif ((($chrs{$c} == '"') || ($chrs{$c} == "'")) && ($top['what'] != self::JSON_IN_STR)) {
  465. // found a quote, and we are not inside a string
  466. array_push($stk, array('what' => self::JSON_IN_STR, 'where' => $c, 'delim' => $chrs{$c}));
  467. //print("Found start of string at {$c}\n");
  468. } elseif (($chrs{$c} == $top['delim']) &&
  469. ($top['what'] == self::JSON_IN_STR) &&
  470. (($chrs{$c - 1} != "\\") ||
  471. ($chrs{$c - 1} == "\\" && $chrs{$c - 2} == "\\"))) {
  472. // found a quote, we're in a string, and it's not escaped
  473. array_pop($stk);
  474. //print("Found end of string at {$c}: ".substr($chrs, $top['where'], (1 + 1 + $c - $top['where']))."\n");
  475. } elseif (($chrs{$c} == '[') &&
  476. in_array($top['what'], array(self::JSON_SLICE, self::JSON_IN_ARR, self::JSON_IN_OBJ))) {
  477. // found a left-bracket, and we are in an array, object, or slice
  478. array_push($stk, array('what' => self::JSON_IN_ARR, 'where' => $c, 'delim' => false));
  479. //print("Found start of array at {$c}\n");
  480. } elseif (($chrs{$c} == ']') && ($top['what'] == self::JSON_IN_ARR)) {
  481. // found a right-bracket, and we're in an array
  482. array_pop($stk);
  483. //print("Found end of array at {$c}: ".substr($chrs, $top['where'], (1 + $c - $top['where']))."\n");
  484. } elseif (($chrs{$c} == '{') &&
  485. in_array($top['what'], array(self::JSON_SLICE, self::JSON_IN_ARR, self::JSON_IN_OBJ))) {
  486. // found a left-brace, and we are in an array, object, or slice
  487. array_push($stk, array('what' => self::JSON_IN_OBJ, 'where' => $c, 'delim' => false));
  488. //print("Found start of object at {$c}\n");
  489. } elseif (($chrs{$c} == '}') && ($top['what'] == self::JSON_IN_OBJ)) {
  490. // found a right-brace, and we're in an object
  491. array_pop($stk);
  492. //print("Found end of object at {$c}: ".substr($chrs, $top['where'], (1 + $c - $top['where']))."\n");
  493. } elseif (($substr_chrs_c_2 == '/*') &&
  494. in_array($top['what'], array(self::JSON_SLICE, self::JSON_IN_ARR, self::JSON_IN_OBJ))) {
  495. // found a comment start, and we are in an array, object, or slice
  496. array_push($stk, array('what' => self::JSON_IN_CMT, 'where' => $c, 'delim' => false));
  497. $c++;
  498. //print("Found start of comment at {$c}\n");
  499. } elseif (($substr_chrs_c_2 == '*/') && ($top['what'] == self::JSON_IN_CMT)) {
  500. // found a comment end, and we're in one now
  501. array_pop($stk);
  502. $c++;
  503. for ($i = $top['where']; $i <= $c; ++$i)
  504. $chrs = substr_replace($chrs, ' ', $i, 1);
  505. //print("Found end of comment at {$c}: ".substr($chrs, $top['where'], (1 + $c - $top['where']))."\n");
  506. }
  507. }
  508. if (reset($stk) == self::JSON_IN_ARR) {
  509. return $arr;
  510. } elseif (reset($stk) == self::JSON_IN_OBJ) {
  511. return $obj;
  512. }
  513. }
  514. }
  515. }
  516. /**
  517. * This function returns any UTF-8 encoded text as a list of
  518. * Unicode values:
  519. * @param string $str string to convert
  520. * @author Scott Michael Reynen <scott@randomchaos.com>
  521. * @link http://www.randomchaos.com/document.php?source=php_and_unicode
  522. * @see unicodeToUTF8()
  523. */
  524. protected static function utf8ToUnicode( &$str )
  525. {
  526. $unicode = array();
  527. $values = array();
  528. $lookingFor = 1;
  529. for ($i = 0; $i < strlen( $str ); $i++ )
  530. {
  531. $thisValue = ord( $str[ $i ] );
  532. if ( $thisValue < 128 )
  533. $unicode[] = $thisValue;
  534. else
  535. {
  536. if ( count( $values ) == 0 )
  537. $lookingFor = ( $thisValue < 224 ) ? 2 : 3;
  538. $values[] = $thisValue;
  539. if ( count( $values ) == $lookingFor )
  540. {
  541. $number = ( $lookingFor == 3 ) ?
  542. ( ( $values[0] % 16 ) * 4096 ) + ( ( $values[1] % 64 ) * 64 ) + ( $values[2] % 64 ):
  543. ( ( $values[0] % 32 ) * 64 ) + ( $values[1] % 64 );
  544. $unicode[] = $number;
  545. $values = array();
  546. $lookingFor = 1;
  547. }
  548. }
  549. }
  550. return $unicode;
  551. }
  552. /**
  553. * This function converts a Unicode array back to its UTF-8 representation
  554. * @param string $str string to convert
  555. * @author Scott Michael Reynen <scott@randomchaos.com>
  556. * @link http://www.randomchaos.com/document.php?source=php_and_unicode
  557. * @see utf8ToUnicode()
  558. */
  559. protected static function unicodeToUTF8( &$str )
  560. {
  561. $utf8 = '';
  562. foreach( $str as $unicode )
  563. {
  564. if ( $unicode < 128 )
  565. {
  566. $utf8.= chr( $unicode );
  567. }
  568. elseif ( $unicode < 2048 )
  569. {
  570. $utf8.= chr( 192 + ( ( $unicode - ( $unicode % 64 ) ) / 64 ) );
  571. $utf8.= chr( 128 + ( $unicode % 64 ) );
  572. }
  573. else
  574. {
  575. $utf8.= chr( 224 + ( ( $unicode - ( $unicode % 4096 ) ) / 4096 ) );
  576. $utf8.= chr( 128 + ( ( ( $unicode % 4096 ) - ( $unicode % 64 ) ) / 64 ) );
  577. $utf8.= chr( 128 + ( $unicode % 64 ) );
  578. }
  579. }
  580. return $utf8;
  581. }
  582. /**
  583. * UTF-8 to UTF-16BE conversion.
  584. *
  585. * Maybe really UCS-2 without mb_string due to utf8ToUnicode limits
  586. * @param string $str string to convert
  587. * @param boolean $bom whether to output BOM header
  588. */
  589. protected static function utf8ToUTF16BE(&$str, $bom = false)
  590. {
  591. $out = $bom ? "\xFE\xFF" : '';
  592. if(function_exists('mb_convert_encoding'))
  593. return $out.mb_convert_encoding($str,'UTF-16BE','UTF-8');
  594. $uni = self::utf8ToUnicode($str);
  595. foreach($uni as $cp)
  596. $out .= pack('n',$cp);
  597. return $out;
  598. }
  599. /**
  600. * UTF-8 to UTF-16BE conversion.
  601. *
  602. * Maybe really UCS-2 without mb_string due to utf8ToUnicode limits
  603. * @param string $str string to convert
  604. */
  605. protected static function utf16beToUTF8(&$str)
  606. {
  607. $uni = unpack('n*',$str);
  608. return self::unicodeToUTF8($uni);
  609. }
  610. }