PageRenderTime 133ms CodeModel.GetById 75ms app.highlight 51ms RepoModel.GetById 1ms app.codeStats 0ms

/framework/web/helpers/CJSON.php

http://github.com/yiisoft/yii
PHP | 717 lines | 435 code | 87 blank | 195 comment | 93 complexity | 47bbeac9067b5cedfbb01f68ffce5a48 MD5 | raw file
  1<?php
  2/**
  3* JSON (JavaScript Object Notation) is a lightweight data-interchange
  4* format. It is easy for humans to read and write. It is easy for machines
  5* to parse and generate. It is based on a subset of the JavaScript
  6* Programming Language, Standard ECMA-262 3rd Edition - December 1999.
  7* This feature can also be found in  Python. JSON is a text format that is
  8* completely language independent but uses conventions that are familiar
  9* to programmers of the C-family of languages, including C, C++, C#, Java,
 10* JavaScript, Perl, TCL, and many others. These properties make JSON an
 11* ideal data-interchange language.
 12*
 13* This package provides a simple encoder and decoder for JSON notation. It
 14* is intended for use with client-side Javascript applications that make
 15* use of HTTPRequest to perform server communication functions - data can
 16* be encoded into JSON notation for use in a client-side javascript, or
 17* decoded from incoming Javascript requests. JSON format is native to
 18* Javascript, and can be directly eval()'ed with no further parsing
 19* overhead
 20*
 21* All strings should be in ASCII or UTF-8 format!
 22*
 23* LICENSE: Redistribution and use in source and binary forms, with or
 24* without modification, are permitted provided that the following
 25* conditions are met: Redistributions of source code must retain the
 26* above copyright notice, this list of conditions and the following
 27* disclaimer. Redistributions in binary form must reproduce the above
 28* copyright notice, this list of conditions and the following disclaimer
 29* in the documentation and/or other materials provided with the
 30* distribution.
 31*
 32* THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
 33* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 34* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN
 35* NO EVENT SHALL CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 36* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 37* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 38* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 39* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
 40* TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
 41* USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
 42* DAMAGE.
 43*
 44* @author	  Michal Migurski <mike-json@teczno.com>
 45* @author	  Matt Knapp <mdknapp[at]gmail[dot]com>
 46* @author	  Brett Stimmerman <brettstimmerman[at]gmail[dot]com>
 47* @copyright   2005 Michal Migurski
 48* @license	 http://www.opensource.org/licenses/bsd-license.php
 49* @link		http://pear.php.net/pepr/pepr-proposal-show.php?id=198
 50*/
 51
 52/**
 53 * CJSON converts PHP data to and from JSON format.
 54 *
 55 * @author	 Michal Migurski <mike-json@teczno.com>
 56 * @author	 Matt Knapp <mdknapp[at]gmail[dot]com>
 57 * @author	 Brett Stimmerman <brettstimmerman[at]gmail[dot]com>
 58 * @package	system.web.helpers
 59 * @since 1.0
 60 */
 61class CJSON
 62{
 63	/**
 64	 * Marker constant for JSON::decode(), used to flag stack state
 65	 */
 66	const JSON_SLICE = 1;
 67
 68	/**
 69	* Marker constant for JSON::decode(), used to flag stack state
 70	*/
 71	const JSON_IN_STR = 2;
 72
 73	/**
 74	* Marker constant for JSON::decode(), used to flag stack state
 75	*/
 76	const JSON_IN_ARR = 4;
 77
 78	/**
 79	* Marker constant for JSON::decode(), used to flag stack state
 80	*/
 81	const JSON_IN_OBJ = 8;
 82
 83	/**
 84	* Marker constant for JSON::decode(), used to flag stack state
 85	*/
 86	const JSON_IN_CMT = 16;
 87
 88	/**
 89	 * Encodes an arbitrary variable into JSON format
 90	 *
 91	 * @param mixed $var any number, boolean, string, array, or object to be encoded.
 92	 * If var is a string, it will be converted to UTF-8 format first before being encoded.
 93	 * @return string JSON string representation of input var
 94	 */
 95	public static function encode($var)
 96	{
 97		switch (gettype($var)) {
 98			case 'boolean':
 99				return $var ? 'true' : 'false';
100
101			case 'NULL':
102				return 'null';
103
104			case 'integer':
105				return (int) $var;
106
107			case 'double':
108			case 'float':
109				return str_replace(',','.',(float)$var); // locale-independent representation
110
111			case 'string':
112				if (($enc=strtoupper(Yii::app()->charset))!=='UTF-8')
113					$var=iconv($enc, 'UTF-8', $var);
114
115				if(function_exists('json_encode'))
116					return json_encode($var);
117
118				// STRINGS ARE EXPECTED TO BE IN ASCII OR UTF-8 FORMAT
119				$ascii = '';
120				$strlen_var = strlen($var);
121
122			   /*
123				* Iterate over every character in the string,
124				* escaping with a slash or encoding to UTF-8 where necessary
125				*/
126				for ($c = 0; $c < $strlen_var; ++$c) {
127
128					$ord_var_c = ord($var[$c]);
129
130					switch (true) {
131						case $ord_var_c == 0x08:
132							$ascii .= '\b';
133							break;
134						case $ord_var_c == 0x09:
135							$ascii .= '\t';
136							break;
137						case $ord_var_c == 0x0A:
138							$ascii .= '\n';
139							break;
140						case $ord_var_c == 0x0C:
141							$ascii .= '\f';
142							break;
143						case $ord_var_c == 0x0D:
144							$ascii .= '\r';
145							break;
146
147						case $ord_var_c == 0x22:
148						case $ord_var_c == 0x2F:
149						case $ord_var_c == 0x5C:
150							// double quote, slash, slosh
151							$ascii .= '\\'.$var[$c];
152							break;
153
154						case (($ord_var_c >= 0x20) && ($ord_var_c <= 0x7F)):
155							// characters U-00000000 - U-0000007F (same as ASCII)
156							$ascii .= $var[$c];
157							break;
158
159						case (($ord_var_c & 0xE0) == 0xC0):
160							// characters U-00000080 - U-000007FF, mask 110XXXXX
161							// see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
162							$char = pack('C*', $ord_var_c, ord($var[$c+1]));
163							$c+=1;
164							$utf16 =  self::utf8ToUTF16BE($char);
165							$ascii .= sprintf('\u%04s', bin2hex($utf16));
166							break;
167
168						case (($ord_var_c & 0xF0) == 0xE0):
169							// characters U-00000800 - U-0000FFFF, mask 1110XXXX
170							// see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
171							$char = pack('C*', $ord_var_c,
172										 ord($var[$c+1]),
173										 ord($var[$c+2]));
174							$c+=2;
175							$utf16 = self::utf8ToUTF16BE($char);
176							$ascii .= sprintf('\u%04s', bin2hex($utf16));
177							break;
178
179						case (($ord_var_c & 0xF8) == 0xF0):
180							// characters U-00010000 - U-001FFFFF, mask 11110XXX
181							// see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
182							$char = pack('C*', $ord_var_c,
183										 ord($var[$c+1]),
184										 ord($var[$c+2]),
185										 ord($var[$c+3]));
186							$c+=3;
187							$utf16 = self::utf8ToUTF16BE($char);
188							$ascii .= sprintf('\u%04s', bin2hex($utf16));
189							break;
190
191						case (($ord_var_c & 0xFC) == 0xF8):
192							// characters U-00200000 - U-03FFFFFF, mask 111110XX
193							// see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
194							$char = pack('C*', $ord_var_c,
195										 ord($var[$c+1]),
196										 ord($var[$c+2]),
197										 ord($var[$c+3]),
198										 ord($var[$c+4]));
199							$c+=4;
200							$utf16 = self::utf8ToUTF16BE($char);
201							$ascii .= sprintf('\u%04s', bin2hex($utf16));
202							break;
203
204						case (($ord_var_c & 0xFE) == 0xFC):
205							// characters U-04000000 - U-7FFFFFFF, mask 1111110X
206							// see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
207							$char = pack('C*', $ord_var_c,
208										 ord($var[$c+1]),
209										 ord($var[$c+2]),
210										 ord($var[$c+3]),
211										 ord($var[$c+4]),
212										 ord($var[$c+5]));
213							$c+=5;
214							$utf16 = self::utf8ToUTF16BE($char);
215							$ascii .= sprintf('\u%04s', bin2hex($utf16));
216							break;
217					}
218				}
219
220				return '"'.$ascii.'"';
221
222			case 'array':
223			   /*
224				* As per JSON spec if any array key is not an integer
225				* we must treat the the whole array as an object. We
226				* also try to catch a sparsely populated associative
227				* array with numeric keys here because some JS engines
228				* will create an array with empty indexes up to
229				* max_index which can cause memory issues and because
230				* the keys, which may be relevant, will be remapped
231				* otherwise.
232				*
233				* As per the ECMA and JSON specification an object may
234				* have any string as a property. Unfortunately due to
235				* a hole in the ECMA specification if the key is a
236				* ECMA reserved word or starts with a digit the
237				* parameter is only accessible using ECMAScript's
238				* bracket notation.
239				*/
240
241				// treat as a JSON object
242				if (is_array($var) && count($var) && (array_keys($var) !== range(0, sizeof($var) - 1))) {
243					return '{' .
244						   join(',', array_map(array('CJSON', 'nameValue'),
245											   array_keys($var),
246											   array_values($var)))
247						   . '}';
248				}
249
250				// treat it like a regular array
251				return '[' . join(',', array_map(array('CJSON', 'encode'), $var)) . ']';
252
253			case 'object':
254				// Check for the JsonSerializable interface available in PHP5.4
255				// Note that instanceof returns false in case it doesnt know the interface.
256				if (interface_exists('JsonSerializable', false) && $var instanceof JsonSerializable)
257				{
258					// We use the function defined in the interface instead of json_encode.
259					// This way even for PHP < 5.4 one could define the interface and use it.
260					return self::encode($var->jsonSerialize());
261				}
262				elseif ($var instanceof Traversable)
263				{
264					$vars = array();
265					foreach ($var as $k=>$v)
266						$vars[$k] = $v;
267				}
268				else
269					$vars = get_object_vars($var);
270				return '{' .
271					   join(',', array_map(array('CJSON', 'nameValue'),
272										   array_keys($vars),
273										   array_values($vars)))
274					   . '}';
275
276			default:
277				return '';
278		}
279	}
280
281	/**
282	 * array-walking function for use in generating JSON-formatted name-value pairs
283	 *
284	 * @param string $name  name of key to use
285	 * @param mixed $value reference to an array element to be encoded
286	 *
287	 * @return   string  JSON-formatted name-value pair, like '"name":value'
288	 * @access   private
289	 */
290	protected static function nameValue($name, $value)
291	{
292		return self::encode(strval($name)) . ':' . self::encode($value);
293	}
294
295	/**
296	 * reduce a string by removing leading and trailing comments and whitespace
297	 *
298	 * @param string $str string value to strip of comments and whitespace
299	 *
300	 * @return string string value stripped of comments and whitespace
301	 * @access   private
302	 */
303	protected static function reduceString($str)
304	{
305		$str = preg_replace(array(
306
307				// eliminate single line comments in '// ...' form
308				'#^\s*//(.+)$#m',
309
310				// eliminate multi-line comments in '/* ... */' form, at start of string
311				'#^\s*/\*(.+)\*/#Us',
312
313				// eliminate multi-line comments in '/* ... */' form, at end of string
314				'#/\*(.+)\*/\s*$#Us'
315
316			), '', $str);
317
318		// eliminate extraneous space
319		return trim($str);
320	}
321
322	/**
323	 * decodes a JSON string into appropriate variable
324	 *
325	 * @param string $str  JSON-formatted string
326	 * @param boolean $useArray  whether to use associative array to represent object data
327	 * @return mixed   number, boolean, string, array, or object corresponding to given JSON input string.
328	 *    Note that decode() always returns strings in ASCII or UTF-8 format!
329	 * @access   public
330	 */
331	public static function decode($str, $useArray=true)
332	{
333		if(function_exists('json_decode'))
334		{
335			$json = json_decode($str,$useArray);
336
337			// based on investigation, native fails sometimes returning null.
338			// see: http://gggeek.altervista.org/sw/article_20070425.html
339			// As of PHP 5.3.6 it still fails on some valid JSON strings
340			if($json !== null)
341				return $json;
342		}
343
344		$str = self::reduceString($str);
345
346		switch (strtolower($str)) {
347			case 'true':
348				return true;
349
350			case 'false':
351				return false;
352
353			case 'null':
354				return null;
355
356			default:
357				if (is_numeric($str)) {
358					// Lookie-loo, it's a number
359
360					// This would work on its own, but I'm trying to be
361					// good about returning integers where appropriate:
362					// return (float)$str;
363
364					// Return float or int, as appropriate
365					return ((float)$str == (integer)$str)
366						? (integer)$str
367						: (float)$str;
368
369				} elseif (preg_match('/^("|\').+(\1)$/s', $str, $m) && $m[1] == $m[2]) {
370					// STRINGS RETURNED IN UTF-8 FORMAT
371					$delim = substr($str, 0, 1);
372					$chrs = substr($str, 1, -1);
373					$utf8 = '';
374					$strlen_chrs = strlen($chrs);
375
376					for ($c = 0; $c < $strlen_chrs; ++$c) {
377
378						$substr_chrs_c_2 = substr($chrs, $c, 2);
379						$ord_chrs_c = ord($chrs[$c]);
380
381						switch (true) {
382							case $substr_chrs_c_2 == '\b':
383								$utf8 .= chr(0x08);
384								++$c;
385								break;
386							case $substr_chrs_c_2 == '\t':
387								$utf8 .= chr(0x09);
388								++$c;
389								break;
390							case $substr_chrs_c_2 == '\n':
391								$utf8 .= chr(0x0A);
392								++$c;
393								break;
394							case $substr_chrs_c_2 == '\f':
395								$utf8 .= chr(0x0C);
396								++$c;
397								break;
398							case $substr_chrs_c_2 == '\r':
399								$utf8 .= chr(0x0D);
400								++$c;
401								break;
402
403							case $substr_chrs_c_2 == '\\"':
404							case $substr_chrs_c_2 == '\\\'':
405							case $substr_chrs_c_2 == '\\\\':
406							case $substr_chrs_c_2 == '\\/':
407								if (($delim == '"' && $substr_chrs_c_2 != '\\\'') ||
408								   ($delim == "'" && $substr_chrs_c_2 != '\\"')) {
409									$utf8 .= $chrs[++$c];
410								}
411								break;
412
413							case preg_match('/\\\u[0-9A-F]{4}/i', substr($chrs, $c, 6)):
414								// single, escaped unicode character
415								$utf16 = chr(hexdec(substr($chrs, ($c+2), 2)))
416									   . chr(hexdec(substr($chrs, ($c+4), 2)));
417								$utf8 .= self::utf16beToUTF8($utf16);
418								$c+=5;
419								break;
420
421							case ($ord_chrs_c >= 0x20) && ($ord_chrs_c <= 0x7F):
422								$utf8 .= $chrs[$c];
423								break;
424
425							case ($ord_chrs_c & 0xE0) == 0xC0:
426								// characters U-00000080 - U-000007FF, mask 110XXXXX
427								//see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
428								$utf8 .= substr($chrs, $c, 2);
429								++$c;
430								break;
431
432							case ($ord_chrs_c & 0xF0) == 0xE0:
433								// characters U-00000800 - U-0000FFFF, mask 1110XXXX
434								// see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
435								$utf8 .= substr($chrs, $c, 3);
436								$c += 2;
437								break;
438
439							case ($ord_chrs_c & 0xF8) == 0xF0:
440								// characters U-00010000 - U-001FFFFF, mask 11110XXX
441								// see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
442								$utf8 .= substr($chrs, $c, 4);
443								$c += 3;
444								break;
445
446							case ($ord_chrs_c & 0xFC) == 0xF8:
447								// characters U-00200000 - U-03FFFFFF, mask 111110XX
448								// see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
449								$utf8 .= substr($chrs, $c, 5);
450								$c += 4;
451								break;
452
453							case ($ord_chrs_c & 0xFE) == 0xFC:
454								// characters U-04000000 - U-7FFFFFFF, mask 1111110X
455								// see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
456								$utf8 .= substr($chrs, $c, 6);
457								$c += 5;
458								break;
459
460						}
461
462					}
463
464					return $utf8;
465
466				} elseif (preg_match('/^\[.*\]$/s', $str) || preg_match('/^\{.*\}$/s', $str)) {
467					// array, or object notation
468
469					if ($str[0] == '[') {
470						$stk = array(self::JSON_IN_ARR);
471						$arr = array();
472					} else {
473						if ($useArray) {
474							$stk = array(self::JSON_IN_OBJ);
475							$obj = array();
476						} else {
477							$stk = array(self::JSON_IN_OBJ);
478							$obj = new stdClass();
479						}
480					}
481
482					$stk[] = array('what' => self::JSON_SLICE, 'where' => 0, 'delim' => false);
483
484					$chrs = substr($str, 1, -1);
485					$chrs = self::reduceString($chrs);
486
487					if ($chrs == '') {
488						if (reset($stk) == self::JSON_IN_ARR) {
489							return $arr;
490
491						} else {
492							return $obj;
493
494						}
495					}
496
497					//print("\nparsing {$chrs}\n");
498
499					$strlen_chrs = strlen($chrs);
500
501					for ($c = 0; $c <= $strlen_chrs; ++$c) {
502
503						$top = end($stk);
504						$substr_chrs_c_2 = substr($chrs, $c, 2);
505
506						if (($c == $strlen_chrs) || (($chrs[$c] == ',') && ($top['what'] == self::JSON_SLICE))) {
507							// found a comma that is not inside a string, array, etc.,
508							// OR we've reached the end of the character list
509							$slice = substr($chrs, $top['where'], ($c - $top['where']));
510							$stk[] = array('what' => self::JSON_SLICE, 'where' => ($c + 1), 'delim' => false);
511							//print("Found split at {$c}: ".substr($chrs, $top['where'], (1 + $c - $top['where']))."\n");
512
513							if (reset($stk) == self::JSON_IN_ARR) {
514								// we are in an array, so just push an element onto the stack
515								$arr[] = self::decode($slice,$useArray);
516
517							} elseif (reset($stk) == self::JSON_IN_OBJ) {
518								// we are in an object, so figure
519								// out the property name and set an
520								// element in an associative array,
521								// for now
522								if (preg_match('/^\s*(["\'].*[^\\\]["\'])\s*:\s*(\S.*),?$/Uis', $slice, $parts)) {
523									// "name":value pair
524									$key = self::decode($parts[1],$useArray);
525									$val = self::decode($parts[2],$useArray);
526
527									if ($useArray) {
528										$obj[$key] = $val;
529									} else {
530										$obj->$key = $val;
531									}
532								} elseif (preg_match('/^\s*(\w+)\s*:\s*(\S.*),?$/Uis', $slice, $parts)) {
533									// name:value pair, where name is unquoted
534									$key = $parts[1];
535									$val = self::decode($parts[2],$useArray);
536
537									if ($useArray) {
538										$obj[$key] = $val;
539									} else {
540										$obj->$key = $val;
541									}
542								}
543
544							}
545
546						} elseif ((($chrs[$c] == '"') || ($chrs[$c] == "'")) && ($top['what'] != self::JSON_IN_STR)) {
547							// found a quote, and we are not inside a string
548							$stk[] = array('what' => self::JSON_IN_STR, 'where' => $c, 'delim' => $chrs[$c]);
549							//print("Found start of string at {$c}\n");
550
551						} elseif (($chrs[$c] == $top['delim']) &&
552								 ($top['what'] == self::JSON_IN_STR) &&
553								 (($chrs[$c - 1] != "\\") ||
554								 ($chrs[$c - 1] == "\\" && $chrs[$c - 2] == "\\"))) {
555							// found a quote, we're in a string, and it's not escaped
556							array_pop($stk);
557							//print("Found end of string at {$c}: ".substr($chrs, $top['where'], (1 + 1 + $c - $top['where']))."\n");
558
559						} elseif (($chrs[$c] == '[') &&
560								 in_array($top['what'], array(self::JSON_SLICE, self::JSON_IN_ARR, self::JSON_IN_OBJ))) {
561							// found a left-bracket, and we are in an array, object, or slice
562							$stk[] = array('what' => self::JSON_IN_ARR, 'where' => $c, 'delim' => false);
563							//print("Found start of array at {$c}\n");
564
565						} elseif (($chrs[$c] == ']') && ($top['what'] == self::JSON_IN_ARR)) {
566							// found a right-bracket, and we're in an array
567							array_pop($stk);
568							//print("Found end of array at {$c}: ".substr($chrs, $top['where'], (1 + $c - $top['where']))."\n");
569
570						} elseif (($chrs[$c] == '{') &&
571								 in_array($top['what'], array(self::JSON_SLICE, self::JSON_IN_ARR, self::JSON_IN_OBJ))) {
572							// found a left-brace, and we are in an array, object, or slice
573							$stk[] = array('what' => self::JSON_IN_OBJ, 'where' => $c, 'delim' => false);
574							//print("Found start of object at {$c}\n");
575
576						} elseif (($chrs[$c] == '}') && ($top['what'] == self::JSON_IN_OBJ)) {
577							// found a right-brace, and we're in an object
578							array_pop($stk);
579							//print("Found end of object at {$c}: ".substr($chrs, $top['where'], (1 + $c - $top['where']))."\n");
580
581						} elseif (($substr_chrs_c_2 == '/*') &&
582								 in_array($top['what'], array(self::JSON_SLICE, self::JSON_IN_ARR, self::JSON_IN_OBJ))) {
583							// found a comment start, and we are in an array, object, or slice
584							$stk[] = array('what' => self::JSON_IN_CMT, 'where' => $c, 'delim' => false);
585							$c++;
586							//print("Found start of comment at {$c}\n");
587
588						} elseif (($substr_chrs_c_2 == '*/') && ($top['what'] == self::JSON_IN_CMT)) {
589							// found a comment end, and we're in one now
590							array_pop($stk);
591							$c++;
592
593							for ($i = $top['where']; $i <= $c; ++$i)
594								$chrs = substr_replace($chrs, ' ', $i, 1);
595
596							//print("Found end of comment at {$c}: ".substr($chrs, $top['where'], (1 + $c - $top['where']))."\n");
597
598						}
599
600					}
601
602					if (reset($stk) == self::JSON_IN_ARR) {
603						return $arr;
604
605					} elseif (reset($stk) == self::JSON_IN_OBJ) {
606						return $obj;
607
608					}
609
610				}
611		}
612	}
613
614	/**
615	 * This function returns any UTF-8 encoded text as a list of
616	 * Unicode values:
617	 * @param string $str string to convert
618	 * @return string
619	 * @author Scott Michael Reynen <scott@randomchaos.com>
620	 * @link   http://www.randomchaos.com/document.php?source=php_and_unicode
621	 * @see	unicodeToUTF8()
622	 */
623	protected static function utf8ToUnicode( &$str )
624	{
625		$unicode = array();
626		$values = array();
627		$lookingFor = 1;
628
629		for ($i = 0; $i < strlen( $str ); $i++ )
630		{
631			$thisValue = ord( $str[ $i ] );
632			if ( $thisValue < 128 )
633				$unicode[] = $thisValue;
634			else
635			{
636				if ( count( $values ) == 0 )
637					$lookingFor = ( $thisValue < 224 ) ? 2 : 3;
638				$values[] = $thisValue;
639				if ( count( $values ) == $lookingFor )
640				{
641					$number = ( $lookingFor == 3 ) ?
642						( ( $values[0] % 16 ) * 4096 ) + ( ( $values[1] % 64 ) * 64 ) + ( $values[2] % 64 ):
643						( ( $values[0] % 32 ) * 64 ) + ( $values[1] % 64 );
644					$unicode[] = $number;
645					$values = array();
646					$lookingFor = 1;
647				}
648			}
649		}
650		return $unicode;
651	}
652
653	/**
654	 * This function converts a Unicode array back to its UTF-8 representation
655	 * @param string $str string to convert
656	 * @return string
657	 * @author Scott Michael Reynen <scott@randomchaos.com>
658	 * @link   http://www.randomchaos.com/document.php?source=php_and_unicode
659	 * @see	utf8ToUnicode()
660	 */
661	protected static function unicodeToUTF8( &$str )
662	{
663		$utf8 = '';
664		foreach( $str as $unicode )
665		{
666			if ( $unicode < 128 )
667			{
668				$utf8.= chr( $unicode );
669			}
670			elseif ( $unicode < 2048 )
671			{
672				$utf8.= chr( 192 +  ( ( $unicode - ( $unicode % 64 ) ) / 64 ) );
673				$utf8.= chr( 128 + ( $unicode % 64 ) );
674			}
675			else
676			{
677				$utf8.= chr( 224 + ( ( $unicode - ( $unicode % 4096 ) ) / 4096 ) );
678				$utf8.= chr( 128 + ( ( ( $unicode % 4096 ) - ( $unicode % 64 ) ) / 64 ) );
679				$utf8.= chr( 128 + ( $unicode % 64 ) );
680			}
681		}
682		return $utf8;
683	}
684
685	/**
686	 * UTF-8 to UTF-16BE conversion.
687	 *
688	 * Maybe really UCS-2 without mb_string due to utf8ToUnicode limits
689	 * @param string $str string to convert
690	 * @param boolean $bom whether to output BOM header
691	 * @return string
692	 */
693	protected static function utf8ToUTF16BE(&$str, $bom = false)
694	{
695		$out = $bom ? "\xFE\xFF" : '';
696		if(function_exists('mb_convert_encoding'))
697			return $out.mb_convert_encoding($str,'UTF-16BE','UTF-8');
698
699		$uni = self::utf8ToUnicode($str);
700		foreach($uni as $cp)
701			$out .= pack('n',$cp);
702		return $out;
703	}
704
705	/**
706	 * UTF-8 to UTF-16BE conversion.
707	 *
708	 * Maybe really UCS-2 without mb_string due to utf8ToUnicode limits
709	 * @param string $str string to convert
710	 * @return string
711	 */
712	protected static function utf16beToUTF8(&$str)
713	{
714		$uni = unpack('n*',$str);
715		return self::unicodeToUTF8($uni);
716	}
717}