PageRenderTime 55ms CodeModel.GetById 18ms app.highlight 29ms RepoModel.GetById 1ms app.codeStats 0ms

/Genv/Json.php

https://gitlab.com/winiceo/levengit
PHP | 898 lines | 487 code | 109 blank | 302 comment | 116 complexity | 097c60724cbbc2db9963ae3b9eba1958 MD5 | raw file
  1<?php
  2/**
  3 * 
  4 * Class for wrapping JSON encoding/decoding functionality.
  5 * 
  6 * Given that the json extension to PHP will be enabled by default in
  7 * PHP 5.2.0+, Genv_Json allows users to get a jump on JSON encoding and
  8 * decoding early if the native json_* functions are not present.
  9 * 
 10 * Genv_Json::encode and Genv_Json::decode functions are designed
 11 * to pass the same unit tests bundled with the native PHP json ext.
 12 * 
 13 * Based largely on the Services_JSON package by Michal Migurski, Matt Knapp
 14 * and Brett Stimmerman. See the original code at
 15 * <http://mike.teczno.com/JSON/JSON.phps>
 16 * 
 17 * @category Genv
 18 * 
 19 * @package Genv_Json JSON data formatting and checking.
 20 * 
 21 * @author Michal Migurski <mike-json@teczno.com>
 22 * 
 23 * @author Matt Knapp <mdknapp[at]gmail[dot]com>
 24 * 
 25 * @author Brett Stimmerman <brettstimmerman[at]gmail[dot]com>
 26 * 
 27 * @author Clay Loveless <clay@killersoft.com>
 28 * 
 29 * @license http://opensource.org/licenses/bsd-license.php BSD
 30 * 
 31 * @version $Id: Json.php 4380 2010-02-14 16:06:52Z pmjones $
 32 * 
 33 */
 34class Genv_Json extends Genv_Base
 35{
 36    /**
 37     * 
 38     * Default configuration values.
 39     * 
 40     * @config bool bypass_ext Flag to instruct Genv_Json to bypass
 41     *   native json extension, if installed.
 42     * 
 43     * @config bool bypass_mb Flag to instruct Genv_Json to bypass
 44     *   native mb_convert_encoding() function, if
 45     *   installed.
 46     * 
     * @config bool noerror Flag to instruct Genv_Json to return null
     *   for values it cannot encode rather than throwing
     *   an exception (PHP-only encoding) or emitting PHP warnings
     *   (native json_encode() function).
 51     * 
 52     * @var array
 53     * 
 54     */
 55    protected $_Genv_Json = array(
 56        'bypass_ext' => false,
 57        'bypass_mb'  => false,
 58        'noerror'    => false
 59    );
 60    
 61    /**
 62     * 
 63     * Marker constants for use in _json_decode()
 64     * 
 65     * @constant
 66     * 
 67     */
 68    const SLICE  = 1;
 69    const IN_STR = 2;
 70    const IN_ARR = 3;
 71    const IN_OBJ = 4;
 72    const IN_CMT = 5;
 73    
 74    /**
 75     * 
 76     * Nest level counter for determining correct behavior of decoding string
 77     * representations of numbers and boolean values.
 78     * 
 79     * @var int
 80     */
 81    protected $_level;
 82    
 83    /**
 84     * 
 85     * Encodes the mixed $valueToEncode into JSON format.
 86     * 
 87     * @param mixed $valueToEncode Value to be encoded into JSON format
 88     * 
 89     * @param array $deQuote Array of keys whose values should **not** be
 90     * quoted in encoded string.
 91     * 
 92     * @return string JSON encoded value
 93     * 
 94     */
 95    public function encode($valueToEncode, $deQuote = array())
 96    {
 97        if (!$this->_config['bypass_ext'] && function_exists('json_encode')) {
 98            
 99            if ($this->_config['noerror']) {
100                $old_errlevel = error_reporting(E_ERROR ^ E_WARNING);
101            }
102            
103            $encoded = json_encode($valueToEncode);
104            
105            if ($this->_config['noerror']) {
106                error_reporting($old_errlevel);
107            }
108        
109        } else {
110            
111            // Fall back to PHP-only method
112            $encoded = $this->_json_encode($valueToEncode);
113        
114        }
115        
116        // Sometimes you just don't want some values quoted
117        if (!empty($deQuote)) {
118            $encoded = $this->_deQuote($encoded, $deQuote);
119        }
120        
121        return $encoded;
122    
123    }
124    
125    /**
126     * 
127     * Accepts a JSON-encoded string, and removes quotes around values of
128     * keys specified in the $keys array.
129     * 
130     * Sometimes, such as when constructing behaviors on the fly for "onSuccess"
131     * handlers to an Ajax request, the value needs to **not** have quotes around
132     * it. This method will remove those quotes and perform stripslashes on any
133     * escaped quotes within the quoted value.
134     * 
135     * @param string $encoded JSON-encoded string
136     * 
137     * @param array $keys Array of keys whose values should be de-quoted
138     * 
139     * @return string $encoded Cleaned string
140     * 
141     */
142    protected function _deQuote($encoded, $keys)
143    {
144        foreach ($keys as $key) {
145            $pattern = "/(\"".$key."\"\:)(\".*(?:[^\\\]\"))/U";
146            $encoded = preg_replace_callback(
147                $pattern,
148                array($this, '_stripvalueslashes'),
149                $encoded
150            );
151        }
152        
153        return $encoded;
154    }
155    
156    /**
157     * 
158     * Method for use with preg_replace_callback in the _deQuote() method.
159     * 
160     * Returns \["keymatch":\]\[value\] where value has had its leading and
161     * trailing double-quotes removed, and stripslashes() run on the rest of
162     * the value.
163     * 
164     * @param array $matches Regexp matches
165     * 
166     * @return string replacement string
167     * 
168     */
169    protected function _stripvalueslashes($matches)
170    {
171        return $matches[1].stripslashes(substr($matches[2], 1, -1));
172    }
173    
174    /**
175     * 
176     * Decodes the $encodedValue string which is encoded in the JSON format.
177     * 
     * For compatibility with the native json_decode() function, this
     * method accepts the $encodedValue string and an optional boolean value
     * $asArray which indicates whether or not the decoded value should be
     * returned as an array. The default is false, meaning the default return
     * from this method is an object.
183     * 
     * For compliance with the [JSON specification][], no attempt is made to
     * decode strings that are obviously not encoded arrays or objects.
186     * 
187     * [JSON specification]: http://www.ietf.org/rfc/rfc4627.txt
188     * 
189     * @param string $encodedValue String encoded in JSON format
190     * 
191     * @param bool $asArray Optional argument to decode as an array.
192     * Default false.
193     * 
194     * @return mixed decoded value
195     * 
196     */
197    public function decode($encodedValue, $asArray = false)
198    {
199        $first_char = substr(ltrim($encodedValue), 0, 1);
200        if ($first_char != '{' && $first_char != '[') {
201            return null;
202        }
203        
204        if (!$this->_config['bypass_ext'] && function_exists('json_decode')) {
205            return json_decode($encodedValue, (bool) $asArray);
206        }
207        
208        // Fall back to PHP-only method
209        $this->_level = 0;
210        $checker = Genv::factory('Genv_Json_Checker');
211        if ($checker->isValid($encodedValue)) {
212            return $this->_json_decode($encodedValue, (bool) $asArray);
213        } else {
214            return null;
215        }
216    }
217    
218    /**
219     * 
220     * Encodes the mixed $valueToEncode into the JSON format, without use of
221     * native PHP json extension.
222     * 
223     * @param mixed $var Any number, boolean, string, array, or object
224     * to be encoded. Strings are expected to be in ASCII or UTF-8 format.
225     * 
226     * @return mixed JSON string representation of input value
227     * 
228     */
229    protected function _json_encode($var)
230    {
231        switch (gettype($var)) {
232            case 'boolean':
233                return $var ? 'true' : 'false';
234            
235            case 'NULL':
236                return 'null';
237            
238            case 'integer':
239                // BREAK WITH Services_JSON:
240                // disabled for compatibility with ext/json. ext/json returns
241                // a string for integers, so we will to.
242                //return (int) $var;
243                return (string) $var;
244            
245            case 'double':
246            case 'float':
247                // BREAK WITH Services_JSON:
248                // disabled for compatibility with ext/json. ext/json returns
249                // a string for floats and doubles, so we will to.
250                //return (float) $var;
251                return (string) $var;
252            
253            case 'string':
254                // STRINGS ARE EXPECTED TO BE IN ASCII OR UTF-8 FORMAT
255                $ascii = '';
256                $strlen_var = strlen($var);
257               
258               /**
259                * Iterate over every character in the string,
260                * escaping with a slash or encoding to UTF-8 where necessary
261                */
262                for ($c = 0; $c < $strlen_var; ++$c) {
263                    
264                    $ord_var_c = ord($var{$c});
265                    
266                    switch (true) {
267                        case $ord_var_c == 0x08:
268                            $ascii .= '\b';
269                            break;
270                        case $ord_var_c == 0x09:
271                            $ascii .= '\t';
272                            break;
273                        case $ord_var_c == 0x0A:
274                            $ascii .= '\n';
275                            break;
276                        case $ord_var_c == 0x0C:
277                            $ascii .= '\f';
278                            break;
279                        case $ord_var_c == 0x0D:
280                            $ascii .= '\r';
281                            break;
282                        
283                        case $ord_var_c == 0x22:
284                        case $ord_var_c == 0x2F:
285                        case $ord_var_c == 0x5C:
286                            // double quote, slash, slosh
287                            $ascii .= '\\'.$var{$c};
288                            break;
289                        
290                        case (($ord_var_c >= 0x20) && ($ord_var_c <= 0x7F)):
291                            // characters U-00000000 - U-0000007F (same as ASCII)
292                            $ascii .= $var{$c};
293                            break;
294                        
295                        case (($ord_var_c & 0xE0) == 0xC0):
296                            // characters U-00000080 - U-000007FF, mask 110XXXXX
297                            // see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
298                            $char = pack('C*', $ord_var_c, ord($var{$c + 1}));
299                            $c += 1;
300                            $utf16 = $this->_utf82utf16($char);
301                            $ascii .= sprintf('\u%04s', bin2hex($utf16));
302                            break;
303                        
304                        case (($ord_var_c & 0xF0) == 0xE0):
305                            // characters U-00000800 - U-0000FFFF, mask 1110XXXX
306                            // see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
307                            $char = pack('C*', $ord_var_c,
308                                         ord($var{$c + 1}),
309                                         ord($var{$c + 2}));
310                            $c += 2;
311                            $utf16 = $this->_utf82utf16($char);
312                            $ascii .= sprintf('\u%04s', bin2hex($utf16));
313                            break;
314                        
315                        case (($ord_var_c & 0xF8) == 0xF0):
316                            // characters U-00010000 - U-001FFFFF, mask 11110XXX
317                            // see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
318                            $char = pack('C*', $ord_var_c,
319                                         ord($var{$c + 1}),
320                                         ord($var{$c + 2}),
321                                         ord($var{$c + 3}));
322                            $c += 3;
323                            $utf16 = $this->_utf82utf16($char);
324                            $ascii .= sprintf('\u%04s', bin2hex($utf16));
325                            break;
326                        
327                        case (($ord_var_c & 0xFC) == 0xF8):
328                            // characters U-00200000 - U-03FFFFFF, mask 111110XX
329                            // see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
330                            $char = pack('C*', $ord_var_c,
331                                         ord($var{$c + 1}),
332                                         ord($var{$c + 2}),
333                                         ord($var{$c + 3}),
334                                         ord($var{$c + 4}));
335                            $c += 4;
336                            $utf16 = $this->_utf82utf16($char);
337                            $ascii .= sprintf('\u%04s', bin2hex($utf16));
338                            break;
339                        
340                        case (($ord_var_c & 0xFE) == 0xFC):
341                            // characters U-04000000 - U-7FFFFFFF, mask 1111110X
342                            // see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
343                            $char = pack('C*', $ord_var_c,
344                                         ord($var{$c + 1}),
345                                         ord($var{$c + 2}),
346                                         ord($var{$c + 3}),
347                                         ord($var{$c + 4}),
348                                         ord($var{$c + 5}));
349                            $c += 5;
350                            $utf16 = $this->_utf82utf16($char);
351                            $ascii .= sprintf('\u%04s', bin2hex($utf16));
352                            break;
353                    }
354                }
355                
356                return '"'.$ascii.'"';
357            
358            case 'array':
359               /**
360                * 
361                * As per JSON spec if any array key is not an integer
362                * we must treat the the whole array as an object. We
363                * also try to catch a sparsely populated associative
364                * array with numeric keys here because some JS engines
365                * will create an array with empty indexes up to
366                * max_index which can cause memory issues and because
367                * the keys, which may be relevant, will be remapped
368                * otherwise.
369                * 
370                * As per the ECMA and JSON specification an object may
371                * have any string as a property. Unfortunately due to
372                * a hole in the ECMA specification if the key is a
373                * ECMA reserved word or starts with a digit the
374                * parameter is only accessible using ECMAScript's
375                * bracket notation.
376                * 
377                */
378                
379                // treat as a JSON object
380                if (is_array($var) && count($var) &&
381                    (array_keys($var) !== range(0, sizeof($var) - 1))) {
382                        $properties = array_map(array($this, '_name_value'),
383                                            array_keys($var),
384                                            array_values($var));
385                    
386                    return '{' . join(',', $properties) . '}';
387                }
388                
389                // treat it like a regular array
390                $elements = array_map(array($this, '_json_encode'), $var);
391                
392                return '[' . join(',', $elements) . ']';
393            
394            case 'object':
395                $vars = get_object_vars($var);
396                
397                $properties = array_map(array($this, '_name_value'),
398                                        array_keys($vars),
399                                        array_values($vars));
400                
401                return '{' . join(',', $properties) . '}';
402            
403            default:
404                
405                if ($this->_config['noerror']) {
406                    return 'null';
407                }
408                
409                throw Genv::exception(
410                    'Genv_Json',
411                    'ERR_CANNOT_ENCODE',
412                    gettype($var) . ' cannot be encoded as a JSON string',
413                    array('var' => $var)
414                );
415        }
416    }
417    
418    /**
419     * 
420     * Decodes a JSON string into appropriate variable.
421     * 
422     * Note: several changes were made in translating this method from
423     * Services_JSON, particularly related to how strings are handled. According
424     * to JSON_checker test suite from <http://www.json.org/JSON_checker/>,
425     * a JSON payload should be an object or an array, not a string.
426     * 
427     * Therefore, returning bool(true) for 'true' is invalid JSON decoding
428     * behavior, unless nested inside of an array or object.
429     * 
430     * Similarly, a string of '1' should return null, not int(1), unless
431     * nested inside of an array or object.
432     * 
433     * @param string $str String encoded in JSON format
434     * 
435     * @param bool $asArray Optional argument to decode as an array.
436     * 
437     * @return mixed decoded value
438     * 
439     * @todo Rewrite this based off of method used in Genv_Json_Checker
440     * 
441     */
442    protected function _json_decode($str, $asArray = false)
443    {
444        $str = $this->_reduce_string($str);
445        
446        switch (strtolower($str)) {
447            case 'true':
448                // JSON_checker test suite claims
449                // "A JSON payload should be an object or array, not a string."
450                // Thus, returning bool(true) is invalid parsing, unless
451                // we're nested inside an array or object.
452                if (in_array($this->_level, array(self::IN_ARR, self::IN_OBJ))) {
453                    return true;
454                } else {
455                    return null;
456                }
457                break;
458            
459            case 'false':
460                // JSON_checker test suite claims
461                // "A JSON payload should be an object or array, not a string."
462                // Thus, returning bool(false) is invalid parsing, unless
463                // we're nested inside an array or object.
464                if (in_array($this->_level, array(self::IN_ARR, self::IN_OBJ))) {
465                    return false;
466                } else {
467                    return null;
468                }
469                break;
470            
471            case 'null':
472                return null;
473            
474            default:
475                $m = array();
476                
477                if (is_numeric($str) || ctype_digit($str) || ctype_xdigit($str)) {
478                    // Return float or int, or null as appropriate
479                    if (in_array($this->_level, array(self::IN_ARR, self::IN_OBJ))) {
480                        return ((float) $str == (integer) $str)
481                            ? (integer) $str
482                            : (float) $str;
483                    } else {
484                        return null;
485                    }
486                    break;
487                
488                } elseif (preg_match('/^("|\').*(\1)$/s', $str, $m)
489                            && $m[1] == $m[2]) {
490                    // STRINGS RETURNED IN UTF-8 FORMAT
491                    $delim = substr($str, 0, 1);
492                    $chrs = substr($str, 1, -1);
493                    $utf8 = '';
494                    $strlen_chrs = strlen($chrs);
495                    
496                    for ($c = 0; $c < $strlen_chrs; ++$c) {
497                        
498                        $substr_chrs_c_2 = substr($chrs, $c, 2);
499                        $ord_chrs_c = ord($chrs{$c});
500                        
501                        switch (true) {
502                            case $substr_chrs_c_2 == '\b':
503                                $utf8 .= chr(0x08);
504                                ++$c;
505                                break;
506                            case $substr_chrs_c_2 == '\t':
507                                $utf8 .= chr(0x09);
508                                ++$c;
509                                break;
510                            case $substr_chrs_c_2 == '\n':
511                                $utf8 .= chr(0x0A);
512                                ++$c;
513                                break;
514                            case $substr_chrs_c_2 == '\f':
515                                $utf8 .= chr(0x0C);
516                                ++$c;
517                                break;
518                            case $substr_chrs_c_2 == '\r':
519                                $utf8 .= chr(0x0D);
520                                ++$c;
521                                break;
522                            
523                            case $substr_chrs_c_2 == '\\"':
524                            case $substr_chrs_c_2 == '\\\'':
525                            case $substr_chrs_c_2 == '\\\\':
526                            case $substr_chrs_c_2 == '\\/':
527                                if (($delim == '"' && $substr_chrs_c_2 != '\\\'') ||
528                                   ($delim == "'" && $substr_chrs_c_2 != '\\"')) {
529                                    $utf8 .= $chrs{++$c};
530                                }
531                                break;
532                            
533                            case preg_match('/\\\u[0-9A-F]{4}/i', substr($chrs, $c, 6)):
534                                // single, escaped unicode character
535                                $utf16 = chr(hexdec(substr($chrs, ($c + 2), 2)))
536                                       . chr(hexdec(substr($chrs, ($c + 4), 2)));
537                                $utf8 .= $this->_utf162utf8($utf16);
538                                $c += 5;
539                                break;
540                            
541                            case ($ord_chrs_c >= 0x20) && ($ord_chrs_c <= 0x7F):
542                                $utf8 .= $chrs{$c};
543                                break;
544                            
545                            case ($ord_chrs_c & 0xE0) == 0xC0:
546                                // characters U-00000080 - U-000007FF, mask 110XXXXX
547                                //see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
548                                $utf8 .= substr($chrs, $c, 2);
549                                ++$c;
550                                break;
551                            
552                            case ($ord_chrs_c & 0xF0) == 0xE0:
553                                // characters U-00000800 - U-0000FFFF, mask 1110XXXX
554                                // see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
555                                $utf8 .= substr($chrs, $c, 3);
556                                $c += 2;
557                                break;
558                            
559                            case ($ord_chrs_c & 0xF8) == 0xF0:
560                                // characters U-00010000 - U-001FFFFF, mask 11110XXX
561                                // see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
562                                $utf8 .= substr($chrs, $c, 4);
563                                $c += 3;
564                                break;
565                            
566                            case ($ord_chrs_c & 0xFC) == 0xF8:
567                                // characters U-00200000 - U-03FFFFFF, mask 111110XX
568                                // see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
569                                $utf8 .= substr($chrs, $c, 5);
570                                $c += 4;
571                                break;
572                            
573                            case ($ord_chrs_c & 0xFE) == 0xFC:
574                                // characters U-04000000 - U-7FFFFFFF, mask 1111110X
575                                // see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
576                                $utf8 .= substr($chrs, $c, 6);
577                                $c += 5;
578                                break;
579                        
580                        }
581                    
582                    }
583                    
584                    if (in_array($this->_level, array(self::IN_ARR, self::IN_OBJ))) {
585                        return $utf8;
586                    } else {
587                        return null;
588                    }
589                
590                } elseif (preg_match('/^\[.*\]$/s', $str) || preg_match('/^\{.*\}$/s', $str)) {
591                    // array, or object notation
592                    
593                    if ($str{0} == '[') {
594                        $stk = array(self::IN_ARR);
595                        $this->_level = self::IN_ARR;
596                        $arr = array();
597                    } else {
598                        if ($asArray) {
599                            $stk = array(self::IN_OBJ);
600                            $obj = array();
601                        } else {
602                            $stk = array(self::IN_OBJ);
603                            $obj = new stdClass();
604                        }
605                        $this->_level = self::IN_OBJ;
606                    }
607                    
608                    array_push($stk, array('what'  => self::SLICE,
609                                           'where' => 0,
610                                           'delim' => false));
611                    
612                    $chrs = substr($str, 1, -1);
613                    $chrs = $this->_reduce_string($chrs);
614                    
615                    if ($chrs == '') {
616                        if (reset($stk) == self::IN_ARR) {
617                            return $arr;
618                        
619                        } else {
620                            return $obj;
621                        
622                        }
623                    }
624                    
625                    $strlen_chrs = strlen($chrs);
626                    
627                    for ($c = 0; $c <= $strlen_chrs; ++$c) {
628                        
629                        $top = end($stk);
630                        $substr_chrs_c_2 = substr($chrs, $c, 2);
631                        
632                        if (($c == $strlen_chrs) || (($chrs{$c} == ',') && ($top['what'] == self::SLICE))) {
633                            // found a comma that is not inside a string, array, etc.,
634                            // OR we've reached the end of the character list
635                            $slice = substr($chrs, $top['where'], ($c - $top['where']));
636                            array_push($stk, array('what' => self::SLICE, 'where' => ($c + 1), 'delim' => false));
637                            //print("Found split at {$c}: ".substr($chrs, $top['where'], (1 + $c - $top['where']))."\n");
638                            
639                            if (reset($stk) == self::IN_ARR) {
640                                $this->_level = self::IN_ARR;
641                                // we are in an array, so just push an element onto the stack
642                                array_push($arr, $this->_json_decode($slice));
643                            
644                            } elseif (reset($stk) == self::IN_OBJ) {
645                                $this->_level = self::IN_OBJ;
646                                // we are in an object, so figure
647                                // out the property name and set an
648                                // element in an associative array,
649                                // for now
650                                $parts = array();
651                                
652                                if (preg_match('/^\s*(["\'].*[^\\\]["\'])\s*:\s*(\S.*),?$/Uis', $slice, $parts)) {
653                                    // "name":value pair
654                                    $key = $this->_json_decode($parts[1]);
655                                    $val = $this->_json_decode($parts[2]);
656                                    
657                                    if ($asArray) {
658                                        $obj[$key] = $val;
659                                    } else {
660                                        $obj->$key = $val;
661                                    }
662                                } elseif (preg_match('/^\s*(\w+)\s*:\s*(\S.*),?$/Uis', $slice, $parts)) {
663                                    // name:value pair, where name is unquoted
664                                    $key = $parts[1];
665                                    $val = $this->_json_decode($parts[2]);
666                                    
667                                    if ($asArray) {
668                                        $obj[$key] = $val;
669                                    } else {
670                                        $obj->$key = $val;
671                                    }
672                                } elseif (preg_match('/^\s*(["\']["\'])\s*:\s*(\S.*),?$/Uis', $slice, $parts)) {
673                                    // "":value pair
674                                    //$key = $this->_json_decode($parts[1]);
675                                    // use string that matches ext/json
676                                    $key = '_empty_';
677                                    $val = $this->_json_decode($parts[2]);
678                                    
679                                    if ($asArray) {
680                                        $obj[$key] = $val;
681                                    } else {
682                                        $obj->$key = $val;
683                                    }
684                                }
685                            
686                            }
687                        
688                        } elseif ((($chrs{$c} == '"') || ($chrs{$c} == "'")) && ($top['what'] != self::IN_STR)) {
689                            // found a quote, and we are not inside a string
690                            array_push($stk, array('what' => self::IN_STR, 'where' => $c, 'delim' => $chrs{$c}));
691                            //print("Found start of string at {$c}\n");
692                        
693                        } elseif (($chrs{$c} == $top['delim']) &&
694                                 ($top['what'] == self::IN_STR) &&
695                                 ((strlen(substr($chrs, 0, $c)) - strlen(rtrim(substr($chrs, 0, $c), '\\'))) % 2 != 1)) {
696                            // found a quote, we're in a string, and it's not escaped
697                            // we know that it's not escaped because there is _not_ an
698                            // odd number of backslashes at the end of the string so far
699                            array_pop($stk);
700                            //print("Found end of string at {$c}: ".substr($chrs, $top['where'], (1 + 1 + $c - $top['where']))."\n");
701                        
702                        } elseif (($chrs{$c} == '[') &&
703                                 in_array($top['what'], array(self::SLICE, self::IN_ARR, self::IN_OBJ))) {
704                            // found a left-bracket, and we are in an array, object, or slice
705                            array_push($stk, array('what' => self::IN_ARR, 'where' => $c, 'delim' => false));
706                            //print("Found start of array at {$c}\n");
707                        
708                        } elseif (($chrs{$c} == ']') && ($top['what'] == self::IN_ARR)) {
709                            // found a right-bracket, and we're in an array
710                            $this->_level = null;
711                            array_pop($stk);
712                            //print("Found end of array at {$c}: ".substr($chrs, $top['where'], (1 + $c - $top['where']))."\n");
713                        
714                        } elseif (($chrs{$c} == '{') &&
715                                 in_array($top['what'], array(self::SLICE, self::IN_ARR, self::IN_OBJ))) {
716                            // found a left-brace, and we are in an array, object, or slice
717                            array_push($stk, array('what' => self::IN_OBJ, 'where' => $c, 'delim' => false));
718                            //print("Found start of object at {$c}\n");
719                        
720                        } elseif (($chrs{$c} == '}') && ($top['what'] == self::IN_OBJ)) {
721                            // found a right-brace, and we're in an object
722                            $this->_level = null;
723                            array_pop($stk);
724                            //print("Found end of object at {$c}: ".substr($chrs, $top['where'], (1 + $c - $top['where']))."\n");
725                        
726                        } elseif (($substr_chrs_c_2 == '/*') &&
727                                 in_array($top['what'], array(self::SLICE, self::IN_ARR, self::IN_OBJ))) {
728                            // found a comment start, and we are in an array, object, or slice
729                            array_push($stk, array('what' => self::IN_CMT, 'where' => $c, 'delim' => false));
730                            $c++;
731                            //print("Found start of comment at {$c}\n");
732                        
733                        } elseif (($substr_chrs_c_2 == '*/') && ($top['what'] == self::IN_CMT)) {
734                            // found a comment end, and we're in one now
735                            array_pop($stk);
736                            $c++;
737                            
738                            for ($i = $top['where']; $i <= $c; ++$i)
739                                $chrs = substr_replace($chrs, ' ', $i, 1);
740                            
741                            //print("Found end of comment at {$c}: ".substr($chrs, $top['where'], (1 + $c - $top['where']))."\n");
742                        
743                        }
744                    
745                    }
746                    
747                    if (reset($stk) == self::IN_ARR) {
748                        return $arr;
749                    
750                    } elseif (reset($stk) == self::IN_OBJ) {
751                        return $obj;
752                    
753                    }
754                
755                }
756        }
757    }
758    
759    /**
760     * 
761     * Array-walking method for use in generating JSON-formatted name-value
762     * pairs in the form of '"name":value'.
763     * 
764     * @param string $name name of key to use
765     * 
766     * @param mixed $value element to be encoded
767     * 
768     * @return string JSON-formatted name-value pair
769     * 
770     */
771    protected function _name_value($name, $value)
772    {
773        $encoded_value = $this->_json_encode($value);
774        return $this->_json_encode(strval($name)) . ':' . $encoded_value;
775    }
776    
777    /**
778     * 
779     * Convert a string from one UTF-16 char to one UTF-8 char.
780     * 
781     * Normally should be handled by mb_convert_encoding, but
782     * provides a slower PHP-only method for installations
783     * that lack the multibyte string extension.
784     * 
785     * @param string $utf16 UTF-16 character
786     * 
787     * @return string UTF-8 character
788     * 
789     */
790    protected function _utf162utf8($utf16)
791    {
792        // oh please oh please oh please oh please oh please
793        if(!$this->_config['bypass_mb'] &&
794            function_exists('mb_convert_encoding')) {
795                return mb_convert_encoding($utf16, 'UTF-8', 'UTF-16');
796        }
797        
798        $bytes = (ord($utf16{0}) << 8) | ord($utf16{1});
799        
800        switch (true) {
801            case ((0x7F & $bytes) == $bytes):
802                // this case should never be reached, because we are in ASCII range
803                // see: http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
804                return chr(0x7F & $bytes);
805            
806            case (0x07FF & $bytes) == $bytes:
807                // return a 2-byte UTF-8 character
808                // see: http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
809                return chr(0xC0 | (($bytes >> 6) & 0x1F))
810                     . chr(0x80 | ($bytes & 0x3F));
811            
812            case (0xFFFF & $bytes) == $bytes:
813                // return a 3-byte UTF-8 character
814                // see: http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
815                return chr(0xE0 | (($bytes >> 12) & 0x0F))
816                     . chr(0x80 | (($bytes >> 6) & 0x3F))
817                     . chr(0x80 | ($bytes & 0x3F));
818        }
819        
820        // ignoring UTF-32 for now, sorry
821        return '';
822    }
823    
824    /**
825     * 
826     * Convert a string from one UTF-8 char to one UTF-16 char.
827     * 
828     * Normally should be handled by mb_convert_encoding, but
829     * provides a slower PHP-only method for installations
830     * that lack the multibyte string extension.
831     * 
832     * @param string $utf8 UTF-8 character
833     * 
834     * @return string UTF-16 character
835     * 
836     */
837    protected function _utf82utf16($utf8)
838    {
839        // oh please oh please oh please oh please oh please
840        if (!$this->_config['bypass_mb'] &&
841            function_exists('mb_convert_encoding')) {
842                return mb_convert_encoding($utf8, 'UTF-16', 'UTF-8');
843        }
844        
845        switch (strlen($utf8)) {
846            case 1:
847                // this case should never be reached, because we are in ASCII range
848                // see: http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
849                return $utf8;
850            
851            case 2:
852                // return a UTF-16 character from a 2-byte UTF-8 char
853                // see: http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
854                return chr(0x07 & (ord($utf8{0}) >> 2))
855                     . chr((0xC0 & (ord($utf8{0}) << 6))
856                         | (0x3F & ord($utf8{1})));
857            
858            case 3:
859                // return a UTF-16 character from a 3-byte UTF-8 char
860                // see: http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
861                return chr((0xF0 & (ord($utf8{0}) << 4))
862                         | (0x0F & (ord($utf8{1}) >> 2)))
863                     . chr((0xC0 & (ord($utf8{1}) << 6))
864                         | (0x7F & ord($utf8{2})));
865        }
866        
867        // ignoring UTF-32 for now, sorry
868        return '';
869    }
870    
871    /**
872     * 
873     * Reduce a string by removing leading and trailing comments and whitespace.
874     * 
875     * @param string $str string value to strip of comments and whitespace
876     * 
877     * @return string string value stripped of comments and whitespace
878     * 
879     */
880    protected function _reduce_string($str)
881    {
882        $str = preg_replace(array(
883            
884            // eliminate single line comments in '// ...' form
885            '#^\s*//(.+)$#m',
886            
887            // eliminate multi-line comments in '/* ... */' form, at start of string
888            '#^\s*/\*(.+)\*/#Us',
889            
890            // eliminate multi-line comments in '/* ... */' form, at end of string
891            '#/\*(.+)\*/\s*$#Us'
892        
893        ), '', $str);
894        
895        // eliminate extraneous space
896        return trim($str);
897    }
898}