PageRenderTime 60ms CodeModel.GetById 12ms app.highlight 41ms RepoModel.GetById 1ms app.codeStats 1ms

/framework/vendor/zend/Zend/Json/Decoder.php

http://zoop.googlecode.com/
PHP | 578 lines | 401 code | 41 blank | 136 comment | 32 complexity | d1ba0bb41eb5746c441040893698b58f MD5 | raw file
  1<?php
  2/**
  3 * Zend Framework
  4 *
  5 * LICENSE
  6 *
  7 * This source file is subject to the new BSD license that is bundled
  8 * with this package in the file LICENSE.txt.
  9 * It is also available through the world-wide-web at this URL:
 10 * http://framework.zend.com/license/new-bsd
 11 * If you did not receive a copy of the license and are unable to
 12 * obtain it through the world-wide-web, please send an email
 13 * to license@zend.com so we can send you a copy immediately.
 14 *
 15 * @category   Zend
 16 * @package    Zend_Json
 17 * @copyright  Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
 18 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 19 * @version    $Id: Decoder.php 20096 2010-01-06 02:05:09Z bkarwin $
 20 */
 21
 22/**
 23 * @see Zend_Json
 24 */
 25require_once 'Zend/Json.php';
 26
 27/**
 28 * Decode JSON encoded string to PHP variable constructs
 29 *
 30 * @category   Zend
 31 * @package    Zend_Json
 32 * @copyright  Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
 33 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 34 */
 35class Zend_Json_Decoder
 36{
 37    /**
 38     * Parse tokens used to decode the JSON object. These are not
 39     * for public consumption, they are just used internally to the
 40     * class.
 41     */
 42    const EOF         = 0;
 43    const DATUM        = 1;
 44    const LBRACE    = 2;
 45    const LBRACKET    = 3;
 46    const RBRACE     = 4;
 47    const RBRACKET    = 5;
 48    const COMMA       = 6;
 49    const COLON        = 7;
 50
 51    /**
 52     * Use to maintain a "pointer" to the source being decoded
 53     *
 54     * @var string
 55     */
 56    protected $_source;
 57
 58    /**
 59     * Caches the source length
 60     *
 61     * @var int
 62     */
 63    protected $_sourceLength;
 64
 65    /**
 66     * The offset within the souce being decoded
 67     *
 68     * @var int
 69     *
 70     */
 71    protected $_offset;
 72
 73    /**
 74     * The current token being considered in the parser cycle
 75     *
 76     * @var int
 77     */
 78    protected $_token;
 79
 80    /**
 81     * Flag indicating how objects should be decoded
 82     *
 83     * @var int
 84     * @access protected
 85     */
 86    protected $_decodeType;
 87
 88    /**
 89     * Constructor
 90     *
 91     * @param string $source String source to decode
 92     * @param int $decodeType How objects should be decoded -- see
 93     * {@link Zend_Json::TYPE_ARRAY} and {@link Zend_Json::TYPE_OBJECT} for
 94     * valid values
 95     * @return void
 96     */
 97    protected function __construct($source, $decodeType)
 98    {
 99        // Set defaults
100        $this->_source       = self::decodeUnicodeString($source);
101        $this->_sourceLength = strlen($this->_source);
102        $this->_token        = self::EOF;
103        $this->_offset       = 0;
104
105        // Normalize and set $decodeType
106        if (!in_array($decodeType, array(Zend_Json::TYPE_ARRAY, Zend_Json::TYPE_OBJECT)))
107        {
108            $decodeType = Zend_Json::TYPE_ARRAY;
109        }
110        $this->_decodeType   = $decodeType;
111
112        // Set pointer at first token
113        $this->_getNextToken();
114    }
115
116    /**
117     * Decode a JSON source string
118     *
119     * Decodes a JSON encoded string. The value returned will be one of the
120     * following:
121     *        - integer
122     *        - float
123     *        - boolean
124     *        - null
125     *      - StdClass
126     *      - array
127     *         - array of one or more of the above types
128     *
129     * By default, decoded objects will be returned as associative arrays; to
130     * return a StdClass object instead, pass {@link Zend_Json::TYPE_OBJECT} to
131     * the $objectDecodeType parameter.
132     *
133     * Throws a Zend_Json_Exception if the source string is null.
134     *
135     * @static
136     * @access public
137     * @param string $source String to be decoded
138     * @param int $objectDecodeType How objects should be decoded; should be
139     * either or {@link Zend_Json::TYPE_ARRAY} or
140     * {@link Zend_Json::TYPE_OBJECT}; defaults to TYPE_ARRAY
141     * @return mixed
142     * @throws Zend_Json_Exception
143     */
144    public static function decode($source = null, $objectDecodeType = Zend_Json::TYPE_ARRAY)
145    {
146        if (null === $source) {
147            require_once 'Zend/Json/Exception.php';
148            throw new Zend_Json_Exception('Must specify JSON encoded source for decoding');
149        } elseif (!is_string($source)) {
150            require_once 'Zend/Json/Exception.php';
151            throw new Zend_Json_Exception('Can only decode JSON encoded strings');
152        }
153
154        $decoder = new self($source, $objectDecodeType);
155
156        return $decoder->_decodeValue();
157    }
158
159
160    /**
161     * Recursive driving rountine for supported toplevel tops
162     *
163     * @return mixed
164     */
165    protected function _decodeValue()
166    {
167        switch ($this->_token) {
168            case self::DATUM:
169                $result  = $this->_tokenValue;
170                $this->_getNextToken();
171                return($result);
172                break;
173            case self::LBRACE:
174                return($this->_decodeObject());
175                break;
176            case self::LBRACKET:
177                return($this->_decodeArray());
178                break;
179            default:
180                return null;
181                break;
182        }
183    }
184
185    /**
186     * Decodes an object of the form:
187     *  { "attribute: value, "attribute2" : value,...}
188     *
189     * If Zend_Json_Encoder was used to encode the original object then
190     * a special attribute called __className which specifies a class
191     * name that should wrap the data contained within the encoded source.
192     *
193     * Decodes to either an array or StdClass object, based on the value of
194     * {@link $_decodeType}. If invalid $_decodeType present, returns as an
195     * array.
196     *
197     * @return array|StdClass
198     */
199    protected function _decodeObject()
200    {
201        $members = array();
202        $tok = $this->_getNextToken();
203
204        while ($tok && $tok != self::RBRACE) {
205            if ($tok != self::DATUM || ! is_string($this->_tokenValue)) {
206                require_once 'Zend/Json/Exception.php';
207                throw new Zend_Json_Exception('Missing key in object encoding: ' . $this->_source);
208            }
209
210            $key = $this->_tokenValue;
211            $tok = $this->_getNextToken();
212
213            if ($tok != self::COLON) {
214                require_once 'Zend/Json/Exception.php';
215                throw new Zend_Json_Exception('Missing ":" in object encoding: ' . $this->_source);
216            }
217
218            $tok = $this->_getNextToken();
219            $members[$key] = $this->_decodeValue();
220            $tok = $this->_token;
221
222            if ($tok == self::RBRACE) {
223                break;
224            }
225
226            if ($tok != self::COMMA) {
227                require_once 'Zend/Json/Exception.php';
228                throw new Zend_Json_Exception('Missing "," in object encoding: ' . $this->_source);
229            }
230
231            $tok = $this->_getNextToken();
232        }
233
234        switch ($this->_decodeType) {
235            case Zend_Json::TYPE_OBJECT:
236                // Create new StdClass and populate with $members
237                $result = new StdClass();
238                foreach ($members as $key => $value) {
239                    $result->$key = $value;
240                }
241                break;
242            case Zend_Json::TYPE_ARRAY:
243            default:
244                $result = $members;
245                break;
246        }
247
248        $this->_getNextToken();
249        return $result;
250    }
251
252    /**
253     * Decodes a JSON array format:
254     *    [element, element2,...,elementN]
255     *
256     * @return array
257     */
258    protected function _decodeArray()
259    {
260        $result = array();
261        $starttok = $tok = $this->_getNextToken(); // Move past the '['
262        $index  = 0;
263
264        while ($tok && $tok != self::RBRACKET) {
265            $result[$index++] = $this->_decodeValue();
266
267            $tok = $this->_token;
268
269            if ($tok == self::RBRACKET || !$tok) {
270                break;
271            }
272
273            if ($tok != self::COMMA) {
274                require_once 'Zend/Json/Exception.php';
275                throw new Zend_Json_Exception('Missing "," in array encoding: ' . $this->_source);
276            }
277
278            $tok = $this->_getNextToken();
279        }
280
281        $this->_getNextToken();
282        return($result);
283    }
284
285
286    /**
287     * Removes whitepsace characters from the source input
288     */
289    protected function _eatWhitespace()
290    {
291        if (preg_match(
292                '/([\t\b\f\n\r ])*/s',
293                $this->_source,
294                $matches,
295                PREG_OFFSET_CAPTURE,
296                $this->_offset)
297            && $matches[0][1] == $this->_offset)
298        {
299            $this->_offset += strlen($matches[0][0]);
300        }
301    }
302
303
304    /**
305     * Retrieves the next token from the source stream
306     *
307     * @return int Token constant value specified in class definition
308     */
309    protected function _getNextToken()
310    {
311        $this->_token      = self::EOF;
312        $this->_tokenValue = null;
313        $this->_eatWhitespace();
314
315        if ($this->_offset >= $this->_sourceLength) {
316            return(self::EOF);
317        }
318
319        $str        = $this->_source;
320        $str_length = $this->_sourceLength;
321        $i          = $this->_offset;
322        $start      = $i;
323
324        switch ($str{$i}) {
325            case '{':
326               $this->_token = self::LBRACE;
327               break;
328            case '}':
329                $this->_token = self::RBRACE;
330                break;
331            case '[':
332                $this->_token = self::LBRACKET;
333                break;
334            case ']':
335                $this->_token = self::RBRACKET;
336                break;
337            case ',':
338                $this->_token = self::COMMA;
339                break;
340            case ':':
341                $this->_token = self::COLON;
342                break;
343            case  '"':
344                $result = '';
345                do {
346                    $i++;
347                    if ($i >= $str_length) {
348                        break;
349                    }
350
351                    $chr = $str{$i};
352
353                    if ($chr == '\\') {
354                        $i++;
355                        if ($i >= $str_length) {
356                            break;
357                        }
358                        $chr = $str{$i};
359                        switch ($chr) {
360                            case '"' :
361                                $result .= '"';
362                                break;
363                            case '\\':
364                                $result .= '\\';
365                                break;
366                            case '/' :
367                                $result .= '/';
368                                break;
369                            case 'b' :
370                                $result .= chr(8);
371                                break;
372                            case 'f' :
373                                $result .= chr(12);
374                                break;
375                            case 'n' :
376                                $result .= chr(10);
377                                break;
378                            case 'r' :
379                                $result .= chr(13);
380                                break;
381                            case 't' :
382                                $result .= chr(9);
383                                break;
384                            case '\'' :
385                                $result .= '\'';
386                                break;
387                            default:
388                                require_once 'Zend/Json/Exception.php';
389                                throw new Zend_Json_Exception("Illegal escape "
390                                    .  "sequence '" . $chr . "'");
391                        }
392                    } elseif($chr == '"') {
393                        break;
394                    } else {
395                        $result .= $chr;
396                    }
397                } while ($i < $str_length);
398
399                $this->_token = self::DATUM;
400                //$this->_tokenValue = substr($str, $start + 1, $i - $start - 1);
401                $this->_tokenValue = $result;
402                break;
403            case 't':
404                if (($i+ 3) < $str_length && substr($str, $start, 4) == "true") {
405                    $this->_token = self::DATUM;
406                }
407                $this->_tokenValue = true;
408                $i += 3;
409                break;
410            case 'f':
411                if (($i+ 4) < $str_length && substr($str, $start, 5) == "false") {
412                    $this->_token = self::DATUM;
413                }
414                $this->_tokenValue = false;
415                $i += 4;
416                break;
417            case 'n':
418                if (($i+ 3) < $str_length && substr($str, $start, 4) == "null") {
419                    $this->_token = self::DATUM;
420                }
421                $this->_tokenValue = NULL;
422                $i += 3;
423                break;
424        }
425
426        if ($this->_token != self::EOF) {
427            $this->_offset = $i + 1; // Consume the last token character
428            return($this->_token);
429        }
430
431        $chr = $str{$i};
432        if ($chr == '-' || $chr == '.' || ($chr >= '0' && $chr <= '9')) {
433            if (preg_match('/-?([0-9])*(\.[0-9]*)?((e|E)((-|\+)?)[0-9]+)?/s',
434                $str, $matches, PREG_OFFSET_CAPTURE, $start) && $matches[0][1] == $start) {
435
436                $datum = $matches[0][0];
437
438                if (is_numeric($datum)) {
439                    if (preg_match('/^0\d+$/', $datum)) {
440                        require_once 'Zend/Json/Exception.php';
441                        throw new Zend_Json_Exception("Octal notation not supported by JSON (value: $datum)");
442                    } else {
443                        $val  = intval($datum);
444                        $fVal = floatval($datum);
445                        $this->_tokenValue = ($val == $fVal ? $val : $fVal);
446                    }
447                } else {
448                    require_once 'Zend/Json/Exception.php';
449                    throw new Zend_Json_Exception("Illegal number format: $datum");
450                }
451
452                $this->_token = self::DATUM;
453                $this->_offset = $start + strlen($datum);
454            }
455        } else {
456            require_once 'Zend/Json/Exception.php';
457            throw new Zend_Json_Exception('Illegal Token');
458        }
459
460        return($this->_token);
461    }
462
463    /**
464     * Decode Unicode Characters from \u0000 ASCII syntax.
465     *
466     * This algorithm was originally developed for the
467     * Solar Framework by Paul M. Jones
468     *
469     * @link   http://solarphp.com/
470     * @link   http://svn.solarphp.com/core/trunk/Solar/Json.php
471     * @param  string $value
472     * @return string
473     */
474    public static function decodeUnicodeString($chrs)
475    {
476        $delim       = substr($chrs, 0, 1);
477        $utf8        = '';
478        $strlen_chrs = strlen($chrs);
479
480        for($i = 0; $i < $strlen_chrs; $i++) {
481
482            $substr_chrs_c_2 = substr($chrs, $i, 2);
483            $ord_chrs_c = ord($chrs[$i]);
484
485            switch (true) {
486                case preg_match('/\\\u[0-9A-F]{4}/i', substr($chrs, $i, 6)):
487                    // single, escaped unicode character
488                    $utf16 = chr(hexdec(substr($chrs, ($i + 2), 2)))
489                           . chr(hexdec(substr($chrs, ($i + 4), 2)));
490                    $utf8 .= self::_utf162utf8($utf16);
491                    $i += 5;
492                    break;
493                case ($ord_chrs_c >= 0x20) && ($ord_chrs_c <= 0x7F):
494                    $utf8 .= $chrs{$i};
495                    break;
496                case ($ord_chrs_c & 0xE0) == 0xC0:
497                    // characters U-00000080 - U-000007FF, mask 110XXXXX
498                    //see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
499                    $utf8 .= substr($chrs, $i, 2);
500                    ++$i;
501                    break;
502                case ($ord_chrs_c & 0xF0) == 0xE0:
503                    // characters U-00000800 - U-0000FFFF, mask 1110XXXX
504                    // see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
505                    $utf8 .= substr($chrs, $i, 3);
506                    $i += 2;
507                    break;
508                case ($ord_chrs_c & 0xF8) == 0xF0:
509                    // characters U-00010000 - U-001FFFFF, mask 11110XXX
510                    // see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
511                    $utf8 .= substr($chrs, $i, 4);
512                    $i += 3;
513                    break;
514                case ($ord_chrs_c & 0xFC) == 0xF8:
515                    // characters U-00200000 - U-03FFFFFF, mask 111110XX
516                    // see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
517                    $utf8 .= substr($chrs, $i, 5);
518                    $i += 4;
519                    break;
520                case ($ord_chrs_c & 0xFE) == 0xFC:
521                    // characters U-04000000 - U-7FFFFFFF, mask 1111110X
522                    // see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
523                    $utf8 .= substr($chrs, $i, 6);
524                    $i += 5;
525                    break;
526            }
527        }
528
529        return $utf8;
530    }
531
532    /**
533     * Convert a string from one UTF-16 char to one UTF-8 char.
534     *
535     * Normally should be handled by mb_convert_encoding, but
536     * provides a slower PHP-only method for installations
537     * that lack the multibye string extension.
538     *
539     * This method is from the Solar Framework by Paul M. Jones
540     *
541     * @link   http://solarphp.com
542     * @param  string $utf16 UTF-16 character
543     * @return string UTF-8 character
544     */
545    protected static function _utf162utf8($utf16)
546    {
547        // Check for mb extension otherwise do by hand.
548        if( function_exists('mb_convert_encoding') ) {
549            return mb_convert_encoding($utf16, 'UTF-8', 'UTF-16');
550        }
551
552        $bytes = (ord($utf16{0}) << 8) | ord($utf16{1});
553
554        switch (true) {
555            case ((0x7F & $bytes) == $bytes):
556                // this case should never be reached, because we are in ASCII range
557                // see: http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
558                return chr(0x7F & $bytes);
559
560            case (0x07FF & $bytes) == $bytes:
561                // return a 2-byte UTF-8 character
562                // see: http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
563                return chr(0xC0 | (($bytes >> 6) & 0x1F))
564                     . chr(0x80 | ($bytes & 0x3F));
565
566            case (0xFFFF & $bytes) == $bytes:
567                // return a 3-byte UTF-8 character
568                // see: http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
569                return chr(0xE0 | (($bytes >> 12) & 0x0F))
570                     . chr(0x80 | (($bytes >> 6) & 0x3F))
571                     . chr(0x80 | ($bytes & 0x3F));
572        }
573
574        // ignoring UTF-32 for now, sorry
575        return '';
576    }
577}
578