PageRenderTime 29ms CodeModel.GetById 11ms app.highlight 13ms RepoModel.GetById 1ms app.codeStats 0ms

/vendor/ZF2/library/Zend/Json/Decoder.php

https://github.com/XataWork/zf2-project
PHP | 557 lines | 398 code | 41 blank | 118 comment | 33 complexity | af35eb7a105057525d64bb1f76fab06d MD5 | raw file
  1<?php
  2/**
  3 * Zend Framework (http://framework.zend.com/)
  4 *
  5 * @link      http://github.com/zendframework/zf2 for the canonical source repository
  6 * @copyright Copyright (c) 2005-2014 Zend Technologies USA Inc. (http://www.zend.com)
  7 * @license   http://framework.zend.com/license/new-bsd New BSD License
  8 */
  9
 10namespace Zend\Json;
 11
 12use stdClass;
 13use Zend\Json\Exception\InvalidArgumentException;
 14use Zend\Json\Exception\RuntimeException;
 15
 16/**
 17 * Decode JSON encoded string to PHP variable constructs
 18 */
 19class Decoder
 20{
 21
 22    /**
 23     * Parse tokens used to decode the JSON object. These are not
 24     * for public consumption, they are just used internally to the
 25     * class.
 26     */
 27    const EOF       = 0;
 28    const DATUM     = 1;
 29    const LBRACE    = 2;
 30    const LBRACKET  = 3;
 31    const RBRACE    = 4;
 32    const RBRACKET  = 5;
 33    const COMMA     = 6;
 34    const COLON     = 7;
 35
 36    /**
 37     * Use to maintain a "pointer" to the source being decoded
 38     *
 39     * @var string
 40     */
 41    protected $source;
 42
 43    /**
 44     * Caches the source length
 45     *
 46     * @var int
 47     */
 48    protected $sourceLength;
 49
 50    /**
 51     * The offset within the source being decoded
 52     *
 53     * @var int
 54     *
 55     */
 56    protected $offset;
 57
 58    /**
 59     * The current token being considered in the parser cycle
 60     *
 61     * @var int
 62     */
 63    protected $token;
 64
 65    /**
 66     * Flag indicating how objects should be decoded
 67     *
 68     * @var int
 69     * @access protected
 70     */
 71    protected $decodeType;
 72
 73    /**
 74     * @var $_tokenValue
 75     */
 76    protected $tokenValue;
 77
 78    /**
 79     * Constructor
 80     *
 81     * @param string $source     String source to decode
 82     * @param int    $decodeType How objects should be decoded -- see
 83     * {@link Zend\Json\Json::TYPE_ARRAY} and {@link Zend\Json\Json::TYPE_OBJECT} for
 84     * valid values
 85     * @throws InvalidArgumentException
 86     */
 87    protected function __construct($source, $decodeType)
 88    {
 89        // Set defaults
 90        $this->source       = self::decodeUnicodeString($source);
 91        $this->sourceLength = strlen($this->source);
 92        $this->token        = self::EOF;
 93        $this->offset       = 0;
 94
 95        switch ($decodeType) {
 96            case Json::TYPE_ARRAY:
 97            case Json::TYPE_OBJECT:
 98                $this->decodeType = $decodeType;
 99                break;
100            default:
101                throw new InvalidArgumentException("Unknown decode type '{$decodeType}', please use one of the constants Json::TYPE_*");
102        }
103
104        // Set pointer at first token
105        $this->_getNextToken();
106    }
107
108    /**
109     * Decode a JSON source string
110     *
111     * Decodes a JSON encoded string. The value returned will be one of the
112     * following:
113     *        - integer
114     *        - float
115     *        - boolean
116     *        - null
117     *      - stdClass
118     *      - array
119     *         - array of one or more of the above types
120     *
121     * By default, decoded objects will be returned as associative arrays; to
122     * return a stdClass object instead, pass {@link Zend\Json\Json::TYPE_OBJECT} to
123     * the $objectDecodeType parameter.
124     *
125     * @static
126     * @access public
127     * @param string $source String to be decoded
128     * @param int $objectDecodeType How objects should be decoded; should be
129     * either or {@link Zend\Json\Json::TYPE_ARRAY} or
130     * {@link Zend\Json\Json::TYPE_OBJECT}; defaults to TYPE_ARRAY
131     * @return mixed
132     */
133    public static function decode($source, $objectDecodeType = Json::TYPE_OBJECT)
134    {
135        $decoder = new static($source, $objectDecodeType);
136        return $decoder->_decodeValue();
137    }
138
139    /**
140     * Recursive driving routine for supported toplevel tops
141     *
142     * @return mixed
143     */
144    protected function _decodeValue()
145    {
146        switch ($this->token) {
147            case self::DATUM:
148                $result  = $this->tokenValue;
149                $this->_getNextToken();
150                return($result);
151                break;
152            case self::LBRACE:
153                return($this->_decodeObject());
154                break;
155            case self::LBRACKET:
156                return($this->_decodeArray());
157                break;
158            default:
159                return null;
160                break;
161        }
162    }
163
164    /**
165     * Decodes an object of the form:
166     *  { "attribute: value, "attribute2" : value,...}
167     *
168     * If Zend\Json\Encoder was used to encode the original object then
169     * a special attribute called __className which specifies a class
170     * name that should wrap the data contained within the encoded source.
171     *
172     * Decodes to either an array or stdClass object, based on the value of
173     * {@link $decodeType}. If invalid $decodeType present, returns as an
174     * array.
175     *
176     * @return array|stdClass
177     * @throws RuntimeException
178     */
179    protected function _decodeObject()
180    {
181        $members = array();
182        $tok = $this->_getNextToken();
183
184        while ($tok && $tok != self::RBRACE) {
185            if ($tok != self::DATUM || ! is_string($this->tokenValue)) {
186                throw new RuntimeException('Missing key in object encoding: ' . $this->source);
187            }
188
189            $key = $this->tokenValue;
190            $tok = $this->_getNextToken();
191
192            if ($tok != self::COLON) {
193                throw new RuntimeException('Missing ":" in object encoding: ' . $this->source);
194            }
195
196            $tok = $this->_getNextToken();
197            $members[$key] = $this->_decodeValue();
198            $tok = $this->token;
199
200            if ($tok == self::RBRACE) {
201                break;
202            }
203
204            if ($tok != self::COMMA) {
205                throw new RuntimeException('Missing "," in object encoding: ' . $this->source);
206            }
207
208            $tok = $this->_getNextToken();
209        }
210
211        switch ($this->decodeType) {
212            case Json::TYPE_OBJECT:
213                // Create new stdClass and populate with $members
214                $result = new stdClass();
215                foreach ($members as $key => $value) {
216                    if ($key === '') {
217                        $key = '_empty_';
218                    }
219                    $result->$key = $value;
220                }
221                break;
222            case Json::TYPE_ARRAY:
223            default:
224                $result = $members;
225                break;
226        }
227
228        $this->_getNextToken();
229        return $result;
230    }
231
232    /**
233     * Decodes a JSON array format:
234     *    [element, element2,...,elementN]
235     *
236     * @return array
237     * @throws RuntimeException
238     */
239    protected function _decodeArray()
240    {
241        $result = array();
242        $tok = $this->_getNextToken(); // Move past the '['
243        $index  = 0;
244
245        while ($tok && $tok != self::RBRACKET) {
246            $result[$index++] = $this->_decodeValue();
247
248            $tok = $this->token;
249
250            if ($tok == self::RBRACKET || !$tok) {
251                break;
252            }
253
254            if ($tok != self::COMMA) {
255                throw new RuntimeException('Missing "," in array encoding: ' . $this->source);
256            }
257
258            $tok = $this->_getNextToken();
259        }
260
261        $this->_getNextToken();
262        return $result;
263    }
264
265
266    /**
267     * Removes whitespace characters from the source input
268     */
269    protected function _eatWhitespace()
270    {
271        if (preg_match(
272                '/([\t\b\f\n\r ])*/s',
273                $this->source,
274                $matches,
275                PREG_OFFSET_CAPTURE,
276                $this->offset)
277            && $matches[0][1] == $this->offset)
278        {
279            $this->offset += strlen($matches[0][0]);
280        }
281    }
282
283
284    /**
285     * Retrieves the next token from the source stream
286     *
287     * @return int Token constant value specified in class definition
288     * @throws RuntimeException
289     */
290    protected function _getNextToken()
291    {
292        $this->token      = self::EOF;
293        $this->tokenValue = null;
294        $this->_eatWhitespace();
295
296        if ($this->offset >= $this->sourceLength) {
297            return(self::EOF);
298        }
299
300        $str       = $this->source;
301        $strLength = $this->sourceLength;
302        $i         = $this->offset;
303        $start     = $i;
304
305        switch ($str{$i}) {
306            case '{':
307               $this->token = self::LBRACE;
308               break;
309            case '}':
310                $this->token = self::RBRACE;
311                break;
312            case '[':
313                $this->token = self::LBRACKET;
314                break;
315            case ']':
316                $this->token = self::RBRACKET;
317                break;
318            case ',':
319                $this->token = self::COMMA;
320                break;
321            case ':':
322                $this->token = self::COLON;
323                break;
324            case  '"':
325                $result = '';
326                do {
327                    $i++;
328                    if ($i >= $strLength) {
329                        break;
330                    }
331
332                    $chr = $str{$i};
333
334                    if ($chr == '\\') {
335                        $i++;
336                        if ($i >= $strLength) {
337                            break;
338                        }
339                        $chr = $str{$i};
340                        switch ($chr) {
341                            case '"' :
342                                $result .= '"';
343                                break;
344                            case '\\':
345                                $result .= '\\';
346                                break;
347                            case '/' :
348                                $result .= '/';
349                                break;
350                            case 'b' :
351                                $result .= "\x08";
352                                break;
353                            case 'f' :
354                                $result .= "\x0c";
355                                break;
356                            case 'n' :
357                                $result .= "\x0a";
358                                break;
359                            case 'r' :
360                                $result .= "\x0d";
361                                break;
362                            case 't' :
363                                $result .= "\x09";
364                                break;
365                            case '\'' :
366                                $result .= '\'';
367                                break;
368                            default:
369                                throw new RuntimeException("Illegal escape sequence '{$chr}'");
370                        }
371                    } elseif ($chr == '"') {
372                        break;
373                    } else {
374                        $result .= $chr;
375                    }
376                } while ($i < $strLength);
377
378                $this->token = self::DATUM;
379                //$this->tokenValue = substr($str, $start + 1, $i - $start - 1);
380                $this->tokenValue = $result;
381                break;
382            case 't':
383                if (($i+ 3) < $strLength && substr($str, $start, 4) == "true") {
384                    $this->token = self::DATUM;
385                }
386                $this->tokenValue = true;
387                $i += 3;
388                break;
389            case 'f':
390                if (($i+ 4) < $strLength && substr($str, $start, 5) == "false") {
391                    $this->token = self::DATUM;
392                }
393                $this->tokenValue = false;
394                $i += 4;
395                break;
396            case 'n':
397                if (($i+ 3) < $strLength && substr($str, $start, 4) == "null") {
398                    $this->token = self::DATUM;
399                }
400                $this->tokenValue = NULL;
401                $i += 3;
402                break;
403        }
404
405        if ($this->token != self::EOF) {
406            $this->offset = $i + 1; // Consume the last token character
407            return($this->token);
408        }
409
410        $chr = $str{$i};
411        if ($chr == '-' || $chr == '.' || ($chr >= '0' && $chr <= '9')) {
412            if (preg_match('/-?([0-9])*(\.[0-9]*)?((e|E)((-|\+)?)[0-9]+)?/s',
413                $str, $matches, PREG_OFFSET_CAPTURE, $start) && $matches[0][1] == $start) {
414
415                $datum = $matches[0][0];
416
417                if (is_numeric($datum)) {
418                    if (preg_match('/^0\d+$/', $datum)) {
419                        throw new RuntimeException("Octal notation not supported by JSON (value: {$datum})");
420                    } else {
421                        $val  = intval($datum);
422                        $fVal = floatval($datum);
423                        $this->tokenValue = ($val == $fVal ? $val : $fVal);
424                    }
425                } else {
426                    throw new RuntimeException("Illegal number format: {$datum}");
427                }
428
429                $this->token = self::DATUM;
430                $this->offset = $start + strlen($datum);
431            }
432        } else {
433            throw new RuntimeException('Illegal Token');
434        }
435
436        return $this->token;
437    }
438
439    /**
440     * Decode Unicode Characters from \u0000 ASCII syntax.
441     *
442     * This algorithm was originally developed for the
443     * Solar Framework by Paul M. Jones
444     *
445     * @link   http://solarphp.com/
446     * @link   https://github.com/solarphp/core/blob/master/Solar/Json.php
447     * @param  string $chrs
448     * @return string
449     */
450    public static function decodeUnicodeString($chrs)
451    {
452        $chrs       = (string) $chrs;
453        $utf8       = '';
454        $strlenChrs = strlen($chrs);
455
456        for ($i = 0; $i < $strlenChrs; $i++) {
457            $ordChrsC = ord($chrs[$i]);
458
459            switch (true) {
460                case preg_match('/\\\u[0-9A-F]{4}/i', substr($chrs, $i, 6)):
461                    // single, escaped unicode character
462                    $utf16 = chr(hexdec(substr($chrs, ($i + 2), 2)))
463                           . chr(hexdec(substr($chrs, ($i + 4), 2)));
464                    $utf8char = self::_utf162utf8($utf16);
465                    $search  = array('\\', "\n", "\t", "\r", chr(0x08), chr(0x0C), '"', '\'', '/');
466                    if (in_array($utf8char, $search)) {
467                        $replace = array('\\\\', '\\n', '\\t', '\\r', '\\b', '\\f', '\\"', '\\\'', '\\/');
468                        $utf8char  = str_replace($search, $replace, $utf8char);
469                    }
470                    $utf8 .= $utf8char;
471                    $i += 5;
472                    break;
473                case ($ordChrsC >= 0x20) && ($ordChrsC <= 0x7F):
474                    $utf8 .= $chrs{$i};
475                    break;
476                case ($ordChrsC & 0xE0) == 0xC0:
477                    // characters U-00000080 - U-000007FF, mask 110XXXXX
478                    //see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
479                    $utf8 .= substr($chrs, $i, 2);
480                    ++$i;
481                    break;
482                case ($ordChrsC & 0xF0) == 0xE0:
483                    // characters U-00000800 - U-0000FFFF, mask 1110XXXX
484                    // see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
485                    $utf8 .= substr($chrs, $i, 3);
486                    $i += 2;
487                    break;
488                case ($ordChrsC & 0xF8) == 0xF0:
489                    // characters U-00010000 - U-001FFFFF, mask 11110XXX
490                    // see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
491                    $utf8 .= substr($chrs, $i, 4);
492                    $i += 3;
493                    break;
494                case ($ordChrsC & 0xFC) == 0xF8:
495                    // characters U-00200000 - U-03FFFFFF, mask 111110XX
496                    // see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
497                    $utf8 .= substr($chrs, $i, 5);
498                    $i += 4;
499                    break;
500                case ($ordChrsC & 0xFE) == 0xFC:
501                    // characters U-04000000 - U-7FFFFFFF, mask 1111110X
502                    // see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
503                    $utf8 .= substr($chrs, $i, 6);
504                    $i += 5;
505                    break;
506            }
507        }
508
509        return $utf8;
510    }
511
512    /**
513     * Convert a string from one UTF-16 char to one UTF-8 char.
514     *
515     * Normally should be handled by mb_convert_encoding, but
516     * provides a slower PHP-only method for installations
517     * that lack the multibyte string extension.
518     *
519     * This method is from the Solar Framework by Paul M. Jones
520     *
521     * @link   http://solarphp.com
522     * @param  string $utf16 UTF-16 character
523     * @return string UTF-8 character
524     */
525    protected static function _utf162utf8($utf16)
526    {
527        // Check for mb extension otherwise do by hand.
528        if (function_exists('mb_convert_encoding')) {
529            return mb_convert_encoding($utf16, 'UTF-8', 'UTF-16');
530        }
531
532        $bytes = (ord($utf16{0}) << 8) | ord($utf16{1});
533
534        switch (true) {
535            case ((0x7F & $bytes) == $bytes):
536                // this case should never be reached, because we are in ASCII range
537                // see: http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
538                return chr(0x7F & $bytes);
539
540            case (0x07FF & $bytes) == $bytes:
541                // return a 2-byte UTF-8 character
542                // see: http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
543                return chr(0xC0 | (($bytes >> 6) & 0x1F))
544                     . chr(0x80 | ($bytes & 0x3F));
545
546            case (0xFFFF & $bytes) == $bytes:
547                // return a 3-byte UTF-8 character
548                // see: http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
549                return chr(0xE0 | (($bytes >> 12) & 0x0F))
550                     . chr(0x80 | (($bytes >> 6) & 0x3F))
551                     . chr(0x80 | ($bytes & 0x3F));
552        }
553
554        // ignoring UTF-32 for now, sorry
555        return '';
556    }
557}