PageRenderTime 38ms CodeModel.GetById 8ms RepoModel.GetById 0ms app.codeStats 0ms

/Json/Decoder.php

https://bitbucket.org/gkawka/zend-framework
PHP | 581 lines | 404 code | 41 blank | 136 comment | 33 complexity | 1d002ea797a67c95cc6c569be661b313 MD5 | raw file
  1. <?php
  2. /**
  3. * Zend Framework
  4. *
  5. * LICENSE
  6. *
  7. * This source file is subject to the new BSD license that is bundled
  8. * with this package in the file LICENSE.txt.
  9. * It is also available through the world-wide-web at this URL:
  10. * http://framework.zend.com/license/new-bsd
  11. * If you did not receive a copy of the license and are unable to
  12. * obtain it through the world-wide-web, please send an email
  13. * to license@zend.com so we can send you a copy immediately.
  14. *
  15. * @category Zend
  16. * @package Zend_Json
  17. * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
  18. * @license http://framework.zend.com/license/new-bsd New BSD License
  19. * @version $Id: Decoder.php 24799 2012-05-12 19:27:07Z adamlundrigan $
  20. */
  21. /**
  22. * @see Zend_Json
  23. */
  24. require_once 'Zend/Json.php';
  25. /**
  26. * Decode JSON encoded string to PHP variable constructs
  27. *
  28. * @category Zend
  29. * @package Zend_Json
  30. * @copyright Copyright (c) 2005-2012 Zend Technologies USA Inc. (http://www.zend.com)
  31. * @license http://framework.zend.com/license/new-bsd New BSD License
  32. */
  33. class Zend_Json_Decoder
  34. {
  35. /**
  36. * Parse tokens used to decode the JSON object. These are not
  37. * for public consumption, they are just used internally to the
  38. * class.
  39. */
  40. const EOF = 0;
  41. const DATUM = 1;
  42. const LBRACE = 2;
  43. const LBRACKET = 3;
  44. const RBRACE = 4;
  45. const RBRACKET = 5;
  46. const COMMA = 6;
  47. const COLON = 7;
  48. /**
  49. * Use to maintain a "pointer" to the source being decoded
  50. *
  51. * @var string
  52. */
  53. protected $_source;
  54. /**
  55. * Caches the source length
  56. *
  57. * @var int
  58. */
  59. protected $_sourceLength;
  60. /**
  61. * The offset within the souce being decoded
  62. *
  63. * @var int
  64. *
  65. */
  66. protected $_offset;
  67. /**
  68. * The current token being considered in the parser cycle
  69. *
  70. * @var int
  71. */
  72. protected $_token;
  73. /**
  74. * Flag indicating how objects should be decoded
  75. *
  76. * @var int
  77. * @access protected
  78. */
  79. protected $_decodeType;
  80. /**
  81. * Constructor
  82. *
  83. * @param string $source String source to decode
  84. * @param int $decodeType How objects should be decoded -- see
  85. * {@link Zend_Json::TYPE_ARRAY} and {@link Zend_Json::TYPE_OBJECT} for
  86. * valid values
  87. * @return void
  88. */
  89. protected function __construct($source, $decodeType)
  90. {
  91. // Set defaults
  92. $this->_source = self::decodeUnicodeString($source);
  93. $this->_sourceLength = strlen($this->_source);
  94. $this->_token = self::EOF;
  95. $this->_offset = 0;
  96. // Normalize and set $decodeType
  97. if (!in_array($decodeType, array(Zend_Json::TYPE_ARRAY, Zend_Json::TYPE_OBJECT)))
  98. {
  99. $decodeType = Zend_Json::TYPE_ARRAY;
  100. }
  101. $this->_decodeType = $decodeType;
  102. // Set pointer at first token
  103. $this->_getNextToken();
  104. }
  105. /**
  106. * Decode a JSON source string
  107. *
  108. * Decodes a JSON encoded string. The value returned will be one of the
  109. * following:
  110. * - integer
  111. * - float
  112. * - boolean
  113. * - null
  114. * - StdClass
  115. * - array
  116. * - array of one or more of the above types
  117. *
  118. * By default, decoded objects will be returned as associative arrays; to
  119. * return a StdClass object instead, pass {@link Zend_Json::TYPE_OBJECT} to
  120. * the $objectDecodeType parameter.
  121. *
  122. * Throws a Zend_Json_Exception if the source string is null.
  123. *
  124. * @static
  125. * @access public
  126. * @param string $source String to be decoded
  127. * @param int $objectDecodeType How objects should be decoded; should be
  128. * either or {@link Zend_Json::TYPE_ARRAY} or
  129. * {@link Zend_Json::TYPE_OBJECT}; defaults to TYPE_ARRAY
  130. * @return mixed
  131. * @throws Zend_Json_Exception
  132. */
  133. public static function decode($source = null, $objectDecodeType = Zend_Json::TYPE_ARRAY)
  134. {
  135. if (null === $source) {
  136. require_once 'Zend/Json/Exception.php';
  137. throw new Zend_Json_Exception('Must specify JSON encoded source for decoding');
  138. } elseif (!is_string($source)) {
  139. require_once 'Zend/Json/Exception.php';
  140. throw new Zend_Json_Exception('Can only decode JSON encoded strings');
  141. }
  142. $decoder = new self($source, $objectDecodeType);
  143. return $decoder->_decodeValue();
  144. }
  145. /**
  146. * Recursive driving rountine for supported toplevel tops
  147. *
  148. * @return mixed
  149. */
  150. protected function _decodeValue()
  151. {
  152. switch ($this->_token) {
  153. case self::DATUM:
  154. $result = $this->_tokenValue;
  155. $this->_getNextToken();
  156. return($result);
  157. break;
  158. case self::LBRACE:
  159. return($this->_decodeObject());
  160. break;
  161. case self::LBRACKET:
  162. return($this->_decodeArray());
  163. break;
  164. default:
  165. return null;
  166. break;
  167. }
  168. }
  169. /**
  170. * Decodes an object of the form:
  171. * { "attribute: value, "attribute2" : value,...}
  172. *
  173. * If Zend_Json_Encoder was used to encode the original object then
  174. * a special attribute called __className which specifies a class
  175. * name that should wrap the data contained within the encoded source.
  176. *
  177. * Decodes to either an array or StdClass object, based on the value of
  178. * {@link $_decodeType}. If invalid $_decodeType present, returns as an
  179. * array.
  180. *
  181. * @return array|StdClass
  182. */
  183. protected function _decodeObject()
  184. {
  185. $members = array();
  186. $tok = $this->_getNextToken();
  187. while ($tok && $tok != self::RBRACE) {
  188. if ($tok != self::DATUM || ! is_string($this->_tokenValue)) {
  189. require_once 'Zend/Json/Exception.php';
  190. throw new Zend_Json_Exception('Missing key in object encoding: ' . $this->_source);
  191. }
  192. $key = $this->_tokenValue;
  193. $tok = $this->_getNextToken();
  194. if ($tok != self::COLON) {
  195. require_once 'Zend/Json/Exception.php';
  196. throw new Zend_Json_Exception('Missing ":" in object encoding: ' . $this->_source);
  197. }
  198. $tok = $this->_getNextToken();
  199. $members[$key] = $this->_decodeValue();
  200. $tok = $this->_token;
  201. if ($tok == self::RBRACE) {
  202. break;
  203. }
  204. if ($tok != self::COMMA) {
  205. require_once 'Zend/Json/Exception.php';
  206. throw new Zend_Json_Exception('Missing "," in object encoding: ' . $this->_source);
  207. }
  208. $tok = $this->_getNextToken();
  209. }
  210. switch ($this->_decodeType) {
  211. case Zend_Json::TYPE_OBJECT:
  212. // Create new StdClass and populate with $members
  213. $result = new StdClass();
  214. foreach ($members as $key => $value) {
  215. if ($key === '') {
  216. $key = '_empty_';
  217. }
  218. $result->$key = $value;
  219. }
  220. break;
  221. case Zend_Json::TYPE_ARRAY:
  222. default:
  223. $result = $members;
  224. break;
  225. }
  226. $this->_getNextToken();
  227. return $result;
  228. }
  229. /**
  230. * Decodes a JSON array format:
  231. * [element, element2,...,elementN]
  232. *
  233. * @return array
  234. */
  235. protected function _decodeArray()
  236. {
  237. $result = array();
  238. $starttok = $tok = $this->_getNextToken(); // Move past the '['
  239. $index = 0;
  240. while ($tok && $tok != self::RBRACKET) {
  241. $result[$index++] = $this->_decodeValue();
  242. $tok = $this->_token;
  243. if ($tok == self::RBRACKET || !$tok) {
  244. break;
  245. }
  246. if ($tok != self::COMMA) {
  247. require_once 'Zend/Json/Exception.php';
  248. throw new Zend_Json_Exception('Missing "," in array encoding: ' . $this->_source);
  249. }
  250. $tok = $this->_getNextToken();
  251. }
  252. $this->_getNextToken();
  253. return($result);
  254. }
  255. /**
  256. * Removes whitepsace characters from the source input
  257. */
  258. protected function _eatWhitespace()
  259. {
  260. if (preg_match(
  261. '/([\t\b\f\n\r ])*/s',
  262. $this->_source,
  263. $matches,
  264. PREG_OFFSET_CAPTURE,
  265. $this->_offset)
  266. && $matches[0][1] == $this->_offset)
  267. {
  268. $this->_offset += strlen($matches[0][0]);
  269. }
  270. }
  271. /**
  272. * Retrieves the next token from the source stream
  273. *
  274. * @return int Token constant value specified in class definition
  275. */
  276. protected function _getNextToken()
  277. {
  278. $this->_token = self::EOF;
  279. $this->_tokenValue = null;
  280. $this->_eatWhitespace();
  281. if ($this->_offset >= $this->_sourceLength) {
  282. return(self::EOF);
  283. }
  284. $str = $this->_source;
  285. $str_length = $this->_sourceLength;
  286. $i = $this->_offset;
  287. $start = $i;
  288. switch ($str{$i}) {
  289. case '{':
  290. $this->_token = self::LBRACE;
  291. break;
  292. case '}':
  293. $this->_token = self::RBRACE;
  294. break;
  295. case '[':
  296. $this->_token = self::LBRACKET;
  297. break;
  298. case ']':
  299. $this->_token = self::RBRACKET;
  300. break;
  301. case ',':
  302. $this->_token = self::COMMA;
  303. break;
  304. case ':':
  305. $this->_token = self::COLON;
  306. break;
  307. case '"':
  308. $result = '';
  309. do {
  310. $i++;
  311. if ($i >= $str_length) {
  312. break;
  313. }
  314. $chr = $str{$i};
  315. if ($chr == '\\') {
  316. $i++;
  317. if ($i >= $str_length) {
  318. break;
  319. }
  320. $chr = $str{$i};
  321. switch ($chr) {
  322. case '"' :
  323. $result .= '"';
  324. break;
  325. case '\\':
  326. $result .= '\\';
  327. break;
  328. case '/' :
  329. $result .= '/';
  330. break;
  331. case 'b' :
  332. $result .= "\x08";
  333. break;
  334. case 'f' :
  335. $result .= "\x0c";
  336. break;
  337. case 'n' :
  338. $result .= "\x0a";
  339. break;
  340. case 'r' :
  341. $result .= "\x0d";
  342. break;
  343. case 't' :
  344. $result .= "\x09";
  345. break;
  346. case '\'' :
  347. $result .= '\'';
  348. break;
  349. default:
  350. require_once 'Zend/Json/Exception.php';
  351. throw new Zend_Json_Exception("Illegal escape "
  352. . "sequence '" . $chr . "'");
  353. }
  354. } elseif($chr == '"') {
  355. break;
  356. } else {
  357. $result .= $chr;
  358. }
  359. } while ($i < $str_length);
  360. $this->_token = self::DATUM;
  361. //$this->_tokenValue = substr($str, $start + 1, $i - $start - 1);
  362. $this->_tokenValue = $result;
  363. break;
  364. case 't':
  365. if (($i+ 3) < $str_length && substr($str, $start, 4) == "true") {
  366. $this->_token = self::DATUM;
  367. }
  368. $this->_tokenValue = true;
  369. $i += 3;
  370. break;
  371. case 'f':
  372. if (($i+ 4) < $str_length && substr($str, $start, 5) == "false") {
  373. $this->_token = self::DATUM;
  374. }
  375. $this->_tokenValue = false;
  376. $i += 4;
  377. break;
  378. case 'n':
  379. if (($i+ 3) < $str_length && substr($str, $start, 4) == "null") {
  380. $this->_token = self::DATUM;
  381. }
  382. $this->_tokenValue = NULL;
  383. $i += 3;
  384. break;
  385. }
  386. if ($this->_token != self::EOF) {
  387. $this->_offset = $i + 1; // Consume the last token character
  388. return($this->_token);
  389. }
  390. $chr = $str{$i};
  391. if ($chr == '-' || $chr == '.' || ($chr >= '0' && $chr <= '9')) {
  392. if (preg_match('/-?([0-9])*(\.[0-9]*)?((e|E)((-|\+)?)[0-9]+)?/s',
  393. $str, $matches, PREG_OFFSET_CAPTURE, $start) && $matches[0][1] == $start) {
  394. $datum = $matches[0][0];
  395. if (is_numeric($datum)) {
  396. if (preg_match('/^0\d+$/', $datum)) {
  397. require_once 'Zend/Json/Exception.php';
  398. throw new Zend_Json_Exception("Octal notation not supported by JSON (value: $datum)");
  399. } else {
  400. $val = intval($datum);
  401. $fVal = floatval($datum);
  402. $this->_tokenValue = ($val == $fVal ? $val : $fVal);
  403. }
  404. } else {
  405. require_once 'Zend/Json/Exception.php';
  406. throw new Zend_Json_Exception("Illegal number format: $datum");
  407. }
  408. $this->_token = self::DATUM;
  409. $this->_offset = $start + strlen($datum);
  410. }
  411. } else {
  412. require_once 'Zend/Json/Exception.php';
  413. throw new Zend_Json_Exception('Illegal Token');
  414. }
  415. return($this->_token);
  416. }
  417. /**
  418. * Decode Unicode Characters from \u0000 ASCII syntax.
  419. *
  420. * This algorithm was originally developed for the
  421. * Solar Framework by Paul M. Jones
  422. *
  423. * @link http://solarphp.com/
  424. * @link http://svn.solarphp.com/core/trunk/Solar/Json.php
  425. * @param string $value
  426. * @return string
  427. */
  428. public static function decodeUnicodeString($chrs)
  429. {
  430. $delim = substr($chrs, 0, 1);
  431. $utf8 = '';
  432. $strlen_chrs = strlen($chrs);
  433. for($i = 0; $i < $strlen_chrs; $i++) {
  434. $substr_chrs_c_2 = substr($chrs, $i, 2);
  435. $ord_chrs_c = ord($chrs[$i]);
  436. switch (true) {
  437. case preg_match('/\\\u[0-9A-F]{4}/i', substr($chrs, $i, 6)):
  438. // single, escaped unicode character
  439. $utf16 = chr(hexdec(substr($chrs, ($i + 2), 2)))
  440. . chr(hexdec(substr($chrs, ($i + 4), 2)));
  441. $utf8 .= self::_utf162utf8($utf16);
  442. $i += 5;
  443. break;
  444. case ($ord_chrs_c >= 0x20) && ($ord_chrs_c <= 0x7F):
  445. $utf8 .= $chrs{$i};
  446. break;
  447. case ($ord_chrs_c & 0xE0) == 0xC0:
  448. // characters U-00000080 - U-000007FF, mask 110XXXXX
  449. //see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
  450. $utf8 .= substr($chrs, $i, 2);
  451. ++$i;
  452. break;
  453. case ($ord_chrs_c & 0xF0) == 0xE0:
  454. // characters U-00000800 - U-0000FFFF, mask 1110XXXX
  455. // see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
  456. $utf8 .= substr($chrs, $i, 3);
  457. $i += 2;
  458. break;
  459. case ($ord_chrs_c & 0xF8) == 0xF0:
  460. // characters U-00010000 - U-001FFFFF, mask 11110XXX
  461. // see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
  462. $utf8 .= substr($chrs, $i, 4);
  463. $i += 3;
  464. break;
  465. case ($ord_chrs_c & 0xFC) == 0xF8:
  466. // characters U-00200000 - U-03FFFFFF, mask 111110XX
  467. // see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
  468. $utf8 .= substr($chrs, $i, 5);
  469. $i += 4;
  470. break;
  471. case ($ord_chrs_c & 0xFE) == 0xFC:
  472. // characters U-04000000 - U-7FFFFFFF, mask 1111110X
  473. // see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
  474. $utf8 .= substr($chrs, $i, 6);
  475. $i += 5;
  476. break;
  477. }
  478. }
  479. return $utf8;
  480. }
  481. /**
  482. * Convert a string from one UTF-16 char to one UTF-8 char.
  483. *
  484. * Normally should be handled by mb_convert_encoding, but
  485. * provides a slower PHP-only method for installations
  486. * that lack the multibye string extension.
  487. *
  488. * This method is from the Solar Framework by Paul M. Jones
  489. *
  490. * @link http://solarphp.com
  491. * @param string $utf16 UTF-16 character
  492. * @return string UTF-8 character
  493. */
  494. protected static function _utf162utf8($utf16)
  495. {
  496. // Check for mb extension otherwise do by hand.
  497. if( function_exists('mb_convert_encoding') ) {
  498. return mb_convert_encoding($utf16, 'UTF-8', 'UTF-16');
  499. }
  500. $bytes = (ord($utf16{0}) << 8) | ord($utf16{1});
  501. switch (true) {
  502. case ((0x7F & $bytes) == $bytes):
  503. // this case should never be reached, because we are in ASCII range
  504. // see: http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
  505. return chr(0x7F & $bytes);
  506. case (0x07FF & $bytes) == $bytes:
  507. // return a 2-byte UTF-8 character
  508. // see: http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
  509. return chr(0xC0 | (($bytes >> 6) & 0x1F))
  510. . chr(0x80 | ($bytes & 0x3F));
  511. case (0xFFFF & $bytes) == $bytes:
  512. // return a 3-byte UTF-8 character
  513. // see: http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
  514. return chr(0xE0 | (($bytes >> 12) & 0x0F))
  515. . chr(0x80 | (($bytes >> 6) & 0x3F))
  516. . chr(0x80 | ($bytes & 0x3F));
  517. }
  518. // ignoring UTF-32 for now, sorry
  519. return '';
  520. }
  521. }