PageRenderTime 48ms CodeModel.GetById 19ms RepoModel.GetById 0ms app.codeStats 0ms

/library/Zend/Json/Decoder.php

https://github.com/Ikke/Centurion
PHP | 578 lines | 394 code | 41 blank | 143 comment | 32 complexity | 35edcfd2ad45d8b990a92ef2c7013d38 MD5 | raw file
  1. <?php
  2. /**
  3. * Zend Framework
  4. *
  5. * LICENSE
  6. *
  7. * This source file is subject to the new BSD license that is bundled
  8. * with this package in the file LICENSE.txt.
  9. * It is also available through the world-wide-web at this URL:
  10. * http://framework.zend.com/license/new-bsd
  11. * If you did not receive a copy of the license and are unable to
  12. * obtain it through the world-wide-web, please send an email
  13. * to license@zend.com so we can send you a copy immediately.
  14. *
  15. * @category Zend
  16. * @package Zend_Json
  17. * @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
  18. * @license http://framework.zend.com/license/new-bsd New BSD License
  19. * @version $Id$
  20. */
  21. /**
  22. * @see Zend_Json
  23. */
  24. //$1 'Zend/Json.php';
  25. /**
  26. * Decode JSON encoded string to PHP variable constructs
  27. *
  28. * @category Zend
  29. * @package Zend_Json
  30. * @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
  31. * @license http://framework.zend.com/license/new-bsd New BSD License
  32. */
  33. class Zend_Json_Decoder
  34. {
  35. /**
  36. * Parse tokens used to decode the JSON object. These are not
  37. * for public consumption, they are just used internally to the
  38. * class.
  39. */
  40. const EOF = 0;
  41. const DATUM = 1;
  42. const LBRACE = 2;
  43. const LBRACKET = 3;
  44. const RBRACE = 4;
  45. const RBRACKET = 5;
  46. const COMMA = 6;
  47. const COLON = 7;
  48. /**
  49. * Use to maintain a "pointer" to the source being decoded
  50. *
  51. * @var string
  52. */
  53. protected $_source;
  54. /**
  55. * Caches the source length
  56. *
  57. * @var int
  58. */
  59. protected $_sourceLength;
  60. /**
  61. * The offset within the souce being decoded
  62. *
  63. * @var int
  64. *
  65. */
  66. protected $_offset;
  67. /**
  68. * The current token being considered in the parser cycle
  69. *
  70. * @var int
  71. */
  72. protected $_token;
  73. /**
  74. * Flag indicating how objects should be decoded
  75. *
  76. * @var int
  77. * @access protected
  78. */
  79. protected $_decodeType;
  80. /**
  81. * Constructor
  82. *
  83. * @param string $source String source to decode
  84. * @param int $decodeType How objects should be decoded -- see
  85. * {@link Zend_Json::TYPE_ARRAY} and {@link Zend_Json::TYPE_OBJECT} for
  86. * valid values
  87. * @return void
  88. */
  89. protected function __construct($source, $decodeType)
  90. {
  91. // Set defaults
  92. $this->_source = self::decodeUnicodeString($source);
  93. $this->_sourceLength = strlen($this->_source);
  94. $this->_token = self::EOF;
  95. $this->_offset = 0;
  96. // Normalize and set $decodeType
  97. if (!in_array($decodeType, array(Zend_Json::TYPE_ARRAY, Zend_Json::TYPE_OBJECT)))
  98. {
  99. $decodeType = Zend_Json::TYPE_ARRAY;
  100. }
  101. $this->_decodeType = $decodeType;
  102. // Set pointer at first token
  103. $this->_getNextToken();
  104. }
  105. /**
  106. * Decode a JSON source string
  107. *
  108. * Decodes a JSON encoded string. The value returned will be one of the
  109. * following:
  110. * - integer
  111. * - float
  112. * - boolean
  113. * - null
  114. * - StdClass
  115. * - array
  116. * - array of one or more of the above types
  117. *
  118. * By default, decoded objects will be returned as associative arrays; to
  119. * return a StdClass object instead, pass {@link Zend_Json::TYPE_OBJECT} to
  120. * the $objectDecodeType parameter.
  121. *
  122. * Throws a Zend_Json_Exception if the source string is null.
  123. *
  124. * @static
  125. * @access public
  126. * @param string $source String to be decoded
  127. * @param int $objectDecodeType How objects should be decoded; should be
  128. * either or {@link Zend_Json::TYPE_ARRAY} or
  129. * {@link Zend_Json::TYPE_OBJECT}; defaults to TYPE_ARRAY
  130. * @return mixed
  131. * @throws Zend_Json_Exception
  132. */
  133. public static function decode($source = null, $objectDecodeType = Zend_Json::TYPE_ARRAY)
  134. {
  135. if (null === $source) {
  136. //$1 'Zend/Json/Exception.php';
  137. throw new Zend_Json_Exception('Must specify JSON encoded source for decoding');
  138. } elseif (!is_string($source)) {
  139. //$1 'Zend/Json/Exception.php';
  140. throw new Zend_Json_Exception('Can only decode JSON encoded strings');
  141. }
  142. $decoder = new self($source, $objectDecodeType);
  143. return $decoder->_decodeValue();
  144. }
  145. /**
  146. * Recursive driving rountine for supported toplevel tops
  147. *
  148. * @return mixed
  149. */
  150. protected function _decodeValue()
  151. {
  152. switch ($this->_token) {
  153. case self::DATUM:
  154. $result = $this->_tokenValue;
  155. $this->_getNextToken();
  156. return($result);
  157. break;
  158. case self::LBRACE:
  159. return($this->_decodeObject());
  160. break;
  161. case self::LBRACKET:
  162. return($this->_decodeArray());
  163. break;
  164. default:
  165. return null;
  166. break;
  167. }
  168. }
  169. /**
  170. * Decodes an object of the form:
  171. * { "attribute: value, "attribute2" : value,...}
  172. *
  173. * If Zend_Json_Encoder was used to encode the original object then
  174. * a special attribute called __className which specifies a class
  175. * name that should wrap the data contained within the encoded source.
  176. *
  177. * Decodes to either an array or StdClass object, based on the value of
  178. * {@link $_decodeType}. If invalid $_decodeType present, returns as an
  179. * array.
  180. *
  181. * @return array|StdClass
  182. */
  183. protected function _decodeObject()
  184. {
  185. $members = array();
  186. $tok = $this->_getNextToken();
  187. while ($tok && $tok != self::RBRACE) {
  188. if ($tok != self::DATUM || ! is_string($this->_tokenValue)) {
  189. //$1 'Zend/Json/Exception.php';
  190. throw new Zend_Json_Exception('Missing key in object encoding: ' . $this->_source);
  191. }
  192. $key = $this->_tokenValue;
  193. $tok = $this->_getNextToken();
  194. if ($tok != self::COLON) {
  195. //$1 'Zend/Json/Exception.php';
  196. throw new Zend_Json_Exception('Missing ":" in object encoding: ' . $this->_source);
  197. }
  198. $tok = $this->_getNextToken();
  199. $members[$key] = $this->_decodeValue();
  200. $tok = $this->_token;
  201. if ($tok == self::RBRACE) {
  202. break;
  203. }
  204. if ($tok != self::COMMA) {
  205. //$1 'Zend/Json/Exception.php';
  206. throw new Zend_Json_Exception('Missing "," in object encoding: ' . $this->_source);
  207. }
  208. $tok = $this->_getNextToken();
  209. }
  210. switch ($this->_decodeType) {
  211. case Zend_Json::TYPE_OBJECT:
  212. // Create new StdClass and populate with $members
  213. $result = new StdClass();
  214. foreach ($members as $key => $value) {
  215. $result->$key = $value;
  216. }
  217. break;
  218. case Zend_Json::TYPE_ARRAY:
  219. default:
  220. $result = $members;
  221. break;
  222. }
  223. $this->_getNextToken();
  224. return $result;
  225. }
  226. /**
  227. * Decodes a JSON array format:
  228. * [element, element2,...,elementN]
  229. *
  230. * @return array
  231. */
  232. protected function _decodeArray()
  233. {
  234. $result = array();
  235. $starttok = $tok = $this->_getNextToken(); // Move past the '['
  236. $index = 0;
  237. while ($tok && $tok != self::RBRACKET) {
  238. $result[$index++] = $this->_decodeValue();
  239. $tok = $this->_token;
  240. if ($tok == self::RBRACKET || !$tok) {
  241. break;
  242. }
  243. if ($tok != self::COMMA) {
  244. //$1 'Zend/Json/Exception.php';
  245. throw new Zend_Json_Exception('Missing "," in array encoding: ' . $this->_source);
  246. }
  247. $tok = $this->_getNextToken();
  248. }
  249. $this->_getNextToken();
  250. return($result);
  251. }
  252. /**
  253. * Removes whitepsace characters from the source input
  254. */
  255. protected function _eatWhitespace()
  256. {
  257. if (preg_match(
  258. '/([\t\b\f\n\r ])*/s',
  259. $this->_source,
  260. $matches,
  261. PREG_OFFSET_CAPTURE,
  262. $this->_offset)
  263. && $matches[0][1] == $this->_offset)
  264. {
  265. $this->_offset += strlen($matches[0][0]);
  266. }
  267. }
  268. /**
  269. * Retrieves the next token from the source stream
  270. *
  271. * @return int Token constant value specified in class definition
  272. */
  273. protected function _getNextToken()
  274. {
  275. $this->_token = self::EOF;
  276. $this->_tokenValue = null;
  277. $this->_eatWhitespace();
  278. if ($this->_offset >= $this->_sourceLength) {
  279. return(self::EOF);
  280. }
  281. $str = $this->_source;
  282. $str_length = $this->_sourceLength;
  283. $i = $this->_offset;
  284. $start = $i;
  285. switch ($str{$i}) {
  286. case '{':
  287. $this->_token = self::LBRACE;
  288. break;
  289. case '}':
  290. $this->_token = self::RBRACE;
  291. break;
  292. case '[':
  293. $this->_token = self::LBRACKET;
  294. break;
  295. case ']':
  296. $this->_token = self::RBRACKET;
  297. break;
  298. case ',':
  299. $this->_token = self::COMMA;
  300. break;
  301. case ':':
  302. $this->_token = self::COLON;
  303. break;
  304. case '"':
  305. $result = '';
  306. do {
  307. $i++;
  308. if ($i >= $str_length) {
  309. break;
  310. }
  311. $chr = $str{$i};
  312. if ($chr == '\\') {
  313. $i++;
  314. if ($i >= $str_length) {
  315. break;
  316. }
  317. $chr = $str{$i};
  318. switch ($chr) {
  319. case '"' :
  320. $result .= '"';
  321. break;
  322. case '\\':
  323. $result .= '\\';
  324. break;
  325. case '/' :
  326. $result .= '/';
  327. break;
  328. case 'b' :
  329. $result .= chr(8);
  330. break;
  331. case 'f' :
  332. $result .= chr(12);
  333. break;
  334. case 'n' :
  335. $result .= chr(10);
  336. break;
  337. case 'r' :
  338. $result .= chr(13);
  339. break;
  340. case 't' :
  341. $result .= chr(9);
  342. break;
  343. case '\'' :
  344. $result .= '\'';
  345. break;
  346. default:
  347. //$1 'Zend/Json/Exception.php';
  348. throw new Zend_Json_Exception("Illegal escape "
  349. . "sequence '" . $chr . "'");
  350. }
  351. } elseif($chr == '"') {
  352. break;
  353. } else {
  354. $result .= $chr;
  355. }
  356. } while ($i < $str_length);
  357. $this->_token = self::DATUM;
  358. //$this->_tokenValue = substr($str, $start + 1, $i - $start - 1);
  359. $this->_tokenValue = $result;
  360. break;
  361. case 't':
  362. if (($i+ 3) < $str_length && substr($str, $start, 4) == "true") {
  363. $this->_token = self::DATUM;
  364. }
  365. $this->_tokenValue = true;
  366. $i += 3;
  367. break;
  368. case 'f':
  369. if (($i+ 4) < $str_length && substr($str, $start, 5) == "false") {
  370. $this->_token = self::DATUM;
  371. }
  372. $this->_tokenValue = false;
  373. $i += 4;
  374. break;
  375. case 'n':
  376. if (($i+ 3) < $str_length && substr($str, $start, 4) == "null") {
  377. $this->_token = self::DATUM;
  378. }
  379. $this->_tokenValue = NULL;
  380. $i += 3;
  381. break;
  382. }
  383. if ($this->_token != self::EOF) {
  384. $this->_offset = $i + 1; // Consume the last token character
  385. return($this->_token);
  386. }
  387. $chr = $str{$i};
  388. if ($chr == '-' || $chr == '.' || ($chr >= '0' && $chr <= '9')) {
  389. if (preg_match('/-?([0-9])*(\.[0-9]*)?((e|E)((-|\+)?)[0-9]+)?/s',
  390. $str, $matches, PREG_OFFSET_CAPTURE, $start) && $matches[0][1] == $start) {
  391. $datum = $matches[0][0];
  392. if (is_numeric($datum)) {
  393. if (preg_match('/^0\d+$/', $datum)) {
  394. //$1 'Zend/Json/Exception.php';
  395. throw new Zend_Json_Exception("Octal notation not supported by JSON (value: $datum)");
  396. } else {
  397. $val = intval($datum);
  398. $fVal = floatval($datum);
  399. $this->_tokenValue = ($val == $fVal ? $val : $fVal);
  400. }
  401. } else {
  402. //$1 'Zend/Json/Exception.php';
  403. throw new Zend_Json_Exception("Illegal number format: $datum");
  404. }
  405. $this->_token = self::DATUM;
  406. $this->_offset = $start + strlen($datum);
  407. }
  408. } else {
  409. //$1 'Zend/Json/Exception.php';
  410. throw new Zend_Json_Exception('Illegal Token');
  411. }
  412. return($this->_token);
  413. }
  414. /**
  415. * Decode Unicode Characters from \u0000 ASCII syntax.
  416. *
  417. * This algorithm was originally developed for the
  418. * Solar Framework by Paul M. Jones
  419. *
  420. * @link http://solarphp.com/
  421. * @link http://svn.solarphp.com/core/trunk/Solar/Json.php
  422. * @param string $value
  423. * @return string
  424. */
  425. public static function decodeUnicodeString($chrs)
  426. {
  427. $delim = substr($chrs, 0, 1);
  428. $utf8 = '';
  429. $strlen_chrs = strlen($chrs);
  430. for($i = 0; $i < $strlen_chrs; $i++) {
  431. $substr_chrs_c_2 = substr($chrs, $i, 2);
  432. $ord_chrs_c = ord($chrs[$i]);
  433. switch (true) {
  434. case preg_match('/\\\u[0-9A-F]{4}/i', substr($chrs, $i, 6)):
  435. // single, escaped unicode character
  436. $utf16 = chr(hexdec(substr($chrs, ($i + 2), 2)))
  437. . chr(hexdec(substr($chrs, ($i + 4), 2)));
  438. $utf8 .= self::_utf162utf8($utf16);
  439. $i += 5;
  440. break;
  441. case ($ord_chrs_c >= 0x20) && ($ord_chrs_c <= 0x7F):
  442. $utf8 .= $chrs{$i};
  443. break;
  444. case ($ord_chrs_c & 0xE0) == 0xC0:
  445. // characters U-00000080 - U-000007FF, mask 110XXXXX
  446. //see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
  447. $utf8 .= substr($chrs, $i, 2);
  448. ++$i;
  449. break;
  450. case ($ord_chrs_c & 0xF0) == 0xE0:
  451. // characters U-00000800 - U-0000FFFF, mask 1110XXXX
  452. // see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
  453. $utf8 .= substr($chrs, $i, 3);
  454. $i += 2;
  455. break;
  456. case ($ord_chrs_c & 0xF8) == 0xF0:
  457. // characters U-00010000 - U-001FFFFF, mask 11110XXX
  458. // see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
  459. $utf8 .= substr($chrs, $i, 4);
  460. $i += 3;
  461. break;
  462. case ($ord_chrs_c & 0xFC) == 0xF8:
  463. // characters U-00200000 - U-03FFFFFF, mask 111110XX
  464. // see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
  465. $utf8 .= substr($chrs, $i, 5);
  466. $i += 4;
  467. break;
  468. case ($ord_chrs_c & 0xFE) == 0xFC:
  469. // characters U-04000000 - U-7FFFFFFF, mask 1111110X
  470. // see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
  471. $utf8 .= substr($chrs, $i, 6);
  472. $i += 5;
  473. break;
  474. }
  475. }
  476. return $utf8;
  477. }
  478. /**
  479. * Convert a string from one UTF-16 char to one UTF-8 char.
  480. *
  481. * Normally should be handled by mb_convert_encoding, but
  482. * provides a slower PHP-only method for installations
  483. * that lack the multibye string extension.
  484. *
  485. * This method is from the Solar Framework by Paul M. Jones
  486. *
  487. * @link http://solarphp.com
  488. * @param string $utf16 UTF-16 character
  489. * @return string UTF-8 character
  490. */
  491. protected static function _utf162utf8($utf16)
  492. {
  493. // Check for mb extension otherwise do by hand.
  494. if( function_exists('mb_convert_encoding') ) {
  495. return mb_convert_encoding($utf16, 'UTF-8', 'UTF-16');
  496. }
  497. $bytes = (ord($utf16{0}) << 8) | ord($utf16{1});
  498. switch (true) {
  499. case ((0x7F & $bytes) == $bytes):
  500. // this case should never be reached, because we are in ASCII range
  501. // see: http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
  502. return chr(0x7F & $bytes);
  503. case (0x07FF & $bytes) == $bytes:
  504. // return a 2-byte UTF-8 character
  505. // see: http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
  506. return chr(0xC0 | (($bytes >> 6) & 0x1F))
  507. . chr(0x80 | ($bytes & 0x3F));
  508. case (0xFFFF & $bytes) == $bytes:
  509. // return a 3-byte UTF-8 character
  510. // see: http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
  511. return chr(0xE0 | (($bytes >> 12) & 0x0F))
  512. . chr(0x80 | (($bytes >> 6) & 0x3F))
  513. . chr(0x80 | ($bytes & 0x3F));
  514. }
  515. // ignoring UTF-32 for now, sorry
  516. return '';
  517. }
  518. }