PageRenderTime 58ms CodeModel.GetById 30ms RepoModel.GetById 0ms app.codeStats 1ms

/vendor/ZF2/library/Zend/Json/Decoder.php

https://github.com/XataWork/zf2-project
PHP | 557 lines | 398 code | 41 blank | 118 comment | 33 complexity | af35eb7a105057525d64bb1f76fab06d MD5 | raw file
Possible License(s): BSD-3-Clause
  1. <?php
  2. /**
  3. * Zend Framework (http://framework.zend.com/)
  4. *
  5. * @link http://github.com/zendframework/zf2 for the canonical source repository
  6. * @copyright Copyright (c) 2005-2014 Zend Technologies USA Inc. (http://www.zend.com)
  7. * @license http://framework.zend.com/license/new-bsd New BSD License
  8. */
  9. namespace Zend\Json;
  10. use stdClass;
  11. use Zend\Json\Exception\InvalidArgumentException;
  12. use Zend\Json\Exception\RuntimeException;
  13. /**
  14. * Decode JSON encoded string to PHP variable constructs
  15. */
  16. class Decoder
  17. {
  18. /**
  19. * Parse tokens used to decode the JSON object. These are not
  20. * for public consumption, they are just used internally to the
  21. * class.
  22. */
  23. const EOF = 0;
  24. const DATUM = 1;
  25. const LBRACE = 2;
  26. const LBRACKET = 3;
  27. const RBRACE = 4;
  28. const RBRACKET = 5;
  29. const COMMA = 6;
  30. const COLON = 7;
  31. /**
  32. * Use to maintain a "pointer" to the source being decoded
  33. *
  34. * @var string
  35. */
  36. protected $source;
  37. /**
  38. * Caches the source length
  39. *
  40. * @var int
  41. */
  42. protected $sourceLength;
  43. /**
  44. * The offset within the source being decoded
  45. *
  46. * @var int
  47. *
  48. */
  49. protected $offset;
  50. /**
  51. * The current token being considered in the parser cycle
  52. *
  53. * @var int
  54. */
  55. protected $token;
  56. /**
  57. * Flag indicating how objects should be decoded
  58. *
  59. * @var int
  60. * @access protected
  61. */
  62. protected $decodeType;
  63. /**
  64. * @var $_tokenValue
  65. */
  66. protected $tokenValue;
  67. /**
  68. * Constructor
  69. *
  70. * @param string $source String source to decode
  71. * @param int $decodeType How objects should be decoded -- see
  72. * {@link Zend\Json\Json::TYPE_ARRAY} and {@link Zend\Json\Json::TYPE_OBJECT} for
  73. * valid values
  74. * @throws InvalidArgumentException
  75. */
  76. protected function __construct($source, $decodeType)
  77. {
  78. // Set defaults
  79. $this->source = self::decodeUnicodeString($source);
  80. $this->sourceLength = strlen($this->source);
  81. $this->token = self::EOF;
  82. $this->offset = 0;
  83. switch ($decodeType) {
  84. case Json::TYPE_ARRAY:
  85. case Json::TYPE_OBJECT:
  86. $this->decodeType = $decodeType;
  87. break;
  88. default:
  89. throw new InvalidArgumentException("Unknown decode type '{$decodeType}', please use one of the constants Json::TYPE_*");
  90. }
  91. // Set pointer at first token
  92. $this->_getNextToken();
  93. }
  94. /**
  95. * Decode a JSON source string
  96. *
  97. * Decodes a JSON encoded string. The value returned will be one of the
  98. * following:
  99. * - integer
  100. * - float
  101. * - boolean
  102. * - null
  103. * - stdClass
  104. * - array
  105. * - array of one or more of the above types
  106. *
  107. * By default, decoded objects will be returned as associative arrays; to
  108. * return a stdClass object instead, pass {@link Zend\Json\Json::TYPE_OBJECT} to
  109. * the $objectDecodeType parameter.
  110. *
  111. * @static
  112. * @access public
  113. * @param string $source String to be decoded
  114. * @param int $objectDecodeType How objects should be decoded; should be
  115. * either or {@link Zend\Json\Json::TYPE_ARRAY} or
  116. * {@link Zend\Json\Json::TYPE_OBJECT}; defaults to TYPE_ARRAY
  117. * @return mixed
  118. */
  119. public static function decode($source, $objectDecodeType = Json::TYPE_OBJECT)
  120. {
  121. $decoder = new static($source, $objectDecodeType);
  122. return $decoder->_decodeValue();
  123. }
  124. /**
  125. * Recursive driving routine for supported toplevel tops
  126. *
  127. * @return mixed
  128. */
  129. protected function _decodeValue()
  130. {
  131. switch ($this->token) {
  132. case self::DATUM:
  133. $result = $this->tokenValue;
  134. $this->_getNextToken();
  135. return($result);
  136. break;
  137. case self::LBRACE:
  138. return($this->_decodeObject());
  139. break;
  140. case self::LBRACKET:
  141. return($this->_decodeArray());
  142. break;
  143. default:
  144. return null;
  145. break;
  146. }
  147. }
  148. /**
  149. * Decodes an object of the form:
  150. * { "attribute: value, "attribute2" : value,...}
  151. *
  152. * If Zend\Json\Encoder was used to encode the original object then
  153. * a special attribute called __className which specifies a class
  154. * name that should wrap the data contained within the encoded source.
  155. *
  156. * Decodes to either an array or stdClass object, based on the value of
  157. * {@link $decodeType}. If invalid $decodeType present, returns as an
  158. * array.
  159. *
  160. * @return array|stdClass
  161. * @throws RuntimeException
  162. */
  163. protected function _decodeObject()
  164. {
  165. $members = array();
  166. $tok = $this->_getNextToken();
  167. while ($tok && $tok != self::RBRACE) {
  168. if ($tok != self::DATUM || ! is_string($this->tokenValue)) {
  169. throw new RuntimeException('Missing key in object encoding: ' . $this->source);
  170. }
  171. $key = $this->tokenValue;
  172. $tok = $this->_getNextToken();
  173. if ($tok != self::COLON) {
  174. throw new RuntimeException('Missing ":" in object encoding: ' . $this->source);
  175. }
  176. $tok = $this->_getNextToken();
  177. $members[$key] = $this->_decodeValue();
  178. $tok = $this->token;
  179. if ($tok == self::RBRACE) {
  180. break;
  181. }
  182. if ($tok != self::COMMA) {
  183. throw new RuntimeException('Missing "," in object encoding: ' . $this->source);
  184. }
  185. $tok = $this->_getNextToken();
  186. }
  187. switch ($this->decodeType) {
  188. case Json::TYPE_OBJECT:
  189. // Create new stdClass and populate with $members
  190. $result = new stdClass();
  191. foreach ($members as $key => $value) {
  192. if ($key === '') {
  193. $key = '_empty_';
  194. }
  195. $result->$key = $value;
  196. }
  197. break;
  198. case Json::TYPE_ARRAY:
  199. default:
  200. $result = $members;
  201. break;
  202. }
  203. $this->_getNextToken();
  204. return $result;
  205. }
  206. /**
  207. * Decodes a JSON array format:
  208. * [element, element2,...,elementN]
  209. *
  210. * @return array
  211. * @throws RuntimeException
  212. */
  213. protected function _decodeArray()
  214. {
  215. $result = array();
  216. $tok = $this->_getNextToken(); // Move past the '['
  217. $index = 0;
  218. while ($tok && $tok != self::RBRACKET) {
  219. $result[$index++] = $this->_decodeValue();
  220. $tok = $this->token;
  221. if ($tok == self::RBRACKET || !$tok) {
  222. break;
  223. }
  224. if ($tok != self::COMMA) {
  225. throw new RuntimeException('Missing "," in array encoding: ' . $this->source);
  226. }
  227. $tok = $this->_getNextToken();
  228. }
  229. $this->_getNextToken();
  230. return $result;
  231. }
  232. /**
  233. * Removes whitespace characters from the source input
  234. */
  235. protected function _eatWhitespace()
  236. {
  237. if (preg_match(
  238. '/([\t\b\f\n\r ])*/s',
  239. $this->source,
  240. $matches,
  241. PREG_OFFSET_CAPTURE,
  242. $this->offset)
  243. && $matches[0][1] == $this->offset)
  244. {
  245. $this->offset += strlen($matches[0][0]);
  246. }
  247. }
  248. /**
  249. * Retrieves the next token from the source stream
  250. *
  251. * @return int Token constant value specified in class definition
  252. * @throws RuntimeException
  253. */
  254. protected function _getNextToken()
  255. {
  256. $this->token = self::EOF;
  257. $this->tokenValue = null;
  258. $this->_eatWhitespace();
  259. if ($this->offset >= $this->sourceLength) {
  260. return(self::EOF);
  261. }
  262. $str = $this->source;
  263. $strLength = $this->sourceLength;
  264. $i = $this->offset;
  265. $start = $i;
  266. switch ($str{$i}) {
  267. case '{':
  268. $this->token = self::LBRACE;
  269. break;
  270. case '}':
  271. $this->token = self::RBRACE;
  272. break;
  273. case '[':
  274. $this->token = self::LBRACKET;
  275. break;
  276. case ']':
  277. $this->token = self::RBRACKET;
  278. break;
  279. case ',':
  280. $this->token = self::COMMA;
  281. break;
  282. case ':':
  283. $this->token = self::COLON;
  284. break;
  285. case '"':
  286. $result = '';
  287. do {
  288. $i++;
  289. if ($i >= $strLength) {
  290. break;
  291. }
  292. $chr = $str{$i};
  293. if ($chr == '\\') {
  294. $i++;
  295. if ($i >= $strLength) {
  296. break;
  297. }
  298. $chr = $str{$i};
  299. switch ($chr) {
  300. case '"' :
  301. $result .= '"';
  302. break;
  303. case '\\':
  304. $result .= '\\';
  305. break;
  306. case '/' :
  307. $result .= '/';
  308. break;
  309. case 'b' :
  310. $result .= "\x08";
  311. break;
  312. case 'f' :
  313. $result .= "\x0c";
  314. break;
  315. case 'n' :
  316. $result .= "\x0a";
  317. break;
  318. case 'r' :
  319. $result .= "\x0d";
  320. break;
  321. case 't' :
  322. $result .= "\x09";
  323. break;
  324. case '\'' :
  325. $result .= '\'';
  326. break;
  327. default:
  328. throw new RuntimeException("Illegal escape sequence '{$chr}'");
  329. }
  330. } elseif ($chr == '"') {
  331. break;
  332. } else {
  333. $result .= $chr;
  334. }
  335. } while ($i < $strLength);
  336. $this->token = self::DATUM;
  337. //$this->tokenValue = substr($str, $start + 1, $i - $start - 1);
  338. $this->tokenValue = $result;
  339. break;
  340. case 't':
  341. if (($i+ 3) < $strLength && substr($str, $start, 4) == "true") {
  342. $this->token = self::DATUM;
  343. }
  344. $this->tokenValue = true;
  345. $i += 3;
  346. break;
  347. case 'f':
  348. if (($i+ 4) < $strLength && substr($str, $start, 5) == "false") {
  349. $this->token = self::DATUM;
  350. }
  351. $this->tokenValue = false;
  352. $i += 4;
  353. break;
  354. case 'n':
  355. if (($i+ 3) < $strLength && substr($str, $start, 4) == "null") {
  356. $this->token = self::DATUM;
  357. }
  358. $this->tokenValue = NULL;
  359. $i += 3;
  360. break;
  361. }
  362. if ($this->token != self::EOF) {
  363. $this->offset = $i + 1; // Consume the last token character
  364. return($this->token);
  365. }
  366. $chr = $str{$i};
  367. if ($chr == '-' || $chr == '.' || ($chr >= '0' && $chr <= '9')) {
  368. if (preg_match('/-?([0-9])*(\.[0-9]*)?((e|E)((-|\+)?)[0-9]+)?/s',
  369. $str, $matches, PREG_OFFSET_CAPTURE, $start) && $matches[0][1] == $start) {
  370. $datum = $matches[0][0];
  371. if (is_numeric($datum)) {
  372. if (preg_match('/^0\d+$/', $datum)) {
  373. throw new RuntimeException("Octal notation not supported by JSON (value: {$datum})");
  374. } else {
  375. $val = intval($datum);
  376. $fVal = floatval($datum);
  377. $this->tokenValue = ($val == $fVal ? $val : $fVal);
  378. }
  379. } else {
  380. throw new RuntimeException("Illegal number format: {$datum}");
  381. }
  382. $this->token = self::DATUM;
  383. $this->offset = $start + strlen($datum);
  384. }
  385. } else {
  386. throw new RuntimeException('Illegal Token');
  387. }
  388. return $this->token;
  389. }
  390. /**
  391. * Decode Unicode Characters from \u0000 ASCII syntax.
  392. *
  393. * This algorithm was originally developed for the
  394. * Solar Framework by Paul M. Jones
  395. *
  396. * @link http://solarphp.com/
  397. * @link https://github.com/solarphp/core/blob/master/Solar/Json.php
  398. * @param string $chrs
  399. * @return string
  400. */
  401. public static function decodeUnicodeString($chrs)
  402. {
  403. $chrs = (string) $chrs;
  404. $utf8 = '';
  405. $strlenChrs = strlen($chrs);
  406. for ($i = 0; $i < $strlenChrs; $i++) {
  407. $ordChrsC = ord($chrs[$i]);
  408. switch (true) {
  409. case preg_match('/\\\u[0-9A-F]{4}/i', substr($chrs, $i, 6)):
  410. // single, escaped unicode character
  411. $utf16 = chr(hexdec(substr($chrs, ($i + 2), 2)))
  412. . chr(hexdec(substr($chrs, ($i + 4), 2)));
  413. $utf8char = self::_utf162utf8($utf16);
  414. $search = array('\\', "\n", "\t", "\r", chr(0x08), chr(0x0C), '"', '\'', '/');
  415. if (in_array($utf8char, $search)) {
  416. $replace = array('\\\\', '\\n', '\\t', '\\r', '\\b', '\\f', '\\"', '\\\'', '\\/');
  417. $utf8char = str_replace($search, $replace, $utf8char);
  418. }
  419. $utf8 .= $utf8char;
  420. $i += 5;
  421. break;
  422. case ($ordChrsC >= 0x20) && ($ordChrsC <= 0x7F):
  423. $utf8 .= $chrs{$i};
  424. break;
  425. case ($ordChrsC & 0xE0) == 0xC0:
  426. // characters U-00000080 - U-000007FF, mask 110XXXXX
  427. //see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
  428. $utf8 .= substr($chrs, $i, 2);
  429. ++$i;
  430. break;
  431. case ($ordChrsC & 0xF0) == 0xE0:
  432. // characters U-00000800 - U-0000FFFF, mask 1110XXXX
  433. // see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
  434. $utf8 .= substr($chrs, $i, 3);
  435. $i += 2;
  436. break;
  437. case ($ordChrsC & 0xF8) == 0xF0:
  438. // characters U-00010000 - U-001FFFFF, mask 11110XXX
  439. // see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
  440. $utf8 .= substr($chrs, $i, 4);
  441. $i += 3;
  442. break;
  443. case ($ordChrsC & 0xFC) == 0xF8:
  444. // characters U-00200000 - U-03FFFFFF, mask 111110XX
  445. // see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
  446. $utf8 .= substr($chrs, $i, 5);
  447. $i += 4;
  448. break;
  449. case ($ordChrsC & 0xFE) == 0xFC:
  450. // characters U-04000000 - U-7FFFFFFF, mask 1111110X
  451. // see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
  452. $utf8 .= substr($chrs, $i, 6);
  453. $i += 5;
  454. break;
  455. }
  456. }
  457. return $utf8;
  458. }
  459. /**
  460. * Convert a string from one UTF-16 char to one UTF-8 char.
  461. *
  462. * Normally should be handled by mb_convert_encoding, but
  463. * provides a slower PHP-only method for installations
  464. * that lack the multibyte string extension.
  465. *
  466. * This method is from the Solar Framework by Paul M. Jones
  467. *
  468. * @link http://solarphp.com
  469. * @param string $utf16 UTF-16 character
  470. * @return string UTF-8 character
  471. */
  472. protected static function _utf162utf8($utf16)
  473. {
  474. // Check for mb extension otherwise do by hand.
  475. if (function_exists('mb_convert_encoding')) {
  476. return mb_convert_encoding($utf16, 'UTF-8', 'UTF-16');
  477. }
  478. $bytes = (ord($utf16{0}) << 8) | ord($utf16{1});
  479. switch (true) {
  480. case ((0x7F & $bytes) == $bytes):
  481. // this case should never be reached, because we are in ASCII range
  482. // see: http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
  483. return chr(0x7F & $bytes);
  484. case (0x07FF & $bytes) == $bytes:
  485. // return a 2-byte UTF-8 character
  486. // see: http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
  487. return chr(0xC0 | (($bytes >> 6) & 0x1F))
  488. . chr(0x80 | ($bytes & 0x3F));
  489. case (0xFFFF & $bytes) == $bytes:
  490. // return a 3-byte UTF-8 character
  491. // see: http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
  492. return chr(0xE0 | (($bytes >> 12) & 0x0F))
  493. . chr(0x80 | (($bytes >> 6) & 0x3F))
  494. . chr(0x80 | ($bytes & 0x3F));
  495. }
  496. // ignoring UTF-32 for now, sorry
  497. return '';
  498. }
  499. }