PageRenderTime 54ms CodeModel.GetById 26ms RepoModel.GetById 0ms app.codeStats 0ms

/Genv/Json.php

https://gitlab.com/winiceo/levengit
PHP | 898 lines | 487 code | 109 blank | 302 comment | 116 complexity | 097c60724cbbc2db9963ae3b9eba1958 MD5 | raw file
  1. <?php
  2. /**
  3. *
  4. * Class for wrapping JSON encoding/decoding functionality.
  5. *
  6. * Given that the json extension to PHP will be enabled by default in
  7. * PHP 5.2.0+, Genv_Json allows users to get a jump on JSON encoding and
  8. * decoding early if the native json_* functions are not present.
  9. *
  10. * Genv_Json::encode and Genv_Json::decode functions are designed
  11. * to pass the same unit tests bundled with the native PHP json ext.
  12. *
  13. * Based largely on the Services_JSON package by Michal Migurski, Matt Knapp
  14. * and Brett Stimmerman. See the original code at
  15. * <http://mike.teczno.com/JSON/JSON.phps>
  16. *
  17. * @category Genv
  18. *
  19. * @package Genv_Json JSON data formatting and checking.
  20. *
  21. * @author Michal Migurski <mike-json@teczno.com>
  22. *
  23. * @author Matt Knapp <mdknapp[at]gmail[dot]com>
  24. *
  25. * @author Brett Stimmerman <brettstimmerman[at]gmail[dot]com>
  26. *
  27. * @author Clay Loveless <clay@killersoft.com>
  28. *
  29. * @license http://opensource.org/licenses/bsd-license.php BSD
  30. *
  31. * @version $Id: Json.php 4380 2010-02-14 16:06:52Z pmjones $
  32. *
  33. */
  class Genv_Json extends Genv_Base
  {
      /**
       *
       * Default configuration values.
       *
       * The methods below read these settings through `$this->_config`,
       * which is not declared in this file (presumably Genv_Base merges
       * this array into it -- confirm against Genv_Base).
       *
       * @config bool bypass_ext Flag to instruct Genv_Json to bypass the
       * native json extension, if installed.
       *
       * @config bool bypass_mb Flag to instruct Genv_Json to bypass the
       * native mb_convert_encoding() function, if installed.
       *
       * @config bool noerror Flag to instruct Genv_Json to return null
       * for values it cannot encode rather than throwing an exception
       * (PHP-only encoding) or raising PHP warnings (native json_encode()
       * function).
       *
       * @var array
       *
       */
      protected $_Genv_Json = array(
          'bypass_ext' => false,
          'bypass_mb'  => false,
          'noerror'    => false
      );

      /**
       *
       * Marker constants for use in _json_decode().
       *
       * SLICE, IN_STR, IN_ARR, IN_OBJ and IN_CMT tag the entries of the
       * parser stack; IN_ARR and IN_OBJ are also stored in $_level.
       *
       * @constant
       *
       */
      const SLICE  = 1;
      const IN_STR = 2;
      const IN_ARR = 3;
      const IN_OBJ = 4;
      const IN_CMT = 5;

      /**
       *
       * Nest level marker for determining correct behavior of decoding string
       * representations of numbers and boolean values: scalars are only
       * returned while this holds self::IN_ARR or self::IN_OBJ.
       *
       * @var int
       *
       */
      protected $_level;

      /**
       *
       * Encodes the mixed $valueToEncode into JSON format.
       *
       * Uses the native json_encode() when it exists and 'bypass_ext' is off,
       * otherwise the PHP-only _json_encode().
       *
       * @param mixed $valueToEncode Value to be encoded into JSON format.
       *
       * @param array $deQuote Array of keys whose values should **not** be
       * quoted in the encoded string.
       *
       * @return string JSON encoded value.
       *
       */
      public function encode($valueToEncode, $deQuote = array())
      {
          if (!$this->_config['bypass_ext'] && function_exists('json_encode')) {
              $silence = $this->_config['noerror'];
              if ($silence) {
                  // Mask E_WARNING out of the current level. The previous
                  // expression, E_ERROR ^ E_WARNING, evaluates to
                  // E_ERROR | E_WARNING and therefore *enabled* warnings.
                  $old_errlevel = error_reporting();
                  error_reporting($old_errlevel & ~E_WARNING);
              }
              $encoded = json_encode($valueToEncode);
              if ($silence) {
                  error_reporting($old_errlevel);
              }
          } else {
              // Fall back to PHP-only method
              $encoded = $this->_json_encode($valueToEncode);
          }

          // Sometimes you just don't want some values quoted
          if (!empty($deQuote)) {
              $encoded = $this->_deQuote($encoded, $deQuote);
          }

          return $encoded;
      }

      /**
       *
       * Accepts a JSON-encoded string, and removes quotes around values of
       * keys specified in the $keys array.
       *
       * Sometimes, such as when constructing behaviors on the fly for
       * "onSuccess" handlers to an Ajax request, the value needs to **not**
       * have quotes around it. This method removes those quotes and performs
       * stripslashes() on the quoted value.
       *
       * @param string $encoded JSON-encoded string.
       *
       * @param array $keys Array of keys whose values should be de-quoted.
       * Keys are matched literally.
       *
       * @return string The cleaned string.
       *
       */
      protected function _deQuote($encoded, $keys)
      {
          foreach ($keys as $key) {
              // preg_quote() keeps regex metacharacters in a key (".", "/",
              // "(" ...) from corrupting or altering the pattern.
              $pattern = "/(\"" . preg_quote($key, '/') . "\"\:)(\".*(?:[^\\\]\"))/U";
              $encoded = preg_replace_callback(
                  $pattern,
                  array($this, '_stripvalueslashes'),
                  $encoded
              );
          }
          return $encoded;
      }

      /**
       *
       * Callback for preg_replace_callback() in _deQuote().
       *
       * Returns the key match followed by the value match with its first and
       * last characters (the double quotes) removed and stripslashes() run
       * on the remainder.
       *
       * @param array $matches Regexp matches: [1] is the `"key":` part,
       * [2] the quoted value.
       *
       * @return string Replacement string.
       *
       */
      protected function _stripvalueslashes($matches)
      {
          return $matches[1] . stripslashes(substr($matches[2], 1, -1));
      }

      /**
       *
       * Decodes the $encodedValue string which is encoded in the JSON format.
       *
       * For compatibility with the native json_decode() function, this
       * method accepts the $encodedValue string and an optional boolean
       * $asArray which indicates whether or not the decoded value should be
       * returned as an array. The default is false, meaning objects are
       * returned as objects.
       *
       * For compliance with the [JSON specification][], no attempt is made to
       * decode strings that are obviously not encoded arrays or objects:
       * null is returned unless the first non-whitespace character is
       * `{` or `[`.
       *
       * [JSON specification]: http://www.ietf.org/rfc/rfc4627.txt
       *
       * @param string $encodedValue String encoded in JSON format.
       *
       * @param bool $asArray Optional argument to decode as an array.
       * Default false.
       *
       * @return mixed Decoded value, or null when the input is rejected.
       *
       */
      public function decode($encodedValue, $asArray = false)
      {
          $first_char = substr(ltrim($encodedValue), 0, 1);
          if ($first_char !== '{' && $first_char !== '[') {
              return null;
          }

          if (!$this->_config['bypass_ext'] && function_exists('json_decode')) {
              return json_decode($encodedValue, (bool) $asArray);
          }

          // Fall back to PHP-only method: validate first, then decode.
          $this->_level = 0;
          $checker = Genv::factory('Genv_Json_Checker');
          if ($checker->isValid($encodedValue)) {
              return $this->_json_decode($encodedValue, (bool) $asArray);
          }
          return null;
      }

      /**
       *
       * Encodes the mixed $var into the JSON format, without use of the
       * native PHP json extension.
       *
       * @param mixed $var Any number, boolean, string, array, or object
       * to be encoded. Strings are expected to be in ASCII or UTF-8 format.
       *
       * @return string JSON string representation of the input value. Values
       * of any other type yield 'null' when the 'noerror' setting is on.
       *
       * @throws Exception The object built by Genv::exception() with code
       * ERR_CANNOT_ENCODE, when the type cannot be encoded and 'noerror'
       * is off.
       *
       */
      protected function _json_encode($var)
      {
          switch (gettype($var)) {
              case 'boolean':
                  return $var ? 'true' : 'false';

              case 'NULL':
                  return 'null';

              case 'integer':
              case 'double':
              case 'float':
                  // BREAK WITH Services_JSON: ext/json returns a string for
                  // numbers, so we do too.
                  return (string) $var;

              case 'string':
                  return $this->_json_encode_string($var);

              case 'array':
                  // As per the JSON spec, if any array key is not an integer
                  // the whole array must be treated as an object. A
                  // non-empty array whose keys are not exactly 0..n-1 (for
                  // example a sparse numeric array) is encoded as an object
                  // too, so its keys are kept rather than remapped.
                  if (count($var) &&
                      (array_keys($var) !== range(0, sizeof($var) - 1))) {
                      $properties = array_map(
                          array($this, '_name_value'),
                          array_keys($var),
                          array_values($var)
                      );
                      return '{' . implode(',', $properties) . '}';
                  }

                  // treat it like a regular array
                  $elements = array_map(array($this, '_json_encode'), $var);
                  return '[' . implode(',', $elements) . ']';

              case 'object':
                  $vars = get_object_vars($var);
                  $properties = array_map(
                      array($this, '_name_value'),
                      array_keys($vars),
                      array_values($vars)
                  );
                  return '{' . implode(',', $properties) . '}';

              default:
                  if ($this->_config['noerror']) {
                      return 'null';
                  }
                  throw Genv::exception(
                      'Genv_Json',
                      'ERR_CANNOT_ENCODE',
                      gettype($var) . ' cannot be encoded as a JSON string',
                      array('var' => $var)
                  );
          }
      }

      /**
       *
       * Encodes one PHP string as a double-quoted JSON string literal.
       *
       * Walks the string byte by byte: short escapes for \b \t \n \f \r,
       * a backslash before `"`, `/` and `\`, \u00XX for the remaining
       * control characters, printable ASCII (and 0x7F) unchanged, and
       * \uXXXX escapes for multi-byte UTF-8 sequences. Bytes that cannot
       * start a sequence (0x80-0xBF, 0xFE, 0xFF) are dropped.
       *
       * @param string $var String in ASCII or UTF-8 format.
       *
       * @return string The JSON string literal, including its quotes.
       *
       */
      protected function _json_encode_string($var)
      {
          $ascii = '';
          $strlen_var = strlen($var);

          for ($c = 0; $c < $strlen_var; ++$c) {
              $ord_var_c = ord($var[$c]);

              switch (true) {
                  case $ord_var_c == 0x08:
                      $ascii .= '\b';
                      break;
                  case $ord_var_c == 0x09:
                      $ascii .= '\t';
                      break;
                  case $ord_var_c == 0x0A:
                      $ascii .= '\n';
                      break;
                  case $ord_var_c == 0x0C:
                      $ascii .= '\f';
                      break;
                  case $ord_var_c == 0x0D:
                      $ascii .= '\r';
                      break;

                  case $ord_var_c == 0x22:
                  case $ord_var_c == 0x2F:
                  case $ord_var_c == 0x5C:
                      // double quote, slash, backslash
                      $ascii .= '\\' . $var[$c];
                      break;

                  case $ord_var_c < 0x20:
                      // Remaining control characters have no short escape
                      // and must not appear raw in JSON; previously they
                      // were silently dropped.
                      $ascii .= sprintf('\u%04x', $ord_var_c);
                      break;

                  case $ord_var_c <= 0x7F:
                      // characters U-00000020 - U-0000007F (same as ASCII)
                      $ascii .= $var[$c];
                      break;

                  default:
                      // Sequence length from the lead byte's bit mask,
                      // see http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
                      if (($ord_var_c & 0xE0) == 0xC0) {
                          $len = 2;   // 110XXXXX
                      } elseif (($ord_var_c & 0xF0) == 0xE0) {
                          $len = 3;   // 1110XXXX
                      } elseif (($ord_var_c & 0xF8) == 0xF0) {
                          $len = 4;   // 11110XXX
                      } elseif (($ord_var_c & 0xFC) == 0xF8) {
                          $len = 5;   // 111110XX
                      } elseif (($ord_var_c & 0xFE) == 0xFC) {
                          $len = 6;   // 1111110X
                      } else {
                          // not a lead byte: dropped
                          break;
                      }

                      // A sequence truncated by the end of the string is
                      // padded with NUL bytes -- the same bytes the old
                      // out-of-range offset reads yielded, without the
                      // warnings.
                      $char = str_pad(substr($var, $c, $len), $len, "\0");
                      $c += $len - 1;

                      if ($len == 4
                          && (ord($char[1]) & 0xC0) == 0x80
                          && (ord($char[2]) & 0xC0) == 0x80
                          && (ord($char[3]) & 0xC0) == 0x80) {
                          $cp = ((ord($char[0]) & 0x07) << 18)
                              | ((ord($char[1]) & 0x3F) << 12)
                              | ((ord($char[2]) & 0x3F) << 6)
                              | (ord($char[3]) & 0x3F);
                          if ($cp >= 0x10000 && $cp <= 0x10FFFF) {
                              // Code points beyond the BMP need a UTF-16
                              // surrogate pair: two \uXXXX escapes.
                              $cp -= 0x10000;
                              $ascii .= sprintf(
                                  '\u%04x\u%04x',
                                  0xD800 | ($cp >> 10),
                                  0xDC00 | ($cp & 0x3FF)
                              );
                              break;
                          }
                      }

                      $utf16 = $this->_utf82utf16($char);
                      $ascii .= sprintf('\u%04s', bin2hex($utf16));
                      break;
              }
          }

          return '"' . $ascii . '"';
      }

      /**
       *
       * Decodes a JSON string into the appropriate variable.
       *
       * Note: several changes were made in translating this method from
       * Services_JSON, particularly related to how strings are handled.
       * According to the JSON_checker test suite from
       * <http://www.json.org/JSON_checker/>, a JSON payload should be an
       * object or an array, not a string.
       *
       * Therefore 'true', 'false', numbers and quoted strings decode to null
       * unless they are nested inside an array or object (tracked through
       * $_level).
       *
       * @param string $str String encoded in JSON format.
       *
       * @param bool $asArray Optional argument to decode objects as arrays;
       * applied to nested values as well.
       *
       * @return mixed Decoded value.
       *
       * @todo Rewrite this based off of method used in Genv_Json_Checker
       *
       */
      protected function _json_decode($str, $asArray = false)
      {
          $str = $this->_reduce_string($str);

          switch (strtolower($str)) {
              case 'true':
                  return in_array($this->_level, array(self::IN_ARR, self::IN_OBJ))
                      ? true
                      : null;
              case 'false':
                  return in_array($this->_level, array(self::IN_ARR, self::IN_OBJ))
                      ? false
                      : null;
              case 'null':
                  return null;
          }

          $m = array();

          if (is_numeric($str) || ctype_digit($str) || ctype_xdigit($str)) {
              // Return float or int, or null as appropriate
              if (in_array($this->_level, array(self::IN_ARR, self::IN_OBJ))) {
                  return ((float) $str == (int) $str)
                      ? (int) $str
                      : (float) $str;
              }
              return null;
          }

          if (preg_match('/^("|\').*(\1)$/s', $str, $m) && $m[1] == $m[2]) {
              // STRINGS RETURNED IN UTF-8 FORMAT
              $utf8 = $this->_json_decode_string($str);
              if (in_array($this->_level, array(self::IN_ARR, self::IN_OBJ))) {
                  return $utf8;
              }
              return null;
          }

          if (!preg_match('/^\[.*\]$/s', $str) && !preg_match('/^\{.*\}$/s', $str)) {
              return null;
          }

          // array, or object notation
          if ($str[0] == '[') {
              $stk = array(self::IN_ARR);
              $this->_level = self::IN_ARR;
              $arr = array();
          } else {
              $stk = array(self::IN_OBJ);
              $obj = $asArray ? array() : new stdClass();
              $this->_level = self::IN_OBJ;
          }

          array_push($stk, array('what'  => self::SLICE,
                                 'where' => 0,
                                 'delim' => false));

          $chrs = substr($str, 1, -1);
          $chrs = $this->_reduce_string($chrs);

          if ($chrs == '') {
              return (reset($stk) == self::IN_ARR) ? $arr : $obj;
          }

          $strlen_chrs = strlen($chrs);

          // <= on purpose: the extra pass at the end flushes the last slice
          for ($c = 0; $c <= $strlen_chrs; ++$c) {

              $top = end($stk);
              $substr_chrs_c_2 = substr($chrs, $c, 2);

              if (($c == $strlen_chrs) || (($chrs[$c] == ',') && ($top['what'] == self::SLICE))) {
                  // found a comma that is not inside a string, array, etc.,
                  // OR we've reached the end of the character list
                  $slice = substr($chrs, $top['where'], ($c - $top['where']));
                  array_push($stk, array('what' => self::SLICE, 'where' => ($c + 1), 'delim' => false));

                  if (reset($stk) == self::IN_ARR) {
                      $this->_level = self::IN_ARR;
                      // we are in an array, so just push an element onto it;
                      // $asArray is passed on so nested objects honor it
                      array_push($arr, $this->_json_decode($slice, $asArray));

                  } elseif (reset($stk) == self::IN_OBJ) {
                      $this->_level = self::IN_OBJ;
                      // we are in an object, so figure out the property
                      // name and value
                      $parts = array();
                      $matched = true;

                      if (preg_match('/^\s*(["\'].*[^\\\]["\'])\s*:\s*(\S.*),?$/Uis', $slice, $parts)) {
                          // "name":value pair
                          $key = $this->_json_decode($parts[1]);
                          $val = $this->_json_decode($parts[2], $asArray);
                      } elseif (preg_match('/^\s*(\w+)\s*:\s*(\S.*),?$/Uis', $slice, $parts)) {
                          // name:value pair, where name is unquoted
                          $key = $parts[1];
                          $val = $this->_json_decode($parts[2], $asArray);
                      } elseif (preg_match('/^\s*(["\']["\'])\s*:\s*(\S.*),?$/Uis', $slice, $parts)) {
                          // "":value pair; use the key that matches ext/json
                          $key = '_empty_';
                          $val = $this->_json_decode($parts[2], $asArray);
                      } else {
                          $matched = false;
                      }

                      if ($matched) {
                          if ($asArray) {
                              $obj[$key] = $val;
                          } else {
                              $obj->$key = $val;
                          }
                      }
                  }

              } elseif ((($chrs[$c] == '"') || ($chrs[$c] == "'")) && ($top['what'] != self::IN_STR)) {
                  // found a quote, and we are not inside a string
                  array_push($stk, array('what' => self::IN_STR, 'where' => $c, 'delim' => $chrs[$c]));

              } elseif (($chrs[$c] == $top['delim']) &&
                        ($top['what'] == self::IN_STR) &&
                        ((strlen(substr($chrs, 0, $c)) - strlen(rtrim(substr($chrs, 0, $c), '\\'))) % 2 != 1)) {
                  // found a quote, we're in a string, and it's not escaped:
                  // there is _not_ an odd number of backslashes at the end
                  // of the string so far
                  array_pop($stk);

              } elseif (($chrs[$c] == '[') &&
                        in_array($top['what'], array(self::SLICE, self::IN_ARR, self::IN_OBJ))) {
                  // found a left-bracket, and we are in an array, object, or slice
                  array_push($stk, array('what' => self::IN_ARR, 'where' => $c, 'delim' => false));

              } elseif (($chrs[$c] == ']') && ($top['what'] == self::IN_ARR)) {
                  // found a right-bracket, and we're in an array
                  $this->_level = null;
                  array_pop($stk);

              } elseif (($chrs[$c] == '{') &&
                        in_array($top['what'], array(self::SLICE, self::IN_ARR, self::IN_OBJ))) {
                  // found a left-brace, and we are in an array, object, or slice
                  array_push($stk, array('what' => self::IN_OBJ, 'where' => $c, 'delim' => false));

              } elseif (($chrs[$c] == '}') && ($top['what'] == self::IN_OBJ)) {
                  // found a right-brace, and we're in an object
                  $this->_level = null;
                  array_pop($stk);

              } elseif (($substr_chrs_c_2 == '/*') &&
                        in_array($top['what'], array(self::SLICE, self::IN_ARR, self::IN_OBJ))) {
                  // found a comment start, and we are in an array, object, or slice
                  array_push($stk, array('what' => self::IN_CMT, 'where' => $c, 'delim' => false));
                  $c++;

              } elseif (($substr_chrs_c_2 == '*/') && ($top['what'] == self::IN_CMT)) {
                  // found a comment end, and we're in one now: blank the
                  // whole comment out so it cannot leak into a slice
                  array_pop($stk);
                  $c++;
                  for ($i = $top['where']; $i <= $c; ++$i) {
                      $chrs = substr_replace($chrs, ' ', $i, 1);
                  }
              }
          }

          if (reset($stk) == self::IN_ARR) {
              return $arr;
          } elseif (reset($stk) == self::IN_OBJ) {
              return $obj;
          }
          return null;
      }

      /**
       *
       * Decodes a quoted JSON string literal into a UTF-8 string.
       *
       * Handles the short escapes, escaped quotes/backslash/slash, \uXXXX
       * escapes (a high surrogate directly followed by a low surrogate is
       * combined into one code point), and copies printable ASCII and raw
       * multi-byte UTF-8 sequences through unchanged.
       *
       * @param string $str The literal, including its surrounding quotes
       * (single or double).
       *
       * @return string Decoded UTF-8 string.
       *
       */
      protected function _json_decode_string($str)
      {
          $delim = substr($str, 0, 1);
          $chrs = substr($str, 1, -1);
          $utf8 = '';
          $strlen_chrs = strlen($chrs);

          for ($c = 0; $c < $strlen_chrs; ++$c) {

              $substr_chrs_c_2 = substr($chrs, $c, 2);
              $ord_chrs_c = ord($chrs[$c]);

              switch (true) {
                  case $substr_chrs_c_2 == '\b':
                      $utf8 .= chr(0x08);
                      ++$c;
                      break;
                  case $substr_chrs_c_2 == '\t':
                      $utf8 .= chr(0x09);
                      ++$c;
                      break;
                  case $substr_chrs_c_2 == '\n':
                      $utf8 .= chr(0x0A);
                      ++$c;
                      break;
                  case $substr_chrs_c_2 == '\f':
                      $utf8 .= chr(0x0C);
                      ++$c;
                      break;
                  case $substr_chrs_c_2 == '\r':
                      $utf8 .= chr(0x0D);
                      ++$c;
                      break;

                  case $substr_chrs_c_2 == '\\"':
                  case $substr_chrs_c_2 == '\\\'':
                  case $substr_chrs_c_2 == '\\\\':
                  case $substr_chrs_c_2 == '\\/':
                      // Keep the escaped character, except for an escaped
                      // quote of the kind that does not delimit this string:
                      // then only the backslash is skipped here and the
                      // quote is copied by the next iteration.
                      if (($delim == '"' && $substr_chrs_c_2 != '\\\'') ||
                          ($delim == "'" && $substr_chrs_c_2 != '\\"')) {
                          $utf8 .= $chrs[++$c];
                      }
                      break;

                  case preg_match('/\\\u[0-9A-F]{4}/i', substr($chrs, $c, 6)) === 1:
                      $unit = hexdec(substr($chrs, $c + 2, 4));
                      $low = array();
                      if ($unit >= 0xD800 && $unit <= 0xDBFF &&
                          preg_match('/^\\\u(D[C-F][0-9A-F]{2})$/i', substr($chrs, $c + 6, 6), $low)) {
                          // surrogate pair: one code point above U+FFFF,
                          // written out as a 4-byte UTF-8 sequence
                          $cp = 0x10000
                              + (($unit - 0xD800) << 10)
                              + (hexdec($low[1]) - 0xDC00);
                          $utf8 .= chr(0xF0 | ($cp >> 18))
                                 . chr(0x80 | (($cp >> 12) & 0x3F))
                                 . chr(0x80 | (($cp >> 6) & 0x3F))
                                 . chr(0x80 | ($cp & 0x3F));
                          $c += 11;
                          break;
                      }
                      // single, escaped unicode character
                      $utf16 = chr(hexdec(substr($chrs, ($c + 2), 2)))
                             . chr(hexdec(substr($chrs, ($c + 4), 2)));
                      $utf8 .= $this->_utf162utf8($utf16);
                      $c += 5;
                      break;

                  case ($ord_chrs_c >= 0x20) && ($ord_chrs_c <= 0x7F):
                      $utf8 .= $chrs[$c];
                      break;

                  // Raw multi-byte UTF-8 is copied through; see
                  // http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
                  case ($ord_chrs_c & 0xE0) == 0xC0:
                      // characters U-00000080 - U-000007FF, mask 110XXXXX
                      $utf8 .= substr($chrs, $c, 2);
                      ++$c;
                      break;
                  case ($ord_chrs_c & 0xF0) == 0xE0:
                      // characters U-00000800 - U-0000FFFF, mask 1110XXXX
                      $utf8 .= substr($chrs, $c, 3);
                      $c += 2;
                      break;
                  case ($ord_chrs_c & 0xF8) == 0xF0:
                      // characters U-00010000 - U-001FFFFF, mask 11110XXX
                      $utf8 .= substr($chrs, $c, 4);
                      $c += 3;
                      break;
                  case ($ord_chrs_c & 0xFC) == 0xF8:
                      // characters U-00200000 - U-03FFFFFF, mask 111110XX
                      $utf8 .= substr($chrs, $c, 5);
                      $c += 4;
                      break;
                  case ($ord_chrs_c & 0xFE) == 0xFC:
                      // characters U-04000000 - U-7FFFFFFF, mask 1111110X
                      $utf8 .= substr($chrs, $c, 6);
                      $c += 5;
                      break;
              }
          }

          return $utf8;
      }

      /**
       *
       * Array-walking method for use in generating JSON-formatted name-value
       * pairs in the form of '"name":value'.
       *
       * @param string $name Name of key to use; cast to string before
       * encoding.
       *
       * @param mixed $value Element to be encoded.
       *
       * @return string JSON-formatted name-value pair.
       *
       */
      protected function _name_value($name, $value)
      {
          $encoded_value = $this->_json_encode($value);
          return $this->_json_encode(strval($name)) . ':' . $encoded_value;
      }

      /**
       *
       * Convert a string from one UTF-16 char to one UTF-8 char.
       *
       * Uses mb_convert_encoding() when it exists and 'bypass_mb' is off;
       * otherwise a slower PHP-only method for installations that lack the
       * multibyte string extension.
       *
       * @param string $utf16 UTF-16 character; the PHP-only method reads its
       * first two bytes as one big-endian code unit.
       *
       * @return string UTF-8 character.
       *
       */
      protected function _utf162utf8($utf16)
      {
          if (!$this->_config['bypass_mb'] &&
              function_exists('mb_convert_encoding')) {
              return mb_convert_encoding($utf16, 'UTF-8', 'UTF-16');
          }

          $bytes = (ord($utf16[0]) << 8) | ord($utf16[1]);

          // see: http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
          if ((0x7F & $bytes) == $bytes) {
              // ASCII range; callers normally never get here
              return chr(0x7F & $bytes);
          }

          if ((0x07FF & $bytes) == $bytes) {
              // return a 2-byte UTF-8 character
              return chr(0xC0 | (($bytes >> 6) & 0x1F))
                   . chr(0x80 | ($bytes & 0x3F));
          }

          if ((0xFFFF & $bytes) == $bytes) {
              // return a 3-byte UTF-8 character
              return chr(0xE0 | (($bytes >> 12) & 0x0F))
                   . chr(0x80 | (($bytes >> 6) & 0x3F))
                   . chr(0x80 | ($bytes & 0x3F));
          }

          // ignoring UTF-32 for now, sorry
          return '';
      }

      /**
       *
       * Convert a string from one UTF-8 char to one UTF-16 char.
       *
       * Uses mb_convert_encoding() when it exists and 'bypass_mb' is off;
       * otherwise a slower PHP-only method for installations that lack the
       * multibyte string extension. The PHP-only method handles sequences of
       * one to three bytes and returns '' for anything longer.
       *
       * @param string $utf8 UTF-8 character.
       *
       * @return string UTF-16 character.
       *
       */
      protected function _utf82utf16($utf8)
      {
          if (!$this->_config['bypass_mb'] &&
              function_exists('mb_convert_encoding')) {
              return mb_convert_encoding($utf8, 'UTF-16', 'UTF-8');
          }

          // see: http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
          switch (strlen($utf8)) {
              case 1:
                  // ASCII range; callers normally never get here
                  return $utf8;

              case 2:
                  // return a UTF-16 character from a 2-byte UTF-8 char
                  return chr(0x07 & (ord($utf8[0]) >> 2))
                       . chr((0xC0 & (ord($utf8[0]) << 6))
                           | (0x3F & ord($utf8[1])));

              case 3:
                  // return a UTF-16 character from a 3-byte UTF-8 char;
                  // a continuation byte carries six payload bits (0x3F)
                  return chr((0xF0 & (ord($utf8[0]) << 4))
                           | (0x0F & (ord($utf8[1]) >> 2)))
                       . chr((0xC0 & (ord($utf8[1]) << 6))
                           | (0x3F & ord($utf8[2])));
          }

          // ignoring UTF-32 for now, sorry
          return '';
      }

      /**
       *
       * Reduce a string by removing leading and trailing comments and
       * whitespace.
       *
       * @param string $str String value to strip of comments and whitespace.
       *
       * @return string String value stripped of comments and whitespace.
       *
       */
      protected function _reduce_string($str)
      {
          $str = preg_replace(array(

              // eliminate single line comments in '// ...' form
              '#^\s*//(.+)$#m',

              // eliminate multi-line comments in '/* ... */' form, at start of string
              '#^\s*/\*(.+)\*/#Us',

              // eliminate multi-line comments in '/* ... */' form, at end of string
              '#/\*(.+)\*/\s*$#Us'

          ), '', $str);

          // eliminate extraneous space
          return trim($str);
      }
  }