PageRenderTime 63ms CodeModel.GetById 28ms RepoModel.GetById 0ms app.codeStats 0ms

/gedlab-khmer-filter-abund/pymodules/python2.7/lib/python/simplejson/decoder.py

https://gitlab.com/pooja043/Globus_Docker_4
Python | 400 lines | 387 code | 7 blank | 6 comment | 5 complexity | cdc4966637590d89df39a33b2cc31b80 MD5 | raw file
  1. """Implementation of JSONDecoder
  2. """
  3. from __future__ import absolute_import
  4. import re
  5. import sys
  6. import struct
  7. from .compat import fromhex, b, u, text_type, binary_type, PY3, unichr
  8. from .scanner import make_scanner, JSONDecodeError
  9. def _import_c_scanstring():
  10. try:
  11. from ._speedups import scanstring
  12. return scanstring
  13. except ImportError:
  14. return None
# Result of the optional-import helper: the C ``scanstring`` when the
# ``_speedups`` extension imported successfully, otherwise ``None``.
c_scanstring = _import_c_scanstring()

# NOTE (3.1.0): JSONDecodeError may still be imported from this module for
# compatibility, but it was never in the __all__
__all__ = ['JSONDecoder']

# Flags passed to the regular expressions compiled in this module
# (STRINGCHUNK and WHITESPACE).
FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL
  20. def _floatconstants():
  21. _BYTES = fromhex('7FF80000000000007FF0000000000000')
  22. # The struct module in Python 2.4 would get frexp() out of range here
  23. # when an endian is specified in the format string. Fixed in Python 2.5+
  24. if sys.byteorder != 'big':
  25. _BYTES = _BYTES[:8][::-1] + _BYTES[8:][::-1]
  26. nan, inf = struct.unpack('dd', _BYTES)
  27. return nan, inf, -inf
# Module-level float constants produced once at import time.
NaN, PosInf, NegInf = _floatconstants()

# Maps the non-standard JSON literals to their float values.  Its
# ``__getitem__`` is the default ``parse_constant`` of JSONDecoder.
_CONSTANTS = {
    '-Infinity': NegInf,
    'Infinity': PosInf,
    'NaN': NaN,
}

# Group 1: a (lazy, possibly empty) run of ordinary characters.
# Group 2: the character that stops the run -- a double quote, a backslash,
# or a control character in the range 0x00-0x1f.
STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS)

# Lookup table for single-character escapes: the character following the
# backslash maps to the text it stands for.  ``\uXXXX`` is handled
# separately by py_scanstring.
BACKSLASH = {
    '"': u('"'), '\\': u('\u005c'), '/': u('/'),
    'b': u('\b'), 'f': u('\f'), 'n': u('\n'), 'r': u('\r'), 't': u('\t'),
}

# Encoding assumed when the caller passes ``encoding=None``.
DEFAULT_ENCODING = "utf-8"
  40. def py_scanstring(s, end, encoding=None, strict=True,
  41. _b=BACKSLASH, _m=STRINGCHUNK.match, _join=u('').join,
  42. _PY3=PY3, _maxunicode=sys.maxunicode):
  43. """Scan the string s for a JSON string. End is the index of the
  44. character in s after the quote that started the JSON string.
  45. Unescapes all valid JSON string escape sequences and raises ValueError
  46. on attempt to decode an invalid string. If strict is False then literal
  47. control characters are allowed in the string.
  48. Returns a tuple of the decoded string and the index of the character in s
  49. after the end quote."""
  50. if encoding is None:
  51. encoding = DEFAULT_ENCODING
  52. chunks = []
  53. _append = chunks.append
  54. begin = end - 1
  55. while 1:
  56. chunk = _m(s, end)
  57. if chunk is None:
  58. raise JSONDecodeError(
  59. "Unterminated string starting at", s, begin)
  60. end = chunk.end()
  61. content, terminator = chunk.groups()
  62. # Content is contains zero or more unescaped string characters
  63. if content:
  64. if not _PY3 and not isinstance(content, text_type):
  65. content = text_type(content, encoding)
  66. _append(content)
  67. # Terminator is the end of string, a literal control character,
  68. # or a backslash denoting that an escape sequence follows
  69. if terminator == '"':
  70. break
  71. elif terminator != '\\':
  72. if strict:
  73. msg = "Invalid control character %r at"
  74. raise JSONDecodeError(msg, s, end)
  75. else:
  76. _append(terminator)
  77. continue
  78. try:
  79. esc = s[end]
  80. except IndexError:
  81. raise JSONDecodeError(
  82. "Unterminated string starting at", s, begin)
  83. # If not a unicode escape sequence, must be in the lookup table
  84. if esc != 'u':
  85. try:
  86. char = _b[esc]
  87. except KeyError:
  88. msg = "Invalid \\X escape sequence %r"
  89. raise JSONDecodeError(msg, s, end)
  90. end += 1
  91. else:
  92. # Unicode escape sequence
  93. msg = "Invalid \\uXXXX escape sequence"
  94. esc = s[end + 1:end + 5]
  95. escX = esc[1:2]
  96. if len(esc) != 4 or escX == 'x' or escX == 'X':
  97. raise JSONDecodeError(msg, s, end - 1)
  98. try:
  99. uni = int(esc, 16)
  100. except ValueError:
  101. raise JSONDecodeError(msg, s, end - 1)
  102. end += 5
  103. # Check for surrogate pair on UCS-4 systems
  104. # Note that this will join high/low surrogate pairs
  105. # but will also pass unpaired surrogates through
  106. if (_maxunicode > 65535 and
  107. uni & 0xfc00 == 0xd800 and
  108. s[end:end + 2] == '\\u'):
  109. esc2 = s[end + 2:end + 6]
  110. escX = esc2[1:2]
  111. if len(esc2) == 4 and not (escX == 'x' or escX == 'X'):
  112. try:
  113. uni2 = int(esc2, 16)
  114. except ValueError:
  115. raise JSONDecodeError(msg, s, end)
  116. if uni2 & 0xfc00 == 0xdc00:
  117. uni = 0x10000 + (((uni - 0xd800) << 10) |
  118. (uni2 - 0xdc00))
  119. end += 6
  120. char = unichr(uni)
  121. # Append the unescaped character
  122. _append(char)
  123. return _join(chunks), end
# Use speedup if available: ``c_scanstring`` is None when the C extension
# could not be imported, in which case the pure-Python scanner is used.
scanstring = c_scanstring or py_scanstring

# Matches a (possibly empty) run of space, tab, line feed and carriage
# return characters.
WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS)
# The same four characters as a plain string, used for ``in`` membership
# tests on single characters.
WHITESPACE_STR = ' \t\n\r'
  128. def JSONObject(state, encoding, strict, scan_once, object_hook,
  129. object_pairs_hook, memo=None,
  130. _w=WHITESPACE.match, _ws=WHITESPACE_STR):
  131. (s, end) = state
  132. # Backwards compatibility
  133. if memo is None:
  134. memo = {}
  135. memo_get = memo.setdefault
  136. pairs = []
  137. # Use a slice to prevent IndexError from being raised, the following
  138. # check will raise a more specific ValueError if the string is empty
  139. nextchar = s[end:end + 1]
  140. # Normally we expect nextchar == '"'
  141. if nextchar != '"':
  142. if nextchar in _ws:
  143. end = _w(s, end).end()
  144. nextchar = s[end:end + 1]
  145. # Trivial empty object
  146. if nextchar == '}':
  147. if object_pairs_hook is not None:
  148. result = object_pairs_hook(pairs)
  149. return result, end + 1
  150. pairs = {}
  151. if object_hook is not None:
  152. pairs = object_hook(pairs)
  153. return pairs, end + 1
  154. elif nextchar != '"':
  155. raise JSONDecodeError(
  156. "Expecting property name enclosed in double quotes",
  157. s, end)
  158. end += 1
  159. while True:
  160. key, end = scanstring(s, end, encoding, strict)
  161. key = memo_get(key, key)
  162. # To skip some function call overhead we optimize the fast paths where
  163. # the JSON key separator is ": " or just ":".
  164. if s[end:end + 1] != ':':
  165. end = _w(s, end).end()
  166. if s[end:end + 1] != ':':
  167. raise JSONDecodeError("Expecting ':' delimiter", s, end)
  168. end += 1
  169. try:
  170. if s[end] in _ws:
  171. end += 1
  172. if s[end] in _ws:
  173. end = _w(s, end + 1).end()
  174. except IndexError:
  175. pass
  176. value, end = scan_once(s, end)
  177. pairs.append((key, value))
  178. try:
  179. nextchar = s[end]
  180. if nextchar in _ws:
  181. end = _w(s, end + 1).end()
  182. nextchar = s[end]
  183. except IndexError:
  184. nextchar = ''
  185. end += 1
  186. if nextchar == '}':
  187. break
  188. elif nextchar != ',':
  189. raise JSONDecodeError("Expecting ',' delimiter or '}'", s, end - 1)
  190. try:
  191. nextchar = s[end]
  192. if nextchar in _ws:
  193. end += 1
  194. nextchar = s[end]
  195. if nextchar in _ws:
  196. end = _w(s, end + 1).end()
  197. nextchar = s[end]
  198. except IndexError:
  199. nextchar = ''
  200. end += 1
  201. if nextchar != '"':
  202. raise JSONDecodeError(
  203. "Expecting property name enclosed in double quotes",
  204. s, end - 1)
  205. if object_pairs_hook is not None:
  206. result = object_pairs_hook(pairs)
  207. return result, end
  208. pairs = dict(pairs)
  209. if object_hook is not None:
  210. pairs = object_hook(pairs)
  211. return pairs, end
  212. def JSONArray(state, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
  213. (s, end) = state
  214. values = []
  215. nextchar = s[end:end + 1]
  216. if nextchar in _ws:
  217. end = _w(s, end + 1).end()
  218. nextchar = s[end:end + 1]
  219. # Look-ahead for trivial empty array
  220. if nextchar == ']':
  221. return values, end + 1
  222. elif nextchar == '':
  223. raise JSONDecodeError("Expecting value or ']'", s, end)
  224. _append = values.append
  225. while True:
  226. value, end = scan_once(s, end)
  227. _append(value)
  228. nextchar = s[end:end + 1]
  229. if nextchar in _ws:
  230. end = _w(s, end + 1).end()
  231. nextchar = s[end:end + 1]
  232. end += 1
  233. if nextchar == ']':
  234. break
  235. elif nextchar != ',':
  236. raise JSONDecodeError("Expecting ',' delimiter or ']'", s, end - 1)
  237. try:
  238. if s[end] in _ws:
  239. end += 1
  240. if s[end] in _ws:
  241. end = _w(s, end + 1).end()
  242. except IndexError:
  243. pass
  244. return values, end
  245. class JSONDecoder(object):
  246. """Simple JSON <http://json.org> decoder
  247. Performs the following translations in decoding by default:
  248. +---------------+-------------------+
  249. | JSON | Python |
  250. +===============+===================+
  251. | object | dict |
  252. +---------------+-------------------+
  253. | array | list |
  254. +---------------+-------------------+
  255. | string | str, unicode |
  256. +---------------+-------------------+
  257. | number (int) | int, long |
  258. +---------------+-------------------+
  259. | number (real) | float |
  260. +---------------+-------------------+
  261. | true | True |
  262. +---------------+-------------------+
  263. | false | False |
  264. +---------------+-------------------+
  265. | null | None |
  266. +---------------+-------------------+
  267. It also understands ``NaN``, ``Infinity``, and ``-Infinity`` as
  268. their corresponding ``float`` values, which is outside the JSON spec.
  269. """
  270. def __init__(self, encoding=None, object_hook=None, parse_float=None,
  271. parse_int=None, parse_constant=None, strict=True,
  272. object_pairs_hook=None):
  273. """
  274. *encoding* determines the encoding used to interpret any
  275. :class:`str` objects decoded by this instance (``'utf-8'`` by
  276. default). It has no effect when decoding :class:`unicode` objects.
  277. Note that currently only encodings that are a superset of ASCII work,
  278. strings of other encodings should be passed in as :class:`unicode`.
  279. *object_hook*, if specified, will be called with the result of every
  280. JSON object decoded and its return value will be used in place of the
  281. given :class:`dict`. This can be used to provide custom
  282. deserializations (e.g. to support JSON-RPC class hinting).
  283. *object_pairs_hook* is an optional function that will be called with
  284. the result of any object literal decode with an ordered list of pairs.
  285. The return value of *object_pairs_hook* will be used instead of the
  286. :class:`dict`. This feature can be used to implement custom decoders
  287. that rely on the order that the key and value pairs are decoded (for
  288. example, :func:`collections.OrderedDict` will remember the order of
  289. insertion). If *object_hook* is also defined, the *object_pairs_hook*
  290. takes priority.
  291. *parse_float*, if specified, will be called with the string of every
  292. JSON float to be decoded. By default, this is equivalent to
  293. ``float(num_str)``. This can be used to use another datatype or parser
  294. for JSON floats (e.g. :class:`decimal.Decimal`).
  295. *parse_int*, if specified, will be called with the string of every
  296. JSON int to be decoded. By default, this is equivalent to
  297. ``int(num_str)``. This can be used to use another datatype or parser
  298. for JSON integers (e.g. :class:`float`).
  299. *parse_constant*, if specified, will be called with one of the
  300. following strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``. This
  301. can be used to raise an exception if invalid JSON numbers are
  302. encountered.
  303. *strict* controls the parser's behavior when it encounters an
  304. invalid control character in a string. The default setting of
  305. ``True`` means that unescaped control characters are parse errors, if
  306. ``False`` then control characters will be allowed in strings.
  307. """
  308. if encoding is None:
  309. encoding = DEFAULT_ENCODING
  310. self.encoding = encoding
  311. self.object_hook = object_hook
  312. self.object_pairs_hook = object_pairs_hook
  313. self.parse_float = parse_float or float
  314. self.parse_int = parse_int or int
  315. self.parse_constant = parse_constant or _CONSTANTS.__getitem__
  316. self.strict = strict
  317. self.parse_object = JSONObject
  318. self.parse_array = JSONArray
  319. self.parse_string = scanstring
  320. self.memo = {}
  321. self.scan_once = make_scanner(self)
  322. def decode(self, s, _w=WHITESPACE.match, _PY3=PY3):
  323. """Return the Python representation of ``s`` (a ``str`` or ``unicode``
  324. instance containing a JSON document)
  325. """
  326. if _PY3 and isinstance(s, binary_type):
  327. s = s.decode(self.encoding)
  328. obj, end = self.raw_decode(s)
  329. end = _w(s, end).end()
  330. if end != len(s):
  331. raise JSONDecodeError("Extra data", s, end, len(s))
  332. return obj
  333. def raw_decode(self, s, idx=0, _w=WHITESPACE.match, _PY3=PY3):
  334. """Decode a JSON document from ``s`` (a ``str`` or ``unicode``
  335. beginning with a JSON document) and return a 2-tuple of the Python
  336. representation and the index in ``s`` where the document ended.
  337. Optionally, ``idx`` can be used to specify an offset in ``s`` where
  338. the JSON document begins.
  339. This can be used to decode a JSON document from a string that may
  340. have extraneous data at the end.
  341. """
  342. if idx < 0:
  343. # Ensure that raw_decode bails on negative indexes, the regex
  344. # would otherwise mask this behavior. #98
  345. raise JSONDecodeError('Expecting value', s, idx)
  346. if _PY3 and not isinstance(s, text_type):
  347. raise TypeError("Input string must be text, not bytes")
  348. # strip UTF-8 bom
  349. if len(s) > idx:
  350. ord0 = ord(s[idx])
  351. if ord0 == 0xfeff:
  352. idx += 1
  353. elif ord0 == 0xef and s[idx:idx + 3] == '\xef\xbb\xbf':
  354. idx += 3
  355. return self.scan_once(s, idx=_w(s, idx).end())