PageRenderTime 46ms CodeModel.GetById 17ms RepoModel.GetById 0ms app.codeStats 0ms

/Lib/json/decoder.py

https://bitbucket.org/arigo/cpython-withatomic/
Python | 369 lines | 337 code | 14 blank | 18 comment | 13 complexity | c2ec3e9ef4a39f65e22f3978e713a020 MD5 | raw file
Possible License(s): 0BSD
  1. """Implementation of JSONDecoder
  2. """
  3. import binascii
  4. import re
  5. import sys
  6. import struct
  7. from json import scanner
  8. try:
  9. from _json import scanstring as c_scanstring
  10. except ImportError:
  11. c_scanstring = None
  12. __all__ = ['JSONDecoder']
  13. FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL
  14. def _floatconstants():
  15. _BYTES = binascii.unhexlify(b'7FF80000000000007FF0000000000000')
  16. if sys.byteorder != 'big':
  17. _BYTES = _BYTES[:8][::-1] + _BYTES[8:][::-1]
  18. nan, inf = struct.unpack('dd', _BYTES)
  19. return nan, inf, -inf
  20. NaN, PosInf, NegInf = _floatconstants()
  21. def linecol(doc, pos):
  22. if isinstance(doc, bytes):
  23. newline = b'\n'
  24. else:
  25. newline = '\n'
  26. lineno = doc.count(newline, 0, pos) + 1
  27. if lineno == 1:
  28. colno = pos
  29. else:
  30. colno = pos - doc.rindex(newline, 0, pos)
  31. return lineno, colno
  32. def errmsg(msg, doc, pos, end=None):
  33. # Note that this function is called from _json
  34. lineno, colno = linecol(doc, pos)
  35. if end is None:
  36. fmt = '{0}: line {1} column {2} (char {3})'
  37. return fmt.format(msg, lineno, colno, pos)
  38. #fmt = '%s: line %d column %d (char %d)'
  39. #return fmt % (msg, lineno, colno, pos)
  40. endlineno, endcolno = linecol(doc, end)
  41. fmt = '{0}: line {1} column {2} - line {3} column {4} (char {5} - {6})'
  42. return fmt.format(msg, lineno, colno, endlineno, endcolno, pos, end)
  43. #fmt = '%s: line %d column %d - line %d column %d (char %d - %d)'
  44. #return fmt % (msg, lineno, colno, endlineno, endcolno, pos, end)
  45. _CONSTANTS = {
  46. '-Infinity': NegInf,
  47. 'Infinity': PosInf,
  48. 'NaN': NaN,
  49. }
  50. STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS)
  51. BACKSLASH = {
  52. '"': '"', '\\': '\\', '/': '/',
  53. 'b': '\b', 'f': '\f', 'n': '\n', 'r': '\r', 't': '\t',
  54. }
  55. def py_scanstring(s, end, strict=True,
  56. _b=BACKSLASH, _m=STRINGCHUNK.match):
  57. """Scan the string s for a JSON string. End is the index of the
  58. character in s after the quote that started the JSON string.
  59. Unescapes all valid JSON string escape sequences and raises ValueError
  60. on attempt to decode an invalid string. If strict is False then literal
  61. control characters are allowed in the string.
  62. Returns a tuple of the decoded string and the index of the character in s
  63. after the end quote."""
  64. chunks = []
  65. _append = chunks.append
  66. begin = end - 1
  67. while 1:
  68. chunk = _m(s, end)
  69. if chunk is None:
  70. raise ValueError(
  71. errmsg("Unterminated string starting at", s, begin))
  72. end = chunk.end()
  73. content, terminator = chunk.groups()
  74. # Content is contains zero or more unescaped string characters
  75. if content:
  76. _append(content)
  77. # Terminator is the end of string, a literal control character,
  78. # or a backslash denoting that an escape sequence follows
  79. if terminator == '"':
  80. break
  81. elif terminator != '\\':
  82. if strict:
  83. #msg = "Invalid control character %r at" % (terminator,)
  84. msg = "Invalid control character {0!r} at".format(terminator)
  85. raise ValueError(errmsg(msg, s, end))
  86. else:
  87. _append(terminator)
  88. continue
  89. try:
  90. esc = s[end]
  91. except IndexError:
  92. raise ValueError(
  93. errmsg("Unterminated string starting at", s, begin))
  94. # If not a unicode escape sequence, must be in the lookup table
  95. if esc != 'u':
  96. try:
  97. char = _b[esc]
  98. except KeyError:
  99. msg = "Invalid \\escape: {0!r}".format(esc)
  100. raise ValueError(errmsg(msg, s, end))
  101. end += 1
  102. else:
  103. esc = s[end + 1:end + 5]
  104. next_end = end + 5
  105. if len(esc) != 4:
  106. msg = "Invalid \\uXXXX escape"
  107. raise ValueError(errmsg(msg, s, end))
  108. uni = int(esc, 16)
  109. if 0xd800 <= uni <= 0xdbff:
  110. msg = "Invalid \\uXXXX\\uXXXX surrogate pair"
  111. if not s[end + 5:end + 7] == '\\u':
  112. raise ValueError(errmsg(msg, s, end))
  113. esc2 = s[end + 7:end + 11]
  114. if len(esc2) != 4:
  115. raise ValueError(errmsg(msg, s, end))
  116. uni2 = int(esc2, 16)
  117. uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00))
  118. next_end += 6
  119. char = chr(uni)
  120. end = next_end
  121. _append(char)
  122. return ''.join(chunks), end
  123. # Use speedup if available
  124. scanstring = c_scanstring or py_scanstring
  125. WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS)
  126. WHITESPACE_STR = ' \t\n\r'
  127. def JSONObject(s_and_end, strict, scan_once, object_hook, object_pairs_hook,
  128. memo=None, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
  129. s, end = s_and_end
  130. pairs = []
  131. pairs_append = pairs.append
  132. # Backwards compatibility
  133. if memo is None:
  134. memo = {}
  135. memo_get = memo.setdefault
  136. # Use a slice to prevent IndexError from being raised, the following
  137. # check will raise a more specific ValueError if the string is empty
  138. nextchar = s[end:end + 1]
  139. # Normally we expect nextchar == '"'
  140. if nextchar != '"':
  141. if nextchar in _ws:
  142. end = _w(s, end).end()
  143. nextchar = s[end:end + 1]
  144. # Trivial empty object
  145. if nextchar == '}':
  146. if object_pairs_hook is not None:
  147. result = object_pairs_hook(pairs)
  148. return result, end
  149. pairs = {}
  150. if object_hook is not None:
  151. pairs = object_hook(pairs)
  152. return pairs, end + 1
  153. elif nextchar != '"':
  154. raise ValueError(errmsg("Expecting property name", s, end))
  155. end += 1
  156. while True:
  157. key, end = scanstring(s, end, strict)
  158. key = memo_get(key, key)
  159. # To skip some function call overhead we optimize the fast paths where
  160. # the JSON key separator is ": " or just ":".
  161. if s[end:end + 1] != ':':
  162. end = _w(s, end).end()
  163. if s[end:end + 1] != ':':
  164. raise ValueError(errmsg("Expecting : delimiter", s, end))
  165. end += 1
  166. try:
  167. if s[end] in _ws:
  168. end += 1
  169. if s[end] in _ws:
  170. end = _w(s, end + 1).end()
  171. except IndexError:
  172. pass
  173. try:
  174. value, end = scan_once(s, end)
  175. except StopIteration:
  176. raise ValueError(errmsg("Expecting object", s, end))
  177. pairs_append((key, value))
  178. try:
  179. nextchar = s[end]
  180. if nextchar in _ws:
  181. end = _w(s, end + 1).end()
  182. nextchar = s[end]
  183. except IndexError:
  184. nextchar = ''
  185. end += 1
  186. if nextchar == '}':
  187. break
  188. elif nextchar != ',':
  189. raise ValueError(errmsg("Expecting , delimiter", s, end - 1))
  190. end = _w(s, end).end()
  191. nextchar = s[end:end + 1]
  192. end += 1
  193. if nextchar != '"':
  194. raise ValueError(errmsg("Expecting property name", s, end - 1))
  195. if object_pairs_hook is not None:
  196. result = object_pairs_hook(pairs)
  197. return result, end
  198. pairs = dict(pairs)
  199. if object_hook is not None:
  200. pairs = object_hook(pairs)
  201. return pairs, end
  202. def JSONArray(s_and_end, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
  203. s, end = s_and_end
  204. values = []
  205. nextchar = s[end:end + 1]
  206. if nextchar in _ws:
  207. end = _w(s, end + 1).end()
  208. nextchar = s[end:end + 1]
  209. # Look-ahead for trivial empty array
  210. if nextchar == ']':
  211. return values, end + 1
  212. _append = values.append
  213. while True:
  214. try:
  215. value, end = scan_once(s, end)
  216. except StopIteration:
  217. raise ValueError(errmsg("Expecting object", s, end))
  218. _append(value)
  219. nextchar = s[end:end + 1]
  220. if nextchar in _ws:
  221. end = _w(s, end + 1).end()
  222. nextchar = s[end:end + 1]
  223. end += 1
  224. if nextchar == ']':
  225. break
  226. elif nextchar != ',':
  227. raise ValueError(errmsg("Expecting , delimiter", s, end))
  228. try:
  229. if s[end] in _ws:
  230. end += 1
  231. if s[end] in _ws:
  232. end = _w(s, end + 1).end()
  233. except IndexError:
  234. pass
  235. return values, end
  236. class JSONDecoder(object):
  237. """Simple JSON <http://json.org> decoder
  238. Performs the following translations in decoding by default:
  239. +---------------+-------------------+
  240. | JSON | Python |
  241. +===============+===================+
  242. | object | dict |
  243. +---------------+-------------------+
  244. | array | list |
  245. +---------------+-------------------+
  246. | string | str |
  247. +---------------+-------------------+
  248. | number (int) | int |
  249. +---------------+-------------------+
  250. | number (real) | float |
  251. +---------------+-------------------+
  252. | true | True |
  253. +---------------+-------------------+
  254. | false | False |
  255. +---------------+-------------------+
  256. | null | None |
  257. +---------------+-------------------+
  258. It also understands ``NaN``, ``Infinity``, and ``-Infinity`` as
  259. their corresponding ``float`` values, which is outside the JSON spec.
  260. """
  261. def __init__(self, object_hook=None, parse_float=None,
  262. parse_int=None, parse_constant=None, strict=True,
  263. object_pairs_hook=None):
  264. """``object_hook``, if specified, will be called with the result
  265. of every JSON object decoded and its return value will be used in
  266. place of the given ``dict``. This can be used to provide custom
  267. deserializations (e.g. to support JSON-RPC class hinting).
  268. ``object_pairs_hook``, if specified will be called with the result of
  269. every JSON object decoded with an ordered list of pairs. The return
  270. value of ``object_pairs_hook`` will be used instead of the ``dict``.
  271. This feature can be used to implement custom decoders that rely on the
  272. order that the key and value pairs are decoded (for example,
  273. collections.OrderedDict will remember the order of insertion). If
  274. ``object_hook`` is also defined, the ``object_pairs_hook`` takes
  275. priority.
  276. ``parse_float``, if specified, will be called with the string
  277. of every JSON float to be decoded. By default this is equivalent to
  278. float(num_str). This can be used to use another datatype or parser
  279. for JSON floats (e.g. decimal.Decimal).
  280. ``parse_int``, if specified, will be called with the string
  281. of every JSON int to be decoded. By default this is equivalent to
  282. int(num_str). This can be used to use another datatype or parser
  283. for JSON integers (e.g. float).
  284. ``parse_constant``, if specified, will be called with one of the
  285. following strings: -Infinity, Infinity, NaN.
  286. This can be used to raise an exception if invalid JSON numbers
  287. are encountered.
  288. If ``strict`` is false (true is the default), then control
  289. characters will be allowed inside strings. Control characters in
  290. this context are those with character codes in the 0-31 range,
  291. including ``'\\t'`` (tab), ``'\\n'``, ``'\\r'`` and ``'\\0'``.
  292. """
  293. self.object_hook = object_hook
  294. self.parse_float = parse_float or float
  295. self.parse_int = parse_int or int
  296. self.parse_constant = parse_constant or _CONSTANTS.__getitem__
  297. self.strict = strict
  298. self.object_pairs_hook = object_pairs_hook
  299. self.parse_object = JSONObject
  300. self.parse_array = JSONArray
  301. self.parse_string = scanstring
  302. self.memo = {}
  303. self.scan_once = scanner.make_scanner(self)
  304. def decode(self, s, _w=WHITESPACE.match):
  305. """Return the Python representation of ``s`` (a ``str`` instance
  306. containing a JSON document).
  307. """
  308. obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  309. end = _w(s, end).end()
  310. if end != len(s):
  311. raise ValueError(errmsg("Extra data", s, end, len(s)))
  312. return obj
  313. def raw_decode(self, s, idx=0):
  314. """Decode a JSON document from ``s`` (a ``str`` beginning with
  315. a JSON document) and return a 2-tuple of the Python
  316. representation and the index in ``s`` where the document ended.
  317. This can be used to decode a JSON document from a string that may
  318. have extraneous data at the end.
  319. """
  320. try:
  321. obj, end = self.scan_once(s, idx)
  322. except StopIteration:
  323. raise ValueError("No JSON object could be decoded")
  324. return obj, end