PageRenderTime 48ms CodeModel.GetById 21ms RepoModel.GetById 0ms app.codeStats 0ms

/lib-python/2.7/json/decoder.py

https://bitbucket.org/bwesterb/pypy
Python | 384 lines | 352 code | 14 blank | 18 comment | 13 complexity | f270530a98616072bd2dbd35f155ec64 MD5 | raw file
  1. """Implementation of JSONDecoder
  2. """
  3. import re
  4. import sys
  5. import struct
  6. from json import scanner
  7. try:
  8. from _json import scanstring as c_scanstring
  9. except ImportError:
  10. c_scanstring = None
  11. __all__ = ['JSONDecoder']
  12. FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL
  13. def _floatconstants():
  14. _BYTES = '7FF80000000000007FF0000000000000'.decode('hex')
  15. if sys.byteorder != 'big':
  16. _BYTES = _BYTES[:8][::-1] + _BYTES[8:][::-1]
  17. nan, inf = struct.unpack('dd', _BYTES)
  18. return nan, inf, -inf
  19. NaN, PosInf, NegInf = _floatconstants()
  20. def linecol(doc, pos):
  21. lineno = doc.count('\n', 0, pos) + 1
  22. if lineno == 1:
  23. colno = pos
  24. else:
  25. colno = pos - doc.rindex('\n', 0, pos)
  26. return lineno, colno
  27. def errmsg(msg, doc, pos, end=None):
  28. # Note that this function is called from _json
  29. lineno, colno = linecol(doc, pos)
  30. if end is None:
  31. fmt = '{0}: line {1} column {2} (char {3})'
  32. return fmt.format(msg, lineno, colno, pos)
  33. #fmt = '%s: line %d column %d (char %d)'
  34. #return fmt % (msg, lineno, colno, pos)
  35. endlineno, endcolno = linecol(doc, end)
  36. fmt = '{0}: line {1} column {2} - line {3} column {4} (char {5} - {6})'
  37. return fmt.format(msg, lineno, colno, endlineno, endcolno, pos, end)
  38. #fmt = '%s: line %d column %d - line %d column %d (char %d - %d)'
  39. #return fmt % (msg, lineno, colno, endlineno, endcolno, pos, end)
  40. _CONSTANTS = {
  41. '-Infinity': NegInf,
  42. 'Infinity': PosInf,
  43. 'NaN': NaN,
  44. }
  45. STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS)
  46. BACKSLASH = {
  47. '"': u'"', '\\': u'\\', '/': u'/',
  48. 'b': u'\b', 'f': u'\f', 'n': u'\n', 'r': u'\r', 't': u'\t',
  49. }
  50. DEFAULT_ENCODING = "utf-8"
  51. def py_scanstring(s, end, encoding=None, strict=True):
  52. """Scan the string s for a JSON string. End is the index of the
  53. character in s after the quote that started the JSON string.
  54. Unescapes all valid JSON string escape sequences and raises ValueError
  55. on attempt to decode an invalid string. If strict is False then literal
  56. control characters are allowed in the string.
  57. Returns a tuple of the decoded string and the index of the character in s
  58. after the end quote."""
  59. if encoding is None:
  60. encoding = DEFAULT_ENCODING
  61. chunks = []
  62. _append = chunks.append
  63. begin = end - 1
  64. while 1:
  65. chunk = STRINGCHUNK.match(s, end)
  66. if chunk is None:
  67. raise ValueError(
  68. errmsg("Unterminated string starting at", s, begin))
  69. end = chunk.end()
  70. content, terminator = chunk.groups()
  71. # Content is contains zero or more unescaped string characters
  72. if content:
  73. if not isinstance(content, unicode):
  74. content = unicode(content, encoding)
  75. _append(content)
  76. # Terminator is the end of string, a literal control character,
  77. # or a backslash denoting that an escape sequence follows
  78. if terminator == '"':
  79. break
  80. elif terminator != '\\':
  81. if strict:
  82. #msg = "Invalid control character %r at" % (terminator,)
  83. msg = "Invalid control character {0!r} at".format(terminator)
  84. raise ValueError(errmsg(msg, s, end))
  85. else:
  86. _append(terminator)
  87. continue
  88. try:
  89. esc = s[end]
  90. except IndexError:
  91. raise ValueError(
  92. errmsg("Unterminated string starting at", s, begin))
  93. # If not a unicode escape sequence, must be in the lookup table
  94. if esc != 'u':
  95. try:
  96. char = BACKSLASH[esc]
  97. except KeyError:
  98. msg = "Invalid \\escape: " + repr(esc)
  99. raise ValueError(errmsg(msg, s, end))
  100. end += 1
  101. else:
  102. # Unicode escape sequence
  103. esc = s[end + 1:end + 5]
  104. next_end = end + 5
  105. if len(esc) != 4:
  106. msg = "Invalid \\uXXXX escape"
  107. raise ValueError(errmsg(msg, s, end))
  108. uni = int(esc, 16)
  109. # Check for surrogate pair on UCS-4 systems
  110. if 0xd800 <= uni <= 0xdbff and sys.maxunicode > 65535:
  111. msg = "Invalid \\uXXXX\\uXXXX surrogate pair"
  112. if not s[end + 5:end + 7] == '\\u':
  113. raise ValueError(errmsg(msg, s, end))
  114. esc2 = s[end + 7:end + 11]
  115. if len(esc2) != 4:
  116. raise ValueError(errmsg(msg, s, end))
  117. uni2 = int(esc2, 16)
  118. uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00))
  119. next_end += 6
  120. char = unichr(uni)
  121. end = next_end
  122. # Append the unescaped character
  123. _append(char)
  124. return u''.join(chunks), end
  125. # Use speedup if available
  126. scanstring = c_scanstring or py_scanstring
  127. WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS)
  128. WHITESPACE_STR = ' \t\n\r'
  129. def JSONObject(s_and_end, encoding, strict, scan_once, object_hook,
  130. object_pairs_hook):
  131. s, end = s_and_end
  132. pairs = []
  133. pairs_append = pairs.append
  134. # Use a slice to prevent IndexError from being raised, the following
  135. # check will raise a more specific ValueError if the string is empty
  136. nextchar = s[end:end + 1]
  137. # Normally we expect nextchar == '"'
  138. if nextchar != '"':
  139. if nextchar in WHITESPACE_STR:
  140. end = WHITESPACE.match(s, end).end()
  141. nextchar = s[end:end + 1]
  142. # Trivial empty object
  143. if nextchar == '}':
  144. if object_pairs_hook is not None:
  145. result = object_pairs_hook(pairs)
  146. return result, end
  147. pairs = {}
  148. if object_hook is not None:
  149. pairs = object_hook(pairs)
  150. return pairs, end + 1
  151. elif nextchar != '"':
  152. raise ValueError(errmsg("Expecting property name", s, end))
  153. end += 1
  154. while True:
  155. key, end = scanstring(s, end, encoding, strict)
  156. # To skip some function call overhead we optimize the fast paths where
  157. # the JSON key separator is ": " or just ":".
  158. if s[end:end + 1] != ':':
  159. end = WHITESPACE.match(s, end).end()
  160. if s[end:end + 1] != ':':
  161. raise ValueError(errmsg("Expecting : delimiter", s, end))
  162. end += 1
  163. try:
  164. if s[end] in WHITESPACE_STR:
  165. end += 1
  166. if s[end] in WHITESPACE_STR:
  167. end = WHITESPACE.match(s, end + 1).end()
  168. except IndexError:
  169. pass
  170. try:
  171. value, end = scan_once(s, end)
  172. except StopIteration:
  173. raise ValueError(errmsg("Expecting object", s, end))
  174. pairs_append((key, value))
  175. try:
  176. nextchar = s[end]
  177. if nextchar in WHITESPACE_STR:
  178. end = WHITESPACE.match(s, end + 1).end()
  179. nextchar = s[end]
  180. except IndexError:
  181. nextchar = ''
  182. end += 1
  183. if nextchar == '}':
  184. break
  185. elif nextchar != ',':
  186. raise ValueError(errmsg("Expecting , delimiter", s, end - 1))
  187. try:
  188. nextchar = s[end]
  189. if nextchar in WHITESPACE_STR:
  190. end += 1
  191. nextchar = s[end]
  192. if nextchar in WHITESPACE_STR:
  193. end = WHITESPACE.match(s, end + 1).end()
  194. nextchar = s[end]
  195. except IndexError:
  196. nextchar = ''
  197. end += 1
  198. if nextchar != '"':
  199. raise ValueError(errmsg("Expecting property name", s, end - 1))
  200. if object_pairs_hook is not None:
  201. result = object_pairs_hook(pairs)
  202. return result, end
  203. pairs = dict(pairs)
  204. if object_hook is not None:
  205. pairs = object_hook(pairs)
  206. return pairs, end
  207. def JSONArray(s_and_end, scan_once):
  208. s, end = s_and_end
  209. values = []
  210. nextchar = s[end:end + 1]
  211. if nextchar in WHITESPACE_STR:
  212. end = WHITESPACE.match(s, end + 1).end()
  213. nextchar = s[end:end + 1]
  214. # Look-ahead for trivial empty array
  215. if nextchar == ']':
  216. return values, end + 1
  217. _append = values.append
  218. while True:
  219. try:
  220. value, end = scan_once(s, end)
  221. except StopIteration:
  222. raise ValueError(errmsg("Expecting object", s, end))
  223. _append(value)
  224. nextchar = s[end:end + 1]
  225. if nextchar in WHITESPACE_STR:
  226. end = WHITESPACE.match(s, end + 1).end()
  227. nextchar = s[end:end + 1]
  228. end += 1
  229. if nextchar == ']':
  230. break
  231. elif nextchar != ',':
  232. raise ValueError(errmsg("Expecting , delimiter", s, end))
  233. try:
  234. if s[end] in WHITESPACE_STR:
  235. end += 1
  236. if s[end] in WHITESPACE_STR:
  237. end = WHITESPACE.match(s, end + 1).end()
  238. except IndexError:
  239. pass
  240. return values, end
  241. class JSONDecoder(object):
  242. """Simple JSON <http://json.org> decoder
  243. Performs the following translations in decoding by default:
  244. +---------------+-------------------+
  245. | JSON | Python |
  246. +===============+===================+
  247. | object | dict |
  248. +---------------+-------------------+
  249. | array | list |
  250. +---------------+-------------------+
  251. | string | unicode |
  252. +---------------+-------------------+
  253. | number (int) | int, long |
  254. +---------------+-------------------+
  255. | number (real) | float |
  256. +---------------+-------------------+
  257. | true | True |
  258. +---------------+-------------------+
  259. | false | False |
  260. +---------------+-------------------+
  261. | null | None |
  262. +---------------+-------------------+
  263. It also understands ``NaN``, ``Infinity``, and ``-Infinity`` as
  264. their corresponding ``float`` values, which is outside the JSON spec.
  265. """
  266. def __init__(self, encoding=None, object_hook=None, parse_float=None,
  267. parse_int=None, parse_constant=None, strict=True,
  268. object_pairs_hook=None):
  269. """``encoding`` determines the encoding used to interpret any ``str``
  270. objects decoded by this instance (utf-8 by default). It has no
  271. effect when decoding ``unicode`` objects.
  272. Note that currently only encodings that are a superset of ASCII work,
  273. strings of other encodings should be passed in as ``unicode``.
  274. ``object_hook``, if specified, will be called with the result
  275. of every JSON object decoded and its return value will be used in
  276. place of the given ``dict``. This can be used to provide custom
  277. deserializations (e.g. to support JSON-RPC class hinting).
  278. ``object_pairs_hook``, if specified will be called with the result of
  279. every JSON object decoded with an ordered list of pairs. The return
  280. value of ``object_pairs_hook`` will be used instead of the ``dict``.
  281. This feature can be used to implement custom decoders that rely on the
  282. order that the key and value pairs are decoded (for example,
  283. collections.OrderedDict will remember the order of insertion). If
  284. ``object_hook`` is also defined, the ``object_pairs_hook`` takes
  285. priority.
  286. ``parse_float``, if specified, will be called with the string
  287. of every JSON float to be decoded. By default this is equivalent to
  288. float(num_str). This can be used to use another datatype or parser
  289. for JSON floats (e.g. decimal.Decimal).
  290. ``parse_int``, if specified, will be called with the string
  291. of every JSON int to be decoded. By default this is equivalent to
  292. int(num_str). This can be used to use another datatype or parser
  293. for JSON integers (e.g. float).
  294. ``parse_constant``, if specified, will be called with one of the
  295. following strings: -Infinity, Infinity, NaN.
  296. This can be used to raise an exception if invalid JSON numbers
  297. are encountered.
  298. If ``strict`` is false (true is the default), then control
  299. characters will be allowed inside strings. Control characters in
  300. this context are those with character codes in the 0-31 range,
  301. including ``'\\t'`` (tab), ``'\\n'``, ``'\\r'`` and ``'\\0'``.
  302. """
  303. self.encoding = encoding
  304. self.object_hook = object_hook
  305. self.object_pairs_hook = object_pairs_hook
  306. self.parse_float = parse_float or float
  307. self.parse_int = parse_int or int
  308. self.parse_constant = parse_constant or _CONSTANTS.__getitem__
  309. self.strict = strict
  310. self.parse_object = JSONObject
  311. self.parse_array = JSONArray
  312. self.parse_string = scanstring
  313. self.scan_once = scanner.make_scanner(self)
  314. def decode(self, s):
  315. """Return the Python representation of ``s`` (a ``str`` or ``unicode``
  316. instance containing a JSON document)
  317. """
  318. obj, end = self.raw_decode(s, idx=WHITESPACE.match(s, 0).end())
  319. end = WHITESPACE.match(s, end).end()
  320. if end != len(s):
  321. raise ValueError(errmsg("Extra data", s, end, len(s)))
  322. return obj
  323. def raw_decode(self, s, idx=0):
  324. """Decode a JSON document from ``s`` (a ``str`` or ``unicode``
  325. beginning with a JSON document) and return a 2-tuple of the Python
  326. representation and the index in ``s`` where the document ended.
  327. This can be used to decode a JSON document from a string that may
  328. have extraneous data at the end.
  329. """
  330. try:
  331. obj, end = self.scan_once(s, idx)
  332. except StopIteration:
  333. raise ValueError("No JSON object could be decoded")
  334. return obj, end