PageRenderTime 36ms CodeModel.GetById 15ms app.highlight 16ms RepoModel.GetById 2ms app.codeStats 0ms

/Lib/json/encoder.py

http://unladen-swallow.googlecode.com/
Python | 384 lines | 283 code | 19 blank | 82 comment | 14 complexity | 3550688516110f12336a52f12cee7c3c MD5 | raw file
  1"""Implementation of JSONEncoder
  2"""
  3
  4import re
  5import math
  6
  7try:
  8    from _json import encode_basestring_ascii as c_encode_basestring_ascii
  9except ImportError:
 10    c_encode_basestring_ascii = None
 11
 12__all__ = ['JSONEncoder']
 13
 14ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]')
 15ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
 16HAS_UTF8 = re.compile(r'[\x80-\xff]')
 17ESCAPE_DCT = {
 18    '\\': '\\\\',
 19    '"': '\\"',
 20    '\b': '\\b',
 21    '\f': '\\f',
 22    '\n': '\\n',
 23    '\r': '\\r',
 24    '\t': '\\t',
 25}
 26for i in range(0x20):
 27    ESCAPE_DCT.setdefault(chr(i), '\\u{0:04x}'.format(i))
 28
 29FLOAT_REPR = repr
 30
 31def floatstr(o, allow_nan=True):
 32    # Check for specials.  Note that this type of test is processor- and/or
 33    # platform-specific, so do tests which don't depend on the internals.
 34
 35    if math.isnan(o):
 36        text = 'NaN'
 37    elif math.isinf(o):
 38        if math.copysign(1., o) == 1.:
 39            text = 'Infinity'
 40        else:
 41            text = '-Infinity'
 42    else:
 43        return FLOAT_REPR(o)
 44
 45    if not allow_nan:
 46        msg = "Out of range float values are not JSON compliant: " + repr(o)
 47        raise ValueError(msg)
 48
 49    return text
 50
 51
 52def encode_basestring(s):
 53    """Return a JSON representation of a Python string
 54
 55    """
 56    def replace(match):
 57        return ESCAPE_DCT[match.group(0)]
 58    return '"' + ESCAPE.sub(replace, s) + '"'
 59
 60
 61def py_encode_basestring_ascii(s):
 62    if isinstance(s, str) and HAS_UTF8.search(s) is not None:
 63        s = s.decode('utf-8')
 64    def replace(match):
 65        s = match.group(0)
 66        try:
 67            return ESCAPE_DCT[s]
 68        except KeyError:
 69            n = ord(s)
 70            if n < 0x10000:
 71                return '\\u{0:04x}'.format(n)
 72            else:
 73                # surrogate pair
 74                n -= 0x10000
 75                s1 = 0xd800 | ((n >> 10) & 0x3ff)
 76                s2 = 0xdc00 | (n & 0x3ff)
 77                return '\\u{0:04x}\\u{1:04x}'.format(s1, s2)
 78    return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"'
 79
 80
 81if c_encode_basestring_ascii is not None:
 82    encode_basestring_ascii = c_encode_basestring_ascii
 83else:
 84    encode_basestring_ascii = py_encode_basestring_ascii
 85
 86
 87class JSONEncoder(object):
 88    """Extensible JSON <http://json.org> encoder for Python data structures.
 89
 90    Supports the following objects and types by default:
 91
 92    +-------------------+---------------+
 93    | Python            | JSON          |
 94    +===================+===============+
 95    | dict              | object        |
 96    +-------------------+---------------+
 97    | list, tuple       | array         |
 98    +-------------------+---------------+
 99    | str, unicode      | string        |
100    +-------------------+---------------+
101    | int, long, float  | number        |
102    +-------------------+---------------+
103    | True              | true          |
104    +-------------------+---------------+
105    | False             | false         |
106    +-------------------+---------------+
107    | None              | null          |
108    +-------------------+---------------+
109
110    To extend this to recognize other objects, subclass and implement a
111    ``.default()`` method with another method that returns a serializable
112    object for ``o`` if possible, otherwise it should call the superclass
113    implementation (to raise ``TypeError``).
114
115    """
116    __all__ = ['__init__', 'default', 'encode', 'iterencode']
117    item_separator = ', '
118    key_separator = ': '
119    def __init__(self, skipkeys=False, ensure_ascii=True,
120            check_circular=True, allow_nan=True, sort_keys=False,
121            indent=None, separators=None, encoding='utf-8', default=None):
122        """Constructor for JSONEncoder, with sensible defaults.
123
124        If skipkeys is False, then it is a TypeError to attempt
125        encoding of keys that are not str, int, long, float or None.  If
126        skipkeys is True, such items are simply skipped.
127
128        If ensure_ascii is True, the output is guaranteed to be str
129        objects with all incoming unicode characters escaped.  If
130        ensure_ascii is false, the output will be unicode object.
131
132        If check_circular is True, then lists, dicts, and custom encoded
133        objects will be checked for circular references during encoding to
134        prevent an infinite recursion (which would cause an OverflowError).
135        Otherwise, no such check takes place.
136
137        If allow_nan is True, then NaN, Infinity, and -Infinity will be
138        encoded as such.  This behavior is not JSON specification compliant,
139        but is consistent with most JavaScript based encoders and decoders.
140        Otherwise, it will be a ValueError to encode such floats.
141
142        If sort_keys is True, then the output of dictionaries will be
143        sorted by key; this is useful for regression tests to ensure
144        that JSON serializations can be compared on a day-to-day basis.
145
146        If indent is a non-negative integer, then JSON array
147        elements and object members will be pretty-printed with that
148        indent level.  An indent level of 0 will only insert newlines.
149        None is the most compact representation.
150
151        If specified, separators should be a (item_separator, key_separator)
152        tuple.  The default is (', ', ': ').  To get the most compact JSON
153        representation you should specify (',', ':') to eliminate whitespace.
154
155        If specified, default is a function that gets called for objects
156        that can't otherwise be serialized.  It should return a JSON encodable
157        version of the object or raise a ``TypeError``.
158
159        If encoding is not None, then all input strings will be
160        transformed into unicode using that encoding prior to JSON-encoding.
161        The default is UTF-8.
162
163        """
164        self.skipkeys = skipkeys
165        self.ensure_ascii = ensure_ascii
166        self.check_circular = check_circular
167        self.allow_nan = allow_nan
168        self.sort_keys = sort_keys
169        self.indent = indent
170        self.current_indent_level = 0
171        if separators is not None:
172            self.item_separator, self.key_separator = separators
173        if default is not None:
174            self.default = default
175        self.encoding = encoding
176
177    def _newline_indent(self):
178        return '\n' + (' ' * (self.indent * self.current_indent_level))
179
180    def _iterencode_list(self, lst, markers=None):
181        if not lst:
182            yield '[]'
183            return
184        if markers is not None:
185            markerid = id(lst)
186            if markerid in markers:
187                raise ValueError("Circular reference detected")
188            markers[markerid] = lst
189        yield '['
190        if self.indent is not None:
191            self.current_indent_level += 1
192            newline_indent = self._newline_indent()
193            separator = self.item_separator + newline_indent
194            yield newline_indent
195        else:
196            newline_indent = None
197            separator = self.item_separator
198        first = True
199        for value in lst:
200            if first:
201                first = False
202            else:
203                yield separator
204            for chunk in self._iterencode(value, markers):
205                yield chunk
206        if newline_indent is not None:
207            self.current_indent_level -= 1
208            yield self._newline_indent()
209        yield ']'
210        if markers is not None:
211            del markers[markerid]
212
213    def _iterencode_dict(self, dct, markers=None):
214        if not dct:
215            yield '{}'
216            return
217        if markers is not None:
218            markerid = id(dct)
219            if markerid in markers:
220                raise ValueError("Circular reference detected")
221            markers[markerid] = dct
222        yield '{'
223        key_separator = self.key_separator
224        if self.indent is not None:
225            self.current_indent_level += 1
226            newline_indent = self._newline_indent()
227            item_separator = self.item_separator + newline_indent
228            yield newline_indent
229        else:
230            newline_indent = None
231            item_separator = self.item_separator
232        first = True
233        if self.ensure_ascii:
234            encoder = encode_basestring_ascii
235        else:
236            encoder = encode_basestring
237        allow_nan = self.allow_nan
238        if self.sort_keys:
239            keys = dct.keys()
240            keys.sort()
241            items = [(k, dct[k]) for k in keys]
242        else:
243            items = dct.iteritems()
244        _encoding = self.encoding
245        _do_decode = (_encoding is not None
246            and not (_encoding == 'utf-8'))
247        for key, value in items:
248            if isinstance(key, str):
249                if _do_decode:
250                    key = key.decode(_encoding)
251            elif isinstance(key, basestring):
252                pass
253            # JavaScript is weakly typed for these, so it makes sense to
254            # also allow them.  Many encoders seem to do something like this.
255            elif isinstance(key, float):
256                key = floatstr(key, allow_nan)
257            elif isinstance(key, (int, long)):
258                key = str(key)
259            elif key is True:
260                key = 'true'
261            elif key is False:
262                key = 'false'
263            elif key is None:
264                key = 'null'
265            elif self.skipkeys:
266                continue
267            else:
268                raise TypeError("key {0!r} is not a string".format(key))
269            if first:
270                first = False
271            else:
272                yield item_separator
273            yield encoder(key)
274            yield key_separator
275            for chunk in self._iterencode(value, markers):
276                yield chunk
277        if newline_indent is not None:
278            self.current_indent_level -= 1
279            yield self._newline_indent()
280        yield '}'
281        if markers is not None:
282            del markers[markerid]
283
284    def _iterencode(self, o, markers=None):
285        if isinstance(o, basestring):
286            if self.ensure_ascii:
287                encoder = encode_basestring_ascii
288            else:
289                encoder = encode_basestring
290            _encoding = self.encoding
291            if (_encoding is not None and isinstance(o, str)
292                    and not (_encoding == 'utf-8')):
293                o = o.decode(_encoding)
294            yield encoder(o)
295        elif o is None:
296            yield 'null'
297        elif o is True:
298            yield 'true'
299        elif o is False:
300            yield 'false'
301        elif isinstance(o, (int, long)):
302            yield str(o)
303        elif isinstance(o, float):
304            yield floatstr(o, self.allow_nan)
305        elif isinstance(o, (list, tuple)):
306            for chunk in self._iterencode_list(o, markers):
307                yield chunk
308        elif isinstance(o, dict):
309            for chunk in self._iterencode_dict(o, markers):
310                yield chunk
311        else:
312            if markers is not None:
313                markerid = id(o)
314                if markerid in markers:
315                    raise ValueError("Circular reference detected")
316                markers[markerid] = o
317            for chunk in self._iterencode_default(o, markers):
318                yield chunk
319            if markers is not None:
320                del markers[markerid]
321
322    def _iterencode_default(self, o, markers=None):
323        newobj = self.default(o)
324        return self._iterencode(newobj, markers)
325
326    def default(self, o):
327        """Implement this method in a subclass such that it returns a serializable
328        object for ``o``, or calls the base implementation (to raise a
329        ``TypeError``).
330
331        For example, to support arbitrary iterators, you could implement
332        default like this::
333
334            def default(self, o):
335                try:
336                    iterable = iter(o)
337                except TypeError:
338                    pass
339                else:
340                    return list(iterable)
341                return JSONEncoder.default(self, o)
342
343        """
344        raise TypeError(repr(o) + " is not JSON serializable")
345
346    def encode(self, o):
347        """Return a JSON string representation of a Python data structure.
348
349        >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
350        '{"foo": ["bar", "baz"]}'
351
352        """
353        # This is for extremely simple cases and benchmarks.
354        if isinstance(o, basestring):
355            if isinstance(o, str):
356                _encoding = self.encoding
357                if (_encoding is not None
358                        and not (_encoding == 'utf-8')):
359                    o = o.decode(_encoding)
360            if self.ensure_ascii:
361                return encode_basestring_ascii(o)
362            else:
363                return encode_basestring(o)
364        # This doesn't pass the iterator directly to ''.join() because the
365        # exceptions aren't as detailed.  The list call should be roughly
366        # equivalent to the PySequence_Fast that ''.join() would do.
367        chunks = list(self.iterencode(o))
368        return ''.join(chunks)
369
370    def iterencode(self, o):
371        """Encode the given object and yield each string representation as
372        available.
373
374        For example::
375
376            for chunk in JSONEncoder().iterencode(bigobject):
377                mysocket.write(chunk)
378
379        """
380        if self.check_circular:
381            markers = {}
382        else:
383            markers = None
384        return self._iterencode(o, markers)