PageRenderTime 16ms CodeModel.GetById 1ms app.highlight 12ms RepoModel.GetById 1ms app.codeStats 0ms

/examples/jsonrpc/public/services/simplejson/encoder.py

http://pyjamas.googlecode.com/
Python | 307 lines | 225 code | 13 blank | 69 comment | 6 complexity | 4ac8f59b7794e6a4a5dfb13146a80107 MD5 | raw file
  1"""
  2Implementation of JSONEncoder
  3"""
  4import re
  5
  6ESCAPE = re.compile(r'[\x00-\x19\\"\b\f\n\r\t]')
  7ESCAPE_ASCII = re.compile(r'([\\"/]|[^\ -~])')
  8ESCAPE_DCT = {
  9    # escape all forward slashes to prevent </script> attack
 10    '/': '\\/',
 11    '\\': '\\\\',
 12    '"': '\\"',
 13    '\b': '\\b',
 14    '\f': '\\f',
 15    '\n': '\\n',
 16    '\r': '\\r',
 17    '\t': '\\t',
 18}
 19for i in range(20):
 20    ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,))
 21
 22def floatstr(o, allow_nan=True):
 23    # Check for specials.  Note that this type of test is processor- and/or
 24    # platform-specific, so do tests which don't depend on the internals.
 25
 26    # assume this produces an infinity on all machines (probably not guaranteed)
 27    INFINITY = 1e66666
 28
 29    if o != o:
 30        text = 'NaN'
 31    elif o == INFINITY:
 32        text = 'Infinity'
 33    elif o == -INFINITY:
 34        text = '-Infinity'
 35    else:
 36        return str(o)
 37
 38    if not allow_nan:
 39        raise ValueError("Out of range float values are not JSON compliant: %r"
 40            % (o,))
 41
 42    return text
 43
 44
 45def encode_basestring(s):
 46    """
 47    Return a JSON representation of a Python string
 48    """
 49    def replace(match):
 50        return ESCAPE_DCT[match.group(0)]
 51    return '"' + ESCAPE.sub(replace, s) + '"'
 52
 53def encode_basestring_ascii(s):
 54    def replace(match):
 55        s = match.group(0)
 56        try:
 57            return ESCAPE_DCT[s]
 58        except KeyError:
 59            return '\\u%04x' % (ord(s),)
 60    return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"'
 61        
 62
 63class JSONEncoder(object):
 64    """
 65    Extensible JSON <http://json.org> encoder for Python data structures.
 66
 67    Supports the following objects and types by default:
 68    
 69    +-------------------+---------------+
 70    | Python            | JSON          |
 71    +===================+===============+
 72    | dict              | object        |
 73    +-------------------+---------------+
 74    | list, tuple       | array         |
 75    +-------------------+---------------+
 76    | str, unicode      | string        |
 77    +-------------------+---------------+
 78    | int, long, float  | number        |
 79    +-------------------+---------------+
 80    | True              | true          |
 81    +-------------------+---------------+
 82    | False             | false         |
 83    +-------------------+---------------+
 84    | None              | null          |
 85    +-------------------+---------------+
 86
 87    To extend this to recognize other objects, subclass and implement a
 88    ``.default()`` method with another method that returns a serializable
 89    object for ``o`` if possible, otherwise it should call the superclass
 90    implementation (to raise ``TypeError``).
 91    """
 92    __all__ = ['__init__', 'default', 'encode', 'iterencode']
 93    def __init__(self, skipkeys=False, ensure_ascii=True,
 94            check_circular=True, allow_nan=True, sort_keys=False, indent=None):
 95        """
 96        Constructor for JSONEncoder, with sensible defaults.
 97
 98        If skipkeys is False, then it is a TypeError to attempt
 99        encoding of keys that are not str, int, long, float or None.  If
100        skipkeys is True, such items are simply skipped.
101
102        If ensure_ascii is True, the output is guaranteed to be str
103        objects with all incoming unicode characters escaped.  If
104        ensure_ascii is false, the output will be unicode object.
105
106        If check_circular is True, then lists, dicts, and custom encoded
107        objects will be checked for circular references during encoding to
108        prevent an infinite recursion (which would cause an OverflowError).
109        Otherwise, no such check takes place.
110
111        If allow_nan is True, then NaN, Infinity, and -Infinity will be
112        encoded as such.  This behavior is not JSON specification compliant,
113        but is consistent with most JavaScript based encoders and decoders.
114        Otherwise, it will be a ValueError to encode such floats.
115
116        If sort_keys is True, then the output of dictionaries will be
117        sorted by key; this is useful for regression tests to ensure
118        that JSON serializations can be compared on a day-to-day basis.
119
120        If ``indent`` is a non-negative integer, then JSON array
121        elements and object members will be pretty-printed with that
122        indent level.  An indent level of 0 will only insert newlines.
123        ``None`` is the most compact representation.
124        """
125
126        self.skipkeys = skipkeys
127        self.ensure_ascii = ensure_ascii
128        self.check_circular = check_circular
129        self.allow_nan = allow_nan
130        self.sort_keys = sort_keys
131        self.indent = indent
132        self.current_indent_level = 0
133
134    def _newline_indent(self):
135        if self.indent is None:
136            return ''
137        return '\n' + (' ' * (self.indent * self.current_indent_level))
138
139    def _iterencode_list(self, lst, markers=None):
140        if not lst:
141            yield '[]'
142            return
143        if markers is not None:
144            markerid = id(lst)
145            if markerid in markers:
146                raise ValueError("Circular reference detected")
147            markers[markerid] = lst
148        self.current_indent_level += 1
149        newline_indent = self._newline_indent()
150        yield '[' + newline_indent
151        first = True
152        for value in lst:
153            if first:
154                first = False
155            else:
156                yield ', ' + newline_indent
157            for chunk in self._iterencode(value, markers):
158                yield chunk
159        self.current_indent_level -= 1
160        yield self._newline_indent() + ']'
161        if markers is not None:
162            del markers[markerid]
163
164    def _iterencode_dict(self, dct, markers=None):
165        if not dct:
166            yield '{}'
167            return
168        if markers is not None:
169            markerid = id(dct)
170            if markerid in markers:
171                raise ValueError("Circular reference detected")
172            markers[markerid] = dct
173        self.current_indent_level += 1
174        newline_indent = self._newline_indent()
175        yield '{' + newline_indent
176        first = True
177        if self.ensure_ascii:
178            encoder = encode_basestring_ascii
179        else:
180            encoder = encode_basestring
181        allow_nan = self.allow_nan
182        if self.sort_keys:
183            keys = dct.keys()
184            keys.sort()
185            items = [(k,dct[k]) for k in keys]
186        else:
187            items = dct.iteritems()
188        for key, value in items:
189            if isinstance(key, basestring):
190                pass
191            # JavaScript is weakly typed for these, so it makes sense to
192            # also allow them.  Many encoders seem to do something like this.
193            elif isinstance(key, float):
194                key = floatstr(key, allow_nan)
195            elif isinstance(key, (int, long)):
196                key = str(key)
197            elif key is True:
198                key = 'true'
199            elif key is False:
200                key = 'false'
201            elif key is None:
202                key = 'null'
203            elif self.skipkeys:
204                continue
205            else:
206                raise TypeError("key %r is not a string" % (key,))
207            if first:
208                first = False
209            else:
210                yield ', ' + newline_indent
211            yield encoder(key)
212            yield ': '
213            for chunk in self._iterencode(value, markers):
214                yield chunk
215        self.current_indent_level -= 1
216        yield self._newline_indent() + '}'
217        if markers is not None:
218            del markers[markerid]
219
220    def _iterencode(self, o, markers=None):
221        if isinstance(o, basestring):
222            if self.ensure_ascii:
223                encoder = encode_basestring_ascii
224            else:
225                encoder = encode_basestring
226            yield encoder(o)
227        elif o is None:
228            yield 'null'
229        elif o is True:
230            yield 'true'
231        elif o is False:
232            yield 'false'
233        elif isinstance(o, (int, long)):
234            yield str(o)
235        elif isinstance(o, float):
236            yield floatstr(o, self.allow_nan)
237        elif isinstance(o, (list, tuple)):
238            for chunk in self._iterencode_list(o, markers):
239                yield chunk
240        elif isinstance(o, dict):
241            for chunk in self._iterencode_dict(o, markers):
242                yield chunk
243        else:
244            if markers is not None:
245                markerid = id(o)
246                if markerid in markers:
247                    raise ValueError("Circular reference detected")
248                markers[markerid] = o
249            for chunk in self._iterencode_default(o, markers):
250                yield chunk
251            if markers is not None:
252                del markers[markerid]
253
254    def _iterencode_default(self, o, markers=None):
255        newobj = self.default(o)
256        return self._iterencode(newobj, markers)
257
258    def default(self, o):
259        """
260        Implement this method in a subclass such that it returns
261        a serializable object for ``o``, or calls the base implementation
262        (to raise a ``TypeError``).
263
264        For example, to support arbitrary iterators, you could
265        implement default like this::
266            
267            def default(self, o):
268                try:
269                    iterable = iter(o)
270                except TypeError:
271                    pass
272                else:
273                    return list(iterable)
274                return JSONEncoder.default(self, o)
275        """
276        raise TypeError("%r is not JSON serializable" % (o,))
277
278    def encode(self, o):
279        """
280        Return a JSON string representation of a Python data structure.
281
282        >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
283        '{"foo":["bar", "baz"]}'
284        """
285        # This doesn't pass the iterator directly to ''.join() because it
286        # sucks at reporting exceptions.  It's going to do this internally
287        # anyway because it uses PySequence_Fast or similar.
288        chunks = list(self.iterencode(o))
289        return ''.join(chunks)
290
291    def iterencode(self, o):
292        """
293        Encode the given object and yield each string
294        representation as available.
295        
296        For example::
297            
298            for chunk in JSONEncoder().iterencode(bigobject):
299                mysocket.write(chunk)
300        """
301        if self.check_circular:
302            markers = {}
303        else:
304            markers = None
305        return self._iterencode(o, markers)
306
307__all__ = ['JSONEncoder']