blob: 4b214eb60e82130cb24d14f097b9c8f4896762cf [file] [log] [blame]
Christian Heimes90540002008-05-08 14:29:10 +00001"""Implementation of JSONEncoder
2"""
Christian Heimes90540002008-05-08 14:29:10 +00003import re
4
5try:
6 from _json import encode_basestring_ascii as c_encode_basestring_ascii
7except ImportError:
8 c_encode_basestring_ascii = None
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00009try:
10 from _json import make_encoder as c_make_encoder
11except ImportError:
12 c_make_encoder = None
Christian Heimes90540002008-05-08 14:29:10 +000013
# Characters that must be escaped inside a JSON string when non-ASCII
# output is allowed: the C0 control range plus backslash and double quote.
ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]')
# Characters that must be escaped for ASCII-only output: backslash, double
# quote, and anything outside the printable ASCII range ' ' .. '~'.
ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
# Matches any byte with the high bit set.
HAS_UTF8 = re.compile(b'[\x80-\xff]')
# Short escapes first; the loop below fills in \uXXXX escapes for every
# remaining control character without overwriting the short forms.
ESCAPE_DCT = {
    '\\': '\\\\',
    '"': '\\"',
    '\b': '\\b',
    '\f': '\\f',
    '\n': '\\n',
    '\r': '\\r',
    '\t': '\\t',
}
for i in range(0x20):
    ESCAPE_DCT.setdefault(chr(i), '\\u{0:04x}'.format(i))

# float('inf') is the explicitly supported way to obtain positive infinity;
# it does not depend on an oversized decimal literal overflowing.
INFINITY = float('inf')
FLOAT_REPR = repr
33
def encode_basestring(s):
    """Return ``s`` as a double-quoted JSON string.

    Every character matched by ``ESCAPE`` is replaced with its entry in
    ``ESCAPE_DCT``; all other characters are copied through unchanged.

    """
    escaped = ESCAPE.sub(lambda found: ESCAPE_DCT[found.group(0)], s)
    return '"' + escaped + '"'
41
42
def py_encode_basestring_ascii(s):
    """Return ``s`` as a double-quoted JSON string containing only ASCII.

    Characters matched by ``ESCAPE_ASCII`` are replaced either with their
    ``ESCAPE_DCT`` entry or with ``\\uXXXX`` escapes.

    """
    def escape_char(found):
        char = found.group(0)
        short_form = ESCAPE_DCT.get(char)
        if short_form is not None:
            return short_form
        code = ord(char)
        if code >= 0x10000:
            # Outside the BMP: emit a UTF-16 surrogate pair.
            code -= 0x10000
            high = 0xd800 | ((code >> 10) & 0x3ff)
            low = 0xdc00 | (code & 0x3ff)
            return '\\u{0:04x}\\u{1:04x}'.format(high, low)
        return '\\u{0:04x}'.format(code)

    return '"' + ESCAPE_ASCII.sub(escape_char, s) + '"'
Christian Heimes90540002008-05-08 14:29:10 +000063
64
# Use the C implementation when it was importable, else the Python one.
encode_basestring_ascii = c_encode_basestring_ascii or py_encode_basestring_ascii
Christian Heimes90540002008-05-08 14:29:10 +000067
class JSONEncoder(object):
    """Extensible JSON <http://json.org> encoder for Python data structures.

    Supports the following objects and types by default:

    +-------------------+---------------+
    | Python            | JSON          |
    +===================+===============+
    | dict              | object        |
    +-------------------+---------------+
    | list, tuple       | array         |
    +-------------------+---------------+
    | str               | string        |
    +-------------------+---------------+
    | int, float        | number        |
    +-------------------+---------------+
    | True              | true          |
    +-------------------+---------------+
    | False             | false         |
    +-------------------+---------------+
    | None              | null          |
    +-------------------+---------------+

    To support other types, subclass and override ``.default()`` so that
    it returns a serializable object for ``o`` when it can, and otherwise
    calls the superclass implementation (which raises ``TypeError``).

    """
    item_separator = ', '
    key_separator = ': '

    def __init__(self, skipkeys=False, ensure_ascii=True,
                 check_circular=True, allow_nan=True, sort_keys=False,
                 indent=None, separators=None, default=None):
        """Create an encoder; every option has a sensible default.

        skipkeys: when false, a dict key that is not a str, int, float,
        bool or None raises TypeError; when true, such items are skipped.

        ensure_ascii: when true, strings are encoded with the ASCII-only
        escaper, so non-ASCII characters are escaped; when false they
        are passed through.

        check_circular: when true, lists, dicts and objects handed to
        ``default`` are tracked while being encoded and a ValueError is
        raised if one is encountered inside itself.  When false no such
        tracking takes place.

        allow_nan: when true, NaN, Infinity and -Infinity are written
        out under those names (not valid per the JSON specification, but
        widely accepted by JavaScript based tools).  When false,
        encoding such a float raises ValueError.

        sort_keys: when true, dict items are emitted sorted by key, which
        makes the output reproducible.

        indent: None gives the most compact output.  An integer pretty
        prints with that many spaces per nesting level (0 inserts only
        newlines); a string is used as the per-level indent as is.

        separators: if given, an (item_separator, key_separator) pair
        replacing the defaults (', ', ': ').  Use (',', ':') for the most
        compact representation.

        default: if given, a function called for objects that cannot
        otherwise be serialized.  It should return an encodable version
        of the object or raise ``TypeError``.

        """
        self.indent = indent
        self.sort_keys = sort_keys
        self.allow_nan = allow_nan
        self.check_circular = check_circular
        self.ensure_ascii = ensure_ascii
        self.skipkeys = skipkeys
        # Only shadow the class-level separators / default() when the
        # caller actually supplied replacements.
        if separators is not None:
            item_sep, key_sep = separators
            self.item_separator = item_sep
            self.key_separator = key_sep
        if default is not None:
            self.default = default

    def default(self, o):
        """Hook for unsupported objects; this base version always raises.

        Override it in a subclass so that it returns a serializable
        object for ``o``, or defers to the base implementation to raise
        ``TypeError``.

        For example, to support arbitrary iterators, you could
        implement default like this::

            def default(self, o):
                try:
                    iterable = iter(o)
                except TypeError:
                    pass
                else:
                    return list(iterable)
                return JSONEncoder.default(self, o)

        """
        raise TypeError(repr(o) + " is not JSON serializable")

    def encode(self, o):
        """Return the JSON text for a Python data structure as one str.

        >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
        '{"foo": ["bar", "baz"]}'

        """
        # Shortcut for a bare string: no need to build a chunk iterator.
        if isinstance(o, str):
            escape = (encode_basestring_ascii if self.ensure_ascii
                      else encode_basestring)
            return escape(o)
        # The chunks are materialised before joining (instead of handing
        # the iterator to ''.join()) because the exceptions are more
        # detailed that way.
        pieces = self.iterencode(o, _one_shot=True)
        if not isinstance(pieces, (list, tuple)):
            pieces = list(pieces)
        return ''.join(pieces)

    def iterencode(self, o, _one_shot=False):
        """Encode ``o`` and return an iterable of string chunks.

        For example::

            for chunk in JSONEncoder().iterencode(bigobject):
                mysocket.write(chunk)

        """
        markers = {} if self.check_circular else None
        _encoder = (encode_basestring_ascii if self.ensure_ascii
                    else encode_basestring)

        def floatstr(o, allow_nan=self.allow_nan,
                     _repr=FLOAT_REPR, _inf=INFINITY, _neginf=-INFINITY):
            # Detect the special values with plain comparisons rather
            # than anything that depends on the float's internals.
            if o != o:
                special = 'NaN'
            elif o == _inf:
                special = 'Infinity'
            elif o == _neginf:
                special = '-Infinity'
            else:
                return _repr(o)

            if allow_nan:
                return special
            raise ValueError(
                "Out of range float values are not JSON compliant: " +
                repr(o))

        # The C encoder is only used for one-shot encoding without
        # indentation; everything else goes through the Python generator.
        use_c_encoder = (_one_shot and c_make_encoder is not None
                         and self.indent is None)
        if use_c_encoder:
            _iterencode = c_make_encoder(
                markers, self.default, _encoder, self.indent,
                self.key_separator, self.item_separator, self.sort_keys,
                self.skipkeys, self.allow_nan)
        else:
            _iterencode = _make_iterencode(
                markers, self.default, _encoder, self.indent, floatstr,
                self.key_separator, self.item_separator, self.sort_keys,
                self.skipkeys, _one_shot)
        return _iterencode(o, 0)
247
248def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
249 _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,
250 ## HACK: hand-optimized bytecode; turn globals into locals
251 ValueError=ValueError,
252 dict=dict,
253 float=float,
254 id=id,
255 int=int,
256 isinstance=isinstance,
257 list=list,
258 str=str,
259 tuple=tuple,
260 ):
261
Raymond Hettingerb643ef82010-10-31 08:00:16 +0000262 if _indent is not None and not isinstance(_indent, str):
263 _indent = ' ' * _indent
264
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000265 def _iterencode_list(lst, _current_indent_level):
266 if not lst:
267 yield '[]'
268 return
269 if markers is not None:
270 markerid = id(lst)
271 if markerid in markers:
272 raise ValueError("Circular reference detected")
273 markers[markerid] = lst
274 buf = '['
275 if _indent is not None:
276 _current_indent_level += 1
Raymond Hettingerb643ef82010-10-31 08:00:16 +0000277 newline_indent = '\n' + _indent * _current_indent_level
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000278 separator = _item_separator + newline_indent
279 buf += newline_indent
280 else:
281 newline_indent = None
282 separator = _item_separator
283 first = True
284 for value in lst:
285 if first:
286 first = False
287 else:
288 buf = separator
289 if isinstance(value, str):
290 yield buf + _encoder(value)
291 elif value is None:
292 yield buf + 'null'
293 elif value is True:
294 yield buf + 'true'
295 elif value is False:
296 yield buf + 'false'
297 elif isinstance(value, int):
298 yield buf + str(value)
299 elif isinstance(value, float):
300 yield buf + _floatstr(value)
301 else:
302 yield buf
303 if isinstance(value, (list, tuple)):
304 chunks = _iterencode_list(value, _current_indent_level)
305 elif isinstance(value, dict):
306 chunks = _iterencode_dict(value, _current_indent_level)
307 else:
308 chunks = _iterencode(value, _current_indent_level)
309 for chunk in chunks:
310 yield chunk
311 if newline_indent is not None:
312 _current_indent_level -= 1
Raymond Hettingerb643ef82010-10-31 08:00:16 +0000313 yield '\n' + _indent * _current_indent_level
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000314 yield ']'
315 if markers is not None:
316 del markers[markerid]
317
318 def _iterencode_dict(dct, _current_indent_level):
319 if not dct:
320 yield '{}'
321 return
322 if markers is not None:
323 markerid = id(dct)
324 if markerid in markers:
325 raise ValueError("Circular reference detected")
326 markers[markerid] = dct
327 yield '{'
328 if _indent is not None:
329 _current_indent_level += 1
Raymond Hettingerb643ef82010-10-31 08:00:16 +0000330 newline_indent = '\n' + _indent * _current_indent_level
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000331 item_separator = _item_separator + newline_indent
332 yield newline_indent
333 else:
334 newline_indent = None
335 item_separator = _item_separator
336 first = True
337 if _sort_keys:
338 items = sorted(dct.items(), key=lambda kv: kv[0])
339 else:
340 items = dct.items()
341 for key, value in items:
342 if isinstance(key, str):
343 pass
344 # JavaScript is weakly typed for these, so it makes sense to
345 # also allow them. Many encoders seem to do something like this.
346 elif isinstance(key, float):
347 key = _floatstr(key)
348 elif key is True:
349 key = 'true'
350 elif key is False:
351 key = 'false'
352 elif key is None:
353 key = 'null'
354 elif isinstance(key, int):
355 key = str(key)
356 elif _skipkeys:
357 continue
358 else:
359 raise TypeError("key " + repr(key) + " is not a string")
360 if first:
361 first = False
362 else:
363 yield item_separator
364 yield _encoder(key)
365 yield _key_separator
366 if isinstance(value, str):
367 yield _encoder(value)
368 elif value is None:
369 yield 'null'
370 elif value is True:
371 yield 'true'
372 elif value is False:
373 yield 'false'
374 elif isinstance(value, int):
375 yield str(value)
376 elif isinstance(value, float):
377 yield _floatstr(value)
378 else:
379 if isinstance(value, (list, tuple)):
380 chunks = _iterencode_list(value, _current_indent_level)
381 elif isinstance(value, dict):
382 chunks = _iterencode_dict(value, _current_indent_level)
383 else:
384 chunks = _iterencode(value, _current_indent_level)
385 for chunk in chunks:
386 yield chunk
387 if newline_indent is not None:
388 _current_indent_level -= 1
Raymond Hettingerb643ef82010-10-31 08:00:16 +0000389 yield '\n' + _indent * _current_indent_level
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000390 yield '}'
391 if markers is not None:
392 del markers[markerid]
393
394 def _iterencode(o, _current_indent_level):
395 if isinstance(o, str):
396 yield _encoder(o)
397 elif o is None:
398 yield 'null'
399 elif o is True:
400 yield 'true'
401 elif o is False:
402 yield 'false'
Florent Xicluna02ea12b22010-07-28 16:39:41 +0000403 elif isinstance(o, int):
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000404 yield str(o)
405 elif isinstance(o, float):
406 yield _floatstr(o)
407 elif isinstance(o, (list, tuple)):
408 for chunk in _iterencode_list(o, _current_indent_level):
409 yield chunk
410 elif isinstance(o, dict):
411 for chunk in _iterencode_dict(o, _current_indent_level):
412 yield chunk
413 else:
414 if markers is not None:
415 markerid = id(o)
416 if markerid in markers:
417 raise ValueError("Circular reference detected")
418 markers[markerid] = o
419 o = _default(o)
420 for chunk in _iterencode(o, _current_indent_level):
421 yield chunk
422 if markers is not None:
423 del markers[markerid]
424 return _iterencode