blob: 13359856d29057ba9690550a3ee2e51f2198239a [file] [log] [blame]
Christian Heimes90540002008-05-08 14:29:10 +00001"""Implementation of JSONEncoder
2"""
Christian Heimes90540002008-05-08 14:29:10 +00003import re
4
5try:
6 from _json import encode_basestring_ascii as c_encode_basestring_ascii
7except ImportError:
8 c_encode_basestring_ascii = None
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00009try:
10 from _json import make_encoder as c_make_encoder
11except ImportError:
12 c_make_encoder = None
Christian Heimes90540002008-05-08 14:29:10 +000013
# Characters that must be escaped inside a JSON string when non-ASCII output
# is allowed: control characters, the backslash and the double quote.
ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]')
# Characters that must be escaped for ASCII-only output: the backslash, the
# double quote, and anything outside the range from space through tilde.
ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
# Matches any byte with the high bit set.
HAS_UTF8 = re.compile(b'[\x80-\xff]')
# Maps a character to its JSON escape sequence.  The short two-character
# escapes are listed explicitly; the loop below fills in ``\uXXXX`` escapes
# for every remaining control character below 0x20.
ESCAPE_DCT = {
    '\\': '\\\\',
    '"': '\\"',
    '\b': '\\b',
    '\f': '\\f',
    '\n': '\\n',
    '\r': '\\r',
    '\t': '\\t',
}
for i in range(0x20):
    # setdefault keeps the short escapes defined above.
    ESCAPE_DCT.setdefault(chr(i), '\\u{0:04x}'.format(i))

# Ask for infinity by name instead of relying on the overflow of a huge
# decimal literal.
INFINITY = float('inf')
# Use float's own repr so that a float subclass overriding __repr__ cannot
# inject text that is not a valid JSON number.
FLOAT_REPR = float.__repr__
33
def encode_basestring(s):
    """Return a JSON representation of a Python string

    Every character matched by ``ESCAPE`` is replaced with its entry in
    ``ESCAPE_DCT``; the result is wrapped in double quotes.  Non-ASCII
    characters are passed through unchanged.

    """
    escaped = ESCAPE.sub(lambda found: ESCAPE_DCT[found.group(0)], s)
    return ''.join(('"', escaped, '"'))
41
42
def py_encode_basestring_ascii(s):
    """Return an ASCII-only JSON representation of a Python string

    Characters matched by ``ESCAPE_ASCII`` are replaced either with their
    entry in ``ESCAPE_DCT`` or with ``\\uXXXX`` escapes; code points at or
    above 0x10000 are written as a pair of surrogate escapes.

    """
    def _escape(found):
        char = found.group(0)
        if char in ESCAPE_DCT:
            return ESCAPE_DCT[char]
        code = ord(char)
        if code >= 0x10000:
            # Does not fit in four hex digits: split into a surrogate pair.
            offset = code - 0x10000
            high = 0xd800 | ((offset >> 10) & 0x3ff)
            low = 0xdc00 | (offset & 0x3ff)
            return '\\u{0:04x}\\u{1:04x}'.format(high, low)
        return '\\u{0:04x}'.format(code)

    body = ESCAPE_ASCII.sub(_escape, s)
    return '"' + body + '"'
Christian Heimes90540002008-05-08 14:29:10 +000063
64
# Prefer the C implementation when the _json import provided one; otherwise
# fall back to the pure-Python version.
if c_encode_basestring_ascii:
    encode_basestring_ascii = c_encode_basestring_ascii
else:
    encode_basestring_ascii = py_encode_basestring_ascii
Christian Heimes90540002008-05-08 14:29:10 +000067
class JSONEncoder(object):
    """Extensible JSON <http://json.org> encoder for Python data structures.

    Supports the following objects and types by default:

    +-------------------+---------------+
    | Python            | JSON          |
    +===================+===============+
    | dict              | object        |
    +-------------------+---------------+
    | list, tuple       | array         |
    +-------------------+---------------+
    | str               | string        |
    +-------------------+---------------+
    | int, float        | number        |
    +-------------------+---------------+
    | True              | true          |
    +-------------------+---------------+
    | False             | false         |
    +-------------------+---------------+
    | None              | null          |
    +-------------------+---------------+

    To extend this to recognize other objects, subclass and implement a
    ``.default()`` method that returns a serializable object for ``o`` if
    possible; otherwise it should call the superclass implementation (to
    raise ``TypeError``).

    """
    # Class-level defaults; __init__ overrides them per instance when a
    # ``separators`` tuple is supplied.
    item_separator = ', '
    key_separator = ': '

    def __init__(self, skipkeys=False, ensure_ascii=True,
            check_circular=True, allow_nan=True, sort_keys=False,
            indent=None, separators=None, default=None):
        """Constructor for JSONEncoder, with sensible defaults.

        If skipkeys is false, then it is a TypeError to attempt
        encoding of keys that are not str, int, float or None.  If
        skipkeys is True, such items are simply skipped.

        If ensure_ascii is true, the output is guaranteed to be str
        objects with all incoming non-ASCII characters escaped.  If
        ensure_ascii is false, the output can contain non-ASCII characters.

        If check_circular is true, then lists, dicts, and custom encoded
        objects will be checked for circular references during encoding to
        prevent infinite recursion.  Otherwise, no such check takes place.

        If allow_nan is true, then NaN, Infinity, and -Infinity will be
        encoded as such.  This behavior is not JSON specification compliant,
        but is consistent with most JavaScript based encoders and decoders.
        Otherwise, it will be a ValueError to encode such floats.

        If sort_keys is true, then the output of dictionaries will be
        sorted by key; this is useful for regression tests to ensure
        that JSON serializations can be compared on a day-to-day basis.

        If indent is a non-negative integer, then JSON array
        elements and object members will be pretty-printed with that
        indent level.  An indent level of 0 will only insert newlines.
        None is the most compact representation.

        If specified, separators should be a (item_separator, key_separator)
        tuple.  The default is (', ', ': ').  To get the most compact JSON
        representation you should specify (',', ':') to eliminate whitespace.

        If specified, default is a function that gets called for objects
        that can't otherwise be serialized.  It should return a JSON encodable
        version of the object or raise a ``TypeError``.

        """

        self.skipkeys = skipkeys
        self.ensure_ascii = ensure_ascii
        self.check_circular = check_circular
        self.allow_nan = allow_nan
        self.sort_keys = sort_keys
        self.indent = indent
        if separators is not None:
            self.item_separator, self.key_separator = separators
        if default is not None:
            # Shadows the default() method with the caller's function on
            # this instance only.
            self.default = default

    def default(self, o):
        """Implement this method in a subclass such that it returns
        a serializable object for ``o``, or calls the base implementation
        (to raise a ``TypeError``).

        For example, to support arbitrary iterators, you could
        implement default like this::

            def default(self, o):
                try:
                    iterable = iter(o)
                except TypeError:
                    pass
                else:
                    return list(iterable)
                return JSONEncoder.default(self, o)

        """
        raise TypeError(repr(o) + " is not JSON serializable")

    def encode(self, o):
        """Return a JSON string representation of a Python data structure.

        >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
        '{"foo": ["bar", "baz"]}'

        """
        # This is for extremely simple cases and benchmarks.
        if isinstance(o, str):
            if self.ensure_ascii:
                return encode_basestring_ascii(o)
            else:
                return encode_basestring(o)
        # This doesn't pass the iterator directly to ''.join() because the
        # exceptions aren't as detailed.  The list call should be roughly
        # equivalent to the PySequence_Fast that ''.join() would do.
        chunks = self.iterencode(o, _one_shot=True)
        if not isinstance(chunks, (list, tuple)):
            chunks = list(chunks)
        return ''.join(chunks)

    def iterencode(self, o, _one_shot=False):
        """Encode the given object and yield each string
        representation as available.

        For example::

            for chunk in JSONEncoder().iterencode(bigobject):
                mysocket.write(chunk)

        ``_one_shot`` is an internal flag set by ``encode()``; it allows
        the C encoder to be used when one is available.

        """
        if self.check_circular:
            markers = {}
        else:
            markers = None
        if self.ensure_ascii:
            _encoder = encode_basestring_ascii
        else:
            _encoder = encode_basestring

        def floatstr(o, allow_nan=self.allow_nan,
                _repr=FLOAT_REPR, _inf=INFINITY, _neginf=-INFINITY):
            # Check for specials.  Note that this type of test is processor
            # and/or platform-specific, so do tests which don't depend on the
            # internals.

            if o != o:
                # NaN is the only float that is not equal to itself.
                text = 'NaN'
            elif o == _inf:
                text = 'Infinity'
            elif o == _neginf:
                text = '-Infinity'
            else:
                return _repr(o)

            if not allow_nan:
                raise ValueError(
                    "Out of range float values are not JSON compliant: " +
                    repr(o))

            return text

        # Any requested indentation, including indent=0 (newlines only), is
        # routed to the Python implementation.  Testing truthiness here
        # would treat indent=0 the same as indent=None.
        if (_one_shot and c_make_encoder is not None
                and self.indent is None):
            _iterencode = c_make_encoder(
                markers, self.default, _encoder, self.indent,
                self.key_separator, self.item_separator, self.sort_keys,
                self.skipkeys, self.allow_nan)
        else:
            _iterencode = _make_iterencode(
                markers, self.default, _encoder, self.indent, floatstr,
                self.key_separator, self.item_separator, self.sort_keys,
                self.skipkeys, _one_shot)
        return _iterencode(o, 0)
247
248def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
249 _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,
250 ## HACK: hand-optimized bytecode; turn globals into locals
251 ValueError=ValueError,
252 dict=dict,
253 float=float,
254 id=id,
255 int=int,
256 isinstance=isinstance,
257 list=list,
258 str=str,
259 tuple=tuple,
260 ):
261
262 def _iterencode_list(lst, _current_indent_level):
263 if not lst:
264 yield '[]'
265 return
266 if markers is not None:
267 markerid = id(lst)
268 if markerid in markers:
269 raise ValueError("Circular reference detected")
270 markers[markerid] = lst
271 buf = '['
272 if _indent is not None:
273 _current_indent_level += 1
274 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
275 separator = _item_separator + newline_indent
276 buf += newline_indent
277 else:
278 newline_indent = None
279 separator = _item_separator
280 first = True
281 for value in lst:
282 if first:
283 first = False
284 else:
285 buf = separator
286 if isinstance(value, str):
287 yield buf + _encoder(value)
288 elif value is None:
289 yield buf + 'null'
290 elif value is True:
291 yield buf + 'true'
292 elif value is False:
293 yield buf + 'false'
294 elif isinstance(value, int):
295 yield buf + str(value)
296 elif isinstance(value, float):
297 yield buf + _floatstr(value)
298 else:
299 yield buf
300 if isinstance(value, (list, tuple)):
301 chunks = _iterencode_list(value, _current_indent_level)
302 elif isinstance(value, dict):
303 chunks = _iterencode_dict(value, _current_indent_level)
304 else:
305 chunks = _iterencode(value, _current_indent_level)
306 for chunk in chunks:
307 yield chunk
308 if newline_indent is not None:
309 _current_indent_level -= 1
310 yield '\n' + (' ' * (_indent * _current_indent_level))
311 yield ']'
312 if markers is not None:
313 del markers[markerid]
314
315 def _iterencode_dict(dct, _current_indent_level):
316 if not dct:
317 yield '{}'
318 return
319 if markers is not None:
320 markerid = id(dct)
321 if markerid in markers:
322 raise ValueError("Circular reference detected")
323 markers[markerid] = dct
324 yield '{'
325 if _indent is not None:
326 _current_indent_level += 1
327 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
328 item_separator = _item_separator + newline_indent
329 yield newline_indent
330 else:
331 newline_indent = None
332 item_separator = _item_separator
333 first = True
334 if _sort_keys:
335 items = sorted(dct.items(), key=lambda kv: kv[0])
336 else:
337 items = dct.items()
338 for key, value in items:
339 if isinstance(key, str):
340 pass
341 # JavaScript is weakly typed for these, so it makes sense to
342 # also allow them. Many encoders seem to do something like this.
343 elif isinstance(key, float):
344 key = _floatstr(key)
345 elif key is True:
346 key = 'true'
347 elif key is False:
348 key = 'false'
349 elif key is None:
350 key = 'null'
351 elif isinstance(key, int):
352 key = str(key)
353 elif _skipkeys:
354 continue
355 else:
356 raise TypeError("key " + repr(key) + " is not a string")
357 if first:
358 first = False
359 else:
360 yield item_separator
361 yield _encoder(key)
362 yield _key_separator
363 if isinstance(value, str):
364 yield _encoder(value)
365 elif value is None:
366 yield 'null'
367 elif value is True:
368 yield 'true'
369 elif value is False:
370 yield 'false'
371 elif isinstance(value, int):
372 yield str(value)
373 elif isinstance(value, float):
374 yield _floatstr(value)
375 else:
376 if isinstance(value, (list, tuple)):
377 chunks = _iterencode_list(value, _current_indent_level)
378 elif isinstance(value, dict):
379 chunks = _iterencode_dict(value, _current_indent_level)
380 else:
381 chunks = _iterencode(value, _current_indent_level)
382 for chunk in chunks:
383 yield chunk
384 if newline_indent is not None:
385 _current_indent_level -= 1
386 yield '\n' + (' ' * (_indent * _current_indent_level))
387 yield '}'
388 if markers is not None:
389 del markers[markerid]
390
391 def _iterencode(o, _current_indent_level):
392 if isinstance(o, str):
393 yield _encoder(o)
394 elif o is None:
395 yield 'null'
396 elif o is True:
397 yield 'true'
398 elif o is False:
399 yield 'false'
Florent Xicluna02ea12b22010-07-28 16:39:41 +0000400 elif isinstance(o, int):
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000401 yield str(o)
402 elif isinstance(o, float):
403 yield _floatstr(o)
404 elif isinstance(o, (list, tuple)):
405 for chunk in _iterencode_list(o, _current_indent_level):
406 yield chunk
407 elif isinstance(o, dict):
408 for chunk in _iterencode_dict(o, _current_indent_level):
409 yield chunk
410 else:
411 if markers is not None:
412 markerid = id(o)
413 if markerid in markers:
414 raise ValueError("Circular reference detected")
415 markers[markerid] = o
416 o = _default(o)
417 for chunk in _iterencode(o, _current_indent_level):
418 yield chunk
419 if markers is not None:
420 del markers[markerid]
421 return _iterencode