"""Implementation of JSONEncoder
"""
Christian Heimes90540002008-05-08 14:29:10 +00003import re
4
5try:
6 from _json import encode_basestring_ascii as c_encode_basestring_ascii
7except ImportError:
8 c_encode_basestring_ascii = None
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00009try:
10 from _json import make_encoder as c_make_encoder
11except ImportError:
12 c_make_encoder = None
Christian Heimes90540002008-05-08 14:29:10 +000013
# Characters that must be escaped inside a JSON string when non-ASCII output
# is allowed: control characters, the backslash and the double quote.
ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]')
# Characters that must be escaped for ASCII-only output: the backslash, the
# double quote, and everything outside the range space..'~'.
ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
# Matches any byte with the high bit set.
HAS_UTF8 = re.compile(b'[\x80-\xff]')
# Maps a character to its JSON escape sequence.  The short two-character
# escapes are listed explicitly; the loop below fills in the remaining
# control characters.
ESCAPE_DCT = {
    '\\': '\\\\',
    '"': '\\"',
    '\b': '\\b',
    '\f': '\\f',
    '\n': '\\n',
    '\r': '\\r',
    '\t': '\\t',
}
# setdefault() keeps the short escapes above and gives every other character
# below 0x20 a \uXXXX escape.
for i in range(0x20):
    ESCAPE_DCT.setdefault(chr(i), '\\u{0:04x}'.format(i))

INFINITY = float('inf')
# Callable used to render ordinary (finite) floats.
FLOAT_REPR = repr

def encode_basestring(s):
    """Return a JSON representation of a Python string.

    The result is ``s`` wrapped in double quotes, with every character
    matched by ``ESCAPE`` replaced by its entry in ``ESCAPE_DCT``.  All other
    characters, including non-ASCII ones, are copied through unchanged.

    """
    def replace(match):
        # ESCAPE only matches single characters that are keys of ESCAPE_DCT.
        return ESCAPE_DCT[match.group(0)]
    return '"' + ESCAPE.sub(replace, s) + '"'


def py_encode_basestring_ascii(s):
    """Return an ASCII-only JSON representation of a Python string.

    The result is ``s`` wrapped in double quotes.  Characters with an entry
    in ``ESCAPE_DCT`` use that escape; any other character matched by
    ``ESCAPE_ASCII`` is written as ``\\uXXXX``, or as a UTF-16 surrogate pair
    of two ``\\uXXXX`` escapes when its code point is 0x10000 or above.

    """
    def replace(match):
        char = match.group(0)
        try:
            return ESCAPE_DCT[char]
        except KeyError:
            n = ord(char)
            if n < 0x10000:
                return '\\u{0:04x}'.format(n)
            else:
                # surrogate pair
                n -= 0x10000
                s1 = 0xd800 | ((n >> 10) & 0x3ff)
                s2 = 0xdc00 | (n & 0x3ff)
                return '\\u{0:04x}\\u{1:04x}'.format(s1, s2)
    return '"' + ESCAPE_ASCII.sub(replace, s) + '"'


# Use the C implementation when the import above found one; otherwise fall
# back to the pure-Python version.
encode_basestring_ascii = (
    c_encode_basestring_ascii or py_encode_basestring_ascii)

class JSONEncoder(object):
    """Extensible JSON <http://json.org> encoder for Python data structures.

    Supports the following objects and types by default:

    +-------------------+---------------+
    | Python            | JSON          |
    +===================+===============+
    | dict              | object        |
    +-------------------+---------------+
    | list, tuple       | array         |
    +-------------------+---------------+
    | str               | string        |
    +-------------------+---------------+
    | int, float        | number        |
    +-------------------+---------------+
    | True              | true          |
    +-------------------+---------------+
    | False             | false         |
    +-------------------+---------------+
    | None              | null          |
    +-------------------+---------------+

    To extend this to recognize other objects, subclass and implement a
    ``.default()`` method that returns a serializable object for ``o`` if
    possible; otherwise it should call the superclass implementation (to
    raise ``TypeError``).

    """
    # Class-level defaults; __init__ overrides them per instance when a
    # ``separators`` tuple is supplied.
    item_separator = ', '
    key_separator = ': '

    def __init__(self, skipkeys=False, ensure_ascii=True,
            check_circular=True, allow_nan=True, sort_keys=False,
            indent=None, separators=None, default=None):
        """Constructor for JSONEncoder, with sensible defaults.

        If skipkeys is false, then it is a TypeError to attempt
        encoding of keys that are not str, int, float or None.  If
        skipkeys is True, such items are simply skipped.

        If ensure_ascii is true, the output is guaranteed to be str
        objects with all incoming non-ASCII characters escaped.  If
        ensure_ascii is false, the output can contain non-ASCII characters.

        If check_circular is true, then lists, dicts, and custom encoded
        objects will be checked for circular references during encoding
        and a ValueError is raised when one is found.  Otherwise, no such
        check takes place and a circular structure recurses until the
        interpreter's recursion limit is hit.

        If allow_nan is true, then NaN, Infinity, and -Infinity will be
        encoded as such.  This behavior is not JSON specification compliant,
        but is consistent with most JavaScript based encoders and decoders.
        Otherwise, it will be a ValueError to encode such floats.

        If sort_keys is true, then the output of dictionaries will be
        sorted by key; this is useful for regression tests to ensure
        that JSON serializations can be compared on a day-to-day basis.

        If indent is a non-negative integer, then JSON array
        elements and object members will be pretty-printed with that
        indent level.  An indent level of 0 will only insert newlines.
        None is the most compact representation.  Since the default
        item separator is ', ',  the output might include trailing
        whitespace when indent is specified.  You can use
        separators=(',', ': ') to avoid this.

        If specified, separators should be a (item_separator, key_separator)
        tuple.  The default is (', ', ': ').  To get the most compact JSON
        representation you should specify (',', ':') to eliminate whitespace.

        If specified, default is a function that gets called for objects
        that can't otherwise be serialized.  It should return a JSON encodable
        version of the object or raise a ``TypeError``.

        """

        self.skipkeys = skipkeys
        self.ensure_ascii = ensure_ascii
        self.check_circular = check_circular
        self.allow_nan = allow_nan
        self.sort_keys = sort_keys
        self.indent = indent
        if separators is not None:
            self.item_separator, self.key_separator = separators
        if default is not None:
            # An instance attribute shadows the default() method below.
            self.default = default

    def default(self, o):
        """Implement this method in a subclass such that it returns
        a serializable object for ``o``, or calls the base implementation
        (to raise a ``TypeError``).

        For example, to support arbitrary iterators, you could
        implement default like this::

            def default(self, o):
                try:
                    iterable = iter(o)
                except TypeError:
                    pass
                else:
                    return list(iterable)
                # Let the base class default method raise the TypeError
                return JSONEncoder.default(self, o)

        """
        raise TypeError(repr(o) + " is not JSON serializable")

    def encode(self, o):
        """Return a JSON string representation of a Python data structure.

        >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
        '{"foo": ["bar", "baz"]}'

        """
        # This is for extremely simple cases and benchmarks.
        if isinstance(o, str):
            if self.ensure_ascii:
                return encode_basestring_ascii(o)
            else:
                return encode_basestring(o)
        # This doesn't pass the iterator directly to ''.join() because the
        # exceptions aren't as detailed.  The list call should be roughly
        # equivalent to the PySequence_Fast that ''.join() would do.
        chunks = self.iterencode(o, _one_shot=True)
        if not isinstance(chunks, (list, tuple)):
            chunks = list(chunks)
        return ''.join(chunks)

    def iterencode(self, o, _one_shot=False):
        """Encode the given object and yield each string
        representation as available.

        For example::

            for chunk in JSONEncoder().iterencode(bigobject):
                mysocket.write(chunk)

        ``_one_shot`` is set by encode(); when it is true, the C encoder is
        available and no indent was requested, the C encoder is used
        instead of the pure-Python generator.

        """
        if self.check_circular:
            markers = {}
        else:
            markers = None
        if self.ensure_ascii:
            _encoder = encode_basestring_ascii
        else:
            _encoder = encode_basestring

        def floatstr(o, allow_nan=self.allow_nan,
                _repr=FLOAT_REPR, _inf=INFINITY, _neginf=-INFINITY):
            # Check for specials.  Note that this type of test is processor
            # and/or platform-specific, so do tests which don't depend on the
            # internals.

            if o != o:
                text = 'NaN'
            elif o == _inf:
                text = 'Infinity'
            elif o == _neginf:
                text = '-Infinity'
            else:
                return _repr(o)

            if not allow_nan:
                raise ValueError(
                    "Out of range float values are not JSON compliant: " +
                    repr(o))

            return text

        if (_one_shot and c_make_encoder is not None
                and self.indent is None):
            _iterencode = c_make_encoder(
                markers, self.default, _encoder, self.indent,
                self.key_separator, self.item_separator, self.sort_keys,
                self.skipkeys, self.allow_nan)
        else:
            _iterencode = _make_iterencode(
                markers, self.default, _encoder, self.indent, floatstr,
                self.key_separator, self.item_separator, self.sort_keys,
                self.skipkeys, _one_shot)
        return _iterencode(o, 0)

251def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
252 _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,
253 ## HACK: hand-optimized bytecode; turn globals into locals
254 ValueError=ValueError,
255 dict=dict,
256 float=float,
257 id=id,
258 int=int,
259 isinstance=isinstance,
260 list=list,
261 str=str,
262 tuple=tuple,
263 ):
264
Raymond Hettingerb643ef82010-10-31 08:00:16 +0000265 if _indent is not None and not isinstance(_indent, str):
266 _indent = ' ' * _indent
267
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000268 def _iterencode_list(lst, _current_indent_level):
269 if not lst:
270 yield '[]'
271 return
272 if markers is not None:
273 markerid = id(lst)
274 if markerid in markers:
275 raise ValueError("Circular reference detected")
276 markers[markerid] = lst
277 buf = '['
278 if _indent is not None:
279 _current_indent_level += 1
Raymond Hettingerb643ef82010-10-31 08:00:16 +0000280 newline_indent = '\n' + _indent * _current_indent_level
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000281 separator = _item_separator + newline_indent
282 buf += newline_indent
283 else:
284 newline_indent = None
285 separator = _item_separator
286 first = True
287 for value in lst:
288 if first:
289 first = False
290 else:
291 buf = separator
292 if isinstance(value, str):
293 yield buf + _encoder(value)
294 elif value is None:
295 yield buf + 'null'
296 elif value is True:
297 yield buf + 'true'
298 elif value is False:
299 yield buf + 'false'
300 elif isinstance(value, int):
301 yield buf + str(value)
302 elif isinstance(value, float):
303 yield buf + _floatstr(value)
304 else:
305 yield buf
306 if isinstance(value, (list, tuple)):
307 chunks = _iterencode_list(value, _current_indent_level)
308 elif isinstance(value, dict):
309 chunks = _iterencode_dict(value, _current_indent_level)
310 else:
311 chunks = _iterencode(value, _current_indent_level)
312 for chunk in chunks:
313 yield chunk
314 if newline_indent is not None:
315 _current_indent_level -= 1
Raymond Hettingerb643ef82010-10-31 08:00:16 +0000316 yield '\n' + _indent * _current_indent_level
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000317 yield ']'
318 if markers is not None:
319 del markers[markerid]
320
321 def _iterencode_dict(dct, _current_indent_level):
322 if not dct:
323 yield '{}'
324 return
325 if markers is not None:
326 markerid = id(dct)
327 if markerid in markers:
328 raise ValueError("Circular reference detected")
329 markers[markerid] = dct
330 yield '{'
331 if _indent is not None:
332 _current_indent_level += 1
Raymond Hettingerb643ef82010-10-31 08:00:16 +0000333 newline_indent = '\n' + _indent * _current_indent_level
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000334 item_separator = _item_separator + newline_indent
335 yield newline_indent
336 else:
337 newline_indent = None
338 item_separator = _item_separator
339 first = True
340 if _sort_keys:
341 items = sorted(dct.items(), key=lambda kv: kv[0])
342 else:
343 items = dct.items()
344 for key, value in items:
345 if isinstance(key, str):
346 pass
347 # JavaScript is weakly typed for these, so it makes sense to
348 # also allow them. Many encoders seem to do something like this.
349 elif isinstance(key, float):
350 key = _floatstr(key)
351 elif key is True:
352 key = 'true'
353 elif key is False:
354 key = 'false'
355 elif key is None:
356 key = 'null'
357 elif isinstance(key, int):
358 key = str(key)
359 elif _skipkeys:
360 continue
361 else:
362 raise TypeError("key " + repr(key) + " is not a string")
363 if first:
364 first = False
365 else:
366 yield item_separator
367 yield _encoder(key)
368 yield _key_separator
369 if isinstance(value, str):
370 yield _encoder(value)
371 elif value is None:
372 yield 'null'
373 elif value is True:
374 yield 'true'
375 elif value is False:
376 yield 'false'
377 elif isinstance(value, int):
378 yield str(value)
379 elif isinstance(value, float):
380 yield _floatstr(value)
381 else:
382 if isinstance(value, (list, tuple)):
383 chunks = _iterencode_list(value, _current_indent_level)
384 elif isinstance(value, dict):
385 chunks = _iterencode_dict(value, _current_indent_level)
386 else:
387 chunks = _iterencode(value, _current_indent_level)
388 for chunk in chunks:
389 yield chunk
390 if newline_indent is not None:
391 _current_indent_level -= 1
Raymond Hettingerb643ef82010-10-31 08:00:16 +0000392 yield '\n' + _indent * _current_indent_level
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000393 yield '}'
394 if markers is not None:
395 del markers[markerid]
396
397 def _iterencode(o, _current_indent_level):
398 if isinstance(o, str):
399 yield _encoder(o)
400 elif o is None:
401 yield 'null'
402 elif o is True:
403 yield 'true'
404 elif o is False:
405 yield 'false'
Florent Xicluna02ea12b22010-07-28 16:39:41 +0000406 elif isinstance(o, int):
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000407 yield str(o)
408 elif isinstance(o, float):
409 yield _floatstr(o)
410 elif isinstance(o, (list, tuple)):
411 for chunk in _iterencode_list(o, _current_indent_level):
412 yield chunk
413 elif isinstance(o, dict):
414 for chunk in _iterencode_dict(o, _current_indent_level):
415 yield chunk
416 else:
417 if markers is not None:
418 markerid = id(o)
419 if markerid in markers:
420 raise ValueError("Circular reference detected")
421 markers[markerid] = o
422 o = _default(o)
423 for chunk in _iterencode(o, _current_indent_level):
424 yield chunk
425 if markers is not None:
426 del markers[markerid]
427 return _iterencode