blob: 41a497c5da016031f055059366c31b53dacbcf47 [file] [log] [blame]
Christian Heimes90540002008-05-08 14:29:10 +00001"""Implementation of JSONEncoder
2"""
Christian Heimes90540002008-05-08 14:29:10 +00003import re
4
5try:
6 from _json import encode_basestring_ascii as c_encode_basestring_ascii
Brett Cannoncd171c82013-07-04 17:43:24 -04007except ImportError:
Christian Heimes90540002008-05-08 14:29:10 +00008 c_encode_basestring_ascii = None
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00009try:
Antoine Pitroudc3eaa82015-01-11 16:41:01 +010010 from _json import encode_basestring as c_encode_basestring
11except ImportError:
12 c_encode_basestring = None
13try:
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000014 from _json import make_encoder as c_make_encoder
Brett Cannoncd171c82013-07-04 17:43:24 -040015except ImportError:
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000016 c_make_encoder = None
Christian Heimes90540002008-05-08 14:29:10 +000017
# Characters that always need escaping inside a JSON string literal:
# the C0 control range, the backslash and the double quote.
ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]')
# As above, plus anything outside the printable ASCII range ' ' .. '~'.
ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
# Matches any single byte whose high bit is set.
HAS_UTF8 = re.compile(b'[\x80-\xff]')

# Character -> escape sequence.  The two-character short forms are listed
# explicitly; the loop below fills in \uXXXX for the remaining control
# characters without overwriting the short forms.
ESCAPE_DCT = {
    '\\': '\\\\',
    '"': '\\"',
    '\b': '\\b',
    '\f': '\\f',
    '\n': '\\n',
    '\r': '\\r',
    '\t': '\\t',
}
for i in range(0x20):
    if chr(i) not in ESCAPE_DCT:
        ESCAPE_DCT[chr(i)] = '\\u{0:04x}'.format(i)

INFINITY = float('inf')
Christian Heimes90540002008-05-08 14:29:10 +000035
def py_encode_basestring(s):
    """Return the JSON string literal for the Python string *s*.

    Each character matched by ESCAPE is replaced with its entry in
    ESCAPE_DCT; all other characters are copied through unchanged.  The
    result is wrapped in double quotes.

    """
    escaped = ESCAPE.sub(lambda match: ESCAPE_DCT[match.group(0)], s)
    return '"' + escaped + '"'
43
44
# c_encode_basestring is None when the _json import failed, in which case
# the pure-Python implementation is used.
encode_basestring = c_encode_basestring or py_encode_basestring
46
47
def py_encode_basestring_ascii(s):
    """Return the ASCII-only JSON string literal for the Python string *s*.

    Characters matched by ESCAPE_ASCII are replaced: those present in
    ESCAPE_DCT by their table entry, every other one by a ``\\uXXXX``
    escape.  Code points of 0x10000 and above are written as two
    ``\\uXXXX`` escapes forming a surrogate pair.  The result is wrapped
    in double quotes.

    """
    def replace(match):
        char = match.group(0)
        known = ESCAPE_DCT.get(char)
        if known is not None:
            return known
        codepoint = ord(char)
        if codepoint < 0x10000:
            return '\\u{0:04x}'.format(codepoint)
        # Beyond the Basic Multilingual Plane: split the 20 significant
        # bits into a high and a low surrogate.
        codepoint -= 0x10000
        high = 0xd800 | ((codepoint >> 10) & 0x3ff)
        low = 0xdc00 | (codepoint & 0x3ff)
        return '\\u{0:04x}\\u{1:04x}'.format(high, low)

    return '"' + ESCAPE_ASCII.sub(replace, s) + '"'
Christian Heimes90540002008-05-08 14:29:10 +000068
69
# c_encode_basestring_ascii is None when the _json import failed, in which
# case the pure-Python implementation is used.
encode_basestring_ascii = (
    c_encode_basestring_ascii
    or py_encode_basestring_ascii
)
Christian Heimes90540002008-05-08 14:29:10 +000072
class JSONEncoder:
    """Extensible JSON <http://json.org> encoder for Python data structures.

    The following Python values are translated without any help:

    +-------------------+---------------+
    | Python            | JSON          |
    +===================+===============+
    | dict              | object        |
    +-------------------+---------------+
    | list, tuple       | array         |
    +-------------------+---------------+
    | str               | string        |
    +-------------------+---------------+
    | int, float        | number        |
    +-------------------+---------------+
    | True              | true          |
    +-------------------+---------------+
    | False             | false         |
    +-------------------+---------------+
    | None              | null          |
    +-------------------+---------------+

    Anything else is passed to ``default()``.  To support further types,
    subclass and override ``default()`` so that it returns a serializable
    object for ``o`` when it can, and otherwise calls the superclass
    implementation (which raises ``TypeError``).

    """
    # Class-level separators; __init__ may shadow them on the instance.
    item_separator = ', '
    key_separator = ': '

    def __init__(self, *, skipkeys=False, ensure_ascii=True,
                 check_circular=True, allow_nan=True, sort_keys=False,
                 indent=None, separators=None, default=None):
        """Constructor for JSONEncoder; all options are keyword-only.

        If skipkeys is false, a dict key that is not a str, int, float,
        bool or None makes encoding fail with TypeError.  If skipkeys is
        true, items with such keys are left out of the output instead.

        If ensure_ascii is true, strings are encoded so that the output
        contains only ASCII, with other characters escaped.  If it is
        false, non-ASCII characters may appear in the output as-is.

        If check_circular is true, lists, dicts and objects handed to
        ``default()`` are tracked while they are being encoded, and a
        container that contains itself raises ValueError instead of
        recursing without bound (which would end in a RecursionError).
        If it is false, no such tracking is done.

        If allow_nan is true, NaN, Infinity and -Infinity are written
        under those names.  That is outside the JSON specification but
        matches most JavaScript based encoders and decoders.  If it is
        false, encoding such a float raises ValueError.

        If sort_keys is true, the members of every dict are emitted
        sorted by key, which makes the output reproducible.

        If indent is None, the output is as compact as the separators
        allow.  A string is used as the per-level indentation unit and an
        integer means that many spaces; array elements and object members
        are then placed on their own lines.  An indent of 0 only inserts
        newlines.

        If given, separators must be an (item_separator, key_separator)
        pair.  Otherwise (', ', ': ') is used when indent is None and
        (',', ': ') when it is not.  Pass (',', ':') for the most compact
        output.

        If given, default is a callable used for objects that cannot be
        serialized otherwise.  It should return a JSON encodable version
        of the object or raise ``TypeError``.  It is stored on the
        instance and therefore takes the place of the ``default()``
        method.

        """
        self.indent = indent
        self.sort_keys = sort_keys
        self.allow_nan = allow_nan
        self.check_circular = check_circular
        self.ensure_ascii = ensure_ascii
        self.skipkeys = skipkeys

        if separators is not None:
            item_sep, key_sep = separators
            self.item_separator = item_sep
            self.key_separator = key_sep
        elif indent is not None:
            # With indentation every item ends its line, so the trailing
            # space of the class-level item separator is dropped.
            self.item_separator = ','

        if default is not None:
            self.default = default

    def default(self, o):
        """Return a serializable stand-in for ``o``, or raise TypeError.

        This base implementation always raises ``TypeError``.  Override
        it in a subclass to return something the encoder can handle, and
        fall back to the base implementation for everything else.

        For example, to support arbitrary iterators, you could
        implement default like this::

            def default(self, o):
                try:
                    iterable = iter(o)
                except TypeError:
                    pass
                else:
                    return list(iterable)
                # Let the base class default method raise the TypeError
                return JSONEncoder.default(self, o)

        """
        type_name = o.__class__.__name__
        raise TypeError(
            "Object of type '%s' is not JSON serializable" % type_name)

    def encode(self, o):
        """Return a JSON string representation of a Python data structure.

        >>> from json.encoder import JSONEncoder
        >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
        '{"foo": ["bar", "baz"]}'

        """
        # Shortcut for a bare string: no iterator machinery needed.
        if isinstance(o, str):
            quote = (encode_basestring_ascii if self.ensure_ascii
                     else encode_basestring)
            return quote(o)
        # The chunks are materialized before joining rather than handing
        # the iterator to ''.join() directly, because the exceptions are
        # more detailed that way.
        chunks = self.iterencode(o, _one_shot=True)
        if not isinstance(chunks, (list, tuple)):
            chunks = list(chunks)
        return ''.join(chunks)

    def iterencode(self, o, _one_shot=False):
        """Encode the given object and yield each string
        representation as available.

        For example::

            for chunk in JSONEncoder().iterencode(bigobject):
                mysocket.write(chunk)

        """
        # id -> object map of containers being encoded, or None to
        # disable circular reference detection.
        markers = {} if self.check_circular else None
        _encoder = (encode_basestring_ascii if self.ensure_ascii
                    else encode_basestring)

        def floatstr(o, allow_nan=self.allow_nan,
                     _repr=float.__repr__, _inf=INFINITY,
                     _neginf=-INFINITY):
            # The special values are recognized by comparison only, so
            # the test does not depend on the float's internal layout.
            if o != o:
                special = 'NaN'
            elif o == _inf:
                special = 'Infinity'
            elif o == _neginf:
                special = '-Infinity'
            else:
                special = None

            if special is None:
                return _repr(o)
            if allow_nan:
                return special
            raise ValueError(
                "Out of range float values are not JSON compliant: " +
                repr(o))

        # The C encoder is only used for one-shot, non-indented output.
        use_c_encoder = (_one_shot and c_make_encoder is not None
                         and self.indent is None)
        if use_c_encoder:
            _iterencode = c_make_encoder(
                markers, self.default, _encoder, self.indent,
                self.key_separator, self.item_separator, self.sort_keys,
                self.skipkeys, self.allow_nan)
        else:
            _iterencode = _make_iterencode(
                markers, self.default, _encoder, self.indent, floatstr,
                self.key_separator, self.item_separator, self.sort_keys,
                self.skipkeys, _one_shot)
        return _iterencode(o, 0)
258
259def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
260 _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,
261 ## HACK: hand-optimized bytecode; turn globals into locals
262 ValueError=ValueError,
263 dict=dict,
264 float=float,
265 id=id,
266 int=int,
267 isinstance=isinstance,
268 list=list,
269 str=str,
270 tuple=tuple,
Serhiy Storchakae0805cf2016-04-10 14:41:19 +0300271 _intstr=int.__str__,
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000272 ):
273
Raymond Hettingerb643ef82010-10-31 08:00:16 +0000274 if _indent is not None and not isinstance(_indent, str):
275 _indent = ' ' * _indent
276
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000277 def _iterencode_list(lst, _current_indent_level):
278 if not lst:
279 yield '[]'
280 return
281 if markers is not None:
282 markerid = id(lst)
283 if markerid in markers:
284 raise ValueError("Circular reference detected")
285 markers[markerid] = lst
286 buf = '['
287 if _indent is not None:
288 _current_indent_level += 1
Raymond Hettingerb643ef82010-10-31 08:00:16 +0000289 newline_indent = '\n' + _indent * _current_indent_level
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000290 separator = _item_separator + newline_indent
291 buf += newline_indent
292 else:
293 newline_indent = None
294 separator = _item_separator
295 first = True
296 for value in lst:
297 if first:
298 first = False
299 else:
300 buf = separator
301 if isinstance(value, str):
302 yield buf + _encoder(value)
303 elif value is None:
304 yield buf + 'null'
305 elif value is True:
306 yield buf + 'true'
307 elif value is False:
308 yield buf + 'false'
309 elif isinstance(value, int):
Ethan Furmana4998a72013-08-10 13:01:45 -0700310 # Subclasses of int/float may override __str__, but we still
311 # want to encode them as integers/floats in JSON. One example
312 # within the standard library is IntEnum.
Serhiy Storchakae0805cf2016-04-10 14:41:19 +0300313 yield buf + _intstr(value)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000314 elif isinstance(value, float):
Ethan Furmana4998a72013-08-10 13:01:45 -0700315 # see comment above for int
Serhiy Storchakae0805cf2016-04-10 14:41:19 +0300316 yield buf + _floatstr(value)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000317 else:
318 yield buf
319 if isinstance(value, (list, tuple)):
320 chunks = _iterencode_list(value, _current_indent_level)
321 elif isinstance(value, dict):
322 chunks = _iterencode_dict(value, _current_indent_level)
323 else:
324 chunks = _iterencode(value, _current_indent_level)
Philip Jenveyfd0d3e52012-10-01 15:34:31 -0700325 yield from chunks
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000326 if newline_indent is not None:
327 _current_indent_level -= 1
Raymond Hettingerb643ef82010-10-31 08:00:16 +0000328 yield '\n' + _indent * _current_indent_level
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000329 yield ']'
330 if markers is not None:
331 del markers[markerid]
332
333 def _iterencode_dict(dct, _current_indent_level):
334 if not dct:
335 yield '{}'
336 return
337 if markers is not None:
338 markerid = id(dct)
339 if markerid in markers:
340 raise ValueError("Circular reference detected")
341 markers[markerid] = dct
342 yield '{'
343 if _indent is not None:
344 _current_indent_level += 1
Raymond Hettingerb643ef82010-10-31 08:00:16 +0000345 newline_indent = '\n' + _indent * _current_indent_level
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000346 item_separator = _item_separator + newline_indent
347 yield newline_indent
348 else:
349 newline_indent = None
350 item_separator = _item_separator
351 first = True
352 if _sort_keys:
353 items = sorted(dct.items(), key=lambda kv: kv[0])
354 else:
355 items = dct.items()
356 for key, value in items:
357 if isinstance(key, str):
358 pass
359 # JavaScript is weakly typed for these, so it makes sense to
360 # also allow them. Many encoders seem to do something like this.
361 elif isinstance(key, float):
Ethan Furmana4998a72013-08-10 13:01:45 -0700362 # see comment for int/float in _make_iterencode
Serhiy Storchakae0805cf2016-04-10 14:41:19 +0300363 key = _floatstr(key)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000364 elif key is True:
365 key = 'true'
366 elif key is False:
367 key = 'false'
368 elif key is None:
369 key = 'null'
370 elif isinstance(key, int):
Ethan Furmana4998a72013-08-10 13:01:45 -0700371 # see comment for int/float in _make_iterencode
Serhiy Storchakae0805cf2016-04-10 14:41:19 +0300372 key = _intstr(key)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000373 elif _skipkeys:
374 continue
375 else:
376 raise TypeError("key " + repr(key) + " is not a string")
377 if first:
378 first = False
379 else:
380 yield item_separator
381 yield _encoder(key)
382 yield _key_separator
383 if isinstance(value, str):
384 yield _encoder(value)
385 elif value is None:
386 yield 'null'
387 elif value is True:
388 yield 'true'
389 elif value is False:
390 yield 'false'
391 elif isinstance(value, int):
Ethan Furmana4998a72013-08-10 13:01:45 -0700392 # see comment for int/float in _make_iterencode
Serhiy Storchakae0805cf2016-04-10 14:41:19 +0300393 yield _intstr(value)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000394 elif isinstance(value, float):
Ethan Furmana4998a72013-08-10 13:01:45 -0700395 # see comment for int/float in _make_iterencode
Serhiy Storchakae0805cf2016-04-10 14:41:19 +0300396 yield _floatstr(value)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000397 else:
398 if isinstance(value, (list, tuple)):
399 chunks = _iterencode_list(value, _current_indent_level)
400 elif isinstance(value, dict):
401 chunks = _iterencode_dict(value, _current_indent_level)
402 else:
403 chunks = _iterencode(value, _current_indent_level)
Philip Jenveyfd0d3e52012-10-01 15:34:31 -0700404 yield from chunks
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000405 if newline_indent is not None:
406 _current_indent_level -= 1
Raymond Hettingerb643ef82010-10-31 08:00:16 +0000407 yield '\n' + _indent * _current_indent_level
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000408 yield '}'
409 if markers is not None:
410 del markers[markerid]
411
412 def _iterencode(o, _current_indent_level):
413 if isinstance(o, str):
414 yield _encoder(o)
415 elif o is None:
416 yield 'null'
417 elif o is True:
418 yield 'true'
419 elif o is False:
420 yield 'false'
Florent Xicluna02ea12b22010-07-28 16:39:41 +0000421 elif isinstance(o, int):
Ethan Furmana4998a72013-08-10 13:01:45 -0700422 # see comment for int/float in _make_iterencode
Serhiy Storchakae0805cf2016-04-10 14:41:19 +0300423 yield _intstr(o)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000424 elif isinstance(o, float):
Ethan Furmana4998a72013-08-10 13:01:45 -0700425 # see comment for int/float in _make_iterencode
Serhiy Storchakae0805cf2016-04-10 14:41:19 +0300426 yield _floatstr(o)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000427 elif isinstance(o, (list, tuple)):
Philip Jenveyfd0d3e52012-10-01 15:34:31 -0700428 yield from _iterencode_list(o, _current_indent_level)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000429 elif isinstance(o, dict):
Philip Jenveyfd0d3e52012-10-01 15:34:31 -0700430 yield from _iterencode_dict(o, _current_indent_level)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000431 else:
432 if markers is not None:
433 markerid = id(o)
434 if markerid in markers:
435 raise ValueError("Circular reference detected")
436 markers[markerid] = o
437 o = _default(o)
Philip Jenveyfd0d3e52012-10-01 15:34:31 -0700438 yield from _iterencode(o, _current_indent_level)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000439 if markers is not None:
440 del markers[markerid]
441 return _iterencode