# blob: d596489f42d3856db96f507e1f04d410d154b58e
"""Implementation of JSONEncoder
"""
import re

# Optional C accelerators from the _json extension module.  Each name is
# bound to None when the import fails, so the rest of the module can test
# for availability and fall back to the pure-Python implementation.
try:
    from _json import encode_basestring_ascii as c_encode_basestring_ascii
except ImportError:
    c_encode_basestring_ascii = None
try:
    from _json import encode_basestring as c_encode_basestring
except ImportError:
    c_encode_basestring = None
try:
    from _json import make_encoder as c_make_encoder
except ImportError:
    c_make_encoder = None

# Matches every character that must be escaped inside a JSON string:
# the control characters U+0000-U+001F, the backslash and the double quote.
ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]')
# Matches backslash, double quote, and anything outside the printable
# ASCII range (space through tilde); used for ASCII-only output.
ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
# Matches any byte with the high bit set.
HAS_UTF8 = re.compile(b'[\x80-\xff]')
# Replacement text for each escaped character.  The short two-character
# escapes are listed explicitly; the loop below fills in \uXXXX escapes
# for the remaining control characters.
ESCAPE_DCT = {
    '\\': '\\\\',
    '"': '\\"',
    '\b': '\\b',
    '\f': '\\f',
    '\n': '\\n',
    '\r': '\\r',
    '\t': '\\t',
}
for i in range(0x20):
    # setdefault keeps the short escapes defined above.
    ESCAPE_DCT.setdefault(chr(i), '\\u{0:04x}'.format(i))
# Do not leave the loop index behind as a module-level name.
del i

INFINITY = float('inf')

def py_encode_basestring(s):
    """Return a JSON representation of a Python string

    The result is ``s`` wrapped in double quotes, with every character
    matched by ESCAPE replaced by its entry in ESCAPE_DCT.  Characters
    that ESCAPE does not match (including non-ASCII ones) are left as-is.

    """
    def replace(match):
        return ESCAPE_DCT[match.group(0)]
    return '"' + ESCAPE.sub(replace, s) + '"'


# Use the C implementation when it was imported, else the Python fallback.
encode_basestring = (c_encode_basestring or py_encode_basestring)


def py_encode_basestring_ascii(s):
    """Return an ASCII-only JSON representation of a Python string

    The result is ``s`` wrapped in double quotes.  Characters that have an
    entry in ESCAPE_DCT use that escape; any other character matched by
    ESCAPE_ASCII is written as ``\\uXXXX``, or as a pair of ``\\uXXXX``
    surrogate escapes when its code point is 0x10000 or above.

    """
    def replace(match):
        s = match.group(0)
        try:
            return ESCAPE_DCT[s]
        except KeyError:
            n = ord(s)
            if n < 0x10000:
                return '\\u{0:04x}'.format(n)
            else:
                # surrogate pair
                n -= 0x10000
                s1 = 0xd800 | ((n >> 10) & 0x3ff)
                s2 = 0xdc00 | (n & 0x3ff)
                return '\\u{0:04x}\\u{1:04x}'.format(s1, s2)
    return '"' + ESCAPE_ASCII.sub(replace, s) + '"'


# Use the C implementation when it was imported, else the Python fallback.
encode_basestring_ascii = (
    c_encode_basestring_ascii or py_encode_basestring_ascii)

class JSONEncoder(object):
    """Extensible JSON <http://json.org> encoder for Python data structures.

    Supports the following objects and types by default:

    +-------------------+---------------+
    | Python            | JSON          |
    +===================+===============+
    | dict              | object        |
    +-------------------+---------------+
    | list, tuple       | array         |
    +-------------------+---------------+
    | str               | string        |
    +-------------------+---------------+
    | int, float        | number        |
    +-------------------+---------------+
    | True              | true          |
    +-------------------+---------------+
    | False             | false         |
    +-------------------+---------------+
    | None              | null          |
    +-------------------+---------------+

    To extend this to recognize other objects, subclass and implement a
    ``.default()`` method that returns a serializable object for ``o`` if
    possible, otherwise it should call the superclass implementation
    (to raise ``TypeError``).

    """
    # Class-level defaults; __init__ overrides them per instance when
    # ``separators`` or ``indent`` is given.
    item_separator = ', '
    key_separator = ': '
    def __init__(self, skipkeys=False, ensure_ascii=True,
            check_circular=True, allow_nan=True, sort_keys=False,
            indent=None, separators=None, default=None):
        """Constructor for JSONEncoder, with sensible defaults.

        If skipkeys is false, then it is a TypeError to attempt
        encoding of keys that are not str, int, float or None.  If
        skipkeys is True, such items are simply skipped.

        If ensure_ascii is true, the output is guaranteed to be str
        objects with all incoming non-ASCII characters escaped.  If
        ensure_ascii is false, the output can contain non-ASCII characters.

        If check_circular is true, then lists, dicts, and custom encoded
        objects will be checked for circular references during encoding to
        prevent an infinite recursion (which would cause a RecursionError).
        Otherwise, no such check takes place.

        If allow_nan is true, then NaN, Infinity, and -Infinity will be
        encoded as such.  This behavior is not JSON specification compliant,
        but is consistent with most JavaScript based encoders and decoders.
        Otherwise, it will be a ValueError to encode such floats.

        If sort_keys is true, then the output of dictionaries will be
        sorted by key; this is useful for regression tests to ensure
        that JSON serializations can be compared on a day-to-day basis.

        If indent is a non-negative integer, then JSON array
        elements and object members will be pretty-printed with that
        indent level.  An indent level of 0 will only insert newlines.
        If indent is a string, that string is used to indent each level.
        None is the most compact representation.

        If specified, separators should be an (item_separator, key_separator)
        tuple.  The default is (', ', ': ') if *indent* is ``None`` and
        (',', ': ') otherwise.  To get the most compact JSON representation,
        you should specify (',', ':') to eliminate whitespace.

        If specified, default is a function that gets called for objects
        that can't otherwise be serialized.  It should return a JSON encodable
        version of the object or raise a ``TypeError``.

        """

        self.skipkeys = skipkeys
        self.ensure_ascii = ensure_ascii
        self.check_circular = check_circular
        self.allow_nan = allow_nan
        self.sort_keys = sort_keys
        self.indent = indent
        if separators is not None:
            self.item_separator, self.key_separator = separators
        elif indent is not None:
            # Pretty-printed output puts a newline after each item, so the
            # trailing space of the default item separator is dropped.
            self.item_separator = ','
        if default is not None:
            # Shadow the default() method with the supplied callable.
            self.default = default

    def default(self, o):
        """Implement this method in a subclass such that it returns
        a serializable object for ``o``, or calls the base implementation
        (to raise a ``TypeError``).

        For example, to support arbitrary iterators, you could
        implement default like this::

            def default(self, o):
                try:
                    iterable = iter(o)
                except TypeError:
                    pass
                else:
                    return list(iterable)
                # Let the base class default method raise the TypeError
                return JSONEncoder.default(self, o)

        """
        raise TypeError(repr(o) + " is not JSON serializable")

    def encode(self, o):
        """Return a JSON string representation of a Python data structure.

        >>> from json.encoder import JSONEncoder
        >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
        '{"foo": ["bar", "baz"]}'

        """
        # This is for extremely simple cases and benchmarks.
        if isinstance(o, str):
            if self.ensure_ascii:
                return encode_basestring_ascii(o)
            else:
                return encode_basestring(o)
        # This doesn't pass the iterator directly to ''.join() because the
        # exceptions aren't as detailed.  The list call should be roughly
        # equivalent to the PySequence_Fast that ''.join() would do.
        chunks = self.iterencode(o, _one_shot=True)
        if not isinstance(chunks, (list, tuple)):
            chunks = list(chunks)
        return ''.join(chunks)

    def iterencode(self, o, _one_shot=False):
        """Encode the given object and yield each string
        representation as available.

        For example::

            for chunk in JSONEncoder().iterencode(bigobject):
                mysocket.write(chunk)

        """
        # A dict of id -> object for containers currently being encoded,
        # or None when circular-reference checking is disabled.
        if self.check_circular:
            markers = {}
        else:
            markers = None
        if self.ensure_ascii:
            _encoder = encode_basestring_ascii
        else:
            _encoder = encode_basestring

        def floatstr(o, allow_nan=self.allow_nan,
                _repr=float.__repr__, _inf=INFINITY, _neginf=-INFINITY):
            # Check for specials.  Note that this type of test is processor
            # and/or platform-specific, so do tests which don't depend on the
            # internals.

            if o != o:
                text = 'NaN'
            elif o == _inf:
                text = 'Infinity'
            elif o == _neginf:
                text = '-Infinity'
            else:
                return _repr(o)

            if not allow_nan:
                raise ValueError(
                    "Out of range float values are not JSON compliant: " +
                    repr(o))

            return text


        # The C encoder is only used for one-shot, non-indented output.
        if (_one_shot and c_make_encoder is not None
                and self.indent is None):
            _iterencode = c_make_encoder(
                markers, self.default, _encoder, self.indent,
                self.key_separator, self.item_separator, self.sort_keys,
                self.skipkeys, self.allow_nan)
        else:
            _iterencode = _make_iterencode(
                markers, self.default, _encoder, self.indent, floatstr,
                self.key_separator, self.item_separator, self.sort_keys,
                self.skipkeys, _one_shot)
        return _iterencode(o, 0)

258def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
259 _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,
260 ## HACK: hand-optimized bytecode; turn globals into locals
261 ValueError=ValueError,
262 dict=dict,
263 float=float,
264 id=id,
265 int=int,
266 isinstance=isinstance,
267 list=list,
268 str=str,
269 tuple=tuple,
Serhiy Storchakae0805cf2016-04-10 14:41:19 +0300270 _intstr=int.__str__,
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000271 ):
272
Raymond Hettingerb643ef82010-10-31 08:00:16 +0000273 if _indent is not None and not isinstance(_indent, str):
274 _indent = ' ' * _indent
275
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000276 def _iterencode_list(lst, _current_indent_level):
277 if not lst:
278 yield '[]'
279 return
280 if markers is not None:
281 markerid = id(lst)
282 if markerid in markers:
283 raise ValueError("Circular reference detected")
284 markers[markerid] = lst
285 buf = '['
286 if _indent is not None:
287 _current_indent_level += 1
Raymond Hettingerb643ef82010-10-31 08:00:16 +0000288 newline_indent = '\n' + _indent * _current_indent_level
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000289 separator = _item_separator + newline_indent
290 buf += newline_indent
291 else:
292 newline_indent = None
293 separator = _item_separator
294 first = True
295 for value in lst:
296 if first:
297 first = False
298 else:
299 buf = separator
300 if isinstance(value, str):
301 yield buf + _encoder(value)
302 elif value is None:
303 yield buf + 'null'
304 elif value is True:
305 yield buf + 'true'
306 elif value is False:
307 yield buf + 'false'
308 elif isinstance(value, int):
Ethan Furmana4998a72013-08-10 13:01:45 -0700309 # Subclasses of int/float may override __str__, but we still
310 # want to encode them as integers/floats in JSON. One example
311 # within the standard library is IntEnum.
Serhiy Storchakae0805cf2016-04-10 14:41:19 +0300312 yield buf + _intstr(value)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000313 elif isinstance(value, float):
Ethan Furmana4998a72013-08-10 13:01:45 -0700314 # see comment above for int
Serhiy Storchakae0805cf2016-04-10 14:41:19 +0300315 yield buf + _floatstr(value)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000316 else:
317 yield buf
318 if isinstance(value, (list, tuple)):
319 chunks = _iterencode_list(value, _current_indent_level)
320 elif isinstance(value, dict):
321 chunks = _iterencode_dict(value, _current_indent_level)
322 else:
323 chunks = _iterencode(value, _current_indent_level)
Philip Jenveyfd0d3e52012-10-01 15:34:31 -0700324 yield from chunks
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000325 if newline_indent is not None:
326 _current_indent_level -= 1
Raymond Hettingerb643ef82010-10-31 08:00:16 +0000327 yield '\n' + _indent * _current_indent_level
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000328 yield ']'
329 if markers is not None:
330 del markers[markerid]
331
332 def _iterencode_dict(dct, _current_indent_level):
333 if not dct:
334 yield '{}'
335 return
336 if markers is not None:
337 markerid = id(dct)
338 if markerid in markers:
339 raise ValueError("Circular reference detected")
340 markers[markerid] = dct
341 yield '{'
342 if _indent is not None:
343 _current_indent_level += 1
Raymond Hettingerb643ef82010-10-31 08:00:16 +0000344 newline_indent = '\n' + _indent * _current_indent_level
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000345 item_separator = _item_separator + newline_indent
346 yield newline_indent
347 else:
348 newline_indent = None
349 item_separator = _item_separator
350 first = True
351 if _sort_keys:
352 items = sorted(dct.items(), key=lambda kv: kv[0])
353 else:
354 items = dct.items()
355 for key, value in items:
356 if isinstance(key, str):
357 pass
358 # JavaScript is weakly typed for these, so it makes sense to
359 # also allow them. Many encoders seem to do something like this.
360 elif isinstance(key, float):
Ethan Furmana4998a72013-08-10 13:01:45 -0700361 # see comment for int/float in _make_iterencode
Serhiy Storchakae0805cf2016-04-10 14:41:19 +0300362 key = _floatstr(key)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000363 elif key is True:
364 key = 'true'
365 elif key is False:
366 key = 'false'
367 elif key is None:
368 key = 'null'
369 elif isinstance(key, int):
Ethan Furmana4998a72013-08-10 13:01:45 -0700370 # see comment for int/float in _make_iterencode
Serhiy Storchakae0805cf2016-04-10 14:41:19 +0300371 key = _intstr(key)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000372 elif _skipkeys:
373 continue
374 else:
375 raise TypeError("key " + repr(key) + " is not a string")
376 if first:
377 first = False
378 else:
379 yield item_separator
380 yield _encoder(key)
381 yield _key_separator
382 if isinstance(value, str):
383 yield _encoder(value)
384 elif value is None:
385 yield 'null'
386 elif value is True:
387 yield 'true'
388 elif value is False:
389 yield 'false'
390 elif isinstance(value, int):
Ethan Furmana4998a72013-08-10 13:01:45 -0700391 # see comment for int/float in _make_iterencode
Serhiy Storchakae0805cf2016-04-10 14:41:19 +0300392 yield _intstr(value)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000393 elif isinstance(value, float):
Ethan Furmana4998a72013-08-10 13:01:45 -0700394 # see comment for int/float in _make_iterencode
Serhiy Storchakae0805cf2016-04-10 14:41:19 +0300395 yield _floatstr(value)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000396 else:
397 if isinstance(value, (list, tuple)):
398 chunks = _iterencode_list(value, _current_indent_level)
399 elif isinstance(value, dict):
400 chunks = _iterencode_dict(value, _current_indent_level)
401 else:
402 chunks = _iterencode(value, _current_indent_level)
Philip Jenveyfd0d3e52012-10-01 15:34:31 -0700403 yield from chunks
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000404 if newline_indent is not None:
405 _current_indent_level -= 1
Raymond Hettingerb643ef82010-10-31 08:00:16 +0000406 yield '\n' + _indent * _current_indent_level
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000407 yield '}'
408 if markers is not None:
409 del markers[markerid]
410
411 def _iterencode(o, _current_indent_level):
412 if isinstance(o, str):
413 yield _encoder(o)
414 elif o is None:
415 yield 'null'
416 elif o is True:
417 yield 'true'
418 elif o is False:
419 yield 'false'
Florent Xicluna02ea12b22010-07-28 16:39:41 +0000420 elif isinstance(o, int):
Ethan Furmana4998a72013-08-10 13:01:45 -0700421 # see comment for int/float in _make_iterencode
Serhiy Storchakae0805cf2016-04-10 14:41:19 +0300422 yield _intstr(o)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000423 elif isinstance(o, float):
Ethan Furmana4998a72013-08-10 13:01:45 -0700424 # see comment for int/float in _make_iterencode
Serhiy Storchakae0805cf2016-04-10 14:41:19 +0300425 yield _floatstr(o)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000426 elif isinstance(o, (list, tuple)):
Philip Jenveyfd0d3e52012-10-01 15:34:31 -0700427 yield from _iterencode_list(o, _current_indent_level)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000428 elif isinstance(o, dict):
Philip Jenveyfd0d3e52012-10-01 15:34:31 -0700429 yield from _iterencode_dict(o, _current_indent_level)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000430 else:
431 if markers is not None:
432 markerid = id(o)
433 if markerid in markers:
434 raise ValueError("Circular reference detected")
435 markers[markerid] = o
436 o = _default(o)
Philip Jenveyfd0d3e52012-10-01 15:34:31 -0700437 yield from _iterencode(o, _current_indent_level)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000438 if markers is not None:
439 del markers[markerid]
440 return _iterencode