blob: 05138383bccf8749a2fd3d1c7eb0405f5e825cb7 [file] [log] [blame]
"""Implementation of JSONEncoder
"""
Christian Heimes90540002008-05-08 14:29:10 +00003import re
4
5try:
6 from _json import encode_basestring_ascii as c_encode_basestring_ascii
Brett Cannoncd171c82013-07-04 17:43:24 -04007except ImportError:
Christian Heimes90540002008-05-08 14:29:10 +00008 c_encode_basestring_ascii = None
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00009try:
10 from _json import make_encoder as c_make_encoder
Brett Cannoncd171c82013-07-04 17:43:24 -040011except ImportError:
Benjamin Petersonc6b607d2009-05-02 12:36:44 +000012 c_make_encoder = None
Christian Heimes90540002008-05-08 14:29:10 +000013
# Characters that must be escaped inside a JSON string literal: the ASCII
# control characters, the backslash and the double quote.
ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]')
# Same as above, plus every character outside printable ASCII (space..tilde).
ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
# Matches any byte with the high bit set.
HAS_UTF8 = re.compile(b'[\x80-\xff]')
# Characters that have a dedicated short escape sequence in JSON.
ESCAPE_DCT = {
    '\\': '\\\\',
    '"': '\\"',
    '\b': '\\b',
    '\f': '\\f',
    '\n': '\\n',
    '\r': '\\r',
    '\t': '\\t',
}
# Every remaining control character (below 0x20) gets a generic \uXXXX escape;
# setdefault() keeps the short forms registered above.
for i in range(0x20):
    ESCAPE_DCT.setdefault(chr(i), '\\u{0:04x}'.format(i))

INFINITY = float('inf')
FLOAT_REPR = repr

def encode_basestring(s):
    """Return a JSON representation of a Python string

    The result is wrapped in double quotes.  Every character matched by
    ``ESCAPE`` is replaced by its entry in ``ESCAPE_DCT``; all other
    characters, including non-ASCII ones, are emitted unchanged.

    """
    def replace(match):
        return ESCAPE_DCT[match.group(0)]
    return '"' + ESCAPE.sub(replace, s) + '"'


def py_encode_basestring_ascii(s):
    """Return an ASCII-only JSON representation of a Python string

    The result is wrapped in double quotes.  Characters matched by
    ``ESCAPE_ASCII`` are replaced by their ``ESCAPE_DCT`` entry when one
    exists, and by ``\\uXXXX`` escapes otherwise.

    """
    def replace(match):
        s = match.group(0)
        try:
            return ESCAPE_DCT[s]
        except KeyError:
            n = ord(s)
            if n < 0x10000:
                return '\\u{0:04x}'.format(n)
            else:
                # surrogate pair: code points above the BMP are written as
                # two \uXXXX escapes (high surrogate, then low surrogate)
                n -= 0x10000
                s1 = 0xd800 | ((n >> 10) & 0x3ff)
                s2 = 0xdc00 | (n & 0x3ff)
                return '\\u{0:04x}\\u{1:04x}'.format(s1, s2)
    return '"' + ESCAPE_ASCII.sub(replace, s) + '"'


# Use the C implementation when it could be imported (it is None otherwise),
# falling back to the pure-Python version.
encode_basestring_ascii = (
    c_encode_basestring_ascii or py_encode_basestring_ascii)

class JSONEncoder(object):
    """Extensible JSON <http://json.org> encoder for Python data structures.

    Supports the following objects and types by default:

    +-------------------+---------------+
    | Python            | JSON          |
    +===================+===============+
    | dict              | object        |
    +-------------------+---------------+
    | list, tuple       | array         |
    +-------------------+---------------+
    | str               | string        |
    +-------------------+---------------+
    | int, float        | number        |
    +-------------------+---------------+
    | True              | true          |
    +-------------------+---------------+
    | False             | false         |
    +-------------------+---------------+
    | None              | null          |
    +-------------------+---------------+

    To extend this to recognize other objects, subclass and implement a
    ``.default()`` method with another method that returns a serializable
    object for ``o`` if possible, otherwise it should call the superclass
    implementation (to raise ``TypeError``).

    """
    # Class-level defaults; __init__ overrides them per instance when
    # ``separators`` or ``indent`` is given.
    item_separator = ', '
    key_separator = ': '

    def __init__(self, skipkeys=False, ensure_ascii=True,
            check_circular=True, allow_nan=True, sort_keys=False,
            indent=None, separators=None, default=None):
        """Constructor for JSONEncoder, with sensible defaults.

        If skipkeys is false, then it is a TypeError to attempt
        encoding of keys that are not str, int, float or None.  If
        skipkeys is True, such items are simply skipped.

        If ensure_ascii is true, the output is guaranteed to be str
        objects with all incoming non-ASCII characters escaped.  If
        ensure_ascii is false, the output can contain non-ASCII characters.

        If check_circular is true, then lists, dicts, and custom encoded
        objects will be checked for circular references during encoding to
        prevent an infinite recursion (which would cause a RecursionError).
        Otherwise, no such check takes place.

        If allow_nan is true, then NaN, Infinity, and -Infinity will be
        encoded as such.  This behavior is not JSON specification compliant,
        but is consistent with most JavaScript based encoders and decoders.
        Otherwise, it will be a ValueError to encode such floats.

        If sort_keys is true, then the output of dictionaries will be
        sorted by key; this is useful for regression tests to ensure
        that JSON serializations can be compared on a day-to-day basis.

        If indent is a non-negative integer, then JSON array
        elements and object members will be pretty-printed with that
        indent level.  An indent level of 0 will only insert newlines.
        None is the most compact representation.

        If specified, separators should be an (item_separator, key_separator)
        tuple.  The default is (', ', ': ') if *indent* is ``None`` and
        (',', ': ') otherwise.  To get the most compact JSON representation,
        you should specify (',', ':') to eliminate whitespace.

        If specified, default is a function that gets called for objects
        that can't otherwise be serialized.  It should return a JSON encodable
        version of the object or raise a ``TypeError``.

        """

        self.skipkeys = skipkeys
        self.ensure_ascii = ensure_ascii
        self.check_circular = check_circular
        self.allow_nan = allow_nan
        self.sort_keys = sort_keys
        self.indent = indent
        if separators is not None:
            self.item_separator, self.key_separator = separators
        elif indent is not None:
            # Pretty-printed output puts each item on its own line, so the
            # trailing space of the default item separator is dropped.
            self.item_separator = ','
        if default is not None:
            # Shadow the default() method with the supplied callable.
            self.default = default

    def default(self, o):
        """Implement this method in a subclass such that it returns
        a serializable object for ``o``, or calls the base implementation
        (to raise a ``TypeError``).

        For example, to support arbitrary iterators, you could
        implement default like this::

            def default(self, o):
                try:
                    iterable = iter(o)
                except TypeError:
                    pass
                else:
                    return list(iterable)
                # Let the base class default method raise the TypeError
                return JSONEncoder.default(self, o)

        """
        raise TypeError(repr(o) + " is not JSON serializable")

    def encode(self, o):
        """Return a JSON string representation of a Python data structure.

        >>> from json.encoder import JSONEncoder
        >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
        '{"foo": ["bar", "baz"]}'

        """
        # This is for extremely simple cases and benchmarks.
        if isinstance(o, str):
            if self.ensure_ascii:
                return encode_basestring_ascii(o)
            else:
                return encode_basestring(o)
        # This doesn't pass the iterator directly to ''.join() because the
        # exceptions aren't as detailed.  The list call should be roughly
        # equivalent to the PySequence_Fast that ''.join() would do.
        chunks = self.iterencode(o, _one_shot=True)
        if not isinstance(chunks, (list, tuple)):
            chunks = list(chunks)
        return ''.join(chunks)

    def iterencode(self, o, _one_shot=False):
        """Encode the given object and yield each string
        representation as available.

        For example::

            for chunk in JSONEncoder().iterencode(bigobject):
                mysocket.write(chunk)

        """
        if self.check_circular:
            markers = {}
        else:
            markers = None
        if self.ensure_ascii:
            _encoder = encode_basestring_ascii
        else:
            _encoder = encode_basestring

        def floatstr(o, allow_nan=self.allow_nan,
                _repr=FLOAT_REPR, _inf=INFINITY, _neginf=-INFINITY):
            # Check for specials.  Note that this type of test is processor
            # and/or platform-specific, so do tests which don't depend on the
            # internals.

            if o != o:
                text = 'NaN'
            elif o == _inf:
                text = 'Infinity'
            elif o == _neginf:
                text = '-Infinity'
            else:
                return _repr(o)

            if not allow_nan:
                raise ValueError(
                    "Out of range float values are not JSON compliant: " +
                    repr(o))

            return text

        # The C encoder is only used for one-shot encoding without
        # indentation; every other case goes through the Python generators.
        if (_one_shot and c_make_encoder is not None
                and self.indent is None):
            _iterencode = c_make_encoder(
                markers, self.default, _encoder, self.indent,
                self.key_separator, self.item_separator, self.sort_keys,
                self.skipkeys, self.allow_nan)
        else:
            _iterencode = _make_iterencode(
                markers, self.default, _encoder, self.indent, floatstr,
                self.key_separator, self.item_separator, self.sort_keys,
                self.skipkeys, _one_shot)
        return _iterencode(o, 0)

def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
        _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,
        ## HACK: hand-optimized bytecode; turn globals into locals
        ValueError=ValueError,
        dict=dict,
        float=float,
        id=id,
        int=int,
        isinstance=isinstance,
        list=list,
        str=str,
        tuple=tuple,
    ):
    """Build and return the pure-Python ``_iterencode(o, indent_level)``
    generator function.

    ``markers`` is a dict used to detect circular references (keyed by
    ``id()`` of the containers currently being encoded), or None to disable
    the check.  ``_default`` is called for objects of unsupported type,
    ``_encoder`` turns a str into a JSON string literal and ``_floatstr``
    turns a float into its JSON text.  ``_indent`` is None (compact output),
    a str used once per nesting level, or an int giving the number of spaces
    per level.  ``_key_separator`` and ``_item_separator`` are emitted
    between key/value and between items.  If ``_sort_keys`` is true, dict
    items are sorted by key; if ``_skipkeys`` is true, dict keys of
    unsupported type are skipped instead of raising TypeError.

    The returned generator raises ValueError("Circular reference detected")
    when ``markers`` is in use and a container is reached through itself.

    """

    if _indent is not None and not isinstance(_indent, str):
        _indent = ' ' * _indent

    def _iterencode_list(lst, _current_indent_level):
        if not lst:
            yield '[]'
            return
        if markers is not None:
            markerid = id(lst)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = lst
        # buf holds the text that must precede the next item: the opening
        # bracket for the first item, the separator for all later ones.
        buf = '['
        if _indent is not None:
            _current_indent_level += 1
            newline_indent = '\n' + _indent * _current_indent_level
            separator = _item_separator + newline_indent
            buf += newline_indent
        else:
            newline_indent = None
            separator = _item_separator
        first = True
        for value in lst:
            if first:
                first = False
            else:
                buf = separator
            if isinstance(value, str):
                yield buf + _encoder(value)
            elif value is None:
                yield buf + 'null'
            elif value is True:
                yield buf + 'true'
            elif value is False:
                yield buf + 'false'
            elif isinstance(value, int):
                # Subclasses of int/float may override __str__, but we still
                # want to encode them as integers/floats in JSON. One example
                # within the standard library is IntEnum.
                yield buf + str(int(value))
            elif isinstance(value, float):
                # see comment above for int
                yield buf + _floatstr(float(value))
            else:
                yield buf
                if isinstance(value, (list, tuple)):
                    chunks = _iterencode_list(value, _current_indent_level)
                elif isinstance(value, dict):
                    chunks = _iterencode_dict(value, _current_indent_level)
                else:
                    chunks = _iterencode(value, _current_indent_level)
                yield from chunks
        if newline_indent is not None:
            _current_indent_level -= 1
            yield '\n' + _indent * _current_indent_level
        yield ']'
        if markers is not None:
            del markers[markerid]

    def _iterencode_dict(dct, _current_indent_level):
        if not dct:
            yield '{}'
            return
        if markers is not None:
            markerid = id(dct)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = dct
        yield '{'
        if _indent is not None:
            _current_indent_level += 1
            newline_indent = '\n' + _indent * _current_indent_level
            item_separator = _item_separator + newline_indent
            yield newline_indent
        else:
            newline_indent = None
            item_separator = _item_separator
        first = True
        if _sort_keys:
            items = sorted(dct.items(), key=lambda kv: kv[0])
        else:
            items = dct.items()
        for key, value in items:
            if isinstance(key, str):
                pass
            # JavaScript is weakly typed for these, so it makes sense to
            # also allow them.  Many encoders seem to do something like this.
            elif isinstance(key, float):
                # see comment for int/float in _make_iterencode
                key = _floatstr(float(key))
            elif key is True:
                key = 'true'
            elif key is False:
                key = 'false'
            elif key is None:
                key = 'null'
            elif isinstance(key, int):
                # see comment for int/float in _make_iterencode
                key = str(int(key))
            elif _skipkeys:
                continue
            else:
                raise TypeError("key " + repr(key) + " is not a string")
            if first:
                first = False
            else:
                yield item_separator
            yield _encoder(key)
            yield _key_separator
            if isinstance(value, str):
                yield _encoder(value)
            elif value is None:
                yield 'null'
            elif value is True:
                yield 'true'
            elif value is False:
                yield 'false'
            elif isinstance(value, int):
                # see comment for int/float in _make_iterencode
                yield str(int(value))
            elif isinstance(value, float):
                # see comment for int/float in _make_iterencode
                yield _floatstr(float(value))
            else:
                if isinstance(value, (list, tuple)):
                    chunks = _iterencode_list(value, _current_indent_level)
                elif isinstance(value, dict):
                    chunks = _iterencode_dict(value, _current_indent_level)
                else:
                    chunks = _iterencode(value, _current_indent_level)
                yield from chunks
        if newline_indent is not None:
            _current_indent_level -= 1
            yield '\n' + _indent * _current_indent_level
        yield '}'
        if markers is not None:
            del markers[markerid]

    def _iterencode(o, _current_indent_level):
        if isinstance(o, str):
            yield _encoder(o)
        elif o is None:
            yield 'null'
        elif o is True:
            yield 'true'
        elif o is False:
            yield 'false'
        elif isinstance(o, int):
            # see comment for int/float in _make_iterencode
            yield str(int(o))
        elif isinstance(o, float):
            # see comment for int/float in _make_iterencode
            yield _floatstr(float(o))
        elif isinstance(o, (list, tuple)):
            yield from _iterencode_list(o, _current_indent_level)
        elif isinstance(o, dict):
            yield from _iterencode_dict(o, _current_indent_level)
        else:
            # Unsupported type: ask _default() for a substitute and encode
            # that instead, guarding against _default() returning an object
            # that leads back to ``o``.
            if markers is not None:
                markerid = id(o)
                if markerid in markers:
                    raise ValueError("Circular reference detected")
                markers[markerid] = o
            o = _default(o)
            yield from _iterencode(o, _current_indent_level)
            if markers is not None:
                del markers[markerid]
    return _iterencode