blob: 39b550dbb0bda128fefffd3adb654ae032f06843 [file] [log] [blame]
Christian Heimes90540002008-05-08 14:29:10 +00001"""Implementation of JSONEncoder
2"""
Christian Heimes90540002008-05-08 14:29:10 +00003import re
4
5try:
6 from _json import encode_basestring_ascii as c_encode_basestring_ascii
7except ImportError:
8 c_encode_basestring_ascii = None
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00009try:
10 from _json import make_encoder as c_make_encoder
11except ImportError:
12 c_make_encoder = None
Christian Heimes90540002008-05-08 14:29:10 +000013
# Characters that always need escaping inside a JSON string: the C0 control
# range, the backslash and the double quote.
ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]')
# Same idea for ASCII-only output: backslash, double quote, and everything
# that is not in the printable ASCII range (space through tilde).
ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
# Bytes pattern matching any byte with the high bit set.
HAS_UTF8 = re.compile(b'[\x80-\xff]')
# Maps a single character to its JSON escape sequence.  The short escapes are
# listed explicitly; the loop below fills in \uXXXX for the remaining control
# characters without overwriting the short forms.
ESCAPE_DCT = {
    '\\': '\\\\',
    '"': '\\"',
    '\b': '\\b',
    '\f': '\\f',
    '\n': '\\n',
    '\r': '\\r',
    '\t': '\\t',
}
for i in range(0x20):
    ESCAPE_DCT.setdefault(chr(i), '\\u{0:04x}'.format(i))

INFINITY = float('inf')
# Use float.__repr__ rather than the generic repr(): repr() dispatches to an
# overridden __repr__ on float subclasses, which could produce text that is
# not a valid JSON number.
FLOAT_REPR = float.__repr__
32
def encode_basestring(s):
    """Return a JSON representation of a Python string.

    Every character matched by ``ESCAPE`` is replaced by its entry in
    ``ESCAPE_DCT``; the result is wrapped in double quotes.  Non-ASCII
    characters are passed through unchanged.
    """
    escaped = ESCAPE.sub(lambda found: ESCAPE_DCT[found.group(0)], s)
    return '"' + escaped + '"'
40
41
def py_encode_basestring_ascii(s):
    """Return an ASCII-only JSON representation of a Python string.

    Characters with a dedicated entry in ``ESCAPE_DCT`` use that escape.
    Any other character matched by ``ESCAPE_ASCII`` becomes ``\\uXXXX``,
    or a UTF-16 surrogate pair of two such escapes when its code point
    is 0x10000 or above.
    """
    def _escape_char(found):
        char = found.group(0)
        short_form = ESCAPE_DCT.get(char)
        if short_form is not None:
            return short_form
        codepoint = ord(char)
        if codepoint >= 0x10000:
            # Outside the BMP: split into a high/low surrogate pair.
            offset = codepoint - 0x10000
            high = 0xd800 | ((offset >> 10) & 0x3ff)
            low = 0xdc00 | (offset & 0x3ff)
            return '\\u{0:04x}\\u{1:04x}'.format(high, low)
        return '\\u{0:04x}'.format(codepoint)

    return '"' + ESCAPE_ASCII.sub(_escape_char, s) + '"'
Christian Heimes90540002008-05-08 14:29:10 +000062
63
# Use the C implementation when it was importable (it is None otherwise);
# fall back to the pure-Python version.
encode_basestring_ascii = (
    c_encode_basestring_ascii or py_encode_basestring_ascii
)
Christian Heimes90540002008-05-08 14:29:10 +000066
class JSONEncoder(object):
    """Extensible JSON <http://json.org> encoder for Python data structures.

    The following objects and types are encoded out of the box:

    +-------------------+---------------+
    | Python            | JSON          |
    +===================+===============+
    | dict              | object        |
    +-------------------+---------------+
    | list, tuple       | array         |
    +-------------------+---------------+
    | str               | string        |
    +-------------------+---------------+
    | int, float        | number        |
    +-------------------+---------------+
    | True              | true          |
    +-------------------+---------------+
    | False             | false         |
    +-------------------+---------------+
    | None              | null          |
    +-------------------+---------------+

    To handle other objects, subclass and override ``.default()`` so that
    it returns a serializable object for ``o`` when it can, and otherwise
    calls the superclass implementation (which raises ``TypeError``).

    """
    # Class-level defaults; __init__ may shadow them per instance.
    item_separator = ', '
    key_separator = ': '

    def __init__(self, skipkeys=False, ensure_ascii=True,
                 check_circular=True, allow_nan=True, sort_keys=False,
                 indent=None, separators=None, default=None):
        """Constructor for JSONEncoder, with sensible defaults.

        skipkeys: when false, a dict key that is not a str, int, float,
        bool or None raises TypeError; when true such items are silently
        skipped.

        ensure_ascii: when true, the output contains only ASCII, with all
        incoming non-ASCII characters escaped; when false, non-ASCII
        characters may appear in the output as-is.

        check_circular: when true, lists, dicts and custom encoded objects
        are tracked during encoding and a ValueError is raised if a
        circular reference is found, instead of recursing without bound.
        When false, no such check takes place.

        allow_nan: when true, NaN, Infinity and -Infinity are encoded as
        such.  This is not JSON specification compliant, but matches most
        JavaScript based encoders and decoders.  When false, encoding such
        a float raises ValueError.

        sort_keys: when true, dictionaries are output sorted by key, which
        makes serializations comparable across runs (useful for regression
        tests).

        indent: a non-negative integer pretty-prints array elements and
        object members with that indent level; 0 only inserts newlines.
        None gives the most compact layout.

        separators: if given, an (item_separator, key_separator) tuple.
        The default is (', ', ': ') if *indent* is ``None`` and
        (',', ': ') otherwise.  Pass (',', ':') to eliminate whitespace
        and get the most compact JSON representation.

        default: if given, a function called for objects that cannot
        otherwise be serialized.  It should return a JSON encodable
        version of the object or raise a ``TypeError``.

        """
        self.skipkeys = skipkeys
        self.ensure_ascii = ensure_ascii
        self.check_circular = check_circular
        self.allow_nan = allow_nan
        self.sort_keys = sort_keys
        self.indent = indent
        if separators is None:
            # Pretty-printing puts a newline after each item, so drop the
            # trailing space from the item separator.
            if indent is not None:
                self.item_separator = ','
        else:
            self.item_separator, self.key_separator = separators
        if default is not None:
            # Shadow the default() method with the caller's function.
            self.default = default

    def default(self, o):
        """Return a serializable object for ``o``; the base version raises.

        Override this in a subclass so that it returns a serializable
        object for ``o``, or calls the base implementation (to raise a
        ``TypeError``).

        For example, to support arbitrary iterators, you could
        implement default like this::

            def default(self, o):
                try:
                    iterable = iter(o)
                except TypeError:
                    pass
                else:
                    return list(iterable)
                # Let the base class default method raise the TypeError
                return JSONEncoder.default(self, o)

        """
        raise TypeError(repr(o) + " is not JSON serializable")

    def encode(self, o):
        """Return a JSON string representation of a Python data structure.

        >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
        '{"foo": ["bar", "baz"]}'

        """
        # Fast path for a bare string (extremely simple cases, benchmarks).
        if isinstance(o, str):
            if self.ensure_ascii:
                quote = encode_basestring_ascii
            else:
                quote = encode_basestring
            return quote(o)
        # The chunks are materialized into a list before joining rather
        # than handing the iterator to ''.join() directly, because the
        # exceptions are more detailed that way.
        pieces = self.iterencode(o, _one_shot=True)
        if not isinstance(pieces, (list, tuple)):
            pieces = list(pieces)
        return ''.join(pieces)

    def iterencode(self, o, _one_shot=False):
        """Encode the given object and yield each string
        representation as available.

        For example::

            for chunk in JSONEncoder().iterencode(bigobject):
                mysocket.write(chunk)

        """
        markers = {} if self.check_circular else None
        if self.ensure_ascii:
            _encoder = encode_basestring_ascii
        else:
            _encoder = encode_basestring

        def floatstr(o, allow_nan=self.allow_nan,
                     _repr=FLOAT_REPR, _inf=INFINITY, _neginf=-INFINITY):
            # Detect the special values with plain comparisons only, so the
            # test does not depend on platform-specific float internals.
            if o != o:
                special = 'NaN'
            elif o == _inf:
                special = 'Infinity'
            elif o == _neginf:
                special = '-Infinity'
            else:
                return _repr(o)

            if allow_nan:
                return special
            raise ValueError(
                "Out of range float values are not JSON compliant: " +
                repr(o))

        # The C encoder is only used for one-shot, non-indented output.
        use_c_encoder = (_one_shot and c_make_encoder is not None
                         and self.indent is None)
        if use_c_encoder:
            _iterencode = c_make_encoder(
                markers, self.default, _encoder, self.indent,
                self.key_separator, self.item_separator, self.sort_keys,
                self.skipkeys, self.allow_nan)
        else:
            _iterencode = _make_iterencode(
                markers, self.default, _encoder, self.indent, floatstr,
                self.key_separator, self.item_separator, self.sort_keys,
                self.skipkeys, _one_shot)
        return _iterencode(o, 0)
250
251def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
252 _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,
253 ## HACK: hand-optimized bytecode; turn globals into locals
254 ValueError=ValueError,
255 dict=dict,
256 float=float,
257 id=id,
258 int=int,
259 isinstance=isinstance,
260 list=list,
261 str=str,
262 tuple=tuple,
263 ):
264
Raymond Hettingerb643ef82010-10-31 08:00:16 +0000265 if _indent is not None and not isinstance(_indent, str):
266 _indent = ' ' * _indent
267
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000268 def _iterencode_list(lst, _current_indent_level):
269 if not lst:
270 yield '[]'
271 return
272 if markers is not None:
273 markerid = id(lst)
274 if markerid in markers:
275 raise ValueError("Circular reference detected")
276 markers[markerid] = lst
277 buf = '['
278 if _indent is not None:
279 _current_indent_level += 1
Raymond Hettingerb643ef82010-10-31 08:00:16 +0000280 newline_indent = '\n' + _indent * _current_indent_level
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000281 separator = _item_separator + newline_indent
282 buf += newline_indent
283 else:
284 newline_indent = None
285 separator = _item_separator
286 first = True
287 for value in lst:
288 if first:
289 first = False
290 else:
291 buf = separator
292 if isinstance(value, str):
293 yield buf + _encoder(value)
294 elif value is None:
295 yield buf + 'null'
296 elif value is True:
297 yield buf + 'true'
298 elif value is False:
299 yield buf + 'false'
300 elif isinstance(value, int):
301 yield buf + str(value)
302 elif isinstance(value, float):
303 yield buf + _floatstr(value)
304 else:
305 yield buf
306 if isinstance(value, (list, tuple)):
307 chunks = _iterencode_list(value, _current_indent_level)
308 elif isinstance(value, dict):
309 chunks = _iterencode_dict(value, _current_indent_level)
310 else:
311 chunks = _iterencode(value, _current_indent_level)
Philip Jenveyfd0d3e52012-10-01 15:34:31 -0700312 yield from chunks
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000313 if newline_indent is not None:
314 _current_indent_level -= 1
Raymond Hettingerb643ef82010-10-31 08:00:16 +0000315 yield '\n' + _indent * _current_indent_level
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000316 yield ']'
317 if markers is not None:
318 del markers[markerid]
319
320 def _iterencode_dict(dct, _current_indent_level):
321 if not dct:
322 yield '{}'
323 return
324 if markers is not None:
325 markerid = id(dct)
326 if markerid in markers:
327 raise ValueError("Circular reference detected")
328 markers[markerid] = dct
329 yield '{'
330 if _indent is not None:
331 _current_indent_level += 1
Raymond Hettingerb643ef82010-10-31 08:00:16 +0000332 newline_indent = '\n' + _indent * _current_indent_level
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000333 item_separator = _item_separator + newline_indent
334 yield newline_indent
335 else:
336 newline_indent = None
337 item_separator = _item_separator
338 first = True
339 if _sort_keys:
340 items = sorted(dct.items(), key=lambda kv: kv[0])
341 else:
342 items = dct.items()
343 for key, value in items:
344 if isinstance(key, str):
345 pass
346 # JavaScript is weakly typed for these, so it makes sense to
347 # also allow them. Many encoders seem to do something like this.
348 elif isinstance(key, float):
349 key = _floatstr(key)
350 elif key is True:
351 key = 'true'
352 elif key is False:
353 key = 'false'
354 elif key is None:
355 key = 'null'
356 elif isinstance(key, int):
357 key = str(key)
358 elif _skipkeys:
359 continue
360 else:
361 raise TypeError("key " + repr(key) + " is not a string")
362 if first:
363 first = False
364 else:
365 yield item_separator
366 yield _encoder(key)
367 yield _key_separator
368 if isinstance(value, str):
369 yield _encoder(value)
370 elif value is None:
371 yield 'null'
372 elif value is True:
373 yield 'true'
374 elif value is False:
375 yield 'false'
376 elif isinstance(value, int):
377 yield str(value)
378 elif isinstance(value, float):
379 yield _floatstr(value)
380 else:
381 if isinstance(value, (list, tuple)):
382 chunks = _iterencode_list(value, _current_indent_level)
383 elif isinstance(value, dict):
384 chunks = _iterencode_dict(value, _current_indent_level)
385 else:
386 chunks = _iterencode(value, _current_indent_level)
Philip Jenveyfd0d3e52012-10-01 15:34:31 -0700387 yield from chunks
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000388 if newline_indent is not None:
389 _current_indent_level -= 1
Raymond Hettingerb643ef82010-10-31 08:00:16 +0000390 yield '\n' + _indent * _current_indent_level
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000391 yield '}'
392 if markers is not None:
393 del markers[markerid]
394
395 def _iterencode(o, _current_indent_level):
396 if isinstance(o, str):
397 yield _encoder(o)
398 elif o is None:
399 yield 'null'
400 elif o is True:
401 yield 'true'
402 elif o is False:
403 yield 'false'
Florent Xicluna02ea12b22010-07-28 16:39:41 +0000404 elif isinstance(o, int):
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000405 yield str(o)
406 elif isinstance(o, float):
407 yield _floatstr(o)
408 elif isinstance(o, (list, tuple)):
Philip Jenveyfd0d3e52012-10-01 15:34:31 -0700409 yield from _iterencode_list(o, _current_indent_level)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000410 elif isinstance(o, dict):
Philip Jenveyfd0d3e52012-10-01 15:34:31 -0700411 yield from _iterencode_dict(o, _current_indent_level)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000412 else:
413 if markers is not None:
414 markerid = id(o)
415 if markerid in markers:
416 raise ValueError("Circular reference detected")
417 markers[markerid] = o
418 o = _default(o)
Philip Jenveyfd0d3e52012-10-01 15:34:31 -0700419 yield from _iterencode(o, _current_indent_level)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000420 if markers is not None:
421 del markers[markerid]
422 return _iterencode