blob: 93b5ea7d5e046baef9c81571d644812028dcfe5f [file] [log] [blame]
Christian Heimes90540002008-05-08 14:29:10 +00001"""Implementation of JSONEncoder
2"""
Christian Heimes90540002008-05-08 14:29:10 +00003import re
4
5try:
6 from _json import encode_basestring_ascii as c_encode_basestring_ascii
7except ImportError:
8 c_encode_basestring_ascii = None
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00009try:
10 from _json import make_encoder as c_make_encoder
11except ImportError:
12 c_make_encoder = None
Christian Heimes90540002008-05-08 14:29:10 +000013
# Characters that must be escaped when non-ASCII output is permitted:
# the C0 control range, the backslash and the double quote.
ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]')
# Characters that must be escaped for ASCII-only output: the backslash,
# the double quote, and everything outside the printable range ' '..'~'.
ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
# Matches any byte whose high bit is set.
HAS_UTF8 = re.compile(b'[\x80-\xff]')

# Maps a single character to its JSON escape sequence.  The short,
# mnemonic escapes are listed first; the loop below fills in a \uXXXX
# escape for every remaining control character.
ESCAPE_DCT = dict([
    ('\\', '\\\\'),
    ('"', '\\"'),
    ('\b', '\\b'),
    ('\f', '\\f'),
    ('\n', '\\n'),
    ('\r', '\\r'),
    ('\t', '\\t'),
])
for i in range(0x20):
    if chr(i) not in ESCAPE_DCT:
        ESCAPE_DCT[chr(i)] = '\\u{0:04x}'.format(i)

# Positive infinity, used to recognise out-of-range floats.
INFINITY = float('inf')
# Callable used to turn a float into its textual representation.
FLOAT_REPR = repr
32
def encode_basestring(s):
    """Return *s* as a double-quoted JSON string literal.

    Every character matched by ``ESCAPE`` is replaced by its entry in
    ``ESCAPE_DCT``; all other characters, including non-ASCII ones, are
    copied through unchanged.

    """
    escaped = ESCAPE.sub(lambda found: ESCAPE_DCT[found.group(0)], s)
    return '"' + escaped + '"'
40
41
def py_encode_basestring_ascii(s):
    """Return *s* as a double-quoted, ASCII-only JSON string literal.

    Characters with an entry in ``ESCAPE_DCT`` use that escape.  Any other
    character matched by ``ESCAPE_ASCII`` becomes a ``\\uXXXX`` escape, or
    a pair of them (a UTF-16 surrogate pair) when its code point is
    0x10000 or above.

    """
    def escape_char(found):
        char = found.group(0)
        known = ESCAPE_DCT.get(char)
        if known is not None:
            return known
        codepoint = ord(char)
        if codepoint >= 0x10000:
            # Beyond the Basic Multilingual Plane: split into a
            # high/low surrogate pair.
            offset = codepoint - 0x10000
            high = 0xd800 | ((offset >> 10) & 0x3ff)
            low = 0xdc00 | (offset & 0x3ff)
            return '\\u{0:04x}\\u{1:04x}'.format(high, low)
        return '\\u{0:04x}'.format(codepoint)

    return '"' + ESCAPE_ASCII.sub(escape_char, s) + '"'
Christian Heimes90540002008-05-08 14:29:10 +000062
63
# Use the escaper imported from the _json extension when there is one;
# otherwise fall back to the pure-Python implementation.
if c_encode_basestring_ascii:
    encode_basestring_ascii = c_encode_basestring_ascii
else:
    encode_basestring_ascii = py_encode_basestring_ascii
Christian Heimes90540002008-05-08 14:29:10 +000066
class JSONEncoder(object):
    """Extensible JSON <http://json.org> encoder for Python data structures.

    Supports the following objects and types by default:

    +-------------------+---------------+
    | Python            | JSON          |
    +===================+===============+
    | dict              | object        |
    +-------------------+---------------+
    | list, tuple       | array         |
    +-------------------+---------------+
    | str               | string        |
    +-------------------+---------------+
    | int, float        | number        |
    +-------------------+---------------+
    | True              | true          |
    +-------------------+---------------+
    | False             | false         |
    +-------------------+---------------+
    | None              | null          |
    +-------------------+---------------+

    To extend this to recognize other objects, subclass and implement a
    ``.default()`` method that returns a serializable object for ``o`` if
    possible; otherwise it should call the superclass implementation
    (to raise ``TypeError``).

    """
    # Class-level defaults; __init__ overrides them per instance when
    # ``separators`` or ``indent`` is given.
    item_separator = ', '
    key_separator = ': '

    def __init__(self, skipkeys=False, ensure_ascii=True,
            check_circular=True, allow_nan=True, sort_keys=False,
            indent=None, separators=None, default=None):
        """Constructor for JSONEncoder, with sensible defaults.

        If skipkeys is false, then it is a TypeError to attempt
        encoding of keys that are not str, int, float or None.  If
        skipkeys is True, such items are simply skipped.

        If ensure_ascii is true, the output is guaranteed to be str
        objects with all incoming non-ASCII characters escaped.  If
        ensure_ascii is false, the output can contain non-ASCII characters.

        If check_circular is true, then lists, dicts, and custom encoded
        objects will be checked for circular references during encoding,
        and a ValueError is raised when one is found.  Otherwise, no such
        check takes place and a circular structure leads to unbounded
        recursion.

        If allow_nan is true, then NaN, Infinity, and -Infinity will be
        encoded as such.  This behavior is not JSON specification compliant,
        but is consistent with most JavaScript based encoders and decoders.
        Otherwise, it will be a ValueError to encode such floats.

        If sort_keys is true, then the output of dictionaries will be
        sorted by key; this is useful for regression tests to ensure
        that JSON serializations can be compared on a day-to-day basis.

        If indent is a non-negative integer, then JSON array
        elements and object members will be pretty-printed with that
        many spaces per level.  An indent level of 0 will only insert
        newlines.  If indent is a string, that string is used to indent
        each level.  None is the most compact representation.

        If specified, separators should be an (item_separator, key_separator)
        tuple.  The default is (', ', ': ') if *indent* is ``None`` and
        (',', ': ') otherwise.  To get the most compact JSON representation,
        you should specify (',', ':') to eliminate whitespace.

        If specified, default is a function that gets called for objects
        that can't otherwise be serialized.  It should return a JSON encodable
        version of the object or raise a ``TypeError``.

        """

        self.skipkeys = skipkeys
        self.ensure_ascii = ensure_ascii
        self.check_circular = check_circular
        self.allow_nan = allow_nan
        self.sort_keys = sort_keys
        self.indent = indent
        if separators is not None:
            self.item_separator, self.key_separator = separators
        elif indent is not None:
            # Every item is followed by a newline when indenting, so the
            # space after the comma would only be trailing whitespace.
            self.item_separator = ','
        if default is not None:
            # Shadow the default() method with the supplied callable.
            self.default = default

    def default(self, o):
        """Implement this method in a subclass such that it returns
        a serializable object for ``o``, or calls the base implementation
        (to raise a ``TypeError``).

        For example, to support arbitrary iterators, you could
        implement default like this::

            def default(self, o):
                try:
                    iterable = iter(o)
                except TypeError:
                    pass
                else:
                    return list(iterable)
                return JSONEncoder.default(self, o)

        """
        raise TypeError(repr(o) + " is not JSON serializable")

    def encode(self, o):
        """Return a JSON string representation of a Python data structure.

        >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
        '{"foo": ["bar", "baz"]}'

        """
        # This is for extremely simple cases and benchmarks.
        if isinstance(o, str):
            if self.ensure_ascii:
                return encode_basestring_ascii(o)
            else:
                return encode_basestring(o)
        # This doesn't pass the iterator directly to ''.join() because the
        # exceptions aren't as detailed.  The list call should be roughly
        # equivalent to the PySequence_Fast that ''.join() would do.
        chunks = self.iterencode(o, _one_shot=True)
        if not isinstance(chunks, (list, tuple)):
            chunks = list(chunks)
        return ''.join(chunks)

    def iterencode(self, o, _one_shot=False):
        """Encode the given object and yield each string
        representation as available.

        For example::

            for chunk in JSONEncoder().iterencode(bigobject):
                mysocket.write(chunk)

        """
        if self.check_circular:
            markers = {}
        else:
            markers = None
        if self.ensure_ascii:
            _encoder = encode_basestring_ascii
        else:
            _encoder = encode_basestring

        # A float subclass may override __repr__ with something that is not
        # a valid JSON number, so format through float.__repr__ directly.
        # A FLOAT_REPR that has been replaced at module level is still
        # honoured so existing customisations keep working.
        if FLOAT_REPR is repr:
            _float_repr = float.__repr__
        else:
            _float_repr = FLOAT_REPR

        def floatstr(o, allow_nan=self.allow_nan,
                _repr=_float_repr, _inf=INFINITY, _neginf=-INFINITY):
            # Check for specials.  Note that this type of test is processor
            # and/or platform-specific, so do tests which don't depend on the
            # internals.

            if o != o:
                text = 'NaN'
            elif o == _inf:
                text = 'Infinity'
            elif o == _neginf:
                text = '-Infinity'
            else:
                return _repr(o)

            if not allow_nan:
                raise ValueError(
                    "Out of range float values are not JSON compliant: " +
                    repr(o))

            return text

        # The C encoder is only used for one-shot encoding without
        # indentation; every other case goes through the Python encoder.
        if (_one_shot and c_make_encoder is not None
                and self.indent is None):
            _iterencode = c_make_encoder(
                markers, self.default, _encoder, self.indent,
                self.key_separator, self.item_separator, self.sort_keys,
                self.skipkeys, self.allow_nan)
        else:
            _iterencode = _make_iterencode(
                markers, self.default, _encoder, self.indent, floatstr,
                self.key_separator, self.item_separator, self.sort_keys,
                self.skipkeys, _one_shot)
        return _iterencode(o, 0)
249
250def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
251 _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,
252 ## HACK: hand-optimized bytecode; turn globals into locals
253 ValueError=ValueError,
254 dict=dict,
255 float=float,
256 id=id,
257 int=int,
258 isinstance=isinstance,
259 list=list,
260 str=str,
261 tuple=tuple,
262 ):
263
Raymond Hettingerb643ef82010-10-31 08:00:16 +0000264 if _indent is not None and not isinstance(_indent, str):
265 _indent = ' ' * _indent
266
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000267 def _iterencode_list(lst, _current_indent_level):
268 if not lst:
269 yield '[]'
270 return
271 if markers is not None:
272 markerid = id(lst)
273 if markerid in markers:
274 raise ValueError("Circular reference detected")
275 markers[markerid] = lst
276 buf = '['
277 if _indent is not None:
278 _current_indent_level += 1
Raymond Hettingerb643ef82010-10-31 08:00:16 +0000279 newline_indent = '\n' + _indent * _current_indent_level
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000280 separator = _item_separator + newline_indent
281 buf += newline_indent
282 else:
283 newline_indent = None
284 separator = _item_separator
285 first = True
286 for value in lst:
287 if first:
288 first = False
289 else:
290 buf = separator
291 if isinstance(value, str):
292 yield buf + _encoder(value)
293 elif value is None:
294 yield buf + 'null'
295 elif value is True:
296 yield buf + 'true'
297 elif value is False:
298 yield buf + 'false'
299 elif isinstance(value, int):
300 yield buf + str(value)
301 elif isinstance(value, float):
302 yield buf + _floatstr(value)
303 else:
304 yield buf
305 if isinstance(value, (list, tuple)):
306 chunks = _iterencode_list(value, _current_indent_level)
307 elif isinstance(value, dict):
308 chunks = _iterencode_dict(value, _current_indent_level)
309 else:
310 chunks = _iterencode(value, _current_indent_level)
Philip Jenveyfd0d3e52012-10-01 15:34:31 -0700311 yield from chunks
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000312 if newline_indent is not None:
313 _current_indent_level -= 1
Raymond Hettingerb643ef82010-10-31 08:00:16 +0000314 yield '\n' + _indent * _current_indent_level
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000315 yield ']'
316 if markers is not None:
317 del markers[markerid]
318
319 def _iterencode_dict(dct, _current_indent_level):
320 if not dct:
321 yield '{}'
322 return
323 if markers is not None:
324 markerid = id(dct)
325 if markerid in markers:
326 raise ValueError("Circular reference detected")
327 markers[markerid] = dct
328 yield '{'
329 if _indent is not None:
330 _current_indent_level += 1
Raymond Hettingerb643ef82010-10-31 08:00:16 +0000331 newline_indent = '\n' + _indent * _current_indent_level
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000332 item_separator = _item_separator + newline_indent
333 yield newline_indent
334 else:
335 newline_indent = None
336 item_separator = _item_separator
337 first = True
338 if _sort_keys:
339 items = sorted(dct.items(), key=lambda kv: kv[0])
340 else:
341 items = dct.items()
342 for key, value in items:
343 if isinstance(key, str):
344 pass
345 # JavaScript is weakly typed for these, so it makes sense to
346 # also allow them. Many encoders seem to do something like this.
347 elif isinstance(key, float):
348 key = _floatstr(key)
349 elif key is True:
350 key = 'true'
351 elif key is False:
352 key = 'false'
353 elif key is None:
354 key = 'null'
355 elif isinstance(key, int):
356 key = str(key)
357 elif _skipkeys:
358 continue
359 else:
360 raise TypeError("key " + repr(key) + " is not a string")
361 if first:
362 first = False
363 else:
364 yield item_separator
365 yield _encoder(key)
366 yield _key_separator
367 if isinstance(value, str):
368 yield _encoder(value)
369 elif value is None:
370 yield 'null'
371 elif value is True:
372 yield 'true'
373 elif value is False:
374 yield 'false'
375 elif isinstance(value, int):
376 yield str(value)
377 elif isinstance(value, float):
378 yield _floatstr(value)
379 else:
380 if isinstance(value, (list, tuple)):
381 chunks = _iterencode_list(value, _current_indent_level)
382 elif isinstance(value, dict):
383 chunks = _iterencode_dict(value, _current_indent_level)
384 else:
385 chunks = _iterencode(value, _current_indent_level)
Philip Jenveyfd0d3e52012-10-01 15:34:31 -0700386 yield from chunks
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000387 if newline_indent is not None:
388 _current_indent_level -= 1
Raymond Hettingerb643ef82010-10-31 08:00:16 +0000389 yield '\n' + _indent * _current_indent_level
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000390 yield '}'
391 if markers is not None:
392 del markers[markerid]
393
394 def _iterencode(o, _current_indent_level):
395 if isinstance(o, str):
396 yield _encoder(o)
397 elif o is None:
398 yield 'null'
399 elif o is True:
400 yield 'true'
401 elif o is False:
402 yield 'false'
Florent Xicluna02ea12b22010-07-28 16:39:41 +0000403 elif isinstance(o, int):
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000404 yield str(o)
405 elif isinstance(o, float):
406 yield _floatstr(o)
407 elif isinstance(o, (list, tuple)):
Philip Jenveyfd0d3e52012-10-01 15:34:31 -0700408 yield from _iterencode_list(o, _current_indent_level)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000409 elif isinstance(o, dict):
Philip Jenveyfd0d3e52012-10-01 15:34:31 -0700410 yield from _iterencode_dict(o, _current_indent_level)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000411 else:
412 if markers is not None:
413 markerid = id(o)
414 if markerid in markers:
415 raise ValueError("Circular reference detected")
416 markers[markerid] = o
417 o = _default(o)
Philip Jenveyfd0d3e52012-10-01 15:34:31 -0700418 yield from _iterencode(o, _current_indent_level)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000419 if markers is not None:
420 del markers[markerid]
421 return _iterencode