blob: f5eeed75f04c9d035a00b4d67c75b74326f0683f [file] [log] [blame]
Brett Cannon4b964f92008-05-05 20:21:38 +00001"""Implementation of JSONEncoder
2"""
Brett Cannon4b964f92008-05-05 20:21:38 +00003import re
4
5try:
6 from _json import encode_basestring_ascii as c_encode_basestring_ascii
7except ImportError:
8 c_encode_basestring_ascii = None
Bob Ippolitod914e3f2009-03-17 23:19:00 +00009try:
10 from _json import make_encoder as c_make_encoder
11except ImportError:
12 c_make_encoder = None
Brett Cannon4b964f92008-05-05 20:21:38 +000013
# Characters that must be escaped in any JSON string: the control range
# \x00-\x1f, the backslash and the double quote.  The named control
# characters at the end of the class already fall inside \x00-\x1f.
ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]')
# Characters that must be escaped for ASCII-only output: backslash, double
# quote, and anything outside the printable range from space to tilde.
ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
# Matches any byte with the high bit set; py_encode_basestring_ascii uses it
# to decide whether a byte string has to be decoded before escaping.
HAS_UTF8 = re.compile(r'[\x80-\xff]')
# Maps a character to the escape sequence that replaces it.
ESCAPE_DCT = {
    '\\': '\\\\',
    '"': '\\"',
    '\b': '\\b',
    '\f': '\\f',
    '\n': '\\n',
    '\r': '\\r',
    '\t': '\\t',
}
# Control characters without a short escape get the generic \u00XX form;
# setdefault leaves the short escapes defined above untouched.
for i in range(0x20):
    ESCAPE_DCT.setdefault(chr(i), '\\u{0:04x}'.format(i))

INFINITY = float('inf')
# float.__repr__ rather than the repr builtin: an instance of a float
# subclass that overrides __repr__ would otherwise be written out using
# whatever text that override returns, which need not be a JSON number.
FLOAT_REPR = float.__repr__
32
def encode_basestring(s):
    """Return a JSON representation of a Python string

    Every character matched by ``ESCAPE`` is replaced with its entry in
    ``ESCAPE_DCT`` and the result is wrapped in double quotes.

    """
    escaped = ESCAPE.sub(lambda found: ESCAPE_DCT[found.group(0)], s)
    return '"' + escaped + '"'
40
41
def py_encode_basestring_ascii(s):
    """Return an ASCII-only JSON representation of a Python string

    A byte string containing high-bit bytes is decoded as UTF-8 first.
    Every character matched by ``ESCAPE_ASCII`` is then replaced either by
    its ``ESCAPE_DCT`` entry or by a ``\\uXXXX`` escape (a surrogate pair
    for code points of 0x10000 and above), and the result is wrapped in
    double quotes.

    """
    needs_decoding = isinstance(s, str) and HAS_UTF8.search(s) is not None
    if needs_decoding:
        s = s.decode('utf-8')

    def replace(match):
        char = match.group(0)
        if char in ESCAPE_DCT:
            return ESCAPE_DCT[char]
        codepoint = ord(char)
        if codepoint >= 0x10000:
            # surrogate pair
            offset = codepoint - 0x10000
            high = 0xd800 | ((offset >> 10) & 0x3ff)
            low = 0xdc00 | (offset & 0x3ff)
            return '\\u{0:04x}\\u{1:04x}'.format(high, low)
        return '\\u{0:04x}'.format(codepoint)

    return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"'
65
66
# Use the C implementation when the _json import supplied one; otherwise
# fall back to the pure-Python version.
encode_basestring_ascii = (
    c_encode_basestring_ascii if c_encode_basestring_ascii
    else py_encode_basestring_ascii)
Brett Cannon4b964f92008-05-05 20:21:38 +000069
class JSONEncoder(object):
    """Extensible JSON <http://json.org> encoder for Python data structures.

    Supports the following objects and types by default:

    +-------------------+---------------+
    | Python            | JSON          |
    +===================+===============+
    | dict              | object        |
    +-------------------+---------------+
    | list, tuple       | array         |
    +-------------------+---------------+
    | str, unicode      | string        |
    +-------------------+---------------+
    | int, long, float  | number        |
    +-------------------+---------------+
    | True              | true          |
    +-------------------+---------------+
    | False             | false         |
    +-------------------+---------------+
    | None              | null          |
    +-------------------+---------------+

    To extend this to recognize other objects, subclass and override the
    ``.default()`` method so that it returns a serializable object for
    ``o`` if possible; otherwise it should call the superclass
    implementation (to raise ``TypeError``).

    """
    item_separator = ', '
    key_separator = ': '

    def __init__(self, skipkeys=False, ensure_ascii=True,
            check_circular=True, allow_nan=True, sort_keys=False,
            indent=None, separators=None, encoding='utf-8', default=None):
        """Constructor for JSONEncoder, with sensible defaults.

        If skipkeys is false, then it is a TypeError to attempt
        encoding of keys that are not str, int, long, float or None.  If
        skipkeys is True, such items are simply skipped.

        If *ensure_ascii* is true (the default), all non-ASCII
        characters in the output are escaped with \\uXXXX sequences,
        and the results are str instances consisting of ASCII
        characters only.  If ensure_ascii is False, a result may be a
        unicode instance.  This usually happens if the input contains
        unicode strings or the *encoding* parameter is used.

        If check_circular is true, then lists, dicts, and custom encoded
        objects will be checked for circular references during encoding to
        prevent an infinite recursion (which would cause an OverflowError).
        Otherwise, no such check takes place.

        If allow_nan is true, then NaN, Infinity, and -Infinity will be
        encoded as such.  This behavior is not JSON specification compliant,
        but is consistent with most JavaScript based encoders and decoders.
        Otherwise, it will be a ValueError to encode such floats.

        If sort_keys is true, then the output of dictionaries will be
        sorted by key; this is useful for regression tests to ensure
        that JSON serializations can be compared on a day-to-day basis.

        If indent is a non-negative integer, then JSON array
        elements and object members will be pretty-printed with that
        indent level.  An indent level of 0 will only insert newlines.
        None is the most compact representation.  Since the default
        item separator is ', ',  the output might include trailing
        whitespace when indent is specified.  You can use
        separators=(',', ': ') to avoid this.

        If specified, separators should be a (item_separator, key_separator)
        tuple.  The default is (', ', ': ').  To get the most compact JSON
        representation you should specify (',', ':') to eliminate whitespace.

        If specified, default is a function that gets called for objects
        that can't otherwise be serialized.  It should return a JSON encodable
        version of the object or raise a ``TypeError``.

        If encoding is not None, then all input strings will be
        transformed into unicode using that encoding prior to JSON-encoding.
        The default is UTF-8.  If encoding is None, byte strings are handed
        to the string encoder without that decoding step.

        """

        self.skipkeys = skipkeys
        self.ensure_ascii = ensure_ascii
        self.check_circular = check_circular
        self.allow_nan = allow_nan
        self.sort_keys = sort_keys
        self.indent = indent
        # Only override the class-level separators when the caller asked to.
        if separators is not None:
            self.item_separator, self.key_separator = separators
        # An instance attribute shadows the default() method below.
        if default is not None:
            self.default = default
        self.encoding = encoding

    def default(self, o):
        """Implement this method in a subclass such that it returns
        a serializable object for ``o``, or calls the base implementation
        (to raise a ``TypeError``).

        For example, to support arbitrary iterators, you could
        implement default like this::

            def default(self, o):
                try:
                    iterable = iter(o)
                except TypeError:
                    pass
                else:
                    return list(iterable)
                # Let the base class default method raise the TypeError
                return JSONEncoder.default(self, o)

        """
        raise TypeError(repr(o) + " is not JSON serializable")

    def encode(self, o):
        """Return a JSON string representation of a Python data structure.

        >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
        '{"foo": ["bar", "baz"]}'

        """
        # This is for extremely simple cases and benchmarks.
        if isinstance(o, basestring):
            if isinstance(o, str):
                _encoding = self.encoding
                if (_encoding is not None
                        and not (_encoding == 'utf-8')):
                    o = o.decode(_encoding)
            if self.ensure_ascii:
                return encode_basestring_ascii(o)
            else:
                return encode_basestring(o)
        # This doesn't pass the iterator directly to ''.join() because the
        # exceptions aren't as detailed.  The list call should be roughly
        # equivalent to the PySequence_Fast that ''.join() would do.
        chunks = self.iterencode(o, _one_shot=True)
        if not isinstance(chunks, (list, tuple)):
            chunks = list(chunks)
        return ''.join(chunks)

    def iterencode(self, o, _one_shot=False):
        """Encode the given object and yield each string
        representation as available.

        For example::

            for chunk in JSONEncoder().iterencode(bigobject):
                mysocket.write(chunk)

        """
        if self.check_circular:
            markers = {}
        else:
            markers = None
        if self.ensure_ascii:
            _encoder = encode_basestring_ascii
        else:
            _encoder = encode_basestring
        # Wrap the string encoder only when there is a real, non-UTF-8
        # encoding to decode with.  encoding=None must not reach
        # str.decode(), which rejects None; encode() applies the same guard
        # on its fast path.
        if self.encoding is not None and self.encoding != 'utf-8':
            def _encoder(o, _orig_encoder=_encoder, _encoding=self.encoding):
                if isinstance(o, str):
                    o = o.decode(_encoding)
                return _orig_encoder(o)

        def floatstr(o, allow_nan=self.allow_nan,
                _repr=FLOAT_REPR, _inf=INFINITY, _neginf=-INFINITY):
            # Check for specials.  Note that this type of test is processor
            # and/or platform-specific, so do tests which don't depend on the
            # internals.

            if o != o:
                text = 'NaN'
            elif o == _inf:
                text = 'Infinity'
            elif o == _neginf:
                text = '-Infinity'
            else:
                return _repr(o)

            if not allow_nan:
                raise ValueError(
                    "Out of range float values are not JSON compliant: " +
                    repr(o))

            return text

        # The C encoder is only used for one-shot encoding without
        # indentation or key sorting; everything else goes through the
        # pure-Python generator.
        if (_one_shot and c_make_encoder is not None
                and self.indent is None and not self.sort_keys):
            _iterencode = c_make_encoder(
                markers, self.default, _encoder, self.indent,
                self.key_separator, self.item_separator, self.sort_keys,
                self.skipkeys, self.allow_nan)
        else:
            _iterencode = _make_iterencode(
                markers, self.default, _encoder, self.indent, floatstr,
                self.key_separator, self.item_separator, self.sort_keys,
                self.skipkeys, _one_shot)
        return _iterencode(o, 0)
271
def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
        _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,
        ## HACK: hand-optimized bytecode; turn globals into locals
        ValueError=ValueError,
        basestring=basestring,
        dict=dict,
        float=float,
        id=id,
        int=int,
        isinstance=isinstance,
        list=list,
        long=long,
        str=str,
        tuple=tuple,
    ):
    """Build the pure-Python chunk generator used by JSONEncoder.

    Returns a generator function ``_iterencode(o, _current_indent_level)``
    that yields the pieces of the JSON text for ``o``.

    ``markers`` is either None (no circular-reference check) or a dict
    keyed by ``id()`` of the containers currently being encoded.
    ``_default`` is called for objects of unsupported types, ``_encoder``
    turns a string into its quoted JSON form and ``_floatstr`` formats
    floats.  ``_indent``, ``_key_separator``, ``_item_separator``,
    ``_sort_keys`` and ``_skipkeys`` control layout and key handling.
    ``_one_shot`` is accepted but not used here.

    """

    def _iterencode_list(lst, _current_indent_level):
        # Yield the chunks of a JSON array for a list or tuple.
        if not lst:
            yield '[]'
            return
        if markers is not None:
            markerid = id(lst)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = lst
        pretty = _indent is not None
        if pretty:
            _current_indent_level += 1
            newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
            separator = _item_separator + newline_indent
            prefix = '[' + newline_indent
        else:
            separator = _item_separator
            prefix = '['
        for value in lst:
            # prefix is the opening bracket for the first element and the
            # item separator for every later one; it is emitted in the same
            # chunk as a scalar value, or on its own before a nested value.
            if isinstance(value, basestring):
                yield prefix + _encoder(value)
            elif value is None:
                yield prefix + 'null'
            elif value is True:
                yield prefix + 'true'
            elif value is False:
                yield prefix + 'false'
            elif isinstance(value, (int, long)):
                yield prefix + str(value)
            elif isinstance(value, float):
                yield prefix + _floatstr(value)
            else:
                yield prefix
                if isinstance(value, (list, tuple)):
                    nested = _iterencode_list(value, _current_indent_level)
                elif isinstance(value, dict):
                    nested = _iterencode_dict(value, _current_indent_level)
                else:
                    nested = _iterencode(value, _current_indent_level)
                for piece in nested:
                    yield piece
            prefix = separator
        if pretty:
            _current_indent_level -= 1
            yield '\n' + (' ' * (_indent * _current_indent_level))
        yield ']'
        if markers is not None:
            del markers[markerid]

    def _iterencode_dict(dct, _current_indent_level):
        # Yield the chunks of a JSON object for a dict.
        if not dct:
            yield '{}'
            return
        if markers is not None:
            markerid = id(dct)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = dct
        yield '{'
        pretty = _indent is not None
        if pretty:
            _current_indent_level += 1
            newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
            item_separator = _item_separator + newline_indent
            yield newline_indent
        else:
            item_separator = _item_separator
        if _sort_keys:
            items = sorted(dct.items(), key=lambda kv: kv[0])
        else:
            items = dct.iteritems()
        wrote_item = False
        for key, value in items:
            if not isinstance(key, basestring):
                # JavaScript is weakly typed for these, so it makes sense to
                # also allow them.  Many encoders seem to do something like
                # this.  True/False are tested before int because bool is an
                # int subclass.
                if isinstance(key, float):
                    key = _floatstr(key)
                elif key is True:
                    key = 'true'
                elif key is False:
                    key = 'false'
                elif key is None:
                    key = 'null'
                elif isinstance(key, (int, long)):
                    key = str(key)
                elif _skipkeys:
                    continue
                else:
                    raise TypeError("key " + repr(key) + " is not a string")
            # A skipped key never reaches this point, so the separator is
            # only written between members that are actually emitted.
            if wrote_item:
                yield item_separator
            else:
                wrote_item = True
            yield _encoder(key)
            yield _key_separator
            if isinstance(value, basestring):
                yield _encoder(value)
            elif value is None:
                yield 'null'
            elif value is True:
                yield 'true'
            elif value is False:
                yield 'false'
            elif isinstance(value, (int, long)):
                yield str(value)
            elif isinstance(value, float):
                yield _floatstr(value)
            else:
                if isinstance(value, (list, tuple)):
                    nested = _iterencode_list(value, _current_indent_level)
                elif isinstance(value, dict):
                    nested = _iterencode_dict(value, _current_indent_level)
                else:
                    nested = _iterencode(value, _current_indent_level)
                for piece in nested:
                    yield piece
        if pretty:
            _current_indent_level -= 1
            yield '\n' + (' ' * (_indent * _current_indent_level))
        yield '}'
        if markers is not None:
            del markers[markerid]

    def _iterencode(o, _current_indent_level):
        # Yield the chunks for an arbitrary object, delegating containers
        # to the helpers above and unknown types to _default.
        if isinstance(o, basestring):
            yield _encoder(o)
        elif o is None:
            yield 'null'
        elif o is True:
            yield 'true'
        elif o is False:
            yield 'false'
        elif isinstance(o, (int, long)):
            yield str(o)
        elif isinstance(o, float):
            yield _floatstr(o)
        elif isinstance(o, (list, tuple)):
            for piece in _iterencode_list(o, _current_indent_level):
                yield piece
        elif isinstance(o, dict):
            for piece in _iterencode_dict(o, _current_indent_level):
                yield piece
        else:
            # Register the original object before converting it, so that a
            # _default() result which leads back to the same object is
            # reported as a circular reference.
            if markers is not None:
                markerid = id(o)
                if markerid in markers:
                    raise ValueError("Circular reference detected")
                markers[markerid] = o
            o = _default(o)
            for piece in _iterencode(o, _current_indent_level):
                yield piece
            if markers is not None:
                del markers[markerid]

    return _iterencode