blob: 4d1aaa8eedfd6173beea57ac09a0fa9dcda34593 [file] [log] [blame]
Brett Cannon4b964f92008-05-05 20:21:38 +00001"""Implementation of JSONEncoder
2"""
Brett Cannon4b964f92008-05-05 20:21:38 +00003import re
4
5try:
6 from _json import encode_basestring_ascii as c_encode_basestring_ascii
7except ImportError:
8 c_encode_basestring_ascii = None
Bob Ippolitod914e3f2009-03-17 23:19:00 +00009try:
10 from _json import make_encoder as c_make_encoder
11except ImportError:
12 c_make_encoder = None
Brett Cannon4b964f92008-05-05 20:21:38 +000013
# Matches every character that must be escaped inside a JSON string even
# when non-ASCII output is allowed: control characters, backslash and
# double quote.
ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]')
# Matches backslash, double quote, or anything outside the printable ASCII
# range (space through tilde); used when the output must be pure ASCII.
ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
# Matches any character in the 0x80-0xff range, i.e. a byte string that
# cannot be plain ASCII.
HAS_UTF8 = re.compile(r'[\x80-\xff]')

# Escape table, seeded with the short two-character escapes.
ESCAPE_DCT = {
    '\\': '\\\\',
    '"': '\\"',
    '\b': '\\b',
    '\f': '\\f',
    '\n': '\\n',
    '\r': '\\r',
    '\t': '\\t',
}
# Every remaining control character gets its generic \u00XX form; the
# short escapes registered above are left untouched.
for i in range(0x20):
    if chr(i) not in ESCAPE_DCT:
        ESCAPE_DCT[chr(i)] = '\\u{0:04x}'.format(i)

INFINITY = float('inf')
# Callable used to render ordinary (finite) floats.
FLOAT_REPR = repr
32
def encode_basestring(s):
    """Return a JSON representation of a Python string

    The result is ``s`` wrapped in double quotes, with every character
    matched by ``ESCAPE`` replaced by its entry in ``ESCAPE_DCT``.
    Characters not matched by ``ESCAPE`` are passed through unchanged.

    """
    escaped = ESCAPE.sub(lambda match: ESCAPE_DCT[match.group(0)], s)
    return '"' + escaped + '"'
40
41
def py_encode_basestring_ascii(s):
    """Return an ASCII-only JSON representation of a Python string

    A ``str`` containing characters in the 0x80-0xff range is decoded as
    UTF-8 first.  Every character matched by ``ESCAPE_ASCII`` is then
    replaced either by its ``ESCAPE_DCT`` entry or by a ``u``-escape built
    from its code point (a surrogate pair for code points of 0x10000 and
    above).

    """
    needs_decoding = isinstance(s, str) and HAS_UTF8.search(s) is not None
    if needs_decoding:
        s = s.decode('utf-8')

    def replace(match):
        char = match.group(0)
        if char in ESCAPE_DCT:
            return ESCAPE_DCT[char]
        codepoint = ord(char)
        if codepoint < 0x10000:
            return '\\u{0:04x}'.format(codepoint)
        # Beyond the Basic Multilingual Plane: emit a UTF-16 surrogate pair.
        offset = codepoint - 0x10000
        high = 0xd800 | ((offset >> 10) & 0x3ff)
        low = 0xdc00 | (offset & 0x3ff)
        return '\\u{0:04x}\\u{1:04x}'.format(high, low)

    return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"'
65
66
# Use the C accelerator when it was importable, otherwise fall back to the
# pure-Python implementation.
if c_encode_basestring_ascii:
    encode_basestring_ascii = c_encode_basestring_ascii
else:
    encode_basestring_ascii = py_encode_basestring_ascii
Brett Cannon4b964f92008-05-05 20:21:38 +000069
class JSONEncoder(object):
    """Extensible JSON <http://json.org> encoder for Python data structures.

    Supports the following objects and types by default:

    +-------------------+---------------+
    | Python            | JSON          |
    +===================+===============+
    | dict              | object        |
    +-------------------+---------------+
    | list, tuple       | array         |
    +-------------------+---------------+
    | str, unicode      | string        |
    +-------------------+---------------+
    | int, long, float  | number        |
    +-------------------+---------------+
    | True              | true          |
    +-------------------+---------------+
    | False             | false         |
    +-------------------+---------------+
    | None              | null          |
    +-------------------+---------------+

    To extend this to recognize other objects, subclass and implement a
    ``.default()`` method that returns a serializable object for ``o`` if
    possible, otherwise it should call the superclass implementation
    (to raise ``TypeError``).

    """
    # Class-level defaults; __init__ overrides them per instance only when
    # ``separators`` is given.
    item_separator = ', '
    key_separator = ': '

    def __init__(self, skipkeys=False, ensure_ascii=True,
            check_circular=True, allow_nan=True, sort_keys=False,
            indent=None, separators=None, encoding='utf-8', default=None):
        """Constructor for JSONEncoder, with sensible defaults.

        If skipkeys is false, then it is a TypeError to attempt
        encoding of keys that are not str, int, long, float or None.  If
        skipkeys is True, such items are simply skipped.

        If *ensure_ascii* is true (the default), all non-ASCII
        characters in the output are escaped with \\uXXXX sequences,
        and the results are str instances consisting of ASCII
        characters only.  If ensure_ascii is False, a result may be a
        unicode instance.  This usually happens if the input contains
        unicode strings or the *encoding* parameter is used.

        If check_circular is true, then lists, dicts, and custom encoded
        objects will be checked for circular references during encoding to
        prevent an infinite recursion (which would otherwise exhaust the
        interpreter's recursion limit).  Otherwise, no such check takes
        place.

        If allow_nan is true, then NaN, Infinity, and -Infinity will be
        encoded as such.  This behavior is not JSON specification compliant,
        but is consistent with most JavaScript based encoders and decoders.
        Otherwise, it will be a ValueError to encode such floats.

        If sort_keys is true, then the output of dictionaries will be
        sorted by key; this is useful for regression tests to ensure
        that JSON serializations can be compared on a day-to-day basis.

        If indent is a non-negative integer, then JSON array
        elements and object members will be pretty-printed with that
        indent level.  An indent level of 0 will only insert newlines.
        None is the most compact representation.  Since the default
        item separator is ', ',  the output might include trailing
        whitespace when indent is specified.  You can use
        separators=(',', ': ') to avoid this.

        If specified, separators should be a (item_separator, key_separator)
        tuple.  The default is (', ', ': ').  To get the most compact JSON
        representation you should specify (',', ':') to eliminate whitespace.

        If specified, default is a function that gets called for objects
        that can't otherwise be serialized.  It should return a JSON encodable
        version of the object or raise a ``TypeError``.

        If encoding is not None, then all input strings will be
        transformed into unicode using that encoding prior to JSON-encoding.
        The default is UTF-8.

        """
        self.skipkeys = skipkeys
        self.ensure_ascii = ensure_ascii
        self.check_circular = check_circular
        self.allow_nan = allow_nan
        self.sort_keys = sort_keys
        self.indent = indent
        if separators is not None:
            self.item_separator, self.key_separator = separators
        if default is not None:
            # Shadows the ``default`` method on this instance only.
            self.default = default
        self.encoding = encoding

    def default(self, o):
        """Implement this method in a subclass such that it returns
        a serializable object for ``o``, or calls the base implementation
        (to raise a ``TypeError``).

        For example, to support arbitrary iterators, you could
        implement default like this::

            def default(self, o):
                try:
                    iterable = iter(o)
                except TypeError:
                    pass
                else:
                    return list(iterable)
                return JSONEncoder.default(self, o)

        """
        raise TypeError(repr(o) + " is not JSON serializable")

    def encode(self, o):
        """Return a JSON string representation of a Python data structure.

        >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
        '{"foo": ["bar", "baz"]}'

        """
        # This is for extremely simple cases and benchmarks.
        if isinstance(o, basestring):
            if isinstance(o, str):
                _encoding = self.encoding
                if (_encoding is not None
                        and not (_encoding == 'utf-8')):
                    o = o.decode(_encoding)
            if self.ensure_ascii:
                return encode_basestring_ascii(o)
            else:
                return encode_basestring(o)
        # This doesn't pass the iterator directly to ''.join() because the
        # exceptions aren't as detailed.  The list call should be roughly
        # equivalent to the PySequence_Fast that ''.join() would do.
        chunks = self.iterencode(o, _one_shot=True)
        if not isinstance(chunks, (list, tuple)):
            chunks = list(chunks)
        return ''.join(chunks)

    def iterencode(self, o, _one_shot=False):
        """Encode the given object and yield each string
        representation as available.

        For example::

            for chunk in JSONEncoder().iterencode(bigobject):
                mysocket.write(chunk)

        ``_one_shot`` is an internal flag set by ``encode()``; when it is
        true, the C encoder is importable, and neither ``indent`` nor
        ``sort_keys`` is requested, the C encoder is used instead of the
        pure-Python generator.

        """
        if self.check_circular:
            markers = {}
        else:
            markers = None
        if self.ensure_ascii:
            _encoder = encode_basestring_ascii
        else:
            _encoder = encode_basestring
        # Only wrap the string encoder when an explicit non-UTF-8 encoding
        # was requested.  ``encoding=None`` means "do not decode" (exactly
        # as in encode() above); wrapping in that case would call
        # ``o.decode(None)`` and fail on every byte string.
        if self.encoding is not None and self.encoding != 'utf-8':
            def _encoder(o, _orig_encoder=_encoder, _encoding=self.encoding):
                if isinstance(o, str):
                    o = o.decode(_encoding)
                return _orig_encoder(o)

        def floatstr(o, allow_nan=self.allow_nan,
                _repr=FLOAT_REPR, _inf=INFINITY, _neginf=-INFINITY):
            # Check for specials.  Note that this type of test is processor
            # and/or platform-specific, so do tests which don't depend on the
            # internals.

            if o != o:
                # NaN is the only float that compares unequal to itself.
                text = 'NaN'
            elif o == _inf:
                text = 'Infinity'
            elif o == _neginf:
                text = '-Infinity'
            else:
                return _repr(o)

            if not allow_nan:
                raise ValueError(
                    "Out of range float values are not JSON compliant: " +
                    repr(o))

            return text

        if (_one_shot and c_make_encoder is not None
                and self.indent is None and not self.sort_keys):
            _iterencode = c_make_encoder(
                markers, self.default, _encoder, self.indent,
                self.key_separator, self.item_separator, self.sort_keys,
                self.skipkeys, self.allow_nan)
        else:
            _iterencode = _make_iterencode(
                markers, self.default, _encoder, self.indent, floatstr,
                self.key_separator, self.item_separator, self.sort_keys,
                self.skipkeys, _one_shot)
        return _iterencode(o, 0)
270
def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
        _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,
        ## HACK: hand-optimized bytecode; turn globals into locals
        ValueError=ValueError,
        basestring=basestring,
        dict=dict,
        float=float,
        id=id,
        int=int,
        isinstance=isinstance,
        list=list,
        long=long,
        str=str,
        tuple=tuple,
    ):
    """Build the pure-Python encoder and return its entry-point generator.

    The returned callable takes ``(o, _current_indent_level)`` and yields
    the JSON text for ``o`` in chunks.  ``markers`` is either ``None`` or a
    dict keyed by ``id()`` of the containers currently being encoded, used
    to detect circular references.  ``_one_shot`` is accepted but not used
    here.

    """

    def _iterencode_list(lst, _current_indent_level):
        # Encode a list/tuple as a JSON array.
        if not lst:
            yield '[]'
            return
        if markers is not None:
            marker_key = id(lst)
            if marker_key in markers:
                raise ValueError("Circular reference detected")
            markers[marker_key] = lst
        pretty = _indent is not None
        if pretty:
            _current_indent_level += 1
            newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
            separator = _item_separator + newline_indent
            prefix = '[' + newline_indent
        else:
            separator = _item_separator
            prefix = '['
        for value in lst:
            # ``prefix`` is the opening bracket for the first item and the
            # item separator for every later one.
            if isinstance(value, basestring):
                yield prefix + _encoder(value)
            elif value is None:
                yield prefix + 'null'
            elif value is True:
                yield prefix + 'true'
            elif value is False:
                yield prefix + 'false'
            elif isinstance(value, (int, long)):
                yield prefix + str(value)
            elif isinstance(value, float):
                yield prefix + _floatstr(value)
            else:
                yield prefix
                if isinstance(value, (list, tuple)):
                    chunks = _iterencode_list(value, _current_indent_level)
                elif isinstance(value, dict):
                    chunks = _iterencode_dict(value, _current_indent_level)
                else:
                    chunks = _iterencode(value, _current_indent_level)
                for chunk in chunks:
                    yield chunk
            prefix = separator
        if pretty:
            _current_indent_level -= 1
            yield '\n' + (' ' * (_indent * _current_indent_level))
        yield ']'
        if markers is not None:
            del markers[marker_key]

    def _iterencode_dict(dct, _current_indent_level):
        # Encode a dict as a JSON object.
        if not dct:
            yield '{}'
            return
        if markers is not None:
            marker_key = id(dct)
            if marker_key in markers:
                raise ValueError("Circular reference detected")
            markers[marker_key] = dct
        yield '{'
        pretty = _indent is not None
        if pretty:
            _current_indent_level += 1
            newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
            item_separator = _item_separator + newline_indent
            yield newline_indent
        else:
            item_separator = _item_separator
        if _sort_keys:
            items = sorted(dct.items(), key=lambda kv: kv[0])
        else:
            items = dct.iteritems()
        emitted_any = False
        for key, value in items:
            if not isinstance(key, basestring):
                # JavaScript is weakly typed for these, so it makes sense to
                # also allow them.  Many encoders seem to do something like
                # this.
                if isinstance(key, float):
                    key = _floatstr(key)
                elif key is True:
                    key = 'true'
                elif key is False:
                    key = 'false'
                elif key is None:
                    key = 'null'
                elif isinstance(key, (int, long)):
                    key = str(key)
                elif _skipkeys:
                    # Skipped keys must not trigger a separator.
                    continue
                else:
                    raise TypeError("key " + repr(key) + " is not a string")
            if emitted_any:
                yield item_separator
            else:
                emitted_any = True
            yield _encoder(key)
            yield _key_separator
            if isinstance(value, basestring):
                yield _encoder(value)
            elif value is None:
                yield 'null'
            elif value is True:
                yield 'true'
            elif value is False:
                yield 'false'
            elif isinstance(value, (int, long)):
                yield str(value)
            elif isinstance(value, float):
                yield _floatstr(value)
            else:
                if isinstance(value, (list, tuple)):
                    chunks = _iterencode_list(value, _current_indent_level)
                elif isinstance(value, dict):
                    chunks = _iterencode_dict(value, _current_indent_level)
                else:
                    chunks = _iterencode(value, _current_indent_level)
                for chunk in chunks:
                    yield chunk
        if pretty:
            _current_indent_level -= 1
            yield '\n' + (' ' * (_indent * _current_indent_level))
        yield '}'
        if markers is not None:
            del markers[marker_key]

    def _iterencode(o, _current_indent_level):
        # Dispatch on the type of ``o``; anything unrecognised is handed to
        # ``_default`` and the replacement it returns is encoded instead.
        if isinstance(o, basestring):
            yield _encoder(o)
        elif o is None:
            yield 'null'
        elif o is True:
            yield 'true'
        elif o is False:
            yield 'false'
        elif isinstance(o, (int, long)):
            yield str(o)
        elif isinstance(o, float):
            yield _floatstr(o)
        elif isinstance(o, (list, tuple)):
            for chunk in _iterencode_list(o, _current_indent_level):
                yield chunk
        elif isinstance(o, dict):
            for chunk in _iterencode_dict(o, _current_indent_level):
                yield chunk
        else:
            if markers is not None:
                marker_key = id(o)
                if marker_key in markers:
                    raise ValueError("Circular reference detected")
                markers[marker_key] = o
            replacement = _default(o)
            for chunk in _iterencode(replacement, _current_indent_level):
                yield chunk
            if markers is not None:
                del markers[marker_key]

    return _iterencode
447 return _iterencode