blob: 169450d68ecd69e429df044ce8d83131ef79162c [file] [log] [blame]
Brett Cannon4b964f92008-05-05 20:21:38 +00001"""Implementation of JSONEncoder
2"""
Brett Cannon4b964f92008-05-05 20:21:38 +00003import re
4
5try:
6 from _json import encode_basestring_ascii as c_encode_basestring_ascii
7except ImportError:
8 c_encode_basestring_ascii = None
Bob Ippolitod914e3f2009-03-17 23:19:00 +00009try:
10 from _json import make_encoder as c_make_encoder
11except ImportError:
12 c_make_encoder = None
Brett Cannon4b964f92008-05-05 20:21:38 +000013
# Characters that must be escaped when non-ASCII output is allowed:
# control characters, the backslash and the double quote.
ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]')
# Characters that must be escaped for ASCII-only output: backslash, double
# quote, and anything outside the printable range from space to tilde.
ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
# Detects bytes with the high bit set in a byte string.
HAS_UTF8 = re.compile(r'[\x80-\xff]')

# Maps a single character to its JSON escape sequence.  The short escapes
# are listed explicitly; the loop below fills in the remaining control
# characters with their \uXXXX form.
ESCAPE_DCT = {
    '"': '\\"',
    '\\': '\\\\',
    '\t': '\\t',
    '\n': '\\n',
    '\r': '\\r',
    '\b': '\\b',
    '\f': '\\f',
}
for i in range(0x20):
    if chr(i) not in ESCAPE_DCT:
        ESCAPE_DCT[chr(i)] = '\\u{0:04x}'.format(i)

INFINITY = float('inf')
# Function used to render ordinary (finite) floats.
FLOAT_REPR = repr
32
def encode_basestring(s):
    """Return a JSON representation of a Python string

    Every character matched by ``ESCAPE`` is replaced by its entry in
    ``ESCAPE_DCT`` and the result is wrapped in double quotes.

    """
    escaped = ESCAPE.sub(lambda match: ESCAPE_DCT[match.group(0)], s)
    return '"' + escaped + '"'
40
41
def py_encode_basestring_ascii(s):
    """Return an ASCII-only JSON representation of a Python string

    Characters matched by ``ESCAPE_ASCII`` are replaced either by their
    short escape from ``ESCAPE_DCT`` or by one or two ``\\uXXXX`` sequences,
    and the result is wrapped in double quotes.

    """
    # A byte string containing high-bit bytes is decoded as UTF-8 first.
    if isinstance(s, str) and HAS_UTF8.search(s) is not None:
        s = s.decode('utf-8')

    def replace(match):
        char = match.group(0)
        if char in ESCAPE_DCT:
            return ESCAPE_DCT[char]
        codepoint = ord(char)
        if codepoint < 0x10000:
            return '\\u{0:04x}'.format(codepoint)
        # Above the 16-bit range: emit a surrogate pair.
        offset = codepoint - 0x10000
        high = 0xd800 | ((offset >> 10) & 0x3ff)
        low = 0xdc00 | (offset & 0x3ff)
        return '\\u{0:04x}\\u{1:04x}'.format(high, low)

    return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"'
65
66
# Use the C implementation when it was importable, otherwise fall back to
# the pure-Python one.
encode_basestring_ascii = (
    c_encode_basestring_ascii if c_encode_basestring_ascii
    else py_encode_basestring_ascii)
Brett Cannon4b964f92008-05-05 20:21:38 +000069
class JSONEncoder(object):
    """Extensible JSON <http://json.org> encoder for Python data structures.

    Supports the following objects and types by default:

    +-------------------+---------------+
    | Python            | JSON          |
    +===================+===============+
    | dict              | object        |
    +-------------------+---------------+
    | list, tuple       | array         |
    +-------------------+---------------+
    | str, unicode      | string        |
    +-------------------+---------------+
    | int, long, float  | number        |
    +-------------------+---------------+
    | True              | true          |
    +-------------------+---------------+
    | False             | false         |
    +-------------------+---------------+
    | None              | null          |
    +-------------------+---------------+

    To extend this to recognize other objects, subclass and implement a
    ``.default()`` method with another method that returns a serializable
    object for ``o`` if possible, otherwise it should call the superclass
    implementation (to raise ``TypeError``).

    """
    # Class-level defaults; ``__init__`` overrides them per instance when a
    # ``separators`` tuple is supplied.
    item_separator = ', '
    key_separator = ': '

    def __init__(self, skipkeys=False, ensure_ascii=True,
            check_circular=True, allow_nan=True, sort_keys=False,
            indent=None, separators=None, encoding='utf-8', default=None):
        """Constructor for JSONEncoder, with sensible defaults.

        If skipkeys is false, then it is a TypeError to attempt
        encoding of keys that are not str, int, long, float or None.  If
        skipkeys is True, such items are simply skipped.

        If *ensure_ascii* is true (the default), all non-ASCII
        characters in the output are escaped with \\uXXXX sequences,
        and the results are str instances consisting of ASCII
        characters only.  If ensure_ascii is False, a result may be a
        unicode instance.  This usually happens if the input contains
        unicode strings or the *encoding* parameter is used.

        If check_circular is true, then lists, dicts, and custom encoded
        objects will be checked for circular references during encoding to
        prevent an infinite recursion (which would cause an OverflowError).
        Otherwise, no such check takes place.

        If allow_nan is true, then NaN, Infinity, and -Infinity will be
        encoded as such.  This behavior is not JSON specification compliant,
        but is consistent with most JavaScript based encoders and decoders.
        Otherwise, it will be a ValueError to encode such floats.

        If sort_keys is true, then the output of dictionaries will be
        sorted by key; this is useful for regression tests to ensure
        that JSON serializations can be compared on a day-to-day basis.

        If indent is a non-negative integer, then JSON array
        elements and object members will be pretty-printed with that
        indent level.  An indent level of 0 will only insert newlines.
        None is the most compact representation.

        If specified, separators should be a (item_separator, key_separator)
        tuple.  The default is (', ', ': ').  To get the most compact JSON
        representation you should specify (',', ':') to eliminate whitespace.

        If specified, default is a function that gets called for objects
        that can't otherwise be serialized.  It should return a JSON encodable
        version of the object or raise a ``TypeError``.

        If encoding is not None, then all input strings will be
        transformed into unicode using that encoding prior to JSON-encoding.
        The default is UTF-8.  If encoding is None, this class hands byte
        strings to the string encoder without decoding them first.

        """

        self.skipkeys = skipkeys
        self.ensure_ascii = ensure_ascii
        self.check_circular = check_circular
        self.allow_nan = allow_nan
        self.sort_keys = sort_keys
        self.indent = indent
        if separators is not None:
            self.item_separator, self.key_separator = separators
        if default is not None:
            # Shadows the ``default`` method on this instance only.
            self.default = default
        self.encoding = encoding

    def default(self, o):
        """Implement this method in a subclass such that it returns
        a serializable object for ``o``, or calls the base implementation
        (to raise a ``TypeError``).

        For example, to support arbitrary iterators, you could
        implement default like this::

            def default(self, o):
                try:
                    iterable = iter(o)
                except TypeError:
                    pass
                else:
                    return list(iterable)
                return JSONEncoder.default(self, o)

        """
        raise TypeError(repr(o) + " is not JSON serializable")

    def encode(self, o):
        """Return a JSON string representation of a Python data structure.

        >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
        '{"foo": ["bar", "baz"]}'

        """
        # This is for extremely simple cases and benchmarks.
        if isinstance(o, basestring):
            if isinstance(o, str):
                _encoding = self.encoding
                if _encoding is not None and _encoding != 'utf-8':
                    o = o.decode(_encoding)
            if self.ensure_ascii:
                return encode_basestring_ascii(o)
            else:
                return encode_basestring(o)
        # This doesn't pass the iterator directly to ''.join() because the
        # exceptions aren't as detailed.  The list call should be roughly
        # equivalent to the PySequence_Fast that ''.join() would do.
        chunks = self.iterencode(o, _one_shot=True)
        if not isinstance(chunks, (list, tuple)):
            chunks = list(chunks)
        return ''.join(chunks)

    def iterencode(self, o, _one_shot=False):
        """Encode the given object and yield each string
        representation as available.

        For example::

            for chunk in JSONEncoder().iterencode(bigobject):
                mysocket.write(chunk)

        ``_one_shot`` is an internal flag set by ``encode()``; it allows the
        C encoder to be used when it is available and neither ``indent``
        nor ``sort_keys`` is in effect.

        """
        if self.check_circular:
            markers = {}
        else:
            markers = None
        if self.ensure_ascii:
            _encoder = encode_basestring_ascii
        else:
            _encoder = encode_basestring
        # Only wrap the string encoder when an explicit, non-UTF-8 encoding
        # was requested.  ``encoding=None`` means "do not decode"; without
        # the ``is not None`` guard the wrapper would call ``o.decode(None)``
        # and raise TypeError, unlike the fast path in ``encode()``.
        if self.encoding is not None and self.encoding != 'utf-8':
            def _encoder(o, _orig_encoder=_encoder, _encoding=self.encoding):
                if isinstance(o, str):
                    o = o.decode(_encoding)
                return _orig_encoder(o)

        def floatstr(o, allow_nan=self.allow_nan,
                _repr=FLOAT_REPR, _inf=INFINITY, _neginf=-INFINITY):
            """Return the text for float ``o``.

            NaN and the infinities become 'NaN', 'Infinity' and '-Infinity'
            when ``allow_nan`` is true and raise ValueError otherwise.
            """
            # Check for specials.  Note that this type of test is processor
            # and/or platform-specific, so do tests which don't depend on the
            # internals.

            if o != o:
                text = 'NaN'
            elif o == _inf:
                text = 'Infinity'
            elif o == _neginf:
                text = '-Infinity'
            else:
                return _repr(o)

            if not allow_nan:
                raise ValueError(
                    "Out of range float values are not JSON compliant: " +
                    repr(o))

            return text

        if (_one_shot and c_make_encoder is not None
                and self.indent is None and not self.sort_keys):
            _iterencode = c_make_encoder(
                markers, self.default, _encoder, self.indent,
                self.key_separator, self.item_separator, self.sort_keys,
                self.skipkeys, self.allow_nan)
        else:
            _iterencode = _make_iterencode(
                markers, self.default, _encoder, self.indent, floatstr,
                self.key_separator, self.item_separator, self.sort_keys,
                self.skipkeys, _one_shot)
        return _iterencode(o, 0)
267
268def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
269 _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,
270 ## HACK: hand-optimized bytecode; turn globals into locals
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000271 ValueError=ValueError,
272 basestring=basestring,
273 dict=dict,
274 float=float,
275 id=id,
276 int=int,
277 isinstance=isinstance,
278 list=list,
279 long=long,
280 str=str,
281 tuple=tuple,
282 ):
283
284 def _iterencode_list(lst, _current_indent_level):
285 if not lst:
286 yield '[]'
287 return
288 if markers is not None:
289 markerid = id(lst)
290 if markerid in markers:
291 raise ValueError("Circular reference detected")
292 markers[markerid] = lst
293 buf = '['
294 if _indent is not None:
295 _current_indent_level += 1
296 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
297 separator = _item_separator + newline_indent
298 buf += newline_indent
299 else:
300 newline_indent = None
301 separator = _item_separator
302 first = True
303 for value in lst:
304 if first:
305 first = False
306 else:
307 buf = separator
308 if isinstance(value, basestring):
309 yield buf + _encoder(value)
310 elif value is None:
311 yield buf + 'null'
312 elif value is True:
313 yield buf + 'true'
314 elif value is False:
315 yield buf + 'false'
316 elif isinstance(value, (int, long)):
317 yield buf + str(value)
318 elif isinstance(value, float):
319 yield buf + _floatstr(value)
320 else:
321 yield buf
322 if isinstance(value, (list, tuple)):
323 chunks = _iterencode_list(value, _current_indent_level)
324 elif isinstance(value, dict):
325 chunks = _iterencode_dict(value, _current_indent_level)
326 else:
327 chunks = _iterencode(value, _current_indent_level)
328 for chunk in chunks:
329 yield chunk
330 if newline_indent is not None:
331 _current_indent_level -= 1
332 yield '\n' + (' ' * (_indent * _current_indent_level))
333 yield ']'
334 if markers is not None:
335 del markers[markerid]
336
337 def _iterencode_dict(dct, _current_indent_level):
338 if not dct:
339 yield '{}'
340 return
341 if markers is not None:
342 markerid = id(dct)
343 if markerid in markers:
344 raise ValueError("Circular reference detected")
345 markers[markerid] = dct
346 yield '{'
347 if _indent is not None:
348 _current_indent_level += 1
349 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
350 item_separator = _item_separator + newline_indent
351 yield newline_indent
352 else:
353 newline_indent = None
354 item_separator = _item_separator
355 first = True
356 if _sort_keys:
Ezio Melottiffd84962010-01-26 15:57:21 +0000357 items = sorted(dct.items(), key=lambda kv: kv[0])
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000358 else:
359 items = dct.iteritems()
360 for key, value in items:
361 if isinstance(key, basestring):
362 pass
363 # JavaScript is weakly typed for these, so it makes sense to
364 # also allow them. Many encoders seem to do something like this.
365 elif isinstance(key, float):
366 key = _floatstr(key)
367 elif key is True:
368 key = 'true'
369 elif key is False:
370 key = 'false'
371 elif key is None:
372 key = 'null'
373 elif isinstance(key, (int, long)):
374 key = str(key)
375 elif _skipkeys:
376 continue
377 else:
378 raise TypeError("key " + repr(key) + " is not a string")
379 if first:
380 first = False
381 else:
382 yield item_separator
383 yield _encoder(key)
384 yield _key_separator
385 if isinstance(value, basestring):
386 yield _encoder(value)
387 elif value is None:
388 yield 'null'
389 elif value is True:
390 yield 'true'
391 elif value is False:
392 yield 'false'
393 elif isinstance(value, (int, long)):
394 yield str(value)
395 elif isinstance(value, float):
396 yield _floatstr(value)
397 else:
398 if isinstance(value, (list, tuple)):
399 chunks = _iterencode_list(value, _current_indent_level)
400 elif isinstance(value, dict):
401 chunks = _iterencode_dict(value, _current_indent_level)
402 else:
403 chunks = _iterencode(value, _current_indent_level)
404 for chunk in chunks:
405 yield chunk
406 if newline_indent is not None:
407 _current_indent_level -= 1
408 yield '\n' + (' ' * (_indent * _current_indent_level))
409 yield '}'
410 if markers is not None:
411 del markers[markerid]
412
413 def _iterencode(o, _current_indent_level):
414 if isinstance(o, basestring):
415 yield _encoder(o)
416 elif o is None:
417 yield 'null'
418 elif o is True:
419 yield 'true'
420 elif o is False:
421 yield 'false'
422 elif isinstance(o, (int, long)):
423 yield str(o)
424 elif isinstance(o, float):
425 yield _floatstr(o)
426 elif isinstance(o, (list, tuple)):
427 for chunk in _iterencode_list(o, _current_indent_level):
428 yield chunk
429 elif isinstance(o, dict):
430 for chunk in _iterencode_dict(o, _current_indent_level):
431 yield chunk
432 else:
433 if markers is not None:
434 markerid = id(o)
435 if markerid in markers:
436 raise ValueError("Circular reference detected")
437 markers[markerid] = o
438 o = _default(o)
439 for chunk in _iterencode(o, _current_indent_level):
440 yield chunk
441 if markers is not None:
442 del markers[markerid]
443
444 return _iterencode