blob: b0d745b5dc059acac5bd6a0af5b78353855e93eb [file] [log] [blame]
"""Implementation of JSONEncoder
"""
Brett Cannon4b964f92008-05-05 20:21:38 +00003import re
4
5try:
6 from _json import encode_basestring_ascii as c_encode_basestring_ascii
7except ImportError:
8 c_encode_basestring_ascii = None
Bob Ippolitod914e3f2009-03-17 23:19:00 +00009try:
10 from _json import make_encoder as c_make_encoder
11except ImportError:
12 c_make_encoder = None
Brett Cannon4b964f92008-05-05 20:21:38 +000013
# Matches every character that must be escaped in a JSON string: the
# control characters \x00-\x1f, the backslash and the double quote.
ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]')
# Matches a backslash, a double quote, or any character outside the
# printable ASCII range (space through tilde).
ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
# Matches any character in the range \x80-\xff.
HAS_UTF8 = re.compile(r'[\x80-\xff]')

# Maps a character to its escaped JSON spelling.  The short escapes are
# listed explicitly; the loop below fills in \uXXXX for the remaining
# control characters without overriding the short forms.
ESCAPE_DCT = {
    '\\': '\\\\', '"': '\\"',
    '\b': '\\b', '\f': '\\f', '\n': '\\n', '\r': '\\r', '\t': '\\t',
}
for i in range(0x20):
    if chr(i) not in ESCAPE_DCT:
        ESCAPE_DCT[chr(i)] = '\\u' + format(i, '04x')

FLOAT_REPR = repr
INFINITY = float('inf')
32
def encode_basestring(s):
    """Return ``s`` as a double-quoted JSON string.

    Every character matched by ``ESCAPE`` is replaced by its entry in
    ``ESCAPE_DCT``; all other characters are copied through unchanged.

    """
    escaped = ESCAPE.sub(lambda match: ESCAPE_DCT[match.group(0)], s)
    return ''.join(('"', escaped, '"'))
40
41
def py_encode_basestring_ascii(s):
    """Return ``s`` as a double-quoted, ASCII-only JSON string.

    A byte string containing any byte in the range \\x80-\\xff is first
    decoded as UTF-8.  Characters matched by ``ESCAPE_ASCII`` are then
    replaced by their ``ESCAPE_DCT`` entry when one exists, and by a
    ``\\uXXXX`` escape (or a surrogate pair of two such escapes for code
    points of 0x10000 and above) otherwise.

    """
    if isinstance(s, str) and HAS_UTF8.search(s) is not None:
        s = s.decode('utf-8')

    def _escape(match):
        char = match.group(0)
        if char in ESCAPE_DCT:
            return ESCAPE_DCT[char]
        codepoint = ord(char)
        if codepoint >= 0x10000:
            # Outside the BMP: emit a UTF-16 surrogate pair.
            offset = codepoint - 0x10000
            high = 0xd800 | ((offset >> 10) & 0x3ff)
            low = 0xdc00 | (offset & 0x3ff)
            return '\\u{0:04x}\\u{1:04x}'.format(high, low)
        return '\\u{0:04x}'.format(codepoint)

    return '"' + str(ESCAPE_ASCII.sub(_escape, s)) + '"'
65
66
# Use the C implementation when the ``_json`` import provided one;
# otherwise fall back to the pure-Python version.
if c_encode_basestring_ascii:
    encode_basestring_ascii = c_encode_basestring_ascii
else:
    encode_basestring_ascii = py_encode_basestring_ascii
Brett Cannon4b964f92008-05-05 20:21:38 +000069
class JSONEncoder(object):
    """Extensible JSON <http://json.org> encoder for Python data structures.

    Supports the following objects and types by default:

    +-------------------+---------------+
    | Python            | JSON          |
    +===================+===============+
    | dict              | object        |
    +-------------------+---------------+
    | list, tuple       | array         |
    +-------------------+---------------+
    | str, unicode      | string        |
    +-------------------+---------------+
    | int, long, float  | number        |
    +-------------------+---------------+
    | True              | true          |
    +-------------------+---------------+
    | False             | false         |
    +-------------------+---------------+
    | None              | null          |
    +-------------------+---------------+

    To extend this to recognize other objects, subclass and implement a
    ``.default()`` method that returns a serializable object for ``o`` if
    possible; otherwise it should call the superclass implementation
    (to raise ``TypeError``).

    """
    # Class-level defaults; ``__init__`` overrides them per instance when
    # ``separators`` is given.
    item_separator = ', '
    key_separator = ': '

    def __init__(self, skipkeys=False, ensure_ascii=True,
            check_circular=True, allow_nan=True, sort_keys=False,
            indent=None, separators=None, encoding='utf-8', default=None):
        """Constructor for JSONEncoder, with sensible defaults.

        If skipkeys is false, then it is a TypeError to attempt
        encoding of keys that are not str, int, long, float or None.  If
        skipkeys is True, such items are simply skipped.

        If ensure_ascii is true, the output is guaranteed to be str
        objects with all incoming unicode characters escaped.  If
        ensure_ascii is false, the output will be unicode object.

        If check_circular is true, then lists, dicts, and custom encoded
        objects will be checked for circular references during encoding
        (a ValueError is raised when one is found) to prevent infinite
        recursion.  Otherwise, no such check takes place.

        If allow_nan is true, then NaN, Infinity, and -Infinity will be
        encoded as such.  This behavior is not JSON specification compliant,
        but is consistent with most JavaScript based encoders and decoders.
        Otherwise, it will be a ValueError to encode such floats.

        If sort_keys is true, then the output of dictionaries will be
        sorted by key; this is useful for regression tests to ensure
        that JSON serializations can be compared on a day-to-day basis.

        If indent is a non-negative integer, then JSON array
        elements and object members will be pretty-printed with that
        indent level.  An indent level of 0 will only insert newlines.
        None is the most compact representation.

        If specified, separators should be a (item_separator, key_separator)
        tuple.  The default is (', ', ': ').  To get the most compact JSON
        representation you should specify (',', ':') to eliminate whitespace.

        If specified, default is a function that gets called for objects
        that can't otherwise be serialized.  It should return a JSON encodable
        version of the object or raise a ``TypeError``.

        If encoding is not None, then all input strings will be
        transformed into unicode using that encoding prior to JSON-encoding.
        The default is UTF-8.  If encoding is None, byte strings are handed
        to the string encoder without an explicit decode step.

        """

        self.skipkeys = skipkeys
        self.ensure_ascii = ensure_ascii
        self.check_circular = check_circular
        self.allow_nan = allow_nan
        self.sort_keys = sort_keys
        self.indent = indent
        if separators is not None:
            self.item_separator, self.key_separator = separators
        if default is not None:
            # Shadow the ``default`` method with the caller's hook.
            self.default = default
        self.encoding = encoding

    def default(self, o):
        """Implement this method in a subclass such that it returns
        a serializable object for ``o``, or calls the base implementation
        (to raise a ``TypeError``).

        For example, to support arbitrary iterators, you could
        implement default like this::

            def default(self, o):
                try:
                    iterable = iter(o)
                except TypeError:
                    pass
                else:
                    return list(iterable)
                return JSONEncoder.default(self, o)

        """
        raise TypeError(repr(o) + " is not JSON serializable")

    def encode(self, o):
        """Return a JSON string representation of a Python data structure.

        >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
        '{"foo": ["bar", "baz"]}'

        """
        # This is for extremely simple cases and benchmarks.
        if isinstance(o, basestring):
            if isinstance(o, str):
                _encoding = self.encoding
                if (_encoding is not None
                        and not (_encoding == 'utf-8')):
                    o = o.decode(_encoding)
            if self.ensure_ascii:
                return encode_basestring_ascii(o)
            else:
                return encode_basestring(o)
        # This doesn't pass the iterator directly to ''.join() because the
        # exceptions aren't as detailed.  The list call should be roughly
        # equivalent to the PySequence_Fast that ''.join() would do.
        chunks = self.iterencode(o, _one_shot=True)
        if not isinstance(chunks, (list, tuple)):
            chunks = list(chunks)
        return ''.join(chunks)

    def iterencode(self, o, _one_shot=False):
        """Encode the given object and yield each string
        representation as available.

        For example::

            for chunk in JSONEncoder().iterencode(bigobject):
                mysocket.write(chunk)

        ``_one_shot`` is an internal flag set by ``encode()``; when true
        the C encoder may be used if it is available.

        """
        if self.check_circular:
            markers = {}
        else:
            markers = None
        if self.ensure_ascii:
            _encoder = encode_basestring_ascii
        else:
            _encoder = encode_basestring
        # Only wrap the encoder with a decode step for a real, non-UTF-8
        # encoding.  ``encoding=None`` means "do not decode", matching the
        # fast path in ``encode()``; without the ``is not None`` test the
        # wrapper would call ``o.decode(None)`` and raise TypeError.
        if self.encoding is not None and self.encoding != 'utf-8':
            def _encoder(o, _orig_encoder=_encoder, _encoding=self.encoding):
                if isinstance(o, str):
                    o = o.decode(_encoding)
                return _orig_encoder(o)

        def floatstr(o, allow_nan=self.allow_nan,
                _repr=FLOAT_REPR, _inf=INFINITY, _neginf=-INFINITY):
            # Check for specials.  Note that this type of test is processor
            # and/or platform-specific, so do tests which don't depend on the
            # internals.

            if o != o:
                text = 'NaN'
            elif o == _inf:
                text = 'Infinity'
            elif o == _neginf:
                text = '-Infinity'
            else:
                return _repr(o)

            if not allow_nan:
                raise ValueError(
                    "Out of range float values are not JSON compliant: " +
                    repr(o))

            return text

        # The C encoder is used only for one-shot encoding without
        # indentation or key sorting; everything else goes through the
        # pure-Python generator.
        if (_one_shot and c_make_encoder is not None
                and self.indent is None and not self.sort_keys):
            _iterencode = c_make_encoder(
                markers, self.default, _encoder, self.indent,
                self.key_separator, self.item_separator, self.sort_keys,
                self.skipkeys, self.allow_nan)
        else:
            _iterencode = _make_iterencode(
                markers, self.default, _encoder, self.indent, floatstr,
                self.key_separator, self.item_separator, self.sort_keys,
                self.skipkeys, _one_shot)
        return _iterencode(o, 0)
264
265def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
266 _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,
267 ## HACK: hand-optimized bytecode; turn globals into locals
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000268 ValueError=ValueError,
269 basestring=basestring,
270 dict=dict,
271 float=float,
272 id=id,
273 int=int,
274 isinstance=isinstance,
275 list=list,
276 long=long,
277 str=str,
278 tuple=tuple,
279 ):
280
281 def _iterencode_list(lst, _current_indent_level):
282 if not lst:
283 yield '[]'
284 return
285 if markers is not None:
286 markerid = id(lst)
287 if markerid in markers:
288 raise ValueError("Circular reference detected")
289 markers[markerid] = lst
290 buf = '['
291 if _indent is not None:
292 _current_indent_level += 1
293 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
294 separator = _item_separator + newline_indent
295 buf += newline_indent
296 else:
297 newline_indent = None
298 separator = _item_separator
299 first = True
300 for value in lst:
301 if first:
302 first = False
303 else:
304 buf = separator
305 if isinstance(value, basestring):
306 yield buf + _encoder(value)
307 elif value is None:
308 yield buf + 'null'
309 elif value is True:
310 yield buf + 'true'
311 elif value is False:
312 yield buf + 'false'
313 elif isinstance(value, (int, long)):
314 yield buf + str(value)
315 elif isinstance(value, float):
316 yield buf + _floatstr(value)
317 else:
318 yield buf
319 if isinstance(value, (list, tuple)):
320 chunks = _iterencode_list(value, _current_indent_level)
321 elif isinstance(value, dict):
322 chunks = _iterencode_dict(value, _current_indent_level)
323 else:
324 chunks = _iterencode(value, _current_indent_level)
325 for chunk in chunks:
326 yield chunk
327 if newline_indent is not None:
328 _current_indent_level -= 1
329 yield '\n' + (' ' * (_indent * _current_indent_level))
330 yield ']'
331 if markers is not None:
332 del markers[markerid]
333
334 def _iterencode_dict(dct, _current_indent_level):
335 if not dct:
336 yield '{}'
337 return
338 if markers is not None:
339 markerid = id(dct)
340 if markerid in markers:
341 raise ValueError("Circular reference detected")
342 markers[markerid] = dct
343 yield '{'
344 if _indent is not None:
345 _current_indent_level += 1
346 newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
347 item_separator = _item_separator + newline_indent
348 yield newline_indent
349 else:
350 newline_indent = None
351 item_separator = _item_separator
352 first = True
353 if _sort_keys:
Ezio Melottiffd84962010-01-26 15:57:21 +0000354 items = sorted(dct.items(), key=lambda kv: kv[0])
Bob Ippolitod914e3f2009-03-17 23:19:00 +0000355 else:
356 items = dct.iteritems()
357 for key, value in items:
358 if isinstance(key, basestring):
359 pass
360 # JavaScript is weakly typed for these, so it makes sense to
361 # also allow them. Many encoders seem to do something like this.
362 elif isinstance(key, float):
363 key = _floatstr(key)
364 elif key is True:
365 key = 'true'
366 elif key is False:
367 key = 'false'
368 elif key is None:
369 key = 'null'
370 elif isinstance(key, (int, long)):
371 key = str(key)
372 elif _skipkeys:
373 continue
374 else:
375 raise TypeError("key " + repr(key) + " is not a string")
376 if first:
377 first = False
378 else:
379 yield item_separator
380 yield _encoder(key)
381 yield _key_separator
382 if isinstance(value, basestring):
383 yield _encoder(value)
384 elif value is None:
385 yield 'null'
386 elif value is True:
387 yield 'true'
388 elif value is False:
389 yield 'false'
390 elif isinstance(value, (int, long)):
391 yield str(value)
392 elif isinstance(value, float):
393 yield _floatstr(value)
394 else:
395 if isinstance(value, (list, tuple)):
396 chunks = _iterencode_list(value, _current_indent_level)
397 elif isinstance(value, dict):
398 chunks = _iterencode_dict(value, _current_indent_level)
399 else:
400 chunks = _iterencode(value, _current_indent_level)
401 for chunk in chunks:
402 yield chunk
403 if newline_indent is not None:
404 _current_indent_level -= 1
405 yield '\n' + (' ' * (_indent * _current_indent_level))
406 yield '}'
407 if markers is not None:
408 del markers[markerid]
409
410 def _iterencode(o, _current_indent_level):
411 if isinstance(o, basestring):
412 yield _encoder(o)
413 elif o is None:
414 yield 'null'
415 elif o is True:
416 yield 'true'
417 elif o is False:
418 yield 'false'
419 elif isinstance(o, (int, long)):
420 yield str(o)
421 elif isinstance(o, float):
422 yield _floatstr(o)
423 elif isinstance(o, (list, tuple)):
424 for chunk in _iterencode_list(o, _current_indent_level):
425 yield chunk
426 elif isinstance(o, dict):
427 for chunk in _iterencode_dict(o, _current_indent_level):
428 yield chunk
429 else:
430 if markers is not None:
431 markerid = id(o)
432 if markerid in markers:
433 raise ValueError("Circular reference detected")
434 markers[markerid] = o
435 o = _default(o)
436 for chunk in _iterencode(o, _current_indent_level):
437 yield chunk
438 if markers is not None:
439 del markers[markerid]
440
441 return _iterencode