blob: e1ed21f6e70a00ce46c76e786da09f8f85849196 [file] [log] [blame]
Christian Heimes90540002008-05-08 14:29:10 +00001"""Implementation of JSONEncoder
2"""
Christian Heimes90540002008-05-08 14:29:10 +00003import re
4
5try:
6 from _json import encode_basestring_ascii as c_encode_basestring_ascii
7except ImportError:
8 c_encode_basestring_ascii = None
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00009try:
10 from _json import make_encoder as c_make_encoder
11except ImportError:
12 c_make_encoder = None
Christian Heimes90540002008-05-08 14:29:10 +000013
# Characters replaced when non-ASCII output is allowed: control characters,
# the backslash and the double quote.
ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]')
# Characters replaced when ASCII-only output is requested: the backslash,
# the double quote and everything outside the range space..tilde.
ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
# Matches any single byte in the range 0x80-0xff.
HAS_UTF8 = re.compile(b'[\x80-\xff]')

# Character -> JSON escape sequence.  The short, named escapes are spelled
# out here; the remaining control characters are filled in by the loop below.
ESCAPE_DCT = {
    '\\': '\\\\',
    '"': '\\"',
    '\b': '\\b',
    '\f': '\\f',
    '\n': '\\n',
    '\r': '\\r',
    '\t': '\\t',
}
for i in range(0x20):
    # Keep the short escapes defined above; everything else gets \uXXXX.
    if chr(i) not in ESCAPE_DCT:
        ESCAPE_DCT[chr(i)] = '\\u' + format(i, '04x')

INFINITY = float('inf')
# Callable used to turn a finite float into its textual form.
FLOAT_REPR = repr
32
def encode_basestring(s):
    """Return a JSON representation of a Python string.

    The text is wrapped in double quotes and every character matched by
    ``ESCAPE`` is replaced with its entry in ``ESCAPE_DCT``.

    """
    escaped = ESCAPE.sub(lambda match: ESCAPE_DCT[match.group(0)], s)
    return '"' + escaped + '"'
40
41
def py_encode_basestring_ascii(s):
    """Return an ASCII-only JSON representation of a Python string.

    The text is wrapped in double quotes.  Characters matched by
    ``ESCAPE_ASCII`` are replaced either by their ``ESCAPE_DCT`` entry or,
    when they have none, by ``\\uXXXX`` escapes.

    """
    def _escape(match):
        char = match.group(0)
        if char in ESCAPE_DCT:
            return ESCAPE_DCT[char]
        code = ord(char)
        if code < 0x10000:
            return '\\u{0:04x}'.format(code)
        # Code points of 0x10000 and above are written as a surrogate pair.
        code -= 0x10000
        high = 0xd800 | ((code >> 10) & 0x3ff)
        low = 0xdc00 | (code & 0x3ff)
        return '\\u{0:04x}\\u{1:04x}'.format(high, low)

    return '"' + ESCAPE_ASCII.sub(_escape, s) + '"'
Christian Heimes90540002008-05-08 14:29:10 +000062
63
# Use the C implementation when it is available (truthy), otherwise fall
# back to the pure-Python one.
if c_encode_basestring_ascii:
    encode_basestring_ascii = c_encode_basestring_ascii
else:
    encode_basestring_ascii = py_encode_basestring_ascii
Christian Heimes90540002008-05-08 14:29:10 +000066
class JSONEncoder(object):
    """Extensible JSON <http://json.org> encoder for Python data structures.

    The following conversions are performed by default:

    +-------------------+---------------+
    | Python            | JSON          |
    +===================+===============+
    | dict              | object        |
    +-------------------+---------------+
    | list, tuple       | array         |
    +-------------------+---------------+
    | str               | string        |
    +-------------------+---------------+
    | int, float        | number        |
    +-------------------+---------------+
    | True              | true          |
    +-------------------+---------------+
    | False             | false         |
    +-------------------+---------------+
    | None              | null          |
    +-------------------+---------------+

    To support other objects, subclass and override ``.default()`` so that
    it returns a serializable object for ``o`` when it can, and otherwise
    calls the superclass implementation (which raises ``TypeError``).

    """
    # Class-level defaults; ``__init__`` overrides them per instance when
    # ``separators`` is given.
    item_separator = ', '
    key_separator = ': '

    def __init__(self, skipkeys=False, ensure_ascii=True,
                 check_circular=True, allow_nan=True, sort_keys=False,
                 indent=None, separators=None, default=None):
        """Constructor for JSONEncoder, with sensible defaults.

        skipkeys -- when false, encoding a dict key that is not a str,
        int, float or None is a TypeError; when true, such items are
        simply skipped.

        ensure_ascii -- when true, the output is a str in which all
        incoming non-ASCII characters are escaped; when false, the output
        can contain non-ASCII characters.

        check_circular -- when true, lists, dicts and custom encoded
        objects are checked for circular references while encoding, so
        that a cycle is reported instead of recursing without bound.
        When false, no such check takes place.

        allow_nan -- when true, NaN, Infinity and -Infinity are encoded
        as such.  This is not compliant with the JSON specification, but
        is consistent with most JavaScript based encoders and decoders.
        When false, encoding such a float is a ValueError.

        sort_keys -- when true, dictionaries are output sorted by key,
        which is useful for regression tests that compare JSON
        serializations over time.

        indent -- when a non-negative integer, JSON array elements and
        object members are pretty-printed with that indent level.  An
        indent level of 0 only inserts newlines.  None gives the most
        compact representation.  Because the default item separator is
        ', ', the output might include trailing whitespace when indent is
        specified; pass separators=(',', ': ') to avoid this.

        separators -- when specified, an (item_separator, key_separator)
        tuple.  The default is (', ', ': ').  Specify (',', ':') to
        eliminate whitespace and get the most compact JSON.

        default -- when specified, a function that is called for objects
        that can't otherwise be serialized.  It should return a JSON
        encodable version of the object or raise a ``TypeError``.

        """
        self.skipkeys = skipkeys
        self.ensure_ascii = ensure_ascii
        self.check_circular = check_circular
        self.allow_nan = allow_nan
        self.sort_keys = sort_keys
        self.indent = indent
        if separators is not None:
            self.item_separator, self.key_separator = separators
        if default is not None:
            # An instance attribute shadows the ``default`` method.
            self.default = default

    def default(self, o):
        """Return a serializable object for ``o``, or raise ``TypeError``.

        Override this in a subclass and fall back to the base
        implementation for anything you cannot handle.  For example, to
        support arbitrary iterators, you could implement default like
        this::

            def default(self, o):
                try:
                    iterable = iter(o)
                except TypeError:
                    pass
                else:
                    return list(iterable)
                return JSONEncoder.default(self, o)

        """
        message = repr(o) + " is not JSON serializable"
        raise TypeError(message)

    def encode(self, o):
        """Return a JSON string representation of a Python data structure.

        >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
        '{"foo": ["bar", "baz"]}'

        """
        # Shortcut for a bare string (extremely simple cases, benchmarks).
        if isinstance(o, str):
            string_encoder = (encode_basestring_ascii if self.ensure_ascii
                              else encode_basestring)
            return string_encoder(o)
        # The iterator is not handed to ''.join() directly because the
        # exceptions aren't as detailed.  Building the list here should be
        # roughly equivalent to the PySequence_Fast that ''.join() would do.
        pieces = self.iterencode(o, _one_shot=True)
        if not isinstance(pieces, (list, tuple)):
            pieces = list(pieces)
        return ''.join(pieces)

    def iterencode(self, o, _one_shot=False):
        """Encode the given object and yield each string
        representation as available.

        For example::

            for chunk in JSONEncoder().iterencode(bigobject):
                mysocket.write(chunk)

        """
        # A dict of in-progress containers enables cycle detection.
        markers = {} if self.check_circular else None
        _encoder = (encode_basestring_ascii if self.ensure_ascii
                    else encode_basestring)

        def floatstr(o, allow_nan=self.allow_nan,
                     _repr=FLOAT_REPR, _inf=INFINITY, _neginf=-INFINITY):
            # Detect the special values with plain comparisons, which do
            # not depend on processor or platform internals.
            if o != o:
                special = 'NaN'
            elif o == _inf:
                special = 'Infinity'
            elif o == _neginf:
                special = '-Infinity'
            else:
                return _repr(o)

            if allow_nan:
                return special
            raise ValueError(
                "Out of range float values are not JSON compliant: " +
                repr(o))

        # The C encoder is only used for one-shot, non-indented output.
        use_c_encoder = (_one_shot and c_make_encoder is not None
                         and self.indent is None)
        if use_c_encoder:
            _iterencode = c_make_encoder(
                markers, self.default, _encoder, self.indent,
                self.key_separator, self.item_separator, self.sort_keys,
                self.skipkeys, self.allow_nan)
        else:
            _iterencode = _make_iterencode(
                markers, self.default, _encoder, self.indent, floatstr,
                self.key_separator, self.item_separator, self.sort_keys,
                self.skipkeys, _one_shot)
        return _iterencode(o, 0)
249
def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
        _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,
        ## HACK: hand-optimized bytecode; turn globals into locals
        ValueError=ValueError,
        dict=dict,
        float=float,
        id=id,
        int=int,
        isinstance=isinstance,
        list=list,
        str=str,
        tuple=tuple,
        _intstr=int.__repr__,
    ):
    """Build the pure-Python incremental encoder.

    Returns a generator function ``_iterencode(o, _current_indent_level)``
    that yields the JSON text for ``o`` piece by piece.

    markers -- a dict used to detect circular references (keyed by
    ``id()`` of the container being encoded), or None to disable the check.
    _default -- called with any object of an unsupported type; its result
    is encoded in place of the object.
    _encoder -- called with a str and returns its quoted JSON form.
    _indent -- None for compact output, a str used as one indent unit, or
    a value multiplied with ``' '`` to build that unit.
    _floatstr -- called with a float and returns its JSON text.
    _key_separator, _item_separator -- strings emitted between a key and
    its value, and between consecutive items.
    _sort_keys -- when true, dict items are emitted sorted by key.
    _skipkeys -- when true, dict items with an unsupported key type are
    skipped; otherwise they raise TypeError.
    _one_shot -- accepted for interface compatibility; not used here.

    The remaining keyword parameters only bind builtins as locals for
    speed.  ``_intstr`` renders integers; it defaults to ``int.__repr__``
    so that an int subclass overriding ``__str__`` or ``__repr__`` still
    yields a valid JSON number.

    Raises ValueError("Circular reference detected") when a container is
    reached again while it is still being encoded and markers is not None.

    """

    if _indent is not None and not isinstance(_indent, str):
        _indent = ' ' * _indent

    def _iterencode_list(lst, _current_indent_level):
        if not lst:
            yield '[]'
            return
        if markers is not None:
            markerid = id(lst)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = lst
        # ``buf`` holds the text that must precede the next item: the
        # opening bracket for the first one, the separator afterwards.
        buf = '['
        if _indent is not None:
            _current_indent_level += 1
            newline_indent = '\n' + _indent * _current_indent_level
            separator = _item_separator + newline_indent
            buf += newline_indent
        else:
            newline_indent = None
            separator = _item_separator
        first = True
        for value in lst:
            if first:
                first = False
            else:
                buf = separator
            if isinstance(value, str):
                yield buf + _encoder(value)
            elif value is None:
                yield buf + 'null'
            elif value is True:
                yield buf + 'true'
            elif value is False:
                yield buf + 'false'
            elif isinstance(value, int):
                # Not str(value): a subclass may override __str__.
                yield buf + _intstr(value)
            elif isinstance(value, float):
                yield buf + _floatstr(value)
            else:
                yield buf
                if isinstance(value, (list, tuple)):
                    chunks = _iterencode_list(value, _current_indent_level)
                elif isinstance(value, dict):
                    chunks = _iterencode_dict(value, _current_indent_level)
                else:
                    chunks = _iterencode(value, _current_indent_level)
                for chunk in chunks:
                    yield chunk
        if newline_indent is not None:
            _current_indent_level -= 1
            yield '\n' + _indent * _current_indent_level
        yield ']'
        if markers is not None:
            del markers[markerid]

    def _iterencode_dict(dct, _current_indent_level):
        if not dct:
            yield '{}'
            return
        if markers is not None:
            markerid = id(dct)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = dct
        yield '{'
        if _indent is not None:
            _current_indent_level += 1
            newline_indent = '\n' + _indent * _current_indent_level
            item_separator = _item_separator + newline_indent
            yield newline_indent
        else:
            newline_indent = None
            item_separator = _item_separator
        first = True
        if _sort_keys:
            items = sorted(dct.items(), key=lambda kv: kv[0])
        else:
            items = dct.items()
        for key, value in items:
            if isinstance(key, str):
                pass
            # JavaScript is weakly typed for these, so it makes sense to
            # also allow them.  Many encoders seem to do something like this.
            elif isinstance(key, float):
                key = _floatstr(key)
            elif key is True:
                key = 'true'
            elif key is False:
                key = 'false'
            elif key is None:
                key = 'null'
            elif isinstance(key, int):
                # Not str(key): a subclass may override __str__.
                key = _intstr(key)
            elif _skipkeys:
                continue
            else:
                raise TypeError("key " + repr(key) + " is not a string")
            if first:
                first = False
            else:
                yield item_separator
            yield _encoder(key)
            yield _key_separator
            if isinstance(value, str):
                yield _encoder(value)
            elif value is None:
                yield 'null'
            elif value is True:
                yield 'true'
            elif value is False:
                yield 'false'
            elif isinstance(value, int):
                yield _intstr(value)
            elif isinstance(value, float):
                yield _floatstr(value)
            else:
                if isinstance(value, (list, tuple)):
                    chunks = _iterencode_list(value, _current_indent_level)
                elif isinstance(value, dict):
                    chunks = _iterencode_dict(value, _current_indent_level)
                else:
                    chunks = _iterencode(value, _current_indent_level)
                for chunk in chunks:
                    yield chunk
        if newline_indent is not None:
            _current_indent_level -= 1
            yield '\n' + _indent * _current_indent_level
        yield '}'
        if markers is not None:
            del markers[markerid]

    def _iterencode(o, _current_indent_level):
        if isinstance(o, str):
            yield _encoder(o)
        elif o is None:
            yield 'null'
        elif o is True:
            yield 'true'
        elif o is False:
            yield 'false'
        elif isinstance(o, int):
            yield _intstr(o)
        elif isinstance(o, float):
            yield _floatstr(o)
        elif isinstance(o, (list, tuple)):
            for chunk in _iterencode_list(o, _current_indent_level):
                yield chunk
        elif isinstance(o, dict):
            for chunk in _iterencode_dict(o, _current_indent_level):
                yield chunk
        else:
            # Unsupported type: track it for cycle detection, convert it
            # with the user hook, then encode whatever the hook returned.
            if markers is not None:
                markerid = id(o)
                if markerid in markers:
                    raise ValueError("Circular reference detected")
                markers[markerid] = o
            o = _default(o)
            for chunk in _iterencode(o, _current_indent_level):
                yield chunk
            if markers is not None:
                del markers[markerid]
    return _iterencode