blob: ba57c2c25eeed92d4b328d8823bb5a4bc3d556f0 [file] [log] [blame]
Christian Heimes90540002008-05-08 14:29:10 +00001"""Implementation of JSONEncoder
2"""
Christian Heimes90540002008-05-08 14:29:10 +00003import re
4
5try:
6 from _json import encode_basestring_ascii as c_encode_basestring_ascii
7except ImportError:
8 c_encode_basestring_ascii = None
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00009try:
10 from _json import make_encoder as c_make_encoder
11except ImportError:
12 c_make_encoder = None
Christian Heimes90540002008-05-08 14:29:10 +000013
# Matches every character that has to be escaped inside a JSON string when
# non-ASCII output is allowed: control characters, backslash, double quote.
ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]')
# Matches backslash, double quote and anything outside printable ASCII
# (space through tilde); used for ASCII-only output.
ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
# Matches any byte with the high bit set.
HAS_UTF8 = re.compile(b'[\x80-\xff]')

# Maps a character to the escape sequence that replaces it.  The short
# two-character escapes are listed explicitly ...
ESCAPE_DCT = {
    '\\': '\\\\',
    '"': '\\"',
    '\b': '\\b',
    '\f': '\\f',
    '\n': '\\n',
    '\r': '\\r',
    '\t': '\\t',
}
# ... and every remaining control character below 0x20 falls back to the
# generic \uXXXX form without overriding the short escapes above.
for i in range(0x20):
    if chr(i) not in ESCAPE_DCT:
        ESCAPE_DCT[chr(i)] = '\\u{0:04x}'.format(i)

INFINITY = float('inf')
# Function used to render ordinary (finite) floats.
FLOAT_REPR = repr
32
def encode_basestring(s):
    """Return a JSON representation of a Python string.

    Each character matched by ``ESCAPE`` is replaced by its entry in
    ``ESCAPE_DCT``; the result is wrapped in double quotes.  Characters
    not matched by ``ESCAPE`` are passed through unchanged.

    """
    escaped = ESCAPE.sub(lambda found: ESCAPE_DCT[found.group(0)], s)
    return '"' + escaped + '"'
40
41
def py_encode_basestring_ascii(s):
    """Return an ASCII-only JSON representation of a Python string.

    Characters matched by ``ESCAPE_ASCII`` are replaced either by their
    entry in ``ESCAPE_DCT`` or by ``\\uXXXX`` escapes; the result is
    wrapped in double quotes.

    """
    def escape_char(found):
        char = found.group(0)
        if char in ESCAPE_DCT:
            return ESCAPE_DCT[char]
        codepoint = ord(char)
        if codepoint < 0x10000:
            # Fits in a single \uXXXX escape.
            return '\\u{0:04x}'.format(codepoint)
        # Above 0xFFFF: emit a UTF-16 surrogate pair.
        offset = codepoint - 0x10000
        high = 0xd800 | ((offset >> 10) & 0x3ff)
        low = 0xdc00 | (offset & 0x3ff)
        return '\\u{0:04x}\\u{1:04x}'.format(high, low)

    return '"' + ESCAPE_ASCII.sub(escape_char, s) + '"'
Christian Heimes90540002008-05-08 14:29:10 +000062
63
# Use the C implementation when it was importable, otherwise fall back to
# the pure-Python one.
encode_basestring_ascii = (
    c_encode_basestring_ascii
    if c_encode_basestring_ascii
    else py_encode_basestring_ascii
)
Christian Heimes90540002008-05-08 14:29:10 +000066
class JSONEncoder:
    """Extensible JSON <http://json.org> encoder for Python data structures.

    The following objects and types are supported by default:

    +-------------------+---------------+
    | Python            | JSON          |
    +===================+===============+
    | dict              | object        |
    +-------------------+---------------+
    | list, tuple       | array         |
    +-------------------+---------------+
    | str               | string        |
    +-------------------+---------------+
    | int, float        | number        |
    +-------------------+---------------+
    | True              | true          |
    +-------------------+---------------+
    | False             | false         |
    +-------------------+---------------+
    | None              | null          |
    +-------------------+---------------+

    To support other objects, subclass and override ``.default()`` so
    that it returns a serializable object for ``o`` when it can, and
    otherwise calls the superclass implementation (which raises
    ``TypeError``).

    """
    # Class-level defaults; __init__ overrides them per instance when
    # ``separators`` is given.
    item_separator = ', '
    key_separator = ': '

    def __init__(self, skipkeys=False, ensure_ascii=True,
            check_circular=True, allow_nan=True, sort_keys=False,
            indent=None, separators=None, default=None):
        """Constructor for JSONEncoder, with sensible defaults.

        skipkeys: when false, a dict key that is not a str, int, float
        or None is a TypeError; when true, such items are skipped.

        ensure_ascii: when true, all incoming non-ASCII characters are
        escaped in the output; when false, the output may contain
        non-ASCII characters.

        check_circular: when true, lists, dicts and custom encoded
        objects are checked for circular references during encoding, to
        prevent infinite recursion.  Otherwise no such check is made.

        allow_nan: when true, NaN, Infinity and -Infinity are encoded as
        such.  This is not compliant with the JSON specification but is
        consistent with most JavaScript based encoders and decoders.
        Otherwise encoding such floats is a ValueError.

        sort_keys: when true, dictionaries are output sorted by key,
        which makes serializations comparable across runs.

        indent: a non-negative integer pretty-prints array elements and
        object members with that indent level (0 only inserts
        newlines); None gives the most compact representation.

        separators: if given, an (item_separator, key_separator) tuple.
        The default is (', ', ': '); use (',', ':') to eliminate
        whitespace.

        default: if given, a function called for objects that cannot
        otherwise be serialized.  It should return a JSON encodable
        version of the object or raise ``TypeError``.

        """
        self.skipkeys = skipkeys
        self.ensure_ascii = ensure_ascii
        self.check_circular = check_circular
        self.allow_nan = allow_nan
        self.sort_keys = sort_keys
        self.indent = indent
        if separators is not None:
            item_sep, key_sep = separators
            self.item_separator = item_sep
            self.key_separator = key_sep
        if default is not None:
            # Shadow the ``default`` method with the supplied callable.
            self.default = default

    def default(self, o):
        """Return a serializable object for ``o`` or raise ``TypeError``.

        This base implementation always raises.  Subclasses override it
        and fall back to the base implementation for objects they do not
        handle.  For example, to support arbitrary iterators::

            def default(self, o):
                try:
                    iterable = iter(o)
                except TypeError:
                    pass
                else:
                    return list(iterable)
                return JSONEncoder.default(self, o)

        """
        message = repr(o) + " is not JSON serializable"
        raise TypeError(message)

    def encode(self, o):
        """Return a JSON string representation of a Python data structure.

        >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
        '{"foo": ["bar", "baz"]}'

        """
        # Shortcut for a bare string: no need to set up an iterator.
        if isinstance(o, str):
            if not self.ensure_ascii:
                return encode_basestring(o)
            return encode_basestring_ascii(o)
        # The chunks are materialised into a list before joining rather
        # than handing the iterator straight to ''.join(), because the
        # exceptions are more detailed that way.
        pieces = self.iterencode(o, _one_shot=True)
        if isinstance(pieces, (list, tuple)):
            return ''.join(pieces)
        return ''.join(list(pieces))

    def iterencode(self, o, _one_shot=False):
        """Encode the given object and yield each string
        representation as available.

        For example::

            for chunk in JSONEncoder().iterencode(bigobject):
                mysocket.write(chunk)

        """
        # ``markers`` tracks the ids of containers currently being
        # encoded; None disables the circular-reference check.
        markers = {} if self.check_circular else None
        _encoder = (encode_basestring_ascii if self.ensure_ascii
                    else encode_basestring)

        def floatstr(o, allow_nan=self.allow_nan,
                _repr=FLOAT_REPR, _inf=INFINITY, _neginf=-INFINITY):
            # Detect the special values with plain comparisons, which
            # do not depend on platform-specific float internals.
            if o != o:
                special = 'NaN'
            elif o == _inf:
                special = 'Infinity'
            elif o == _neginf:
                special = '-Infinity'
            else:
                special = None

            if special is None:
                return _repr(o)
            if allow_nan:
                return special
            raise ValueError(
                "Out of range float values are not JSON compliant: " +
                repr(o))

        # The C encoder is only used for one-shot encoding without
        # indentation.
        use_c_encoder = (_one_shot and c_make_encoder is not None
                         and self.indent is None)
        if use_c_encoder:
            _iterencode = c_make_encoder(
                markers, self.default, _encoder, self.indent,
                self.key_separator, self.item_separator, self.sort_keys,
                self.skipkeys, self.allow_nan)
        else:
            _iterencode = _make_iterencode(
                markers, self.default, _encoder, self.indent, floatstr,
                self.key_separator, self.item_separator, self.sort_keys,
                self.skipkeys, _one_shot)
        return _iterencode(o, 0)
246
247def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
248 _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,
249 ## HACK: hand-optimized bytecode; turn globals into locals
250 ValueError=ValueError,
251 dict=dict,
252 float=float,
253 id=id,
254 int=int,
255 isinstance=isinstance,
256 list=list,
257 str=str,
258 tuple=tuple,
259 ):
260
Raymond Hettingerb643ef82010-10-31 08:00:16 +0000261 if _indent is not None and not isinstance(_indent, str):
262 _indent = ' ' * _indent
263
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000264 def _iterencode_list(lst, _current_indent_level):
265 if not lst:
266 yield '[]'
267 return
268 if markers is not None:
269 markerid = id(lst)
270 if markerid in markers:
271 raise ValueError("Circular reference detected")
272 markers[markerid] = lst
273 buf = '['
274 if _indent is not None:
275 _current_indent_level += 1
Raymond Hettingerb643ef82010-10-31 08:00:16 +0000276 newline_indent = '\n' + _indent * _current_indent_level
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000277 separator = _item_separator + newline_indent
278 buf += newline_indent
279 else:
280 newline_indent = None
281 separator = _item_separator
282 first = True
283 for value in lst:
284 if first:
285 first = False
286 else:
287 buf = separator
288 if isinstance(value, str):
289 yield buf + _encoder(value)
290 elif value is None:
291 yield buf + 'null'
292 elif value is True:
293 yield buf + 'true'
294 elif value is False:
295 yield buf + 'false'
296 elif isinstance(value, int):
297 yield buf + str(value)
298 elif isinstance(value, float):
299 yield buf + _floatstr(value)
300 else:
301 yield buf
302 if isinstance(value, (list, tuple)):
303 chunks = _iterencode_list(value, _current_indent_level)
304 elif isinstance(value, dict):
305 chunks = _iterencode_dict(value, _current_indent_level)
306 else:
307 chunks = _iterencode(value, _current_indent_level)
Philip Jenveyfd0d3e52012-10-01 15:34:31 -0700308 yield from chunks
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000309 if newline_indent is not None:
310 _current_indent_level -= 1
Raymond Hettingerb643ef82010-10-31 08:00:16 +0000311 yield '\n' + _indent * _current_indent_level
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000312 yield ']'
313 if markers is not None:
314 del markers[markerid]
315
316 def _iterencode_dict(dct, _current_indent_level):
317 if not dct:
318 yield '{}'
319 return
320 if markers is not None:
321 markerid = id(dct)
322 if markerid in markers:
323 raise ValueError("Circular reference detected")
324 markers[markerid] = dct
325 yield '{'
326 if _indent is not None:
327 _current_indent_level += 1
Raymond Hettingerb643ef82010-10-31 08:00:16 +0000328 newline_indent = '\n' + _indent * _current_indent_level
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000329 item_separator = _item_separator + newline_indent
330 yield newline_indent
331 else:
332 newline_indent = None
333 item_separator = _item_separator
334 first = True
335 if _sort_keys:
336 items = sorted(dct.items(), key=lambda kv: kv[0])
337 else:
338 items = dct.items()
339 for key, value in items:
340 if isinstance(key, str):
341 pass
342 # JavaScript is weakly typed for these, so it makes sense to
343 # also allow them. Many encoders seem to do something like this.
344 elif isinstance(key, float):
345 key = _floatstr(key)
346 elif key is True:
347 key = 'true'
348 elif key is False:
349 key = 'false'
350 elif key is None:
351 key = 'null'
352 elif isinstance(key, int):
353 key = str(key)
354 elif _skipkeys:
355 continue
356 else:
357 raise TypeError("key " + repr(key) + " is not a string")
358 if first:
359 first = False
360 else:
361 yield item_separator
362 yield _encoder(key)
363 yield _key_separator
364 if isinstance(value, str):
365 yield _encoder(value)
366 elif value is None:
367 yield 'null'
368 elif value is True:
369 yield 'true'
370 elif value is False:
371 yield 'false'
372 elif isinstance(value, int):
373 yield str(value)
374 elif isinstance(value, float):
375 yield _floatstr(value)
376 else:
377 if isinstance(value, (list, tuple)):
378 chunks = _iterencode_list(value, _current_indent_level)
379 elif isinstance(value, dict):
380 chunks = _iterencode_dict(value, _current_indent_level)
381 else:
382 chunks = _iterencode(value, _current_indent_level)
Philip Jenveyfd0d3e52012-10-01 15:34:31 -0700383 yield from chunks
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000384 if newline_indent is not None:
385 _current_indent_level -= 1
Raymond Hettingerb643ef82010-10-31 08:00:16 +0000386 yield '\n' + _indent * _current_indent_level
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000387 yield '}'
388 if markers is not None:
389 del markers[markerid]
390
391 def _iterencode(o, _current_indent_level):
392 if isinstance(o, str):
393 yield _encoder(o)
394 elif o is None:
395 yield 'null'
396 elif o is True:
397 yield 'true'
398 elif o is False:
399 yield 'false'
Florent Xicluna02ea12b22010-07-28 16:39:41 +0000400 elif isinstance(o, int):
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000401 yield str(o)
402 elif isinstance(o, float):
403 yield _floatstr(o)
404 elif isinstance(o, (list, tuple)):
Philip Jenveyfd0d3e52012-10-01 15:34:31 -0700405 yield from _iterencode_list(o, _current_indent_level)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000406 elif isinstance(o, dict):
Philip Jenveyfd0d3e52012-10-01 15:34:31 -0700407 yield from _iterencode_dict(o, _current_indent_level)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000408 else:
409 if markers is not None:
410 markerid = id(o)
411 if markerid in markers:
412 raise ValueError("Circular reference detected")
413 markers[markerid] = o
414 o = _default(o)
Philip Jenveyfd0d3e52012-10-01 15:34:31 -0700415 yield from _iterencode(o, _current_indent_level)
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000416 if markers is not None:
417 del markers[markerid]
418 return _iterencode