blob: 75b7f494b3f00b299e509eb5aab66bdcd186868d [file] [log] [blame]
Christian Heimes90540002008-05-08 14:29:10 +00001"""Implementation of JSONEncoder
2"""
Christian Heimes90540002008-05-08 14:29:10 +00003import re
4
5try:
6 from _json import encode_basestring_ascii as c_encode_basestring_ascii
7except ImportError:
8 c_encode_basestring_ascii = None
Benjamin Petersonc6b607d2009-05-02 12:36:44 +00009try:
10 from _json import make_encoder as c_make_encoder
11except ImportError:
12 c_make_encoder = None
Christian Heimes90540002008-05-08 14:29:10 +000013
# Characters that must be escaped inside a JSON string when non-ASCII output
# is allowed: the C0 control range, the backslash and the double quote.
ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]')
# Characters that must be escaped for ASCII-only output: backslash, double
# quote, and anything outside the printable ASCII range (space through '~').
ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
# Matches any byte with the high bit set.
HAS_UTF8 = re.compile(b'[\x80-\xff]')
# Maps a character to its JSON escape sequence.  The short escapes are listed
# explicitly; the remaining control characters are filled in below.
ESCAPE_DCT = {
    '\\': '\\\\',
    '"': '\\"',
    '\b': '\\b',
    '\f': '\\f',
    '\n': '\\n',
    '\r': '\\r',
    '\t': '\\t',
}
for i in range(0x20):
    # setdefault keeps the short escapes above and adds \u00XX for the rest.
    ESCAPE_DCT.setdefault(chr(i), '\\u{0:04x}'.format(i))
# Do not leave the loop index behind as a public module attribute.
del i

INFINITY = float('inf')
FLOAT_REPR = repr
32
def encode_basestring(s):
    """Return a JSON representation of a Python string.

    The result is ``s`` wrapped in double quotes, with every character
    matched by ``ESCAPE`` replaced by its entry in ``ESCAPE_DCT``.
    Characters not matched by ``ESCAPE`` are passed through unchanged.
    """
    escaped = ESCAPE.sub(lambda found: ESCAPE_DCT[found.group(0)], s)
    return '"' + escaped + '"'
40
41
def py_encode_basestring_ascii(s):
    """Return an ASCII-only JSON representation of a Python string.

    Every character matched by ``ESCAPE_ASCII`` is replaced: characters
    listed in ``ESCAPE_DCT`` use that escape, other code points below
    0x10000 become a single ``\\uXXXX`` escape, and code points at or
    above 0x10000 become a UTF-16 surrogate pair of two such escapes.
    """
    def _escape(found):
        char = found.group(0)
        known = ESCAPE_DCT.get(char)
        if known is not None:
            return known
        code = ord(char)
        if code >= 0x10000:
            # Outside the BMP: split into a high/low surrogate pair.
            offset = code - 0x10000
            high = 0xd800 | ((offset >> 10) & 0x3ff)
            low = 0xdc00 | (offset & 0x3ff)
            return '\\u{0:04x}\\u{1:04x}'.format(high, low)
        return '\\u{0:04x}'.format(code)

    return '"' + ESCAPE_ASCII.sub(_escape, s) + '"'
Christian Heimes90540002008-05-08 14:29:10 +000062
63
# Use the C implementation when the _json import provided one; otherwise
# fall back to the pure-Python version.
if c_encode_basestring_ascii:
    encode_basestring_ascii = c_encode_basestring_ascii
else:
    encode_basestring_ascii = py_encode_basestring_ascii
Christian Heimes90540002008-05-08 14:29:10 +000066
class JSONEncoder(object):
    """Extensible JSON <http://json.org> encoder for Python data structures.

    Supports the following objects and types by default:

    +-------------------+---------------+
    | Python            | JSON          |
    +===================+===============+
    | dict              | object        |
    +-------------------+---------------+
    | list, tuple       | array         |
    +-------------------+---------------+
    | str               | string        |
    +-------------------+---------------+
    | int, float        | number        |
    +-------------------+---------------+
    | True              | true          |
    +-------------------+---------------+
    | False             | false         |
    +-------------------+---------------+
    | None              | null          |
    +-------------------+---------------+

    To extend this to recognize other objects, subclass and override the
    ``.default()`` method so that it returns a serializable object for
    ``o`` if possible; otherwise it should call the superclass
    implementation (to raise ``TypeError``).

    """
    item_separator = ', '
    key_separator = ': '

    def __init__(self, skipkeys=False, ensure_ascii=True,
            check_circular=True, allow_nan=True, sort_keys=False,
            indent=None, separators=None, default=None):
        """Constructor for JSONEncoder, with sensible defaults.

        If skipkeys is false, then it is a TypeError to attempt
        encoding of keys that are not str, int, float, bool or None.  If
        skipkeys is True, such items are simply skipped.

        If ensure_ascii is true, the output is guaranteed to be str
        objects with all incoming non-ASCII characters escaped.  If
        ensure_ascii is false, the output can contain non-ASCII characters.

        If check_circular is true, then lists, dicts, and custom encoded
        objects will be checked for circular references during encoding to
        prevent an infinite recursion (which would otherwise surface as a
        RuntimeError; RecursionError on Python 3.5 and later).
        Otherwise, no such check takes place.

        If allow_nan is true, then NaN, Infinity, and -Infinity will be
        encoded as such.  This behavior is not JSON specification compliant,
        but is consistent with most JavaScript based encoders and decoders.
        Otherwise, it will be a ValueError to encode such floats.

        If sort_keys is true, then the output of dictionaries will be
        sorted by key; this is useful for regression tests to ensure
        that JSON serializations can be compared on a day-to-day basis.

        If indent is a non-negative integer, then JSON array
        elements and object members will be pretty-printed with that
        indent level.  An indent level of 0 will only insert newlines.
        If indent is a string, that string is used for each indent level.
        None is the most compact representation.

        If specified, separators should be a (item_separator, key_separator)
        tuple.  The default is (', ', ': ').  To get the most compact JSON
        representation you should specify (',', ':') to eliminate whitespace.

        If specified, default is a function that gets called for objects
        that can't otherwise be serialized.  It should return a JSON encodable
        version of the object or raise a ``TypeError``.

        """

        self.skipkeys = skipkeys
        self.ensure_ascii = ensure_ascii
        self.check_circular = check_circular
        self.allow_nan = allow_nan
        self.sort_keys = sort_keys
        self.indent = indent
        if separators is not None:
            self.item_separator, self.key_separator = separators
        if default is not None:
            # Stored on the instance, so it shadows the default() method.
            self.default = default

    def default(self, o):
        """Implement this method in a subclass such that it returns
        a serializable object for ``o``, or calls the base implementation
        (to raise a ``TypeError``).

        For example, to support arbitrary iterators, you could
        implement default like this::

            def default(self, o):
                try:
                    iterable = iter(o)
                except TypeError:
                    pass
                else:
                    return list(iterable)
                return JSONEncoder.default(self, o)

        """
        raise TypeError(repr(o) + " is not JSON serializable")

    def encode(self, o):
        """Return a JSON string representation of a Python data structure.

        >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
        '{"foo": ["bar", "baz"]}'

        """
        # This is for extremely simple cases and benchmarks.
        if isinstance(o, str):
            if self.ensure_ascii:
                return encode_basestring_ascii(o)
            else:
                return encode_basestring(o)
        # This doesn't pass the iterator directly to ''.join() because the
        # exceptions aren't as detailed.  The list call should be roughly
        # equivalent to the PySequence_Fast that ''.join() would do.
        chunks = self.iterencode(o, _one_shot=True)
        if not isinstance(chunks, (list, tuple)):
            chunks = list(chunks)
        return ''.join(chunks)

    def iterencode(self, o, _one_shot=False):
        """Encode the given object and yield each string
        representation as available.

        For example::

            for chunk in JSONEncoder().iterencode(bigobject):
                mysocket.write(chunk)

        ``_one_shot`` is set by ``encode()``; when it is true, the C
        encoder is available and ``indent`` is None, the C encoder is
        used instead of the pure-Python one.

        """
        if self.check_circular:
            markers = {}
        else:
            markers = None
        if self.ensure_ascii:
            _encoder = encode_basestring_ascii
        else:
            _encoder = encode_basestring

        def floatstr(o, allow_nan=self.allow_nan,
                _repr=float.__repr__, _inf=INFINITY, _neginf=-INFINITY):
            # Check for specials.  Note that this type of test is processor
            # and/or platform-specific, so do tests which don't depend on the
            # internals.

            if o != o:
                text = 'NaN'
            elif o == _inf:
                text = 'Infinity'
            elif o == _neginf:
                text = '-Infinity'
            else:
                # float.__repr__ rather than repr(): a float subclass may
                # override __repr__ with text that is not a JSON number.
                return _repr(o)

            if not allow_nan:
                raise ValueError(
                    "Out of range float values are not JSON compliant: " +
                    repr(o))

            return text

        if (_one_shot and c_make_encoder is not None
                and self.indent is None):
            _iterencode = c_make_encoder(
                markers, self.default, _encoder, self.indent,
                self.key_separator, self.item_separator, self.sort_keys,
                self.skipkeys, self.allow_nan)
        else:
            _iterencode = _make_iterencode(
                markers, self.default, _encoder, self.indent, floatstr,
                self.key_separator, self.item_separator, self.sort_keys,
                self.skipkeys, _one_shot)
        return _iterencode(o, 0)
246
247def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
248 _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,
249 ## HACK: hand-optimized bytecode; turn globals into locals
250 ValueError=ValueError,
251 dict=dict,
252 float=float,
253 id=id,
254 int=int,
255 isinstance=isinstance,
256 list=list,
257 str=str,
258 tuple=tuple,
259 ):
260
Raymond Hettingerb643ef82010-10-31 08:00:16 +0000261 if _indent is not None and not isinstance(_indent, str):
262 _indent = ' ' * _indent
263
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000264 def _iterencode_list(lst, _current_indent_level):
265 if not lst:
266 yield '[]'
267 return
268 if markers is not None:
269 markerid = id(lst)
270 if markerid in markers:
271 raise ValueError("Circular reference detected")
272 markers[markerid] = lst
273 buf = '['
274 if _indent is not None:
275 _current_indent_level += 1
Raymond Hettingerb643ef82010-10-31 08:00:16 +0000276 newline_indent = '\n' + _indent * _current_indent_level
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000277 separator = _item_separator + newline_indent
278 buf += newline_indent
279 else:
280 newline_indent = None
281 separator = _item_separator
282 first = True
283 for value in lst:
284 if first:
285 first = False
286 else:
287 buf = separator
288 if isinstance(value, str):
289 yield buf + _encoder(value)
290 elif value is None:
291 yield buf + 'null'
292 elif value is True:
293 yield buf + 'true'
294 elif value is False:
295 yield buf + 'false'
296 elif isinstance(value, int):
297 yield buf + str(value)
298 elif isinstance(value, float):
299 yield buf + _floatstr(value)
300 else:
301 yield buf
302 if isinstance(value, (list, tuple)):
303 chunks = _iterencode_list(value, _current_indent_level)
304 elif isinstance(value, dict):
305 chunks = _iterencode_dict(value, _current_indent_level)
306 else:
307 chunks = _iterencode(value, _current_indent_level)
308 for chunk in chunks:
309 yield chunk
310 if newline_indent is not None:
311 _current_indent_level -= 1
Raymond Hettingerb643ef82010-10-31 08:00:16 +0000312 yield '\n' + _indent * _current_indent_level
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000313 yield ']'
314 if markers is not None:
315 del markers[markerid]
316
317 def _iterencode_dict(dct, _current_indent_level):
318 if not dct:
319 yield '{}'
320 return
321 if markers is not None:
322 markerid = id(dct)
323 if markerid in markers:
324 raise ValueError("Circular reference detected")
325 markers[markerid] = dct
326 yield '{'
327 if _indent is not None:
328 _current_indent_level += 1
Raymond Hettingerb643ef82010-10-31 08:00:16 +0000329 newline_indent = '\n' + _indent * _current_indent_level
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000330 item_separator = _item_separator + newline_indent
331 yield newline_indent
332 else:
333 newline_indent = None
334 item_separator = _item_separator
335 first = True
336 if _sort_keys:
337 items = sorted(dct.items(), key=lambda kv: kv[0])
338 else:
339 items = dct.items()
340 for key, value in items:
341 if isinstance(key, str):
342 pass
343 # JavaScript is weakly typed for these, so it makes sense to
344 # also allow them. Many encoders seem to do something like this.
345 elif isinstance(key, float):
346 key = _floatstr(key)
347 elif key is True:
348 key = 'true'
349 elif key is False:
350 key = 'false'
351 elif key is None:
352 key = 'null'
353 elif isinstance(key, int):
354 key = str(key)
355 elif _skipkeys:
356 continue
357 else:
358 raise TypeError("key " + repr(key) + " is not a string")
359 if first:
360 first = False
361 else:
362 yield item_separator
363 yield _encoder(key)
364 yield _key_separator
365 if isinstance(value, str):
366 yield _encoder(value)
367 elif value is None:
368 yield 'null'
369 elif value is True:
370 yield 'true'
371 elif value is False:
372 yield 'false'
373 elif isinstance(value, int):
374 yield str(value)
375 elif isinstance(value, float):
376 yield _floatstr(value)
377 else:
378 if isinstance(value, (list, tuple)):
379 chunks = _iterencode_list(value, _current_indent_level)
380 elif isinstance(value, dict):
381 chunks = _iterencode_dict(value, _current_indent_level)
382 else:
383 chunks = _iterencode(value, _current_indent_level)
384 for chunk in chunks:
385 yield chunk
386 if newline_indent is not None:
387 _current_indent_level -= 1
Raymond Hettingerb643ef82010-10-31 08:00:16 +0000388 yield '\n' + _indent * _current_indent_level
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000389 yield '}'
390 if markers is not None:
391 del markers[markerid]
392
393 def _iterencode(o, _current_indent_level):
394 if isinstance(o, str):
395 yield _encoder(o)
396 elif o is None:
397 yield 'null'
398 elif o is True:
399 yield 'true'
400 elif o is False:
401 yield 'false'
Florent Xicluna02ea12b22010-07-28 16:39:41 +0000402 elif isinstance(o, int):
Benjamin Petersonc6b607d2009-05-02 12:36:44 +0000403 yield str(o)
404 elif isinstance(o, float):
405 yield _floatstr(o)
406 elif isinstance(o, (list, tuple)):
407 for chunk in _iterencode_list(o, _current_indent_level):
408 yield chunk
409 elif isinstance(o, dict):
410 for chunk in _iterencode_dict(o, _current_indent_level):
411 yield chunk
412 else:
413 if markers is not None:
414 markerid = id(o)
415 if markerid in markers:
416 raise ValueError("Circular reference detected")
417 markers[markerid] = o
418 o = _default(o)
419 for chunk in _iterencode(o, _current_indent_level):
420 yield chunk
421 if markers is not None:
422 del markers[markerid]
423 return _iterencode