blob: 4cb366016f827319d84909f971fa6e1beea56243 [file] [log] [blame]
"""Implementation of JSONEncoder
"""
3
4import re
5
6try:
7 from _json import encode_basestring_ascii as c_encode_basestring_ascii
8except ImportError:
9 c_encode_basestring_ascii = None
10
11__all__ = ['JSONEncoder']
12
# Characters that always need escaping inside a JSON string: the control
# range U+0000-U+001F, the backslash and the double quote.
ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]')
# Same as above plus everything outside the printable ASCII range
# (space through tilde); used when the output must be pure ASCII.
ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
# Matches any character in the U+0080-U+00FF range.
HAS_UTF8 = re.compile(r'[\x80-\xff]')

# Replacement text for each character that has a dedicated short escape.
ESCAPE_DCT = {
    '\\': '\\\\',
    '"': '\\"',
    '\b': '\\b',
    '\f': '\\f',
    '\n': '\\n',
    '\r': '\\r',
    '\t': '\\t',
}
# Every remaining control character falls back to the generic \uXXXX form;
# entries defined above are left untouched.
for i in range(0x20):
    if chr(i) not in ESCAPE_DCT:
        ESCAPE_DCT[chr(i)] = '\\u{0:04x}'.format(i)
27
# float('inf') is the documented spelling of positive infinity, so there is
# no need to rely on an overflowing decimal literal being rounded to it.
INFINITY = float('inf')
# Hook used to render ordinary (finite) floats; kept as a module-level name
# so it can be rebound.
FLOAT_REPR = repr
31
def floatstr(o, allow_nan=True):
    """Return the JSON text for the float ``o``.

    Finite values are rendered with ``FLOAT_REPR``.  NaN and the two
    infinities are rendered as ``NaN``, ``Infinity`` and ``-Infinity`` when
    ``allow_nan`` is true; otherwise a ``ValueError`` is raised for them.
    """
    # The special values are detected with plain comparisons rather than by
    # inspecting the representation, which is platform-specific.  NaN is
    # recognised by not comparing equal to itself.
    if o != o:
        special = 'NaN'
    elif o == INFINITY:
        special = 'Infinity'
    elif o == -INFINITY:
        special = '-Infinity'
    else:
        special = None

    if special is None:
        return FLOAT_REPR(o)
    if allow_nan:
        return special
    raise ValueError(
        "Out of range float values are not JSON compliant: " + repr(o))
50
51
def encode_basestring(s):
    """Return a JSON representation of a Python string.

    ``s`` may be a ``str`` or UTF-8 encoded ``bytes``; bytes are decoded
    first, mirroring what ``py_encode_basestring_ascii`` does.  Only the
    backslash, the double quote and the control characters U+0000-U+001F
    are escaped; all other characters are copied through unchanged.  The
    result is wrapped in double quotes.
    """
    if isinstance(s, bytes):
        # ESCAPE is a str pattern and cannot be applied to bytes, so decode
        # up front instead of failing with a TypeError.
        s = s.decode('utf-8')

    def replace(match):
        return ESCAPE_DCT[match.group(0)]
    return '"' + ESCAPE.sub(replace, s) + '"'
59
60
def py_encode_basestring_ascii(s):
    """Return an ASCII-only JSON representation of a Python string.

    ``bytes`` input is decoded as UTF-8 first.  Characters with an entry in
    ``ESCAPE_DCT`` use that escape; any other character matched by
    ``ESCAPE_ASCII`` is written as ``\\uXXXX``, using a surrogate pair for
    code points at or above U+10000.  The result is wrapped in double
    quotes.
    """
    if isinstance(s, bytes):
        s = s.decode('utf-8')

    def escape_char(found):
        char = found.group(0)
        if char in ESCAPE_DCT:
            return ESCAPE_DCT[char]
        codepoint = ord(char)
        if codepoint < 0x10000:
            return '\\u{0:04x}'.format(codepoint)
        # Code points beyond the 16-bit range are split into a high and a
        # low surrogate, each carrying ten bits of the offset.
        offset = codepoint - 0x10000
        high = 0xd800 | ((offset >> 10) & 0x3ff)
        low = 0xdc00 | (offset & 0x3ff)
        return '\\u{0:04x}\\u{1:04x}'.format(high, low)

    return '"' + ESCAPE_ASCII.sub(escape_char, s) + '"'
79
80
# Use the accelerated implementation when the _json import provided one,
# otherwise fall back to the pure-Python version.
encode_basestring_ascii = (
    py_encode_basestring_ascii
    if c_encode_basestring_ascii is None
    else c_encode_basestring_ascii
)
85
86
class JSONEncoder(object):
    """Extensible JSON <http://json.org> encoder for Python data structures.

    Supports the following objects and types by default:

    +-------------------+---------------+
    | Python            | JSON          |
    +===================+===============+
    | dict              | object        |
    +-------------------+---------------+
    | list, tuple       | array         |
    +-------------------+---------------+
    | str, bytes        | string        |
    +-------------------+---------------+
    | int, float        | number        |
    +-------------------+---------------+
    | True              | true          |
    +-------------------+---------------+
    | False             | false         |
    +-------------------+---------------+
    | None              | null          |
    +-------------------+---------------+

    To extend this to recognize other objects, subclass and implement a
    ``.default()`` method with another method that returns a serializable
    object for ``o`` if possible, otherwise it should call the superclass
    implementation (to raise ``TypeError``).

    """
    __all__ = ['__init__', 'default', 'encode', 'iterencode']
    item_separator = ', '
    key_separator = ': '

    def __init__(self, skipkeys=False, ensure_ascii=True,
                 check_circular=True, allow_nan=True, sort_keys=False,
                 indent=None, separators=None, encoding='utf-8',
                 default=None):
        """Constructor for JSONEncoder, with sensible defaults.

        If skipkeys is False, then it is a TypeError to attempt
        encoding of keys that are not str, bytes, int, float, bool or
        None.  If skipkeys is True, such items are simply skipped.

        If ensure_ascii is True, strings are encoded with
        ``encode_basestring_ascii`` so that all non-ASCII characters are
        escaped.  If ensure_ascii is false, ``encode_basestring`` is used
        and such characters are emitted as-is.

        If check_circular is True, then lists, dicts, and custom encoded
        objects will be checked for circular references during encoding to
        prevent an infinite recursion.  Otherwise, no such check takes
        place.

        If allow_nan is True, then NaN, Infinity, and -Infinity will be
        encoded as such.  This behavior is not JSON specification compliant,
        but is consistent with most JavaScript based encoders and decoders.
        Otherwise, it will be a ValueError to encode such floats.

        If sort_keys is True, then the output of dictionaries will be
        sorted by key; this is useful for regression tests to ensure
        that JSON serializations can be compared on a day-to-day basis.

        If indent is a non-negative integer, then JSON array
        elements and object members will be pretty-printed with that
        indent level.  An indent level of 0 will only insert newlines.
        None is the most compact representation.

        If specified, separators should be a (item_separator, key_separator)
        tuple.  The default is (', ', ': ').  To get the most compact JSON
        representation you should specify (',', ':') to eliminate whitespace.

        If specified, default is a function that gets called for objects
        that can't otherwise be serialized.  It should return a JSON
        encodable version of the object or raise a ``TypeError``.

        encoding is the codec used to decode ``bytes`` input (values and
        dict keys) into text before JSON-encoding.  The default is UTF-8,
        which is also used when encoding is None.

        """
        self.skipkeys = skipkeys
        self.ensure_ascii = ensure_ascii
        self.check_circular = check_circular
        self.allow_nan = allow_nan
        self.sort_keys = sort_keys
        self.indent = indent
        self.current_indent_level = 0
        if separators is not None:
            self.item_separator, self.key_separator = separators
        if default is not None:
            # Shadow the default() method with the supplied callable.
            self.default = default
        self.encoding = encoding

    def _newline_indent(self):
        """Return a newline followed by the indentation for the current level."""
        return '\n' + (' ' * (self.indent * self.current_indent_level))

    def _encode_string(self, s):
        """Return the quoted JSON form of the str or bytes object ``s``.

        Only bytes are decoded (text has no ``decode`` method); the codec is
        ``self.encoding``, falling back to UTF-8 when that is None.
        """
        if isinstance(s, bytes):
            s = s.decode(self.encoding or 'utf-8')
        if self.ensure_ascii:
            return encode_basestring_ascii(s)
        return encode_basestring(s)

    def _iterencode_list(self, lst, markers=None):
        """Yield the JSON text chunks for the list or tuple ``lst``.

        ``markers`` maps ``id()`` of containers currently being encoded to
        the container; it is None when circular checking is disabled.
        Raises ValueError if ``lst`` is already being encoded further up.
        """
        if not lst:
            yield '[]'
            return
        if markers is not None:
            markerid = id(lst)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = lst
        yield '['
        indented = self.indent is not None
        if indented:
            self.current_indent_level += 1
        try:
            if indented:
                newline_indent = self._newline_indent()
                separator = self.item_separator + newline_indent
                yield newline_indent
            else:
                separator = self.item_separator
            first = True
            for value in lst:
                if first:
                    first = False
                else:
                    yield separator
                for chunk in self._iterencode(value, markers):
                    yield chunk
        finally:
            # Always undo the increment, even when encoding an element
            # raises, so the encoder can be reused afterwards.
            if indented:
                self.current_indent_level -= 1
        if indented:
            yield self._newline_indent()
        yield ']'
        if markers is not None:
            del markers[markerid]

    def _iterencode_dict(self, dct, markers=None):
        """Yield the JSON text chunks for the dict ``dct``.

        Keys that are str or bytes are used as strings; True, False, None,
        floats and ints are converted to their JSON text and then quoted.
        Any other key is skipped when ``skipkeys`` is set and raises
        TypeError otherwise.  Raises ValueError on a circular reference.
        """
        if not dct:
            yield '{}'
            return
        if markers is not None:
            markerid = id(dct)
            if markerid in markers:
                raise ValueError("Circular reference detected")
            markers[markerid] = dct
        yield '{'
        key_separator = self.key_separator
        indented = self.indent is not None
        if indented:
            self.current_indent_level += 1
        try:
            if indented:
                newline_indent = self._newline_indent()
                item_separator = self.item_separator + newline_indent
                yield newline_indent
            else:
                item_separator = self.item_separator
            allow_nan = self.allow_nan
            if self.sort_keys:
                items = [(k, dct[k]) for k in sorted(dct.keys())]
            else:
                items = dct.items()
            first = True
            for key, value in items:
                if isinstance(key, (str, bytes)):
                    pass
                # JavaScript is weakly typed for these, so it makes sense to
                # also allow them.  The singleton checks must come before the
                # int check because bool is a subclass of int and would
                # otherwise be rendered as "True"/"False".
                elif key is True:
                    key = 'true'
                elif key is False:
                    key = 'false'
                elif key is None:
                    key = 'null'
                elif isinstance(key, float):
                    key = floatstr(key, allow_nan)
                elif isinstance(key, int):
                    key = str(key)
                elif self.skipkeys:
                    continue
                else:
                    raise TypeError(
                        "key {0!r} is not a string".format(key))
                if first:
                    first = False
                else:
                    yield item_separator
                yield self._encode_string(key)
                yield key_separator
                for chunk in self._iterencode(value, markers):
                    yield chunk
        finally:
            # Always undo the increment, even when encoding a member
            # raises, so the encoder can be reused afterwards.
            if indented:
                self.current_indent_level -= 1
        if indented:
            yield self._newline_indent()
        yield '}'
        if markers is not None:
            del markers[markerid]

    def _iterencode(self, o, markers=None):
        """Yield the JSON text chunks for an arbitrary object ``o``.

        Dispatches on the type of ``o``; objects of unsupported types are
        passed through ``default()`` and the result is encoded instead.
        """
        if isinstance(o, (str, bytes)):
            yield self._encode_string(o)
        elif o is None:
            yield 'null'
        elif o is True:
            yield 'true'
        elif o is False:
            yield 'false'
        elif isinstance(o, int):
            yield str(o)
        elif isinstance(o, float):
            yield floatstr(o, self.allow_nan)
        elif isinstance(o, (list, tuple)):
            for chunk in self._iterencode_list(o, markers):
                yield chunk
        elif isinstance(o, dict):
            for chunk in self._iterencode_dict(o, markers):
                yield chunk
        else:
            if markers is not None:
                markerid = id(o)
                if markerid in markers:
                    raise ValueError("Circular reference detected")
                markers[markerid] = o
            for chunk in self._iterencode_default(o, markers):
                yield chunk
            if markers is not None:
                del markers[markerid]

    def _iterencode_default(self, o, markers=None):
        """Convert ``o`` with ``default()`` and return an iterator over its chunks."""
        newobj = self.default(o)
        return self._iterencode(newobj, markers)

    def default(self, o):
        """Implement this method in a subclass such that it returns a
        serializable object for ``o``, or calls the base implementation
        (to raise a ``TypeError``).

        For example, to support arbitrary iterators, you could implement
        default like this::

            def default(self, o):
                try:
                    iterable = iter(o)
                except TypeError:
                    pass
                else:
                    return list(iterable)
                return JSONEncoder.default(self, o)

        """
        raise TypeError(repr(o) + " is not JSON serializable")

    def encode(self, o):
        """Return a JSON string representation of a Python data structure.

        >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
        '{"foo": ["bar", "baz"]}'

        """
        # This is for extremely simple cases and benchmarks.
        if isinstance(o, (str, bytes)):
            return self._encode_string(o)
        # This doesn't pass the iterator directly to ''.join() because the
        # exceptions aren't as detailed.  The list call should be roughly
        # equivalent to the PySequence_Fast that ''.join() would do.
        chunks = list(self.iterencode(o))
        return ''.join(chunks)

    def iterencode(self, o):
        """Encode the given object and yield each string representation as
        available.

        For example::

            for chunk in JSONEncoder().iterencode(bigobject):
                mysocket.write(chunk)

        """
        if self.check_circular:
            markers = {}
        else:
            markers = None
        return self._iterencode(o, markers)
384 return self._iterencode(o, markers)