Blame - Lib/json/encoder.py - platform/external/python/cpython3

blob: 4b214eb60e82130cb24d14f097b9c8f4896762cf [file] [log] [blame]

Christian Heimes	9054000	2008-05-08 14:29:10 +0000	[diff] [blame]	1	"""Implementation of JSONEncoder
				2	"""
Christian Heimes	9054000	2008-05-08 14:29:10 +0000	[diff] [blame]	3	import re
				4
				5	try:
				6	from _json import encode_basestring_ascii as c_encode_basestring_ascii
				7	except ImportError:
				8	c_encode_basestring_ascii = None
Benjamin Peterson	c6b607d	2009-05-02 12:36:44 +0000	[diff] [blame]	9	try:
				10	from _json import make_encoder as c_make_encoder
				11	except ImportError:
				12	c_make_encoder = None
Christian Heimes	9054000	2008-05-08 14:29:10 +0000	[diff] [blame]	13
				14	ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]')
				15	ESCAPE_ASCII = re.compile(r'([\\"]\|[^\ -~])')
Benjamin Peterson	c6b607d	2009-05-02 12:36:44 +0000	[diff] [blame]	16	HAS_UTF8 = re.compile(b'[\x80-\xff]')
Christian Heimes	9054000	2008-05-08 14:29:10 +0000	[diff] [blame]	17	ESCAPE_DCT = {
				18	'\\': '\\\\',
				19	'"': '\\"',
				20	'\b': '\\b',
				21	'\f': '\\f',
				22	'\n': '\\n',
				23	'\r': '\\r',
				24	'\t': '\\t',
				25	}
				26	for i in range(0x20):
				27	ESCAPE_DCT.setdefault(chr(i), '\\u{0:04x}'.format(i))
Benjamin Peterson	c6b607d	2009-05-02 12:36:44 +0000	[diff] [blame]	28	#ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,))
Christian Heimes	9054000	2008-05-08 14:29:10 +0000	[diff] [blame]	29
Benjamin Peterson	c6b607d	2009-05-02 12:36:44 +0000	[diff] [blame]	30	# Assume this produces an infinity on all machines (probably not guaranteed)
				31	INFINITY = float('1e66666')
Christian Heimes	9054000	2008-05-08 14:29:10 +0000	[diff] [blame]	32	FLOAT_REPR = repr
				33
def encode_basestring(s):
    """Return *s* as a double-quoted JSON string literal.

    Every character matched by ``ESCAPE`` (control characters, backslash and
    double quote) is replaced by its entry in ``ESCAPE_DCT``; all other
    characters, including non-ASCII ones, are copied through unchanged.

    """
    escaped = ESCAPE.sub(lambda match: ESCAPE_DCT[match.group(0)], s)
    return ''.join(('"', escaped, '"'))
				41
				42
				43	def py_encode_basestring_ascii(s):
Benjamin Peterson	c6b607d	2009-05-02 12:36:44 +0000	[diff] [blame]	44	"""Return an ASCII-only JSON representation of a Python string
				45
				46	"""
Christian Heimes	9054000	2008-05-08 14:29:10 +0000	[diff] [blame]	47	def replace(match):
				48	s = match.group(0)
				49	try:
				50	return ESCAPE_DCT[s]
				51	except KeyError:
				52	n = ord(s)
				53	if n < 0x10000:
				54	return '\\u{0:04x}'.format(n)
Benjamin Peterson	c6b607d	2009-05-02 12:36:44 +0000	[diff] [blame]	55	#return '\\u%04x' % (n,)
Christian Heimes	9054000	2008-05-08 14:29:10 +0000	[diff] [blame]	56	else:
				57	# surrogate pair
				58	n -= 0x10000
				59	s1 = 0xd800 \| ((n >> 10) & 0x3ff)
				60	s2 = 0xdc00 \| (n & 0x3ff)
				61	return '\\u{0:04x}\\u{1:04x}'.format(s1, s2)
Benjamin Peterson	c6b607d	2009-05-02 12:36:44 +0000	[diff] [blame]	62	return '"' + ESCAPE_ASCII.sub(replace, s) + '"'
Christian Heimes	9054000	2008-05-08 14:29:10 +0000	[diff] [blame]	63
				64
# Use the implementation imported from _json when it is available, and fall
# back to the pure-Python one otherwise.
encode_basestring_ascii = c_encode_basestring_ascii or py_encode_basestring_ascii
Christian Heimes	9054000	2008-05-08 14:29:10 +0000	[diff] [blame]	67
				68	class JSONEncoder(object):
				69	"""Extensible JSON <http://json.org> encoder for Python data structures.
				70
				71	Supports the following objects and types by default:
				72
				73	+-------------------+---------------+
				74	\| Python \| JSON \|
				75	+===================+===============+
				76	\| dict \| object \|
				77	+-------------------+---------------+
				78	\| list, tuple \| array \|
				79	+-------------------+---------------+
Georg Brandl	c8284cf	2010-08-02 20:16:18 +0000	[diff] [blame]	80	\| str \| string \|
Christian Heimes	9054000	2008-05-08 14:29:10 +0000	[diff] [blame]	81	+-------------------+---------------+
Georg Brandl	c8284cf	2010-08-02 20:16:18 +0000	[diff] [blame]	82	\| int, float \| number \|
Christian Heimes	9054000	2008-05-08 14:29:10 +0000	[diff] [blame]	83	+-------------------+---------------+
				84	\| True \| true \|
				85	+-------------------+---------------+
				86	\| False \| false \|
				87	+-------------------+---------------+
				88	\| None \| null \|
				89	+-------------------+---------------+
				90
				91	To extend this to recognize other objects, subclass and implement a
				92	``.default()`` method with another method that returns a serializable
				93	object for ``o`` if possible, otherwise it should call the superclass
				94	implementation (to raise ``TypeError``).
				95
				96	"""
Christian Heimes	9054000	2008-05-08 14:29:10 +0000	[diff] [blame]	97	item_separator = ', '
				98	key_separator = ': '
				99	def __init__(self, skipkeys=False, ensure_ascii=True,
				100	check_circular=True, allow_nan=True, sort_keys=False,
Benjamin Peterson	c6b607d	2009-05-02 12:36:44 +0000	[diff] [blame]	101	indent=None, separators=None, default=None):
Christian Heimes	9054000	2008-05-08 14:29:10 +0000	[diff] [blame]	102	"""Constructor for JSONEncoder, with sensible defaults.
				103
Benjamin Peterson	c6b607d	2009-05-02 12:36:44 +0000	[diff] [blame]	104	If skipkeys is false, then it is a TypeError to attempt
Georg Brandl	c8284cf	2010-08-02 20:16:18 +0000	[diff] [blame]	105	encoding of keys that are not str, int, float or None. If
Christian Heimes	9054000	2008-05-08 14:29:10 +0000	[diff] [blame]	106	skipkeys is True, such items are simply skipped.
				107
Benjamin Peterson	c6b607d	2009-05-02 12:36:44 +0000	[diff] [blame]	108	If ensure_ascii is true, the output is guaranteed to be str
Georg Brandl	c8284cf	2010-08-02 20:16:18 +0000	[diff] [blame]	109	objects with all incoming non-ASCII characters escaped. If
				110	ensure_ascii is false, the output can contain non-ASCII characters.
Christian Heimes	9054000	2008-05-08 14:29:10 +0000	[diff] [blame]	111
Benjamin Peterson	c6b607d	2009-05-02 12:36:44 +0000	[diff] [blame]	112	If check_circular is true, then lists, dicts, and custom encoded
Christian Heimes	9054000	2008-05-08 14:29:10 +0000	[diff] [blame]	113	objects will be checked for circular references during encoding to
				114	prevent an infinite recursion (which would cause an OverflowError).
				115	Otherwise, no such check takes place.
				116
Benjamin Peterson	c6b607d	2009-05-02 12:36:44 +0000	[diff] [blame]	117	If allow_nan is true, then NaN, Infinity, and -Infinity will be
Christian Heimes	9054000	2008-05-08 14:29:10 +0000	[diff] [blame]	118	encoded as such. This behavior is not JSON specification compliant,
				119	but is consistent with most JavaScript based encoders and decoders.
				120	Otherwise, it will be a ValueError to encode such floats.
				121
Benjamin Peterson	c6b607d	2009-05-02 12:36:44 +0000	[diff] [blame]	122	If sort_keys is true, then the output of dictionaries will be
Christian Heimes	9054000	2008-05-08 14:29:10 +0000	[diff] [blame]	123	sorted by key; this is useful for regression tests to ensure
				124	that JSON serializations can be compared on a day-to-day basis.
				125
				126	If indent is a non-negative integer, then JSON array
				127	elements and object members will be pretty-printed with that
				128	indent level. An indent level of 0 will only insert newlines.
				129	None is the most compact representation.
				130
				131	If specified, separators should be a (item_separator, key_separator)
				132	tuple. The default is (', ', ': '). To get the most compact JSON
				133	representation you should specify (',', ':') to eliminate whitespace.
				134
				135	If specified, default is a function that gets called for objects
				136	that can't otherwise be serialized. It should return a JSON encodable
				137	version of the object or raise a ``TypeError``.
				138
Christian Heimes	9054000	2008-05-08 14:29:10 +0000	[diff] [blame]	139	"""
Benjamin Peterson	c6b607d	2009-05-02 12:36:44 +0000	[diff] [blame]	140
Christian Heimes	9054000	2008-05-08 14:29:10 +0000	[diff] [blame]	141	self.skipkeys = skipkeys
				142	self.ensure_ascii = ensure_ascii
				143	self.check_circular = check_circular
				144	self.allow_nan = allow_nan
				145	self.sort_keys = sort_keys
				146	self.indent = indent
Christian Heimes	9054000	2008-05-08 14:29:10 +0000	[diff] [blame]	147	if separators is not None:
				148	self.item_separator, self.key_separator = separators
				149	if default is not None:
				150	self.default = default
Christian Heimes	9054000	2008-05-08 14:29:10 +0000	[diff] [blame]	151
				152	def default(self, o):
Benjamin Peterson	c6b607d	2009-05-02 12:36:44 +0000	[diff] [blame]	153	"""Implement this method in a subclass such that it returns
				154	a serializable object for ``o``, or calls the base implementation
				155	(to raise a ``TypeError``).
Christian Heimes	9054000	2008-05-08 14:29:10 +0000	[diff] [blame]	156
Benjamin Peterson	c6b607d	2009-05-02 12:36:44 +0000	[diff] [blame]	157	For example, to support arbitrary iterators, you could
				158	implement default like this::
Christian Heimes	9054000	2008-05-08 14:29:10 +0000	[diff] [blame]	159
				160	def default(self, o):
				161	try:
				162	iterable = iter(o)
				163	except TypeError:
				164	pass
				165	else:
				166	return list(iterable)
				167	return JSONEncoder.default(self, o)
				168
				169	"""
				170	raise TypeError(repr(o) + " is not JSON serializable")
				171
				172	def encode(self, o):
				173	"""Return a JSON string representation of a Python data structure.
				174
				175	>>> JSONEncoder().encode({"foo": ["bar", "baz"]})
				176	'{"foo": ["bar", "baz"]}'
				177
				178	"""
				179	# This is for extremely simple cases and benchmarks.
Benjamin Peterson	c6b607d	2009-05-02 12:36:44 +0000	[diff] [blame]	180	if isinstance(o, str):
Christian Heimes	9054000	2008-05-08 14:29:10 +0000	[diff] [blame]	181	if self.ensure_ascii:
				182	return encode_basestring_ascii(o)
				183	else:
				184	return encode_basestring(o)
				185	# This doesn't pass the iterator directly to ''.join() because the
				186	# exceptions aren't as detailed. The list call should be roughly
				187	# equivalent to the PySequence_Fast that ''.join() would do.
Benjamin Peterson	c6b607d	2009-05-02 12:36:44 +0000	[diff] [blame]	188	chunks = self.iterencode(o, _one_shot=True)
				189	if not isinstance(chunks, (list, tuple)):
				190	chunks = list(chunks)
Christian Heimes	9054000	2008-05-08 14:29:10 +0000	[diff] [blame]	191	return ''.join(chunks)
				192
Benjamin Peterson	c6b607d	2009-05-02 12:36:44 +0000	[diff] [blame]	193	def iterencode(self, o, _one_shot=False):
				194	"""Encode the given object and yield each string
				195	representation as available.
Christian Heimes	9054000	2008-05-08 14:29:10 +0000	[diff] [blame]	196
				197	For example::
				198
				199	for chunk in JSONEncoder().iterencode(bigobject):
				200	mysocket.write(chunk)
				201
				202	"""
				203	if self.check_circular:
				204	markers = {}
				205	else:
				206	markers = None
Benjamin Peterson	c6b607d	2009-05-02 12:36:44 +0000	[diff] [blame]	207	if self.ensure_ascii:
				208	_encoder = encode_basestring_ascii
				209	else:
				210	_encoder = encode_basestring
				211
				212	def floatstr(o, allow_nan=self.allow_nan,
				213	_repr=FLOAT_REPR, _inf=INFINITY, _neginf=-INFINITY):
				214	# Check for specials. Note that this type of test is processor
				215	# and/or platform-specific, so do tests which don't depend on the
				216	# internals.
				217
				218	if o != o:
				219	text = 'NaN'
				220	elif o == _inf:
				221	text = 'Infinity'
				222	elif o == _neginf:
				223	text = '-Infinity'
				224	else:
				225	return _repr(o)
				226
				227	if not allow_nan:
				228	raise ValueError(
				229	"Out of range float values are not JSON compliant: " +
				230	repr(o))
				231
				232	return text
				233
				234
				235	if (_one_shot and c_make_encoder is not None
R David Murray	3dd02d6	2011-04-12 21:02:45 -0400	[diff] [blame]	236	and self.indent is None):
Benjamin Peterson	c6b607d	2009-05-02 12:36:44 +0000	[diff] [blame]	237	_iterencode = c_make_encoder(
				238	markers, self.default, _encoder, self.indent,
				239	self.key_separator, self.item_separator, self.sort_keys,
				240	self.skipkeys, self.allow_nan)
				241	else:
				242	_iterencode = _make_iterencode(
				243	markers, self.default, _encoder, self.indent, floatstr,
				244	self.key_separator, self.item_separator, self.sort_keys,
				245	self.skipkeys, _one_shot)
				246	return _iterencode(o, 0)
				247
				248	def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
				249	_key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,
				250	## HACK: hand-optimized bytecode; turn globals into locals
				251	ValueError=ValueError,
				252	dict=dict,
				253	float=float,
				254	id=id,
				255	int=int,
				256	isinstance=isinstance,
				257	list=list,
				258	str=str,
				259	tuple=tuple,
				260	):
				261
Raymond Hettinger	b643ef8	2010-10-31 08:00:16 +0000	[diff] [blame]	262	if _indent is not None and not isinstance(_indent, str):
				263	_indent = ' ' * _indent
				264
Benjamin Peterson	c6b607d	2009-05-02 12:36:44 +0000	[diff] [blame]	265	def _iterencode_list(lst, _current_indent_level):
				266	if not lst:
				267	yield '[]'
				268	return
				269	if markers is not None:
				270	markerid = id(lst)
				271	if markerid in markers:
				272	raise ValueError("Circular reference detected")
				273	markers[markerid] = lst
				274	buf = '['
				275	if _indent is not None:
				276	_current_indent_level += 1
Raymond Hettinger	b643ef8	2010-10-31 08:00:16 +0000	[diff] [blame]	277	newline_indent = '\n' + _indent * _current_indent_level
Benjamin Peterson	c6b607d	2009-05-02 12:36:44 +0000	[diff] [blame]	278	separator = _item_separator + newline_indent
				279	buf += newline_indent
				280	else:
				281	newline_indent = None
				282	separator = _item_separator
				283	first = True
				284	for value in lst:
				285	if first:
				286	first = False
				287	else:
				288	buf = separator
				289	if isinstance(value, str):
				290	yield buf + _encoder(value)
				291	elif value is None:
				292	yield buf + 'null'
				293	elif value is True:
				294	yield buf + 'true'
				295	elif value is False:
				296	yield buf + 'false'
				297	elif isinstance(value, int):
				298	yield buf + str(value)
				299	elif isinstance(value, float):
				300	yield buf + _floatstr(value)
				301	else:
				302	yield buf
				303	if isinstance(value, (list, tuple)):
				304	chunks = _iterencode_list(value, _current_indent_level)
				305	elif isinstance(value, dict):
				306	chunks = _iterencode_dict(value, _current_indent_level)
				307	else:
				308	chunks = _iterencode(value, _current_indent_level)
				309	for chunk in chunks:
				310	yield chunk
				311	if newline_indent is not None:
				312	_current_indent_level -= 1
Raymond Hettinger	b643ef8	2010-10-31 08:00:16 +0000	[diff] [blame]	313	yield '\n' + _indent * _current_indent_level
Benjamin Peterson	c6b607d	2009-05-02 12:36:44 +0000	[diff] [blame]	314	yield ']'
				315	if markers is not None:
				316	del markers[markerid]
				317
				318	def _iterencode_dict(dct, _current_indent_level):
				319	if not dct:
				320	yield '{}'
				321	return
				322	if markers is not None:
				323	markerid = id(dct)
				324	if markerid in markers:
				325	raise ValueError("Circular reference detected")
				326	markers[markerid] = dct
				327	yield '{'
				328	if _indent is not None:
				329	_current_indent_level += 1
Raymond Hettinger	b643ef8	2010-10-31 08:00:16 +0000	[diff] [blame]	330	newline_indent = '\n' + _indent * _current_indent_level
Benjamin Peterson	c6b607d	2009-05-02 12:36:44 +0000	[diff] [blame]	331	item_separator = _item_separator + newline_indent
				332	yield newline_indent
				333	else:
				334	newline_indent = None
				335	item_separator = _item_separator
				336	first = True
				337	if _sort_keys:
				338	items = sorted(dct.items(), key=lambda kv: kv[0])
				339	else:
				340	items = dct.items()
				341	for key, value in items:
				342	if isinstance(key, str):
				343	pass
				344	# JavaScript is weakly typed for these, so it makes sense to
				345	# also allow them. Many encoders seem to do something like this.
				346	elif isinstance(key, float):
				347	key = _floatstr(key)
				348	elif key is True:
				349	key = 'true'
				350	elif key is False:
				351	key = 'false'
				352	elif key is None:
				353	key = 'null'
				354	elif isinstance(key, int):
				355	key = str(key)
				356	elif _skipkeys:
				357	continue
				358	else:
				359	raise TypeError("key " + repr(key) + " is not a string")
				360	if first:
				361	first = False
				362	else:
				363	yield item_separator
				364	yield _encoder(key)
				365	yield _key_separator
				366	if isinstance(value, str):
				367	yield _encoder(value)
				368	elif value is None:
				369	yield 'null'
				370	elif value is True:
				371	yield 'true'
				372	elif value is False:
				373	yield 'false'
				374	elif isinstance(value, int):
				375	yield str(value)
				376	elif isinstance(value, float):
				377	yield _floatstr(value)
				378	else:
				379	if isinstance(value, (list, tuple)):
				380	chunks = _iterencode_list(value, _current_indent_level)
				381	elif isinstance(value, dict):
				382	chunks = _iterencode_dict(value, _current_indent_level)
				383	else:
				384	chunks = _iterencode(value, _current_indent_level)
				385	for chunk in chunks:
				386	yield chunk
				387	if newline_indent is not None:
				388	_current_indent_level -= 1
Raymond Hettinger	b643ef8	2010-10-31 08:00:16 +0000	[diff] [blame]	389	yield '\n' + _indent * _current_indent_level
Benjamin Peterson	c6b607d	2009-05-02 12:36:44 +0000	[diff] [blame]	390	yield '}'
				391	if markers is not None:
				392	del markers[markerid]
				393
				394	def _iterencode(o, _current_indent_level):
				395	if isinstance(o, str):
				396	yield _encoder(o)
				397	elif o is None:
				398	yield 'null'
				399	elif o is True:
				400	yield 'true'
				401	elif o is False:
				402	yield 'false'
Florent Xicluna	02ea12b2	2010-07-28 16:39:41 +0000	[diff] [blame]	403	elif isinstance(o, int):
Benjamin Peterson	c6b607d	2009-05-02 12:36:44 +0000	[diff] [blame]	404	yield str(o)
				405	elif isinstance(o, float):
				406	yield _floatstr(o)
				407	elif isinstance(o, (list, tuple)):
				408	for chunk in _iterencode_list(o, _current_indent_level):
				409	yield chunk
				410	elif isinstance(o, dict):
				411	for chunk in _iterencode_dict(o, _current_indent_level):
				412	yield chunk
				413	else:
				414	if markers is not None:
				415	markerid = id(o)
				416	if markerid in markers:
				417	raise ValueError("Circular reference detected")
				418	markers[markerid] = o
				419	o = _default(o)
				420	for chunk in _iterencode(o, _current_indent_level):
				421	yield chunk
				422	if markers is not None:
				423	del markers[markerid]
				424	return _iterencode