Blame - Lib/json/encoder.py - platform/external/python/cpython2

blob: b0d745b5dc059acac5bd6a0af5b78353855e93eb [file] [log] [blame]

Brett Cannon	4b964f9	2008-05-05 20:21:38 +0000	[diff] [blame]	1	"""Implementation of JSONEncoder
				2	"""
Brett Cannon	4b964f9	2008-05-05 20:21:38 +0000	[diff] [blame]	3	import re
				4
				5	try:
				6	from _json import encode_basestring_ascii as c_encode_basestring_ascii
				7	except ImportError:
				8	c_encode_basestring_ascii = None
Bob Ippolito	d914e3f	2009-03-17 23:19:00 +0000	[diff] [blame]	9	try:
				10	from _json import make_encoder as c_make_encoder
				11	except ImportError:
				12	c_make_encoder = None
Brett Cannon	4b964f9	2008-05-05 20:21:38 +0000	[diff] [blame]	13
				14	ESCAPE = re.compile(r'[\x00-\x1f\\"\b\f\n\r\t]')
				15	ESCAPE_ASCII = re.compile(r'([\\"]\|[^\ -~])')
				16	HAS_UTF8 = re.compile(r'[\x80-\xff]')
				17	ESCAPE_DCT = {
				18	'\\': '\\\\',
				19	'"': '\\"',
				20	'\b': '\\b',
				21	'\f': '\\f',
				22	'\n': '\\n',
				23	'\r': '\\r',
				24	'\t': '\\t',
				25	}
				26	for i in range(0x20):
				27	ESCAPE_DCT.setdefault(chr(i), '\\u{0:04x}'.format(i))
Bob Ippolito	d914e3f	2009-03-17 23:19:00 +0000	[diff] [blame]	28	#ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,))
Brett Cannon	4b964f9	2008-05-05 20:21:38 +0000	[diff] [blame]	29
Ezio Melotti	ed8cf7a	2012-05-21 17:46:55 -0600	[diff] [blame]	30	INFINITY = float('inf')
Brett Cannon	4b964f9	2008-05-05 20:21:38 +0000	[diff] [blame]	31	FLOAT_REPR = repr
				32
def encode_basestring(s):
    """Return a JSON representation of a Python string

    Every character matched by ESCAPE is replaced with its entry in
    ESCAPE_DCT and the result is wrapped in double quotes.

    """
    escaped = ESCAPE.sub(lambda match: ESCAPE_DCT[match.group(0)], s)
    return '"' + escaped + '"'
				40
				41
				42	def py_encode_basestring_ascii(s):
Bob Ippolito	d914e3f	2009-03-17 23:19:00 +0000	[diff] [blame]	43	"""Return an ASCII-only JSON representation of a Python string
				44
				45	"""
Brett Cannon	4b964f9	2008-05-05 20:21:38 +0000	[diff] [blame]	46	if isinstance(s, str) and HAS_UTF8.search(s) is not None:
				47	s = s.decode('utf-8')
				48	def replace(match):
				49	s = match.group(0)
				50	try:
				51	return ESCAPE_DCT[s]
				52	except KeyError:
				53	n = ord(s)
				54	if n < 0x10000:
				55	return '\\u{0:04x}'.format(n)
Bob Ippolito	d914e3f	2009-03-17 23:19:00 +0000	[diff] [blame]	56	#return '\\u%04x' % (n,)
Brett Cannon	4b964f9	2008-05-05 20:21:38 +0000	[diff] [blame]	57	else:
				58	# surrogate pair
				59	n -= 0x10000
				60	s1 = 0xd800 \| ((n >> 10) & 0x3ff)
				61	s2 = 0xdc00 \| (n & 0x3ff)
				62	return '\\u{0:04x}\\u{1:04x}'.format(s1, s2)
Bob Ippolito	d914e3f	2009-03-17 23:19:00 +0000	[diff] [blame]	63	#return '\\u%04x\\u%04x' % (s1, s2)
Brett Cannon	4b964f9	2008-05-05 20:21:38 +0000	[diff] [blame]	64	return '"' + str(ESCAPE_ASCII.sub(replace, s)) + '"'
				65
				66
# Use the C escaper imported from _json when it is available; otherwise fall
# back to the pure-Python implementation defined in this module.
encode_basestring_ascii = (
    c_encode_basestring_ascii
    or py_encode_basestring_ascii
)
Brett Cannon	4b964f9	2008-05-05 20:21:38 +0000	[diff] [blame]	69
				70	class JSONEncoder(object):
				71	"""Extensible JSON <http://json.org> encoder for Python data structures.
				72
				73	Supports the following objects and types by default:
				74
				75	+-------------------+---------------+
				76	\| Python \| JSON \|
				77	+===================+===============+
				78	\| dict \| object \|
				79	+-------------------+---------------+
				80	\| list, tuple \| array \|
				81	+-------------------+---------------+
				82	\| str, unicode \| string \|
				83	+-------------------+---------------+
				84	\| int, long, float \| number \|
				85	+-------------------+---------------+
				86	\| True \| true \|
				87	+-------------------+---------------+
				88	\| False \| false \|
				89	+-------------------+---------------+
				90	\| None \| null \|
				91	+-------------------+---------------+
				92
				93	To extend this to recognize other objects, subclass and implement a
				94	``.default()`` method with another method that returns a serializable
				95	object for ``o`` if possible, otherwise it should call the superclass
				96	implementation (to raise ``TypeError``).
				97
				98	"""
Brett Cannon	4b964f9	2008-05-05 20:21:38 +0000	[diff] [blame]	99	item_separator = ', '
				100	key_separator = ': '
				101	def __init__(self, skipkeys=False, ensure_ascii=True,
				102	check_circular=True, allow_nan=True, sort_keys=False,
				103	indent=None, separators=None, encoding='utf-8', default=None):
				104	"""Constructor for JSONEncoder, with sensible defaults.
				105
Bob Ippolito	d914e3f	2009-03-17 23:19:00 +0000	[diff] [blame]	106	If skipkeys is false, then it is a TypeError to attempt
Brett Cannon	4b964f9	2008-05-05 20:21:38 +0000	[diff] [blame]	107	encoding of keys that are not str, int, long, float or None. If
				108	skipkeys is True, such items are simply skipped.
				109
Bob Ippolito	d914e3f	2009-03-17 23:19:00 +0000	[diff] [blame]	110	If ensure_ascii is true, the output is guaranteed to be str
Brett Cannon	4b964f9	2008-05-05 20:21:38 +0000	[diff] [blame]	111	objects with all incoming unicode characters escaped. If
				112	ensure_ascii is false, the output will be unicode object.
				113
Bob Ippolito	d914e3f	2009-03-17 23:19:00 +0000	[diff] [blame]	114	If check_circular is true, then lists, dicts, and custom encoded
Brett Cannon	4b964f9	2008-05-05 20:21:38 +0000	[diff] [blame]	115	objects will be checked for circular references during encoding to
				116	prevent an infinite recursion (which would cause an OverflowError).
				117	Otherwise, no such check takes place.
				118
Bob Ippolito	d914e3f	2009-03-17 23:19:00 +0000	[diff] [blame]	119	If allow_nan is true, then NaN, Infinity, and -Infinity will be
Brett Cannon	4b964f9	2008-05-05 20:21:38 +0000	[diff] [blame]	120	encoded as such. This behavior is not JSON specification compliant,
				121	but is consistent with most JavaScript based encoders and decoders.
				122	Otherwise, it will be a ValueError to encode such floats.
				123
Bob Ippolito	d914e3f	2009-03-17 23:19:00 +0000	[diff] [blame]	124	If sort_keys is true, then the output of dictionaries will be
Brett Cannon	4b964f9	2008-05-05 20:21:38 +0000	[diff] [blame]	125	sorted by key; this is useful for regression tests to ensure
				126	that JSON serializations can be compared on a day-to-day basis.
				127
				128	If indent is a non-negative integer, then JSON array
				129	elements and object members will be pretty-printed with that
				130	indent level. An indent level of 0 will only insert newlines.
				131	None is the most compact representation.
				132
				133	If specified, separators should be a (item_separator, key_separator)
				134	tuple. The default is (', ', ': '). To get the most compact JSON
				135	representation you should specify (',', ':') to eliminate whitespace.
				136
				137	If specified, default is a function that gets called for objects
				138	that can't otherwise be serialized. It should return a JSON encodable
				139	version of the object or raise a ``TypeError``.
				140
				141	If encoding is not None, then all input strings will be
				142	transformed into unicode using that encoding prior to JSON-encoding.
				143	The default is UTF-8.
				144
				145	"""
Bob Ippolito	d914e3f	2009-03-17 23:19:00 +0000	[diff] [blame]	146
Brett Cannon	4b964f9	2008-05-05 20:21:38 +0000	[diff] [blame]	147	self.skipkeys = skipkeys
				148	self.ensure_ascii = ensure_ascii
				149	self.check_circular = check_circular
				150	self.allow_nan = allow_nan
				151	self.sort_keys = sort_keys
				152	self.indent = indent
Brett Cannon	4b964f9	2008-05-05 20:21:38 +0000	[diff] [blame]	153	if separators is not None:
				154	self.item_separator, self.key_separator = separators
				155	if default is not None:
				156	self.default = default
				157	self.encoding = encoding
				158
Brett Cannon	4b964f9	2008-05-05 20:21:38 +0000	[diff] [blame]	159	def default(self, o):
Bob Ippolito	d914e3f	2009-03-17 23:19:00 +0000	[diff] [blame]	160	"""Implement this method in a subclass such that it returns
				161	a serializable object for ``o``, or calls the base implementation
				162	(to raise a ``TypeError``).
Brett Cannon	4b964f9	2008-05-05 20:21:38 +0000	[diff] [blame]	163
Bob Ippolito	d914e3f	2009-03-17 23:19:00 +0000	[diff] [blame]	164	For example, to support arbitrary iterators, you could
				165	implement default like this::
Brett Cannon	4b964f9	2008-05-05 20:21:38 +0000	[diff] [blame]	166
				167	def default(self, o):
				168	try:
				169	iterable = iter(o)
				170	except TypeError:
				171	pass
				172	else:
				173	return list(iterable)
				174	return JSONEncoder.default(self, o)
				175
				176	"""
				177	raise TypeError(repr(o) + " is not JSON serializable")
				178
				179	def encode(self, o):
				180	"""Return a JSON string representation of a Python data structure.
				181
				182	>>> JSONEncoder().encode({"foo": ["bar", "baz"]})
				183	'{"foo": ["bar", "baz"]}'
				184
				185	"""
				186	# This is for extremely simple cases and benchmarks.
				187	if isinstance(o, basestring):
				188	if isinstance(o, str):
				189	_encoding = self.encoding
				190	if (_encoding is not None
				191	and not (_encoding == 'utf-8')):
				192	o = o.decode(_encoding)
				193	if self.ensure_ascii:
				194	return encode_basestring_ascii(o)
				195	else:
				196	return encode_basestring(o)
				197	# This doesn't pass the iterator directly to ''.join() because the
				198	# exceptions aren't as detailed. The list call should be roughly
				199	# equivalent to the PySequence_Fast that ''.join() would do.
Bob Ippolito	d914e3f	2009-03-17 23:19:00 +0000	[diff] [blame]	200	chunks = self.iterencode(o, _one_shot=True)
				201	if not isinstance(chunks, (list, tuple)):
				202	chunks = list(chunks)
Brett Cannon	4b964f9	2008-05-05 20:21:38 +0000	[diff] [blame]	203	return ''.join(chunks)
				204
Bob Ippolito	d914e3f	2009-03-17 23:19:00 +0000	[diff] [blame]	205	def iterencode(self, o, _one_shot=False):
				206	"""Encode the given object and yield each string
				207	representation as available.
Brett Cannon	4b964f9	2008-05-05 20:21:38 +0000	[diff] [blame]	208
				209	For example::
				210
				211	for chunk in JSONEncoder().iterencode(bigobject):
				212	mysocket.write(chunk)
				213
				214	"""
				215	if self.check_circular:
				216	markers = {}
				217	else:
				218	markers = None
Bob Ippolito	d914e3f	2009-03-17 23:19:00 +0000	[diff] [blame]	219	if self.ensure_ascii:
				220	_encoder = encode_basestring_ascii
				221	else:
				222	_encoder = encode_basestring
				223	if self.encoding != 'utf-8':
				224	def _encoder(o, _orig_encoder=_encoder, _encoding=self.encoding):
				225	if isinstance(o, str):
				226	o = o.decode(_encoding)
				227	return _orig_encoder(o)
				228
				229	def floatstr(o, allow_nan=self.allow_nan,
				230	_repr=FLOAT_REPR, _inf=INFINITY, _neginf=-INFINITY):
				231	# Check for specials. Note that this type of test is processor
				232	# and/or platform-specific, so do tests which don't depend on the
				233	# internals.
				234
				235	if o != o:
				236	text = 'NaN'
				237	elif o == _inf:
				238	text = 'Infinity'
				239	elif o == _neginf:
				240	text = '-Infinity'
				241	else:
				242	return _repr(o)
				243
				244	if not allow_nan:
				245	raise ValueError(
				246	"Out of range float values are not JSON compliant: " +
				247	repr(o))
				248
				249	return text
				250
				251
				252	if (_one_shot and c_make_encoder is not None
R David Murray	ea8b6ef	2011-04-12 21:00:26 -0400	[diff] [blame]	253	and self.indent is None and not self.sort_keys):
Bob Ippolito	d914e3f	2009-03-17 23:19:00 +0000	[diff] [blame]	254	_iterencode = c_make_encoder(
				255	markers, self.default, _encoder, self.indent,
				256	self.key_separator, self.item_separator, self.sort_keys,
				257	self.skipkeys, self.allow_nan)
				258	else:
				259	_iterencode = _make_iterencode(
				260	markers, self.default, _encoder, self.indent, floatstr,
				261	self.key_separator, self.item_separator, self.sort_keys,
				262	self.skipkeys, _one_shot)
				263	return _iterencode(o, 0)
				264
				265	def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
				266	_key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,
				267	## HACK: hand-optimized bytecode; turn globals into locals
Bob Ippolito	d914e3f	2009-03-17 23:19:00 +0000	[diff] [blame]	268	ValueError=ValueError,
				269	basestring=basestring,
				270	dict=dict,
				271	float=float,
				272	id=id,
				273	int=int,
				274	isinstance=isinstance,
				275	list=list,
				276	long=long,
				277	str=str,
				278	tuple=tuple,
				279	):
				280
				281	def _iterencode_list(lst, _current_indent_level):
				282	if not lst:
				283	yield '[]'
				284	return
				285	if markers is not None:
				286	markerid = id(lst)
				287	if markerid in markers:
				288	raise ValueError("Circular reference detected")
				289	markers[markerid] = lst
				290	buf = '['
				291	if _indent is not None:
				292	_current_indent_level += 1
				293	newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
				294	separator = _item_separator + newline_indent
				295	buf += newline_indent
				296	else:
				297	newline_indent = None
				298	separator = _item_separator
				299	first = True
				300	for value in lst:
				301	if first:
				302	first = False
				303	else:
				304	buf = separator
				305	if isinstance(value, basestring):
				306	yield buf + _encoder(value)
				307	elif value is None:
				308	yield buf + 'null'
				309	elif value is True:
				310	yield buf + 'true'
				311	elif value is False:
				312	yield buf + 'false'
				313	elif isinstance(value, (int, long)):
				314	yield buf + str(value)
				315	elif isinstance(value, float):
				316	yield buf + _floatstr(value)
				317	else:
				318	yield buf
				319	if isinstance(value, (list, tuple)):
				320	chunks = _iterencode_list(value, _current_indent_level)
				321	elif isinstance(value, dict):
				322	chunks = _iterencode_dict(value, _current_indent_level)
				323	else:
				324	chunks = _iterencode(value, _current_indent_level)
				325	for chunk in chunks:
				326	yield chunk
				327	if newline_indent is not None:
				328	_current_indent_level -= 1
				329	yield '\n' + (' ' * (_indent * _current_indent_level))
				330	yield ']'
				331	if markers is not None:
				332	del markers[markerid]
				333
				334	def _iterencode_dict(dct, _current_indent_level):
				335	if not dct:
				336	yield '{}'
				337	return
				338	if markers is not None:
				339	markerid = id(dct)
				340	if markerid in markers:
				341	raise ValueError("Circular reference detected")
				342	markers[markerid] = dct
				343	yield '{'
				344	if _indent is not None:
				345	_current_indent_level += 1
				346	newline_indent = '\n' + (' ' * (_indent * _current_indent_level))
				347	item_separator = _item_separator + newline_indent
				348	yield newline_indent
				349	else:
				350	newline_indent = None
				351	item_separator = _item_separator
				352	first = True
				353	if _sort_keys:
Ezio Melotti	ffd8496	2010-01-26 15:57:21 +0000	[diff] [blame]	354	items = sorted(dct.items(), key=lambda kv: kv[0])
Bob Ippolito	d914e3f	2009-03-17 23:19:00 +0000	[diff] [blame]	355	else:
				356	items = dct.iteritems()
				357	for key, value in items:
				358	if isinstance(key, basestring):
				359	pass
				360	# JavaScript is weakly typed for these, so it makes sense to
				361	# also allow them. Many encoders seem to do something like this.
				362	elif isinstance(key, float):
				363	key = _floatstr(key)
				364	elif key is True:
				365	key = 'true'
				366	elif key is False:
				367	key = 'false'
				368	elif key is None:
				369	key = 'null'
				370	elif isinstance(key, (int, long)):
				371	key = str(key)
				372	elif _skipkeys:
				373	continue
				374	else:
				375	raise TypeError("key " + repr(key) + " is not a string")
				376	if first:
				377	first = False
				378	else:
				379	yield item_separator
				380	yield _encoder(key)
				381	yield _key_separator
				382	if isinstance(value, basestring):
				383	yield _encoder(value)
				384	elif value is None:
				385	yield 'null'
				386	elif value is True:
				387	yield 'true'
				388	elif value is False:
				389	yield 'false'
				390	elif isinstance(value, (int, long)):
				391	yield str(value)
				392	elif isinstance(value, float):
				393	yield _floatstr(value)
				394	else:
				395	if isinstance(value, (list, tuple)):
				396	chunks = _iterencode_list(value, _current_indent_level)
				397	elif isinstance(value, dict):
				398	chunks = _iterencode_dict(value, _current_indent_level)
				399	else:
				400	chunks = _iterencode(value, _current_indent_level)
				401	for chunk in chunks:
				402	yield chunk
				403	if newline_indent is not None:
				404	_current_indent_level -= 1
				405	yield '\n' + (' ' * (_indent * _current_indent_level))
				406	yield '}'
				407	if markers is not None:
				408	del markers[markerid]
				409
				410	def _iterencode(o, _current_indent_level):
				411	if isinstance(o, basestring):
				412	yield _encoder(o)
				413	elif o is None:
				414	yield 'null'
				415	elif o is True:
				416	yield 'true'
				417	elif o is False:
				418	yield 'false'
				419	elif isinstance(o, (int, long)):
				420	yield str(o)
				421	elif isinstance(o, float):
				422	yield _floatstr(o)
				423	elif isinstance(o, (list, tuple)):
				424	for chunk in _iterencode_list(o, _current_indent_level):
				425	yield chunk
				426	elif isinstance(o, dict):
				427	for chunk in _iterencode_dict(o, _current_indent_level):
				428	yield chunk
				429	else:
				430	if markers is not None:
				431	markerid = id(o)
				432	if markerid in markers:
				433	raise ValueError("Circular reference detected")
				434	markers[markerid] = o
				435	o = _default(o)
				436	for chunk in _iterencode(o, _current_indent_level):
				437	yield chunk
				438	if markers is not None:
				439	del markers[markerid]
				440
				441	return _iterencode