Blame - simplejson/decoder.py - platform/external/python/google-api-python-client

blob: 4cf4015f6736f057054f9290e7b40c54a0298647 [file] [log] [blame]

jcgregorio@wpgntai-ubiq72.hot.corp.google.com	ed13252	2010-04-19 11:12:52 -0700	[diff] [blame^]	1	"""Implementation of JSONDecoder
				2	"""
				3	import re
				4	import sys
				5	import struct
				6
				7	from simplejson.scanner import make_scanner
				8	def _import_c_scanstring():
				9	try:
				10	from simplejson._speedups import scanstring
				11	return scanstring
				12	except ImportError:
				13	return None
				14	c_scanstring = _import_c_scanstring()
				15
				16	__all__ = ['JSONDecoder']
				17
				18	FLAGS = re.VERBOSE \| re.MULTILINE \| re.DOTALL
				19
				20	def _floatconstants():
				21	_BYTES = '7FF80000000000007FF0000000000000'.decode('hex')
				22	# The struct module in Python 2.4 would get frexp() out of range here
				23	# when an endian is specified in the format string. Fixed in Python 2.5+
				24	if sys.byteorder != 'big':
				25	_BYTES = _BYTES[:8][::-1] + _BYTES[8:][::-1]
				26	nan, inf = struct.unpack('dd', _BYTES)
				27	return nan, inf, -inf
				28
				29	NaN, PosInf, NegInf = _floatconstants()
				30
				31
				32	class JSONDecodeError(ValueError):
				33	"""Subclass of ValueError with the following additional properties:
				34
				35	msg: The unformatted error message
				36	doc: The JSON document being parsed
				37	pos: The start index of doc where parsing failed
				38	end: The end index of doc where parsing failed (may be None)
				39	lineno: The line corresponding to pos
				40	colno: The column corresponding to pos
				41	endlineno: The line corresponding to end (may be None)
				42	endcolno: The column corresponding to end (may be None)
				43
				44	"""
				45	def __init__(self, msg, doc, pos, end=None):
				46	ValueError.__init__(self, errmsg(msg, doc, pos, end=end))
				47	self.msg = msg
				48	self.doc = doc
				49	self.pos = pos
				50	self.end = end
				51	self.lineno, self.colno = linecol(doc, pos)
				52	if end is not None:
				53	self.endlineno, self.endcolno = linecol(doc, pos)
				54	else:
				55	self.endlineno, self.endcolno = None, None
				56
				57
				58	def linecol(doc, pos):
				59	lineno = doc.count('\n', 0, pos) + 1
				60	if lineno == 1:
				61	colno = pos
				62	else:
				63	colno = pos - doc.rindex('\n', 0, pos)
				64	return lineno, colno
				65
				66
				67	def errmsg(msg, doc, pos, end=None):
				68	# Note that this function is called from _speedups
				69	lineno, colno = linecol(doc, pos)
				70	if end is None:
				71	#fmt = '{0}: line {1} column {2} (char {3})'
				72	#return fmt.format(msg, lineno, colno, pos)
				73	fmt = '%s: line %d column %d (char %d)'
				74	return fmt % (msg, lineno, colno, pos)
				75	endlineno, endcolno = linecol(doc, end)
				76	#fmt = '{0}: line {1} column {2} - line {3} column {4} (char {5} - {6})'
				77	#return fmt.format(msg, lineno, colno, endlineno, endcolno, pos, end)
				78	fmt = '%s: line %d column %d - line %d column %d (char %d - %d)'
				79	return fmt % (msg, lineno, colno, endlineno, endcolno, pos, end)
				80
				81
				82	_CONSTANTS = {
				83	'-Infinity': NegInf,
				84	'Infinity': PosInf,
				85	'NaN': NaN,
				86	}
				87
				88	STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS)
				89	BACKSLASH = {
				90	'"': u'"', '\\': u'\\', '/': u'/',
				91	'b': u'\b', 'f': u'\f', 'n': u'\n', 'r': u'\r', 't': u'\t',
				92	}
				93
				94	DEFAULT_ENCODING = "utf-8"
				95
				96	def py_scanstring(s, end, encoding=None, strict=True,
				97	_b=BACKSLASH, _m=STRINGCHUNK.match):
				98	"""Scan the string s for a JSON string. End is the index of the
				99	character in s after the quote that started the JSON string.
				100	Unescapes all valid JSON string escape sequences and raises ValueError
				101	on attempt to decode an invalid string. If strict is False then literal
				102	control characters are allowed in the string.
				103
				104	Returns a tuple of the decoded string and the index of the character in s
				105	after the end quote."""
				106	if encoding is None:
				107	encoding = DEFAULT_ENCODING
				108	chunks = []
				109	_append = chunks.append
				110	begin = end - 1
				111	while 1:
				112	chunk = _m(s, end)
				113	if chunk is None:
				114	raise JSONDecodeError(
				115	"Unterminated string starting at", s, begin)
				116	end = chunk.end()
				117	content, terminator = chunk.groups()
				118	# Content is contains zero or more unescaped string characters
				119	if content:
				120	if not isinstance(content, unicode):
				121	content = unicode(content, encoding)
				122	_append(content)
				123	# Terminator is the end of string, a literal control character,
				124	# or a backslash denoting that an escape sequence follows
				125	if terminator == '"':
				126	break
				127	elif terminator != '\\':
				128	if strict:
				129	msg = "Invalid control character %r at" % (terminator,)
				130	#msg = "Invalid control character {0!r} at".format(terminator)
				131	raise JSONDecodeError(msg, s, end)
				132	else:
				133	_append(terminator)
				134	continue
				135	try:
				136	esc = s[end]
				137	except IndexError:
				138	raise JSONDecodeError(
				139	"Unterminated string starting at", s, begin)
				140	# If not a unicode escape sequence, must be in the lookup table
				141	if esc != 'u':
				142	try:
				143	char = _b[esc]
				144	except KeyError:
				145	msg = "Invalid \\escape: " + repr(esc)
				146	raise JSONDecodeError(msg, s, end)
				147	end += 1
				148	else:
				149	# Unicode escape sequence
				150	esc = s[end + 1:end + 5]
				151	next_end = end + 5
				152	if len(esc) != 4:
				153	msg = "Invalid \\uXXXX escape"
				154	raise JSONDecodeError(msg, s, end)
				155	uni = int(esc, 16)
				156	# Check for surrogate pair on UCS-4 systems
				157	if 0xd800 <= uni <= 0xdbff and sys.maxunicode > 65535:
				158	msg = "Invalid \\uXXXX\\uXXXX surrogate pair"
				159	if not s[end + 5:end + 7] == '\\u':
				160	raise JSONDecodeError(msg, s, end)
				161	esc2 = s[end + 7:end + 11]
				162	if len(esc2) != 4:
				163	raise JSONDecodeError(msg, s, end)
				164	uni2 = int(esc2, 16)
				165	uni = 0x10000 + (((uni - 0xd800) << 10) \| (uni2 - 0xdc00))
				166	next_end += 6
				167	char = unichr(uni)
				168	end = next_end
				169	# Append the unescaped character
				170	_append(char)
				171	return u''.join(chunks), end
				172
				173
				174	# Use speedup if available
				175	scanstring = c_scanstring or py_scanstring
				176
				177	WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS)
				178	WHITESPACE_STR = ' \t\n\r'
				179
				180	def JSONObject((s, end), encoding, strict, scan_once, object_hook,
				181	object_pairs_hook, memo=None,
				182	_w=WHITESPACE.match, _ws=WHITESPACE_STR):
				183	# Backwards compatibility
				184	if memo is None:
				185	memo = {}
				186	memo_get = memo.setdefault
				187	pairs = []
				188	# Use a slice to prevent IndexError from being raised, the following
				189	# check will raise a more specific ValueError if the string is empty
				190	nextchar = s[end:end + 1]
				191	# Normally we expect nextchar == '"'
				192	if nextchar != '"':
				193	if nextchar in _ws:
				194	end = _w(s, end).end()
				195	nextchar = s[end:end + 1]
				196	# Trivial empty object
				197	if nextchar == '}':
				198	if object_pairs_hook is not None:
				199	result = object_pairs_hook(pairs)
				200	return result, end
				201	pairs = {}
				202	if object_hook is not None:
				203	pairs = object_hook(pairs)
				204	return pairs, end + 1
				205	elif nextchar != '"':
				206	raise JSONDecodeError("Expecting property name", s, end)
				207	end += 1
				208	while True:
				209	key, end = scanstring(s, end, encoding, strict)
				210	key = memo_get(key, key)
				211
				212	# To skip some function call overhead we optimize the fast paths where
				213	# the JSON key separator is ": " or just ":".
				214	if s[end:end + 1] != ':':
				215	end = _w(s, end).end()
				216	if s[end:end + 1] != ':':
				217	raise JSONDecodeError("Expecting : delimiter", s, end)
				218
				219	end += 1
				220
				221	try:
				222	if s[end] in _ws:
				223	end += 1
				224	if s[end] in _ws:
				225	end = _w(s, end + 1).end()
				226	except IndexError:
				227	pass
				228
				229	try:
				230	value, end = scan_once(s, end)
				231	except StopIteration:
				232	raise JSONDecodeError("Expecting object", s, end)
				233	pairs.append((key, value))
				234
				235	try:
				236	nextchar = s[end]
				237	if nextchar in _ws:
				238	end = _w(s, end + 1).end()
				239	nextchar = s[end]
				240	except IndexError:
				241	nextchar = ''
				242	end += 1
				243
				244	if nextchar == '}':
				245	break
				246	elif nextchar != ',':
				247	raise JSONDecodeError("Expecting , delimiter", s, end - 1)
				248
				249	try:
				250	nextchar = s[end]
				251	if nextchar in _ws:
				252	end += 1
				253	nextchar = s[end]
				254	if nextchar in _ws:
				255	end = _w(s, end + 1).end()
				256	nextchar = s[end]
				257	except IndexError:
				258	nextchar = ''
				259
				260	end += 1
				261	if nextchar != '"':
				262	raise JSONDecodeError("Expecting property name", s, end - 1)
				263
				264	if object_pairs_hook is not None:
				265	result = object_pairs_hook(pairs)
				266	return result, end
				267	pairs = dict(pairs)
				268	if object_hook is not None:
				269	pairs = object_hook(pairs)
				270	return pairs, end
				271
				272	def JSONArray((s, end), scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
				273	values = []
				274	nextchar = s[end:end + 1]
				275	if nextchar in _ws:
				276	end = _w(s, end + 1).end()
				277	nextchar = s[end:end + 1]
				278	# Look-ahead for trivial empty array
				279	if nextchar == ']':
				280	return values, end + 1
				281	_append = values.append
				282	while True:
				283	try:
				284	value, end = scan_once(s, end)
				285	except StopIteration:
				286	raise JSONDecodeError("Expecting object", s, end)
				287	_append(value)
				288	nextchar = s[end:end + 1]
				289	if nextchar in _ws:
				290	end = _w(s, end + 1).end()
				291	nextchar = s[end:end + 1]
				292	end += 1
				293	if nextchar == ']':
				294	break
				295	elif nextchar != ',':
				296	raise JSONDecodeError("Expecting , delimiter", s, end)
				297
				298	try:
				299	if s[end] in _ws:
				300	end += 1
				301	if s[end] in _ws:
				302	end = _w(s, end + 1).end()
				303	except IndexError:
				304	pass
				305
				306	return values, end
				307
				308	class JSONDecoder(object):
				309	"""Simple JSON <http://json.org> decoder
				310
				311	Performs the following translations in decoding by default:
				312
				313	+---------------+-------------------+
				314	\| JSON \| Python \|
				315	+===============+===================+
				316	\| object \| dict \|
				317	+---------------+-------------------+
				318	\| array \| list \|
				319	+---------------+-------------------+
				320	\| string \| unicode \|
				321	+---------------+-------------------+
				322	\| number (int) \| int, long \|
				323	+---------------+-------------------+
				324	\| number (real) \| float \|
				325	+---------------+-------------------+
				326	\| true \| True \|
				327	+---------------+-------------------+
				328	\| false \| False \|
				329	+---------------+-------------------+
				330	\| null \| None \|
				331	+---------------+-------------------+
				332
				333	It also understands ``NaN``, ``Infinity``, and ``-Infinity`` as
				334	their corresponding ``float`` values, which is outside the JSON spec.
				335
				336	"""
				337
				338	def __init__(self, encoding=None, object_hook=None, parse_float=None,
				339	parse_int=None, parse_constant=None, strict=True,
				340	object_pairs_hook=None):
				341	"""
				342	encoding determines the encoding used to interpret any
				343	:class:`str` objects decoded by this instance (``'utf-8'`` by
				344	default). It has no effect when decoding :class:`unicode` objects.
				345
				346	Note that currently only encodings that are a superset of ASCII work,
				347	strings of other encodings should be passed in as :class:`unicode`.
				348
				349	object_hook, if specified, will be called with the result of every
				350	JSON object decoded and its return value will be used in place of the
				351	given :class:`dict`. This can be used to provide custom
				352	deserializations (e.g. to support JSON-RPC class hinting).
				353
				354	object_pairs_hook is an optional function that will be called with
				355	the result of any object literal decode with an ordered list of pairs.
				356	The return value of object_pairs_hook will be used instead of the
				357	:class:`dict`. This feature can be used to implement custom decoders
				358	that rely on the order that the key and value pairs are decoded (for
				359	example, :func:`collections.OrderedDict` will remember the order of
				360	insertion). If object_hook is also defined, the object_pairs_hook
				361	takes priority.
				362
				363	parse_float, if specified, will be called with the string of every
				364	JSON float to be decoded. By default, this is equivalent to
				365	``float(num_str)``. This can be used to use another datatype or parser
				366	for JSON floats (e.g. :class:`decimal.Decimal`).
				367
				368	parse_int, if specified, will be called with the string of every
				369	JSON int to be decoded. By default, this is equivalent to
				370	``int(num_str)``. This can be used to use another datatype or parser
				371	for JSON integers (e.g. :class:`float`).
				372
				373	parse_constant, if specified, will be called with one of the
				374	following strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``. This
				375	can be used to raise an exception if invalid JSON numbers are
				376	encountered.
				377
				378	strict controls the parser's behavior when it encounters an
				379	invalid control character in a string. The default setting of
				380	``True`` means that unescaped control characters are parse errors, if
				381	``False`` then control characters will be allowed in strings.
				382
				383	"""
				384	self.encoding = encoding
				385	self.object_hook = object_hook
				386	self.object_pairs_hook = object_pairs_hook
				387	self.parse_float = parse_float or float
				388	self.parse_int = parse_int or int
				389	self.parse_constant = parse_constant or _CONSTANTS.__getitem__
				390	self.strict = strict
				391	self.parse_object = JSONObject
				392	self.parse_array = JSONArray
				393	self.parse_string = scanstring
				394	self.memo = {}
				395	self.scan_once = make_scanner(self)
				396
				397	def decode(self, s, _w=WHITESPACE.match):
				398	"""Return the Python representation of ``s`` (a ``str`` or ``unicode``
				399	instance containing a JSON document)
				400
				401	"""
				402	obj, end = self.raw_decode(s, idx=_w(s, 0).end())
				403	end = _w(s, end).end()
				404	if end != len(s):
				405	raise JSONDecodeError("Extra data", s, end, len(s))
				406	return obj
				407
				408	def raw_decode(self, s, idx=0):
				409	"""Decode a JSON document from ``s`` (a ``str`` or ``unicode``
				410	beginning with a JSON document) and return a 2-tuple of the Python
				411	representation and the index in ``s`` where the document ended.
				412
				413	This can be used to decode a JSON document from a string that may
				414	have extraneous data at the end.
				415
				416	"""
				417	try:
				418	obj, end = self.scan_once(s, idx)
				419	except StopIteration:
				420	raise JSONDecodeError("No JSON object could be decoded", s, idx)
				421	return obj, end