blob: 2422c6ac102a64f9ff68b2acd1ed9704332cde75 [file] [log] [blame]
Christian Heimes90540002008-05-08 14:29:10 +00001"""Implementation of JSONDecoder
2"""
Christian Heimes90540002008-05-08 14:29:10 +00003import re
Christian Heimes90540002008-05-08 14:29:10 +00004
Ezio Melotti6b60fb92011-05-14 06:47:51 +03005from json import scanner
Christian Heimes90540002008-05-08 14:29:10 +00006try:
7 from _json import scanstring as c_scanstring
Brett Cannoncd171c82013-07-04 17:43:24 -04008except ImportError:
Christian Heimes90540002008-05-08 14:29:10 +00009 c_scanstring = None
10
# Names exported by a star-import of this module.
__all__ = [
    'JSONDecoder',
    'JSONDecodeError',
]
Christian Heimes90540002008-05-08 14:29:10 +000012
# Flag set passed to the regular expressions compiled with FLAGS.
FLAGS = re.DOTALL | re.MULTILINE | re.VERBOSE

# Float values for the non-finite constants NaN, Infinity and -Infinity.
NaN = float('nan')
PosInf, NegInf = float('inf'), float('-inf')
Christian Heimes90540002008-05-08 14:29:10 +000018
19
class JSONDecodeError(ValueError):
    """ValueError subclass raised when a document cannot be decoded.

    Besides the formatted message, each instance carries:

    msg: the bare error message, without any position information
    doc: the JSON document that was being parsed
    pos: the index into doc at which parsing failed
    lineno: the line number containing pos (the first line is 1)
    colno: the column of pos within that line (the first column is 1)

    """
    # NOTE: the _json module raises this exception as well.
    def __init__(self, msg, doc, pos):
        # rfind() yields -1 when no newline precedes pos, which makes the
        # column of the very first character come out as 1.
        previous_newline = doc.rfind('\n', 0, pos)
        newlines_seen = doc.count('\n', 0, pos)
        lineno = newlines_seen + 1
        colno = pos - previous_newline
        super().__init__(
            '%s: line %d column %d (char %d)' % (msg, lineno, colno, pos))
        self.msg = msg
        self.doc = doc
        self.pos = pos
        self.lineno = lineno
        self.colno = colno

    def __reduce__(self):
        # Rebuild from the constructor arguments so that lineno and colno
        # are recomputed rather than pickled.
        ctor_args = (self.msg, self.doc, self.pos)
        return self.__class__, ctor_args
Christian Heimes90540002008-05-08 14:29:10 +000044
45
46_CONSTANTS = {
47 '-Infinity': NegInf,
48 'Infinity': PosInf,
49 'NaN': NaN,
Christian Heimes90540002008-05-08 14:29:10 +000050}
51
52
# Group 1 is a (possibly empty) run of ordinary characters; group 2 is the
# character that ended the run: a double quote, a backslash or a control
# character in the range 0x00-0x1f.
STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS)

# Single-character escapes: the character after the backslash mapped to
# the character it stands for.
BACKSLASH = {
    '"': '"',
    '\\': '\\',
    '/': '/',
    'b': '\b',
    'f': '\f',
    'n': '\n',
    'r': '\r',
    't': '\t',
}
58
Serhiy Storchakac93329b2013-11-26 21:25:28 +020059def _decode_uXXXX(s, pos):
60 esc = s[pos + 1:pos + 5]
61 if len(esc) == 4 and esc[1] not in 'xX':
62 try:
63 return int(esc, 16)
64 except ValueError:
65 pass
66 msg = "Invalid \\uXXXX escape"
Serhiy Storchaka47efb4a2015-01-26 13:16:30 +020067 raise JSONDecodeError(msg, s, pos)
Serhiy Storchakac93329b2013-11-26 21:25:28 +020068
def py_scanstring(s, end, strict=True,
        _b=BACKSLASH, _m=STRINGCHUNK.match):
    r"""Decode the JSON string in ``s`` whose contents start at ``end``.

    ``end`` is the index of the character just after the opening quote.
    Every valid escape sequence is replaced by the character it denotes;
    a ``\uXXXX`` high surrogate immediately followed by a ``\uXXXX`` low
    surrogate is combined into a single character.  With ``strict`` true
    a literal control character inside the string is an error; otherwise
    it is copied through unchanged.

    Returns a ``(decoded_string, index)`` tuple in which ``index`` is the
    position just after the closing quote.

    Raises JSONDecodeError (a ValueError) for an unterminated string, an
    unknown escape, or a control character in strict mode.
    """
    opening_quote = end - 1  # reported when the string never terminates
    pieces = []
    emit = pieces.append
    while True:
        found = _m(s, end)
        if found is None:
            raise JSONDecodeError(
                "Unterminated string starting at", s, opening_quote)
        literal, stopper = found.groups()
        end = found.end()
        # The run of plain characters before the stopper may be empty.
        if literal:
            emit(literal)
        if stopper == '"':
            # Closing quote: the string is complete.
            return ''.join(pieces), end
        if stopper != '\\':
            # Anything else the pattern stops on is a raw control character.
            if strict:
                msg = "Invalid control character {0!r} at".format(stopper)
                raise JSONDecodeError(msg, s, end)
            emit(stopper)
            continue
        # A backslash: look at the character that selects the escape.
        try:
            esc = s[end]
        except IndexError:
            raise JSONDecodeError(
                "Unterminated string starting at", s, opening_quote)
        if esc == 'u':
            code = _decode_uXXXX(s, end)
            end += 5
            # A high surrogate may pair up with a following \uXXXX escape.
            if 0xd800 <= code <= 0xdbff and s[end:end + 2] == '\\u':
                low = _decode_uXXXX(s, end + 1)
                if 0xdc00 <= low <= 0xdfff:
                    code = 0x10000 + (((code - 0xd800) << 10) | (low - 0xdc00))
                    end += 6
            char = chr(code)
        else:
            # Every other escape has to be listed in the lookup table.
            try:
                char = _b[esc]
            except KeyError:
                msg = "Invalid \\escape: {0!r}".format(esc)
                raise JSONDecodeError(msg, s, end)
            end += 1
        emit(char)
126
127
# Prefer the C implementation when there is one, else the Python scanner.
scanstring = c_scanstring if c_scanstring else py_scanstring

# WHITESPACE matches a (possibly empty) run of the characters listed in
# WHITESPACE_STR.
WHITESPACE_STR = ' \t\n\r'
WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS)
Christian Heimes90540002008-05-08 14:29:10 +0000133
134
def JSONObject(s_and_end, strict, scan_once, object_hook, object_pairs_hook,
               memo=None, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
    """Parse the members of a JSON object and build the Python result.

    ``s_and_end`` is a ``(s, end)`` pair; parsing starts at ``s[end]``
    (presumably the position just after the opening brace).  ``strict``
    is handed to the string scanner used for the keys and ``scan_once``
    is called as ``scan_once(s, index)`` to parse each value.  ``memo``
    is a dict used to reuse equal key strings; a fresh one is created
    when it is omitted.

    Returns a ``(result, index)`` tuple where ``index`` is the position
    just after the closing brace.  ``result`` is
    ``object_pairs_hook(pairs)`` when that hook is given, where ``pairs``
    is the list of ``(key, value)`` tuples in document order.  Otherwise
    it is a dict, passed through ``object_hook`` when that hook is given.

    Raises JSONDecodeError when a property name, a ':' or ',' delimiter,
    or a value is missing.
    """
    s, end = s_and_end
    # Callers written before the memo parameter existed do not pass one.
    if memo is None:
        memo = {}
    intern_key = memo.setdefault
    members = []
    add_member = members.append

    # Slicing instead of indexing avoids an IndexError at the end of the
    # input; the checks below then raise a more specific error.
    nextchar = s[end:end + 1]
    if nextchar != '"':
        # Usually the first key starts right away, so this is the slow path.
        if nextchar in _ws:
            end = _w(s, end).end()
            nextchar = s[end:end + 1]
        if nextchar == '}':
            # Empty object: nothing more to parse.
            if object_pairs_hook is not None:
                return object_pairs_hook(members), end + 1
            empty = {}
            if object_hook is not None:
                empty = object_hook(empty)
            return empty, end + 1
        if nextchar != '"':
            raise JSONDecodeError(
                "Expecting property name enclosed in double quotes", s, end)
    end += 1

    while True:
        key, end = scanstring(s, end, strict)
        key = intern_key(key, key)

        # Fast paths for the common separators ":" and ": " spare a
        # regular-expression call.
        if s[end:end + 1] != ':':
            end = _w(s, end).end()
            if s[end:end + 1] != ':':
                raise JSONDecodeError("Expecting ':' delimiter", s, end)
        end += 1

        # Skip whitespace before the value; running off the end of the
        # input is left for scan_once to report.
        try:
            if s[end] in _ws:
                end += 1
                if s[end] in _ws:
                    end = _w(s, end + 1).end()
        except IndexError:
            pass

        try:
            value, end = scan_once(s, end)
        except StopIteration as err:
            raise JSONDecodeError("Expecting value", s, err.value) from None
        add_member((key, value))

        # Find the delimiter after the value; at the end of the input use
        # an empty string so the delimiter check below fails.
        try:
            nextchar = s[end]
            if nextchar in _ws:
                end = _w(s, end + 1).end()
                nextchar = s[end]
        except IndexError:
            nextchar = ''
        end += 1

        if nextchar == '}':
            break
        if nextchar != ',':
            raise JSONDecodeError("Expecting ',' delimiter", s, end - 1)

        # After a comma the next property name has to follow.
        end = _w(s, end).end()
        nextchar = s[end:end + 1]
        end += 1
        if nextchar != '"':
            raise JSONDecodeError(
                "Expecting property name enclosed in double quotes", s, end - 1)

    if object_pairs_hook is not None:
        return object_pairs_hook(members), end
    mapping = dict(members)
    if object_hook is not None:
        mapping = object_hook(mapping)
    return mapping, end
Christian Heimes90540002008-05-08 14:29:10 +0000215
def JSONArray(s_and_end, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
    """Parse the elements of a JSON array into a list.

    ``s_and_end`` is a ``(s, end)`` pair; parsing starts at ``s[end]``
    (presumably the position just after the opening bracket).
    ``scan_once`` is called as ``scan_once(s, index)`` to parse each
    element.

    Returns a ``(list, index)`` tuple where ``index`` is the position
    just after the closing bracket.

    Raises JSONDecodeError when an element or a ',' delimiter is missing.
    """
    s, end = s_and_end
    items = []
    nextchar = s[end:end + 1]
    if nextchar in _ws:
        end = _w(s, end + 1).end()
        nextchar = s[end:end + 1]
    # An immediately closed array has no elements to scan.
    if nextchar == ']':
        return items, end + 1
    add_item = items.append
    while True:
        try:
            value, end = scan_once(s, end)
        except StopIteration as err:
            raise JSONDecodeError("Expecting value", s, err.value) from None
        add_item(value)

        # Find the delimiter that follows the element.
        nextchar = s[end:end + 1]
        if nextchar in _ws:
            end = _w(s, end + 1).end()
            nextchar = s[end:end + 1]
        end += 1
        if nextchar == ']':
            return items, end
        if nextchar != ',':
            raise JSONDecodeError("Expecting ',' delimiter", s, end - 1)

        # Skip whitespace before the next element; running off the end of
        # the input is left for scan_once to report.
        try:
            if s[end] in _ws:
                end += 1
                if s[end] in _ws:
                    end = _w(s, end + 1).end()
        except IndexError:
            pass
Christian Heimes90540002008-05-08 14:29:10 +0000251
252
class JSONDecoder:
    """Simple JSON <http://json.org> decoder

    By default JSON values are translated to Python values as follows:

    +---------------+-------------------+
    | JSON          | Python            |
    +===============+===================+
    | object        | dict              |
    +---------------+-------------------+
    | array         | list              |
    +---------------+-------------------+
    | string        | str               |
    +---------------+-------------------+
    | number (int)  | int               |
    +---------------+-------------------+
    | number (real) | float             |
    +---------------+-------------------+
    | true          | True              |
    +---------------+-------------------+
    | false         | False             |
    +---------------+-------------------+
    | null          | None              |
    +---------------+-------------------+

    ``NaN``, ``Infinity`` and ``-Infinity`` are accepted as well and
    become the corresponding ``float`` values, although they are not part
    of the JSON specification.

    """

    def __init__(self, *, object_hook=None, parse_float=None,
                 parse_int=None, parse_constant=None, strict=True,
                 object_pairs_hook=None):
        """Configure the decoder; every argument is keyword-only.

        ``object_hook``, when given, is called with the ``dict`` built
        for each decoded JSON object and its return value is used in
        place of that ``dict``.  This allows custom deserializations
        (e.g. JSON-RPC class hinting).

        ``object_pairs_hook``, when given, is called with the ordered
        list of ``(key, value)`` pairs of each decoded JSON object and
        its return value is used instead of a ``dict``.  It is meant for
        decoders that depend on the order of the pairs (for example
        collections.OrderedDict remembers insertion order).  When both
        hooks are given, ``object_pairs_hook`` takes priority.

        ``parse_float``, when given, is called with the string of every
        JSON float to be decoded; the default is ``float``.  Another
        datatype or parser can be supplied (e.g. decimal.Decimal).

        ``parse_int``, when given, is called with the string of every
        JSON int to be decoded; the default is ``int``.  Another
        datatype or parser can be supplied (e.g. float).

        ``parse_constant``, when given, is called with one of the
        strings -Infinity, Infinity or NaN.  It can be used to raise an
        exception when such invalid JSON numbers are encountered.

        ``strict`` is true by default; when it is false, control
        characters are allowed inside strings.  Control characters here
        are those with character codes in the 0-31 range, including
        ``'\\t'`` (tab), ``'\\n'``, ``'\\r'`` and ``'\\0'``.

        """
        # Hooks and options supplied by the caller.
        self.object_hook = object_hook
        self.object_pairs_hook = object_pairs_hook
        self.strict = strict

        # Converters, falling back to the built-in behaviour.
        self.parse_float = parse_float or float
        self.parse_int = parse_int or int
        self.parse_constant = parse_constant or _CONSTANTS.__getitem__

        # Parsers for the compound and string values.
        self.parse_object = JSONObject
        self.parse_array = JSONArray
        self.parse_string = scanstring
        self.memo = {}

        # Built last: the scanner is constructed from this configured
        # decoder.
        self.scan_once = scanner.make_scanner(self)

    def decode(self, s, _w=WHITESPACE.match):
        """Return the Python representation of ``s`` (a ``str`` instance
        containing a JSON document).

        Whitespace before and after the document is ignored; anything
        else after the document raises JSONDecodeError ("Extra data").

        """
        start = _w(s, 0).end()
        obj, end = self.raw_decode(s, idx=start)
        end = _w(s, end).end()
        if end == len(s):
            return obj
        raise JSONDecodeError("Extra data", s, end)

    def raw_decode(self, s, idx=0):
        """Decode the JSON document that starts at index ``idx`` of ``s``
        (a ``str``) and return a 2-tuple of its Python representation and
        the index in ``s`` where the document ended.

        Unlike decode(), data following the document is not an error, so
        this can be used on a string with extraneous data at the end.

        Raises JSONDecodeError ("Expecting value") when no JSON value
        starts at ``idx``.

        """
        try:
            value, stop = self.scan_once(s, idx)
        except StopIteration as err:
            raise JSONDecodeError("Expecting value", s, err.value) from None
        return value, stop
Christian Heimes90540002008-05-08 14:29:10 +0000358 return obj, end