blob: d7d824454e1ba34e89154b094f32f1b5a6d63e32 [file] [log] [blame]
Christian Heimes90540002008-05-08 14:29:10 +00001"""Implementation of JSONDecoder
2"""
Christian Heimes90540002008-05-08 14:29:10 +00003import re
Christian Heimes90540002008-05-08 14:29:10 +00004
Ezio Melotti6b60fb92011-05-14 06:47:51 +03005from json import scanner
Christian Heimes90540002008-05-08 14:29:10 +00006try:
7 from _json import scanstring as c_scanstring
Brett Cannoncd171c82013-07-04 17:43:24 -04008except ImportError:
Christian Heimes90540002008-05-08 14:29:10 +00009 c_scanstring = None
10
# Names exported by ``from <this module> import *``.
__all__ = ['JSONDecoder', 'JSONDecodeError']

# Flags passed to the regular expressions compiled in this module.
FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL

# Float values for the non-standard ``NaN``, ``Infinity`` and ``-Infinity``
# literals (see _CONSTANTS).
NaN = float('nan')
PosInf = float('inf')
NegInf = float('-inf')
Christian Heimes90540002008-05-08 14:29:10 +000018
19
class JSONDecodeError(ValueError):
    """ValueError subclass raised when a JSON document cannot be decoded.

    Instances carry these additional attributes:

    msg: The unformatted error message
    doc: The JSON document being parsed
    pos: The start index of doc where parsing failed
    lineno: The line corresponding to pos
    colno: The column corresponding to pos

    """
    # Note that this exception is used from _json
    def __init__(self, msg, doc, pos):
        # Lines are 1-based: one more than the newlines that precede pos.
        line = 1 + doc.count('\n', 0, pos)
        # rfind() yields -1 when no newline precedes pos, which makes the
        # column 1-based on the first line as well.
        last_newline = doc.rfind('\n', 0, pos)
        column = pos - last_newline
        ValueError.__init__(
            self,
            '%s: line %d column %d (char %d)' % (msg, line, column, pos))
        self.msg = msg
        self.doc = doc
        self.pos = pos
        self.lineno = line
        self.colno = column

    def __reduce__(self):
        # Rebuild from the constructor arguments; lineno and colno are
        # recomputed by __init__.
        ctor_args = (self.msg, self.doc, self.pos)
        return self.__class__, ctor_args
Christian Heimes90540002008-05-08 14:29:10 +000044
45
# Maps each non-standard constant literal to its float value.  Its
# __getitem__ is the default ``parse_constant`` of JSONDecoder.
_CONSTANTS = {
    '-Infinity': NegInf,
    'Infinity': PosInf,
    'NaN': NaN,
}
51
52
# Group 1: a (possibly empty) run of ordinary characters.
# Group 2: the terminator that follows it -- a double quote, a backslash,
# or a control character in the range \x00-\x1f.
STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS)
# Single-character escapes: the character following a backslash mapped to
# the character it decodes to.  ``u`` is not listed here; py_scanstring
# handles \uXXXX escapes itself.
BACKSLASH = {
    '"': '"', '\\': '\\', '/': '/',
    'b': '\b', 'f': '\f', 'n': '\n', 'r': '\r', 't': '\t',
}
58
Serhiy Storchakac93329b2013-11-26 21:25:28 +020059def _decode_uXXXX(s, pos):
60 esc = s[pos + 1:pos + 5]
61 if len(esc) == 4 and esc[1] not in 'xX':
62 try:
63 return int(esc, 16)
64 except ValueError:
65 pass
66 msg = "Invalid \\uXXXX escape"
Serhiy Storchaka47efb4a2015-01-26 13:16:30 +020067 raise JSONDecodeError(msg, s, pos)
Serhiy Storchakac93329b2013-11-26 21:25:28 +020068
def py_scanstring(s, end, strict=True,
        _b=BACKSLASH, _m=STRINGCHUNK.match):
    """Decode the JSON string literal found in ``s``.

    ``end`` is the index of the character just after the opening quote.
    Every valid escape sequence is unescaped; an invalid string raises
    JSONDecodeError (a ValueError).  When ``strict`` is false, literal
    control characters are accepted inside the string.

    Returns a ``(decoded_string, index)`` tuple where ``index`` is the
    position in ``s`` just after the closing quote."""
    pieces = []
    add_piece = pieces.append
    # Index of the opening quote, used for "unterminated string" errors.
    start = end - 1
    while True:
        match = _m(s, end)
        if match is None:
            raise JSONDecodeError("Unterminated string starting at", s, start)
        end = match.end()
        literal, stop = match.groups()
        # literal holds zero or more unescaped string characters
        if literal:
            add_piece(literal)
        # stop is the closing quote, a literal control character, or a
        # backslash announcing an escape sequence
        if stop == '"':
            break
        if stop != '\\':
            if strict:
                msg = "Invalid control character {0!r} at".format(stop)
                raise JSONDecodeError(msg, s, end)
            add_piece(stop)
            continue
        try:
            esc = s[end]
        except IndexError:
            raise JSONDecodeError("Unterminated string starting at",
                                  s, start) from None
        if esc == 'u':
            code = _decode_uXXXX(s, end)
            end += 5
            # A high surrogate directly followed by another \u escape may
            # form a surrogate pair; combine them only when the second
            # half really is a low surrogate.
            if 0xd800 <= code <= 0xdbff and s[end:end + 2] == '\\u':
                low = _decode_uXXXX(s, end + 1)
                if 0xdc00 <= low <= 0xdfff:
                    code = 0x10000 + (((code - 0xd800) << 10) | (low - 0xdc00))
                    end += 6
            char = chr(code)
        else:
            # Anything other than a unicode escape must be in the table
            try:
                char = _b[esc]
            except KeyError:
                msg = "Invalid \\escape: {0!r}".format(esc)
                raise JSONDecodeError(msg, s, end)
            end += 1
        add_piece(char)
    return ''.join(pieces), end
127
128
# Use speedup if available: c_scanstring is None when the _json import
# failed, in which case the pure-Python implementation is used.
scanstring = c_scanstring or py_scanstring

# Matches a (possibly empty) run of space, tab, newline and carriage return.
WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS)
# The same four characters as a plain string, for cheap ``in`` tests.
WHITESPACE_STR = ' \t\n\r'
Christian Heimes90540002008-05-08 14:29:10 +0000134
135
def JSONObject(s_and_end, strict, scan_once, object_hook, object_pairs_hook,
               memo=None, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
    """Parse the members of a JSON object.

    ``s_and_end`` is a ``(s, end)`` pair; parsing starts at index ``end``
    of ``s``, where either a property name or the closing ``}`` is
    expected.  Keys are read with ``scanstring`` (honouring ``strict``)
    and values with ``scan_once``.

    If ``object_pairs_hook`` is not None it receives the list of
    ``(key, value)`` pairs and its result is returned; otherwise a dict is
    built and, if ``object_hook`` is not None, passed through it.

    Returns ``(result, index)`` where ``index`` is just past the ``}``.
    Raises JSONDecodeError on malformed input.
    """
    s, end = s_and_end
    items = []
    add_item = items.append
    # Backwards compatibility
    if memo is None:
        memo = {}
    # setdefault hands back the first equal key seen, so repeated keys
    # share a single string object.
    intern_key = memo.setdefault
    # Slicing never raises IndexError; an empty slice falls through to the
    # more specific error below.
    ch = s[end:end + 1]
    # Normally we expect ch == '"'
    if ch != '"':
        if ch in _ws:
            end = _w(s, end).end()
            ch = s[end:end + 1]
        # Trivial empty object
        if ch == '}':
            if object_pairs_hook is not None:
                return object_pairs_hook(items), end + 1
            empty = {}
            if object_hook is not None:
                empty = object_hook(empty)
            return empty, end + 1
        if ch != '"':
            raise JSONDecodeError(
                "Expecting property name enclosed in double quotes", s, end)
    end += 1
    while True:
        key, end = scanstring(s, end, strict)
        key = intern_key(key, key)
        # To skip some function call overhead we optimize the fast paths
        # where the JSON key separator is ": " or just ":".
        if s[end:end + 1] != ':':
            end = _w(s, end).end()
            if s[end:end + 1] != ':':
                raise JSONDecodeError("Expecting ':' delimiter", s, end)
        end += 1

        # Skip whitespace before the value; only run the regex when more
        # than one whitespace character is present.
        try:
            if s[end] in _ws:
                end += 1
                if s[end] in _ws:
                    end = _w(s, end + 1).end()
        except IndexError:
            pass

        try:
            value, end = scan_once(s, end)
        except StopIteration as err:
            raise JSONDecodeError("Expecting value", s, err.value) from None
        add_item((key, value))

        # Read the delimiter after the value; running off the end of the
        # document leaves ch empty so the check below reports it.
        try:
            ch = s[end]
            if ch in _ws:
                end = _w(s, end + 1).end()
                ch = s[end]
        except IndexError:
            ch = ''
        end += 1

        if ch == '}':
            break
        if ch != ',':
            raise JSONDecodeError("Expecting ',' delimiter", s, end - 1)
        end = _w(s, end).end()
        ch = s[end:end + 1]
        end += 1
        if ch != '"':
            raise JSONDecodeError(
                "Expecting property name enclosed in double quotes", s, end - 1)
    if object_pairs_hook is not None:
        return object_pairs_hook(items), end
    obj = dict(items)
    if object_hook is not None:
        obj = object_hook(obj)
    return obj, end
Christian Heimes90540002008-05-08 14:29:10 +0000216
def JSONArray(s_and_end, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
    """Parse the elements of a JSON array.

    ``s_and_end`` is a ``(s, end)`` pair; parsing starts at index ``end``
    of ``s``, where either the first element or the closing ``]`` is
    expected.  Each element is read with ``scan_once``.

    Returns ``(values, index)`` where ``values`` is a list and ``index``
    is just past the ``]``.  Raises JSONDecodeError on malformed input.
    """
    s, end = s_and_end
    items = []
    ch = s[end:end + 1]
    if ch in _ws:
        end = _w(s, end + 1).end()
        ch = s[end:end + 1]
    # Look-ahead for trivial empty array
    if ch == ']':
        return items, end + 1
    add_item = items.append
    while True:
        try:
            value, end = scan_once(s, end)
        except StopIteration as err:
            raise JSONDecodeError("Expecting value", s, err.value) from None
        add_item(value)
        ch = s[end:end + 1]
        if ch in _ws:
            end = _w(s, end + 1).end()
            ch = s[end:end + 1]
        end += 1
        if ch == ']':
            return items, end
        if ch != ',':
            raise JSONDecodeError("Expecting ',' delimiter", s, end - 1)
        # Skip whitespace after the comma; only run the regex when more
        # than one whitespace character is present.
        try:
            if s[end] in _ws:
                end += 1
                if s[end] in _ws:
                    end = _w(s, end + 1).end()
        except IndexError:
            pass
Christian Heimes90540002008-05-08 14:29:10 +0000252
253
class JSONDecoder(object):
    """Simple JSON <http://json.org> decoder

    By default the following translations are applied while decoding:

    +---------------+-------------------+
    | JSON          | Python            |
    +===============+===================+
    | object        | dict              |
    +---------------+-------------------+
    | array         | list              |
    +---------------+-------------------+
    | string        | str               |
    +---------------+-------------------+
    | number (int)  | int               |
    +---------------+-------------------+
    | number (real) | float             |
    +---------------+-------------------+
    | true          | True              |
    +---------------+-------------------+
    | false         | False             |
    +---------------+-------------------+
    | null          | None              |
    +---------------+-------------------+

    ``NaN``, ``Infinity`` and ``-Infinity`` are also understood and map to
    their corresponding ``float`` values, which is outside the JSON spec.

    """

    def __init__(self, *, object_hook=None, parse_float=None,
            parse_int=None, parse_constant=None, strict=True,
            object_pairs_hook=None):
        """Configure the decoder.

        ``object_hook``, if specified, is called with the result of every
        JSON object decoded, and its return value is used in place of the
        given ``dict``.  This can be used to provide custom
        deserializations (e.g. to support JSON-RPC class hinting).

        ``object_pairs_hook``, if specified, is called with the result of
        every JSON object decoded as an ordered list of pairs.  Its return
        value is used instead of the ``dict``.  This feature can be used
        to implement custom decoders.  If ``object_hook`` is also defined,
        ``object_pairs_hook`` takes priority.

        ``parse_float``, if specified, is called with the string of every
        JSON float to be decoded.  By default this is equivalent to
        float(num_str).  This can be used to use another datatype or
        parser for JSON floats (e.g. decimal.Decimal).

        ``parse_int``, if specified, is called with the string of every
        JSON int to be decoded.  By default this is equivalent to
        int(num_str).  This can be used to use another datatype or parser
        for JSON integers (e.g. float).

        ``parse_constant``, if specified, is called with one of the
        following strings: -Infinity, Infinity, NaN.  This can be used to
        raise an exception if invalid JSON numbers are encountered.

        If ``strict`` is false (true is the default), control characters
        are allowed inside strings.  Control characters in this context
        are those with character codes in the 0-31 range, including
        ``'\\t'`` (tab), ``'\\n'``, ``'\\r'`` and ``'\\0'``.
        """
        self.strict = strict
        self.object_hook = object_hook
        self.object_pairs_hook = object_pairs_hook
        # Any falsy hook (not just None) falls back to the default.
        self.parse_float = parse_float if parse_float else float
        self.parse_int = parse_int if parse_int else int
        self.parse_constant = (
            parse_constant if parse_constant else _CONSTANTS.__getitem__)
        self.parse_object = JSONObject
        self.parse_array = JSONArray
        self.parse_string = scanstring
        self.memo = {}
        # Built last: make_scanner receives this fully configured decoder.
        self.scan_once = scanner.make_scanner(self)


    def decode(self, s, _w=WHITESPACE.match):
        """Return the Python representation of ``s`` (a ``str`` instance
        containing a JSON document).

        Leading and trailing whitespace is ignored; anything else after
        the document raises JSONDecodeError("Extra data").

        """
        start = _w(s, 0).end()
        obj, end = self.raw_decode(s, idx=start)
        end = _w(s, end).end()
        if end != len(s):
            raise JSONDecodeError("Extra data", s, end)
        return obj

    def raw_decode(self, s, idx=0):
        """Decode a JSON document from ``s`` (a ``str`` beginning with
        a JSON document), starting at index ``idx``, and return a 2-tuple
        of the Python representation and the index in ``s`` where the
        document ended.

        This can be used to decode a JSON document from a string that may
        have extraneous data at the end.

        """
        try:
            obj, end = self.scan_once(s, idx)
        except StopIteration as err:
            # The scanner signals "no value here" with StopIteration whose
            # value is the offending index.
            raise JSONDecodeError("Expecting value", s, err.value) from None
        else:
            return obj, end