blob: da7ef9c81951349030a44021085f2675bc4955b5 [file] [log] [blame]
"""Implementation of JSONDecoder
"""
Christian Heimes90540002008-05-08 14:29:10 +00003import re
Christian Heimes90540002008-05-08 14:29:10 +00004
Ezio Melotti6b60fb92011-05-14 06:47:51 +03005from json import scanner
Christian Heimes90540002008-05-08 14:29:10 +00006try:
7 from _json import scanstring as c_scanstring
8except ImportError:
9 c_scanstring = None
10
11__all__ = ['JSONDecoder']
12
# Flags shared by the regular expressions compiled in this module.
FLAGS = re.DOTALL | re.MULTILINE | re.VERBOSE

# Float values for the non-finite literals (see _CONSTANTS).
NaN, PosInf, NegInf = float('nan'), float('inf'), float('-inf')
Christian Heimes90540002008-05-08 14:29:10 +000018
19
def linecol(doc, pos):
    """Return the 1-based ``(line, column)`` of offset ``pos`` in ``doc``.

    ``doc`` may be ``str`` or ``bytes``; the newline used for counting is
    chosen to match its type.
    """
    nl = b'\n' if isinstance(doc, bytes) else '\n'
    newlines_before = doc.count(nl, 0, pos)
    if not newlines_before:
        # Still on the first line: the column is just the offset, 1-based.
        return 1, pos + 1
    # Distance from the closest preceding newline gives a 1-based column.
    return newlines_before + 1, pos - doc.rindex(nl, 0, pos)
31
32
def errmsg(msg, doc, pos, end=None):
    """Format ``msg`` with the location of ``pos`` (and ``end``) in ``doc``.

    With ``end`` omitted the result names a single position; otherwise it
    names the span from ``pos`` to ``end``.  Lines and columns come from
    ``linecol`` and the raw character offsets are appended in parentheses.
    """
    # Note that this function is called from _json
    lineno, colno = linecol(doc, pos)
    if end is not None:
        endlineno, endcolno = linecol(doc, end)
        span_fmt = ('{0}: line {1} column {2} - line {3} column {4} '
                    '(char {5} - {6})')
        return span_fmt.format(
            msg, lineno, colno, endlineno, endcolno, pos, end)
    point_fmt = '{0}: line {1} column {2} (char {3})'
    return point_fmt.format(msg, lineno, colno, pos)
Christian Heimes90540002008-05-08 14:29:10 +000046
47
# Non-finite number literals mapped to their float values.
_CONSTANTS = {'-Infinity': NegInf, 'Infinity': PosInf, 'NaN': NaN}


# Group 1 lazily captures a run of characters; group 2 captures the
# quote, backslash or control character (0x00-0x1f) that ends the run.
STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS)

# Character following a backslash -> the character it stands for.
BACKSLASH = {
    '"': '"',
    '\\': '\\',
    '/': '/',
    'b': '\b',
    'f': '\f',
    'n': '\n',
    'r': '\r',
    't': '\t',
}
60
def _hex4(s, start):
    """Return the value of the four hex digits at ``s[start:start + 4]``.

    Returns ``None`` unless that slice is exactly four ASCII hexadecimal
    digits.  ``int(text, 16)`` on its own is too permissive for a JSON
    ``\\uXXXX`` escape: it also accepts a sign, surrounding whitespace, a
    ``0x`` prefix, underscores and non-ASCII decimal digits.
    """
    digits = s[start:start + 4]
    if len(digits) == 4 and all(c in '0123456789abcdefABCDEF'
                                for c in digits):
        return int(digits, 16)
    return None


def py_scanstring(s, end, strict=True,
        _b=BACKSLASH, _m=STRINGCHUNK.match):
    """Scan the string s for a JSON string. End is the index of the
    character in s after the quote that started the JSON string.
    Unescapes all valid JSON string escape sequences and raises ValueError
    on attempt to decode an invalid string. If strict is False then literal
    control characters are allowed in the string.

    A ``\\uXXXX`` escape must consist of exactly four hexadecimal digits.
    An escape in the high-surrogate range (D800-DBFF) must be followed
    immediately by a ``\\uXXXX`` escape in the low-surrogate range
    (DC00-DFFF); the two are combined into one character.  Anything else
    after a high surrogate raises ValueError.  A low surrogate that is not
    preceded by a high surrogate is passed through unchanged.

    Returns a tuple of the decoded string and the index of the character in s
    after the end quote."""
    chunks = []
    _append = chunks.append
    # Index of the opening quote, used for "unterminated string" errors.
    begin = end - 1
    while True:
        chunk = _m(s, end)
        if chunk is None:
            raise ValueError(
                errmsg("Unterminated string starting at", s, begin))
        end = chunk.end()
        content, terminator = chunk.groups()
        # Content contains zero or more unescaped string characters
        if content:
            _append(content)
        # Terminator is the end of string, a literal control character,
        # or a backslash denoting that an escape sequence follows
        if terminator == '"':
            break
        elif terminator != '\\':
            if strict:
                msg = "Invalid control character {0!r} at".format(terminator)
                raise ValueError(errmsg(msg, s, end))
            else:
                _append(terminator)
                continue
        try:
            esc = s[end]
        except IndexError:
            raise ValueError(
                errmsg("Unterminated string starting at", s, begin)) from None
        # If not a unicode escape sequence, must be in the lookup table
        if esc != 'u':
            try:
                char = _b[esc]
            except KeyError:
                msg = "Invalid \\escape: {0!r}".format(esc)
                raise ValueError(errmsg(msg, s, end)) from None
            end += 1
        else:
            # s[end] is the 'u'; the four hex digits follow it.
            uni = _hex4(s, end + 1)
            if uni is None:
                msg = "Invalid \\uXXXX escape"
                raise ValueError(errmsg(msg, s, end))
            next_end = end + 5
            if 0xd800 <= uni <= 0xdbff:
                # High surrogate: require a well-formed low surrogate
                # escape right behind it before combining the two.
                msg = "Invalid \\uXXXX\\uXXXX surrogate pair"
                if s[end + 5:end + 7] != '\\u':
                    raise ValueError(errmsg(msg, s, end))
                uni2 = _hex4(s, end + 7)
                if uni2 is None or not 0xdc00 <= uni2 <= 0xdfff:
                    raise ValueError(errmsg(msg, s, end))
                uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00))
                next_end += 6
            char = chr(uni)

            end = next_end
        _append(char)
    return ''.join(chunks), end
131
132
# Prefer the C implementation when the import of it succeeded; otherwise
# fall back to the pure-Python scanner.
scanstring = c_scanstring or py_scanstring

# The four whitespace characters skipped between tokens, as a plain string
# for cheap membership tests and as a pattern for skipping whole runs.
WHITESPACE_STR = ' \t\n\r'
WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS)
Christian Heimes90540002008-05-08 14:29:10 +0000138
139
def JSONObject(s_and_end, strict, scan_once, object_hook, object_pairs_hook,
               memo=None, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
    """Parse a JSON object whose opening brace has already been consumed.

    ``s_and_end`` is a ``(s, end)`` pair: the document and the index just
    after the ``{``.  Keys are read with ``scanstring`` (honouring
    ``strict``) and values with ``scan_once``.  Equal keys are shared
    through ``memo``.

    Returns ``(result, end)`` where ``end`` is the index just after the
    closing ``}``.  ``result`` is ``object_pairs_hook(pairs)`` when that
    hook is given; otherwise it is a dict, passed through ``object_hook``
    when that is given.  Raises ValueError on malformed input.
    """
    s, end = s_and_end
    items = []
    add_item = items.append
    # Backwards compatibility
    if memo is None:
        memo = {}
    share_key = memo.setdefault

    def _finish(stop):
        # Turn the collected pairs into the caller-visible result.
        if object_pairs_hook is not None:
            return object_pairs_hook(items), stop
        mapping = dict(items)
        if object_hook is not None:
            mapping = object_hook(mapping)
        return mapping, stop

    # Use a slice to prevent IndexError from being raised, the following
    # check will raise a more specific ValueError if the string is empty
    ch = s[end:end + 1]
    # Normally we expect ch == '"'
    if ch != '"':
        if ch in _ws:
            end = _w(s, end).end()
            ch = s[end:end + 1]
        # Trivial empty object
        if ch == '}':
            return _finish(end + 1)
        if ch != '"':
            raise ValueError(errmsg(
                "Expecting property name enclosed in double quotes", s, end))
    end += 1
    while True:
        key, end = scanstring(s, end, strict)
        key = share_key(key, key)
        # To skip some function call overhead we optimize the fast paths where
        # the JSON key separator is ": " or just ":".
        colon = end if s[end:end + 1] == ':' else _w(s, end).end()
        if s[colon:colon + 1] != ':':
            raise ValueError(errmsg("Expecting ':' delimiter", s, colon))
        end = colon + 1

        # Skip whitespace before the value; only call the regex when more
        # than one whitespace character is present.
        try:
            if s[end] in _ws:
                end += 1
                if s[end] in _ws:
                    end = _w(s, end + 1).end()
        except IndexError:
            pass

        try:
            value, end = scan_once(s, end)
        except StopIteration as err:
            raise ValueError(errmsg("Expecting value", s, err.value)) from None
        add_item((key, value))

        # Fetch the delimiter after the value; running off the end of the
        # document yields '' so the check below reports the error.
        try:
            ch = s[end]
            if ch in _ws:
                end = _w(s, end + 1).end()
                ch = s[end]
        except IndexError:
            ch = ''
        end += 1

        if ch == '}':
            return _finish(end)
        if ch != ',':
            raise ValueError(errmsg("Expecting ',' delimiter", s, end - 1))

        # After a comma the next token must be the opening quote of a key.
        quote_at = _w(s, end).end()
        if s[quote_at:quote_at + 1] != '"':
            raise ValueError(errmsg(
                "Expecting property name enclosed in double quotes",
                s, quote_at))
        end = quote_at + 1
Christian Heimes90540002008-05-08 14:29:10 +0000220
def JSONArray(s_and_end, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
    """Parse a JSON array whose opening bracket has already been consumed.

    ``s_and_end`` is a ``(s, end)`` pair: the document and the index just
    after the ``[``.  Each element is read with ``scan_once``.

    Returns ``(values, end)`` where ``values`` is a list and ``end`` is the
    index just after the closing ``]``.  Raises ValueError on malformed
    input.
    """
    s, end = s_and_end
    items = []
    ch = s[end:end + 1]
    if ch in _ws:
        end = _w(s, end + 1).end()
        ch = s[end:end + 1]
    # Look-ahead for trivial empty array
    if ch == ']':
        return items, end + 1
    add_item = items.append
    while True:
        try:
            value, end = scan_once(s, end)
        except StopIteration as err:
            raise ValueError(errmsg("Expecting value", s, err.value)) from None
        add_item(value)

        # Find the delimiter that follows the element.
        ch = s[end:end + 1]
        if ch in _ws:
            end = _w(s, end + 1).end()
            ch = s[end:end + 1]
        if ch == ']':
            return items, end + 1
        if ch != ',':
            raise ValueError(errmsg("Expecting ',' delimiter", s, end))
        end += 1

        # Skip whitespace before the next element; only call the regex
        # when more than one whitespace character is present.
        try:
            if s[end] in _ws:
                end += 1
                if s[end] in _ws:
                    end = _w(s, end + 1).end()
        except IndexError:
            pass
Christian Heimes90540002008-05-08 14:29:10 +0000256
257
class JSONDecoder:
    """Simple JSON <http://json.org> decoder

    Performs the following translations in decoding by default:

    +---------------+-------------------+
    | JSON          | Python            |
    +===============+===================+
    | object        | dict              |
    +---------------+-------------------+
    | array         | list              |
    +---------------+-------------------+
    | string        | str               |
    +---------------+-------------------+
    | number (int)  | int               |
    +---------------+-------------------+
    | number (real) | float             |
    +---------------+-------------------+
    | true          | True              |
    +---------------+-------------------+
    | false         | False             |
    +---------------+-------------------+
    | null          | None              |
    +---------------+-------------------+

    It also understands ``NaN``, ``Infinity``, and ``-Infinity`` as
    their corresponding ``float`` values, which is outside the JSON spec.

    """

    def __init__(self, object_hook=None, parse_float=None,
            parse_int=None, parse_constant=None, strict=True,
            object_pairs_hook=None):
        """Configure how decoded JSON values are turned into Python objects.

        ``object_hook``, if specified, will be called with the result
        of every JSON object decoded and its return value will be used in
        place of the given ``dict``.  This can be used to provide custom
        deserializations (e.g. to support JSON-RPC class hinting).

        ``object_pairs_hook``, if specified, will be called with the result
        of every JSON object decoded, as an ordered list of pairs.  Its
        return value will be used instead of the ``dict``.  This can be used
        to implement custom decoders that rely on the order in which the key
        and value pairs are decoded (for example, collections.OrderedDict
        will remember the order of insertion).  If ``object_hook`` is also
        defined, the ``object_pairs_hook`` takes priority.

        ``parse_float``, if specified, will be called with the string
        of every JSON float to be decoded.  By default this is equivalent to
        float(num_str).  This can be used to use another datatype or parser
        for JSON floats (e.g. decimal.Decimal).

        ``parse_int``, if specified, will be called with the string
        of every JSON int to be decoded.  By default this is equivalent to
        int(num_str).  This can be used to use another datatype or parser
        for JSON integers (e.g. float).

        ``parse_constant``, if specified, will be called with one of the
        following strings: -Infinity, Infinity, NaN.
        This can be used to raise an exception if invalid JSON numbers
        are encountered.

        If ``strict`` is false (true is the default), then control
        characters will be allowed inside strings.  Control characters in
        this context are those with character codes in the 0-31 range,
        including ``'\\t'`` (tab), ``'\\n'``, ``'\\r'`` and ``'\\0'``.

        """
        # Hooks and conversion callables; a falsy parse_* argument selects
        # the built-in default.
        self.object_hook = object_hook
        self.object_pairs_hook = object_pairs_hook
        self.parse_float = parse_float or float
        self.parse_int = parse_int or int
        self.parse_constant = parse_constant or _CONSTANTS.__getitem__
        self.strict = strict

        # Parsers for the composite and string productions.
        self.parse_object = JSONObject
        self.parse_array = JSONArray
        self.parse_string = scanstring
        self.memo = {}

        # Built last: make_scanner receives this instance, so every
        # attribute above is already in place when it runs.
        self.scan_once = scanner.make_scanner(self)

    def decode(self, s, _w=WHITESPACE.match):
        """Return the Python representation of ``s`` (a ``str`` instance
        containing a JSON document).

        Whitespace before and after the document is ignored; anything else
        after the document raises ValueError ("Extra data").

        """
        start = _w(s, 0).end()
        obj, end = self.raw_decode(s, idx=start)
        end = _w(s, end).end()
        total = len(s)
        if end != total:
            raise ValueError(errmsg("Extra data", s, end, total))
        return obj

    def raw_decode(self, s, idx=0):
        """Decode a JSON document from ``s`` (a ``str`` beginning with
        a JSON document) and return a 2-tuple of the Python
        representation and the index in ``s`` where the document ended.

        Scanning starts at index ``idx``.  This can be used to decode a
        JSON document from a string that may have extraneous data at the
        end.

        """
        try:
            obj, end = self.scan_once(s, idx)
        except StopIteration as err:
            # The exception's value is used as the error offset.
            raise ValueError(errmsg("Expecting value", s, err.value)) from None
        else:
            return obj, end