| """Implementation of JSONDecoder |
| """ |
| |
| import re |
| import sys |
| |
| from json.scanner import Scanner, pattern |
| try: |
| from _json import scanstring as c_scanstring |
| except ImportError: |
| c_scanstring = None |
| |
| __all__ = ['JSONDecoder'] |
| |
# Regex flags used when compiling the patterns defined in this module.
FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL

# Float values that the non-standard literals NaN / Infinity / -Infinity
# decode to.
NaN = float('nan')
PosInf = float('inf')
NegInf = float('-inf')
| |
| |
def linecol(doc, pos):
    """Return the 1-based ``(lineno, colno)`` of offset ``pos`` in ``doc``.

    ``doc`` may be a ``str`` or ``bytes`` document; ``pos`` is a 0-based
    offset into it.  Both the line and the column are counted from 1 on
    every line, including the first one.
    """
    newline = b'\n' if isinstance(doc, bytes) else '\n'
    lineno = doc.count(newline, 0, pos) + 1
    # rfind() yields -1 when no newline precedes ``pos``, so the first line
    # gets ``pos + 1`` -- the same 1-based column every other line gets.
    colno = pos - doc.rfind(newline, 0, pos)
    return lineno, colno
| |
| |
def errmsg(msg, doc, pos, end=None):
    """Return ``msg`` followed by the location of ``pos`` within ``doc``.

    With only ``pos`` the result names a single line/column/offset.  When
    ``end`` is also given, the result describes the range from ``pos`` to
    ``end``.  Line and column values come from ``linecol``.
    """
    start_line, start_col = linecol(doc, pos)
    if end is not None:
        stop_line, stop_col = linecol(doc, end)
        template = '{0}: line {1} column {2} - line {3} column {4} (char {5} - {6})'
        return template.format(
            msg, start_line, start_col, stop_line, stop_col, pos, end)
    template = '{0}: line {1} column {2} (char {3})'
    return template.format(msg, start_line, start_col, pos)
| |
| |
# Literal text of each constant mapped to the Python value it decodes to
# when the decoding context supplies no ``parse_constant`` hook.
_CONSTANTS = {
    # Float literals that are outside the JSON spec.
    '-Infinity': NegInf,
    'Infinity': PosInf,
    'NaN': NaN,
    # Standard JSON literals.
    'true': True,
    'false': False,
    'null': None,
}
| |
| |
def JSONConstant(match, context, c=_CONSTANTS):
    """Decode one of the constant literals matched by ``match``.

    If ``context`` has a non-None ``parse_constant`` attribute it is called
    with the matched text and its result is used; otherwise the value is
    looked up in ``c``.  Returns ``(value, None)``.
    """
    literal = match.group(0)
    hook = getattr(context, 'parse_constant', None)
    value = c[literal] if hook is None else hook(literal)
    return value, None
pattern('(-?Infinity|NaN|true|false|null)')(JSONConstant)
| |
| |
def JSONNumber(match, context):
    """Decode the JSON number matched by ``match``.

    A number with a fraction or exponent part is passed to the context's
    ``parse_float`` (default ``float``); any other number is passed to
    ``parse_int`` (default ``int``).  Returns ``(value, None)``.
    """
    # Re-match with this function's own ``regex`` attribute over the same
    # span so the groups are (integer, fraction, exponent).
    # NOTE(review): ``regex`` is presumably attached by ``pattern`` -- confirm.
    number = JSONNumber.regex.match(match.string, *match.span())
    integer, frac, exp = number.groups()
    if not (frac or exp):
        parse = getattr(context, 'parse_int', None) or int
        return parse(integer), None
    parse = getattr(context, 'parse_float', None) or float
    return parse(integer + (frac or '') + (exp or '')), None
pattern(r'(-?(?:0|[1-9][0-9]*))(\.[0-9]+)?([eE][-+]?[0-9]+)?')(JSONNumber)
| |
| |
# Matches a run of ordinary string content (group 1) up to the next
# character that needs attention (group 2): a quote, a backslash or a
# control character.
STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', flags=FLAGS)

# Single-character escapes (the character following a backslash) mapped to
# the character they stand for.
BACKSLASH = {
    '"': '"',
    '\\': '\\',
    '/': '/',
    'b': '\b',
    'f': '\f',
    'n': '\n',
    'r': '\r',
    't': '\t',
}

# Encoding used by py_scanstring when the caller passes ``encoding=None``.
DEFAULT_ENCODING = "utf-8"
| |
| |
def _decode_uXXXX(s, pos):
    """Return the integer encoded by the four hex digits at ``s[pos:pos + 4]``.

    Raises ValueError unless exactly four hexadecimal digits are present.
    ``int(x, 16)`` on its own would also accept forms such as ``0x1f``,
    ``+1ff``, digits padded with whitespace or separated by underscores.
    """
    digits = s[pos:pos + 4]
    if len(digits) != 4 or any(
            ch not in '0123456789abcdefABCDEF' for ch in digits):
        raise ValueError
    return int(digits, 16)


def py_scanstring(s, end, encoding=None, strict=True, _b=BACKSLASH, _m=STRINGCHUNK.match):
    """Scan the JSON string in ``s`` whose content starts at index ``end``.

    ``end`` is the index of the first character after the opening quote.
    ``encoding`` (``DEFAULT_ENCODING`` when None) is only used to decode
    chunks that are not already ``str``.  When ``strict`` is true, a raw
    control character inside the string raises ValueError; otherwise it is
    kept as-is.  ``_b`` and ``_m`` are the escape table and chunk matcher,
    bound as defaults.

    Returns ``(decoded_string, index_after_closing_quote)``.

    Raises ValueError for an unterminated string, an unknown backslash
    escape, a malformed ``\\uXXXX`` escape, or a high surrogate that is not
    followed by a ``\\uXXXX`` low surrogate (0xDC00-0xDFFF).
    """
    if encoding is None:
        encoding = DEFAULT_ENCODING
    chunks = []
    _append = chunks.append
    # Index of the opening quote, used in "unterminated string" errors.
    begin = end - 1
    while 1:
        chunk = _m(s, end)
        if chunk is None:
            raise ValueError(
                errmsg("Unterminated string starting at", s, begin))
        end = chunk.end()
        content, terminator = chunk.groups()
        if content:
            if not isinstance(content, str):
                content = str(content, encoding)
            _append(content)
        if terminator == '"':
            break
        elif terminator != '\\':
            # The terminator is a raw control character.
            if strict:
                msg = "Invalid control character {0!r} at".format(terminator)
                raise ValueError(errmsg(msg, s, end))
            else:
                _append(terminator)
                continue
        # The terminator is a backslash: decode the escape that follows it.
        try:
            esc = s[end]
        except IndexError:
            raise ValueError(
                errmsg("Unterminated string starting at", s, begin))
        if esc != 'u':
            try:
                m = _b[esc]
            except KeyError:
                msg = "Invalid \\escape: {0!r}".format(esc)
                raise ValueError(errmsg(msg, s, end))
            end += 1
        else:
            # ``end`` points at the 'u'; the four hex digits follow it.
            next_end = end + 5
            msg = "Invalid \\uXXXX escape"
            try:
                uni = _decode_uXXXX(s, end + 1)
                if 0xd800 <= uni <= 0xdbff and sys.maxunicode > 65535:
                    # High surrogate: a second \uXXXX escape holding the
                    # low surrogate must follow immediately.
                    msg = "Invalid \\uXXXX\\uXXXX surrogate pair"
                    if s[end + 5:end + 7] != '\\u':
                        raise ValueError
                    uni2 = _decode_uXXXX(s, end + 7)
                    # Without this check any code unit would be folded into
                    # the pair and produce an unrelated character.
                    if not 0xdc00 <= uni2 <= 0xdfff:
                        raise ValueError
                    uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00))
                    next_end += 6
                m = chr(uni)
            except ValueError:
                raise ValueError(errmsg(msg, s, end))
            end = next_end
        _append(m)
    return ''.join(chunks), end
| |
| |
# Use the C implementation when it was importable; otherwise use the
# pure-Python scanner.
scanstring = py_scanstring if c_scanstring is None else c_scanstring
| |
def JSONString(match, context):
    """Decode the JSON string whose opening quote was matched by ``match``.

    The context's ``encoding`` (default None) and ``strict`` (default True)
    attributes are forwarded to ``scanstring``, whose result is returned
    unchanged.
    """
    options = (
        getattr(context, 'encoding', None),
        getattr(context, 'strict', True),
    )
    return scanstring(match.string, match.end(), *options)
pattern(r'"')(JSONString)
| |
| |
# Matches any (possibly empty) run of whitespace; used to skip between tokens.
WHITESPACE = re.compile(r'\s*', flags=FLAGS)
| |
| |
def JSONObject(match, context, _w=WHITESPACE.match):
    """Decode the JSON object whose opening ``{`` was matched by ``match``.

    ``context`` may provide ``encoding``, ``strict`` and ``object_hook``
    attributes; each is looked up with ``getattr`` and may be absent.
    ``_w`` is the whitespace matcher, bound as a default.

    Returns ``(obj, end)`` where ``obj`` is the dict of decoded pairs -- or
    the result of ``object_hook(dict)`` when a hook is set, including for an
    empty object -- and ``end`` is the index just past the closing ``}``.

    Raises ValueError when a property name, ``:``, value or ``,`` is missing.
    """
    pairs = {}
    s = match.string
    end = _w(s, match.end()).end()
    nextchar = s[end:end + 1]
    object_hook = getattr(context, 'object_hook', None)
    # Trivial empty object: it still goes through object_hook so that the
    # hook sees every decoded object.
    if nextchar == '}':
        if object_hook is not None:
            pairs = object_hook(pairs)
        return pairs, end + 1
    if nextchar != '"':
        raise ValueError(errmsg("Expecting property name", s, end))
    end += 1
    encoding = getattr(context, 'encoding', None)
    strict = getattr(context, 'strict', True)
    # JSONScanner is defined later in the module, so it is resolved at call
    # time rather than bound as a default argument.
    iterscan = JSONScanner.iterscan
    while True:
        # ``end`` is just past the opening quote of the property name.
        key, end = scanstring(s, end, encoding, strict)
        end = _w(s, end).end()
        if s[end:end + 1] != ':':
            raise ValueError(errmsg("Expecting : delimiter", s, end))
        end = _w(s, end + 1).end()
        try:
            value, end = next(iterscan(s, idx=end, context=context))
        except StopIteration:
            raise ValueError(errmsg("Expecting object", s, end))
        pairs[key] = value
        end = _w(s, end).end()
        nextchar = s[end:end + 1]
        end += 1
        if nextchar == '}':
            break
        if nextchar != ',':
            # ``end`` was already advanced past the offending character.
            raise ValueError(errmsg("Expecting , delimiter", s, end - 1))
        end = _w(s, end).end()
        nextchar = s[end:end + 1]
        end += 1
        if nextchar != '"':
            raise ValueError(errmsg("Expecting property name", s, end - 1))
    if object_hook is not None:
        pairs = object_hook(pairs)
    return pairs, end
pattern(r'{')(JSONObject)
| |
| |
def JSONArray(match, context, _w=WHITESPACE.match):
    """Decode the JSON array whose opening ``[`` was matched by ``match``.

    ``context`` is passed through to the scanner for every element.
    ``_w`` is the whitespace matcher, bound as a default.

    Returns ``(values, end)`` where ``values`` is the list of decoded
    elements and ``end`` is the index just past the closing ``]``.

    Raises ValueError when an element or a ``,`` delimiter is missing.
    """
    values = []
    s = match.string
    end = _w(s, match.end()).end()
    # Look-ahead for trivial empty array
    nextchar = s[end:end + 1]
    if nextchar == ']':
        return values, end + 1
    # JSONScanner is defined later in the module, so it is resolved at call
    # time rather than bound as a default argument.
    iterscan = JSONScanner.iterscan
    while True:
        try:
            value, end = next(iterscan(s, idx=end, context=context))
        except StopIteration:
            raise ValueError(errmsg("Expecting object", s, end))
        values.append(value)
        end = _w(s, end).end()
        nextchar = s[end:end + 1]
        end += 1
        if nextchar == ']':
            break
        if nextchar != ',':
            # ``end`` was already advanced past the offending character, so
            # report ``end - 1`` (the same position JSONObject reports).
            raise ValueError(errmsg("Expecting , delimiter", s, end - 1))
        end = _w(s, end).end()
    return values, end
pattern(r'\[')(JSONArray)
| |
| |
# The scanner callbacks for every kind of JSON value, in the order they are
# handed to Scanner.
ANYTHING = [JSONObject, JSONArray, JSONString, JSONConstant, JSONNumber]

# Module-level scanner that JSONObject and JSONArray use for nested values.
JSONScanner = Scanner(ANYTHING)
| |
| |
class JSONDecoder(object):
    """Simple JSON <http://json.org> decoder

    Performs the following translations in decoding by default:

    +---------------+-------------------+
    | JSON          | Python            |
    +===============+===================+
    | object        | dict              |
    +---------------+-------------------+
    | array         | list              |
    +---------------+-------------------+
    | string        | str               |
    +---------------+-------------------+
    | number (int)  | int               |
    +---------------+-------------------+
    | number (real) | float             |
    +---------------+-------------------+
    | true          | True              |
    +---------------+-------------------+
    | false         | False             |
    +---------------+-------------------+
    | null          | None              |
    +---------------+-------------------+

    It also understands ``NaN``, ``Infinity``, and ``-Infinity`` as
    their corresponding ``float`` values, which is outside the JSON spec.
    """

    # Scanner shared by all instances; each instance is passed to it as the
    # ``context`` from which the parse hooks are read.
    _scanner = Scanner(ANYTHING)
    # NOTE(review): ``__all__`` has no special meaning as a class attribute;
    # presumably it is informational only -- confirm before removing.
    __all__ = ['__init__', 'decode', 'raw_decode']

    def __init__(self, encoding=None, object_hook=None, parse_float=None,
                 parse_int=None, parse_constant=None, strict=True):
        """Store the decoding options on the instance.

        ``encoding`` is handed to the string scanner, which uses it to
        decode string content that is not already ``str`` (utf-8 when
        None).

        ``object_hook``, if specified, will be called with the result of
        every JSON object decoded and its return value will be used in
        place of the given ``dict``.  This can be used to provide custom
        deserializations (e.g. to support JSON-RPC class hinting).

        ``parse_float``, if specified, will be called with the string
        of every JSON float to be decoded.  By default this is equivalent
        to float(num_str).  This can be used to use another datatype or
        parser for JSON floats (e.g. decimal.Decimal).

        ``parse_int``, if specified, will be called with the string
        of every JSON int to be decoded.  By default this is equivalent to
        int(num_str).  This can be used to use another datatype or parser
        for JSON integers (e.g. float).

        ``parse_constant``, if specified, will be called with one of the
        following strings: -Infinity, Infinity, NaN, null, true, false.
        This can be used to raise an exception if invalid JSON numbers
        are encountered.

        ``strict`` is handed to the string scanner; when false, raw control
        characters are accepted inside strings instead of raising.

        """
        self.strict = strict
        self.encoding = encoding
        self.object_hook = object_hook
        self.parse_int = parse_int
        self.parse_float = parse_float
        self.parse_constant = parse_constant

    def decode(self, s, _w=WHITESPACE.match):
        """Return the Python representation of ``s``, a JSON document.

        Leading and trailing whitespace is skipped.  Raises ValueError
        ("Extra data") when anything else follows the document.

        """
        start = _w(s, 0).end()
        obj, stop = self.raw_decode(s, idx=start)
        stop = _w(s, stop).end()
        if stop == len(s):
            return obj
        raise ValueError(errmsg("Extra data", s, stop, len(s)))

    def raw_decode(self, s, **kw):
        """Decode a JSON document that ``s`` begins with.

        Returns a 2-tuple of the Python representation and the index in
        ``s`` where the document ended.  Keyword arguments are forwarded to
        the scanner's ``iterscan``; ``context`` defaults to this decoder.

        This can be used to decode a JSON document from a string that may
        have extraneous data at the end.

        Raises ValueError when the scanner yields no value.

        """
        kw.setdefault('context', self)
        try:
            result, stop = next(self._scanner.iterscan(s, **kw))
        except StopIteration:
            raise ValueError("No JSON object could be decoded")
        else:
            return result, stop