Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 1 | """JSON token scanner |
Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 2 | """ |
Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 3 | import re |
Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 4 | try: |
| 5 | from _json import make_scanner as c_make_scanner |
| 6 | except ImportError: |
| 7 | c_make_scanner = None |
Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 8 | |
Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 9 | __all__ = ['make_scanner'] |
Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 10 | |
# Matches a single JSON number starting at the requested position and
# captures its three parts:
#   group 1 - integer part, with an optional leading '-'; either a lone '0'
#             or a non-zero digit followed by more digits
#   group 2 - optional fraction, including the leading '.'
#   group 3 - optional exponent, including the 'e'/'E' and optional sign
# Digits are written as [0-9] instead of \d on purpose: JSON numbers are
# ASCII-only, whereas \d also matches other Unicode decimal digits when the
# pattern is applied to Unicode text.
NUMBER_RE = re.compile(
    r'(-?(?:0|[1-9][0-9]*))(\.[0-9]+)?([eE][-+]?[0-9]+)?',
    (re.VERBOSE | re.MULTILINE | re.DOTALL))
Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 14 | |
def py_make_scanner(context):
    """Build the pure-Python ``scan_once(string, idx)`` function.

    The following attributes are read from *context* once, when the scanner
    is created: ``parse_object``, ``parse_array``, ``parse_string``,
    ``encoding``, ``strict``, ``parse_float``, ``parse_int``,
    ``parse_constant``, ``object_hook`` and ``object_pairs_hook``.

    The returned function inspects the character at ``string[idx]`` and
    decodes the single value that starts there:

    * ``"`` / ``{`` / ``[`` are delegated to ``parse_string``,
      ``parse_object`` and ``parse_array``; whatever they return is returned
      unchanged (presumably a ``(value, end)`` pair like the other
      branches -- confirm against those parsers).
    * ``null`` / ``true`` / ``false`` yield ``None`` / ``True`` / ``False``.
    * Numbers are matched with ``NUMBER_RE``; ``parse_int`` gets the text
      when there is neither fraction nor exponent, ``parse_float`` otherwise.
    * ``NaN`` / ``Infinity`` / ``-Infinity`` are handed to
      ``parse_constant``.

    For literals, numbers and constants the result is a
    ``(value, end_index)`` tuple.  ``StopIteration`` is raised when *idx* is
    past the end of *string* or when nothing above matches.
    """
    decode_object = context.parse_object
    decode_array = context.parse_array
    decode_string = context.parse_string
    number_at = NUMBER_RE.match
    text_encoding = context.encoding
    strict_mode = context.strict
    to_float = context.parse_float
    to_int = context.parse_int
    to_constant = context.parse_constant
    obj_hook = context.object_hook
    pairs_hook = context.object_pairs_hook

    def _scan_once(string, idx):
        try:
            lead = string[idx]
        except IndexError:
            raise StopIteration

        # Containers and strings: the sub-parser starts just past the
        # opening delimiter.
        if lead == '"':
            return decode_string(string, idx + 1, text_encoding, strict_mode)
        if lead == '{':
            # _scan_once is passed along so nested values can be scanned.
            return decode_object(
                (string, idx + 1), text_encoding, strict_mode,
                _scan_once, obj_hook, pairs_hook)
        if lead == '[':
            return decode_array((string, idx + 1), _scan_once)

        # Bare literals: the first-character test avoids slicing when the
        # literal cannot possibly match.
        if lead == 'n' and string[idx:idx + 4] == 'null':
            return None, idx + 4
        if lead == 't' and string[idx:idx + 4] == 'true':
            return True, idx + 4
        if lead == 'f' and string[idx:idx + 5] == 'false':
            return False, idx + 5

        found = number_at(string, idx)
        if found is None:
            # Not a number: the only tokens left are the named constants.
            if lead == 'N' and string[idx:idx + 3] == 'NaN':
                return to_constant('NaN'), idx + 3
            if lead == 'I' and string[idx:idx + 8] == 'Infinity':
                return to_constant('Infinity'), idx + 8
            if lead == '-' and string[idx:idx + 9] == '-Infinity':
                return to_constant('-Infinity'), idx + 9
            raise StopIteration

        whole, fraction, exponent = found.groups()
        if not (fraction or exponent):
            return to_int(whole), found.end()
        number_text = whole + (fraction or '') + (exponent or '')
        return to_float(number_text), found.end()

    return _scan_once
Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 66 | |
# Public entry point: use the C scanner when the optional import provided
# one, otherwise fall back to the pure-Python implementation.
make_scanner = c_make_scanner if c_make_scanner else py_make_scanner