Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 1 | """JSON token scanner |
Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 2 | """ |
Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 3 | import re |
Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 4 | try: |
| 5 | from _json import make_scanner as c_make_scanner |
| 6 | except ImportError: |
| 7 | c_make_scanner = None |
Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 8 | |
Bob Ippolito | d914e3f | 2009-03-17 23:19:00 +0000 | [diff] [blame] | 9 | __all__ = ['make_scanner'] |
Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 10 | |
# Matches a JSON number and captures three groups: the (optionally negative)
# integer part, the optional fraction (including its leading '.') and the
# optional exponent (including its leading 'e' or 'E').
#
# Digits are spelled [0-9] rather than \d on purpose: in a Python 3 str
# pattern \d also matches non-ASCII Unicode decimal digits, which the JSON
# grammar does not allow.  [0-9] behaves the same on every interpreter.
#
# The flags are kept as they were; the pattern contains no unescaped
# whitespace, '#', '^', '$' or '.', so they do not change what it matches.
NUMBER_RE = re.compile(
    r'(-?(?:0|[1-9][0-9]*))(\.[0-9]+)?([eE][-+]?[0-9]+)?',
    (re.VERBOSE | re.MULTILINE | re.DOTALL))
Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 14 | |
def py_make_scanner(context):
    """Return a pure-Python ``scan_once(string, idx)`` function for *context*.

    These attributes are read from *context* once, when the scanner is
    built: ``parse_object``, ``parse_array``, ``parse_string``,
    ``encoding``, ``strict``, ``parse_float``, ``parse_int``,
    ``parse_constant`` and ``object_hook``.

    The returned function looks at ``string[idx]`` and decodes the value
    that starts there:

    * ``"``, ``{`` and ``[`` are delegated to the context's string, object
      and array parsers; whatever they return is returned unchanged.
    * ``null``, ``true`` and ``false`` yield ``(None | True | False, end)``.
    * A number matched by ``NUMBER_RE`` yields ``(value, end)``, converted
      with ``parse_float`` when it has a fraction or exponent and with
      ``parse_int`` otherwise.
    * ``NaN``, ``Infinity`` and ``-Infinity`` yield
      ``(parse_constant(name), end)``.

    ``StopIteration`` is raised when ``string[idx]`` raises ``IndexError``
    or when nothing above matches at ``idx``.
    """
    parse_object = context.parse_object
    parse_array = context.parse_array
    parse_string = context.parse_string
    scan_number = NUMBER_RE.match
    encoding = context.encoding
    strict = context.strict
    to_float = context.parse_float
    to_int = context.parse_int
    to_constant = context.parse_constant
    object_hook = context.object_hook

    # (first character, full token, decoded value)
    keywords = (
        ('n', 'null', None),
        ('t', 'true', True),
        ('f', 'false', False),
    )
    # Non-standard constants; only tried after the number pattern has
    # failed, so an ordinary negative number is never mistaken for
    # '-Infinity'.
    constants = ('NaN', 'Infinity', '-Infinity')

    def _scan_once(string, idx):
        try:
            head = string[idx]
        except IndexError:
            raise StopIteration

        # Containers and strings: the opening delimiter is consumed here
        # and the rest is handled by the matching context parser.
        if head == '"':
            return parse_string(string, idx + 1, encoding, strict)
        if head == '{':
            return parse_object((string, idx + 1), encoding, strict,
                                _scan_once, object_hook)
        if head == '[':
            return parse_array((string, idx + 1), _scan_once)

        # The cheap first-character test comes first so the slice is only
        # taken for plausible candidates.
        for first, token, value in keywords:
            if head == first and string[idx:idx + len(token)] == token:
                return value, idx + len(token)

        number = scan_number(string, idx)
        if number is not None:
            integer, frac, exp = number.groups()
            if frac or exp:
                value = to_float(integer + (frac or '') + (exp or ''))
            else:
                value = to_int(integer)
            return value, number.end()

        for name in constants:
            if head == name[0] and string[idx:idx + len(name)] == name:
                return to_constant(name), idx + len(name)

        raise StopIteration

    return _scan_once
Brett Cannon | 4b964f9 | 2008-05-05 20:21:38 +0000 | [diff] [blame] | 65 | |
# Use the C scanner from _json when that import succeeded (c_make_scanner is
# None otherwise); fall back to the pure-Python implementation.
make_scanner = c_make_scanner if c_make_scanner else py_make_scanner