| #! /usr/bin/env python3 |
# This script generates token-related files from Grammar/Tokens:
| # |
| # Doc/library/token-list.inc |
| # Include/token.h |
| # Parser/token.c |
| # Lib/token.py |
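#
# Usage (typically from the root of a CPython checkout, since the default
# output paths above are relative):
#
#   generate_token.py {h,c,rst,py} [INFILE [OUTFILE]]
#
# INFILE defaults to Grammar/Tokens; OUTFILE defaults to the path listed
# above for the chosen target.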
| |
| |
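# Token (terminal) values stay below NT_OFFSET; the parser numbers
# non-terminal symbols from NT_OFFSET upward (see ISTERMINAL and
# ISNONTERMINAL in the generated files).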
| NT_OFFSET = 256 |
| |
| def load_tokens(path): |
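    """Parse a Grammar/Tokens file.

    Each non-comment line names a token, optionally followed by its quoted
    spelling (for example ``LPAR '('``).  Return a tuple
    (tok_names, ERRORTOKEN, string_to_tok): the token names in declaration
    order, the numeric value of the ERRORTOKEN entry, and a mapping from
    exact spellings to token values.
    """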
| tok_names = [] |
| string_to_tok = {} |
| ERRORTOKEN = None |
| with open(path) as fp: |
| for line in fp: |
| line = line.strip() |
| # strip comments |
| i = line.find('#') |
| if i >= 0: |
| line = line[:i].strip() |
| if not line: |
| continue |
| fields = line.split() |
| name = fields[0] |
| value = len(tok_names) |
| if name == 'ERRORTOKEN': |
| ERRORTOKEN = value |
| string = fields[1] if len(fields) > 1 else None |
| if string: |
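                # The spelling is quoted in the Tokens file; eval() reduces
                # it to the bare string (e.g. "'('" becomes '(').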
| string = eval(string) |
| string_to_tok[string] = value |
| tok_names.append(name) |
| return tok_names, ERRORTOKEN, string_to_tok |
| |
| |
| def update_file(file, content): |
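    """Write *content* to *file* only if it changed; return True if written."""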
| try: |
| with open(file, 'r') as fobj: |
| if fobj.read() == content: |
| return False |
| except (OSError, ValueError): |
| pass |
| with open(file, 'w') as fobj: |
| fobj.write(content) |
| return True |
| |
| |
| token_h_template = """\ |
| /* Auto-generated by Tools/scripts/generate_token.py */ |
| |
| /* Token types */ |
| #ifndef Py_LIMITED_API |
| #ifndef Py_TOKEN_H |
| #define Py_TOKEN_H |
| #ifdef __cplusplus |
| extern "C" { |
| #endif |
| |
| #undef TILDE /* Prevent clash of our definition with system macro. Ex AIX, ioctl.h */ |
| |
| %s\ |
| #define N_TOKENS %d |
| #define NT_OFFSET %d |
| |
| /* Special definitions for cooperation with parser */ |
| |
| #define ISTERMINAL(x) ((x) < NT_OFFSET) |
| #define ISNONTERMINAL(x) ((x) >= NT_OFFSET) |
| #define ISEOF(x) ((x) == ENDMARKER) |
| |
| |
| PyAPI_DATA(const char * const) _PyParser_TokenNames[]; /* Token names */ |
| PyAPI_FUNC(int) PyToken_OneChar(int); |
| PyAPI_FUNC(int) PyToken_TwoChars(int, int); |
| PyAPI_FUNC(int) PyToken_ThreeChars(int, int, int); |
| |
| #ifdef __cplusplus |
| } |
| #endif |
| #endif /* !Py_TOKEN_H */ |
| #endif /* Py_LIMITED_API */ |
| """ |
| |
| def make_h(infile, outfile='Include/token.h'): |
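    """Regenerate the C header of token #defines.

    Only the tokens up to and including ERRORTOKEN get a #define; the
    tokens after it are needed only by tokenize.py.
    """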
| tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile) |
| |
| defines = [] |
| for value, name in enumerate(tok_names[:ERRORTOKEN + 1]): |
| defines.append("#define %-15s %d\n" % (name, value)) |
| |
| if update_file(outfile, token_h_template % ( |
| ''.join(defines), |
| len(tok_names), |
| NT_OFFSET |
| )): |
| print("%s regenerated from %s" % (outfile, infile)) |
| |
| |
| token_c_template = """\ |
| /* Auto-generated by Tools/scripts/generate_token.py */ |
| |
| #include "Python.h" |
| #include "token.h" |
| |
| /* Token names */ |
| |
| const char * const _PyParser_TokenNames[] = { |
| %s\ |
| }; |
| |
| /* Return the token corresponding to a single character */ |
| |
| int |
| PyToken_OneChar(int c1) |
| { |
| %s\ |
| return OP; |
| } |
| |
| int |
| PyToken_TwoChars(int c1, int c2) |
| { |
| %s\ |
| return OP; |
| } |
| |
| int |
| PyToken_ThreeChars(int c1, int c2, int c3) |
| { |
| %s\ |
| return OP; |
| } |
| """ |
| |
| def generate_chars_to_token(mapping, n=1): |
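    """Generate a nested C switch that maps operator characters to tokens.

    *mapping* maps each character either to a token name (a leaf) or to a
    nested dict keyed on the next character; *n* is both the nesting depth
    and the index of the character argument tested at this level (c1, c2,
    c3).  The output looks like::

        switch (c1) {
        case '(': return LPAR;
        ...
        }
    """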
| result = [] |
| write = result.append |
| indent = ' ' * n |
| write(indent) |
| write('switch (c%d) {\n' % (n,)) |
| for c in sorted(mapping): |
| write(indent) |
| value = mapping[c] |
| if isinstance(value, dict): |
| write("case '%s':\n" % (c,)) |
| write(generate_chars_to_token(value, n + 1)) |
| write(indent) |
| write(' break;\n') |
| else: |
| write("case '%s': return %s;\n" % (c, value)) |
| write(indent) |
| write('}\n') |
| return ''.join(result) |
| |
| def make_c(infile, outfile='Parser/token.c'): |
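    """Regenerate the _PyParser_TokenNames table and the PyToken_OneChar,
    PyToken_TwoChars and PyToken_ThreeChars lookup functions.
    """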
| tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile) |
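    # The C tokenizer also maps the legacy '<>' spelling to the same token
    # as '!=' (NOTEQUAL).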
| string_to_tok['<>'] = string_to_tok['!='] |
| chars_to_token = {} |
| for string, value in string_to_tok.items(): |
| assert 1 <= len(string) <= 3 |
| name = tok_names[value] |
| m = chars_to_token.setdefault(len(string), {}) |
| for c in string[:-1]: |
| m = m.setdefault(c, {}) |
| m[string[-1]] = name |
| |
| names = [] |
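    # ERRORTOKEN and every token after it get angle-bracketed names in
    # _PyParser_TokenNames.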
| for value, name in enumerate(tok_names): |
| if value >= ERRORTOKEN: |
| name = '<%s>' % name |
| names.append(' "%s",\n' % name) |
| names.append(' "<N_TOKENS>",\n') |
| |
| if update_file(outfile, token_c_template % ( |
| ''.join(names), |
| generate_chars_to_token(chars_to_token[1]), |
| generate_chars_to_token(chars_to_token[2]), |
| generate_chars_to_token(chars_to_token[3]) |
| )): |
| print("%s regenerated from %s" % (outfile, infile)) |
| |
| |
| token_inc_template = """\ |
| .. Auto-generated by Tools/scripts/generate_token.py |
| %s |
| .. data:: N_TOKENS |
| |
| .. data:: NT_OFFSET |
| """ |
| |
| def make_rst(infile, outfile='Doc/library/token-list.inc'): |
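    """Regenerate the reST include file documenting the token constants."""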
| tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile) |
| tok_to_string = {value: s for s, value in string_to_tok.items()} |
| |
| names = [] |
| for value, name in enumerate(tok_names[:ERRORTOKEN + 1]): |
| names.append('.. data:: %s' % (name,)) |
| if value in tok_to_string: |
| names.append('') |
| names.append(' Token value for ``"%s"``.' % tok_to_string[value]) |
| names.append('') |
| |
| if update_file(outfile, token_inc_template % '\n'.join(names)): |
| print("%s regenerated from %s" % (outfile, infile)) |
| |
| |
| token_py_template = '''\ |
| """Token constants.""" |
| # Auto-generated by Tools/scripts/generate_token.py |
| |
| __all__ = ['tok_name', 'ISTERMINAL', 'ISNONTERMINAL', 'ISEOF'] |
| |
| %s |
| N_TOKENS = %d |
| # Special definitions for cooperation with parser |
| NT_OFFSET = %d |
| |
| tok_name = {value: name |
| for name, value in globals().items() |
| if isinstance(value, int) and not name.startswith('_')} |
| __all__.extend(tok_name.values()) |
| |
| EXACT_TOKEN_TYPES = { |
| %s |
| } |
| |
| def ISTERMINAL(x): |
| return x < NT_OFFSET |
| |
| def ISNONTERMINAL(x): |
| return x >= NT_OFFSET |
| |
| def ISEOF(x): |
| return x == ENDMARKER |
| ''' |
| |
| def make_py(infile, outfile='Lib/token.py'): |
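    """Regenerate the Python module of token constants (Lib/token.py)."""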
| tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile) |
| |
| constants = [] |
| for value, name in enumerate(tok_names): |
| constants.append('%s = %d' % (name, value)) |
| constants.insert(ERRORTOKEN, |
| "# These aren't used by the C tokenizer but are needed for tokenize.py") |
| |
| token_types = [] |
| for s, value in sorted(string_to_tok.items()): |
| token_types.append(' %r: %s,' % (s, tok_names[value])) |
| |
| if update_file(outfile, token_py_template % ( |
| '\n'.join(constants), |
| len(tok_names), |
| NT_OFFSET, |
| '\n'.join(token_types), |
| )): |
| print("%s regenerated from %s" % (outfile, infile)) |
| |
| |
| def main(op, infile='Grammar/Tokens', *args): |
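    """Dispatch to make_<op>() with the input file and any extra arguments
    (e.g. an explicit output path).
    """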
| make = globals()['make_' + op] |
| make(infile, *args) |
| |
| |
| if __name__ == '__main__': |
| import sys |
| main(*sys.argv[1:]) |