#! /usr/bin/env python3
# This script generates token related files from Grammar/Tokens:
#
#     Doc/library/token-list.inc
#     Include/token.h
#     Parser/token.c
#     Lib/token.py
| 9 | |
# First value reserved for grammar non-terminals; terminal token values are
# all below this (see the ISTERMINAL/ISNONTERMINAL macros emitted below).
NT_OFFSET = 256
| 11 | |
def load_tokens(path):
    """Parse a Grammar/Tokens definition file.

    Each non-blank, non-comment line holds a token name optionally
    followed by a quoted token string, e.g. ``LPAR '('``.  The token's
    numeric value is its position in the file.

    Returns a tuple ``(tok_names, ERRORTOKEN, string_to_tok)``:
    ``tok_names`` is the list of token names in definition order,
    ``ERRORTOKEN`` is the numeric value of the ERRORTOKEN entry (None
    if the file has none), and ``string_to_tok`` maps each exact token
    string (e.g. ``'('``) to its numeric token value.
    """
    import ast  # local import: this script has no top-level import block

    tok_names = []
    string_to_tok = {}
    ERRORTOKEN = None
    with open(path) as fp:
        for line in fp:
            line = line.strip()
            # strip comments ('#' to end of line, or whole-line comments)
            i = line.find('#')
            if i >= 0:
                line = line[:i].strip()
            if not line:
                continue
            fields = line.split()
            name = fields[0]
            # Token value == position in the file.
            value = len(tok_names)
            if name == 'ERRORTOKEN':
                ERRORTOKEN = value
            string = fields[1] if len(fields) > 1 else None
            if string:
                # The field is a quoted literal such as '('.  Use
                # literal_eval rather than eval(): identical result for
                # string literals, but it cannot execute arbitrary code.
                string = ast.literal_eval(string)
                string_to_tok[string] = value
            tok_names.append(name)
    return tok_names, ERRORTOKEN, string_to_tok
| 36 | |
| 37 | |
def update_file(file, content):
    """Write *content* to *file* unless it already holds exactly that text.

    Returns True when the file was (re)written, False when it was already
    up to date.  A missing or unreadable file counts as out of date.
    """
    try:
        with open(file, 'r') as fobj:
            unchanged = fobj.read() == content
    except (OSError, ValueError):
        unchanged = False
    if unchanged:
        return False
    with open(file, 'w') as fobj:
        fobj.write(content)
    return True
| 48 | |
| 49 | |
# Template for Include/token.h.  %-substitutions, in order: the block of
# "#define NAME value" lines, N_TOKENS, and NT_OFFSET.
token_h_template = """\
/* Auto-generated by Tools/scripts/generate_token.py */

/* Token types */
#ifndef Py_LIMITED_API
#ifndef Py_TOKEN_H
#define Py_TOKEN_H
#ifdef __cplusplus
extern "C" {
#endif

#undef TILDE /* Prevent clash of our definition with system macro. Ex AIX, ioctl.h */

%s\
#define N_TOKENS %d
#define NT_OFFSET %d

/* Special definitions for cooperation with parser */

#define ISTERMINAL(x) ((x) < NT_OFFSET)
#define ISNONTERMINAL(x) ((x) >= NT_OFFSET)
#define ISEOF(x) ((x) == ENDMARKER)


PyAPI_DATA(const char * const) _PyParser_TokenNames[]; /* Token names */
PyAPI_FUNC(int) PyToken_OneChar(int);
PyAPI_FUNC(int) PyToken_TwoChars(int, int);
PyAPI_FUNC(int) PyToken_ThreeChars(int, int, int);

#ifdef __cplusplus
}
#endif
#endif /* !Py_TOKEN_H */
#endif /* Py_LIMITED_API */
"""
| 85 | |
def make_h(infile, outfile='Include/token.h'):
    """Regenerate the C token header from *infile*.

    Only tokens up to and including ERRORTOKEN get a C #define; the
    entries after it are excluded from the header.
    """
    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)

    defines = ''.join("#define %-15s %d\n" % (name, value)
                      for value, name in enumerate(tok_names[:ERRORTOKEN + 1]))

    new_text = token_h_template % (defines, len(tok_names), NT_OFFSET)
    if update_file(outfile, new_text):
        print("%s regenerated from %s" % (outfile, infile))
| 99 | |
| 100 | |
# Template for Parser/token.c.  %-substitutions, in order: the quoted
# token-name array entries, then the switch bodies for the one-, two-
# and three-character operator lookups.
token_c_template = """\
/* Auto-generated by Tools/scripts/generate_token.py */

#include "Python.h"
#include "token.h"

/* Token names */

const char * const _PyParser_TokenNames[] = {
%s\
};

/* Return the token corresponding to a single character */

int
PyToken_OneChar(int c1)
{
%s\
return OP;
}

int
PyToken_TwoChars(int c1, int c2)
{
%s\
return OP;
}

int
PyToken_ThreeChars(int c1, int c2, int c3)
{
%s\
return OP;
}
"""
| 136 | |
def generate_chars_to_token(mapping, n=1):
    """Render *mapping* as C source: a switch statement on character c<n>.

    *mapping* maps a character either to a token name (emitted as
    ``case 'x': return NAME;``) or to a nested dict, which is emitted
    recursively as a nested switch on the next character c<n+1>
    followed by a ``break``.  Returns the generated source as a string.
    """
    result = []
    write = result.append
    indent = ' ' * n
    write(indent)
    write('switch (c%d) {\n' % (n,))
    # Sorted iteration keeps the generated file deterministic.
    for c in sorted(mapping):
        write(indent)
        value = mapping[c]
        if isinstance(value, dict):
            write("case '%s':\n" % (c,))
            write(generate_chars_to_token(value, n + 1))
            write(indent)
            write(' break;\n')
        else:
            write("case '%s': return %s;\n" % (c, value))
    write(indent)
    write('}\n')
    return ''.join(result)
| 156 | |
def make_c(infile, outfile='Parser/token.c'):
    """Regenerate Parser/token.c from *infile*: the token-name table plus
    the PyToken_OneChar/TwoChars/ThreeChars character-lookup switches.
    """
    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)
    # '<>' gets the same token value as '!=' — presumably the parser's
    # alternate spelling of "not equal"; confirm against the tokenizer.
    string_to_tok['<>'] = string_to_tok['!=']
    # Build per-length tries: token length -> char -> ... -> token name.
    chars_to_token = {}
    for string, value in string_to_tok.items():
        assert 1 <= len(string) <= 3
        name = tok_names[value]
        m = chars_to_token.setdefault(len(string), {})
        for c in string[:-1]:
            m = m.setdefault(c, {})
        m[string[-1]] = name

    names = []
    for value, name in enumerate(tok_names):
        # Names from ERRORTOKEN onward are wrapped in angle brackets in
        # the C name table.
        if value >= ERRORTOKEN:
            name = '<%s>' % name
        names.append(' "%s",\n' % name)
    names.append(' "<N_TOKENS>",\n')

    if update_file(outfile, token_c_template % (
        ''.join(names),
        generate_chars_to_token(chars_to_token[1]),
        generate_chars_to_token(chars_to_token[2]),
        generate_chars_to_token(chars_to_token[3])
    )):
        print("%s regenerated from %s" % (outfile, infile))
| 183 | |
| 184 | |
# Template for Doc/library/token-list.inc.  The single %-substitution is
# the newline-joined list of ".. data::" directives built by make_rst().
token_inc_template = """\
.. Auto-generated by Tools/scripts/generate_token.py
%s
.. data:: N_TOKENS

.. data:: NT_OFFSET
"""
| 192 | |
def make_rst(infile, outfile='Doc/library/token-list.inc'):
    """Regenerate the reST token list include for the docs from *infile*.

    Emits one ``.. data::`` directive per token up to and including
    ERRORTOKEN; tokens that have an exact string form also get a
    "Token value for ..." description line.
    """
    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)
    # Invert the mapping: token value -> its exact string form.
    tok_to_string = {value: s for s, value in string_to_tok.items()}

    names = []
    for value, name in enumerate(tok_names[:ERRORTOKEN + 1]):
        names.append('.. data:: %s' % (name,))
        if value in tok_to_string:
            names.append('')
            names.append(' Token value for ``"%s"``.' % tok_to_string[value])
        names.append('')

    if update_file(outfile, token_inc_template % '\n'.join(names)):
        print("%s regenerated from %s" % (outfile, infile))
| 207 | |
| 208 | |
# Template for Lib/token.py.  %-substitutions, in order: the block of
# "NAME = value" constant assignments, N_TOKENS, NT_OFFSET, and the
# EXACT_TOKEN_TYPES dict entries mapping token strings to token names.
token_py_template = '''\
"""Token constants."""
# Auto-generated by Tools/scripts/generate_token.py

__all__ = ['tok_name', 'ISTERMINAL', 'ISNONTERMINAL', 'ISEOF']

%s
N_TOKENS = %d
# Special definitions for cooperation with parser
NT_OFFSET = %d

tok_name = {value: name
for name, value in globals().items()
if isinstance(value, int) and not name.startswith('_')}
__all__.extend(tok_name.values())

EXACT_TOKEN_TYPES = {
%s
}

def ISTERMINAL(x):
    return x < NT_OFFSET

def ISNONTERMINAL(x):
    return x >= NT_OFFSET

def ISEOF(x):
    return x == ENDMARKER
'''
| 238 | |
def make_py(infile, outfile='Lib/token.py'):
    """Regenerate Lib/token.py from *infile*: one integer constant per
    token, plus the EXACT_TOKEN_TYPES string-to-name mapping.
    """
    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)

    constants = []
    for value, name in enumerate(tok_names):
        constants.append('%s = %d' % (name, value))
    # Insert a marker comment just before the ERRORTOKEN constant to
    # separate C-tokenizer tokens from the tokenize.py-only ones.
    constants.insert(ERRORTOKEN,
                     "# These aren't used by the C tokenizer but are needed for tokenize.py")

    # Sorted for deterministic output across regenerations.
    token_types = []
    for s, value in sorted(string_to_tok.items()):
        token_types.append(' %r: %s,' % (s, tok_names[value]))

    if update_file(outfile, token_py_template % (
        '\n'.join(constants),
        len(tok_names),
        NT_OFFSET,
        '\n'.join(token_types),
    )):
        print("%s regenerated from %s" % (outfile, infile))
| 259 | |
| 260 | |
def main(op, infile='Grammar/Tokens', *args):
    """Dispatch to the generator make_<op> (h, c, rst or py),
    forwarding *infile* and any extra positional arguments."""
    generator = globals()['make_' + op]
    generator(infile, *args)
| 264 | |
| 265 | |
if __name__ == '__main__':
    import sys
    # Command line: generate_token.py OP [INFILE [OUTFILE]], where OP
    # selects one of the make_* generators above ('h', 'c', 'rst', 'py').
    main(*sys.argv[1:])