#! /usr/bin/env python3
# This script generates token related files from Grammar/Tokens:
#
#     Doc/library/token-list.inc
#     Include/token.h
#     Parser/token.c
#     Lib/token.py
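#
# The first command line argument selects one of the make_* generators
# below ('h', 'c', 'rst' or 'py'); the remaining arguments override the
# default input (Grammar/Tokens) and output paths, e.g. (illustrative
# invocation):
#
#     python3 Tools/scripts/generate_token.py py Grammar/Tokens Lib/token.py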


NT_OFFSET = 256

def load_tokens(path):
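    """Parse a Grammar/Tokens-style file.

    Return the list of token names, the value of ERRORTOKEN, and a mapping
    from literal token strings (operators, delimiters) to their token values.
    """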
    tok_names = []
    string_to_tok = {}
    ERRORTOKEN = None
    with open(path) as fp:
        for line in fp:
            line = line.strip()
            # strip comments
            i = line.find('#')
            if i >= 0:
                line = line[:i].strip()
            if not line:
                continue
            fields = line.split()
            name = fields[0]
            value = len(tok_names)
            if name == 'ERRORTOKEN':
                ERRORTOKEN = value
            string = fields[1] if len(fields) > 1 else None
            if string:
                string = eval(string)
                string_to_tok[string] = value
            tok_names.append(name)
    return tok_names, ERRORTOKEN, string_to_tok


def update_file(file, content):
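    """Write content to file only if it differs from what is already there.

    Return True if the file was (re)written, False if it was left untouched.
    """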
    try:
        with open(file, 'r') as fobj:
            if fobj.read() == content:
                return False
    except (OSError, ValueError):
        pass
    with open(file, 'w') as fobj:
        fobj.write(content)
    return True


token_h_template = """\
/* Auto-generated by Tools/scripts/generate_token.py */

/* Token types */
#ifndef Py_LIMITED_API
#ifndef Py_TOKEN_H
#define Py_TOKEN_H
#ifdef __cplusplus
extern "C" {
#endif

#undef TILDE   /* Prevent clash of our definition with system macro. Ex AIX, ioctl.h */

%s\
#define N_TOKENS        %d
#define NT_OFFSET       %d

/* Special definitions for cooperation with parser */

#define ISTERMINAL(x)           ((x) < NT_OFFSET)
#define ISNONTERMINAL(x)        ((x) >= NT_OFFSET)
#define ISEOF(x)                ((x) == ENDMARKER)
#define ISWHITESPACE(x)         ((x) == ENDMARKER || \\
                                 (x) == NEWLINE   || \\
                                 (x) == INDENT    || \\
                                 (x) == DEDENT)


PyAPI_DATA(const char * const) _PyParser_TokenNames[]; /* Token names */
PyAPI_FUNC(int) PyToken_OneChar(int);
PyAPI_FUNC(int) PyToken_TwoChars(int, int);
PyAPI_FUNC(int) PyToken_ThreeChars(int, int, int);

#ifdef __cplusplus
}
#endif
#endif /* !Py_TOKEN_H */
#endif /* Py_LIMITED_API */
"""

def make_h(infile, outfile='Include/token.h'):
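    """Regenerate the C header of token constants (default Include/token.h)."""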
    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)

    defines = []
    for value, name in enumerate(tok_names[:ERRORTOKEN + 1]):
        defines.append("#define %-15s %d\n" % (name, value))

    if update_file(outfile, token_h_template % (
            ''.join(defines),
            len(tok_names),
            NT_OFFSET
        )):
        print("%s regenerated from %s" % (outfile, infile))


token_c_template = """\
/* Auto-generated by Tools/scripts/generate_token.py */

#include "Python.h"
#include "token.h"

/* Token names */

const char * const _PyParser_TokenNames[] = {
%s\
};

/* Return the token corresponding to a single character */

int
PyToken_OneChar(int c1)
{
%s\
    return OP;
}

int
PyToken_TwoChars(int c1, int c2)
{
%s\
    return OP;
}

int
PyToken_ThreeChars(int c1, int c2, int c3)
{
%s\
    return OP;
}
"""

def generate_chars_to_token(mapping, n=1):
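    """Generate a nested C switch statement from a character trie.

    mapping maps each character either to a token name (for the last
    character of an operator) or to a nested dict holding the next
    character of a longer operator; n is the recursion depth and sets
    the indentation of the generated C code.
    """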
    result = []
    write = result.append
    indent = '    ' * n
    write(indent)
    write('switch (c%d) {\n' % (n,))
    for c in sorted(mapping):
        write(indent)
        value = mapping[c]
        if isinstance(value, dict):
            write("case '%s':\n" % (c,))
            write(generate_chars_to_token(value, n + 1))
            write(indent)
            write('    break;\n')
        else:
            write("case '%s': return %s;\n" % (c, value))
    write(indent)
    write('}\n')
    return ''.join(result)

def make_c(infile, outfile='Parser/token.c'):
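    """Regenerate the C token name table and the PyToken_OneChar,
    PyToken_TwoChars and PyToken_ThreeChars lookups (default Parser/token.c).
    """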
    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)
    string_to_tok['<>'] = string_to_tok['!=']
    chars_to_token = {}
    for string, value in string_to_tok.items():
        assert 1 <= len(string) <= 3
        name = tok_names[value]
        m = chars_to_token.setdefault(len(string), {})
        for c in string[:-1]:
            m = m.setdefault(c, {})
        m[string[-1]] = name

    names = []
    for value, name in enumerate(tok_names):
        if value >= ERRORTOKEN:
            name = '<%s>' % name
        names.append('    "%s",\n' % name)
    names.append('    "<N_TOKENS>",\n')

    if update_file(outfile, token_c_template % (
            ''.join(names),
            generate_chars_to_token(chars_to_token[1]),
            generate_chars_to_token(chars_to_token[2]),
            generate_chars_to_token(chars_to_token[3])
        )):
        print("%s regenerated from %s" % (outfile, infile))


token_inc_template = """\
.. Auto-generated by Tools/scripts/generate_token.py
%s
.. data:: N_TOKENS

.. data:: NT_OFFSET
"""

def make_rst(infile, outfile='Doc/library/token-list.inc'):
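    """Regenerate the reST token list (default Doc/library/token-list.inc)."""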
    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)
    tok_to_string = {value: s for s, value in string_to_tok.items()}

    names = []
    for value, name in enumerate(tok_names[:ERRORTOKEN + 1]):
        names.append('.. data:: %s' % (name,))
        if value in tok_to_string:
            names.append('')
            names.append('   Token value for ``"%s"``.' % tok_to_string[value])
        names.append('')

    if update_file(outfile, token_inc_template % '\n'.join(names)):
        print("%s regenerated from %s" % (outfile, infile))


token_py_template = '''\
"""Token constants."""
# Auto-generated by Tools/scripts/generate_token.py

__all__ = ['tok_name', 'ISTERMINAL', 'ISNONTERMINAL', 'ISEOF']

%s
N_TOKENS = %d
# Special definitions for cooperation with parser
NT_OFFSET = %d

tok_name = {value: name
            for name, value in globals().items()
            if isinstance(value, int) and not name.startswith('_')}
__all__.extend(tok_name.values())

EXACT_TOKEN_TYPES = {
%s
}

def ISTERMINAL(x):
    return x < NT_OFFSET

def ISNONTERMINAL(x):
    return x >= NT_OFFSET

def ISEOF(x):
    return x == ENDMARKER
'''

def make_py(infile, outfile='Lib/token.py'):
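    """Regenerate the token constants module (default Lib/token.py)."""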
    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)

    constants = []
    for value, name in enumerate(tok_names):
        constants.append('%s = %d' % (name, value))
    constants.insert(ERRORTOKEN,
                     "# These aren't used by the C tokenizer but are needed for tokenize.py")

    token_types = []
    for s, value in sorted(string_to_tok.items()):
        token_types.append('    %r: %s,' % (s, tok_names[value]))

    if update_file(outfile, token_py_template % (
            '\n'.join(constants),
            len(tok_names),
            NT_OFFSET,
            '\n'.join(token_types),
        )):
        print("%s regenerated from %s" % (outfile, infile))


def main(op, infile='Grammar/Tokens', *args):
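    """Dispatch to the make_<op> generator ('h', 'c', 'rst' or 'py')."""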
    make = globals()['make_' + op]
    make(infile, *args)


if __name__ == '__main__':
    import sys
    main(*sys.argv[1:])