#! /usr/bin/env python3
# This script generates token related files from Grammar/Tokens:
#
#   Doc/library/token-list.inc
#   Include/token.h
#   Parser/token.c
#   Lib/token.py
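#
# A typical invocation looks like the following sketch: the op argument picks
# one of the make_* functions below, and the remaining arguments fall back to
# that function's defaults:
#
#     python3 Tools/scripts/generate_token.py py Grammar/Tokens Lib/token.py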


NT_OFFSET = 256

def load_tokens(path):
    """Parse the Tokens file; return (names, ERRORTOKEN value, string-to-token map)."""
    tok_names = []
    string_to_tok = {}
    ERRORTOKEN = None
    with open(path) as fp:
        for line in fp:
            line = line.strip()
            # strip comments
            i = line.find('#')
            if i >= 0:
                line = line[:i].strip()
            if not line:
                continue
            fields = line.split()
            name = fields[0]
            value = len(tok_names)
            if name == 'ERRORTOKEN':
                ERRORTOKEN = value
            string = fields[1] if len(fields) > 1 else None
            if string:
                string = eval(string)  # unquote the operator literal, e.g. "'('" -> '('
                string_to_tok[string] = value
            tok_names.append(name)
    return tok_names, ERRORTOKEN, string_to_tok
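
# Illustrative excerpt of the Grammar/Tokens input parsed above (one token
# name per line, operator tokens followed by their quoted string):
#
#     ENDMARKER
#     NAME
#     NUMBER
#     LPAR                    '('
#     RPAR                    ')'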


def update_file(file, content):
    """Rewrite *file* with *content*, but only if it actually changed."""
    try:
        with open(file, 'r') as fobj:
            if fobj.read() == content:
                return False
    except (OSError, ValueError):
        pass
    with open(file, 'w') as fobj:
        fobj.write(content)
    return True


token_h_template = """\
/* Auto-generated by Tools/scripts/generate_token.py */

/* Token types */
#ifndef Py_LIMITED_API
#ifndef Py_TOKEN_H
#define Py_TOKEN_H
#ifdef __cplusplus
extern "C" {
#endif

#undef TILDE   /* Prevent clash of our definition with system macro. Ex AIX, ioctl.h */

%s\
#define N_TOKENS        %d
#define NT_OFFSET       %d

/* Special definitions for cooperation with parser */

#define ISTERMINAL(x)           ((x) < NT_OFFSET)
#define ISNONTERMINAL(x)        ((x) >= NT_OFFSET)
#define ISEOF(x)                ((x) == ENDMARKER)


PyAPI_DATA(const char * const) _PyParser_TokenNames[]; /* Token names */
PyAPI_FUNC(int) PyToken_OneChar(int);
PyAPI_FUNC(int) PyToken_TwoChars(int, int);
PyAPI_FUNC(int) PyToken_ThreeChars(int, int, int);

#ifdef __cplusplus
}
#endif
#endif /* !Py_TOKEN_H */
#endif /* Py_LIMITED_API */
"""

def make_h(infile, outfile='Include/token.h'):
    """Regenerate the C header; only tokens up to ERRORTOKEN get a #define."""
    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)

    defines = []
    for value, name in enumerate(tok_names[:ERRORTOKEN + 1]):
        defines.append("#define %-15s %d\n" % (name, value))

    if update_file(outfile, token_h_template % (
            ''.join(defines),
            len(tok_names),
            NT_OFFSET
        )):
        print("%s regenerated from %s" % (outfile, infile))
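
# Each generated define line looks roughly like this (the name is padded to
# 15 characters by the %-15s format above):
#
#     #define ENDMARKER       0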


token_c_template = """\
/* Auto-generated by Tools/scripts/generate_token.py */

#include "Python.h"
#include "token.h"

/* Token names */

const char * const _PyParser_TokenNames[] = {
%s\
};

/* Return the token corresponding to a single character */

int
PyToken_OneChar(int c1)
{
%s\
    return OP;
}

int
PyToken_TwoChars(int c1, int c2)
{
%s\
    return OP;
}

int
PyToken_ThreeChars(int c1, int c2, int c3)
{
%s\
    return OP;
}
"""

def generate_chars_to_token(mapping, n=1):
    """Emit nested C switch statements mapping operator characters to tokens."""
    result = []
    write = result.append
    indent = '    ' * n
    write(indent)
    write('switch (c%d) {\n' % (n,))
    for c in sorted(mapping):
        write(indent)
        value = mapping[c]
        if isinstance(value, dict):
            write("case '%s':\n" % (c,))
            write(generate_chars_to_token(value, n + 1))
            write(indent)
            write('    break;\n')
        else:
            write("case '%s': return %s;\n" % (c, value))
    write(indent)
    write('}\n')
    return ''.join(result)
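
# For example, generate_chars_to_token({'(': 'LPAR', ')': 'RPAR'}) produces
# the following C fragment (illustrative output):
#
#     switch (c1) {
#     case '(': return LPAR;
#     case ')': return RPAR;
#     }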

def make_c(infile, outfile='Parser/token.c'):
    """Regenerate the C token-name table and the PyToken_*Chars() helpers."""
    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)
    string_to_tok['<>'] = string_to_tok['!=']  # old '<>' spelling maps to the same token as '!='
    chars_to_token = {}
    for string, value in string_to_tok.items():
        assert 1 <= len(string) <= 3
        name = tok_names[value]
        m = chars_to_token.setdefault(len(string), {})
        for c in string[:-1]:
            m = m.setdefault(c, {})
        m[string[-1]] = name

    names = []
    for value, name in enumerate(tok_names):
        if value >= ERRORTOKEN:
            name = '<%s>' % name
        names.append('    "%s",\n' % name)
    names.append('    "<N_TOKENS>",\n')

    if update_file(outfile, token_c_template % (
            ''.join(names),
            generate_chars_to_token(chars_to_token[1]),
            generate_chars_to_token(chars_to_token[2]),
            generate_chars_to_token(chars_to_token[3])
        )):
        print("%s regenerated from %s" % (outfile, infile))


token_inc_template = """\
.. Auto-generated by Tools/scripts/generate_token.py
%s
.. data:: N_TOKENS

.. data:: NT_OFFSET
"""

def make_rst(infile, outfile='Doc/library/token-list.inc'):
    """Regenerate the reST include file that documents the token constants."""
    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)
    tok_to_string = {value: s for s, value in string_to_tok.items()}

    names = []
    for value, name in enumerate(tok_names[:ERRORTOKEN + 1]):
        names.append('.. data:: %s' % (name,))
        if value in tok_to_string:
            names.append('')
            names.append('   Token value for ``"%s"``.' % tok_to_string[value])
        names.append('')

    if update_file(outfile, token_inc_template % '\n'.join(names)):
        print("%s regenerated from %s" % (outfile, infile))
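
# A generated entry in token-list.inc looks roughly like:
#
#     .. data:: LPAR
#
#        Token value for ``"("``.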


token_py_template = '''\
"""Token constants."""
# Auto-generated by Tools/scripts/generate_token.py

__all__ = ['tok_name', 'ISTERMINAL', 'ISNONTERMINAL', 'ISEOF']

%s
N_TOKENS = %d
# Special definitions for cooperation with parser
NT_OFFSET = %d

tok_name = {value: name
            for name, value in globals().items()
            if isinstance(value, int) and not name.startswith('_')}
__all__.extend(tok_name.values())

EXACT_TOKEN_TYPES = {
%s
}

def ISTERMINAL(x):
    return x < NT_OFFSET

def ISNONTERMINAL(x):
    return x >= NT_OFFSET

def ISEOF(x):
    return x == ENDMARKER
'''

def make_py(infile, outfile='Lib/token.py'):
    """Regenerate the pure-Python token module."""
    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)

    constants = []
    for value, name in enumerate(tok_names):
        constants.append('%s = %d' % (name, value))
    constants.insert(ERRORTOKEN,
                     "# These aren't used by the C tokenizer but are needed for tokenize.py")

    token_types = []
    for s, value in sorted(string_to_tok.items()):
        token_types.append('    %r: %s,' % (s, tok_names[value]))

    if update_file(outfile, token_py_template % (
            '\n'.join(constants),
            len(tok_names),
            NT_OFFSET,
            '\n'.join(token_types),
        )):
        print("%s regenerated from %s" % (outfile, infile))
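
# The regenerated Lib/token.py then contains lines like these (illustrative):
#
#     ENDMARKER = 0
#     NAME = 1
#     ...
#     EXACT_TOKEN_TYPES = {
#         '!=': NOTEQUAL,
#         ...
#     }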


def main(op, infile='Grammar/Tokens', *args):
    """Dispatch to the make_<op>() generator selected on the command line."""
    make = globals()['make_' + op]
    make(infile, *args)


if __name__ == '__main__':
    import sys
    main(*sys.argv[1:])