#! /usr/bin/env python3
# This script generates token related files from Grammar/Tokens:
#
#     Doc/library/token-list.inc
#     Include/token.h
#     Parser/token.c
#     Lib/token.py
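#
# The first command line argument selects one of the make_* generators
# below ('h', 'c', 'rst' or 'py'); the remaining arguments override the
# default input (Grammar/Tokens) and output paths, e.g. (illustrative
# invocation):
#
#     python3 Tools/scripts/generate_token.py py Grammar/Tokens Lib/token.py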


NT_OFFSET = 256

def load_tokens(path):
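    """Parse a Grammar/Tokens-style file.

    Return the list of token names, the value of ERRORTOKEN, and a mapping
    from literal token strings (operators, delimiters) to their token values.
    """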
    tok_names = []
    string_to_tok = {}
    ERRORTOKEN = None
    with open(path) as fp:
        for line in fp:
            line = line.strip()
            # strip comments
            i = line.find('#')
            if i >= 0:
                line = line[:i].strip()
            if not line:
                continue
            fields = line.split()
            name = fields[0]
            value = len(tok_names)
            if name == 'ERRORTOKEN':
                ERRORTOKEN = value
            string = fields[1] if len(fields) > 1 else None
            if string:
                string = eval(string)
                string_to_tok[string] = value
            tok_names.append(name)
    return tok_names, ERRORTOKEN, string_to_tok


def update_file(file, content):
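    """Write content to file only if it differs from what is already there.

    Return True if the file was (re)written, False if it was left untouched.
    """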
    try:
        with open(file, 'r') as fobj:
            if fobj.read() == content:
                return False
    except (OSError, ValueError):
        pass
    with open(file, 'w') as fobj:
        fobj.write(content)
    return True


token_h_template = """\
/* Auto-generated by Tools/scripts/generate_token.py */

/* Token types */
#ifndef Py_LIMITED_API
#ifndef Py_TOKEN_H
#define Py_TOKEN_H
#ifdef __cplusplus
extern "C" {
#endif

#undef TILDE   /* Prevent clash of our definition with system macro. Ex AIX, ioctl.h */

%s\
#define N_TOKENS        %d
#define NT_OFFSET       %d

/* Special definitions for cooperation with parser */

#define ISTERMINAL(x)           ((x) < NT_OFFSET)
#define ISNONTERMINAL(x)        ((x) >= NT_OFFSET)
#define ISEOF(x)                ((x) == ENDMARKER)
#define ISWHITESPACE(x)         ((x) == ENDMARKER || \\
                                 (x) == NEWLINE   || \\
                                 (x) == INDENT    || \\
                                 (x) == DEDENT)


PyAPI_DATA(const char * const) _PyParser_TokenNames[]; /* Token names */
PyAPI_FUNC(int) PyToken_OneChar(int);
PyAPI_FUNC(int) PyToken_TwoChars(int, int);
PyAPI_FUNC(int) PyToken_ThreeChars(int, int, int);

#ifdef __cplusplus
}
#endif
#endif /* !Py_TOKEN_H */
#endif /* Py_LIMITED_API */
"""

def make_h(infile, outfile='Include/token.h'):
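    """Regenerate the C header of token constants (default Include/token.h)."""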
    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)

    defines = []
    for value, name in enumerate(tok_names[:ERRORTOKEN + 1]):
        defines.append("#define %-15s %d\n" % (name, value))

    if update_file(outfile, token_h_template % (
            ''.join(defines),
            len(tok_names),
            NT_OFFSET
        )):
        print("%s regenerated from %s" % (outfile, infile))


token_c_template = """\
/* Auto-generated by Tools/scripts/generate_token.py */

#include "Python.h"
#include "token.h"

/* Token names */

const char * const _PyParser_TokenNames[] = {
%s\
};

/* Return the token corresponding to a single character */

int
PyToken_OneChar(int c1)
{
%s\
    return OP;
}

int
PyToken_TwoChars(int c1, int c2)
{
%s\
    return OP;
}

int
PyToken_ThreeChars(int c1, int c2, int c3)
{
%s\
    return OP;
}
"""

def generate_chars_to_token(mapping, n=1):
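    """Generate a nested C switch statement from a character trie.

    mapping maps each character either to a token name (for the last
    character of an operator) or to a nested dict holding the next
    character of a longer operator; n is the recursion depth and sets
    the indentation of the generated C code.
    """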
    result = []
    write = result.append
    indent = '    ' * n
    write(indent)
    write('switch (c%d) {\n' % (n,))
    for c in sorted(mapping):
        write(indent)
        value = mapping[c]
        if isinstance(value, dict):
            write("case '%s':\n" % (c,))
            write(generate_chars_to_token(value, n + 1))
            write(indent)
            write('    break;\n')
        else:
            write("case '%s': return %s;\n" % (c, value))
    write(indent)
    write('}\n')
    return ''.join(result)

def make_c(infile, outfile='Parser/token.c'):
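    """Regenerate the C token name table and the PyToken_OneChar,
    PyToken_TwoChars and PyToken_ThreeChars lookups (default Parser/token.c).
    """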
    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)
    string_to_tok['<>'] = string_to_tok['!=']
    chars_to_token = {}
    for string, value in string_to_tok.items():
        assert 1 <= len(string) <= 3
        name = tok_names[value]
        m = chars_to_token.setdefault(len(string), {})
        for c in string[:-1]:
            m = m.setdefault(c, {})
        m[string[-1]] = name

    names = []
    for value, name in enumerate(tok_names):
        if value >= ERRORTOKEN:
            name = '<%s>' % name
        names.append('    "%s",\n' % name)
    names.append('    "<N_TOKENS>",\n')

    if update_file(outfile, token_c_template % (
            ''.join(names),
            generate_chars_to_token(chars_to_token[1]),
            generate_chars_to_token(chars_to_token[2]),
            generate_chars_to_token(chars_to_token[3])
        )):
        print("%s regenerated from %s" % (outfile, infile))


token_inc_template = """\
.. Auto-generated by Tools/scripts/generate_token.py
%s
.. data:: N_TOKENS

.. data:: NT_OFFSET
"""

def make_rst(infile, outfile='Doc/library/token-list.inc'):
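    """Regenerate the reST token list (default Doc/library/token-list.inc)."""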
    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)
    tok_to_string = {value: s for s, value in string_to_tok.items()}

    names = []
    for value, name in enumerate(tok_names[:ERRORTOKEN + 1]):
        names.append('.. data:: %s' % (name,))
        if value in tok_to_string:
            names.append('')
            names.append('   Token value for ``"%s"``.' % tok_to_string[value])
        names.append('')

    if update_file(outfile, token_inc_template % '\n'.join(names)):
        print("%s regenerated from %s" % (outfile, infile))


token_py_template = '''\
"""Token constants."""
# Auto-generated by Tools/scripts/generate_token.py

__all__ = ['tok_name', 'ISTERMINAL', 'ISNONTERMINAL', 'ISEOF']

%s
N_TOKENS = %d
# Special definitions for cooperation with parser
NT_OFFSET = %d

tok_name = {value: name
            for name, value in globals().items()
            if isinstance(value, int) and not name.startswith('_')}
__all__.extend(tok_name.values())

EXACT_TOKEN_TYPES = {
%s
}

def ISTERMINAL(x):
    return x < NT_OFFSET

def ISNONTERMINAL(x):
    return x >= NT_OFFSET

def ISEOF(x):
    return x == ENDMARKER
'''

def make_py(infile, outfile='Lib/token.py'):
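    """Regenerate the token constants module (default Lib/token.py)."""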
    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)

    constants = []
    for value, name in enumerate(tok_names):
        constants.append('%s = %d' % (name, value))
    constants.insert(ERRORTOKEN,
                     "# These aren't used by the C tokenizer but are needed for tokenize.py")

    token_types = []
    for s, value in sorted(string_to_tok.items()):
        token_types.append('    %r: %s,' % (s, tok_names[value]))

    if update_file(outfile, token_py_template % (
            '\n'.join(constants),
            len(tok_names),
            NT_OFFSET,
            '\n'.join(token_types),
        )):
        print("%s regenerated from %s" % (outfile, infile))


def main(op, infile='Grammar/Tokens', *args):
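    """Dispatch to the make_<op> generator ('h', 'c', 'rst' or 'py')."""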
    make = globals()['make_' + op]
    make(infile, *args)


if __name__ == '__main__':
    import sys
    main(*sys.argv[1:])