#! /usr/bin/env python3
# This script generates token related files from Grammar/Tokens:
#
#     Doc/library/token-list.inc
#     Include/token.h
#     Parser/token.c
#     Lib/token.py
| 9 | |
# First value reserved for grammar non-terminals; terminal token values are
# all below this (see the ISTERMINAL/ISNONTERMINAL macros emitted below).
NT_OFFSET = 256
| 11 | |
def load_tokens(path):
    """Parse a Grammar/Tokens definition file.

    Each non-blank, non-comment line holds a token name optionally
    followed by a quoted token string, e.g. ``LPAR '('``.  The token's
    numeric value is its position in the file.

    Returns a tuple ``(tok_names, ERRORTOKEN, string_to_tok)``:
    ``tok_names`` is the list of token names in definition order,
    ``ERRORTOKEN`` is the numeric value of the ERRORTOKEN entry (None
    if the file has none), and ``string_to_tok`` maps each exact token
    string (e.g. ``'('``) to its numeric token value.
    """
    import ast  # local import: this script has no top-level import block

    tok_names = []
    string_to_tok = {}
    ERRORTOKEN = None
    with open(path) as fp:
        for line in fp:
            line = line.strip()
            # strip comments ('#' to end of line, or whole-line comments)
            i = line.find('#')
            if i >= 0:
                line = line[:i].strip()
            if not line:
                continue
            fields = line.split()
            name = fields[0]
            # Token value == position in the file.
            value = len(tok_names)
            if name == 'ERRORTOKEN':
                ERRORTOKEN = value
            string = fields[1] if len(fields) > 1 else None
            if string:
                # The field is a quoted literal such as '('.  Use
                # literal_eval rather than eval(): identical result for
                # string literals, but it cannot execute arbitrary code.
                string = ast.literal_eval(string)
                string_to_tok[string] = value
            tok_names.append(name)
    return tok_names, ERRORTOKEN, string_to_tok
| 36 | |
| 37 | |
def update_file(file, content):
    """Write *content* to *file* unless it already holds exactly that text.

    Returns True when the file was (re)written, False when it was already
    up to date.  A missing or unreadable file counts as out of date.
    """
    try:
        with open(file, 'r') as fobj:
            unchanged = fobj.read() == content
    except (OSError, ValueError):
        unchanged = False
    if unchanged:
        return False
    with open(file, 'w') as fobj:
        fobj.write(content)
    return True
| 48 | |
| 49 | |
# Template for Include/token.h.  %-substitutions, in order: the block of
# "#define NAME value" lines, N_TOKENS, and NT_OFFSET.
token_h_template = """\
/* Auto-generated by Tools/scripts/generate_token.py */

/* Token types */
#ifndef Py_LIMITED_API
#ifndef Py_TOKEN_H
#define Py_TOKEN_H
#ifdef __cplusplus
extern "C" {
#endif

#undef TILDE /* Prevent clash of our definition with system macro. Ex AIX, ioctl.h */

%s\
#define N_TOKENS %d
#define NT_OFFSET %d

/* Special definitions for cooperation with parser */

#define ISTERMINAL(x) ((x) < NT_OFFSET)
#define ISNONTERMINAL(x) ((x) >= NT_OFFSET)
#define ISEOF(x) ((x) == ENDMARKER)


PyAPI_DATA(const char * const) _PyParser_TokenNames[]; /* Token names */
PyAPI_FUNC(int) PyToken_OneChar(int);
PyAPI_FUNC(int) PyToken_TwoChars(int, int);
PyAPI_FUNC(int) PyToken_ThreeChars(int, int, int);

#ifdef __cplusplus
}
#endif
#endif /* !Py_TOKEN_H */
#endif /* Py_LIMITED_API */
"""
| 85 | |
def make_h(infile, outfile='Include/token.h'):
    """Regenerate the C token header from *infile*.

    Only tokens up to and including ERRORTOKEN get a C #define; the
    entries after it are excluded from the header.
    """
    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)

    defines = ''.join("#define %-15s %d\n" % (name, value)
                      for value, name in enumerate(tok_names[:ERRORTOKEN + 1]))

    new_text = token_h_template % (defines, len(tok_names), NT_OFFSET)
    if update_file(outfile, new_text):
        print("%s regenerated from %s" % (outfile, infile))
| 99 | |
| 100 | |
# Template for Parser/token.c.  %-substitutions, in order: the quoted
# token-name array entries, then the switch bodies for the one-, two-
# and three-character operator lookups.
token_c_template = """\
/* Auto-generated by Tools/scripts/generate_token.py */

#include "Python.h"
#include "token.h"

/* Token names */

const char * const _PyParser_TokenNames[] = {
%s\
};

/* Return the token corresponding to a single character */

int
PyToken_OneChar(int c1)
{
%s\
return OP;
}

int
PyToken_TwoChars(int c1, int c2)
{
%s\
return OP;
}

int
PyToken_ThreeChars(int c1, int c2, int c3)
{
%s\
return OP;
}
"""
| 136 | |
def generate_chars_to_token(mapping, n=1):
    """Render *mapping* as C source: a switch statement on character c<n>.

    *mapping* maps a character either to a token name (emitted as
    ``case 'x': return NAME;``) or to a nested dict, which is emitted
    recursively as a nested switch on the next character c<n+1>
    followed by a ``break``.  Returns the generated source as a string.
    """
    result = []
    write = result.append
    indent = ' ' * n
    write(indent)
    write('switch (c%d) {\n' % (n,))
    # Sorted iteration keeps the generated file deterministic.
    for c in sorted(mapping):
        write(indent)
        value = mapping[c]
        if isinstance(value, dict):
            write("case '%s':\n" % (c,))
            write(generate_chars_to_token(value, n + 1))
            write(indent)
            write(' break;\n')
        else:
            write("case '%s': return %s;\n" % (c, value))
    write(indent)
    write('}\n')
    return ''.join(result)
| 156 | |
def make_c(infile, outfile='Parser/token.c'):
    """Regenerate Parser/token.c from *infile*: the token-name table plus
    the PyToken_OneChar/TwoChars/ThreeChars character-lookup switches.
    """
    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)
    # '<>' gets the same token value as '!=' — presumably the parser's
    # alternate spelling of "not equal"; confirm against the tokenizer.
    string_to_tok['<>'] = string_to_tok['!=']
    # Build per-length tries: token length -> char -> ... -> token name.
    chars_to_token = {}
    for string, value in string_to_tok.items():
        assert 1 <= len(string) <= 3
        name = tok_names[value]
        m = chars_to_token.setdefault(len(string), {})
        for c in string[:-1]:
            m = m.setdefault(c, {})
        m[string[-1]] = name

    names = []
    for value, name in enumerate(tok_names):
        # Names from ERRORTOKEN onward are wrapped in angle brackets in
        # the C name table.
        if value >= ERRORTOKEN:
            name = '<%s>' % name
        names.append(' "%s",\n' % name)
    names.append(' "<N_TOKENS>",\n')

    if update_file(outfile, token_c_template % (
        ''.join(names),
        generate_chars_to_token(chars_to_token[1]),
        generate_chars_to_token(chars_to_token[2]),
        generate_chars_to_token(chars_to_token[3])
    )):
        print("%s regenerated from %s" % (outfile, infile))
| 183 | |
| 184 | |
# Template for Doc/library/token-list.inc.  The single %-substitution is
# the newline-joined list of ".. data::" directives built by make_rst().
token_inc_template = """\
.. Auto-generated by Tools/scripts/generate_token.py
%s
.. data:: N_TOKENS

.. data:: NT_OFFSET
"""
| 192 | |
def make_rst(infile, outfile='Doc/library/token-list.inc'):
    """Regenerate the reST token list include for the docs from *infile*.

    Emits one ``.. data::`` directive per token up to and including
    ERRORTOKEN; tokens that have an exact string form also get a
    "Token value for ..." description line.
    """
    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)
    # Invert the mapping: token value -> its exact string form.
    tok_to_string = {value: s for s, value in string_to_tok.items()}

    names = []
    for value, name in enumerate(tok_names[:ERRORTOKEN + 1]):
        names.append('.. data:: %s' % (name,))
        if value in tok_to_string:
            names.append('')
            names.append(' Token value for ``"%s"``.' % tok_to_string[value])
        names.append('')

    if update_file(outfile, token_inc_template % '\n'.join(names)):
        print("%s regenerated from %s" % (outfile, infile))
| 207 | |
| 208 | |
# Template for Lib/token.py.  %-substitutions, in order: the block of
# "NAME = value" constant assignments, N_TOKENS, NT_OFFSET, and the
# EXACT_TOKEN_TYPES dict entries mapping token strings to token names.
token_py_template = '''\
"""Token constants."""
# Auto-generated by Tools/scripts/generate_token.py

__all__ = ['tok_name', 'ISTERMINAL', 'ISNONTERMINAL', 'ISEOF']

%s
N_TOKENS = %d
# Special definitions for cooperation with parser
NT_OFFSET = %d

tok_name = {value: name
for name, value in globals().items()
if isinstance(value, int) and not name.startswith('_')}
__all__.extend(tok_name.values())

EXACT_TOKEN_TYPES = {
%s
}

def ISTERMINAL(x):
    return x < NT_OFFSET

def ISNONTERMINAL(x):
    return x >= NT_OFFSET

def ISEOF(x):
    return x == ENDMARKER
'''
| 238 | |
def make_py(infile, outfile='Lib/token.py'):
    """Regenerate Lib/token.py from *infile*: one integer constant per
    token, plus the EXACT_TOKEN_TYPES string-to-name mapping.
    """
    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)

    constants = []
    for value, name in enumerate(tok_names):
        constants.append('%s = %d' % (name, value))
    # Insert a marker comment just before the ERRORTOKEN constant to
    # separate C-tokenizer tokens from the tokenize.py-only ones.
    constants.insert(ERRORTOKEN,
                     "# These aren't used by the C tokenizer but are needed for tokenize.py")

    # Sorted for deterministic output across regenerations.
    token_types = []
    for s, value in sorted(string_to_tok.items()):
        token_types.append(' %r: %s,' % (s, tok_names[value]))

    if update_file(outfile, token_py_template % (
        '\n'.join(constants),
        len(tok_names),
        NT_OFFSET,
        '\n'.join(token_types),
    )):
        print("%s regenerated from %s" % (outfile, infile))
| 259 | |
| 260 | |
def main(op, infile='Grammar/Tokens', *args):
    """Dispatch to the generator make_<op> (h, c, rst or py),
    forwarding *infile* and any extra positional arguments."""
    generator = globals()['make_' + op]
    generator(infile, *args)
| 264 | |
| 265 | |
if __name__ == '__main__':
    import sys
    # Command line: generate_token.py OP [INFILE [OUTFILE]], where OP
    # selects one of the make_* generators above ('h', 'c', 'rst', 'py').
    main(*sys.argv[1:])