Tools/scripts/generate_token.py - platform/external/python/cpython3 - Gitiles

 #! /usr/bin/env python3
 # This script generates token related files from Grammar/Tokens:
 #
 #   Doc/library/token-list.inc
 #   Include/token.h
 #   Parser/token.c
 #   Lib/token.py


 # Emitted as NT_OFFSET into the generated token.h and token.py, where the
 # ISTERMINAL/ISNONTERMINAL helpers compare token numbers against it.
 NT_OFFSET = 256

 def load_tokens(path):
     """Parse a token definition file.

     Each non-empty line, once a trailing ``#`` comment is removed, is split
     on whitespace: the first field is the token name and the optional
     second field is a Python string literal giving the token's exact text.
     Tokens are numbered consecutively from 0 in file order.

     Returns a ``(tok_names, ERRORTOKEN, string_to_tok)`` tuple: the list of
     names indexed by token number, the number of the ``ERRORTOKEN`` entry
     (``None`` when the file has none) and a dict mapping each exact token
     string to its number.
     """
     tok_names = []
     string_to_tok = {}
     ERRORTOKEN = None
     with open(path) as fp:
         for raw in fp:
             # Keep only what precedes the first '#', without outer blanks.
             text = raw.partition('#')[0].strip()
             if not text:
                 continue
             name, *rest = text.split()
             number = len(tok_names)
             tok_names.append(name)
             if name == 'ERRORTOKEN':
                 ERRORTOKEN = number
             if rest:
                 # NOTE(review): eval() runs whatever the second field
                 # contains, so the token file must come from a trusted source.
                 string_to_tok[eval(rest[0])] = number
     return tok_names, ERRORTOKEN, string_to_tok


 def update_file(file, content):
     """Write *content* to *file* unless it already holds exactly that text.

     Returns False when the existing file matches and is left untouched,
     True when the file was (re)written.  A file that cannot be opened or
     read (OSError or ValueError) is treated as out of date.
     """
     try:
         with open(file, 'r') as existing:
             up_to_date = existing.read() == content
     except (OSError, ValueError):
         up_to_date = False
     if up_to_date:
         return False
     with open(file, 'w') as target:
         target.write(content)
     return True


 # Template for the generated C header, filled in by make_h() with
 # %-formatting: the block of "#define NAME value" lines, the N_TOKENS
 # count and NT_OFFSET.
 token_h_template = """\
 /* Auto-generated by Tools/scripts/generate_token.py */

 /* Token types */
 #ifndef Py_LIMITED_API
 #ifndef Py_TOKEN_H
 #define Py_TOKEN_H
 #ifdef __cplusplus
 extern "C" {
 #endif

 #undef TILDE   /* Prevent clash of our definition with system macro. Ex AIX, ioctl.h */

 %s\
 #define N_TOKENS        %d
 #define NT_OFFSET       %d

 /* Special definitions for cooperation with parser */

 #define ISTERMINAL(x)           ((x) < NT_OFFSET)
 #define ISNONTERMINAL(x)        ((x) >= NT_OFFSET)
 #define ISEOF(x)                ((x) == ENDMARKER)
 #define ISWHITESPACE(x)         ((x) == ENDMARKER || \\
                                  (x) == NEWLINE   || \\
                                  (x) == INDENT    || \\
                                  (x) == DEDENT)


 PyAPI_DATA(const char * const) _PyParser_TokenNames[]; /* Token names */
 PyAPI_FUNC(int) PyToken_OneChar(int);
 PyAPI_FUNC(int) PyToken_TwoChars(int, int);
 PyAPI_FUNC(int) PyToken_ThreeChars(int, int, int);

 #ifdef __cplusplus
 }
 #endif
 #endif /* !Py_TOKEN_H */
 #endif /* Py_LIMITED_API */
 """

 def make_h(infile, outfile='Include/token.h'):
     """Regenerate the C token header (``Include/token.h`` by default).

     Emits one ``#define`` per token up to and including ERRORTOKEN, plus
     N_TOKENS (the total number of token names) and NT_OFFSET.  A message
     is printed only when *outfile* was actually rewritten.
     """
     tok_names, ERRORTOKEN, _ = load_tokens(infile)

     exported = tok_names[:ERRORTOKEN + 1]
     defines = [
         "#define %-15s %d\n" % (name, number)
         for number, name in enumerate(exported)
     ]
     header = token_h_template % (''.join(defines), len(tok_names), NT_OFFSET)

     if update_file(outfile, header):
         print("%s regenerated from %s" % (outfile, infile))


 # Template for the generated C source, filled in by make_c() with
 # %-formatting: the quoted token-name table entries, then the switch
 # bodies for one-, two- and three-character tokens.
 token_c_template = """\
 /* Auto-generated by Tools/scripts/generate_token.py */

 #include "Python.h"
 #include "token.h"

 /* Token names */

 const char * const _PyParser_TokenNames[] = {
 %s\
 };

 /* Return the token corresponding to a single character */

 int
 PyToken_OneChar(int c1)
 {
 %s\
     return OP;
 }

 int
 PyToken_TwoChars(int c1, int c2)
 {
 %s\
     return OP;
 }

 int
 PyToken_ThreeChars(int c1, int c2, int c3)
 {
 %s\
     return OP;
 }
 """

 def generate_chars_to_token(mapping, n=1):
     """Return C source for a ``switch`` over character ``c<n>``.

     *mapping* maps a single character either to a token name (emitted as
     ``case 'x': return NAME;``) or to a nested mapping, which becomes an
     inner switch on the next character followed by ``break;``.  Cases are
     emitted in sorted key order and each nesting level is indented by
     four more spaces.

     A ``'`` or ``\\`` key is backslash-escaped so that the emitted C
     character literal stays valid.
     """
     # Characters that cannot appear unescaped inside a C character literal.
     c_escapes = {"'": "\\'", '\\': '\\\\'}
     indent = '    ' * n
     pieces = [indent + 'switch (c%d) {\n' % (n,)]
     for c in sorted(mapping):
         value = mapping[c]
         literal = c_escapes.get(c, c)
         if isinstance(value, dict):
             pieces.append(indent + "case '%s':\n" % (literal,))
             pieces.append(generate_chars_to_token(value, n + 1))
             pieces.append(indent + '    break;\n')
         else:
             pieces.append(indent + "case '%s': return %s;\n" % (literal, value))
     pieces.append(indent + '}\n')
     return ''.join(pieces)

 def make_c(infile, outfile='Parser/token.c'):
     """Regenerate the C token tables (``Parser/token.c`` by default).

     Builds the ``_PyParser_TokenNames`` entries and the ``switch`` bodies
     of ``PyToken_OneChar``/``TwoChars``/``ThreeChars`` from the tokens in
     *infile*, then rewrites *outfile* if its content changed.
     """
     tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)
     # '<>' maps to the same token as '!='.
     string_to_tok['<>'] = string_to_tok['!=']

     # chars_to_token[length] is a nested dict keyed character by character;
     # the entry stored under the final character is the token name.
     chars_to_token = {}
     for string, value in string_to_tok.items():
         assert 1 <= len(string) <= 3
         token = tok_names[value]
         node = chars_to_token.setdefault(len(string), {})
         for c in string[:-1]:
             node = node.setdefault(c, {})
         node[string[-1]] = token

     # Names from ERRORTOKEN onwards are wrapped in angle brackets.
     names = [
         '    "%s",\n' % (name if value < ERRORTOKEN else '<%s>' % name)
         for value, name in enumerate(tok_names)
     ]
     names.append('    "<N_TOKENS>",\n')

     parts = [''.join(names)]
     for length in (1, 2, 3):
         parts.append(generate_chars_to_token(chars_to_token[length]))

     if update_file(outfile, token_c_template % tuple(parts)):
         print("%s regenerated from %s" % (outfile, infile))


 # Template for the generated reST include, filled in by make_rst() with
 # the newline-joined ".. data::" entries.
 token_inc_template = """\
 .. Auto-generated by Tools/scripts/generate_token.py
 %s
 .. data:: N_TOKENS

 .. data:: NT_OFFSET
 """

 def make_rst(infile, outfile='Doc/library/token-list.inc'):
     """Regenerate the reST token list (``Doc/library/token-list.inc``).

     Writes one ``.. data::`` entry per token up to and including
     ERRORTOKEN; tokens that have an exact string also get a
     "Token value for" line.  A message is printed only when *outfile*
     was actually rewritten.
     """
     tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)

     # Invert the string -> number mapping to look strings up by number.
     tok_to_string = {}
     for text, number in string_to_tok.items():
         tok_to_string[number] = text

     entries = []
     for number, name in enumerate(tok_names[:ERRORTOKEN + 1]):
         entries.append('.. data:: %s' % (name,))
         if number in tok_to_string:
             entries.extend([
                 '',
                 '   Token value for ``"%s"``.' % tok_to_string[number],
             ])
         entries.append('')

     document = token_inc_template % '\n'.join(entries)
     if update_file(outfile, document):
         print("%s regenerated from %s" % (outfile, infile))


 # Template for the generated Python module, filled in by make_py() with
 # %-formatting: the "NAME = value" constants, N_TOKENS, NT_OFFSET and the
 # EXACT_TOKEN_TYPES entries.
 token_py_template = '''\
 """Token constants."""
 # Auto-generated by Tools/scripts/generate_token.py

 __all__ = ['tok_name', 'ISTERMINAL', 'ISNONTERMINAL', 'ISEOF']

 %s
 N_TOKENS = %d
 # Special definitions for cooperation with parser
 NT_OFFSET = %d

 tok_name = {value: name
             for name, value in globals().items()
             if isinstance(value, int) and not name.startswith('_')}
 __all__.extend(tok_name.values())

 EXACT_TOKEN_TYPES = {
 %s
 }

 def ISTERMINAL(x):
     return x < NT_OFFSET

 def ISNONTERMINAL(x):
     return x >= NT_OFFSET

 def ISEOF(x):
     return x == ENDMARKER
 '''

 def make_py(infile, outfile='Lib/token.py'):
     """Regenerate the Python token module (``Lib/token.py`` by default).

     Emits one ``NAME = value`` line per token, with an explanatory comment
     inserted in front of the ERRORTOKEN entry, and one EXACT_TOKEN_TYPES
     line per exact token string, sorted by string.  A message is printed
     only when *outfile* was actually rewritten.
     """
     tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)

     constants = [
         '%s = %d' % (name, number)
         for number, name in enumerate(tok_names)
     ]
     constants.insert(ERRORTOKEN,
         "# These aren't used by the C tokenizer but are needed for tokenize.py")

     token_types = [
         '    %r: %s,' % (text, tok_names[number])
         for text, number in sorted(string_to_tok.items())
     ]

     module_source = token_py_template % (
         '\n'.join(constants),
         len(tok_names),
         NT_OFFSET,
         '\n'.join(token_types),
     )
     if update_file(outfile, module_source):
         print("%s regenerated from %s" % (outfile, infile))


 def main(op, infile='Grammar/Tokens', *args):
     """Run the ``make_<op>`` generator defined in this module.

     *op* selects the module-level function named ``'make_' + op``; it is
     called with *infile* followed by any extra positional *args*.  An
     unknown *op* raises KeyError from the globals() lookup.
     """
     module_namespace = globals()
     module_namespace['make_' + op](infile, *args)


 # Script entry point: forward the command-line arguments (operation, then
 # optionally the input file and any extra arguments) to main().
 if __name__ == '__main__':
     import sys
     main(*sys.argv[1:])
	#! /usr/bin/env python3
	# This script generates token related files from Grammar/Tokens:
	#
	# Doc/library/token-list.inc
	# Include/token.h
	# Parser/token.c
	# Lib/token.py


	# Emitted as NT_OFFSET into the generated token.h and token.py, where the
	# ISTERMINAL/ISNONTERMINAL helpers compare token numbers against it.
	NT_OFFSET = 256

	def load_tokens(path):
	    """Parse a token definition file.

	    Each non-empty line, once a trailing ``#`` comment is removed, is split
	    on whitespace: the first field is the token name and the optional
	    second field is a Python string literal giving the token's exact text.
	    Tokens are numbered consecutively from 0 in file order.

	    Returns a ``(tok_names, ERRORTOKEN, string_to_tok)`` tuple: the list of
	    names indexed by token number, the number of the ``ERRORTOKEN`` entry
	    (``None`` when the file has none) and a dict mapping each exact token
	    string to its number.
	    """
	    tok_names = []
	    string_to_tok = {}
	    ERRORTOKEN = None
	    with open(path) as fp:
	        for raw in fp:
	            # Keep only what precedes the first '#', without outer blanks.
	            text = raw.partition('#')[0].strip()
	            if not text:
	                continue
	            name, *rest = text.split()
	            number = len(tok_names)
	            tok_names.append(name)
	            if name == 'ERRORTOKEN':
	                ERRORTOKEN = number
	            if rest:
	                # NOTE(review): eval() runs whatever the second field
	                # contains, so the token file must come from a trusted source.
	                string_to_tok[eval(rest[0])] = number
	    return tok_names, ERRORTOKEN, string_to_tok


	def update_file(file, content):
	    """Write *content* to *file* unless it already holds exactly that text.

	    Returns False when the existing file matches and is left untouched,
	    True when the file was (re)written.  A file that cannot be opened or
	    read (OSError or ValueError) is treated as out of date.
	    """
	    try:
	        with open(file, 'r') as existing:
	            up_to_date = existing.read() == content
	    except (OSError, ValueError):
	        up_to_date = False
	    if up_to_date:
	        return False
	    with open(file, 'w') as target:
	        target.write(content)
	    return True


	# Template for the generated C header, filled in by make_h() with
	# %-formatting: the block of "#define NAME value" lines, the N_TOKENS
	# count and NT_OFFSET.
	token_h_template = """\
	/* Auto-generated by Tools/scripts/generate_token.py */

	/* Token types */
	#ifndef Py_LIMITED_API
	#ifndef Py_TOKEN_H
	#define Py_TOKEN_H
	#ifdef __cplusplus
	extern "C" {
	#endif

	#undef TILDE   /* Prevent clash of our definition with system macro. Ex AIX, ioctl.h */

	%s\
	#define N_TOKENS        %d
	#define NT_OFFSET       %d

	/* Special definitions for cooperation with parser */

	#define ISTERMINAL(x)           ((x) < NT_OFFSET)
	#define ISNONTERMINAL(x)        ((x) >= NT_OFFSET)
	#define ISEOF(x)                ((x) == ENDMARKER)
	#define ISWHITESPACE(x)         ((x) == ENDMARKER || \\
	                                 (x) == NEWLINE   || \\
	                                 (x) == INDENT    || \\
	                                 (x) == DEDENT)


	PyAPI_DATA(const char * const) _PyParser_TokenNames[]; /* Token names */
	PyAPI_FUNC(int) PyToken_OneChar(int);
	PyAPI_FUNC(int) PyToken_TwoChars(int, int);
	PyAPI_FUNC(int) PyToken_ThreeChars(int, int, int);

	#ifdef __cplusplus
	}
	#endif
	#endif /* !Py_TOKEN_H */
	#endif /* Py_LIMITED_API */
	"""

	def make_h(infile, outfile='Include/token.h'):
	    """Regenerate the C token header (``Include/token.h`` by default).

	    Emits one ``#define`` per token up to and including ERRORTOKEN, plus
	    N_TOKENS (the total number of token names) and NT_OFFSET.  A message
	    is printed only when *outfile* was actually rewritten.
	    """
	    tok_names, ERRORTOKEN, _ = load_tokens(infile)

	    exported = tok_names[:ERRORTOKEN + 1]
	    defines = [
	        "#define %-15s %d\n" % (name, number)
	        for number, name in enumerate(exported)
	    ]
	    header = token_h_template % (''.join(defines), len(tok_names), NT_OFFSET)

	    if update_file(outfile, header):
	        print("%s regenerated from %s" % (outfile, infile))


	# Template for the generated C source, filled in by make_c() with
	# %-formatting: the quoted token-name table entries, then the switch
	# bodies for one-, two- and three-character tokens.
	token_c_template = """\
	/* Auto-generated by Tools/scripts/generate_token.py */

	#include "Python.h"
	#include "token.h"

	/* Token names */

	const char * const _PyParser_TokenNames[] = {
	%s\
	};

	/* Return the token corresponding to a single character */

	int
	PyToken_OneChar(int c1)
	{
	%s\
	    return OP;
	}

	int
	PyToken_TwoChars(int c1, int c2)
	{
	%s\
	    return OP;
	}

	int
	PyToken_ThreeChars(int c1, int c2, int c3)
	{
	%s\
	    return OP;
	}
	"""

	def generate_chars_to_token(mapping, n=1):
	    """Return C source for a ``switch`` over character ``c<n>``.

	    *mapping* maps a single character either to a token name (emitted as
	    ``case 'x': return NAME;``) or to a nested mapping, which becomes an
	    inner switch on the next character followed by ``break;``.  Cases are
	    emitted in sorted key order and each nesting level is indented by
	    four more spaces.

	    A ``'`` or ``\\`` key is backslash-escaped so that the emitted C
	    character literal stays valid.
	    """
	    # Characters that cannot appear unescaped inside a C character literal.
	    c_escapes = {"'": "\\'", '\\': '\\\\'}
	    indent = '    ' * n
	    pieces = [indent + 'switch (c%d) {\n' % (n,)]
	    for c in sorted(mapping):
	        value = mapping[c]
	        literal = c_escapes.get(c, c)
	        if isinstance(value, dict):
	            pieces.append(indent + "case '%s':\n" % (literal,))
	            pieces.append(generate_chars_to_token(value, n + 1))
	            pieces.append(indent + '    break;\n')
	        else:
	            pieces.append(indent + "case '%s': return %s;\n" % (literal, value))
	    pieces.append(indent + '}\n')
	    return ''.join(pieces)

	def make_c(infile, outfile='Parser/token.c'):
	    """Regenerate the C token tables (``Parser/token.c`` by default).

	    Builds the ``_PyParser_TokenNames`` entries and the ``switch`` bodies
	    of ``PyToken_OneChar``/``TwoChars``/``ThreeChars`` from the tokens in
	    *infile*, then rewrites *outfile* if its content changed.
	    """
	    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)
	    # '<>' maps to the same token as '!='.
	    string_to_tok['<>'] = string_to_tok['!=']

	    # chars_to_token[length] is a nested dict keyed character by character;
	    # the entry stored under the final character is the token name.
	    chars_to_token = {}
	    for string, value in string_to_tok.items():
	        assert 1 <= len(string) <= 3
	        token = tok_names[value]
	        node = chars_to_token.setdefault(len(string), {})
	        for c in string[:-1]:
	            node = node.setdefault(c, {})
	        node[string[-1]] = token

	    # Names from ERRORTOKEN onwards are wrapped in angle brackets.
	    names = [
	        '    "%s",\n' % (name if value < ERRORTOKEN else '<%s>' % name)
	        for value, name in enumerate(tok_names)
	    ]
	    names.append('    "<N_TOKENS>",\n')

	    parts = [''.join(names)]
	    for length in (1, 2, 3):
	        parts.append(generate_chars_to_token(chars_to_token[length]))

	    if update_file(outfile, token_c_template % tuple(parts)):
	        print("%s regenerated from %s" % (outfile, infile))


	# Template for the generated reST include, filled in by make_rst() with
	# the newline-joined ".. data::" entries.
	token_inc_template = """\
	.. Auto-generated by Tools/scripts/generate_token.py
	%s
	.. data:: N_TOKENS

	.. data:: NT_OFFSET
	"""

	def make_rst(infile, outfile='Doc/library/token-list.inc'):
	    """Regenerate the reST token list (``Doc/library/token-list.inc``).

	    Writes one ``.. data::`` entry per token up to and including
	    ERRORTOKEN; tokens that have an exact string also get a
	    "Token value for" line.  A message is printed only when *outfile*
	    was actually rewritten.
	    """
	    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)

	    # Invert the string -> number mapping to look strings up by number.
	    tok_to_string = {}
	    for text, number in string_to_tok.items():
	        tok_to_string[number] = text

	    entries = []
	    for number, name in enumerate(tok_names[:ERRORTOKEN + 1]):
	        entries.append('.. data:: %s' % (name,))
	        if number in tok_to_string:
	            entries.extend([
	                '',
	                '   Token value for ``"%s"``.' % tok_to_string[number],
	            ])
	        entries.append('')

	    document = token_inc_template % '\n'.join(entries)
	    if update_file(outfile, document):
	        print("%s regenerated from %s" % (outfile, infile))


	# Template for the generated Python module, filled in by make_py() with
	# %-formatting: the "NAME = value" constants, N_TOKENS, NT_OFFSET and the
	# EXACT_TOKEN_TYPES entries.
	token_py_template = '''\
	"""Token constants."""
	# Auto-generated by Tools/scripts/generate_token.py

	__all__ = ['tok_name', 'ISTERMINAL', 'ISNONTERMINAL', 'ISEOF']

	%s
	N_TOKENS = %d
	# Special definitions for cooperation with parser
	NT_OFFSET = %d

	tok_name = {value: name
	            for name, value in globals().items()
	            if isinstance(value, int) and not name.startswith('_')}
	__all__.extend(tok_name.values())

	EXACT_TOKEN_TYPES = {
	%s
	}

	def ISTERMINAL(x):
	    return x < NT_OFFSET

	def ISNONTERMINAL(x):
	    return x >= NT_OFFSET

	def ISEOF(x):
	    return x == ENDMARKER
	'''

	def make_py(infile, outfile='Lib/token.py'):
	    """Regenerate the Python token module (``Lib/token.py`` by default).

	    Emits one ``NAME = value`` line per token, with an explanatory comment
	    inserted in front of the ERRORTOKEN entry, and one EXACT_TOKEN_TYPES
	    line per exact token string, sorted by string.  A message is printed
	    only when *outfile* was actually rewritten.
	    """
	    tok_names, ERRORTOKEN, string_to_tok = load_tokens(infile)

	    constants = [
	        '%s = %d' % (name, number)
	        for number, name in enumerate(tok_names)
	    ]
	    constants.insert(ERRORTOKEN,
	        "# These aren't used by the C tokenizer but are needed for tokenize.py")

	    token_types = [
	        '    %r: %s,' % (text, tok_names[number])
	        for text, number in sorted(string_to_tok.items())
	    ]

	    module_source = token_py_template % (
	        '\n'.join(constants),
	        len(tok_names),
	        NT_OFFSET,
	        '\n'.join(token_types),
	    )
	    if update_file(outfile, module_source):
	        print("%s regenerated from %s" % (outfile, infile))


	def main(op, infile='Grammar/Tokens', *args):
	    """Run the ``make_<op>`` generator defined in this module.

	    *op* selects the module-level function named ``'make_' + op``; it is
	    called with *infile* followed by any extra positional *args*.  An
	    unknown *op* raises KeyError from the globals() lookup.
	    """
	    module_namespace = globals()
	    module_namespace['make_' + op](infile, *args)


	# Script entry point: forward the command-line arguments (operation, then
	# optionally the input file and any extra arguments) to main().
	if __name__ == '__main__':
	    import sys
	    main(*sys.argv[1:])