Refactor scripts in Tools/peg_generator/scripts (GH-20401)
(cherry picked from commit ba6fd87e41dceb01dcdacc57c722aca12cde42a9)
Co-authored-by: Lysandros Nikolaou <lisandrosnik@gmail.com>
diff --git a/Modules/_peg_parser.c b/Modules/_peg_parser.c
index b66d5a8..ca2a3cf 100644
--- a/Modules/_peg_parser.c
+++ b/Modules/_peg_parser.c
@@ -80,14 +80,15 @@
PyObject *
_Py_parse_string(PyObject *self, PyObject *args, PyObject *kwds)
{
- static char *keywords[] = {"string", "filename", "mode", "oldparser", NULL};
+ static char *keywords[] = {"string", "filename", "mode", "oldparser", "ast", NULL};
char *the_string;
char *filename = "<string>";
char *mode_str = "exec";
int oldparser = 0;
+ int ast = 1;
- if (!PyArg_ParseTupleAndKeywords(args, kwds, "s|ssp", keywords,
- &the_string, &filename, &mode_str, &oldparser)) {
+ if (!PyArg_ParseTupleAndKeywords(args, kwds, "s|sspp", keywords,
+ &the_string, &filename, &mode_str, &oldparser, &ast)) {
return NULL;
}
@@ -110,7 +111,14 @@
return NULL;
}
- PyObject *result = PyAST_mod2obj(mod);
+ PyObject *result;
+ if (ast) {
+ result = PyAST_mod2obj(mod);
+ }
+ else {
+ Py_INCREF(Py_None);
+ result = Py_None;
+ }
PyArena_Free(arena);
return result;
}
diff --git a/Tools/peg_generator/Makefile b/Tools/peg_generator/Makefile
index e7a190c..fb727c0 100644
--- a/Tools/peg_generator/Makefile
+++ b/Tools/peg_generator/Makefile
@@ -70,23 +70,21 @@
time: time_compile
time_compile: venv data/xxl.py
- $(VENVPYTHON) scripts/benchmark.py --parser=pegen --target=xxl compile
+ $(VENVPYTHON) scripts/benchmark.py --parser=new --target=xxl compile
time_parse: venv data/xxl.py
- $(VENVPYTHON) scripts/benchmark.py --parser=pegen --target=xxl parse
+ $(VENVPYTHON) scripts/benchmark.py --parser=new --target=xxl parse
time_old: time_old_compile
time_old_compile: venv data/xxl.py
- $(VENVPYTHON) scripts/benchmark.py --parser=cpython --target=xxl compile
+ $(VENVPYTHON) scripts/benchmark.py --parser=old --target=xxl compile
time_old_parse: venv data/xxl.py
- $(VENVPYTHON) scripts/benchmark.py --parser=cpython --target=xxl parse
+ $(VENVPYTHON) scripts/benchmark.py --parser=old --target=xxl parse
time_peg_dir: venv
$(VENVPYTHON) scripts/test_parse_directory.py \
- --grammar-file $(GRAMMAR) \
- --tokens-file $(TOKENS) \
-d $(TESTDIR) \
$(TESTFLAGS) \
--exclude "*/failset/*" \
@@ -95,12 +93,8 @@
time_stdlib: $(CPYTHON) venv
$(VENVPYTHON) scripts/test_parse_directory.py \
- --grammar-file $(GRAMMAR) \
- --tokens-file $(TOKENS) \
-d $(CPYTHON) \
$(TESTFLAGS) \
- --exclude "*/test2to3/*" \
- --exclude "*/test2to3/**/*" \
--exclude "*/bad*" \
--exclude "*/lib2to3/tests/data/*"
diff --git a/Tools/peg_generator/scripts/benchmark.py b/Tools/peg_generator/scripts/benchmark.py
index 71512c2..af356be 100644
--- a/Tools/peg_generator/scripts/benchmark.py
+++ b/Tools/peg_generator/scripts/benchmark.py
@@ -24,7 +24,7 @@
argparser.add_argument(
"--parser",
action="store",
- choices=["pegen", "cpython"],
+ choices=["new", "old"],
default="pegen",
help="Which parser to benchmark (default is pegen)",
)
@@ -40,7 +40,12 @@
command_compile = subcommands.add_parser(
"compile", help="Benchmark parsing and compiling to bytecode"
)
-command_parse = subcommands.add_parser("parse", help="Benchmark parsing and generating an ast.AST")
+command_parse = subcommands.add_parser(
+ "parse", help="Benchmark parsing and generating an ast.AST"
+)
+command_notree = subcommands.add_parser(
+ "notree", help="Benchmark parsing and dumping the tree"
+)
def benchmark(func):
@@ -62,7 +67,7 @@
@benchmark
def time_compile(source, parser):
- if parser == "cpython":
+ if parser == "old":
return _peg_parser.compile_string(
source,
oldparser=True,
@@ -73,32 +78,40 @@
@benchmark
def time_parse(source, parser):
- if parser == "cpython":
+ if parser == "old":
return _peg_parser.parse_string(source, oldparser=True)
else:
return _peg_parser.parse_string(source)
+@benchmark
+def time_notree(source, parser):
+ if parser == "old":
+ return _peg_parser.parse_string(source, oldparser=True, ast=False)
+ else:
+ return _peg_parser.parse_string(source, ast=False)
+
+
def run_benchmark_xxl(subcommand, parser, source):
if subcommand == "compile":
time_compile(source, parser)
elif subcommand == "parse":
time_parse(source, parser)
+ elif subcommand == "notree":
+ time_notree(source, parser)
def run_benchmark_stdlib(subcommand, parser):
+ modes = {"compile": 2, "parse": 1, "notree": 0}
for _ in range(3):
parse_directory(
"../../Lib",
- "../../Grammar/python.gram",
- "../../Grammar/Tokens",
verbose=False,
excluded_files=["*/bad*", "*/lib2to3/tests/data/*",],
- skip_actions=False,
tree_arg=0,
short=True,
- mode=2 if subcommand == "compile" else 1,
- parser=parser,
+ mode=modes[subcommand],
+ oldparser=(parser == "old"),
)
diff --git a/Tools/peg_generator/scripts/grammar_grapher.py b/Tools/peg_generator/scripts/grammar_grapher.py
index 3aa2546..4afdbce 100755
--- a/Tools/peg_generator/scripts/grammar_grapher.py
+++ b/Tools/peg_generator/scripts/grammar_grapher.py
@@ -42,6 +42,13 @@
)
argparser = argparse.ArgumentParser(prog="graph_grammar", description="Graph a grammar tree",)
+argparser.add_argument(
+ "-s",
+ "--start",
+ choices=["exec", "eval", "single"],
+ default="exec",
+ help="Choose the grammar's start rule (exec, eval or single)",
+)
argparser.add_argument("grammar_file", help="The grammar file to graph")
@@ -91,19 +98,15 @@
references[name] = set(references_for_item(rule))
# Flatten the start node if has only a single reference
- root_node = "start"
- if start := references["start"]:
- if len(start) == 1:
- root_node = list(start)[0]
- del references["start"]
+ root_node = {"exec": "file", "eval": "eval", "single": "interactive"}[args.start]
print("digraph g1 {")
print('\toverlap="scale";') # Force twopi to scale the graph to avoid overlaps
print(f'\troot="{root_node}";')
- print(f"\t{root_node} [color=green, shape=circle]")
+ print(f"\t{root_node} [color=green, shape=circle];")
for name, refs in references.items():
- if refs: # Ignore empty sets
- print(f"\t{name} -> {','.join(refs)};")
+ for ref in refs:
+ print(f"\t{name} -> {ref};")
print("}")
diff --git a/Tools/peg_generator/scripts/show_parse.py b/Tools/peg_generator/scripts/show_parse.py
index 1c1996f..b4ee5a1 100755
--- a/Tools/peg_generator/scripts/show_parse.py
+++ b/Tools/peg_generator/scripts/show_parse.py
@@ -41,7 +41,13 @@
parser.add_argument(
"-d", "--diff", action="store_true", help="show diff between grammar and ast (requires -g)"
)
-parser.add_argument("-g", "--grammar-file", help="grammar to use (default: use the ast module)")
+parser.add_argument(
+ "-p",
+ "--parser",
+ choices=["new", "old"],
+ default="new",
+ help="choose the parser to use"
+)
parser.add_argument(
"-m",
"--multiline",
@@ -84,19 +90,18 @@
def main() -> None:
args = parser.parse_args()
- if args.diff and not args.grammar_file:
- parser.error("-d/--diff requires -g/--grammar-file")
+ new_parser = args.parser == "new"
if args.multiline:
sep = "\n"
else:
sep = " "
program = sep.join(args.program)
- if args.grammar_file:
+ if new_parser:
tree = _peg_parser.parse_string(program)
if args.diff:
- a = tree
- b = _peg_parser.parse_string(program, oldparser=True)
+ a = _peg_parser.parse_string(program, oldparser=True)
+ b = tree
diff = diff_trees(a, b, args.verbose)
if diff:
for line in diff:
@@ -104,11 +109,11 @@
else:
print("# Trees are the same")
else:
- print(f"# Parsed using {args.grammar_file}")
+ print("# Parsed using the new parser")
print(format_tree(tree, args.verbose))
else:
tree = _peg_parser.parse_string(program, oldparser=True)
- print("# Parse using the old parser")
+ print("# Parsed using the old parser")
print(format_tree(tree, args.verbose))
diff --git a/Tools/peg_generator/scripts/test_parse_directory.py b/Tools/peg_generator/scripts/test_parse_directory.py
index e88afe1..63204ce 100755
--- a/Tools/peg_generator/scripts/test_parse_directory.py
+++ b/Tools/peg_generator/scripts/test_parse_directory.py
@@ -11,7 +11,7 @@
from glob import glob
from pathlib import PurePath
-from typing import List, Optional, Any
+from typing import List, Optional, Any, Tuple
sys.path.insert(0, os.getcwd())
from pegen.ast_dump import ast_dump
@@ -22,13 +22,15 @@
FAIL = "\033[91m"
ENDC = "\033[0m"
+COMPILE = 2
+PARSE = 1
+NOTREE = 0
+
argparser = argparse.ArgumentParser(
prog="test_parse_directory",
description="Helper program to test directories or files for pegen",
)
argparser.add_argument("-d", "--directory", help="Directory path containing files to test")
-argparser.add_argument("--grammar-file", help="Grammar file path")
-argparser.add_argument("--tokens-file", help="Tokens file path")
argparser.add_argument(
"-e", "--exclude", action="append", default=[], help="Glob(s) for matching files to exclude"
)
@@ -39,9 +41,6 @@
"-v", "--verbose", action="store_true", help="Display detailed errors for failures"
)
argparser.add_argument(
- "--skip-actions", action="store_true", help="Suppress code emission for rule actions",
-)
-argparser.add_argument(
"-t", "--tree", action="count", help="Compare parse tree to official AST", default=0
)
@@ -113,92 +112,35 @@
return 1
-def parse_directory(
- directory: str,
- grammar_file: str,
- tokens_file: str,
- verbose: bool,
- excluded_files: List[str],
- skip_actions: bool,
- tree_arg: int,
- short: bool,
- mode: int,
- parser: str,
-) -> int:
- if parser == "cpython" and (tree_arg or mode == 0):
- print("Cannot specify tree argument or mode=0 with the cpython parser.", file=sys.stderr)
- return 1
-
- if not directory:
- print("You must specify a directory of files to test.", file=sys.stderr)
- return 1
-
- if grammar_file and tokens_file:
- if not os.path.exists(grammar_file):
- print(f"The specified grammar file, {grammar_file}, does not exist.", file=sys.stderr)
- return 1
- else:
- print(
- "A grammar file or a tokens file was not provided - attempting to use existing parser from stdlib...\n"
+def parse_file(source: str, file: str, mode: int, oldparser: bool) -> Tuple[Any, float]:
+ t0 = time.time()
+ if mode == COMPILE:
+ result = _peg_parser.compile_string(
+ source,
+ filename=file,
+ oldparser=oldparser,
)
-
- if tree_arg:
- assert mode == 1, "Mode should be 1 (parse), when comparing the generated trees"
-
- # For a given directory, traverse files and attempt to parse each one
- # - Output success/failure for each file
- errors = 0
- files = []
- trees = {} # Trees to compare (after everything else is done)
- total_seconds = 0
-
- for file in sorted(glob(f"{directory}/**/*.py", recursive=True)):
- # Only attempt to parse Python files and files that are not excluded
- should_exclude_file = False
- for pattern in excluded_files:
- if PurePath(file).match(pattern):
- should_exclude_file = True
- break
-
- if not should_exclude_file:
- with tokenize.open(file) as f:
- source = f.read()
- try:
- t0 = time.time()
- if mode == 2:
- result = _peg_parser.compile_string(
- source,
- filename=file,
- oldparser=parser == "cpython",
- )
- else:
- result = _peg_parser.parse_string(
- source,
- filename=file,
- oldparser=parser == "cpython"
- )
- t1 = time.time()
- total_seconds += (t1 - t0)
- if tree_arg:
- trees[file] = result
- if not short:
- report_status(succeeded=True, file=file, verbose=verbose)
- except Exception as error:
- try:
- _peg_parser.parse_string(source, mode="exec", oldparser=True)
- except Exception:
- if not short:
- print(f"File {file} cannot be parsed by either pegen or the ast module.")
- else:
- report_status(
- succeeded=False, file=file, verbose=verbose, error=error, short=short
- )
- errors += 1
- files.append(file)
+ else:
+ result = _peg_parser.parse_string(
+ source,
+ filename=file,
+ oldparser=oldparser,
+ ast=(mode == PARSE),
+ )
t1 = time.time()
+ return result, t1 - t0
+
+def is_parsing_failure(source: str) -> bool:
+ try:
+ _peg_parser.parse_string(source, mode="exec", oldparser=True)
+ except SyntaxError:
+ return False
+ return True
+
+
+def generate_time_stats(files, total_seconds) -> None:
total_files = len(files)
-
total_bytes = 0
total_lines = 0
for file in files:
@@ -217,6 +159,57 @@
f"or {total_bytes / total_seconds :,.0f} bytes/sec.",
)
+
+def parse_directory(
+ directory: str,
+ verbose: bool,
+ excluded_files: List[str],
+ tree_arg: int,
+ short: bool,
+ mode: int,
+ oldparser: bool,
+) -> int:
+ if tree_arg:
+ assert mode == PARSE, "Mode should be 1 (parse), when comparing the generated trees"
+
+ if oldparser and tree_arg:
+ print("Cannot specify tree argument with the cpython parser.", file=sys.stderr)
+ return 1
+
+ # For a given directory, traverse files and attempt to parse each one
+ # - Output success/failure for each file
+ errors = 0
+ files = []
+ trees = {} # Trees to compare (after everything else is done)
+ total_seconds = 0
+
+ for file in sorted(glob(f"{directory}/**/*.py", recursive=True)):
+ # Only attempt to parse Python files and files that are not excluded
+ if any(PurePath(file).match(pattern) for pattern in excluded_files):
+ continue
+
+ with tokenize.open(file) as f:
+ source = f.read()
+
+ try:
+ result, dt = parse_file(source, file, mode, oldparser)
+ total_seconds += dt
+ if tree_arg:
+ trees[file] = result
+ report_status(succeeded=True, file=file, verbose=verbose, short=short)
+ except SyntaxError as error:
+ if is_parsing_failure(source):
+ print(f"File {file} cannot be parsed by either parser.")
+ else:
+ report_status(
+ succeeded=False, file=file, verbose=verbose, error=error, short=short
+ )
+ errors += 1
+ files.append(file)
+
+ t1 = time.time()
+
+ generate_time_stats(files, total_seconds)
if short:
print_memstats()
@@ -240,26 +233,20 @@
def main() -> None:
args = argparser.parse_args()
directory = args.directory
- grammar_file = args.grammar_file
- tokens_file = args.tokens_file
verbose = args.verbose
excluded_files = args.exclude
- skip_actions = args.skip_actions
tree = args.tree
short = args.short
mode = 1 if args.tree else 2
sys.exit(
parse_directory(
directory,
- grammar_file,
- tokens_file,
verbose,
excluded_files,
- skip_actions,
tree,
short,
mode,
- "pegen",
+ oldparser=False,
)
)
diff --git a/Tools/peg_generator/scripts/test_pypi_packages.py b/Tools/peg_generator/scripts/test_pypi_packages.py
index 98f7778..f014753 100755
--- a/Tools/peg_generator/scripts/test_pypi_packages.py
+++ b/Tools/peg_generator/scripts/test_pypi_packages.py
@@ -57,22 +57,11 @@
def run_tests(dirname: str, tree: int) -> int:
return test_parse_directory.parse_directory(
dirname,
- HERE / ".." / ".." / ".." / "Grammar" / "python.gram",
- HERE / ".." / ".." / ".." / "Grammar" / "Tokens",
verbose=False,
- excluded_files=[
- "*/failset/*",
- "*/failset/**",
- "*/failset/**/*",
- "*/test2to3/*",
- "*/test2to3/**/*",
- "*/bad*",
- "*/lib2to3/tests/data/*",
- ],
- skip_actions=False,
+ excluded_files=[],
tree_arg=tree,
short=True,
- mode=1,
+ mode=1 if tree else 0,
parser="pegen",
)