bpo-40334: PEP 617 implementation: New PEG parser for CPython (GH-19503)

Co-authored-by: Guido van Rossum <guido@python.org>
Co-authored-by: Lysandros Nikolaou <lisandrosnik@gmail.com>
diff --git a/Tools/README b/Tools/README
index 6c5fb20..b6d0b18 100644
--- a/Tools/README
+++ b/Tools/README
@@ -23,6 +23,8 @@
 
 parser          Un-parsing tool to generate code from an AST.
 
+peg_generator   PEG-based parser generator (pegen) used for the new parser.
+
 pynche          A Tkinter-based color editor.
 
 scripts         A number of useful single-file programs, e.g. tabnanny.py
diff --git a/Tools/peg_generator/.clang-format b/Tools/peg_generator/.clang-format
new file mode 100644
index 0000000..b2bb93d
--- /dev/null
+++ b/Tools/peg_generator/.clang-format
@@ -0,0 +1,17 @@
+# A clang-format style that approximates Python's PEP 7
+BasedOnStyle: Google
+AlwaysBreakAfterReturnType: All
+AllowShortIfStatementsOnASingleLine: false
+AlignAfterOpenBracket: Align
+BreakBeforeBraces: Stroustrup
+ColumnLimit: 95
+DerivePointerAlignment: false
+IndentWidth: 4
+Language: Cpp
+PointerAlignment: Right
+ReflowComments: true
+SpaceBeforeParens: ControlStatements
+SpacesInParentheses: false
+TabWidth: 4
+UseTab: Never
+SortIncludes: false
diff --git a/Tools/peg_generator/.gitignore b/Tools/peg_generator/.gitignore
new file mode 100644
index 0000000..91c41f8
--- /dev/null
+++ b/Tools/peg_generator/.gitignore
@@ -0,0 +1,3 @@
+peg_extension/parse.c
+data/xxl.py
+@data
diff --git a/Tools/peg_generator/Makefile b/Tools/peg_generator/Makefile
new file mode 100644
index 0000000..fb67a21
--- /dev/null
+++ b/Tools/peg_generator/Makefile
@@ -0,0 +1,116 @@
+UNAME_S := $(shell uname -s)
+ifeq ($(UNAME_S),Linux)
+	PYTHON ?= ../../python
+endif
+ifeq ($(UNAME_S),Darwin)
+	PYTHON ?= ../../python.exe
+endif
+
+CPYTHON ?= ../../Lib
+MYPY ?= mypy
+
+GRAMMAR = ../../Grammar/python.gram
+TESTFILE = data/cprog.py
+TIMEFILE = data/xxl.py
+TESTDIR = .
+TESTFLAGS = --short
+
+data/xxl.py:
+	$(PYTHON) -m zipfile -e data/xxl.zip data
+
+build: peg_extension/parse.c
+
+peg_extension/parse.c: $(GRAMMAR) pegen/*.py peg_extension/peg_extension.c ../../Parser/pegen/pegen.c ../../Parser/pegen/parse_string.c ../../Parser/pegen/*.h pegen/grammar_parser.py
+	$(PYTHON) -m pegen -q -c $(GRAMMAR) -o peg_extension/parse.c --compile-extension
+
+clean:
+	-rm -f peg_extension/*.o peg_extension/*.so peg_extension/parse.c
+	-rm -f data/xxl.py
+
+dump: peg_extension/parse.c
+	cat -n $(TESTFILE)
+	$(PYTHON) -c "from peg_extension import parse; import ast; t = parse.parse_file('$(TESTFILE)', mode=1); print(ast.dump(t))"
+
+regen-metaparser: pegen/metagrammar.gram pegen/*.py
+	$(PYTHON) -m pegen -q -c pegen/metagrammar.gram -o pegen/grammar_parser.py
+
+# Note: These targets really depend on the generated shared object
+# peg_extension/parse.*.so, but its name differs across systems, so we rely on
+# the implicit dependency on parse.c created by the use of --compile-extension.
+
+.PHONY: test
+
+test: run
+
+run: peg_extension/parse.c
+	$(PYTHON) -c "from peg_extension import parse; t = parse.parse_file('$(TESTFILE)'); exec(t)"
+
+compile: peg_extension/parse.c
+	$(PYTHON) -c "from peg_extension import parse; t = parse.parse_file('$(TESTFILE)', mode=2)"
+
+parse: peg_extension/parse.c
+	$(PYTHON) -c "from peg_extension import parse; t = parse.parse_file('$(TESTFILE)', mode=1)"
+
+check: peg_extension/parse.c
+	$(PYTHON) -c "from peg_extension import parse; t = parse.parse_file('$(TESTFILE)', mode=0)"
+
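+# The "stats" target writes raw memo statistics to the file "@data" (hence the
+# .gitignore entry above) and then aggregates them with scripts/joinstats.py.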
+stats: peg_extension/parse.c data/xxl.py
+	$(PYTHON) -c "from peg_extension import parse; t = parse.parse_file('$(TIMEFILE)', mode=0); parse.dump_memo_stats()" >@data
+	$(PYTHON) scripts/joinstats.py @data
+
+time: time_compile
+
+time_compile: peg_extension/parse.c data/xxl.py
+	$(PYTHON) scripts/benchmark.py --parser=pegen --target=xxl compile
+
+time_parse: peg_extension/parse.c data/xxl.py
+	$(PYTHON) scripts/benchmark.py --parser=pegen --target=xxl parse
+
+time_check: peg_extension/parse.c data/xxl.py
+	$(PYTHON) scripts/benchmark.py --parser=pegen --target=xxl check
+
+time_stdlib: time_stdlib_compile
+
+time_stdlib_compile: data/xxl.py
+	$(PYTHON) scripts/benchmark.py --parser=cpython --target=xxl compile
+
+time_stdlib_parse: data/xxl.py
+	$(PYTHON) scripts/benchmark.py --parser=cpython --target=xxl parse
+
+test_local:
+	$(PYTHON) scripts/test_parse_directory.py \
+		-g $(GRAMMAR) \
+		-d $(TESTDIR) \
+		$(TESTFLAGS) \
+		--exclude "*/failset/*" \
+		--exclude "*/failset/**" \
+		--exclude "*/failset/**/*"
+
+test_global: $(CPYTHON)
+	$(PYTHON) scripts/test_parse_directory.py \
+		-g $(GRAMMAR) \
+		-d $(CPYTHON) \
+		$(TESTFLAGS) \
+		--exclude "*/test2to3/*" \
+		--exclude "*/test2to3/**/*" \
+		--exclude "*/bad*" \
+		--exclude "*/lib2to3/tests/data/*"
+
+mypy: regen-metaparser
+	$(MYPY)  # For list of files, see mypy.ini
+
+format-python:
+	black pegen scripts
+
+bench:
+	$(PYTHON) scripts/benchmark.py --parser=pegen --target=stdlib check
+
+format: format-python
+
+find_max_nesting:
+	$(PYTHON) scripts/find_max_nesting.py
+
+tags: TAGS
+
+TAGS: pegen/*.py test/test_pegen.py
+	etags pegen/*.py test/test_pegen.py
diff --git a/Tools/peg_generator/data/cprog.py b/Tools/peg_generator/data/cprog.py
new file mode 100644
index 0000000..07b96f0
--- /dev/null
+++ b/Tools/peg_generator/data/cprog.py
@@ -0,0 +1,10 @@
+if 1:
+    print("Hello " + "world")
+    if 0:
+        print("then")
+        print("clause")
+    elif 1:
+        pass
+    elif 1:
+        pass
+    else: print("else-clause")
diff --git a/Tools/peg_generator/data/xxl.zip b/Tools/peg_generator/data/xxl.zip
new file mode 100644
index 0000000..5421408
--- /dev/null
+++ b/Tools/peg_generator/data/xxl.zip
Binary files differ
diff --git a/Tools/peg_generator/mypy.ini b/Tools/peg_generator/mypy.ini
new file mode 100644
index 0000000..80d5c05
--- /dev/null
+++ b/Tools/peg_generator/mypy.ini
@@ -0,0 +1,26 @@
+[mypy]
+files = pegen, scripts
+
+follow_imports = error
+no_implicit_optional = True
+strict_optional = True
+
+#check_untyped_defs = True
+disallow_untyped_calls = True
+disallow_untyped_defs = True
+
+disallow_any_generics = True
+disallow_any_unimported = True
+disallow_incomplete_defs = True
+disallow_subclassing_any = True
+
+warn_unused_configs = True
+warn_unused_ignores = True
+warn_redundant_casts = True
+warn_no_return = True
+
+show_traceback = True
+show_error_codes = True
+
+[mypy-pegen.grammar_parser]
+strict_optional = False
diff --git a/Tools/peg_generator/peg_extension/peg_extension.c b/Tools/peg_generator/peg_extension/peg_extension.c
new file mode 100644
index 0000000..d8d36a0
--- /dev/null
+++ b/Tools/peg_generator/peg_extension/peg_extension.c
@@ -0,0 +1,153 @@
+#include "pegen.h"
+
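+/* Parse modes used throughout this extension (see _build_return_object
+   below): 0 = syntax check only (returns None), 1 = return the AST as a
+   Python object, 2 = compile the module to a code object. */
+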
+static PyObject *
+_build_return_object(mod_ty module, int mode, PyObject *filename_ob, PyArena *arena)
+{
+    PyObject *result = NULL;
+
+    if (mode == 2) {
+        result = (PyObject *)PyAST_CompileObject(module, filename_ob, NULL, -1, arena);
+    } else if (mode == 1) {
+        result = PyAST_mod2obj(module);
+    } else {
+        result = Py_None;
+        Py_INCREF(result);
+    }
+
+    return result;
+}
+
+static PyObject *
+parse_file(PyObject *self, PyObject *args, PyObject *kwds)
+{
+    static char *keywords[] = {"file", "mode", NULL};
+    const char *filename;
+    int mode = 2;
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, "s|i", keywords, &filename, &mode)) {
+        return NULL;
+    }
+    if (mode < 0 || mode > 2) {
+        return PyErr_Format(PyExc_ValueError, "Bad mode, must be 0 <= mode <= 2");
+    }
+
+    PyArena *arena = PyArena_New();
+    if (arena == NULL) {
+        return NULL;
+    }
+
+    PyObject *result = NULL;
+
+    PyObject *filename_ob = PyUnicode_FromString(filename);
+    if (filename_ob == NULL) {
+        goto error;
+    }
+
+    mod_ty res = _PyPegen_run_parser_from_file(filename, Py_file_input, filename_ob, arena);
+    if (res == NULL) {
+        goto error;
+    }
+
+    result = _build_return_object(res, mode, filename_ob, arena);
+
+error:
+    Py_XDECREF(filename_ob);
+    PyArena_Free(arena);
+    return result;
+}
+
+static PyObject *
+parse_string(PyObject *self, PyObject *args, PyObject *kwds)
+{
+    static char *keywords[] = {"str", "mode", NULL};
+    const char *the_string;
+    int mode = 2;
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, "s|i", keywords, &the_string, &mode)) {
+        return NULL;
+    }
+    if (mode < 0 || mode > 2) {
+        return PyErr_Format(PyExc_ValueError, "Bad mode, must be 0 <= mode <= 2");
+    }
+
+    PyArena *arena = PyArena_New();
+    if (arena == NULL) {
+        return NULL;
+    }
+
+    PyObject *result = NULL;
+
+    PyObject *filename_ob = PyUnicode_FromString("<string>");
+    if (filename_ob == NULL) {
+        goto error;
+    }
+
+    mod_ty res = _PyPegen_run_parser_from_string(the_string, Py_file_input, filename_ob,
+                                        PyCF_IGNORE_COOKIE, arena);
+    if (res == NULL) {
+        goto error;
+    }
+    result = _build_return_object(res, mode, filename_ob, arena);
+
+error:
+    Py_XDECREF(filename_ob);
+    PyArena_Free(arena);
+    return result;
+}
+
+static PyObject *
+clear_memo_stats(PyObject *Py_UNUSED(self), PyObject *Py_UNUSED(args))
+{
+    _PyPegen_clear_memo_statistics();
+    Py_RETURN_NONE;
+}
+
+static PyObject *
+get_memo_stats(PyObject *Py_UNUSED(self), PyObject *Py_UNUSED(args))
+{
+    return _PyPegen_get_memo_statistics();
+}
+
+// TODO: Write to Python's sys.stdout instead of C's stdout.
+static PyObject *
+dump_memo_stats(PyObject *Py_UNUSED(self), PyObject *Py_UNUSED(args))
+{
+    PyObject *list = _PyPegen_get_memo_statistics();
+    if (list == NULL) {
+        return NULL;
+    }
+    Py_ssize_t len = PyList_Size(list);
+    for (Py_ssize_t i = 0; i < len; i++) {
+        PyObject *value = PyList_GetItem(list, i);  // Borrowed reference.
+        long count = PyLong_AsLong(value);
+        if (count < 0) {
+            break;
+        }
+        if (count > 0) {
+            printf("%4zd %9ld\n", i, count);
+        }
+    }
+    Py_DECREF(list);
+    Py_RETURN_NONE;
+}
+
+static PyMethodDef ParseMethods[] = {
+    {"parse_file", (PyCFunction)(void(*)(void))parse_file, METH_VARARGS|METH_KEYWORDS, "Parse a file."},
+    {"parse_string", (PyCFunction)(void(*)(void))parse_string, METH_VARARGS|METH_KEYWORDS, "Parse a string."},
+    {"clear_memo_stats", clear_memo_stats, METH_NOARGS},
+    {"dump_memo_stats", dump_memo_stats, METH_NOARGS},
+    {"get_memo_stats", get_memo_stats, METH_NOARGS},
+    {NULL, NULL, 0, NULL}        /* Sentinel */
+};
+
+static struct PyModuleDef parsemodule = {
+    PyModuleDef_HEAD_INIT,
+    .m_name = "parse",
+    .m_doc = "A parser.",
+    .m_methods = ParseMethods,
+};
+
+PyMODINIT_FUNC
+PyInit_parse(void)
+{
+    return PyModule_Create(&parsemodule);
+}
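+
+/* Example usage once built (a sketch; run `make build` in Tools/peg_generator
+   first):
+
+       from peg_extension import parse
+       tree = parse.parse_string("x = 1\n", mode=1)  # returns an AST object
+*/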
diff --git a/Tools/peg_generator/pegen/__init__.py b/Tools/peg_generator/pegen/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/Tools/peg_generator/pegen/__init__.py
diff --git a/Tools/peg_generator/pegen/__main__.py b/Tools/peg_generator/pegen/__main__.py
new file mode 100755
index 0000000..874b307
--- /dev/null
+++ b/Tools/peg_generator/pegen/__main__.py
@@ -0,0 +1,136 @@
+#!/usr/bin/env python3.8
+
+"""pegen -- PEG Generator.
+
+See PEP 617 and the literature on PEG parsers for background.
+"""
+
+import argparse
+import sys
+import time
+import token
+import traceback
+
+from pegen.build import build_parser_and_generator
+from pegen.testutil import print_memstats
+
+
+argparser = argparse.ArgumentParser(
+    prog="pegen", description="Experimental PEG-like parser generator"
+)
+argparser.add_argument("-q", "--quiet", action="store_true", help="Don't print the parsed grammar")
+argparser.add_argument(
+    "-v",
+    "--verbose",
+    action="count",
+    default=0,
+    help="Print timing stats; repeat for more debug output",
+)
+argparser.add_argument(
+    "-c", "--cpython", action="store_true", help="Generate C code for inclusion into CPython"
+)
+argparser.add_argument(
+    "--compile-extension",
+    action="store_true",
+    help="Compile generated C code into an extension module",
+)
+argparser.add_argument(
+    "-o",
+    "--output",
+    metavar="OUT",
+    help="Where to write the generated parser (default parse.py or parse.c)",
+)
+argparser.add_argument("filename", help="Grammar description")
+argparser.add_argument(
+    "--optimized", action="store_true", help="Compile the extension in optimized mode"
+)
+argparser.add_argument(
+    "--skip-actions", action="store_true", help="Suppress code emission for rule actions",
+)
+
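+# Example invocation (a sketch; the paths are illustrative):
+#   python -m pegen -q -c ../../Grammar/python.gram -o parse.c --compile-extension
+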
+
+def main() -> None:
+    args = argparser.parse_args()
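+    # Map the -v count to tracing: -vv traces the parser, -vvv the tokenizer,
+    # -vvvv and up trace both.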
+    verbose = args.verbose
+    verbose_tokenizer = verbose >= 3
+    verbose_parser = verbose == 2 or verbose >= 4
+    t0 = time.time()
+
+    output_file = args.output
+    if not output_file:
+        if args.cpython:
+            output_file = "parse.c"
+        else:
+            output_file = "parse.py"
+
+    try:
+        grammar, parser, tokenizer, gen = build_parser_and_generator(
+            args.filename,
+            output_file,
+            args.compile_extension,
+            verbose_tokenizer,
+            verbose_parser,
+            args.verbose,
+            keep_asserts_in_extension=not args.optimized,
+            skip_actions=args.skip_actions,
+        )
+    except Exception as err:
+        if args.verbose:
+            raise  # Show traceback
+        traceback.print_exception(err.__class__, err, None)
+        sys.stderr.write("For full traceback, use -v\n")
+        sys.exit(1)
+
+    if not args.quiet:
+        if args.verbose:
+            print("Raw Grammar:")
+            for line in repr(grammar).splitlines():
+                print(" ", line)
+
+        print("Clean Grammar:")
+        for line in str(grammar).splitlines():
+            print(" ", line)
+
+    if args.verbose:
+        print("First Graph:")
+        for src, dsts in gen.first_graph.items():
+            print(f"  {src} -> {', '.join(dsts)}")
+        print("First SCCS:")
+        for scc in gen.first_sccs:
+            print(" ", scc, end="")
+            if len(scc) > 1:
+                print(
+                    "  # Indirectly left-recursive; leaders:",
+                    {name for name in scc if grammar.rules[name].leader},
+                )
+            else:
+                name = next(iter(scc))
+                if name in gen.first_graph[name]:
+                    print("  # Left-recursive")
+                else:
+                    print()
+
+    t1 = time.time()
+
+    if args.verbose:
+        dt = t1 - t0
+        diag = tokenizer.diagnose()
+        nlines = diag.end[0]
+        if diag.type == token.ENDMARKER:
+            nlines -= 1
+        print(f"Total time: {dt:.3f} sec; {nlines} lines", end="")
+        if dt:
+            print(f"; {nlines / dt:.0f} lines/sec")
+        else:
+            print()
+        print("Caches sizes:")
+        print(f"  token array : {len(tokenizer._tokens):10}")
+        print(f"        cache : {len(parser._cache):10}")
+        if not print_memstats():
+            print("(Can't find psutil; install it for memory stats.)")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/Tools/peg_generator/pegen/build.py b/Tools/peg_generator/pegen/build.py
new file mode 100644
index 0000000..623b4ae
--- /dev/null
+++ b/Tools/peg_generator/pegen/build.py
@@ -0,0 +1,169 @@
+import pathlib
+import shutil
+import tokenize
+
+from typing import Optional, Tuple
+
+import distutils.log
+from distutils.core import Distribution, Extension
+from distutils.command.clean import clean  # type: ignore
+from distutils.command.build_ext import build_ext  # type: ignore
+
+from pegen.c_generator import CParserGenerator
+from pegen.grammar import Grammar
+from pegen.grammar_parser import GeneratedParser as GrammarParser
+from pegen.parser import Parser
+from pegen.parser_generator import ParserGenerator
+from pegen.python_generator import PythonParserGenerator
+from pegen.tokenizer import Tokenizer
+
+MOD_DIR = pathlib.Path(__file__).parent
+
+
+def compile_c_extension(
+    generated_source_path: str,
+    build_dir: Optional[str] = None,
+    verbose: bool = False,
+    keep_asserts: bool = True,
+) -> pathlib.Path:
+    """Compile the generated source for a parser generator into an extension module.
+
+    The extension module is generated in the same directory as the provided path
+    for the generated source, with the same basename plus the platform-specific
+    extension-module suffix. For example, for the source mydir/parser.c the
+    generated extension on a Darwin system with Python 3.8 will be
+    mydir/parser.cpython-38-darwin.so.
+
+    If *build_dir* is provided, that path is used as the temporary build
+    directory of distutils (useful when you want the intermediate build
+    artifacts placed in a separate, temporary location).
+    """
+    if verbose:
+        distutils.log.set_verbosity(distutils.log.DEBUG)
+
+    source_file_path = pathlib.Path(generated_source_path)
+    extension_name = source_file_path.stem
+    extra_compile_args = []
+    if keep_asserts:
+        extra_compile_args.append("-UNDEBUG")
+    extension = [
+        Extension(
+            extension_name,
+            sources=[
+                str(MOD_DIR.parent.parent.parent / "Python" / "Python-ast.c"),
+                str(MOD_DIR.parent.parent.parent / "Python" / "asdl.c"),
+                str(MOD_DIR.parent.parent.parent / "Parser" / "tokenizer.c"),
+                str(MOD_DIR.parent.parent.parent / "Parser" / "pegen" / "pegen.c"),
+                str(MOD_DIR.parent.parent.parent / "Parser" / "pegen" / "parse_string.c"),
+                str(MOD_DIR.parent / "peg_extension" / "peg_extension.c"),
+                generated_source_path,
+            ],
+            include_dirs=[
+                str(MOD_DIR.parent.parent.parent / "Include" / "internal"),
+                str(MOD_DIR.parent.parent.parent / "Parser"),
+                str(MOD_DIR.parent.parent.parent / "Parser" / "pegen"),
+            ],
+            extra_compile_args=extra_compile_args,
+        )
+    ]
+    dist = Distribution({"name": extension_name, "ext_modules": extension})
+    cmd = build_ext(dist)
+    cmd.inplace = True
+    if build_dir:
+        cmd.build_temp = build_dir
+    cmd.ensure_finalized()
+    cmd.run()
+
+    extension_path = source_file_path.parent / cmd.get_ext_filename(extension_name)
+    shutil.move(cmd.get_ext_fullpath(extension_name), extension_path)
+
+    cmd = clean(dist)
+    cmd.finalize_options()
+    cmd.run()
+
+    return extension_path
+
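+# Example (a sketch): compile_c_extension("peg_extension/parse.c") builds the
+# module in place and returns the path of the resulting shared object.
+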
+
+def build_parser(
+    grammar_file: str, verbose_tokenizer: bool = False, verbose_parser: bool = False
+) -> Tuple[Grammar, Parser, Tokenizer]:
+    with open(grammar_file) as file:
+        tokenizer = Tokenizer(tokenize.generate_tokens(file.readline), verbose=verbose_tokenizer)
+        parser = GrammarParser(tokenizer, verbose=verbose_parser)
+        grammar = parser.start()
+
+        if not grammar:
+            raise parser.make_syntax_error(grammar_file)
+
+    return grammar, parser, tokenizer
+
+
+def build_generator(
+    tokenizer: Tokenizer,
+    grammar: Grammar,
+    grammar_file: str,
+    output_file: str,
+    compile_extension: bool = False,
+    verbose_c_extension: bool = False,
+    keep_asserts_in_extension: bool = True,
+    skip_actions: bool = False,
+) -> ParserGenerator:
+    # TODO: Allow other extensions; pass the output type as an argument.
+    if not output_file.endswith((".c", ".py")):
+        raise RuntimeError("Your output file must either be a .c or .py file")
+    with open(output_file, "w") as file:
+        gen: ParserGenerator
+        if output_file.endswith(".c"):
+            gen = CParserGenerator(grammar, file, skip_actions=skip_actions)
+        elif output_file.endswith(".py"):
+            gen = PythonParserGenerator(grammar, file)  # TODO: skip_actions
+        else:
+            assert False  # Should have been checked above
+        gen.generate(grammar_file)
+
+    if compile_extension and output_file.endswith(".c"):
+        compile_c_extension(
+            output_file, verbose=verbose_c_extension, keep_asserts=keep_asserts_in_extension
+        )
+
+    return gen
+
+
+def build_parser_and_generator(
+    grammar_file: str,
+    output_file: str,
+    compile_extension: bool = False,
+    verbose_tokenizer: bool = False,
+    verbose_parser: bool = False,
+    verbose_c_extension: bool = False,
+    keep_asserts_in_extension: bool = True,
+    skip_actions: bool = False,
+) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
+    """Generate rules, parser, tokenizer, parser generator for a given grammar
+
+    Args:
+        grammar_file (string): Path for the grammar file
+        output_file (string): Path for the output file
+        compile_extension (bool, optional): Whether to compile the C extension.
+          Defaults to False.
+        verbose_tokenizer (bool, optional): Whether to display additional output
+          when generating the tokenizer. Defaults to False.
+        verbose_parser (bool, optional): Whether to display additional output
+          when generating the parser. Defaults to False.
+        verbose_c_extension (bool, optional): Whether to display additional
+          output when compiling the C extension. Defaults to False.
+        keep_asserts_in_extension (bool, optional): Whether to keep the assert statements
+          when compiling the extension module. Defaults to True.
+        skip_actions (bool, optional): Whether to pretend no rule has any actions.
+    """
+    grammar, parser, tokenizer = build_parser(grammar_file, verbose_tokenizer, verbose_parser)
+    gen = build_generator(
+        tokenizer,
+        grammar,
+        grammar_file,
+        output_file,
+        compile_extension,
+        verbose_c_extension,
+        keep_asserts_in_extension,
+        skip_actions=skip_actions,
+    )
+
+    return grammar, parser, tokenizer, gen
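+
+# Example (a sketch; the file names are illustrative):
+#   grammar, parser, tokenizer, gen = build_parser_and_generator(
+#       "data/simple.gram", "parse.py")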
diff --git a/Tools/peg_generator/pegen/c_generator.py b/Tools/peg_generator/pegen/c_generator.py
new file mode 100644
index 0000000..ce732a0
--- /dev/null
+++ b/Tools/peg_generator/pegen/c_generator.py
@@ -0,0 +1,605 @@
+import ast
+import re
+from typing import Any, cast, Dict, IO, Optional, List, Text, Tuple
+
+from pegen.grammar import (
+    Cut,
+    GrammarVisitor,
+    Rhs,
+    Alt,
+    NamedItem,
+    NameLeaf,
+    StringLeaf,
+    Lookahead,
+    PositiveLookahead,
+    NegativeLookahead,
+    Opt,
+    Repeat0,
+    Repeat1,
+    Gather,
+    Group,
+    Rule,
+)
+from pegen import grammar
+from pegen.parser_generator import dedupe, ParserGenerator
+from pegen.tokenizer import exact_token_types
+
+EXTENSION_PREFIX = """\
+#include "pegen.h"
+
+"""
+
+EXTENSION_SUFFIX = """
+void *
+_PyPegen_parse(Parser *p)
+{
+    // Initialize keywords
+    p->keywords = reserved_keywords;
+    p->n_keyword_lists = n_keyword_lists;
+
+    return start_rule(p);
+}
+"""
+
+
+class CCallMakerVisitor(GrammarVisitor):
+    def __init__(self, parser_generator: ParserGenerator):
+        self.gen = parser_generator
+        self.cache: Dict[Any, Any] = {}
+        self.keyword_cache: Dict[str, int] = {}
+
+    def keyword_helper(self, keyword: str) -> Tuple[str, str]:
+        if keyword not in self.keyword_cache:
+            self.keyword_cache[keyword] = self.gen.keyword_type()
+        return "keyword", f"_PyPegen_expect_token(p, {self.keyword_cache[keyword]})"
+
+    def visit_NameLeaf(self, node: NameLeaf) -> Tuple[str, str]:
+        name = node.value
+        if name in ("NAME", "NUMBER", "STRING"):
+            name = name.lower()
+            return f"{name}_var", f"_PyPegen_{name}_token(p)"
+        if name in ("NEWLINE", "DEDENT", "INDENT", "ENDMARKER", "ASYNC", "AWAIT"):
+            name = name.lower()
+            return f"{name}_var", f"_PyPegen_{name}_token(p)"
+        return f"{name}_var", f"{name}_rule(p)"
+
+    def visit_StringLeaf(self, node: StringLeaf) -> Tuple[str, str]:
+        val = ast.literal_eval(node.value)
+        if re.match(r"[a-zA-Z_]\w*\Z", val):  # This is a keyword
+            return self.keyword_helper(val)
+        else:
+            assert val in exact_token_types, f"{node.value} is not a known literal"
+            type = exact_token_types[val]
+            return "literal", f"_PyPegen_expect_token(p, {type})"
+
+    def visit_Rhs(self, node: Rhs) -> Tuple[Optional[str], str]:
+        if node in self.cache:
+            return self.cache[node]
+        if len(node.alts) == 1 and len(node.alts[0].items) == 1:
+            self.cache[node] = self.visit(node.alts[0].items[0])
+        else:
+            name = self.gen.name_node(node)
+            self.cache[node] = f"{name}_var", f"{name}_rule(p)"
+        return self.cache[node]
+
+    def visit_NamedItem(self, node: NamedItem) -> Tuple[Optional[str], str]:
+        name, call = self.visit(node.item)
+        if node.name:
+            name = node.name
+        return name, call
+
+    def lookahead_call_helper(self, node: Lookahead, positive: int) -> Tuple[None, str]:
+        name, call = self.visit(node.node)
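+        # call has the shape "func(p, args...)"; peel off the closing paren so
+        # the argument shape (none, an int token type, or a string) selects
+        # the matching _PyPegen_lookahead* helper below.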
+        func, args = call.split("(", 1)
+        assert args[-1] == ")"
+        args = args[:-1]
+        if not args.startswith("p,"):
+            return None, f"_PyPegen_lookahead({positive}, {func}, {args})"
+        elif args[2:].strip().isalnum():
+            return None, f"_PyPegen_lookahead_with_int({positive}, {func}, {args})"
+        else:
+            return None, f"_PyPegen_lookahead_with_string({positive}, {func}, {args})"
+
+    def visit_PositiveLookahead(self, node: PositiveLookahead) -> Tuple[None, str]:
+        return self.lookahead_call_helper(node, 1)
+
+    def visit_NegativeLookahead(self, node: NegativeLookahead) -> Tuple[None, str]:
+        return self.lookahead_call_helper(node, 0)
+
+    def visit_Opt(self, node: Opt) -> Tuple[str, str]:
+        name, call = self.visit(node.node)
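+        # The generated C wraps the call as "(opt_var = call(p), 1)": the
+        # comma operator makes the whole expression truthy, so an optional
+        # item can never fail the enclosing condition chain.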
+        return "opt_var", f"{call}, 1"  # Using comma operator!
+
+    def visit_Repeat0(self, node: Repeat0) -> Tuple[str, str]:
+        if node in self.cache:
+            return self.cache[node]
+        name = self.gen.name_loop(node.node, False)
+        self.cache[node] = f"{name}_var", f"{name}_rule(p)"
+        return self.cache[node]
+
+    def visit_Repeat1(self, node: Repeat1) -> Tuple[str, str]:
+        if node in self.cache:
+            return self.cache[node]
+        name = self.gen.name_loop(node.node, True)
+        self.cache[node] = f"{name}_var", f"{name}_rule(p)"
+        return self.cache[node]
+
+    def visit_Gather(self, node: Gather) -> Tuple[str, str]:
+        if node in self.cache:
+            return self.cache[node]
+        name = self.gen.name_gather(node)
+        self.cache[node] = f"{name}_var", f"{name}_rule(p)"
+        return self.cache[node]
+
+    def visit_Group(self, node: Group) -> Tuple[Optional[str], str]:
+        return self.visit(node.rhs)
+
+    def visit_Cut(self, node: Cut) -> Tuple[str, str]:
+        return "cut_var", "1"
+
+
+class CParserGenerator(ParserGenerator, GrammarVisitor):
+    def __init__(
+        self,
+        grammar: grammar.Grammar,
+        file: Optional[IO[Text]],
+        debug: bool = False,
+        skip_actions: bool = False,
+    ):
+        super().__init__(grammar, file)
+        self.callmakervisitor: CCallMakerVisitor = CCallMakerVisitor(self)
+        self._varname_counter = 0
+        self.debug = debug
+        self.skip_actions = skip_actions
+
+    def unique_varname(self, name: str = "tmpvar") -> str:
+        new_var = name + "_" + str(self._varname_counter)
+        self._varname_counter += 1
+        return new_var
+
+    def call_with_errorcheck_return(self, call_text: str, returnval: str) -> None:
+        error_var = self.unique_varname()
+        self.print(f"int {error_var} = {call_text};")
+        self.print(f"if ({error_var}) {{")
+        with self.indent():
+            self.print(f"return {returnval};")
+        self.print(f"}}")
+
+    def call_with_errorcheck_goto(self, call_text: str, goto_target: str) -> None:
+        error_var = self.unique_varname()
+        self.print(f"int {error_var} = {call_text};")
+        self.print(f"if ({error_var}) {{")
+        with self.indent():
+            self.print(f"goto {goto_target};")
+        self.print(f"}}")
+
+    def out_of_memory_return(
+        self, expr: str, returnval: str, message: str = "Parser out of memory",
+        cleanup_code: Optional[str] = None,
+    ) -> None:
+        self.print(f"if ({expr}) {{")
+        with self.indent():
+            self.print(f'PyErr_Format(PyExc_MemoryError, "{message}");')
+            if cleanup_code is not None:
+                self.print(cleanup_code)
+            self.print(f"return {returnval};")
+        self.print(f"}}")
+
+    def out_of_memory_goto(
+        self, expr: str, goto_target: str, message: str = "Parser out of memory"
+    ) -> None:
+        self.print(f"if ({expr}) {{")
+        with self.indent():
+            self.print(f'PyErr_Format(PyExc_MemoryError, "{message}");')
+            self.print(f"goto {goto_target};")
+        self.print(f"}}")
+
+    def generate(self, filename: str) -> None:
+        self.collect_todo()
+        self.print(f"// @generated by pegen.py from {filename}")
+        header = self.grammar.metas.get("header", EXTENSION_PREFIX)
+        if header:
+            self.print(header.rstrip("\n"))
+        subheader = self.grammar.metas.get("subheader", "")
+        if subheader:
+            self.print(subheader)
+        self._setup_keywords()
+        for i, (rulename, rule) in enumerate(self.todo.items(), 1000):
+            comment = "  // Left-recursive" if rule.left_recursive else ""
+            self.print(f"#define {rulename}_type {i}{comment}")
+        self.print()
+        for rulename, rule in self.todo.items():
+            if rule.is_loop() or rule.is_gather():
+                type = "asdl_seq *"
+            elif rule.type:
+                type = rule.type + " "
+            else:
+                type = "void *"
+            self.print(f"static {type}{rulename}_rule(Parser *p);")
+        self.print()
+        while self.todo:
+            for rulename, rule in list(self.todo.items()):
+                del self.todo[rulename]
+                self.print()
+                if rule.left_recursive:
+                    self.print("// Left-recursive")
+                self.visit(rule)
+        if self.skip_actions:
+            mode = 0
+        else:
+            mode = int(self.rules["start"].type == "mod_ty") if "start" in self.rules else 1
+            if mode == 1 and self.grammar.metas.get("bytecode"):
+                mode += 1
+        modulename = self.grammar.metas.get("modulename", "parse")
+        trailer = self.grammar.metas.get("trailer", EXTENSION_SUFFIX)
+        if trailer:
+            self.print(trailer.rstrip("\n") % dict(mode=mode, modulename=modulename))
+
+    def _group_keywords_by_length(self) -> Dict[int, List[Tuple[str, int]]]:
+        groups: Dict[int, List[Tuple[str, int]]] = {}
+        for keyword_str, keyword_type in self.callmakervisitor.keyword_cache.items():
+            length = len(keyword_str)
+            if length in groups:
+                groups[length].append((keyword_str, keyword_type))
+            else:
+                groups[length] = [(keyword_str, keyword_type)]
+        return groups
+
+    def _setup_keywords(self) -> None:
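+        # Emit a table of keyword lists indexed by keyword length. For
+        # example (a sketch; the token numbers are illustrative), the
+        # keywords {"if": 510, "del": 511} produce:
+        #   static const int n_keyword_lists = 4;
+        #   static KeywordToken *reserved_keywords[] = {
+        #       NULL, NULL,
+        #       (KeywordToken[]) {{"if", 510}, {NULL, -1}},
+        #       (KeywordToken[]) {{"del", 511}, {NULL, -1}},
+        #   };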
+        keyword_cache = self.callmakervisitor.keyword_cache
+        n_keyword_lists = (
+            len(max(keyword_cache.keys(), key=len)) + 1 if len(keyword_cache) > 0 else 0
+        )
+        self.print(f"static const int n_keyword_lists = {n_keyword_lists};")
+        groups = self._group_keywords_by_length()
+        self.print("static KeywordToken *reserved_keywords[] = {")
+        with self.indent():
+            num_groups = max(groups) + 1 if groups else 1
+            for keywords_length in range(num_groups):
+                if keywords_length not in groups.keys():
+                    self.print("NULL,")
+                else:
+                    self.print("(KeywordToken[]) {")
+                    with self.indent():
+                        for keyword_str, keyword_type in groups[keywords_length]:
+                            self.print(f'{{"{keyword_str}", {keyword_type}}},')
+                        self.print("{NULL, -1},")
+                    self.print("},")
+        self.print("};")
+
+    def _set_up_token_start_metadata_extraction(self) -> None:
+        self.print("if (p->mark == p->fill && _PyPegen_fill_token(p) < 0) {")
+        with self.indent():
+            self.print("p->error_indicator = 1;")
+            self.print("return NULL;")
+        self.print("}")
+        self.print("int start_lineno = p->tokens[mark]->lineno;")
+        self.print("UNUSED(start_lineno); // Only used by EXTRA macro")
+        self.print("int start_col_offset = p->tokens[mark]->col_offset;")
+        self.print("UNUSED(start_col_offset); // Only used by EXTRA macro")
+
+    def _set_up_token_end_metadata_extraction(self) -> None:
+        self.print("Token *token = _PyPegen_get_last_nonnwhitespace_token(p);")
+        self.print("if (token == NULL) {")
+        with self.indent():
+            self.print("return NULL;")
+        self.print("}")
+        self.print(f"int end_lineno = token->end_lineno;")
+        self.print("UNUSED(end_lineno); // Only used by EXTRA macro")
+        self.print(f"int end_col_offset = token->end_col_offset;")
+        self.print("UNUSED(end_col_offset); // Only used by EXTRA macro")
+
+    def _set_up_rule_memoization(self, node: Rule, result_type: str) -> None:
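+        # Emit a wrapper implementing the "seed growing" technique for
+        # left-recursive leader rules: repeatedly re-run the raw rule,
+        # memoizing each longer parse, until the match stops advancing, then
+        # return the longest result.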
+        self.print("{")
+        with self.indent():
+            self.print(f"{result_type} res = NULL;")
+            self.print(f"if (_PyPegen_is_memoized(p, {node.name}_type, &res))")
+            with self.indent():
+                self.print("return res;")
+            self.print("int mark = p->mark;")
+            self.print("int resmark = p->mark;")
+            self.print("while (1) {")
+            with self.indent():
+                self.call_with_errorcheck_return(
+                    f"_PyPegen_update_memo(p, mark, {node.name}_type, res)", "res"
+                )
+                self.print("p->mark = mark;")
+                self.print(f"void *raw = {node.name}_raw(p);")
+                self.print("if (raw == NULL || p->mark <= resmark)")
+                with self.indent():
+                    self.print("break;")
+                self.print("resmark = p->mark;")
+                self.print("res = raw;")
+            self.print("}")
+            self.print("p->mark = resmark;")
+            self.print("return res;")
+        self.print("}")
+        self.print(f"static {result_type}")
+        self.print(f"{node.name}_raw(Parser *p)")
+
+    def _should_memoize(self, node: Rule) -> bool:
+        return node.memo and not node.left_recursive
+
+    def _handle_default_rule_body(self, node: Rule, rhs: Rhs, result_type: str) -> None:
+        memoize = self._should_memoize(node)
+
+        with self.indent():
+            self.print("if (p->error_indicator) {")
+            with self.indent():
+                self.print("return NULL;")
+            self.print("}")
+            self.print(f"{result_type} res = NULL;")
+            if memoize:
+                self.print(f"if (_PyPegen_is_memoized(p, {node.name}_type, &res))")
+                with self.indent():
+                    self.print("return res;")
+            self.print("int mark = p->mark;")
+            if any(alt.action and "EXTRA" in alt.action for alt in rhs.alts):
+                self._set_up_token_start_metadata_extraction()
+            self.visit(
+                rhs,
+                is_loop=False,
+                is_gather=node.is_gather(),
+                rulename=node.name if memoize else None,
+            )
+            if self.debug:
+                self.print(f'fprintf(stderr, "Fail at %d: {node.name}\\n", p->mark);')
+            self.print("res = NULL;")
+        self.print("  done:")
+        with self.indent():
+            if memoize:
+                self.print(f"_PyPegen_insert_memo(p, mark, {node.name}_type, res);")
+            self.print("return res;")
+
+    def _handle_loop_rule_body(self, node: Rule, rhs: Rhs) -> None:
+        memoize = self._should_memoize(node)
+        is_repeat1 = node.name.startswith("_loop1")
+
+        with self.indent():
+            self.print("if (p->error_indicator) {")
+            with self.indent():
+                self.print("return NULL;")
+            self.print("}")
+            self.print(f"void *res = NULL;")
+            if memoize:
+                self.print(f"if (_PyPegen_is_memoized(p, {node.name}_type, &res))")
+                with self.indent():
+                    self.print("return res;")
+            self.print("int mark = p->mark;")
+            self.print("int start_mark = p->mark;")
+            self.print("void **children = PyMem_Malloc(sizeof(void *));")
+            self.out_of_memory_return(f"!children", "NULL")
+            self.print("ssize_t children_capacity = 1;")
+            self.print("ssize_t n = 0;")
+            if any(alt.action and "EXTRA" in alt.action for alt in rhs.alts):
+                self._set_up_token_start_metadata_extraction()
+            self.visit(
+                rhs,
+                is_loop=True,
+                is_gather=node.is_gather(),
+                rulename=node.name if memoize else None,
+            )
+            if is_repeat1:
+                self.print("if (n == 0) {")
+                with self.indent():
+                    self.print("PyMem_Free(children);")
+                    self.print("return NULL;")
+                self.print("}")
+            self.print("asdl_seq *seq = _Py_asdl_seq_new(n, p->arena);")
+            self.out_of_memory_return(
+                f"!seq",
+                "NULL",
+                message=f"asdl_seq_new {node.name}",
+                cleanup_code="PyMem_Free(children);",
+            )
+            self.print("for (int i = 0; i < n; i++) asdl_seq_SET(seq, i, children[i]);")
+            self.print("PyMem_Free(children);")
+            if node.name:
+                self.print(f"_PyPegen_insert_memo(p, start_mark, {node.name}_type, seq);")
+            self.print("return seq;")
+
+    def visit_Rule(self, node: Rule) -> None:
+        is_loop = node.is_loop()
+        is_gather = node.is_gather()
+        rhs = node.flatten()
+        if is_loop or is_gather:
+            result_type = "asdl_seq *"
+        elif node.type:
+            result_type = node.type
+        else:
+            result_type = "void *"
+
+        for line in str(node).splitlines():
+            self.print(f"// {line}")
+        if node.left_recursive and node.leader:
+            self.print(f"static {result_type} {node.name}_raw(Parser *);")
+
+        self.print(f"static {result_type}")
+        self.print(f"{node.name}_rule(Parser *p)")
+
+        if node.left_recursive and node.leader:
+            self._set_up_rule_memoization(node, result_type)
+
+        self.print("{")
+        if is_loop:
+            self._handle_loop_rule_body(node, rhs)
+        else:
+            self._handle_default_rule_body(node, rhs, result_type)
+        self.print("}")
+
+    def visit_NamedItem(self, node: NamedItem, names: List[str]) -> None:
+        name, call = self.callmakervisitor.visit(node)
+        if not name:
+            self.print(call)
+        else:
+            name = dedupe(name, names)
+            self.print(f"({name} = {call})")
+
+    def visit_Rhs(
+        self, node: Rhs, is_loop: bool, is_gather: bool, rulename: Optional[str]
+    ) -> None:
+        if is_loop:
+            assert len(node.alts) == 1
+        for alt in node.alts:
+            self.visit(alt, is_loop=is_loop, is_gather=is_gather, rulename=rulename)
+
+    def join_conditions(self, keyword: str, node: Any, names: List[str]) -> None:
+        self.print(f"{keyword} (")
+        with self.indent():
+            first = True
+            for item in node.items:
+                if first:
+                    first = False
+                else:
+                    self.print("&&")
+                self.visit(item, names=names)
+        self.print(")")
+
+    def emit_action(self, node: Alt, cleanup_code: Optional[str] = None) -> None:
+        self.print(f"res = {node.action};")
+
+        self.print("if (res == NULL && PyErr_Occurred()) {")
+        with self.indent():
+            self.print("p->error_indicator = 1;")
+            if cleanup_code:
+                self.print(cleanup_code)
+            self.print("return NULL;")
+        self.print("}")
+
+        if self.debug:
+            self.print(
+                f'fprintf(stderr, "Hit with action [%d:%d]: %s\\n", mark, p->mark, "{node}");'
+            )
+
+    def emit_default_action(self, is_gather: bool, names: List[str], node: Alt) -> None:
+        if len(names) > 1:
+            if is_gather:
+                assert len(names) == 2
+                self.print(f"res = _PyPegen_seq_insert_in_front(p, {names[0]}, {names[1]});")
+            else:
+                if self.debug:
+                    self.print(
+                        f'fprintf(stderr, "Hit without action [%d:%d]: %s\\n", mark, p->mark, "{node}");'
+                    )
+                self.print(f"res = _PyPegen_dummy_name(p, {', '.join(names)});")
+        else:
+            if self.debug:
+                self.print(
+                    f'fprintf(stderr, "Hit with default action [%d:%d]: %s\\n", mark, p->mark, "{node}");'
+                )
+            self.print(f"res = {names[0]};")
+
+    def emit_dummy_action(self) -> None:
+        self.print(f"res = _PyPegen_dummy_name(p);")
+
+    def handle_alt_normal(self, node: Alt, is_gather: bool, names: List[str]) -> None:
+        self.join_conditions(keyword="if", node=node, names=names)
+        self.print("{")
+        # We have parsed successfully all the conditions for the option.
+        with self.indent():
+            # Prepare to emit the rule action and do so
+            if node.action and "EXTRA" in node.action:
+                self._set_up_token_end_metadata_extraction()
+            if self.skip_actions:
+                self.emit_dummy_action()
+            elif node.action:
+                self.emit_action(node)
+            else:
+                self.emit_default_action(is_gather, names, node)
+
+            # As the current option has parsed correctly, do not continue with the rest.
+            self.print(f"goto done;")
+        self.print("}")
+
+    def handle_alt_loop(
+        self, node: Alt, is_gather: bool, rulename: Optional[str], names: List[str]
+    ) -> None:
+        # Condition of the main body of the alternative
+        self.join_conditions(keyword="while", node=node, names=names)
+        self.print("{")
+        # We have parsed successfully one item!
+        with self.indent():
+            # Prepare to emit the rule action and do so
+            if node.action and "EXTRA" in node.action:
+                self._set_up_token_end_metadata_extraction()
+            if self.skip_actions:
+                self.emit_dummy_action()
+            elif node.action:
+                self.emit_action(node, cleanup_code="PyMem_Free(children);")
+            else:
+                self.emit_default_action(is_gather, names, node)
+
+            # Add the result of the rule to the temporary buffer of children.
+            # This buffer is later used to populate an asdl_seq with all the
+            # elements to return.
+            self.print("if (n == children_capacity) {")
+            with self.indent():
+                self.print("children_capacity *= 2;")
+                self.print("children = PyMem_Realloc(children, children_capacity*sizeof(void *));")
+                self.out_of_memory_return(f"!children", "NULL", message=f"realloc {rulename}")
+            self.print("}")
+            self.print(f"children[n++] = res;")
+            self.print("mark = p->mark;")
+        self.print("}")
+
+    def visit_Alt(
+        self, node: Alt, is_loop: bool, is_gather: bool, rulename: Optional[str]
+    ) -> None:
+        self.print(f"{{ // {node}")
+        with self.indent():
+            # Prepare variable declarations for the alternative
+            vars = self.collect_vars(node)
+            for v, var_type in sorted(item for item in vars.items() if item[0] is not None):
+                if not var_type:
+                    var_type = "void *"
+                else:
+                    var_type += " "
+                if v == "cut_var":
+                    v += " = 0"  # cut_var must be initialized
+                self.print(f"{var_type}{v};")
+                if v == "opt_var":
+                    self.print("UNUSED(opt_var); // Silence compiler warnings")
+
+            names: List[str] = []
+            if is_loop:
+                self.handle_alt_loop(node, is_gather, rulename, names)
+            else:
+                self.handle_alt_normal(node, is_gather, names)
+
+            self.print("p->mark = mark;")
+            if "cut_var" in names:
+                self.print("if (cut_var) return NULL;")
+        self.print("}")
+
+    def collect_vars(self, node: Alt) -> Dict[str, Optional[str]]:
+        names: List[str] = []
+        types = {}
+        for item in node.items:
+            name, type = self.add_var(item, names)
+            types[name] = type
+        return types
+
+    def add_var(self, node: NamedItem, names: List[str]) -> Tuple[str, Optional[str]]:
+        name: str
+        call: str
+        name, call = self.callmakervisitor.visit(node.item)
+        type = None
+        if not name:
+            return name, type
+        if name.startswith("cut"):
+            return name, "int"
+        if name.endswith("_var"):
+            rulename = name[:-4]
+            rule = self.rules.get(rulename)
+            if rule is not None:
+                if rule.is_loop() or rule.is_gather():
+                    type = "asdl_seq *"
+                else:
+                    type = rule.type
+            elif name.startswith("_loop") or name.startswith("_gather"):
+                type = "asdl_seq *"
+            elif name in ("name_var", "string_var", "number_var"):
+                type = "expr_ty"
+        if node.name:
+            name = node.name
+        name = dedupe(name, names)
+        return name, type
diff --git a/Tools/peg_generator/pegen/first_sets.py b/Tools/peg_generator/pegen/first_sets.py
new file mode 100755
index 0000000..da30eba
--- /dev/null
+++ b/Tools/peg_generator/pegen/first_sets.py
@@ -0,0 +1,153 @@
+#!/usr/bin/env python3.8
+
+import argparse
+import collections
+import pprint
+import sys
+from typing import Optional, Set, Dict
+
+from pegen.build import build_parser
+from pegen.grammar import (
+    Alt,
+    Cut,
+    Gather,
+    Grammar,
+    GrammarVisitor,
+    Group,
+    Leaf,
+    Lookahead,
+    NamedItem,
+    NameLeaf,
+    NegativeLookahead,
+    Opt,
+    Repeat,
+    Repeat0,
+    Repeat1,
+    Rhs,
+    Rule,
+    StringLeaf,
+    PositiveLookahead,
+)
+
+argparser = argparse.ArgumentParser(
+    prog="calculate_first_sets", description="Calculate the first sets of a grammar",
+)
+argparser.add_argument("grammar_file", help="The grammar file")
+
+
+class FirstSetCalculator(GrammarVisitor):
+    def __init__(self, rules: Dict[str, Rule]) -> None:
+        self.rules = rules
+        for rule in rules.values():
+            rule.nullable_visit(rules)
+        self.first_sets: Dict[str, Set[str]] = dict()
+        self.in_process: Set[str] = set()
+
+    def calculate(self) -> Dict[str, Set[str]]:
+        for name, rule in self.rules.items():
+            self.visit(rule)
+        return self.first_sets
+
+    def visit_Alt(self, item: Alt) -> Set[str]:
+        result: Set[str] = set()
+        to_remove: Set[str] = set()
+        for other in item.items:
+            new_terminals = self.visit(other)
+            if isinstance(other.item, NegativeLookahead):
+                to_remove |= new_terminals
+            result |= new_terminals
+            if to_remove:
+                result -= to_remove
+
+            # If the set of new terminals can start with the empty string,
+            # it means that the item is completely nullable and we should
+            # also consider at least the next item in case the current
+            # one fails to parse.
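+            # For example, in the alternative `a b` where rule a is nullable,
+            # FIRST(a) contains "" and FIRST(b) is therefore folded in too.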
+
+            if "" in new_terminals:
+                continue
+
+            if not isinstance(other.item, (Opt, NegativeLookahead, Repeat0)):
+                break
+
+        # Do not allow the empty string to propagate.
+        result.discard("")
+
+        return result
+
+    def visit_Cut(self, item: Cut) -> Set[str]:
+        return set()
+
+    def visit_Group(self, item: Group) -> Set[str]:
+        return self.visit(item.rhs)
+
+    def visit_PositiveLookahead(self, item: Lookahead) -> Set[str]:
+        return self.visit(item.node)
+
+    def visit_NegativeLookahead(self, item: NegativeLookahead) -> Set[str]:
+        return self.visit(item.node)
+
+    def visit_NamedItem(self, item: NamedItem) -> Set[str]:
+        return self.visit(item.item)
+
+    def visit_Opt(self, item: Opt) -> Set[str]:
+        return self.visit(item.node)
+
+    def visit_Gather(self, item: Gather) -> Set[str]:
+        return self.visit(item.node)
+
+    def visit_Repeat0(self, item: Repeat0) -> Set[str]:
+        return self.visit(item.node)
+
+    def visit_Repeat1(self, item: Repeat1) -> Set[str]:
+        return self.visit(item.node)
+
+    def visit_NameLeaf(self, item: NameLeaf) -> Set[str]:
+        if item.value not in self.rules:
+            return {item.value}
+
+        if item.value not in self.first_sets:
+            self.first_sets[item.value] = self.visit(self.rules[item.value])
+            return self.first_sets[item.value]
+        elif item.value in self.in_process:
+            return set()
+
+        return self.first_sets[item.value]
+
+    def visit_StringLeaf(self, item: StringLeaf) -> Set[str]:
+        return {item.value}
+
+    def visit_Rhs(self, item: Rhs) -> Set[str]:
+        result: Set[str] = set()
+        for alt in item.alts:
+            result |= self.visit(alt)
+        return result
+
+    def visit_Rule(self, item: Rule) -> Set[str]:
+        if item.name in self.in_process:
+            return set()
+        elif item.name not in self.first_sets:
+            self.in_process.add(item.name)
+            terminals = self.visit(item.rhs)
+            if item.nullable:
+                terminals.add("")
+            self.first_sets[item.name] = terminals
+            self.in_process.remove(item.name)
+        return self.first_sets[item.name]
+
+
+def main() -> None:
+    args = argparser.parse_args()
+
+    try:
+        grammar, parser, tokenizer = build_parser(args.grammar_file)
+    except Exception as err:
+        print("ERROR: Failed to parse grammar file", file=sys.stderr)
+        sys.exit(1)
+
+    first_sets = FirstSetCalculator(grammar.rules).calculate()
+    pprint.pprint(first_sets)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/Tools/peg_generator/pegen/grammar.py b/Tools/peg_generator/pegen/grammar.py
new file mode 100644
index 0000000..67039d5
--- /dev/null
+++ b/Tools/peg_generator/pegen/grammar.py
@@ -0,0 +1,470 @@
+from __future__ import annotations
+
+from abc import abstractmethod
+from typing import (
+    AbstractSet,
+    Any,
+    Callable,
+    Dict,
+    Iterable,
+    Iterator,
+    List,
+    Optional,
+    Set,
+    Tuple,
+    TYPE_CHECKING,
+    TypeVar,
+    Union,
+)
+
+from pegen.parser import memoize, Parser
+
+if TYPE_CHECKING:
+    from pegen.parser_generator import ParserGenerator
+
+
+class GrammarError(Exception):
+    pass
+
+
+class GrammarVisitor:
+    def visit(self, node: Any, *args: Any, **kwargs: Any) -> Any:
+        """Visit a node."""
+        method = "visit_" + node.__class__.__name__
+        visitor = getattr(self, method, self.generic_visit)
+        return visitor(node, *args, **kwargs)
+
+    def generic_visit(self, node: Iterable[Any], *args: Any, **kwargs: Any) -> None:
+        """Called if no explicit visitor function exists for a node."""
+        for value in node:
+            if isinstance(value, list):
+                for item in value:
+                    self.visit(item, *args, **kwargs)
+            else:
+                self.visit(value, *args, **kwargs)
+
+
+class Grammar:
+    def __init__(self, rules: Iterable[Rule], metas: Iterable[Tuple[str, Optional[str]]]):
+        self.rules = {rule.name: rule for rule in rules}
+        self.metas = dict(metas)
+
+    def __str__(self) -> str:
+        return "\n".join(str(rule) for name, rule in self.rules.items())
+
+    def __repr__(self) -> str:
+        lines = ["Grammar("]
+        lines.append("  [")
+        for rule in self.rules.values():
+            lines.append(f"    {repr(rule)},")
+        lines.append("  ],")
+        lines.append("  {repr(list(self.metas.items()))}")
+        lines.append(")")
+        return "\n".join(lines)
+
+    def __iter__(self) -> Iterator[Rule]:
+        yield from self.rules.values()
+
+
+# Global flag: when SIMPLE_STR is true, __str__() omits actions (the default).
+SIMPLE_STR = True
+
+
+class Rule:
+    def __init__(self, name: str, type: Optional[str], rhs: Rhs, memo: Optional[object] = None):
+        self.name = name
+        self.type = type
+        self.rhs = rhs
+        self.memo = bool(memo)
+        self.visited = False
+        self.nullable = False
+        self.left_recursive = False
+        self.leader = False
+
+    def is_loop(self) -> bool:
+        return self.name.startswith("_loop")
+
+    def is_gather(self) -> bool:
+        return self.name.startswith("_gather")
+
+    def __str__(self) -> str:
+        if SIMPLE_STR or self.type is None:
+            res = f"{self.name}: {self.rhs}"
+        else:
+            res = f"{self.name}[{self.type}]: {self.rhs}"
+        if len(res) < 88:
+            return res
+        lines = [res.split(":")[0] + ":"]
+        lines += [f"    | {alt}" for alt in self.rhs.alts]
+        return "\n".join(lines)
+
+    def __repr__(self) -> str:
+        return f"Rule({self.name!r}, {self.type!r}, {self.rhs!r})"
+
+    def __iter__(self) -> Iterator[Rhs]:
+        yield self.rhs
+
+    def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
+        if self.visited:
+            # A left-recursive rule is considered non-nullable.
+            return False
+        self.visited = True
+        self.nullable = self.rhs.nullable_visit(rules)
+        return self.nullable
+
+    def initial_names(self) -> AbstractSet[str]:
+        return self.rhs.initial_names()
+
+    def flatten(self) -> Rhs:
+        # If it's a single parenthesized group, flatten it.
+        rhs = self.rhs
+        if (
+            not self.is_loop()
+            and len(rhs.alts) == 1
+            and len(rhs.alts[0].items) == 1
+            and isinstance(rhs.alts[0].items[0].item, Group)
+        ):
+            rhs = rhs.alts[0].items[0].item.rhs
+        return rhs
+
+    def collect_todo(self, gen: ParserGenerator) -> None:
+        rhs = self.flatten()
+        rhs.collect_todo(gen)
+
+
+class Leaf:
+    def __init__(self, value: str):
+        self.value = value
+
+    def __str__(self) -> str:
+        return self.value
+
+    def __iter__(self) -> Iterable[str]:
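+        # An intentionally empty iterator: the unreachable yield turns this
+        # method into a generator function that yields nothing.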
+        if False:
+            yield
+
+    @abstractmethod
+    def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
+        raise NotImplementedError
+
+    @abstractmethod
+    def initial_names(self) -> AbstractSet[str]:
+        raise NotImplementedError
+
+
+class NameLeaf(Leaf):
+    """The value is the name."""
+
+    def __str__(self) -> str:
+        if self.value == "ENDMARKER":
+            return "$"
+        return super().__str__()
+
+    def __repr__(self) -> str:
+        return f"NameLeaf({self.value!r})"
+
+    def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
+        if self.value in rules:
+            return rules[self.value].nullable_visit(rules)
+        # Token or unknown; never empty.
+        return False
+
+    def initial_names(self) -> AbstractSet[str]:
+        return {self.value}
+
+
+class StringLeaf(Leaf):
+    """The value is a string literal, including quotes."""
+
+    def __repr__(self) -> str:
+        return f"StringLeaf({self.value!r})"
+
+    def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
+        # The string token '' is considered empty.
+        return not self.value
+
+    def initial_names(self) -> AbstractSet[str]:
+        return set()
+
+
+class Rhs:
+    def __init__(self, alts: List[Alt]):
+        self.alts = alts
+        self.memo: Optional[Tuple[Optional[str], str]] = None
+
+    def __str__(self) -> str:
+        return " | ".join(str(alt) for alt in self.alts)
+
+    def __repr__(self) -> str:
+        return f"Rhs({self.alts!r})"
+
+    def __iter__(self) -> Iterator[List[Alt]]:
+        yield self.alts
+
+    def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
+        for alt in self.alts:
+            if alt.nullable_visit(rules):
+                return True
+        return False
+
+    def initial_names(self) -> AbstractSet[str]:
+        names: Set[str] = set()
+        for alt in self.alts:
+            names |= alt.initial_names()
+        return names
+
+    def collect_todo(self, gen: ParserGenerator) -> None:
+        for alt in self.alts:
+            alt.collect_todo(gen)
+
+
+class Alt:
+    def __init__(self, items: List[NamedItem], *, icut: int = -1, action: Optional[str] = None):
+        self.items = items
+        self.icut = icut
+        self.action = action
+
+    def __str__(self) -> str:
+        core = " ".join(str(item) for item in self.items)
+        if not SIMPLE_STR and self.action:
+            return f"{core} {{ {self.action} }}"
+        else:
+            return core
+
+    def __repr__(self) -> str:
+        args = [repr(self.items)]
+        if self.icut >= 0:
+            args.append(f"icut={self.icut}")
+        if self.action:
+            args.append(f"action={self.action!r}")
+        return f"Alt({', '.join(args)})"
+
+    def __iter__(self) -> Iterator[List[NamedItem]]:
+        yield self.items
+
+    def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
+        for item in self.items:
+            if not item.nullable_visit(rules):
+                return False
+        return True
+
+    def initial_names(self) -> AbstractSet[str]:
+        names: Set[str] = set()
+        for item in self.items:
+            names |= item.initial_names()
+            if not item.nullable:
+                break
+        return names
+
+    def collect_todo(self, gen: ParserGenerator) -> None:
+        for item in self.items:
+            item.collect_todo(gen)
+
+
+class NamedItem:
+    def __init__(self, name: Optional[str], item: Item):
+        self.name = name
+        self.item = item
+        self.nullable = False
+
+    def __str__(self) -> str:
+        if not SIMPLE_STR and self.name:
+            return f"{self.name}={self.item}"
+        else:
+            return str(self.item)
+
+    def __repr__(self) -> str:
+        return f"NamedItem({self.name!r}, {self.item!r})"
+
+    def __iter__(self) -> Iterator[Item]:
+        yield self.item
+
+    def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
+        self.nullable = self.item.nullable_visit(rules)
+        return self.nullable
+
+    def initial_names(self) -> AbstractSet[str]:
+        return self.item.initial_names()
+
+    def collect_todo(self, gen: ParserGenerator) -> None:
+        gen.callmakervisitor.visit(self.item)
+
+
+class Lookahead:
+    def __init__(self, node: Plain, sign: str):
+        self.node = node
+        self.sign = sign
+
+    def __str__(self) -> str:
+        return f"{self.sign}{self.node}"
+
+    def __iter__(self) -> Iterator[Plain]:
+        yield self.node
+
+    def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
+        return True
+
+    def initial_names(self) -> AbstractSet[str]:
+        return set()
+
+
+class PositiveLookahead(Lookahead):
+    def __init__(self, node: Plain):
+        super().__init__(node, "&")
+
+    def __repr__(self) -> str:
+        return f"PositiveLookahead({self.node!r})"
+
+
+class NegativeLookahead(Lookahead):
+    def __init__(self, node: Plain):
+        super().__init__(node, "!")
+
+    def __repr__(self) -> str:
+        return f"NegativeLookahead({self.node!r})"
+
+
+class Opt:
+    def __init__(self, node: Item):
+        self.node = node
+
+    def __str__(self) -> str:
+        s = str(self.node)
+        # TODO: Decide whether to use [X] or X? based on type of X
+        if " " in s:
+            return f"[{s}]"
+        else:
+            return f"{s}?"
+
+    def __repr__(self) -> str:
+        return f"Opt({self.node!r})"
+
+    def __iter__(self) -> Iterator[Item]:
+        yield self.node
+
+    def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
+        return True
+
+    def initial_names(self) -> AbstractSet[str]:
+        return self.node.initial_names()
+
+
+class Repeat:
+    """Shared base class for x* and x+."""
+
+    def __init__(self, node: Plain):
+        self.node = node
+        self.memo: Optional[Tuple[Optional[str], str]] = None
+
+    @abstractmethod
+    def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
+        raise NotImplementedError
+
+    def __iter__(self) -> Iterator[Plain]:
+        yield self.node
+
+    def initial_names(self) -> AbstractSet[str]:
+        return self.node.initial_names()
+
+
+class Repeat0(Repeat):
+    def __str__(self) -> str:
+        s = str(self.node)
+        # TODO: Decide whether to use (X)* or X* based on type of X
+        if " " in s:
+            return f"({s})*"
+        else:
+            return f"{s}*"
+
+    def __repr__(self) -> str:
+        return f"Repeat0({self.node!r})"
+
+    def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
+        return True
+
+
+class Repeat1(Repeat):
+    def __str__(self) -> str:
+        s = str(self.node)
+        # TODO: Decide whether to use (X)+ or X+ based on type of X
+        if " " in s:
+            return f"({s})+"
+        else:
+            return f"{s}+"
+
+    def __repr__(self) -> str:
+        return f"Repeat1({self.node!r})"
+
+    def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
+        return False
+
+
+class Gather(Repeat):
+    def __init__(self, separator: Plain, node: Plain):
+        super().__init__(node)  # Also sets self.memo, like the other Repeat subclasses.
+        self.separator = separator
+
+    def __str__(self) -> str:
+        return f"{self.separator!s}.{self.node!s}+"
+
+    def __repr__(self) -> str:
+        return f"Gather({self.separator!r}, {self.node!r})"
+
+    def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
+        return False
+
+
+class Group:
+    def __init__(self, rhs: Rhs):
+        self.rhs = rhs
+
+    def __str__(self) -> str:
+        return f"({self.rhs})"
+
+    def __repr__(self) -> str:
+        return f"Group({self.rhs!r})"
+
+    def __iter__(self) -> Iterator[Rhs]:
+        yield self.rhs
+
+    def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
+        return self.rhs.nullable_visit(rules)
+
+    def initial_names(self) -> AbstractSet[str]:
+        return self.rhs.initial_names()
+
+
+class Cut:
+    def __init__(self) -> None:
+        pass
+
+    def __repr__(self) -> str:
+        return "Cut()"
+
+    def __str__(self) -> str:
+        return "~"
+
+    def __iter__(self) -> Iterator[Tuple[str, str]]:
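+        # Like Leaf.__iter__, an empty generator: a cut has no children.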
+        if False:
+            yield
+
+    def __eq__(self, other: object) -> bool:
+        if not isinstance(other, Cut):
+            return NotImplemented
+        return True
+
+    def nullable_visit(self, rules: Dict[str, Rule]) -> bool:
+        return True
+
+    def initial_names(self) -> AbstractSet[str]:
+        return set()
+
+
+Plain = Union[Leaf, Group]
+Item = Union[Plain, Opt, Repeat, Lookahead, Rhs, Cut]
+RuleName = Tuple[str, str]
+MetaTuple = Tuple[str, Optional[str]]
+MetaList = List[MetaTuple]
+RuleList = List[Rule]
+NamedItemList = List[NamedItem]
+LookaheadOrCut = Union[Lookahead, Cut]
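The classes above are the whole grammar AST that the rest of pegen manipulates. A minimal sketch (not part of the patch) of building a rule set by hand and running the nullable/initial-names analyses on it:

```python
from pegen.grammar import Alt, NameLeaf, NamedItem, Rhs, Rule, StringLeaf

# Equivalent to the grammar:  start: 'x' thing | thing   /   thing: NAME
thing = Rule("thing", None, Rhs([Alt([NamedItem(None, NameLeaf("NAME"))])]))
start = Rule(
    "start",
    None,
    Rhs([
        Alt([NamedItem(None, StringLeaf("'x'")), NamedItem(None, NameLeaf("thing"))]),
        Alt([NamedItem(None, NameLeaf("thing"))]),
    ]),
)
rules = {"start": start, "thing": thing}

print(start)                        # start: 'x' thing | thing
print(start.nullable_visit(rules))  # False: every alternative consumes input
print(start.initial_names())        # {'thing'}: StringLeaf contributes no names
```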
diff --git a/Tools/peg_generator/pegen/grammar_parser.py b/Tools/peg_generator/pegen/grammar_parser.py
new file mode 100644
index 0000000..0e206ee
--- /dev/null
+++ b/Tools/peg_generator/pegen/grammar_parser.py
@@ -0,0 +1,677 @@
+#!/usr/bin/env python3.8
+# @generated by pegen from pegen/metagrammar.gram
+
+import ast
+import sys
+import tokenize
+
+from typing import Any, Optional
+
+from pegen.parser import memoize, memoize_left_rec, logger, Parser
+from ast import literal_eval
+
+from pegen.grammar import (
+    Alt,
+    Cut,
+    Gather,
+    Group,
+    Item,
+    Lookahead,
+    LookaheadOrCut,
+    MetaTuple,
+    MetaList,
+    NameLeaf,
+    NamedItem,
+    NamedItemList,
+    NegativeLookahead,
+    Opt,
+    Plain,
+    PositiveLookahead,
+    Repeat0,
+    Repeat1,
+    Rhs,
+    Rule,
+    RuleList,
+    RuleName,
+    Grammar,
+    StringLeaf,
+)
+
+class GeneratedParser(Parser):
+
+    @memoize
+    def start(self) -> Optional[Grammar]:
+        # start: grammar $
+        mark = self.mark()
+        cut = False
+        if (
+            (grammar := self.grammar())
+            and
+            (endmarker := self.expect('ENDMARKER'))
+        ):
+            return grammar
+        self.reset(mark)
+        if cut: return None
+        return None
+
+    @memoize
+    def grammar(self) -> Optional[Grammar]:
+        # grammar: metas rules | rules
+        mark = self.mark()
+        cut = False
+        if (
+            (metas := self.metas())
+            and
+            (rules := self.rules())
+        ):
+            return Grammar ( rules , metas )
+        self.reset(mark)
+        if cut: return None
+        cut = False
+        if (
+            (rules := self.rules())
+        ):
+            return Grammar ( rules , [ ] )
+        self.reset(mark)
+        if cut: return None
+        return None
+
+    @memoize
+    def metas(self) -> Optional[MetaList]:
+        # metas: meta metas | meta
+        mark = self.mark()
+        cut = False
+        if (
+            (meta := self.meta())
+            and
+            (metas := self.metas())
+        ):
+            return [ meta ] + metas
+        self.reset(mark)
+        if cut: return None
+        cut = False
+        if (
+            (meta := self.meta())
+        ):
+            return [ meta ]
+        self.reset(mark)
+        if cut: return None
+        return None
+
+    @memoize
+    def meta(self) -> Optional[MetaTuple]:
+        # meta: "@" NAME NEWLINE | "@" NAME NAME NEWLINE | "@" NAME STRING NEWLINE
+        mark = self.mark()
+        cut = False
+        if (
+            (literal := self.expect("@"))
+            and
+            (name := self.name())
+            and
+            (newline := self.expect('NEWLINE'))
+        ):
+            return ( name . string , None )
+        self.reset(mark)
+        if cut: return None
+        cut = False
+        if (
+            (literal := self.expect("@"))
+            and
+            (a := self.name())
+            and
+            (b := self.name())
+            and
+            (newline := self.expect('NEWLINE'))
+        ):
+            return ( a . string , b . string )
+        self.reset(mark)
+        if cut: return None
+        cut = False
+        if (
+            (literal := self.expect("@"))
+            and
+            (name := self.name())
+            and
+            (string := self.string())
+            and
+            (newline := self.expect('NEWLINE'))
+        ):
+            return ( name . string , literal_eval ( string . string ) )
+        self.reset(mark)
+        if cut: return None
+        return None
+
+    @memoize
+    def rules(self) -> Optional[RuleList]:
+        # rules: rule rules | rule
+        mark = self.mark()
+        cut = False
+        if (
+            (rule := self.rule())
+            and
+            (rules := self.rules())
+        ):
+            return [ rule ] + rules
+        self.reset(mark)
+        if cut: return None
+        cut = False
+        if (
+            (rule := self.rule())
+        ):
+            return [ rule ]
+        self.reset(mark)
+        if cut: return None
+        return None
+
+    @memoize
+    def rule(self) -> Optional[Rule]:
+        # rule: rulename memoflag? ":" alts NEWLINE INDENT more_alts DEDENT | rulename memoflag? ":" NEWLINE INDENT more_alts DEDENT | rulename memoflag? ":" alts NEWLINE
+        mark = self.mark()
+        cut = False
+        if (
+            (rulename := self.rulename())
+            and
+            (opt := self.memoflag(),)
+            and
+            (literal := self.expect(":"))
+            and
+            (alts := self.alts())
+            and
+            (newline := self.expect('NEWLINE'))
+            and
+            (indent := self.expect('INDENT'))
+            and
+            (more_alts := self.more_alts())
+            and
+            (dedent := self.expect('DEDENT'))
+        ):
+            return Rule ( rulename [ 0 ] , rulename [ 1 ] , Rhs ( alts . alts + more_alts . alts ) , memo = opt )
+        self.reset(mark)
+        if cut: return None
+        cut = False
+        if (
+            (rulename := self.rulename())
+            and
+            (opt := self.memoflag(),)
+            and
+            (literal := self.expect(":"))
+            and
+            (newline := self.expect('NEWLINE'))
+            and
+            (indent := self.expect('INDENT'))
+            and
+            (more_alts := self.more_alts())
+            and
+            (dedent := self.expect('DEDENT'))
+        ):
+            return Rule ( rulename [ 0 ] , rulename [ 1 ] , more_alts , memo = opt )
+        self.reset(mark)
+        if cut: return None
+        cut = False
+        if (
+            (rulename := self.rulename())
+            and
+            (opt := self.memoflag(),)
+            and
+            (literal := self.expect(":"))
+            and
+            (alts := self.alts())
+            and
+            (newline := self.expect('NEWLINE'))
+        ):
+            return Rule ( rulename [ 0 ] , rulename [ 1 ] , alts , memo = opt )
+        self.reset(mark)
+        if cut: return None
+        return None
+
+    @memoize
+    def rulename(self) -> Optional[RuleName]:
+        # rulename: NAME '[' NAME '*' ']' | NAME '[' NAME ']' | NAME
+        mark = self.mark()
+        cut = False
+        if (
+            (name := self.name())
+            and
+            (literal := self.expect('['))
+            and
+            (type := self.name())
+            and
+            (literal_1 := self.expect('*'))
+            and
+            (literal_2 := self.expect(']'))
+        ):
+            return ( name . string , type . string + "*" )
+        self.reset(mark)
+        if cut: return None
+        cut = False
+        if (
+            (name := self.name())
+            and
+            (literal := self.expect('['))
+            and
+            (type := self.name())
+            and
+            (literal_1 := self.expect(']'))
+        ):
+            return ( name . string , type . string )
+        self.reset(mark)
+        if cut: return None
+        cut = False
+        if (
+            (name := self.name())
+        ):
+            return ( name . string , None )
+        self.reset(mark)
+        if cut: return None
+        return None
+
+    @memoize
+    def memoflag(self) -> Optional[str]:
+        # memoflag: '(' 'memo' ')'
+        mark = self.mark()
+        cut = False
+        if (
+            (literal := self.expect('('))
+            and
+            (literal_1 := self.expect('memo'))
+            and
+            (literal_2 := self.expect(')'))
+        ):
+            return "memo"
+        self.reset(mark)
+        if cut: return None
+        return None
+
+    @memoize
+    def alts(self) -> Optional[Rhs]:
+        # alts: alt "|" alts | alt
+        mark = self.mark()
+        cut = False
+        if (
+            (alt := self.alt())
+            and
+            (literal := self.expect("|"))
+            and
+            (alts := self.alts())
+        ):
+            return Rhs ( [ alt ] + alts . alts )
+        self.reset(mark)
+        if cut: return None
+        cut = False
+        if (
+            (alt := self.alt())
+        ):
+            return Rhs ( [ alt ] )
+        self.reset(mark)
+        if cut: return None
+        return None
+
+    @memoize
+    def more_alts(self) -> Optional[Rhs]:
+        # more_alts: "|" alts NEWLINE more_alts | "|" alts NEWLINE
+        mark = self.mark()
+        cut = False
+        if (
+            (literal := self.expect("|"))
+            and
+            (alts := self.alts())
+            and
+            (newline := self.expect('NEWLINE'))
+            and
+            (more_alts := self.more_alts())
+        ):
+            return Rhs ( alts . alts + more_alts . alts )
+        self.reset(mark)
+        if cut: return None
+        cut = False
+        if (
+            (literal := self.expect("|"))
+            and
+            (alts := self.alts())
+            and
+            (newline := self.expect('NEWLINE'))
+        ):
+            return Rhs ( alts . alts )
+        self.reset(mark)
+        if cut: return None
+        return None
+
+    @memoize
+    def alt(self) -> Optional[Alt]:
+        # alt: items '$' action | items '$' | items action | items
+        mark = self.mark()
+        cut = False
+        if (
+            (items := self.items())
+            and
+            (literal := self.expect('$'))
+            and
+            (action := self.action())
+        ):
+            return Alt ( items + [ NamedItem ( None , NameLeaf ( 'ENDMARKER' ) ) ] , action = action )
+        self.reset(mark)
+        if cut: return None
+        cut = False
+        if (
+            (items := self.items())
+            and
+            (literal := self.expect('$'))
+        ):
+            return Alt ( items + [ NamedItem ( None , NameLeaf ( 'ENDMARKER' ) ) ] , action = None )
+        self.reset(mark)
+        if cut: return None
+        cut = False
+        if (
+            (items := self.items())
+            and
+            (action := self.action())
+        ):
+            return Alt ( items , action = action )
+        self.reset(mark)
+        if cut: return None
+        cut = False
+        if (
+            (items := self.items())
+        ):
+            return Alt ( items , action = None )
+        self.reset(mark)
+        if cut: return None
+        return None
+
+    @memoize
+    def items(self) -> Optional[NamedItemList]:
+        # items: named_item items | named_item
+        mark = self.mark()
+        cut = False
+        if (
+            (named_item := self.named_item())
+            and
+            (items := self.items())
+        ):
+            return [ named_item ] + items
+        self.reset(mark)
+        if cut: return None
+        cut = False
+        if (
+            (named_item := self.named_item())
+        ):
+            return [ named_item ]
+        self.reset(mark)
+        if cut: return None
+        return None
+
+    @memoize
+    def named_item(self) -> Optional[NamedItem]:
+        # named_item: NAME '=' ~ item | item | lookahead
+        mark = self.mark()
+        cut = False
+        if (
+            (name := self.name())
+            and
+            (literal := self.expect('='))
+            and
+            (cut := True)
+            and
+            (item := self.item())
+        ):
+            return NamedItem ( name . string , item )
+        self.reset(mark)
+        if cut: return None
+        cut = False
+        if (
+            (item := self.item())
+        ):
+            return NamedItem ( None , item )
+        self.reset(mark)
+        if cut: return None
+        cut = False
+        if (
+            (it := self.lookahead())
+        ):
+            return NamedItem ( None , it )
+        self.reset(mark)
+        if cut: return None
+        return None
+
+    @memoize
+    def lookahead(self) -> Optional[LookaheadOrCut]:
+        # lookahead: '&' ~ atom | '!' ~ atom | '~'
+        mark = self.mark()
+        cut = False
+        if (
+            (literal := self.expect('&'))
+            and
+            (cut := True)
+            and
+            (atom := self.atom())
+        ):
+            return PositiveLookahead ( atom )
+        self.reset(mark)
+        if cut: return None
+        cut = False
+        if (
+            (literal := self.expect('!'))
+            and
+            (cut := True)
+            and
+            (atom := self.atom())
+        ):
+            return NegativeLookahead ( atom )
+        self.reset(mark)
+        if cut: return None
+        cut = False
+        if (
+            (literal := self.expect('~'))
+        ):
+            return Cut ( )
+        self.reset(mark)
+        if cut: return None
+        return None
+
+    @memoize
+    def item(self) -> Optional[Item]:
+        # item: '[' ~ alts ']' | atom '?' | atom '*' | atom '+' | atom '.' atom '+' | atom
+        mark = self.mark()
+        cut = False
+        if (
+            (literal := self.expect('['))
+            and
+            (cut := True)
+            and
+            (alts := self.alts())
+            and
+            (literal_1 := self.expect(']'))
+        ):
+            return Opt ( alts )
+        self.reset(mark)
+        if cut: return None
+        cut = False
+        if (
+            (atom := self.atom())
+            and
+            (literal := self.expect('?'))
+        ):
+            return Opt ( atom )
+        self.reset(mark)
+        if cut: return None
+        cut = False
+        if (
+            (atom := self.atom())
+            and
+            (literal := self.expect('*'))
+        ):
+            return Repeat0 ( atom )
+        self.reset(mark)
+        if cut: return None
+        cut = False
+        if (
+            (atom := self.atom())
+            and
+            (literal := self.expect('+'))
+        ):
+            return Repeat1 ( atom )
+        self.reset(mark)
+        if cut: return None
+        cut = False
+        if (
+            (sep := self.atom())
+            and
+            (literal := self.expect('.'))
+            and
+            (node := self.atom())
+            and
+            (literal_1 := self.expect('+'))
+        ):
+            return Gather ( sep , node )
+        self.reset(mark)
+        if cut: return None
+        cut = False
+        if (
+            (atom := self.atom())
+        ):
+            return atom
+        self.reset(mark)
+        if cut: return None
+        return None
+
+    @memoize
+    def atom(self) -> Optional[Plain]:
+        # atom: '(' ~ alts ')' | NAME | STRING
+        mark = self.mark()
+        cut = False
+        if (
+            (literal := self.expect('('))
+            and
+            (cut := True)
+            and
+            (alts := self.alts())
+            and
+            (literal_1 := self.expect(')'))
+        ):
+            return Group ( alts )
+        self.reset(mark)
+        if cut: return None
+        cut = False
+        if (
+            (name := self.name())
+        ):
+            return NameLeaf ( name . string )
+        self.reset(mark)
+        if cut: return None
+        cut = False
+        if (
+            (string := self.string())
+        ):
+            return StringLeaf ( string . string )
+        self.reset(mark)
+        if cut: return None
+        return None
+
+    @memoize
+    def action(self) -> Optional[str]:
+        # action: "{" ~ target_atoms "}"
+        mark = self.mark()
+        cut = False
+        if (
+            (literal := self.expect("{"))
+            and
+            (cut := True)
+            and
+            (target_atoms := self.target_atoms())
+            and
+            (literal_1 := self.expect("}"))
+        ):
+            return target_atoms
+        self.reset(mark)
+        if cut: return None
+        return None
+
+    @memoize
+    def target_atoms(self) -> Optional[str]:
+        # target_atoms: target_atom target_atoms | target_atom
+        mark = self.mark()
+        cut = False
+        if (
+            (target_atom := self.target_atom())
+            and
+            (target_atoms := self.target_atoms())
+        ):
+            return target_atom + " " + target_atoms
+        self.reset(mark)
+        if cut: return None
+        cut = False
+        if (
+            (target_atom := self.target_atom())
+        ):
+            return target_atom
+        self.reset(mark)
+        if cut: return None
+        return None
+
+    @memoize
+    def target_atom(self) -> Optional[str]:
+        # target_atom: "{" ~ target_atoms "}" | NAME | NUMBER | STRING | "?" | ":" | !"}" OP
+        mark = self.mark()
+        cut = False
+        if (
+            (literal := self.expect("{"))
+            and
+            (cut := True)
+            and
+            (target_atoms := self.target_atoms())
+            and
+            (literal_1 := self.expect("}"))
+        ):
+            return "{" + target_atoms + "}"
+        self.reset(mark)
+        if cut: return None
+        cut = False
+        if (
+            (name := self.name())
+        ):
+            return name . string
+        self.reset(mark)
+        if cut: return None
+        cut = False
+        if (
+            (number := self.number())
+        ):
+            return number . string
+        self.reset(mark)
+        if cut: return None
+        cut = False
+        if (
+            (string := self.string())
+        ):
+            return string . string
+        self.reset(mark)
+        if cut: return None
+        cut = False
+        if (
+            (literal := self.expect("?"))
+        ):
+            return "?"
+        self.reset(mark)
+        if cut: return None
+        cut = False
+        if (
+            (literal := self.expect(":"))
+        ):
+            return ":"
+        self.reset(mark)
+        if cut: return None
+        cut = False
+        if (
+            self.negative_lookahead(self.expect, "}")
+            and
+            (op := self.op())
+        ):
+            return op . string
+        self.reset(mark)
+        if cut: return None
+        return None
+
+
+if __name__ == '__main__':
+    from pegen.parser import simple_parser_main
+    simple_parser_main(GeneratedParser)
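The generated parser can be driven directly, mirroring simple_parser_main() in pegen/parser.py below; a minimal sketch (not part of the patch) parsing a grammar from a string:

```python
import io
import tokenize

from pegen.grammar_parser import GeneratedParser
from pegen.tokenizer import Tokenizer

source = "start: NAME NEWLINE\n"
tokengen = tokenize.generate_tokens(io.StringIO(source).readline)
parser = GeneratedParser(Tokenizer(tokengen))
grammar = parser.start()          # None on parse failure
if grammar is None:
    raise parser.make_syntax_error("<string>")
for rule in grammar:              # Grammar.__iter__ yields Rule objects
    print(rule)                   # start: NAME NEWLINE
```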
diff --git a/Tools/peg_generator/pegen/grammar_visualizer.py b/Tools/peg_generator/pegen/grammar_visualizer.py
new file mode 100644
index 0000000..b1d51d2
--- /dev/null
+++ b/Tools/peg_generator/pegen/grammar_visualizer.py
@@ -0,0 +1,65 @@
+import argparse
+import sys
+
+from typing import Any, Iterator, Iterable, Callable
+
+from pegen.build import build_parser
+from pegen.grammar import Grammar, Rule
+
+argparser = argparse.ArgumentParser(
+    prog="pegen", description="Pretty print the AST for a given PEG grammar"
+)
+argparser.add_argument("filename", help="Grammar description")
+
+
+class ASTGrammarPrinter:
+    def children(self, node: Rule) -> Iterator[Any]:
+        for value in node:
+            if isinstance(value, list):
+                yield from value
+            else:
+                yield value
+
+    def name(self, node: Rule) -> str:
+        if not list(self.children(node)):
+            return repr(node)
+        return node.__class__.__name__
+
+    def print_grammar_ast(self, grammar: Grammar, printer: Callable[..., None] = print) -> None:
+        for rule in grammar.rules.values():
+            printer(self.print_nodes_recursively(rule))
+
+    def print_nodes_recursively(self, node: Rule, prefix: str = "", istail: bool = True) -> str:
+        children = list(self.children(node))
+        value = self.name(node)
+
+        line = prefix + ("└──" if istail else "├──") + value + "\n"
+        suffix = "   " if istail else "│  "
+
+        if not children:
+            return line
+
+        *children, last = children
+        for child in children:
+            line += self.print_nodes_recursively(child, prefix + suffix, False)
+        line += self.print_nodes_recursively(last, prefix + suffix, True)
+
+        return line
+
+
+def main() -> None:
+    args = argparser.parse_args()
+
+    try:
+        grammar, parser, tokenizer = build_parser(args.filename)
+    except Exception as err:
+        print(f"ERROR: Failed to parse grammar file: {err}", file=sys.stderr)
+        sys.exit(1)
+
+    visitor = ASTGrammarPrinter()
+    visitor.print_grammar_ast(grammar)
+
+
+if __name__ == "__main__":
+    main()
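A minimal sketch (not part of the patch) of the printer on an in-memory grammar instead of a file; Grammar(rules, metas) follows its use in the generated metagrammar parser:

```python
from pegen.grammar import Alt, Grammar, NameLeaf, NamedItem, Rhs, Rule
from pegen.grammar_visualizer import ASTGrammarPrinter

rule = Rule("start", None, Rhs([Alt([NamedItem(None, NameLeaf("NAME"))])]))
ASTGrammarPrinter().print_grammar_ast(Grammar([rule], []))
# └──Rule
#    └──Rhs
#       └──Alt
#          └──NamedItem
#             └──NameLeaf('NAME')
```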
diff --git a/Tools/peg_generator/pegen/metagrammar.gram b/Tools/peg_generator/pegen/metagrammar.gram
new file mode 100644
index 0000000..f0c5ac3
--- /dev/null
+++ b/Tools/peg_generator/pegen/metagrammar.gram
@@ -0,0 +1,123 @@
+@subheader """\
+from ast import literal_eval
+
+from pegen.grammar import (
+    Alt,
+    Cut,
+    Gather,
+    Group,
+    Item,
+    Lookahead,
+    LookaheadOrCut,
+    MetaTuple,
+    MetaList,
+    NameLeaf,
+    NamedItem,
+    NamedItemList,
+    NegativeLookahead,
+    Opt,
+    Plain,
+    PositiveLookahead,
+    Repeat0,
+    Repeat1,
+    Rhs,
+    Rule,
+    RuleList,
+    RuleName,
+    Grammar,
+    StringLeaf,
+)
+"""
+
+start[Grammar]: grammar ENDMARKER { grammar }
+
+grammar[Grammar]:
+    | metas rules { Grammar(rules, metas) }
+    | rules { Grammar(rules, []) }
+
+metas[MetaList]:
+    | meta metas { [meta] + metas }
+    | meta { [meta] }
+
+meta[MetaTuple]:
+    | "@" NAME NEWLINE { (name.string, None) }
+    | "@" a=NAME b=NAME NEWLINE { (a.string, b.string) }
+    | "@" NAME STRING NEWLINE { (name.string, literal_eval(string.string)) }
+
+rules[RuleList]:
+    | rule rules { [rule] + rules }
+    | rule { [rule] }
+
+rule[Rule]:
+    | rulename memoflag? ":" alts NEWLINE INDENT more_alts DEDENT {
+          Rule(rulename[0], rulename[1], Rhs(alts.alts + more_alts.alts), memo=opt) }
+    | rulename memoflag? ":" NEWLINE INDENT more_alts DEDENT {
+          Rule(rulename[0], rulename[1], more_alts, memo=opt) }
+    | rulename memoflag? ":" alts NEWLINE { Rule(rulename[0], rulename[1], alts, memo=opt) }
+
+rulename[RuleName]:
+    | NAME '[' type=NAME '*' ']' { (name.string, type.string+"*") }
+    | NAME '[' type=NAME ']' { (name.string, type.string) }
+    | NAME { (name.string, None) }
+
+# In the future this may return something more complicated
+memoflag[str]:
+    | '(' 'memo' ')' { "memo" }
+
+alts[Rhs]:
+    | alt "|" alts { Rhs([alt] + alts.alts)}
+    | alt { Rhs([alt]) }
+
+more_alts[Rhs]:
+    | "|" alts NEWLINE more_alts { Rhs(alts.alts + more_alts.alts) }
+    | "|" alts NEWLINE { Rhs(alts.alts) }
+
+alt[Alt]:
+    | items '$' action { Alt(items + [NamedItem(None, NameLeaf('ENDMARKER'))], action=action) }
+    | items '$' { Alt(items + [NamedItem(None, NameLeaf('ENDMARKER'))], action=None) }
+    | items action { Alt(items, action=action) }
+    | items { Alt(items, action=None) }
+
+items[NamedItemList]:
+    | named_item items { [named_item] + items }
+    | named_item { [named_item] }
+
+named_item[NamedItem]:
+    | NAME '=' ~ item {NamedItem(name.string, item)}
+    | item {NamedItem(None, item)}
+    | it=lookahead {NamedItem(None, it)}
+
+lookahead[LookaheadOrCut]:
+    | '&' ~ atom {PositiveLookahead(atom)}
+    | '!' ~ atom {NegativeLookahead(atom)}
+    | '~' {Cut()}
+
+item[Item]:
+    | '[' ~ alts ']' {Opt(alts)}
+    |  atom '?' {Opt(atom)}
+    |  atom '*' {Repeat0(atom)}
+    |  atom '+' {Repeat1(atom)}
+    |  sep=atom '.' node=atom '+' {Gather(sep, node)}
+    |  atom {atom}
+
+atom[Plain]:
+    | '(' ~ alts ')' {Group(alts)}
+    | NAME {NameLeaf(name.string) }
+    | STRING {StringLeaf(string.string)}
+
+# Mini-grammar for the actions
+
+action[str]: "{" ~ target_atoms "}" { target_atoms }
+
+target_atoms[str]:
+    | target_atom target_atoms { target_atom + " " + target_atoms }
+    | target_atom { target_atom }
+
+target_atom[str]:
+    | "{" ~ target_atoms "}" { "{" + target_atoms + "}" }
+    | NAME { name.string }
+    | NUMBER { number.string }
+    | STRING { string.string }
+    | "?" { "?" }
+    | ":" { ":" }
+    | !"}" OP { op.string }
diff --git a/Tools/peg_generator/pegen/parser.py b/Tools/peg_generator/pegen/parser.py
new file mode 100644
index 0000000..16d954d
--- /dev/null
+++ b/Tools/peg_generator/pegen/parser.py
@@ -0,0 +1,310 @@
+import argparse
+import sys
+import time
+import token
+import tokenize
+import traceback
+
+from abc import abstractmethod
+from typing import Any, Callable, cast, Dict, Optional, Tuple, Type, TypeVar
+
+from pegen.tokenizer import exact_token_types
+from pegen.tokenizer import Mark
+from pegen.tokenizer import Tokenizer
+
+T = TypeVar("T")
+P = TypeVar("P", bound="Parser")
+F = TypeVar("F", bound=Callable[..., Any])
+
+
+def logger(method: F) -> F:
+    """For non-memoized functions that we want to be logged.
+
+    (In practice this is only non-leader left-recursive functions.)
+    """
+    method_name = method.__name__
+
+    def logger_wrapper(self: P, *args: object) -> T:
+        if not self._verbose:
+            return method(self, *args)
+        argsr = ",".join(repr(arg) for arg in args)
+        fill = "  " * self._level
+        print(f"{fill}{method_name}({argsr}) .... (looking at {self.showpeek()})")
+        self._level += 1
+        tree = method(self, *args)
+        self._level -= 1
+        print(f"{fill}... {method_name}({argsr}) --> {tree!s:.200}")
+        return tree
+
+    logger_wrapper.__wrapped__ = method  # type: ignore
+    return cast(F, logger_wrapper)
+
+
+def memoize(method: F) -> F:
+    """Memoize a symbol method."""
+    method_name = method.__name__
+
+    def memoize_wrapper(self: P, *args: object) -> T:
+        mark = self.mark()
+        key = mark, method_name, args
+        # Fast path: cache hit, and not verbose.
+        if key in self._cache and not self._verbose:
+            tree, endmark = self._cache[key]
+            self.reset(endmark)
+            return tree
+        # Slow path: no cache hit, or verbose.
+        verbose = self._verbose
+        argsr = ",".join(repr(arg) for arg in args)
+        fill = "  " * self._level
+        if key not in self._cache:
+            if verbose:
+                print(f"{fill}{method_name}({argsr}) ... (looking at {self.showpeek()})")
+            self._level += 1
+            tree = method(self, *args)
+            self._level -= 1
+            if verbose:
+                print(f"{fill}... {method_name}({argsr}) -> {tree!s:.200}")
+            endmark = self.mark()
+            self._cache[key] = tree, endmark
+        else:
+            tree, endmark = self._cache[key]
+            if verbose:
+                print(f"{fill}{method_name}({argsr}) -> {tree!s:.200}")
+            self.reset(endmark)
+        return tree
+
+    memoize_wrapper.__wrapped__ = method  # type: ignore
+    return cast(F, memoize_wrapper)
+
+
+def memoize_left_rec(method: Callable[[P], Optional[T]]) -> Callable[[P], Optional[T]]:
+    """Memoize a left-recursive symbol method."""
+    method_name = method.__name__
+
+    def memoize_left_rec_wrapper(self: P) -> Optional[T]:
+        mark = self.mark()
+        key = mark, method_name, ()
+        # Fast path: cache hit, and not verbose.
+        if key in self._cache and not self._verbose:
+            tree, endmark = self._cache[key]
+            self.reset(endmark)
+            return tree
+        # Slow path: no cache hit, or verbose.
+        verbose = self._verbose
+        fill = "  " * self._level
+        if key not in self._cache:
+            if verbose:
+                print(f"{fill}{method_name} ... (looking at {self.showpeek()})")
+            self._level += 1
+
+            # For left-recursive rules we manipulate the cache and
+            # loop until the rule shows no progress, then pick the
+            # previous result.  For an explanation why this works, see
+            # https://github.com/PhilippeSigaud/Pegged/wiki/Left-Recursion
+            # (But we use the memoization cache instead of a static
+            # variable; perhaps this is similar to a paper by Warth et al.,
+            # http://web.cs.ucla.edu/~todd/research/pub.php?id=pepm08.)
+
+            # Prime the cache with a failure.
+            self._cache[key] = None, mark
+            lastresult, lastmark = None, mark
+            depth = 0
+            if verbose:
+                print(f"{fill}Recursive {method_name} at {mark} depth {depth}")
+
+            while True:
+                self.reset(mark)
+                result = method(self)
+                endmark = self.mark()
+                depth += 1
+                if verbose:
+                    print(
+                        f"{fill}Recursive {method_name} at {mark} depth {depth}: {result!s:.200} to {endmark}"
+                    )
+                if not result:
+                    if verbose:
+                        print(f"{fill}Fail with {lastresult!s:.200} to {lastmark}")
+                    break
+                if endmark <= lastmark:
+                    if verbose:
+                        print(f"{fill}Bailing with {lastresult!s:.200} to {lastmark}")
+                    break
+                self._cache[key] = lastresult, lastmark = result, endmark
+
+            self.reset(lastmark)
+            tree = lastresult
+
+            self._level -= 1
+            if verbose:
+                print(f"{fill}{method_name}() -> {tree!s:.200} [cached]")
+            if tree:
+                endmark = self.mark()
+            else:
+                endmark = mark
+                self.reset(endmark)
+            self._cache[key] = tree, endmark
+        else:
+            tree, endmark = self._cache[key]
+            if verbose:
+                print(f"{fill}{method_name}() -> {tree!s:.200} [fresh]")
+            if tree:
+                self.reset(endmark)
+        return tree
+
+    memoize_left_rec_wrapper.__wrapped__ = method  # type: ignore
+    return memoize_left_rec_wrapper
+
+
+class Parser:
+    """Parsing base class."""
+
+    def __init__(self, tokenizer: Tokenizer, *, verbose: bool = False):
+        self._tokenizer = tokenizer
+        self._verbose = verbose
+        self._level = 0
+        self._cache: Dict[Tuple[Mark, str, Tuple[Any, ...]], Tuple[Any, Mark]] = {}
+        # Pass through common tokenizer methods.
+        # TODO: Rename to _mark and _reset.
+        self.mark = self._tokenizer.mark
+        self.reset = self._tokenizer.reset
+
+    @abstractmethod
+    def start(self) -> Any:
+        pass
+
+    def showpeek(self) -> str:
+        tok = self._tokenizer.peek()
+        return f"{tok.start[0]}.{tok.start[1]}: {token.tok_name[tok.type]}:{tok.string!r}"
+
+    @memoize
+    def name(self) -> Optional[tokenize.TokenInfo]:
+        tok = self._tokenizer.peek()
+        if tok.type == token.NAME:
+            return self._tokenizer.getnext()
+        return None
+
+    @memoize
+    def number(self) -> Optional[tokenize.TokenInfo]:
+        tok = self._tokenizer.peek()
+        if tok.type == token.NUMBER:
+            return self._tokenizer.getnext()
+        return None
+
+    @memoize
+    def string(self) -> Optional[tokenize.TokenInfo]:
+        tok = self._tokenizer.peek()
+        if tok.type == token.STRING:
+            return self._tokenizer.getnext()
+        return None
+
+    @memoize
+    def op(self) -> Optional[tokenize.TokenInfo]:
+        tok = self._tokenizer.peek()
+        if tok.type == token.OP:
+            return self._tokenizer.getnext()
+        return None
+
+    @memoize
+    def expect(self, type: str) -> Optional[tokenize.TokenInfo]:
+        tok = self._tokenizer.peek()
+        if tok.string == type:
+            return self._tokenizer.getnext()
+        if type in exact_token_types:
+            if tok.type == exact_token_types[type]:
+                return self._tokenizer.getnext()
+        if type in token.__dict__:
+            if tok.type == token.__dict__[type]:
+                return self._tokenizer.getnext()
+        if tok.type == token.OP and tok.string == type:
+            return self._tokenizer.getnext()
+        return None
+
+    def positive_lookahead(self, func: Callable[..., T], *args: object) -> T:
+        mark = self.mark()
+        ok = func(*args)
+        self.reset(mark)
+        return ok
+
+    def negative_lookahead(self, func: Callable[..., object], *args: object) -> bool:
+        mark = self.mark()
+        ok = func(*args)
+        self.reset(mark)
+        return not ok
+
+    def make_syntax_error(self, filename: str = "<unknown>") -> SyntaxError:
+        tok = self._tokenizer.diagnose()
+        return SyntaxError(
+            "pegen parse failure", (filename, tok.start[0], 1 + tok.start[1], tok.line)
+        )
+
+
+def simple_parser_main(parser_class: Type[Parser]) -> None:
+    argparser = argparse.ArgumentParser()
+    argparser.add_argument(
+        "-v",
+        "--verbose",
+        action="count",
+        default=0,
+        help="Print timing stats; repeat for more debug output",
+    )
+    argparser.add_argument(
+        "-q", "--quiet", action="store_true", help="Don't print the parsed program"
+    )
+    argparser.add_argument("filename", help="Input file ('-' to use stdin)")
+
+    args = argparser.parse_args()
+    verbose = args.verbose
+    verbose_tokenizer = verbose >= 3
+    verbose_parser = verbose == 2 or verbose >= 4
+
+    t0 = time.time()
+
+    filename = args.filename
+    if filename == "" or filename == "-":
+        filename = "<stdin>"
+        file = sys.stdin
+    else:
+        file = open(args.filename)
+    try:
+        tokengen = tokenize.generate_tokens(file.readline)
+        tokenizer = Tokenizer(tokengen, verbose=verbose_tokenizer)
+        parser = parser_class(tokenizer, verbose=verbose_parser)
+        tree = parser.start()
+        try:
+            if file.isatty():
+                endpos = 0
+            else:
+                endpos = file.tell()
+        except IOError:
+            endpos = 0
+    finally:
+        if file is not sys.stdin:
+            file.close()
+
+    t1 = time.time()
+
+    if not tree:
+        err = parser.make_syntax_error(filename)
+        traceback.print_exception(err.__class__, err, None)
+        sys.exit(1)
+
+    if not args.quiet:
+        print(tree)
+
+    if verbose:
+        dt = t1 - t0
+        diag = tokenizer.diagnose()
+        nlines = diag.end[0]
+        if diag.type == token.ENDMARKER:
+            nlines -= 1
+        print(f"Total time: {dt:.3f} sec; {nlines} lines", end="")
+        if endpos:
+            print(f" ({endpos} bytes)", end="")
+        if dt:
+            print(f"; {nlines / dt:.0f} lines/sec")
+        else:
+            print()
+        print("Caches sizes:")
+        print(f"  token array : {len(tokenizer._tokens):10}")
+        print(f"        cache : {len(parser._cache):10}")
+        ## print_memstats()
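A minimal sketch (not part of the patch) of a hand-written Parser subclass: a left-recursive rule that @memoize_left_rec grows through its cache-priming loop, yielding left-associative sums:

```python
import io
import tokenize
from typing import Optional

from pegen.parser import Parser, memoize, memoize_left_rec
from pegen.tokenizer import Tokenizer


class AddParser(Parser):
    @memoize
    def start(self) -> Optional[int]:
        # start: expr NEWLINE ENDMARKER
        if (
            (expr := self.expr()) is not None
            and self.expect("NEWLINE")
            and self.expect("ENDMARKER")
        ):
            return expr
        return None

    @memoize_left_rec
    def expr(self) -> Optional[int]:
        # expr: expr '+' NUMBER | NUMBER
        # NOTE: memoize_left_rec treats falsy results as failure, so this
        # sketch assumes nonzero sums.
        mark = self.mark()
        if (
            (left := self.expr()) is not None
            and self.expect("+")
            and (num := self.number())
        ):
            return left + int(num.string)
        self.reset(mark)
        if num := self.number():
            return int(num.string)
        return None


tokens = tokenize.generate_tokens(io.StringIO("1 + 2 + 3\n").readline)
print(AddParser(Tokenizer(tokens)).start())  # 6
```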
diff --git a/Tools/peg_generator/pegen/parser_generator.py b/Tools/peg_generator/pegen/parser_generator.py
new file mode 100644
index 0000000..7851a7c
--- /dev/null
+++ b/Tools/peg_generator/pegen/parser_generator.py
@@ -0,0 +1,188 @@
+import contextlib
+import token
+from abc import abstractmethod
+
+from typing import AbstractSet, Dict, IO, Iterator, List, Optional, Set, Text, Tuple
+
+from pegen import sccutils
+from pegen.grammar import (
+    Grammar,
+    Rule,
+    Rhs,
+    Alt,
+    NamedItem,
+    Plain,
+    NameLeaf,
+    StringLeaf,
+    Gather,
+)
+from pegen.grammar import GrammarError, GrammarVisitor
+
+
+class RuleCheckingVisitor(GrammarVisitor):
+    def __init__(self, rules: Dict[str, Rule]):
+        self.rules = rules
+
+    def visit_NameLeaf(self, node: NameLeaf) -> None:
+        if node.value not in self.rules and node.value not in token.tok_name.values():
+            # TODO: Add line/col info to (leaf) nodes
+            raise GrammarError(f"Dangling reference to rule {node.value!r}")
+
+
+class ParserGenerator:
+
+    callmakervisitor: GrammarVisitor
+
+    def __init__(self, grammar: Grammar, file: Optional[IO[Text]]):
+        self.grammar = grammar
+        self.rules = grammar.rules
+        if "trailer" not in grammar.metas and "start" not in self.rules:
+            raise GrammarError("Grammar without a trailer must have a 'start' rule")
+        checker = RuleCheckingVisitor(self.rules)
+        for rule in self.rules.values():
+            checker.visit(rule)
+        self.file = file
+        self.level = 0
+        compute_nullables(self.rules)
+        self.first_graph, self.first_sccs = compute_left_recursives(self.rules)
+        self.todo = self.rules.copy()  # Rules to generate
+        self.counter = 0  # For name_rule()/name_loop()
+        self.keyword_counter = 499  # For keyword_type()
+
+    @abstractmethod
+    def generate(self, filename: str) -> None:
+        raise NotImplementedError
+
+    @contextlib.contextmanager
+    def indent(self) -> Iterator[None]:
+        self.level += 1
+        try:
+            yield
+        finally:
+            self.level -= 1
+
+    def print(self, *args: object) -> None:
+        if not args:
+            print(file=self.file)
+        else:
+            print("    " * self.level, end="", file=self.file)
+            print(*args, file=self.file)
+
+    def printblock(self, lines: str) -> None:
+        for line in lines.splitlines():
+            self.print(line)
+
+    def collect_todo(self) -> None:
+        done: Set[str] = set()
+        while True:
+            alltodo = list(self.todo)
+            todo = [i for i in alltodo if i not in done]
+            if not todo:
+                break
+            for rulename in todo:
+                self.todo[rulename].collect_todo(self)
+            done = set(alltodo)
+
+    def keyword_type(self) -> int:
+        self.keyword_counter += 1
+        return self.keyword_counter
+
+    def name_node(self, rhs: Rhs) -> str:
+        self.counter += 1
+        name = f"_tmp_{self.counter}"  # TODO: Pick a nicer name.
+        self.todo[name] = Rule(name, None, rhs)
+        return name
+
+    def name_loop(self, node: Plain, is_repeat1: bool) -> str:
+        self.counter += 1
+        if is_repeat1:
+            prefix = "_loop1_"
+        else:
+            prefix = "_loop0_"
+        name = f"{prefix}{self.counter}"  # TODO: It's ugly to signal via the name.
+        self.todo[name] = Rule(name, None, Rhs([Alt([NamedItem(None, node)])]))
+        return name
+
+    def name_gather(self, node: Gather) -> str:
+        self.counter += 1
+        name = f"_gather_{self.counter}"
+        self.counter += 1
+        extra_function_name = f"_loop0_{self.counter}"
+        extra_function_alt = Alt(
+            [NamedItem(None, node.separator), NamedItem("elem", node.node)], action="elem"
+        )
+        self.todo[extra_function_name] = Rule(
+            extra_function_name, None, Rhs([extra_function_alt])
+        )
+        alt = Alt(
+            [NamedItem("elem", node.node), NamedItem("seq", NameLeaf(extra_function_name))]
+        )
+        self.todo[name] = Rule(name, None, Rhs([alt]))
+        return name
+
+
+def dedupe(name: str, names: List[str]) -> str:
+    origname = name
+    counter = 0
+    while name in names:
+        counter += 1
+        name = f"{origname}_{counter}"
+    names.append(name)
+    return name
+
+
+def compute_nullables(rules: Dict[str, Rule]) -> None:
+    """Compute which rules in a grammar are nullable.
+
+    Thanks to TatSu (tatsu/leftrec.py) for inspiration.
+    """
+    for rule in rules.values():
+        rule.nullable_visit(rules)
+
+
+def compute_left_recursives(
+    rules: Dict[str, Rule]
+) -> Tuple[Dict[str, AbstractSet[str]], List[AbstractSet[str]]]:
+    graph = make_first_graph(rules)
+    sccs = list(sccutils.strongly_connected_components(graph.keys(), graph))
+    for scc in sccs:
+        if len(scc) > 1:
+            for name in scc:
+                rules[name].left_recursive = True
+            # Try to find a leader such that all cycles go through it.
+            leaders = set(scc)
+            for start in scc:
+                for cycle in sccutils.find_cycles_in_scc(graph, scc, start):
+                    ## print("Cycle:", " -> ".join(cycle))
+                    leaders -= scc - set(cycle)
+                    if not leaders:
+                        raise ValueError(
+                            f"SCC {scc} has no leadership candidate (no element is included in all cycles)"
+                        )
+            ## print("Leaders:", leaders)
+            leader = min(leaders)  # Pick an arbitrary leader from the candidates.
+            rules[leader].leader = True
+        else:
+            name = min(scc)  # The only element.
+            if name in graph[name]:
+                rules[name].left_recursive = True
+                rules[name].leader = True
+    return graph, sccs
+
+
+def make_first_graph(rules: Dict[str, Rule]) -> Dict[str, AbstractSet[str]]:
+    """Compute the graph of left-invocations.
+
+    There's an edge from A to B if A may invoke B at its initial
+    position.
+
+    Note that this requires the nullable flags to have been computed.
+    """
+    graph: Dict[str, AbstractSet[str]] = {}
+    vertices: Set[str] = set()
+    for rulename, rhs in rules.items():
+        graph[rulename] = names = rhs.initial_names()
+        vertices |= names
+    for vertex in vertices:
+        graph.setdefault(vertex, set())
+    return graph
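A minimal sketch (not part of the patch) of the two analyses on a toy rule set with one left-recursive rule:

```python
from pegen.grammar import Alt, NameLeaf, NamedItem, Rhs, Rule, StringLeaf
from pegen.parser_generator import compute_left_recursives, compute_nullables

# expr: expr '+' term | term   /   term: NUMBER
expr = Rule("expr", None, Rhs([
    Alt([
        NamedItem(None, NameLeaf("expr")),
        NamedItem(None, StringLeaf("'+'")),
        NamedItem(None, NameLeaf("term")),
    ]),
    Alt([NamedItem(None, NameLeaf("term"))]),
]))
term = Rule("term", None, Rhs([Alt([NamedItem(None, NameLeaf("NUMBER"))])]))
rules = {"expr": expr, "term": term}

compute_nullables(rules)  # must run first: the first-graph uses nullable flags
graph, sccs = compute_left_recursives(rules)
print(graph["expr"])                     # {'expr', 'term'} (a set; order varies)
print(expr.left_recursive, expr.leader)  # True True
print(term.left_recursive)               # False
```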
diff --git a/Tools/peg_generator/pegen/python_generator.py b/Tools/peg_generator/pegen/python_generator.py
new file mode 100644
index 0000000..b289188
--- /dev/null
+++ b/Tools/peg_generator/pegen/python_generator.py
@@ -0,0 +1,224 @@
+from typing import Any, Dict, List, Optional, IO, Text, Tuple
+
+from pegen.grammar import (
+    Cut,
+    GrammarVisitor,
+    NameLeaf,
+    StringLeaf,
+    Rhs,
+    NamedItem,
+    Lookahead,
+    PositiveLookahead,
+    NegativeLookahead,
+    Opt,
+    Repeat0,
+    Repeat1,
+    Gather,
+    Group,
+    Rule,
+    Alt,
+)
+from pegen import grammar
+from pegen.parser_generator import dedupe, ParserGenerator
+
+MODULE_PREFIX = """\
+#!/usr/bin/env python3.8
+# @generated by pegen from {filename}
+
+import ast
+import sys
+import tokenize
+
+from typing import Any, Optional
+
+from pegen.parser import memoize, memoize_left_rec, logger, Parser
+
+"""
+MODULE_SUFFIX = """
+
+if __name__ == '__main__':
+    from pegen.parser import simple_parser_main
+    simple_parser_main(GeneratedParser)
+"""
+
+
+class PythonCallMakerVisitor(GrammarVisitor):
+    def __init__(self, parser_generator: ParserGenerator):
+        self.gen = parser_generator
+        self.cache: Dict[Any, Any] = {}
+
+    def visit_NameLeaf(self, node: NameLeaf) -> Tuple[Optional[str], str]:
+        name = node.value
+        if name in ("NAME", "NUMBER", "STRING", "OP"):
+            name = name.lower()
+            return name, f"self.{name}()"
+        if name in ("NEWLINE", "DEDENT", "INDENT", "ENDMARKER", "ASYNC", "AWAIT"):
+            return name.lower(), f"self.expect({name!r})"
+        return name, f"self.{name}()"
+
+    def visit_StringLeaf(self, node: StringLeaf) -> Tuple[str, str]:
+        return "literal", f"self.expect({node.value})"
+
+    def visit_Rhs(self, node: Rhs) -> Tuple[Optional[str], str]:
+        if node in self.cache:
+            return self.cache[node]
+        if len(node.alts) == 1 and len(node.alts[0].items) == 1:
+            self.cache[node] = self.visit(node.alts[0].items[0])
+        else:
+            name = self.gen.name_node(node)
+            self.cache[node] = name, f"self.{name}()"
+        return self.cache[node]
+
+    def visit_NamedItem(self, node: NamedItem) -> Tuple[Optional[str], str]:
+        name, call = self.visit(node.item)
+        if node.name:
+            name = node.name
+        return name, call
+
+    def lookahead_call_helper(self, node: Lookahead) -> Tuple[str, str]:
+        name, call = self.visit(node.node)
+        head, tail = call.split("(", 1)
+        assert tail[-1] == ")"
+        tail = tail[:-1]
+        return head, tail
+
+    def visit_PositiveLookahead(self, node: PositiveLookahead) -> Tuple[None, str]:
+        head, tail = self.lookahead_call_helper(node)
+        return None, f"self.positive_lookahead({head}, {tail})"
+
+    def visit_NegativeLookahead(self, node: NegativeLookahead) -> Tuple[None, str]:
+        head, tail = self.lookahead_call_helper(node)
+        return None, f"self.negative_lookahead({head}, {tail})"
+
+    def visit_Opt(self, node: Opt) -> Tuple[str, str]:
+        name, call = self.visit(node.node)
+        return "opt", f"{call},"  # Note trailing comma!
+
+    def visit_Repeat0(self, node: Repeat0) -> Tuple[str, str]:
+        if node in self.cache:
+            return self.cache[node]
+        name = self.gen.name_loop(node.node, False)
+        self.cache[node] = name, f"self.{name}(),"  # Also a trailing comma!
+        return self.cache[node]
+
+    def visit_Repeat1(self, node: Repeat1) -> Tuple[str, str]:
+        if node in self.cache:
+            return self.cache[node]
+        name = self.gen.name_loop(node.node, True)
+        self.cache[node] = name, f"self.{name}()"  # But no trailing comma here!
+        return self.cache[node]
+
+    def visit_Gather(self, node: Gather) -> Tuple[str, str]:
+        if node in self.cache:
+            return self.cache[node]
+        name = self.gen.name_gather(node)
+        self.cache[node] = name, f"self.{name}()"  # No trailing comma here either!
+        return self.cache[node]
+
+    def visit_Group(self, node: Group) -> Tuple[Optional[str], str]:
+        return self.visit(node.rhs)
+
+    def visit_Cut(self, node: Cut) -> Tuple[str, str]:
+        return "cut", "True"
+
+
+class PythonParserGenerator(ParserGenerator, GrammarVisitor):
+    def __init__(self, grammar: grammar.Grammar, file: Optional[IO[Text]]):
+        super().__init__(grammar, file)
+        self.callmakervisitor = PythonCallMakerVisitor(self)
+
+    def generate(self, filename: str) -> None:
+        header = self.grammar.metas.get("header", MODULE_PREFIX)
+        if header is not None:
+            self.print(header.rstrip("\n").format(filename=filename))
+        subheader = self.grammar.metas.get("subheader", "")
+        if subheader:
+            self.print(subheader.format(filename=filename))
+        self.print("class GeneratedParser(Parser):")
+        while self.todo:
+            for rulename, rule in list(self.todo.items()):
+                del self.todo[rulename]
+                self.print()
+                with self.indent():
+                    self.visit(rule)
+        trailer = self.grammar.metas.get("trailer", MODULE_SUFFIX)
+        if trailer is not None:
+            self.print(trailer.rstrip("\n"))
+
+    def visit_Rule(self, node: Rule) -> None:
+        is_loop = node.is_loop()
+        is_gather = node.is_gather()
+        rhs = node.flatten()
+        if node.left_recursive:
+            if node.leader:
+                self.print("@memoize_left_rec")
+            else:
+                # Non-leader rules in a cycle are not memoized,
+                # but they must still be logged.
+                self.print("@logger")
+        else:
+            self.print("@memoize")
+        node_type = node.type or "Any"
+        self.print(f"def {node.name}(self) -> Optional[{node_type}]:")
+        with self.indent():
+            self.print(f"# {node.name}: {rhs}")
+            if node.nullable:
+                self.print(f"# nullable={node.nullable}")
+            self.print("mark = self.mark()")
+            if is_loop:
+                self.print("children = []")
+            self.visit(rhs, is_loop=is_loop, is_gather=is_gather)
+            if is_loop:
+                self.print("return children")
+            else:
+                self.print("return None")
+
+    def visit_NamedItem(self, node: NamedItem, names: List[str]) -> None:
+        name, call = self.callmakervisitor.visit(node.item)
+        if node.name:
+            name = node.name
+        if not name:
+            self.print(call)
+        else:
+            if name != "cut":
+                name = dedupe(name, names)
+            self.print(f"({name} := {call})")
+
+    def visit_Rhs(self, node: Rhs, is_loop: bool = False, is_gather: bool = False) -> None:
+        if is_loop:
+            assert len(node.alts) == 1
+        for alt in node.alts:
+            self.visit(alt, is_loop=is_loop, is_gather=is_gather)
+
+    def visit_Alt(self, node: Alt, is_loop: bool, is_gather: bool) -> None:
+        names: List[str] = []
+        self.print("cut = False")  # TODO: Only if needed.
+        if is_loop:
+            self.print("while (")
+        else:
+            self.print("if (")
+        with self.indent():
+            first = True
+            for item in node.items:
+                if first:
+                    first = False
+                else:
+                    self.print("and")
+                self.visit(item, names=names)
+        self.print("):")
+        with self.indent():
+            action = node.action
+            if not action:
+                if is_gather:
+                    assert len(names) == 2
+                    action = f"[{names[0]}] + {names[1]}"
+                else:
+                    action = f"[{', '.join(names)}]"
+            if is_loop:
+                self.print(f"children.append({action})")
+                self.print(f"mark = self.mark()")
+            else:
+                self.print(f"return {action}")
+        self.print("self.reset(mark)")
+        # Skip remaining alternatives if a cut was reached.
+        self.print("if cut: return None")  # TODO: Only if needed.
diff --git a/Tools/peg_generator/pegen/sccutils.py b/Tools/peg_generator/pegen/sccutils.py
new file mode 100644
index 0000000..1f0586b
--- /dev/null
+++ b/Tools/peg_generator/pegen/sccutils.py
@@ -0,0 +1,128 @@
+# Adapted from mypy (mypy/build.py) under the MIT license.
+
+from typing import AbstractSet, Dict, Iterable, Iterator, List, Set
+
+
+def strongly_connected_components(
+    vertices: AbstractSet[str], edges: Dict[str, AbstractSet[str]]
+) -> Iterator[AbstractSet[str]]:
+    """Compute Strongly Connected Components of a directed graph.
+
+    Args:
+      vertices: the labels for the vertices
+      edges: for each vertex, gives the target vertices of its outgoing edges
+
+    Returns:
+      An iterator yielding strongly connected components, each
+      represented as a set of vertices.  Each input vertex will occur
+      exactly once; vertices not part of an SCC are returned as
+      singleton sets.
+
+    From http://code.activestate.com/recipes/578507/.
+    """
+    identified: Set[str] = set()
+    stack: List[str] = []
+    index: Dict[str, int] = {}
+    boundaries: List[int] = []
+
+    def dfs(v: str) -> Iterator[Set[str]]:
+        index[v] = len(stack)
+        stack.append(v)
+        boundaries.append(index[v])
+
+        for w in edges[v]:
+            if w not in index:
+                yield from dfs(w)
+            elif w not in identified:
+                while index[w] < boundaries[-1]:
+                    boundaries.pop()
+
+        if boundaries[-1] == index[v]:
+            boundaries.pop()
+            scc = set(stack[index[v] :])
+            del stack[index[v] :]
+            identified.update(scc)
+            yield scc
+
+    for v in vertices:
+        if v not in index:
+            yield from dfs(v)
+
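+# A small runnable illustration (not part of the adapted mypy code): in the
+# graph a -> b -> c -> b, the cycle {b, c} is one SCC and is yielded before
+# {a}, which depends on it:
+#
+#     edges = {"a": {"b"}, "b": {"c"}, "c": {"b"}}
+#     sccs = list(strongly_connected_components({"a", "b", "c"}, edges))
+#     assert sccs == [{"b", "c"}, {"a"}]
+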
+
+def topsort(
+    data: Dict[AbstractSet[str], Set[AbstractSet[str]]]
+) -> Iterable[AbstractSet[AbstractSet[str]]]:
+    """Topological sort.
+
+    Args:
+      data: A map from SCCs (represented as frozen sets of strings) to
+            sets of SCCs, their dependencies.  NOTE: This data structure
+            is modified in place -- for normalization purposes,
+            self-dependencies are removed and entries representing
+            orphans are added.
+
+    Returns:
+      An iterator yielding sets of SCCs that have an equivalent
+      ordering.  NOTE: The algorithm doesn't care about the internal
+      structure of SCCs.
+
+    Example:
+      Suppose the input has the following structure:
+
+        {A: {B, C}, B: {D}, C: {D}}
+
+      This is normalized to:
+
+        {A: {B, C}, B: {D}, C: {D}, D: {}}
+
+      The algorithm will yield the following values:
+
+        {D}
+        {B, C}
+        {A}
+
+    From http://code.activestate.com/recipes/577413/.
+    """
+    # TODO: Use a faster algorithm?
+    for k, v in data.items():
+        v.discard(k)  # Ignore self dependencies.
+    for item in set.union(*data.values()) - set(data.keys()):
+        data[item] = set()
+    while True:
+        ready = {item for item, dep in data.items() if not dep}
+        if not ready:
+            break
+        yield ready
+        data = {item: (dep - ready) for item, dep in data.items() if item not in ready}
+    assert not data, "A cyclic dependency exists amongst %r" % data
+
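+# A runnable sketch of the docstring example, with frozensets standing in
+# for SCCs:
+#
+#     A, B, C, D = map(frozenset, ("A", "B", "C", "D"))
+#     assert list(topsort({A: {B, C}, B: {D}, C: {D}})) == [{D}, {B, C}, {A}]
+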
+
+def find_cycles_in_scc(
+    graph: Dict[str, AbstractSet[str]], scc: AbstractSet[str], start: str
+) -> Iterable[List[str]]:
+    """Find cycles in SCC emanating from start.
+
+    Yields lists of the form ['A', 'B', 'C', 'A'], which means there's
+    a path from A -> B -> C -> A.  The first item is always the start
+    argument, but the last item may be another element, e.g.  ['A',
+    'B', 'C', 'B'] means there's a path from A to B and there's a
+    cycle from B to C and back.
+    """
+    # Basic input checks.
+    assert start in scc, (start, scc)
+    assert scc <= graph.keys(), scc - graph.keys()
+
+    # Reduce the graph to nodes in the SCC.
+    graph = {src: {dst for dst in dsts if dst in scc} for src, dsts in graph.items() if src in scc}
+    assert start in graph
+
+    # Recursive helper that yields cycles.
+    def dfs(node: str, path: List[str]) -> Iterator[List[str]]:
+        if node in path:
+            yield path + [node]
+            return
+        path = path + [node]  # TODO: Make this not quadratic.
+        for child in graph[node]:
+            yield from dfs(child, path)
+
+    yield from dfs(start, [])
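+
+
+# For example (hypothetical graph, not from the adapted mypy code): given
+# graph = {"A": {"B"}, "B": {"C"}, "C": {"A"}} and scc = {"A", "B", "C"},
+# find_cycles_in_scc(graph, scc, "A") yields ['A', 'B', 'C', 'A'].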
diff --git a/Tools/peg_generator/pegen/testutil.py b/Tools/peg_generator/pegen/testutil.py
new file mode 100644
index 0000000..3616eff
--- /dev/null
+++ b/Tools/peg_generator/pegen/testutil.py
@@ -0,0 +1,126 @@
+import importlib.util
+import io
+import os
+import pathlib
+import sys
+import textwrap
+import tokenize
+
+from typing import Any, cast, Dict, IO, Type, Final
+
+from pegen.build import compile_c_extension
+from pegen.c_generator import CParserGenerator
+from pegen.grammar import Grammar
+from pegen.grammar_parser import GeneratedParser as GrammarParser
+from pegen.parser import Parser
+from pegen.python_generator import PythonParserGenerator
+from pegen.tokenizer import Tokenizer
+
+
+def generate_parser(grammar: Grammar) -> Type[Parser]:
+    # Generate a parser.
+    out = io.StringIO()
+    genr = PythonParserGenerator(grammar, out)
+    genr.generate("<string>")
+
+    # Load the generated parser class.
+    ns: Dict[str, Any] = {}
+    exec(out.getvalue(), ns)
+    return ns["GeneratedParser"]
+
+
+def run_parser(file: IO[bytes], parser_class: Type[Parser], *, verbose: bool = False) -> Any:
+    # Run a parser on a file (stream).
+    tokenizer = Tokenizer(tokenize.generate_tokens(file.readline))  # type: ignore # typeshed issue #3515
+    parser = parser_class(tokenizer, verbose=verbose)
+    result = parser.start()
+    if result is None:
+        raise parser.make_syntax_error()
+    return result
+
+
+def parse_string(
+    source: str, parser_class: Type[Parser], *, dedent: bool = True, verbose: bool = False
+) -> Any:
+    # Run the parser on a string.
+    if dedent:
+        source = textwrap.dedent(source)
+    file = io.StringIO(source)
+    return run_parser(file, parser_class, verbose=verbose)  # type: ignore # typeshed issue #3515
+
+
+def make_parser(source: str) -> Type[Parser]:
+    # Combine parse_string() and generate_parser().
+    grammar = parse_string(source, GrammarParser)
+    return generate_parser(grammar)
+
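+# For instance (a sketch; the toy grammar and input are made up for
+# illustration):
+#
+#     parser_class = make_parser("start: NAME NEWLINE? ENDMARKER { name }")
+#     name_node = parse_string("hello", parser_class)
+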
+
+def import_file(full_name: str, path: str) -> Any:
+    """Import a python module from a path"""
+
+    spec = importlib.util.spec_from_file_location(full_name, path)
+    mod = importlib.util.module_from_spec(spec)
+
+    # We assume this is not None and has an exec_module() method.
+    # See https://docs.python.org/3/reference/import.html?highlight=exec_module#loading
+    loader = cast(Any, spec.loader)
+    loader.exec_module(mod)
+    return mod
+
+
+def generate_c_parser_source(grammar: Grammar) -> str:
+    out = io.StringIO()
+    genr = CParserGenerator(grammar, out)
+    genr.generate("<string>")
+    return out.getvalue()
+
+
+def generate_parser_c_extension(
+    grammar: Grammar, path: pathlib.PurePath, debug: bool = False
+) -> Any:
+    """Generate a parser c extension for the given grammar in the given path
+
+    Returns a module object with a parse_string() method.
+    TODO: express that using a Protocol.
+    """
+    # Make sure that the working directory is empty: reusing non-empty temporary
+    # directories when generating extensions can lead to segmentation faults.
+    # Check issue #95 (https://github.com/gvanrossum/pegen/issues/95) for more
+    # context.
+    assert not os.listdir(path)
+    source = path / "parse.c"
+    with open(source, "w") as file:
+        genr = CParserGenerator(grammar, file, debug=debug)
+        genr.generate("parse.c")
+    extension_path = compile_c_extension(str(source), build_dir=str(path / "build"))
+    extension = import_file("parse", extension_path)
+    return extension
+
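+# Sketch of typical use from a test; tempfile and the toy grammar are
+# illustrative assumptions, not part of this module:
+#
+#     with tempfile.TemporaryDirectory() as tmp:
+#         grammar = parse_string("start: NAME ENDMARKER { name }", GrammarParser)
+#         extension = generate_parser_c_extension(grammar, pathlib.Path(tmp))
+#         node = extension.parse_string("hello", mode=1)
+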
+
+def print_memstats() -> bool:
+    MiB: Final = 2 ** 20
+    try:
+        import psutil  # type: ignore
+    except ImportError:
+        return False
+    print("Memory stats:")
+    process = psutil.Process()
+    meminfo = process.memory_info()
+    res = {}
+    res["rss"] = meminfo.rss / MiB
+    res["vms"] = meminfo.vms / MiB
+    if sys.platform == "win32":
+        res["maxrss"] = meminfo.peak_wset / MiB
+    else:
+        # See https://stackoverflow.com/questions/938733/total-memory-used-by-python-process
+        import resource  # Since it doesn't exist on Windows.
+
+        rusage = resource.getrusage(resource.RUSAGE_SELF)
+        if sys.platform == "darwin":
+            factor = 1
+        else:
+            factor = 1024  # Linux
+        res["maxrss"] = rusage.ru_maxrss * factor / MiB
+    for key, value in res.items():
+        print(f"  {key:12.12s}: {value:10.0f} MiB")
+    return True
diff --git a/Tools/peg_generator/pegen/tokenizer.py b/Tools/peg_generator/pegen/tokenizer.py
new file mode 100644
index 0000000..61a28ef
--- /dev/null
+++ b/Tools/peg_generator/pegen/tokenizer.py
@@ -0,0 +1,86 @@
+import token
+import tokenize
+from typing import List, Iterator
+
+Mark = int  # NewType('Mark', int)
+
+exact_token_types = token.EXACT_TOKEN_TYPES  # type: ignore
+
+
+def shorttok(tok: tokenize.TokenInfo) -> str:
+    return "%-25.25s" % f"{tok.start[0]}.{tok.start[1]}: {token.tok_name[tok.type]}:{tok.string!r}"
+
+
+class Tokenizer:
+    """Caching wrapper for the tokenize module.
+
+    This is pretty tied to Python's syntax.
+    """
+
+    _tokens: List[tokenize.TokenInfo]
+
+    def __init__(self, tokengen: Iterator[tokenize.TokenInfo], *, verbose: bool = False):
+        self._tokengen = tokengen
+        self._tokens = []
+        self._index = 0
+        self._verbose = verbose
+        if verbose:
+            self.report(False, False)
+
+    def getnext(self) -> tokenize.TokenInfo:
+        """Return the next token and updates the index."""
+        cached = True
+        while self._index == len(self._tokens):
+            tok = next(self._tokengen)
+            if tok.type in (tokenize.NL, tokenize.COMMENT):
+                continue
+            if tok.type == token.ERRORTOKEN and tok.string.isspace():
+                continue
+            self._tokens.append(tok)
+            cached = False
+        tok = self._tokens[self._index]
+        self._index += 1
+        if self._verbose:
+            self.report(cached, False)
+        return tok
+
+    def peek(self) -> tokenize.TokenInfo:
+        """Return the next token *without* updating the index."""
+        while self._index == len(self._tokens):
+            tok = next(self._tokengen)
+            if tok.type in (tokenize.NL, tokenize.COMMENT):
+                continue
+            if tok.type == token.ERRORTOKEN and tok.string.isspace():
+                continue
+            self._tokens.append(tok)
+        return self._tokens[self._index]
+
+    def diagnose(self) -> tokenize.TokenInfo:
+        if not self._tokens:
+            self.getnext()
+        return self._tokens[-1]
+
+    def mark(self) -> Mark:
+        return self._index
+
+    def reset(self, index: Mark) -> None:
+        if index == self._index:
+            return
+        assert 0 <= index <= len(self._tokens), (index, len(self._tokens))
+        old_index = self._index
+        self._index = index
+        if self._verbose:
+            self.report(True, index < old_index)
+
+    def report(self, cached: bool, back: bool) -> None:
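+        # Draw the position as a gauge of '-' marks; the final character
+        # flags a backtrack ('-'), a cache hit ('>'), or a fresh token ('*').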
+        if back:
+            fill = "-" * self._index + "-"
+        elif cached:
+            fill = "-" * self._index + ">"
+        else:
+            fill = "-" * self._index + "*"
+        if self._index == 0:
+            print(f"{fill} (Bof)")
+        else:
+            tok = self._tokens[self._index - 1]
+            print(f"{fill} {shorttok(tok)}")
diff --git a/Tools/peg_generator/pyproject.toml b/Tools/peg_generator/pyproject.toml
new file mode 100644
index 0000000..f69c5b5
--- /dev/null
+++ b/Tools/peg_generator/pyproject.toml
@@ -0,0 +1,9 @@
+[tool.black]
+line-length = 99
+target_version = ['py38']
+exclude = '''
+(
+          /pegen/grammar_parser.py   # generated file
+        | /test/test_data/           # test files
+)
+'''
diff --git a/Tools/peg_generator/requirements.pip b/Tools/peg_generator/requirements.pip
new file mode 100644
index 0000000..190b348
--- /dev/null
+++ b/Tools/peg_generator/requirements.pip
@@ -0,0 +1,2 @@
+memory-profiler==0.57.0
+psutil==5.7.0
diff --git a/Tools/peg_generator/scripts/__init__.py b/Tools/peg_generator/scripts/__init__.py
new file mode 100644
index 0000000..1e423f4
--- /dev/null
+++ b/Tools/peg_generator/scripts/__init__.py
@@ -0,0 +1 @@
+# This exists to let mypy find modules here
diff --git a/Tools/peg_generator/scripts/ast_timings.py b/Tools/peg_generator/scripts/ast_timings.py
new file mode 100644
index 0000000..7ebd46f
--- /dev/null
+++ b/Tools/peg_generator/scripts/ast_timings.py
@@ -0,0 +1,28 @@
+import ast
+import sys
+import time
+
+from pegen.testutil import print_memstats
+
+
+def main() -> None:
+    t0 = time.time()
+    for filename in sys.argv[1:]:
+        print(filename, end="\r")
+        try:
+            with open(filename) as file:
+                source = file.read()
+            tree = ast.parse(source, filename)
+        except Exception as err:
+            print(f"{filename}: {err.__class__.__name__}: {err}", file=sys.stderr)
+    t1 = time.time()
+    dt = t1 - t0
+    print(f"Parsed in {dt:.3f} secs", file=sys.stderr)
+    print_memstats()
+
+
+if __name__ == "__main__":
+    main()
diff --git a/Tools/peg_generator/scripts/benchmark.py b/Tools/peg_generator/scripts/benchmark.py
new file mode 100644
index 0000000..bc75115
--- /dev/null
+++ b/Tools/peg_generator/scripts/benchmark.py
@@ -0,0 +1,140 @@
+#!/usr/bin/env python3.9
+
+import argparse
+import ast
+import sys
+import os
+from time import time
+
+import memory_profiler
+
+sys.path.insert(0, os.getcwd())
+from peg_extension import parse
+from pegen.build import build_parser_and_generator
+from scripts.test_parse_directory import parse_directory
+
+argparser = argparse.ArgumentParser(
+    prog="benchmark", description="Reproduce the various pegen benchmarks"
+)
+argparser.add_argument(
+    "--parser",
+    action="store",
+    choices=["pegen", "cpython"],
+    default="pegen",
+    help="Which parser to benchmark (default is pegen)",
+)
+argparser.add_argument(
+    "--target",
+    action="store",
+    choices=["xxl", "stdlib"],
+    default="xxl",
+    help="Which target to use for the benchmark (default is xxl.py)",
+)
+
+subcommands = argparser.add_subparsers(title="Benchmarks", dest="subcommand")
+command_compile = subcommands.add_parser(
+    "compile", help="Benchmark parsing and compiling to bytecode"
+)
+command_parse = subcommands.add_parser("parse", help="Benchmark parsing and generating an ast.AST")
+command_check = subcommands.add_parser(
+    "check", help="Benchmark parsing and throwing the tree away"
+)
+
+
+def benchmark(func):
+    def wrapper(*args):
+        times = []
+        for _ in range(3):
+            start = time()
+            result = func(*args)
+            end = time()
+            times.append(end - start)
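+        # memory_profiler runs func once more and samples its RSS over time;
+        # max() of those samples approximates the peak memory of one run.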
+        memory = memory_profiler.memory_usage((func, args))
+        print(f"{func.__name__}")
+        print(f"\tTime: {sum(times)/3:.3f} seconds on an average of 3 runs")
+        print(f"\tMemory: {max(memory)} MiB on an average of 3 runs")
+        return result
+
+    return wrapper
+
+
+@benchmark
+def time_compile(source, parser):
+    if parser == "cpython":
+        return compile(source, os.path.join("data", "xxl.py"), "exec")
+    else:
+        return parse.parse_string(source, mode=2)
+
+
+@benchmark
+def time_parse(source, parser):
+    if parser == "cpython":
+        return ast.parse(source, os.path.join("data", "xxl.py"), "exec")
+    else:
+        return parse.parse_string(source, mode=1)
+
+
+@benchmark
+def time_check(source):
+    return parse.parse_string(source, mode=0)
+
+
+def run_benchmark_xxl(subcommand, parser, source):
+    if subcommand == "compile":
+        time_compile(source, parser)
+    elif subcommand == "parse":
+        time_parse(source, parser)
+    elif subcommand == "check":
+        time_check(source)
+
+
+def run_benchmark_stdlib(subcommand, parser):
+    modes = {"compile": 2, "parse": 1, "check": 0}
+    extension = None
+    if parser == "pegen":
+        extension = build_parser_and_generator(
+            "../../Grammar/python.gram",
+            "peg_extension/parse.c",
+            compile_extension=True,
+            skip_actions=False,
+        )
+    for _ in range(3):
+        parse_directory(
+            "../../Lib",
+            "../../Grammar/python.gram",
+            verbose=False,
+            excluded_files=[
+                "*/bad*",
+                "*/lib2to3/tests/data/*",
+            ],
+            skip_actions=False,
+            tree_arg=0,
+            short=True,
+            extension=extension,
+            mode=modes[subcommand],
+            parser=parser,
+        )
+
+
+def main():
+    args = argparser.parse_args()
+    subcommand = args.subcommand
+    parser = args.parser
+    target = args.target
+
+    if subcommand is None:
+        argparser.error("A benchmark to run is required")
+    if subcommand == "check" and parser == "cpython":
+        argparser.error("Cannot use check target with the CPython parser")
+
+    if target == "xxl":
+        with open(os.path.join("data", "xxl.py"), "r") as f:
+            source = f.read()
+            run_benchmark_xxl(subcommand, parser, source)
+    elif target == "stdlib":
+        run_benchmark_stdlib(subcommand, parser)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/Tools/peg_generator/scripts/download_pypi_packages.py b/Tools/peg_generator/scripts/download_pypi_packages.py
new file mode 100755
index 0000000..9874202
--- /dev/null
+++ b/Tools/peg_generator/scripts/download_pypi_packages.py
@@ -0,0 +1,86 @@
+#!/usr/bin/env python3.8
+
+import argparse
+import os
+import json
+
+from typing import Dict, Any
+from urllib.request import urlretrieve
+
+argparser = argparse.ArgumentParser(
+    prog="download_pypi_packages", description="Helper program to download PyPI packages",
+)
+argparser.add_argument(
+    "-n", "--number", type=int, default=100, help="Number of packages to download"
+)
+argparser.add_argument(
+    "-a", "--all", action="store_true", help="Download all packages listed in the json file"
+)
+
+
+def load_json(filename: str) -> Dict[Any, Any]:
+    with open(os.path.join("data", f"{filename}.json"), "r") as f:
+        j = json.loads(f.read())
+    return j
+
+
+def remove_json(filename: str) -> None:
+    path = os.path.join("data", f"{filename}.json")
+    os.remove(path)
+
+
+def download_package_json(package_name: str) -> None:
+    url = f"https://pypi.org/pypi/{package_name}/json"
+    urlretrieve(url, os.path.join("data", f"{package_name}.json"))
+
+
+def download_package_code(name: str, package_json: Dict[Any, Any]) -> None:
+    source_index = -1
+    for idx, url_info in enumerate(package_json["urls"]):
+        if url_info["python_version"] == "source":
+            source_index = idx
+            break
+    if source_index == -1:
+        raise IndexError(f"No source distribution found for {name}")
+    filename = package_json["urls"][source_index]["filename"]
+    url = package_json["urls"][source_index]["url"]
+    urlretrieve(url, os.path.join("data", "pypi", filename))
+
+
+def main() -> None:
+    args = argparser.parse_args()
+    number_packages = args.number
+    all_packages = args.all
+
+    top_pypi_packages = load_json("top-pypi-packages-365-days")
+    if all_packages:
+        top_pypi_packages = top_pypi_packages["rows"]
+    elif number_packages >= 0 and number_packages <= 4000:
+        top_pypi_packages = top_pypi_packages["rows"][:number_packages]
+    else:
+        raise AssertionError("Unknown value for NUMBER_OF_PACKAGES")
+
+    try:
+        os.mkdir(os.path.join("data", "pypi"))
+    except FileExistsError:
+        pass
+
+    for package in top_pypi_packages:
+        package_name = package["project"]
+
+        print(f"Downloading JSON Data for {package_name}... ", end="")
+        download_package_json(package_name)
+        print("Done")
+
+        package_json = load_json(package_name)
+        try:
+            print(f"Dowloading and compressing package {package_name} ... ", end="")
+            download_package_code(package_name, package_json)
+            print("Done")
+        except (IndexError, KeyError):
+            print(f"Could not locate source for {package_name}")
+            continue
+        finally:
+            remove_json(package_name)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/Tools/peg_generator/scripts/find_max_nesting.py b/Tools/peg_generator/scripts/find_max_nesting.py
new file mode 100755
index 0000000..a2c41a8
--- /dev/null
+++ b/Tools/peg_generator/scripts/find_max_nesting.py
@@ -0,0 +1,61 @@
+#!/usr/bin/env python3.8
+"""Find the maximum amount of nesting for an expression that can be parsed
+without causing a parse error.
+
+Starting at INITIAL_NESTING_DEPTH, an expression containing n parentheses
+around a 0 is generated, then parsed with the new PEG parser. The number of
+parentheses is incremented by NESTED_INCR_AMT until the parser fails.
+
+The grammar file, initial nesting size, and amount by which the nested size is
+incremented on each success can be controlled by changing the GRAMMAR_FILE,
+INITIAL_NESTING_DEPTH, or NESTED_INCR_AMT variables.
+
+Usage: python -m scripts.find_max_nesting
+"""
+import sys
+
+from _peg_parser import parse_string
+
+GRAMMAR_FILE = "data/python.gram"
+INITIAL_NESTING_DEPTH = 10
+NESTED_INCR_AMT = 10
+
+
+FAIL = "\033[91m"
+ENDC = "\033[0m"
+
+
+def check_nested_expr(nesting_depth: int) -> bool:
+    expr = f"{'(' * nesting_depth}0{')' * nesting_depth}"
+
+    try:
+        parse_string(expr)
+        print(f"Nesting depth of {nesting_depth} is successful")
+        return True
+    except Exception as err:
+        print(f"{FAIL}(Failed with nesting depth of {nesting_depth}{ENDC}")
+        print(f"{FAIL}\t{err}{ENDC}")
+        return False
+
+
+def main() -> None:
+    print(f"Testing {GRAMMAR_FILE} starting at nesting depth of {INITIAL_NESTING_DEPTH}...")
+
+    nesting_depth = INITIAL_NESTING_DEPTH
+    succeeded = True
+    while succeeded:
+        succeeded = check_nested_expr(nesting_depth)
+        nesting_depth += NESTED_INCR_AMT
+
+    sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/Tools/peg_generator/scripts/grammar_grapher.py b/Tools/peg_generator/scripts/grammar_grapher.py
new file mode 100755
index 0000000..3aa2546
--- /dev/null
+++ b/Tools/peg_generator/scripts/grammar_grapher.py
@@ -0,0 +1,111 @@
+#!/usr/bin/env python3.8
+
+""" Convert a grammar into a dot-file suitable for use with GraphViz
+
+    For example:
+        Generate the GraphViz file:
+        # scripts/grammar_grapher.py data/python.gram > python.gv
+
+        Then generate the graph...
+
+        # twopi python.gv -Tpng > python_twopi.png
+
+        or
+
+        # dot python.gv -Tpng > python_dot.png
+
+        NOTE: The _dot_ and _twopi_ tools seem to produce the most useful results.
+              The _circo_ tool is the worst of the bunch. Don't even bother.
+"""
+
+import argparse
+import sys
+
+from typing import Any, List
+
+sys.path.insert(0, ".")
+
+from pegen.build import build_parser
+from pegen.grammar import (
+    Alt,
+    Cut,
+    Grammar,
+    Group,
+    Leaf,
+    Lookahead,
+    Rule,
+    NameLeaf,
+    NamedItem,
+    Opt,
+    Repeat,
+    Rhs,
+)
+
+argparser = argparse.ArgumentParser(prog="graph_grammar", description="Graph a grammar tree",)
+argparser.add_argument("grammar_file", help="The grammar file to graph")
+
+
+def references_for_item(item: Any) -> List[Any]:
+    if isinstance(item, Alt):
+        return [_ref for _item in item.items for _ref in references_for_item(_item)]
+    elif isinstance(item, Cut):
+        return []
+    elif isinstance(item, Group):
+        return references_for_item(item.rhs)
+    elif isinstance(item, Lookahead):
+        return references_for_item(item.node)
+    elif isinstance(item, NamedItem):
+        return references_for_item(item.item)
+
+    # NOTE NameLeaf must be before Leaf
+    elif isinstance(item, NameLeaf):
+        if item.value == "ENDMARKER":
+            return []
+        return [item.value]
+    elif isinstance(item, Leaf):
+        return []
+
+    elif isinstance(item, Opt):
+        return references_for_item(item.node)
+    elif isinstance(item, Repeat):
+        return references_for_item(item.node)
+    elif isinstance(item, Rhs):
+        return [_ref for alt in item.alts for _ref in references_for_item(alt)]
+    elif isinstance(item, Rule):
+        return references_for_item(item.rhs)
+    else:
+        raise RuntimeError(f"Unknown item: {type(item)}")
+
+
+def main() -> None:
+    args = argparser.parse_args()
+
+    try:
+        grammar, parser, tokenizer = build_parser(args.grammar_file)
+    except Exception as err:
+        print(f"ERROR: Failed to parse grammar file: {err}", file=sys.stderr)
+        sys.exit(1)
+
+    references = {}
+    for name, rule in grammar.rules.items():
+        references[name] = set(references_for_item(rule))
+
+    # Flatten the start node if it has only a single reference
+    root_node = "start"
+    if start := references["start"]:
+        if len(start) == 1:
+            root_node = list(start)[0]
+            del references["start"]
+
+    print("digraph g1 {")
+    print('\toverlap="scale";')  # Force twopi to scale the graph to avoid overlaps
+    print(f'\troot="{root_node}";')
+    print(f"\t{root_node} [color=green, shape=circle]")
+    for name, refs in references.items():
+        if refs:  # Ignore empty sets
+            print(f"\t{name} -> {','.join(refs)};")
+    print("}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/Tools/peg_generator/scripts/joinstats.py b/Tools/peg_generator/scripts/joinstats.py
new file mode 100644
index 0000000..b2d762b
--- /dev/null
+++ b/Tools/peg_generator/scripts/joinstats.py
@@ -0,0 +1,66 @@
+#!/usr/bin/env python3.8
+
+"""Produce a report about the most-memoable types.
+
+Reads a list of statistics from stdin.  Each line must be two numbers,
+being a type and a count.  We then read some other files and produce a
+list sorted by most frequent type.
+
+There should also be something to recognize left-recursive rules.
+"""
+
+import os
+import re
+import sys
+
+from typing import Dict
+
+reporoot = os.path.dirname(os.path.dirname(__file__))
+parse_c = os.path.join(reporoot, "peg_extension", "parse.c")
+
+
+class TypeMapper:
+    """State used to map types to names."""
+
+    def __init__(self, filename: str) -> None:
+        self.table: Dict[int, str] = {}
+        with open(filename) as f:
+            for line in f:
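+                # Match lines such as "#define stmt_type 1001" that the C
+                # generator writes into parse.c, mapping each rule's memo
+                # type number to its name.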
+                match = re.match(r"#define (\w+)_type (\d+)", line)
+                if match:
+                    name, type = match.groups()
+                    if "left" in line.lower():
+                        name += " // Left-recursive"
+                    self.table[int(type)] = name
+
+    def lookup(self, type: int) -> str:
+        return self.table.get(type, str(type))
+
+
+def main() -> None:
+    mapper = TypeMapper(parse_c)
+    table = []
+    filename = sys.argv[1]
+    with open(filename) as f:
+        for lineno, line in enumerate(f, 1):
+            line = line.strip()
+            if not line or line.startswith("#"):
+                continue
+            parts = line.split()
+            # Extra fields ignored
+            if len(parts) < 2:
+                print(f"{lineno}: bad input ({line!r})")
+                continue
+            try:
+                type, count = map(int, parts[:2])
+            except ValueError:
+                print(f"{lineno}: non-integer input ({line!r})")
+                continue
+            table.append((type, count))
+    table.sort(key=lambda values: -values[1])
+    for type, count in table:
+        print(f"{type:4d} {count:9d} {mapper.lookup(type)}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/Tools/peg_generator/scripts/show_parse.py b/Tools/peg_generator/scripts/show_parse.py
new file mode 100755
index 0000000..f5f92fd
--- /dev/null
+++ b/Tools/peg_generator/scripts/show_parse.py
@@ -0,0 +1,117 @@
+#!/usr/bin/env python3.8
+
+"""Show the parse tree for a given program, nicely formatted.
+
+Example:
+
+$ scripts/show_parse.py a+b
+Module(
+    body=[
+        Expr(
+            value=BinOp(
+                left=Name(id="a", ctx=Load()), op=Add(), right=Name(id="b", ctx=Load())
+            )
+        )
+    ],
+    type_ignores=[],
+)
+$
+
+Use -v to show line numbers and column offsets.
+
+The formatting is done using black.  You can also import this module
+and call one of its functions.
+"""
+
+import argparse
+import ast
+import difflib
+import os
+import sys
+import tempfile
+
+from typing import List
+
+parser = argparse.ArgumentParser()
+parser.add_argument(
+    "-d", "--diff", action="store_true", help="show diff between grammar and ast (requires -g)"
+)
+parser.add_argument("-g", "--grammar-file", help="grammar to use (default: use the ast module)")
+parser.add_argument(
+    "-m",
+    "--multiline",
+    action="store_true",
+    help="concatenate program arguments using newline instead of space",
+)
+parser.add_argument("-v", "--verbose", action="store_true", help="show line/column numbers")
+parser.add_argument("program", nargs="+", help="program to parse (will be concatenated)")
+
+
+def format_tree(tree: ast.AST, verbose: bool = False) -> str:
+    with tempfile.NamedTemporaryFile("w+") as tf:
+        tf.write(ast.dump(tree, include_attributes=verbose))
+        tf.write("\n")
+        tf.flush()
+        cmd = f"black -q {tf.name}"
+        sts = os.system(cmd)
+        if sts:
+            raise RuntimeError(f"Command {cmd!r} failed with status 0x{sts:x}")
+        tf.seek(0)
+        return tf.read()
+
+
+def diff_trees(a: ast.AST, b: ast.AST, verbose: bool = False) -> List[str]:
+    sa = format_tree(a, verbose)
+    sb = format_tree(b, verbose)
+    la = sa.splitlines()
+    lb = sb.splitlines()
+    return list(difflib.unified_diff(la, lb, "a", "b", lineterm=""))
+
+
+def show_parse(source: str, verbose: bool = False) -> str:
+    tree = ast.parse(source)
+    return format_tree(tree, verbose).rstrip("\n")
+
+
+def print_parse(source: str, verbose: bool = False) -> None:
+    print(show_parse(source, verbose))
+
+
+def main() -> None:
+    args = parser.parse_args()
+    if args.diff and not args.grammar_file:
+        parser.error("-d/--diff requires -g/--grammar-file")
+    if args.multiline:
+        sep = "\n"
+    else:
+        sep = " "
+    program = sep.join(args.program)
+    if args.grammar_file:
+        sys.path.insert(0, os.curdir)
+        from pegen.build import build_parser_and_generator
+
+        build_parser_and_generator(args.grammar_file, "peg_parser/parse.c", compile_extension=True)
+        from pegen.parse import parse_string  # type: ignore[import]
+
+        tree = parse_string(program, mode=1)
+
+        if args.diff:
+            a = tree
+            b = ast.parse(program)
+            diff = diff_trees(a, b, args.verbose)
+            if diff:
+                for line in diff:
+                    print(line)
+            else:
+                print("# Trees are the same")
+        else:
+            print(f"# Parsed using {args.grammar_file}")
+            print(format_tree(tree, args.verbose))
+    else:
+        tree = ast.parse(program)
+        print("# Parse using ast.parse()")
+        print(format_tree(tree, args.verbose))
+
+
+if __name__ == "__main__":
+    main()
diff --git a/Tools/peg_generator/scripts/test_parse_directory.py b/Tools/peg_generator/scripts/test_parse_directory.py
new file mode 100755
index 0000000..06a38fc
--- /dev/null
+++ b/Tools/peg_generator/scripts/test_parse_directory.py
@@ -0,0 +1,289 @@
+#!/usr/bin/env python3.8
+
+import argparse
+import ast
+import os
+import sys
+import tempfile
+import time
+import traceback
+from glob import glob
+from pathlib import PurePath
+
+from typing import List, Optional, Any
+
+sys.path.insert(0, os.getcwd())
+from pegen.build import build_parser_and_generator
+from pegen.testutil import print_memstats
+from scripts import show_parse
+
+SUCCESS = "\033[92m"
+FAIL = "\033[91m"
+ENDC = "\033[0m"
+
+argparser = argparse.ArgumentParser(
+    prog="test_parse_directory",
+    description="Helper program to test directories or files for pegen",
+)
+argparser.add_argument("-d", "--directory", help="Directory path containing files to test")
+argparser.add_argument("-g", "--grammar-file", help="Grammar file path")
+argparser.add_argument(
+    "-e", "--exclude", action="append", default=[], help="Glob(s) for matching files to exclude"
+)
+argparser.add_argument(
+    "-s", "--short", action="store_true", help="Only show errors, in a more Emacs-friendly format"
+)
+argparser.add_argument(
+    "-v", "--verbose", action="store_true", help="Display detailed errors for failures"
+)
+argparser.add_argument(
+    "--skip-actions", action="store_true", help="Suppress code emission for rule actions",
+)
+argparser.add_argument(
+    "-t", "--tree", action="count", help="Compare parse tree to official AST", default=0
+)
+
+
+def report_status(
+    succeeded: bool,
+    file: str,
+    verbose: bool,
+    error: Optional[Exception] = None,
+    short: bool = False,
+) -> None:
+    if short and succeeded:
+        return
+
+    if succeeded:
+        status = "OK"
+        COLOR = SUCCESS
+    else:
+        status = "Fail"
+        COLOR = FAIL
+
+    if short:
+        lineno = 0
+        offset = 0
+        if isinstance(error, SyntaxError):
+            lineno = error.lineno or 1
+            offset = error.offset or 1
+            message = error.args[0]
+        else:
+            message = f"{error.__class__.__name__}: {error}"
+        print(f"{file}:{lineno}:{offset}: {message}")
+    else:
+        print(f"{COLOR}{file:60} {status}{ENDC}")
+
+        if error and verbose:
+            print(f"  {str(error.__class__.__name__)}: {error}")
+
+
+def compare_trees(
+    actual_tree: ast.AST, file: str, verbose: bool, include_attributes: bool = False,
+) -> int:
+    with open(file) as f:
+        expected_tree = ast.parse(f.read())
+
+    expected_text = ast.dump(expected_tree, include_attributes=include_attributes)
+    actual_text = ast.dump(actual_tree, include_attributes=include_attributes)
+    if actual_text == expected_text:
+        if verbose:
+            print("Tree for {file}:")
+            print(show_parse.format_tree(actual_tree, include_attributes))
+        return 0
+
+    print(f"Diffing ASTs for {file} ...")
+
+    expected = show_parse.format_tree(expected_tree, include_attributes)
+    actual = show_parse.format_tree(actual_tree, include_attributes)
+
+    if verbose:
+        print("Expected for {file}:")
+        print(expected)
+        print("Actual for {file}:")
+        print(actual)
+        print(f"Diff for {file}:")
+
+    diff = show_parse.diff_trees(expected_tree, actual_tree, include_attributes)
+    for line in diff:
+        print(line)
+
+    return 1
+
+
+def parse_directory(
+    directory: str,
+    grammar_file: str,
+    verbose: bool,
+    excluded_files: List[str],
+    skip_actions: bool,
+    tree_arg: int,
+    short: bool,
+    extension: Any,
+    mode: int,
+    parser: str,
+) -> int:
+    if parser == "cpython" and (tree_arg or mode == 0):
+        print("Cannot specify tree argument or mode=0 with the cpython parser.", file=sys.stderr)
+        return 1
+
+    if not directory:
+        print("You must specify a directory of files to test.", file=sys.stderr)
+        return 1
+
+    if grammar_file:
+        if not os.path.exists(grammar_file):
+            print(f"The specified grammar file, {grammar_file}, does not exist.", file=sys.stderr)
+            return 1
+
+        try:
+            if not extension and parser == "pegen":
+                build_parser_and_generator(
+                    grammar_file,
+                    "peg_extension/parse.c",
+                    compile_extension=True,
+                    skip_actions=skip_actions,
+                )
+        except Exception as err:
+            print(
+                f"{FAIL}The following error occurred when generating the parser. Please check your grammar file.\n{ENDC}",
+                file=sys.stderr,
+            )
+            traceback.print_exception(err.__class__, err, None)
+
+            return 1
+
+    else:
+        print("A grammar file was not provided - attempting to use existing file...\n")
+
+    if parser == "pegen":
+        try:
+            from peg_extension import parse  # type: ignore
+        except ImportError:
+            print(
+                "An existing parser was not found. Please run `make` or specify a grammar file with the `-g` flag.",
+                file=sys.stderr,
+            )
+            return 1
+
+    # For a given directory, traverse files and attempt to parse each one
+    # - Output success/failure for each file
+    errors = 0
+    files = []
+    trees = {}  # Trees to compare (after everything else is done)
+
+    t0 = time.time()
+    for file in sorted(glob(f"{directory}/**/*.py", recursive=True)):
+        # Only attempt to parse Python files and files that are not excluded
+        should_exclude_file = False
+        for pattern in excluded_files:
+            if PurePath(file).match(pattern):
+                should_exclude_file = True
+                break
+
+        if not should_exclude_file:
+            try:
+                if tree_arg:
+                    mode = 1
+                if parser == "cpython":
+                    with open(file, "r") as f:
+                        source = f.read()
+                        if mode == 2:
+                            compile(source, file, "exec")
+                        elif mode == 1:
+                            ast.parse(source, file, "exec")
+                else:
+                    tree = parse.parse_file(file, mode=mode)
+                if tree_arg:
+                    trees[file] = tree
+                if not short:
+                    report_status(succeeded=True, file=file, verbose=verbose)
+            except Exception as error:
+                try:
+                    with open(file) as f:
+                        ast.parse(f.read())
+                except Exception:
+                    if not short:
+                        print(f"File {file} cannot be parsed by either pegen or the ast module.")
+                else:
+                    report_status(
+                        succeeded=False, file=file, verbose=verbose, error=error, short=short
+                    )
+                    errors += 1
+            files.append(file)
+    t1 = time.time()
+
+    total_seconds = t1 - t0
+    total_files = len(files)
+
+    total_bytes = 0
+    total_lines = 0
+    for file in files:
+        # Count lines and bytes separately
+        with open(file, "rb") as f:
+            total_lines += sum(1 for _ in f)
+            total_bytes += f.tell()
+
+    print(
+        f"Checked {total_files:,} files, {total_lines:,} lines,",
+        f"{total_bytes:,} bytes in {total_seconds:,.3f} seconds.",
+    )
+    if total_seconds > 0:
+        print(
+            f"That's {total_lines / total_seconds :,.0f} lines/sec,",
+            f"or {total_bytes / total_seconds :,.0f} bytes/sec.",
+        )
+
+    if parser == "pegen":
+        # Dump memo stats to @data.
+        with open("@data", "w") as datafile:
+            for i, count in enumerate(parse.get_memo_stats()):
+                if count:
+                    datafile.write(f"{i:4d} {count:9d}\n")
+
+    if short:
+        print_memstats()
+
+    if errors:
+        print(f"Encountered {errors} failures.", file=sys.stderr)
+
+    # Compare trees (the dict is empty unless -t is given)
+    compare_trees_errors = 0
+    for file, tree in trees.items():
+        if not short:
+            print("Comparing ASTs for", file)
+        if compare_trees(tree, file, verbose, tree_arg >= 2) == 1:
+            compare_trees_errors += 1
+
+    if errors or compare_trees_errors:
+        return 1
+
+    return 0
+
+
+def main() -> None:
+    args = argparser.parse_args()
+    directory = args.directory
+    grammar_file = args.grammar_file
+    verbose = args.verbose
+    excluded_files = args.exclude
+    skip_actions = args.skip_actions
+    tree = args.tree
+    short = args.short
+    sys.exit(
+        parse_directory(
+            directory,
+            grammar_file,
+            verbose,
+            excluded_files,
+            skip_actions,
+            tree,
+            short,
+            None,
+            0,
+            "pegen",
+        )
+    )
+
+
+if __name__ == "__main__":
+    main()
diff --git a/Tools/peg_generator/scripts/test_pypi_packages.py b/Tools/peg_generator/scripts/test_pypi_packages.py
new file mode 100755
index 0000000..9049033
--- /dev/null
+++ b/Tools/peg_generator/scripts/test_pypi_packages.py
@@ -0,0 +1,101 @@
+#!/usr/bin/env python3.8
+
+import argparse
+import os
+import glob
+import tarfile
+import zipfile
+import shutil
+import sys
+
+from typing import Generator, Any
+
+sys.path.insert(0, ".")
+from pegen import build
+from scripts import test_parse_directory
+
+argparser = argparse.ArgumentParser(
+    prog="test_pypi_packages", description="Helper program to test parsing PyPI packages",
+)
+argparser.add_argument(
+    "-t", "--tree", action="count", help="Compare parse tree to official AST", default=0
+)
+
+
+def get_packages() -> Generator[str, None, None]:
+    all_packages = (
+        glob.glob("./data/pypi/*.tar.gz")
+        + glob.glob("./data/pypi/*.zip")
+        + glob.glob("./data/pypi/*.tgz")
+    )
+    for package in all_packages:
+        yield package
+
+
+def extract_files(filename: str) -> None:
+    savedir = os.path.join("data", "pypi")
+    if tarfile.is_tarfile(filename):
+        tarfile.open(filename).extractall(savedir)
+    elif zipfile.is_zipfile(filename):
+        zipfile.ZipFile(filename).extractall(savedir)
+    else:
+        raise ValueError(f"Could not identify type of compressed file {filename}")
+
+
+def find_dirname(package_name: str) -> str:
+    for name in os.listdir(os.path.join("data", "pypi")):
+        full_path = os.path.join("data", "pypi", name)
+        if os.path.isdir(full_path) and name in package_name:
+            return full_path
+    assert False  # This is to fix mypy, should never be reached
+
+
+def run_tests(dirname: str, tree: int, extension: Any) -> int:
+    return test_parse_directory.parse_directory(
+        dirname,
+        "data/python.gram",
+        verbose=False,
+        excluded_files=[
+            "*/failset/*",
+            "*/failset/**",
+            "*/failset/**/*",
+            "*/test2to3/*",
+            "*/test2to3/**/*",
+            "*/bad*",
+            "*/lib2to3/tests/data/*",
+        ],
+        skip_actions=False,
+        tree_arg=tree,
+        short=True,
+        extension=extension,
+        mode=1,
+        parser="pegen",
+    )
+
+
+def main() -> None:
+    args = argparser.parse_args()
+    tree = args.tree
+
+    extension = build.build_parser_and_generator(
+        "data/python.gram", "peg_parser/parse.c", compile_extension=True
+    )
+    for package in get_packages():
+        print(f"Extracting files from {package}... ", end="")
+        try:
+            extract_files(package)
+            print("Done")
+        except ValueError as e:
+            print(e)
+            continue
+
+        print(f"Trying to parse all python files ... ")
+        dirname = find_dirname(package)
+        status = run_tests(dirname, tree, extension)
+        if status == 0:
+            print("Done")
+            shutil.rmtree(dirname)
+        else:
+            print(f"Failed to parse {dirname}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/Tools/scripts/run_tests.py b/Tools/scripts/run_tests.py
index 3c1c3bd..bcfa5e9 100644
--- a/Tools/scripts/run_tests.py
+++ b/Tools/scripts/run_tests.py
@@ -25,8 +25,10 @@
             '-u',                 # Unbuffered stdout and stderr
             '-W', 'default',      # Warnings set to 'default'
             '-bb',                # Warnings about bytes/bytearray
-            '-E',                 # Ignore environment variables
             ]
+    if 'PYTHONOLDPARSER' not in os.environ:
+        args.append('-E')         # Ignore environment variables
+
     # Allow user-specified interpreter options to override our defaults.
     args.extend(test.support.args_from_interpreter_flags())