bpo-42955: Add Python/module_names.h (GH-24258)

Add a private list of all stdlib modules: _Py_module_names.

* Add Tools/scripts/generate_module_names.py script.
* Makefile: Add "make regen-module-names" command.
* setup.py: Add --list-module-names option.
* GitHub Actions and Travis CI also run "make regen-module-names",
  not only "make regen-all", to ensure that the module names remain
  up to date.
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 6f05310..6a41b51 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -63,6 +63,7 @@
           # Build Python with the libpython dynamic library
           ./configure --with-pydebug --enable-shared
           make -j4 regen-all
+          make regen-module-names
       - name: Check for changes
         run: |
           changes=$(git status --porcelain)
diff --git a/.travis.yml b/.travis.yml
index 547d919..c908891 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -172,6 +172,7 @@
   - eval "$(pyenv init -)"
   - pyenv global 3.8
   - PYTHON_FOR_REGEN=python3.8 make -j4 regen-all
+  - make regen-module-names
   - changes=`git status --porcelain`
   - |
       # Check for changes in regenerated files
diff --git a/Makefile.pre.in b/Makefile.pre.in
index 5605a88..fa0b9d8 100644
--- a/Makefile.pre.in
+++ b/Makefile.pre.in
@@ -252,7 +252,7 @@
 BUILDPYTHON=	python$(BUILDEXE)
 
 PYTHON_FOR_REGEN?=@PYTHON_FOR_REGEN@
-UPDATE_FILE=@PYTHON_FOR_REGEN@ $(srcdir)/Tools/scripts/update_file.py
+UPDATE_FILE=$(PYTHON_FOR_REGEN) $(srcdir)/Tools/scripts/update_file.py
 PYTHON_FOR_BUILD=@PYTHON_FOR_BUILD@
 _PYTHON_HOST_PLATFORM=@_PYTHON_HOST_PLATFORM@
 BUILD_GNU_TYPE=	@build@
@@ -757,6 +757,8 @@
 regen-all: regen-opcode regen-opcode-targets regen-typeslots \
 	regen-token regen-ast regen-keyword regen-importlib clinic \
 	regen-pegen-metaparser regen-pegen
+	@echo
+	@echo "Note: make regen-module-names and autoconf should be run manually"
 
 ############################################################################
 # Special rules for object files
@@ -896,6 +898,15 @@
 		$(srcdir)/Lib/keyword.py.new
 	$(UPDATE_FILE) $(srcdir)/Lib/keyword.py $(srcdir)/Lib/keyword.py.new
 
+.PHONY: regen-module-names
+regen-module-names: build_all
+	# Regenerate Python/module_names.h
+	# using Tools/scripts/generate_module_names.py
+	$(RUNSHARED) ./$(BUILDPYTHON) \
+		$(srcdir)/Tools/scripts/generate_module_names.py \
+		> $(srcdir)/Python/module_names.h.new
+	$(UPDATE_FILE) $(srcdir)/Python/module_names.h $(srcdir)/Python/module_names.h.new
+
 Python/compile.o Python/symtable.o Python/ast_unparse.o Python/ast.o Python/future.o: $(srcdir)/Include/Python-ast.h
 
 Python/getplatform.o: $(srcdir)/Python/getplatform.c
@@ -1145,7 +1156,9 @@
 		$(srcdir)/Include/internal/pycore_ucnhash.h \
 		$(srcdir)/Include/internal/pycore_unionobject.h \
 		$(srcdir)/Include/internal/pycore_warnings.h \
-		$(DTRACE_HEADERS)
+		$(DTRACE_HEADERS) \
+		\
+		$(srcdir)/Python/module_names.h
 
 $(LIBRARY_OBJS) $(MODOBJS) Programs/python.o: $(PYTHON_HEADERS)
 
diff --git a/PCbuild/pythoncore.vcxproj b/PCbuild/pythoncore.vcxproj
index fd27dea..6a260da 100644
--- a/PCbuild/pythoncore.vcxproj
+++ b/PCbuild/pythoncore.vcxproj
@@ -291,6 +291,7 @@
     <ClInclude Include="..\Python\ceval_gil.h" />
     <ClInclude Include="..\Python\condvar.h" />
     <ClInclude Include="..\Python\importdl.h" />
+    <ClInclude Include="..\Python\module_names.h" />
     <ClInclude Include="..\Python\thread_nt.h" />
     <ClInclude Include="..\Python\wordcode_helpers.h" />
   </ItemGroup>
diff --git a/PCbuild/pythoncore.vcxproj.filters b/PCbuild/pythoncore.vcxproj.filters
index 75a653d..98e3ca2 100644
--- a/PCbuild/pythoncore.vcxproj.filters
+++ b/PCbuild/pythoncore.vcxproj.filters
@@ -360,6 +360,9 @@
     <ClInclude Include="..\Python\importdl.h">
       <Filter>Python</Filter>
     </ClInclude>
+    <ClInclude Include="..\Python\module_names.h">
+      <Filter>Python</Filter>
+    </ClInclude>
     <ClInclude Include="..\Python\thread_nt.h">
       <Filter>Python</Filter>
     </ClInclude>
diff --git a/Python/module_names.h b/Python/module_names.h
new file mode 100644
index 0000000..533a732
--- /dev/null
+++ b/Python/module_names.h
@@ -0,0 +1,336 @@
+// Auto-generated by Tools/scripts/generate_module_names.py.
+
+static const char* _Py_module_names[] = {
+
+// Built-in modules
+"_abc",
+"_ast",
+"_codecs",
+"_collections",
+"_functools",
+"_imp",
+"_io",
+"_locale",
+"_operator",
+"_signal",
+"_sre",
+"_stat",
+"_string",
+"_symtable",
+"_thread",
+"_tracemalloc",
+"_warnings",
+"_weakref",
+"atexit",
+"builtins",
+"errno",
+"faulthandler",
+"gc",
+"itertools",
+"marshal",
+"posix",
+"pwd",
+"sys",
+"time",
+
+// Pure Python modules (Lib/*.py)
+"__future__",
+"abc",
+"aifc",
+"antigravity",
+"argparse",
+"ast",
+"asynchat",
+"asyncore",
+"base64",
+"bdb",
+"binhex",
+"bisect",
+"bz2",
+"cProfile",
+"calendar",
+"cgi",
+"cgitb",
+"chunk",
+"cmd",
+"code",
+"codecs",
+"codeop",
+"colorsys",
+"compileall",
+"configparser",
+"contextlib",
+"contextvars",
+"copy",
+"copyreg",
+"crypt",
+"csv",
+"dataclasses",
+"datetime",
+"decimal",
+"difflib",
+"dis",
+"doctest",
+"enum",
+"filecmp",
+"fileinput",
+"fnmatch",
+"fractions",
+"ftplib",
+"functools",
+"genericpath",
+"getopt",
+"getpass",
+"gettext",
+"glob",
+"graphlib",
+"gzip",
+"hashlib",
+"heapq",
+"hmac",
+"imaplib",
+"imghdr",
+"imp",
+"inspect",
+"io",
+"ipaddress",
+"keyword",
+"linecache",
+"locale",
+"lzma",
+"mailbox",
+"mailcap",
+"mimetypes",
+"modulefinder",
+"netrc",
+"nntplib",
+"ntpath",
+"nturl2path",
+"numbers",
+"opcode",
+"operator",
+"optparse",
+"os",
+"pathlib",
+"pdb",
+"pickle",
+"pickletools",
+"pipes",
+"pkgutil",
+"platform",
+"plistlib",
+"poplib",
+"posixpath",
+"pprint",
+"profile",
+"pstats",
+"pty",
+"py_compile",
+"pyclbr",
+"pydoc",
+"queue",
+"quopri",
+"random",
+"re",
+"reprlib",
+"rlcompleter",
+"runpy",
+"sched",
+"secrets",
+"selectors",
+"shelve",
+"shlex",
+"shutil",
+"signal",
+"site",
+"smtpd",
+"smtplib",
+"sndhdr",
+"socket",
+"socketserver",
+"sre_compile",
+"sre_constants",
+"sre_parse",
+"ssl",
+"stat",
+"statistics",
+"string",
+"stringprep",
+"struct",
+"subprocess",
+"sunau",
+"symtable",
+"sysconfig",
+"tabnanny",
+"tarfile",
+"telnetlib",
+"tempfile",
+"textwrap",
+"this",
+"threading",
+"timeit",
+"token",
+"tokenize",
+"trace",
+"traceback",
+"tracemalloc",
+"tty",
+"turtle",
+"types",
+"typing",
+"uu",
+"uuid",
+"warnings",
+"wave",
+"weakref",
+"webbrowser",
+"xdrlib",
+"zipapp",
+"zipfile",
+"zipimport",
+
+// Packages and sub-packages
+"asyncio",
+"collections",
+"concurrent",
+"concurrent.futures",
+"ctypes",
+"ctypes.macholib",
+"curses",
+"dbm",
+"distutils",
+"distutils.command",
+"email",
+"email.mime",
+"encodings",
+"ensurepip",
+"ensurepip._bundled",
+"html",
+"http",
+"idlelib",
+"importlib",
+"json",
+"lib2to3",
+"lib2to3.fixes",
+"lib2to3.pgen2",
+"logging",
+"msilib",
+"multiprocessing",
+"multiprocessing.dummy",
+"pydoc_data",
+"sqlite3",
+"tkinter",
+"turtledemo",
+"unittest",
+"urllib",
+"venv",
+"wsgiref",
+"xml",
+"xml.dom",
+"xml.etree",
+"xml.parsers",
+"xml.sax",
+"xmlrpc",
+"zoneinfo",
+
+// Extension modules built by setup.py
+"_asyncio",
+"_bisect",
+"_blake2",
+"_bz2",
+"_codecs_cn",
+"_codecs_hk",
+"_codecs_iso2022",
+"_codecs_jp",
+"_codecs_kr",
+"_codecs_tw",
+"_contextvars",
+"_crypt",
+"_csv",
+"_ctypes",
+"_curses",
+"_curses_panel",
+"_datetime",
+"_dbm",
+"_decimal",
+"_elementtree",
+"_gdbm",
+"_hashlib",
+"_heapq",
+"_json",
+"_lsprof",
+"_lzma",
+"_md5",
+"_multibytecodec",
+"_multiprocessing",
+"_opcode",
+"_pickle",
+"_posixshmem",
+"_posixsubprocess",
+"_queue",
+"_random",
+"_sha1",
+"_sha256",
+"_sha3",
+"_sha512",
+"_socket",
+"_sqlite3",
+"_ssl",
+"_statistics",
+"_struct",
+"_tkinter",
+"_uuid",
+"_xxsubinterpreters",
+"_zoneinfo",
+"array",
+"audioop",
+"binascii",
+"cmath",
+"fcntl",
+"grp",
+"math",
+"mmap",
+"nis",
+"ossaudiodev",
+"pyexpat",
+"readline",
+"resource",
+"select",
+"spwd",
+"syslog",
+"termios",
+"unicodedata",
+"zlib",
+
+// Built-in and extension modules built by Modules/Setup
+"_abc",
+"_codecs",
+"_collections",
+"_functools",
+"_io",
+"_locale",
+"_operator",
+"_signal",
+"_sre",
+"_stat",
+"_symtable",
+"_thread",
+"_tracemalloc",
+"_weakref",
+"atexit",
+"errno",
+"faulthandler",
+"itertools",
+"posix",
+"pwd",
+"time",
+
+// Windows extension modules
+"_msi",
+"_winapi",
+"msvcrt",
+"nt",
+"winreg",
+"winsound",
+
+};
diff --git a/Tools/scripts/generate_module_names.py b/Tools/scripts/generate_module_names.py
new file mode 100644
index 0000000..985a1a5
--- /dev/null
+++ b/Tools/scripts/generate_module_names.py
@@ -0,0 +1,200 @@
+# This script lists the names of standard library modules
+# to update Python/module_names.h
+import os.path
+import re
+import subprocess
+import sys
+import sysconfig
+
+
+SRC_DIR = os.path.dirname(os.path.dirname(os.path.dirname(__file__)))
+STDLIB_PATH = os.path.join(SRC_DIR, 'Lib')
+MODULES_SETUP = os.path.join(SRC_DIR, 'Modules', 'Setup')
+SETUP_PY = os.path.join(SRC_DIR, 'setup.py')
+
+IGNORE = {
+    '__init__',
+    '__pycache__',
+    'site-packages',
+
+    # Helper modules of public modules.
+    # For example, sysconfig uses _osx_support.
+    '_aix_support',
+    '_collections_abc',
+    '_compat_pickle',
+    '_compression',
+    '_markupbase',
+    '_osx_support',
+    '_sitebuiltins',
+    '_strptime',
+    '_threading_local',
+    '_weakrefset',
+
+    # Used to bootstrap setup.py
+    '_bootsubprocess',
+
+    # pure Python implementation
+    '_py_abc',
+    '_pydecimal',
+    '_pyio',
+
+    # test modules
+    '__phello__.foo',
+    '_ctypes_test',
+    '_testbuffer',
+    '_testcapi',
+    '_testconsole',
+    '_testimportmultiple',
+    '_testinternalcapi',
+    '_testmultiphase',
+    '_xxtestfuzz',
+    'distutils.tests',
+    'idlelib.idle_test',
+    'lib2to3.tests',
+    'test',
+    'xxlimited',
+    'xxlimited_35',
+    'xxsubtype',
+}
+
+# Windows extension modules
+WINDOWS_MODULES = (
+    '_msi',
+    '_testconsole',
+    '_winapi',
+    'msvcrt',
+    'nt',
+    'winreg',
+    'winsound'
+)
+
+
+def write_comment(fp, comment):
+    print(f"// {comment}", file=fp)
+
+
+def write_modules(fp, names):
+    """Write sorted, non-ignored names as C strings plus a blank line."""
+    for module_name in sorted(names):
+        if module_name not in IGNORE:
+            print(f'"{module_name}",', file=fp)
+    print(file=fp)
+
+
+def list_builtin_modules(fp):
+    write_comment(fp, "Built-in modules")
+    write_modules(fp, sys.builtin_module_names)
+
+
+# Pure Python modules (Lib/*.py)
+def list_python_modules(fp):
+    """Write the names of the *.py files found directly in STDLIB_PATH."""
+    write_comment(fp, "Pure Python modules (Lib/*.py)")
+    names = [
+        entry.removesuffix(".py")
+        for entry in os.listdir(STDLIB_PATH)
+        if entry.endswith(".py")
+    ]
+    write_modules(fp, names)
+
+
+def _list_sub_packages(path, names, parent=None):
+    """Append to names the dotted name of each non-ignored package in path.
+
+    parent is the dotted name of the package that path belongs to, if any.
+    """
+    for entry in os.listdir(path):
+        entry_path = os.path.join(path, entry)
+        if entry in IGNORE or not os.path.isdir(entry_path):
+            continue
+        # Skip directories that do not directly contain a *.py file.
+        if not any(child.endswith(".py") for child in os.listdir(entry_path)):
+            continue
+        qualname = f"{parent}.{entry}" if parent else entry
+        if qualname in IGNORE:
+            continue
+        names.append(qualname)
+        # Recurse to pick up sub-packages such as "xml.dom".
+        _list_sub_packages(entry_path, names, qualname)
+
+
+# Packages and sub-packages found recursively under STDLIB_PATH
+def list_packages(fp):
+    write_comment(fp, "Packages and sub-packages")
+    names = []
+    _list_sub_packages(STDLIB_PATH, names)
+    write_modules(fp, names)
+
+
+# Windows extensions, taken from the hard-coded WINDOWS_MODULES tuple
+def list_windows_extensions(fp):
+    write_comment(fp, "Windows extension modules")
+    write_modules(fp, WINDOWS_MODULES)
+
+
+# Extension modules built by setup.py
+def list_setup(fp):
+    """Write the names printed by "setup.py build --list-module-names"."""
+    argv = [sys.executable, SETUP_PY, "-q", "build", "--list-module-names"]
+    raw_output = subprocess.check_output(argv)
+    names = raw_output.decode("utf8").splitlines()
+
+    write_comment(fp, "Extension modules built by setup.py")
+    write_modules(fp, names)
+
+
+# Built-in and extension modules built by Modules/Setup
+def list_modules_setup(fp):
+    """Write the module names declared in the Modules/Setup file.
+
+    Comments, empty lines, "VAR=VALUE" assignments and the "*disabled*" and
+    "*shared*" lines are skipped. The first word of each remaining line
+    with at least two words is taken as a module name.
+    """
+    assign_var = re.compile("^[A-Z]+=")
+    markers = ("*disabled*", "*shared*")
+
+    names = []
+    with open(MODULES_SETUP, encoding="utf-8") as modules_fp:
+        for raw_line in modules_fp:
+            # Strip comment
+            line = raw_line.partition("#")[0].rstrip()
+            # Ignore empty lines, "VAR=VALUE" and the marker lines
+            if not line or assign_var.match(line) or line in markers:
+                continue
+            words = line.split()
+            # "errno errnomodule.c" => write "errno"
+            if len(words) >= 2:
+                names.append(words[0])
+
+    write_comment(fp, "Built-in and extension modules built by Modules/Setup")
+    write_modules(fp, names)
+
+
+def list_modules(fp):
+    """Write the complete _Py_module_names array definition to fp."""
+    print("// Auto-generated by Tools/scripts/generate_module_names.py.", file=fp)
+    print(file=fp)
+    print("static const char* _Py_module_names[] = {", file=fp)
+    print(file=fp)
+
+    for write_section in (
+        list_builtin_modules, list_python_modules, list_packages,
+        list_setup, list_modules_setup, list_windows_extensions,
+    ):
+        write_section(fp)
+
+    print("};", file=fp)
+
+
+def main():
+    """Print the module list on stdout; exit 1 if not a Python build."""
+    if sysconfig.is_python_build():
+        list_modules(sys.stdout)
+        return
+    print(f"ERROR: {sys.executable} is not a Python build", file=sys.stderr)
+    sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/setup.py b/setup.py
index ddc0bd0..a4d21d4 100644
--- a/setup.py
+++ b/setup.py
@@ -46,6 +46,9 @@
 # This global variable is used to hold the list of modules to be disabled.
 DISABLED_MODULE_LIST = []
 
+# --list-module-names option used by Tools/scripts/generate_module_names.py
+LIST_MODULE_NAMES = False
+
 
 def get_platform():
     # Cross compiling
@@ -447,12 +450,20 @@ def build_extensions(self):
         # Detect which modules should be compiled
         self.detect_modules()
 
-        self.remove_disabled()
+        if not LIST_MODULE_NAMES:
+            self.remove_disabled()
 
         self.update_sources_depends()
         mods_built, mods_disabled = self.remove_configured_extensions()
         self.set_compiler_executables()
 
+        if LIST_MODULE_NAMES:
+            for ext in self.extensions:
+                print(ext.name)
+            for name in self.missing:
+                print(name)
+            return
+
         build_ext.build_extensions(self)
 
         if SUBPROCESS_BOOTSTRAP:
@@ -1118,6 +1129,7 @@ def detect_crypt(self):
             # bpo-31904: crypt() function is not provided by VxWorks.
             # DES_crypt() OpenSSL provides is too weak to implement
             # the encryption.
+            self.missing.append('_crypt')
             return
 
         if self.compiler.find_library_file(self.lib_dirs, 'crypt'):
@@ -1125,8 +1137,7 @@ def detect_crypt(self):
         else:
             libs = []
 
-        self.add(Extension('_crypt', ['_cryptmodule.c'],
-                               libraries=libs))
+        self.add(Extension('_crypt', ['_cryptmodule.c'], libraries=libs))
 
     def detect_socket(self):
         # socket(2)
@@ -1735,27 +1746,29 @@ def detect_multiprocessing(self):
         if MS_WINDOWS:
             multiprocessing_srcs = ['_multiprocessing/multiprocessing.c',
                                     '_multiprocessing/semaphore.c']
-
         else:
             multiprocessing_srcs = ['_multiprocessing/multiprocessing.c']
             if (sysconfig.get_config_var('HAVE_SEM_OPEN') and not
                 sysconfig.get_config_var('POSIX_SEMAPHORES_NOT_ENABLED')):
                 multiprocessing_srcs.append('_multiprocessing/semaphore.c')
-            if (sysconfig.get_config_var('HAVE_SHM_OPEN') and
-                sysconfig.get_config_var('HAVE_SHM_UNLINK')):
-                posixshmem_srcs = ['_multiprocessing/posixshmem.c']
-                libs = []
-                if sysconfig.get_config_var('SHM_NEEDS_LIBRT'):
-                    # need to link with librt to get shm_open()
-                    libs.append('rt')
-                self.add(Extension('_posixshmem', posixshmem_srcs,
-                                   define_macros={},
-                                   libraries=libs,
-                                   include_dirs=["Modules/_multiprocessing"]))
-
         self.add(Extension('_multiprocessing', multiprocessing_srcs,
                            include_dirs=["Modules/_multiprocessing"]))
 
+        if (not MS_WINDOWS and
+           sysconfig.get_config_var('HAVE_SHM_OPEN') and
+           sysconfig.get_config_var('HAVE_SHM_UNLINK')):
+            posixshmem_srcs = ['_multiprocessing/posixshmem.c']
+            libs = []
+            if sysconfig.get_config_var('SHM_NEEDS_LIBRT'):
+                # need to link with librt to get shm_open()
+                libs.append('rt')
+            self.add(Extension('_posixshmem', posixshmem_srcs,
+                               define_macros={},
+                               libraries=libs,
+                               include_dirs=["Modules/_multiprocessing"]))
+        else:
+            self.missing.append('_posixshmem')
+
     def detect_uuid(self):
         # Build the _uuid module if possible
         uuid_incs = find_file("uuid.h", self.inc_dirs, ["/usr/include/uuid"])
@@ -2549,6 +2562,12 @@ def copy_scripts(self):
 
 
 def main():
+    global LIST_MODULE_NAMES
+
+    if "--list-module-names" in sys.argv:
+        LIST_MODULE_NAMES = True
+        sys.argv.remove("--list-module-names")
+
     set_compiler_flags('CFLAGS', 'PY_CFLAGS_NODIST')
     set_compiler_flags('LDFLAGS', 'PY_LDFLAGS_NODIST')