bpo-43788: Generate version specific _ssl_data.h (GH-25300)



Signed-off-by: Christian Heimes <christian@python.org>

Automerge-Triggered-By: GH:tiran
diff --git a/Tools/ssl/make_ssl_data.py b/Tools/ssl/make_ssl_data.py
index 1dc234f..ab1134e 100755
--- a/Tools/ssl/make_ssl_data.py
+++ b/Tools/ssl/make_ssl_data.py
@@ -6,95 +6,129 @@
 
 It takes two arguments:
 - the path to the OpenSSL source tree (e.g. git checkout)
-- the path to the C file to be generated
-  (probably Modules/_ssl_data.h)
+- the path to the header file to be generated, e.g. Modules/_ssl_data_{version}.h
+  (optional, defaults to stdout; error codes are version specific)
 """
 
+import argparse
 import datetime
-import glob
+import operator
 import os
 import re
 import sys
-import _ssl
 
 
-def parse_error_codes(h_file, prefix, libcode):
-    pat = re.compile(r"#\s*define\W+(%s([\w]+))\W+(\d+)\b" % re.escape(prefix))
-    codes = []
-    with open(h_file, "r", encoding="latin1") as f:
+parser = argparse.ArgumentParser(
+    description="Generate ssl_data.h from OpenSSL sources"
+)
+parser.add_argument("srcdir", help="OpenSSL source directory")
+parser.add_argument(  # output is optional; stdout is used when it is omitted
+    "output", nargs="?", type=argparse.FileType("w"), default=sys.stdout
+)
+
+
+def _file_search(fname, pat):  # generator: one match object per matching line
+    with open(fname, encoding="utf-8") as f:
         for line in f:
             match = pat.search(line)
-            if match:
-                code, name, num = match.groups()
-                num = int(num)
-                # e.g. ("SSL_R_BAD_DATA", ("ERR_LIB_SSL", "BAD_DATA", 390))
-                codes.append((code, (libcode, name, num)))
-    assert codes, f"no codes found in {h_file}"
-    return codes
+            if match is not None:
+                yield match
+
+
+def parse_err_h(args):
+    """Return {libname: number} from ERR_LIB_* defines in args.err_h, e.g. {"X509": 11}"""
+    pat = re.compile(r"#\s*define\W+ERR_LIB_(\w+)\s+(\d+)")
+    lib2errnum = {}
+    for match in _file_search(args.err_h, pat):
+        libname, num = match.groups()
+        lib2errnum[libname] = int(num)
+
+    return lib2errnum
+
+
+def parse_openssl_error_text(args):
+    """Yield (reason, libname, errname, num) per LIB_R_NAME:num: line of args.errtxt"""
+    # ignore backslash line continuation for now
+    pat = re.compile(r"^((\w+?)_R_(\w+)):(\d+):")
+    for match in _file_search(args.errtxt, pat):
+        reason, libname, errname, num = match.groups()
+        if "_F_" in reason:
+            # ignore function codes (any matched name containing "_F_")
+            continue
+        num = int(num)
+        yield reason, libname, errname, num
+
+
+def parse_extra_reasons(args):
+    """Yield (reason, libname, errname, num) per "R LIB_R_NAME num" line of args.errcodes"""
+    pat = re.compile(r"^R\s+((\w+)_R_(\w+))\s+(\d+)")
+    for match in _file_search(args.errcodes, pat):
+        reason, libname, errname, num = match.groups()
+        num = int(num)
+        yield reason, libname, errname, num
+
+
+def gen_library_codes(args):
+    """Yield C lines of the library_codes table, one #ifdef-guarded entry per libname"""
+    yield "static struct py_ssl_library_code library_codes[] = {"
+    for libname in sorted(args.lib2errnum):
+        yield f"#ifdef ERR_LIB_{libname}"
+        yield f'    {{"{libname}", ERR_LIB_{libname}}},'
+        yield "#endif"
+    yield "    { NULL }"
+    yield "};"
+    yield ""
+
+
+def gen_error_codes(args):
+    """Yield C lines of the error_codes table: macros under #ifdef, parsed numbers otherwise"""
+    yield "static struct py_ssl_error_code error_codes[] = {"
+    for reason, libname, errname, num in args.reasons:
+        yield f"  #ifdef {reason}"
+        yield f'    {{"{errname}", ERR_LIB_{libname}, {reason}}},'
+        yield "  #else"
+        yield f'    {{"{errname}", {args.lib2errnum[libname]}, {num}}},'
+        yield "  #endif"
+
+    yield "    { NULL }"
+    yield "};"
+    yield ""
+
+
+def main():
+    """Generate the C error tables from the OpenSSL sources in srcdir."""
+    args = parser.parse_args()
+
+    args.err_h = os.path.join(args.srcdir, "include", "openssl", "err.h")
+    if not os.path.isfile(args.err_h):
+        # Fall back to infile for OpenSSL 3.0.0
+        args.err_h += ".in"
+    args.errcodes = os.path.join(args.srcdir, "crypto", "err", "openssl.ec")
+    args.errtxt = os.path.join(args.srcdir, "crypto", "err", "openssl.txt")
+
+    if not os.path.isfile(args.errtxt):
+        parser.error(f"File {args.errtxt} not found in srcdir.")
+
+    # {X509: 11, ...}
+    args.lib2errnum = parse_err_h(args)
+
+    # [('X509_R_AKID_MISMATCH', 'X509', 'AKID_MISMATCH', 110), ...]
+    reasons = [*parse_openssl_error_text(args), *parse_extra_reasons(args)]
+    # sort by reason name (index 0), then numeric error code (index 3)
+    args.reasons = sorted(reasons, key=operator.itemgetter(0, 3))
+
+    # header comments: separate list items so each is written on its own line
+    lines = [
+        "/* File generated by Tools/ssl/make_ssl_data.py */",
+        f"/* Generated on {datetime.datetime.utcnow().isoformat()} */",
+    ]
+    lines.extend(gen_library_codes(args))
+    lines.append("")
+    lines.extend(gen_error_codes(args))
+
+    for line in lines:
+        args.output.write(line + "\n")
+
 
 if __name__ == "__main__":
-    openssl_inc = sys.argv[1]
-    outfile = sys.argv[2]
-    use_stdout = outfile == '-'
-    f = sys.stdout if use_stdout else open(outfile, "w")
-    # mnemonic -> (library code, error prefix, header file)
-    error_libraries = {}
-    for error_header in glob.glob(os.path.join(glob.escape(openssl_inc), 'include/openssl/*err.h')):
-        base = os.path.basename(error_header)
-        if base in ('buffererr.h', 'objectserr.h', 'storeerr.h'):
-            # Deprecated in 3.0.
-            continue
-        mnemonic = base[:-5].upper()
-        if mnemonic == "":
-            # err.h
-            lib_codes = {
-                code: num
-                for (code, (_, _, num)) in parse_error_codes(error_header, 'ERR_LIB_', None)
-            }
-        else:
-            error_libraries[mnemonic] = (f'ERR_LIB_{mnemonic}', f'{mnemonic}_R_', error_header)
-
-    # Read codes from libraries
-    new_codes = []
-    for libcode, prefix, h_file in sorted(error_libraries.values()):
-        new_codes += parse_error_codes(h_file, prefix, libcode)
-    new_code_nums = set((libcode, num)
-                        for (code, (libcode, name, num)) in new_codes)
-
-    # Merge with existing codes (in case some old codes disappeared).
-    codes = {}
-    for errname, (libnum, errnum) in _ssl.err_names_to_codes.items():
-        lib = error_libraries[_ssl.lib_codes_to_names[libnum]]
-        libcode = lib[0]              # e.g. ERR_LIB_PEM
-        errcode = lib[1] + errname    # e.g. SSL_R_BAD_SSL_SESSION_ID_LENGTH
-        # Only keep it if the numeric codes weren't reused
-        if (libcode, errnum) not in new_code_nums:
-            codes[errcode] = libcode, errname, errnum
-    codes.update(dict(new_codes))
-
-    def w(l):
-        f.write(l + "\n")
-    w("/* File generated by Tools/ssl/make_ssl_data.py */")
-    w("/* Generated on %s */" % datetime.datetime.now().isoformat())
-    w("")
-
-    w("static struct py_ssl_library_code library_codes[] = {")
-    for mnemo, (libcode, _, _) in sorted(error_libraries.items()):
-        w(f'#ifdef {libcode}')
-        w('    {"%s", %s},' % (mnemo, libcode))
-        w('#endif')
-    w('    { NULL }')
-    w('};')
-    w("")
-
-    w("static struct py_ssl_error_code error_codes[] = {")
-    for errcode, (libcode, name, num) in sorted(codes.items()):
-        w('  #ifdef %s' % (errcode))
-        w('    {"%s", %s, %s},' % (name, libcode, errcode))
-        w('  #else')
-        w('    {"%s", %s, %d},' % (name, lib_codes[libcode], num))
-        w('  #endif')
-    w('    { NULL }')
-    w('};')
-    if not use_stdout:
-        f.close()
+    main()
diff --git a/Tools/ssl/multissltests.py b/Tools/ssl/multissltests.py
index 598503f..dd8d211 100755
--- a/Tools/ssl/multissltests.py
+++ b/Tools/ssl/multissltests.py
@@ -49,7 +49,7 @@
 
 OPENSSL_RECENT_VERSIONS = [
     "1.1.1k",
-    # "3.0.0-alpha12"
+    # "3.0.0-alpha14"
 ]
 
 LIBRESSL_OLD_VERSIONS = [