bpo-32819: Simplify and improve ssl.match_hostname (#5620)

ssl.match_hostname() has been simplified and no longer depends on the re and
ipaddress modules for wildcard and IP address matching. Error reporting for
invalid wildcards has been improved.

Signed-off-by: Christian Heimes <christian@python.org>
diff --git a/Lib/ssl.py b/Lib/ssl.py
index f253769..ecdbb70 100644
--- a/Lib/ssl.py
+++ b/Lib/ssl.py
@@ -90,8 +90,6 @@
 ALERT_DESCRIPTION_UNKNOWN_PSK_IDENTITY
 """
 
-import ipaddress
-import re
 import sys
 import os
 from collections import namedtuple
@@ -160,6 +158,7 @@
 
 from socket import socket, AF_INET, SOCK_STREAM, create_connection
 from socket import SOL_SOCKET, SO_TYPE
+import socket as _socket
 import base64        # for DER-to-PEM translation
 import errno
 import warnings
@@ -183,55 +182,75 @@
 def _dnsname_match(dn, hostname):
     """Matching according to RFC 6125, section 6.4.3
 
-    http://tools.ietf.org/html/rfc6125#section-6.4.3
+    - Hostnames are compared lower case.
+    - For IDNA, both dn and hostname must be encoded as IDN A-label (ACE).
+    - Partial wildcards like 'www*.example.org', multiple wildcards, a sole
+      wildcard, or wildcards in labels other than the left-most label are
+      not supported and a CertificateError is raised.
+    - A wildcard must match at least one character.
     """
-    pats = []
     if not dn:
         return False
 
-    leftmost, *remainder = dn.split(r'.')
-
-    wildcards = leftmost.count('*')
-    if wildcards == 1 and len(leftmost) > 1:
-        # Only match wildcard in leftmost segment.
-        raise CertificateError(
-            "wildcard can only be present in the leftmost segment: " + repr(dn))
-
-    if wildcards > 1:
-        # Issue #17980: avoid denials of service by refusing more
-        # than one wildcard per fragment.  A survey of established
-        # policy among SSL implementations showed it to be a
-        # reasonable choice.
-        raise CertificateError(
-            "too many wildcards in certificate DNS name: " + repr(dn))
-
+    wildcards = dn.count('*')
     # speed up common case w/o wildcards
     if not wildcards:
         return dn.lower() == hostname.lower()
 
-    # RFC 6125, section 6.4.3, subitem 1.
-    # The client SHOULD NOT attempt to match a presented identifier in which
-    # the wildcard character comprises a label other than the left-most label.
-    if leftmost == '*':
-        # When '*' is a fragment by itself, it matches a non-empty dotless
-        # fragment.
-        pats.append('[^.]+')
-    elif leftmost.startswith('xn--') or hostname.startswith('xn--'):
-        # RFC 6125, section 6.4.3, subitem 3.
-        # The client SHOULD NOT attempt to match a presented identifier
-        # where the wildcard character is embedded within an A-label or
-        # U-label of an internationalized domain name.
-        pats.append(re.escape(leftmost))
-    else:
-        # Otherwise, '*' matches any dotless string, e.g. www*
-        pats.append(re.escape(leftmost).replace(r'\*', '[^.]*'))
+    if wildcards > 1:
+        raise CertificateError(
+            "too many wildcards in certificate DNS name: {!r}.".format(dn))
 
-    # add the remaining fragments, ignore any wildcards
-    for frag in remainder:
-        pats.append(re.escape(frag))
+    dn_leftmost, sep, dn_remainder = dn.partition('.')
 
-    pat = re.compile(r'\A' + r'\.'.join(pats) + r'\Z', re.IGNORECASE)
-    return pat.match(hostname)
+    if '*' in dn_remainder:
+        # Only match wildcard in leftmost segment.
+        raise CertificateError(
+            "wildcard can only be present in the leftmost label: "
+            "{!r}.".format(dn))
+
+    if not sep:
+        # no right side
+        raise CertificateError(
+            "sole wildcard without additional labels are not support: "
+            "{!r}.".format(dn))
+
+    if dn_leftmost != '*':
+        # no partial wildcard matching
+        raise CertificateError(
+            "partial wildcards in leftmost label are not supported: "
+            "{!r}.".format(dn))
+
+    hostname_leftmost, sep, hostname_remainder = hostname.partition('.')
+    if not hostname_leftmost or not sep:
+        # wildcard must match at least one char
+        return False
+    return dn_remainder.lower() == hostname_remainder.lower()
+
+
+def _inet_paton(ipname):
+    """Try to convert an IP address to packed binary form
+
+    Supports IPv4 addresses on all platforms and IPv6 on platforms with IPv6
+    support.
+    """
+    # inet_aton() also accepts short forms like '1'; require three dots
+    if ipname.count('.') == 3:
+        try:
+            return _socket.inet_aton(ipname)
+        except OSError:
+            pass
+
+    try:
+        return _socket.inet_pton(_socket.AF_INET6, ipname)
+    except OSError:
+        raise ValueError("{!r} is neither an IPv4 nor an IP6 "
+                         "address.".format(ipname))
+    except AttributeError:
+        # AF_INET6 not available
+        pass
+
+    raise ValueError("{!r} is not an IPv4 address.".format(ipname))
 
 
 def _ipaddress_match(ipname, host_ip):
@@ -241,14 +260,19 @@
     (section 1.7.2 - "Out of Scope").
     """
     # OpenSSL may add a trailing newline to a subjectAltName's IP address
-    ip = ipaddress.ip_address(ipname.rstrip())
+    ip = _inet_paton(ipname.rstrip())
     return ip == host_ip
 
 
 def match_hostname(cert, hostname):
     """Verify that *cert* (in decoded format as returned by
     SSLSocket.getpeercert()) matches the *hostname*.  RFC 2818 and RFC 6125
-    rules are followed, but IP addresses are not accepted for *hostname*.
+    rules are followed.
+
+    The function matches IP addresses rather than dNSNames if hostname is a
+    valid IP address string. IPv4 addresses are supported on all platforms.
+    IPv6 addresses are supported on platforms with IPv6 support (AF_INET6
+    and inet_pton).
 
     CertificateError is raised on failure. On success, the function
     returns nothing.
@@ -258,7 +282,7 @@
                          "SSL socket or SSL context with either "
                          "CERT_OPTIONAL or CERT_REQUIRED")
     try:
-        host_ip = ipaddress.ip_address(hostname)
+        host_ip = _inet_paton(hostname)
     except ValueError:
         # Not an IP address (common case)
         host_ip = None