bpo-42382: In importlib.metadata, `EntryPoint` objects now expose `dist` (#23758)

* bpo-42382: In importlib.metadata, `EntryPoint` objects now expose a `.dist` object referencing the `Distribution` when constructed from a `Distribution`.

  Also, sync importlib_metadata 3.3:

  - Add support for package discovery under package normalization rules.
  - The object returned by `metadata()` now has a formally-defined protocol called `PackageMetadata` with declared support for the `.get_all()` method.

* Add blurb

* Remove latent footnote.

commit: dfdca85dfa64e72df385b3a486f85b773fc0f135 [log] [tgz]
author: Jason R. Coombs <jaraco@jaraco.com> Thu Dec 31 12:56:43 2020 -0500
committer: GitHub <noreply@github.com> Thu Dec 31 12:56:43 2020 -0500
tree: f035325cbc5e8787d8e7824bdd7ad4edbe42e795
parent: f4936ad1c4d0ae1948e428aeddc7d3096252dae4 [diff] [blame]
diff --git a/Lib/importlib/metadata.py b/Lib/importlib/metadata.py
index 302d61d..36bb42e 100644
--- a/Lib/importlib/metadata.py
+++ b/Lib/importlib/metadata.py

@@ -1,4 +1,3 @@
-import io
 import os
 import re
 import abc
@@ -18,6 +17,7 @@
 from importlib import import_module
 from importlib.abc import MetaPathFinder
 from itertools import starmap
+from typing import Any, List, Optional, Protocol, TypeVar, Union
 
 
 __all__ = [
@@ -31,7 +31,7 @@
     'metadata',
     'requires',
     'version',
-    ]
+]
 
 
 class PackageNotFoundError(ModuleNotFoundError):
@@ -43,7 +43,7 @@ def __str__(self):
 
     @property
     def name(self):
-        name, = self.args
+        (name,) = self.args
         return name
 
 
@@ -60,7 +60,7 @@ class EntryPoint(
         r'(?P<module>[\w.]+)\s*'
         r'(:\s*(?P<attr>[\w.]+))?\s*'
         r'(?P<extras>\[.*\])?\s*$'
-        )
+    )
     """
     A regular expression describing the syntax for an entry point,
     which might look like:
@@ -77,6 +77,8 @@ class EntryPoint(
     following the attr, and following any extras.
     """
 
+    dist: Optional['Distribution'] = None
+
     def load(self):
         """Load the entry point from its definition. If only a module
         is indicated by the value, return that module. Otherwise,
@@ -104,23 +106,27 @@ def extras(self):
 
     @classmethod
     def _from_config(cls, config):
-        return [
+        return (
             cls(name, value, group)
             for group in config.sections()
             for name, value in config.items(group)
-            ]
+        )
 
     @classmethod
     def _from_text(cls, text):
         config = ConfigParser(delimiters='=')
         # case sensitive: https://stackoverflow.com/q/1611799/812183
         config.optionxform = str
-        try:
-            config.read_string(text)
-        except AttributeError:  # pragma: nocover
-            # Python 2 has no read_string
-            config.readfp(io.StringIO(text))
-        return EntryPoint._from_config(config)
+        config.read_string(text)
+        return cls._from_config(config)
+
+    @classmethod
+    def _from_text_for(cls, text, dist):
+        return (ep._for(dist) for ep in cls._from_text(text))
+
+    def _for(self, dist):
+        self.dist = dist
+        return self
 
     def __iter__(self):
         """
@@ -132,7 +138,7 @@ def __reduce__(self):
         return (
             self.__class__,
             (self.name, self.value, self.group),
-            )
+        )
 
 
 class PackagePath(pathlib.PurePosixPath):
@@ -159,6 +165,25 @@ def __repr__(self):
         return '<FileHash mode: {} value: {}>'.format(self.mode, self.value)
 
 
+_T = TypeVar("_T")
+
+
+class PackageMetadata(Protocol):
+    def __len__(self) -> int:
+        ...  # pragma: no cover
+
+    def __contains__(self, item: str) -> bool:
+        ...  # pragma: no cover
+
+    def __getitem__(self, key: str) -> str:
+        ...  # pragma: no cover
+
+    def get_all(self, name: str, failobj: _T = ...) -> Union[List[Any], _T]:
+        """
+        Return all values associated with a possibly multi-valued key.
+        """
+
+
 class Distribution:
     """A Python distribution package."""
 
@@ -210,9 +235,8 @@ def discover(cls, **kwargs):
             raise ValueError("cannot accept context and kwargs")
         context = context or DistributionFinder.Context(**kwargs)
         return itertools.chain.from_iterable(
-            resolver(context)
-            for resolver in cls._discover_resolvers()
-            )
+            resolver(context) for resolver in cls._discover_resolvers()
+        )
 
     @staticmethod
     def at(path):
@@ -227,24 +251,24 @@ def at(path):
     def _discover_resolvers():
         """Search the meta_path for resolvers."""
         declared = (
-            getattr(finder, 'find_distributions', None)
-            for finder in sys.meta_path
-            )
+            getattr(finder, 'find_distributions', None) for finder in sys.meta_path
+        )
         return filter(None, declared)
 
     @classmethod
     def _local(cls, root='.'):
         from pep517 import build, meta
+
         system = build.compat_system(root)
         builder = functools.partial(
             meta.build,
             source_dir=root,
             system=system,
-            )
+        )
         return PathDistribution(zipfile.Path(meta.build_as_zip(builder)))
 
     @property
-    def metadata(self):
+    def metadata(self) -> PackageMetadata:
         """Return the parsed metadata for this Distribution.
 
         The returned object will have keys that name the various bits of
@@ -257,17 +281,22 @@ def metadata(self):
             # effect is to just end up using the PathDistribution's self._path
             # (which points to the egg-info file) attribute unchanged.
             or self.read_text('')
-            )
+        )
         return email.message_from_string(text)
 
     @property
+    def name(self):
+        """Return the 'Name' metadata for the distribution package."""
+        return self.metadata['Name']
+
+    @property
     def version(self):
         """Return the 'Version' metadata for the distribution package."""
         return self.metadata['Version']
 
     @property
     def entry_points(self):
-        return EntryPoint._from_text(self.read_text('entry_points.txt'))
+        return list(EntryPoint._from_text_for(self.read_text('entry_points.txt'), self))
 
     @property
     def files(self):
@@ -324,9 +353,10 @@ def _deps_from_requires_text(cls, source):
         section_pairs = cls._read_sections(source.splitlines())
         sections = {
             section: list(map(operator.itemgetter('line'), results))
-            for section, results in
-            itertools.groupby(section_pairs, operator.itemgetter('section'))
-            }
+            for section, results in itertools.groupby(
+                section_pairs, operator.itemgetter('section')
+            )
+        }
         return cls._convert_egg_info_reqs_to_simple_reqs(sections)
 
     @staticmethod
@@ -350,6 +380,7 @@ def _convert_egg_info_reqs_to_simple_reqs(sections):
         requirement. This method converts the former to the
         latter. See _test_deps_from_requires_text for an example.
         """
+
         def make_condition(name):
             return name and 'extra == "{name}"'.format(name=name)
 
@@ -438,48 +469,69 @@ def zip_children(self):
         names = zip_path.root.namelist()
         self.joinpath = zip_path.joinpath
 
-        return dict.fromkeys(
-            child.split(posixpath.sep, 1)[0]
-            for child in names
-            )
-
-    def is_egg(self, search):
-        base = self.base
-        return (
-            base == search.versionless_egg_name
-            or base.startswith(search.prefix)
-            and base.endswith('.egg'))
+        return dict.fromkeys(child.split(posixpath.sep, 1)[0] for child in names)
 
     def search(self, name):
-        for child in self.children():
-            n_low = child.lower()
-            if (n_low in name.exact_matches
-                    or n_low.startswith(name.prefix)
-                    and n_low.endswith(name.suffixes)
-                    # legacy case:
-                    or self.is_egg(name) and n_low == 'egg-info'):
-                yield self.joinpath(child)
+        return (
+            self.joinpath(child)
+            for child in self.children()
+            if name.matches(child, self.base)
+        )
 
 
 class Prepared:
     """
     A prepared search for metadata on a possibly-named package.
     """
-    normalized = ''
-    prefix = ''
+
+    normalized = None
     suffixes = '.dist-info', '.egg-info'
     exact_matches = [''][:0]
-    versionless_egg_name = ''
 
     def __init__(self, name):
         self.name = name
         if name is None:
             return
-        self.normalized = name.lower().replace('-', '_')
-        self.prefix = self.normalized + '-'
-        self.exact_matches = [
-            self.normalized + suffix for suffix in self.suffixes]
-        self.versionless_egg_name = self.normalized + '.egg'
+        self.normalized = self.normalize(name)
+        self.exact_matches = [self.normalized + suffix for suffix in self.suffixes]
+
+    @staticmethod
+    def normalize(name):
+        """
+        PEP 503 normalization plus dashes as underscores.
+        """
+        return re.sub(r"[-_.]+", "-", name).lower().replace('-', '_')
+
+    @staticmethod
+    def legacy_normalize(name):
+        """
+        Normalize the package name as found in the convention in
+        older packaging tools versions and specs.
+        """
+        return name.lower().replace('-', '_')
+
+    def matches(self, cand, base):
+        low = cand.lower()
+        pre, ext = os.path.splitext(low)
+        name, sep, rest = pre.partition('-')
+        return (
+            low in self.exact_matches
+            or ext in self.suffixes
+            and (not self.normalized or name.replace('.', '_') == self.normalized)
+            # legacy case:
+            or self.is_egg(base)
+            and low == 'egg-info'
+        )
+
+    def is_egg(self, base):
+        normalized = self.legacy_normalize(self.name or '')
+        prefix = normalized + '-' if normalized else ''
+        versionless_egg_name = normalized + '.egg' if self.name else ''
+        return (
+            base == versionless_egg_name
+            or base.startswith(prefix)
+            and base.endswith('.egg')
+        )
 
 
 class MetadataPathFinder(DistributionFinder):
@@ -500,9 +552,8 @@ def find_distributions(cls, context=DistributionFinder.Context()):
     def _search_paths(cls, name, paths):
         """Find metadata directories in paths heuristically."""
         return itertools.chain.from_iterable(
-            path.search(Prepared(name))
-            for path in map(FastPath, paths)
-            )
+            path.search(Prepared(name)) for path in map(FastPath, paths)
+        )
 
 
 class PathDistribution(Distribution):
@@ -515,9 +566,15 @@ def __init__(self, path):
         self._path = path
 
     def read_text(self, filename):
-        with suppress(FileNotFoundError, IsADirectoryError, KeyError,
-                      NotADirectoryError, PermissionError):
+        with suppress(
+            FileNotFoundError,
+            IsADirectoryError,
+            KeyError,
+            NotADirectoryError,
+            PermissionError,
+        ):
             return self._path.joinpath(filename).read_text(encoding='utf-8')
+
     read_text.__doc__ = Distribution.read_text.__doc__
 
     def locate_file(self, path):
@@ -541,11 +598,11 @@ def distributions(**kwargs):
     return Distribution.discover(**kwargs)
 
 
-def metadata(distribution_name):
+def metadata(distribution_name) -> PackageMetadata:
     """Get the metadata for the named package.
 
     :param distribution_name: The name of the distribution package to query.
-    :return: An email.Message containing the parsed metadata.
+    :return: A PackageMetadata containing the parsed metadata.
     """
     return Distribution.from_name(distribution_name).metadata
 
@@ -565,15 +622,11 @@ def entry_points():
 
     :return: EntryPoint objects for all installed packages.
     """
-    eps = itertools.chain.from_iterable(
-        dist.entry_points for dist in distributions())
+    eps = itertools.chain.from_iterable(dist.entry_points for dist in distributions())
     by_group = operator.attrgetter('group')
     ordered = sorted(eps, key=by_group)
     grouped = itertools.groupby(ordered, by_group)
-    return {
-        group: tuple(eps)
-        for group, eps in grouped
-        }
+    return {group: tuple(eps) for group, eps in grouped}
 
 
 def files(distribution_name):
commit	dfdca85dfa64e72df385b3a486f85b773fc0f135	[log] [tgz]
author	Jason R. Coombs <jaraco@jaraco.com>	Thu Dec 31 12:56:43 2020 -0500
committer	GitHub <noreply@github.com>	Thu Dec 31 12:56:43 2020 -0500
tree	f035325cbc5e8787d8e7824bdd7ad4edbe42e795
parent	f4936ad1c4d0ae1948e428aeddc7d3096252dae4 [diff] [blame]