[3.8] bpo-39667: Sync zipp 3.0 (GH-18540) (GH-18701)
* bpo-39667: Sync zipp 3.0 (GH-18540)
* bpo-39667: Improve pathlib.Path compatibility of zipfile.Path and correct a performance degradation, matching the changes in zipp 3.0
* 📜🤖 Added by blurb_it.
* Update docs for new zipfile.Path.open
* Rely on dict, which preserves insertion order and is faster than OrderedDict.
* Syntax edits on docs
Co-authored-by: blurb-it[bot] <43283697+blurb-it[bot]@users.noreply.github.com>
(cherry picked from commit 0aeab5c4381f0cc11479362af2533b3a391312ac)
Co-authored-by: Jason R. Coombs <jaraco@jaraco.com>
* Clarify the change in behavior with a couple of workaround options.
* Restore API compatibility while backporting performance improvements.
Co-authored-by: Jason R. Coombs <jaraco@jaraco.com>
diff --git a/Lib/zipfile.py b/Lib/zipfile.py
index 5dc6516..07faacc 100644
--- a/Lib/zipfile.py
+++ b/Lib/zipfile.py
@@ -17,7 +17,6 @@
import threading
import time
import contextlib
-from collections import OrderedDict
try:
import zlib # We may need its compression method
@@ -2125,24 +2124,6 @@
return (fname, archivename)
-def _unique_everseen(iterable, key=None):
- "List unique elements, preserving order. Remember all elements ever seen."
- # unique_everseen('AAAABBBCCDAABBB') --> A B C D
- # unique_everseen('ABBCcAD', str.lower) --> A B C D
- seen = set()
- seen_add = seen.add
- if key is None:
- for element in itertools.filterfalse(seen.__contains__, iterable):
- seen_add(element)
- yield element
- else:
- for element in iterable:
- k = key(element)
- if k not in seen:
- seen_add(k)
- yield element
-
-
def _parents(path):
"""
Given a path with elements separated by
@@ -2184,6 +2165,18 @@
path, tail = posixpath.split(path)
+_dedupe = dict.fromkeys
+"""Deduplicate an iterable in original order"""
+
+
+def _difference(minuend, subtrahend):
+ """
+ Return items in minuend not in subtrahend, retaining order
+ with O(1) lookup.
+ """
+ return itertools.filterfalse(set(subtrahend).__contains__, minuend)
+
+
class CompleteDirs(ZipFile):
"""
A ZipFile subclass that ensures that implied directories
@@ -2193,13 +2186,8 @@
@staticmethod
def _implied_dirs(names):
parents = itertools.chain.from_iterable(map(_parents, names))
- # Deduplicate entries in original order
- implied_dirs = OrderedDict.fromkeys(
- p + posixpath.sep for p in parents
- # Cast names to a set for O(1) lookups
- if p + posixpath.sep not in set(names)
- )
- return implied_dirs
+ as_dirs = (p + posixpath.sep for p in parents)
+ return _dedupe(_difference(as_dirs, names))
def namelist(self):
names = super(CompleteDirs, self).namelist()