bpo-41486: Faster bz2/lzma/zlib via new output buffering (GH-21740)
Faster bz2/lzma/zlib via new output buffering.
Also adds a .readall() method to the _compression.DecompressReader class
to take best advantage of this in the consume-all-output-at-once scenario.
Often a 5-20% speedup in common scenarios due to less data copying.
Contributed by Ma Lin.
diff --git a/Lib/_compression.py b/Lib/_compression.py
index b00f31b..e8b70aa 100644
--- a/Lib/_compression.py
+++ b/Lib/_compression.py
@@ -1,7 +1,7 @@
"""Internal classes used by the gzip, lzma and bz2 modules"""
import io
-
+import sys
BUFFER_SIZE = io.DEFAULT_BUFFER_SIZE # Compressed data read chunk size
@@ -110,6 +110,16 @@ def read(self, size=-1):
self._pos += len(data)
return data
+    def readall(self):
+        chunks = []
+        # sys.maxsize removes the limit on the output buffer's length,
+        # so each .read() call can decompress all of the currently
+        # buffered input in a single .decompress() call.
+        while data := self.read(sys.maxsize):
+            chunks.append(data)
+
+        return b"".join(chunks)
+
# Rewind the file to the beginning of the data stream.
def _rewind(self):
self._fp.seek(0)