bpo-41486: Faster bz2/lzma/zlib via new output buffering (GH-21740)

Faster bz2/lzma/zlib via new output buffering.
Also adds .readall() function to _compression.DecompressReader class
to take best advantage of this in the consume-all-output at once scenario.

Often a 5-20% speedup in common scenarios due to less data copying.

Contributed by Ma Lin.
diff --git a/Modules/_bz2module.c b/Modules/_bz2module.c
index bfcdac6..9893a63 100644
--- a/Modules/_bz2module.c
+++ b/Modules/_bz2module.c
@@ -8,6 +8,59 @@
 #include <bzlib.h>
 #include <stdio.h>
 
+// Blocks output buffer wrappers
+#include "pycore_blocks_output_buffer.h"
+
+#if OUTPUT_BUFFER_MAX_BLOCK_SIZE > UINT32_MAX
+    #error "The maximum block size accepted by libbzip2 is UINT32_MAX."
+#endif
+
+/* On success, return value >= 0
+   On failure, return -1 */
+static inline Py_ssize_t
+Buffer_InitAndGrow(_BlocksOutputBuffer *buffer, Py_ssize_t max_length,
+                   char **next_out, uint32_t *avail_out)
+{
+    Py_ssize_t allocated;
+
+    allocated = _BlocksOutputBuffer_InitAndGrow(
+                    buffer, max_length, (void**) next_out);
+    *avail_out = (uint32_t) allocated;
+    return allocated;
+}
+
+/* On success, return value >= 0
+   On failure, return -1 */
+static inline Py_ssize_t
+Buffer_Grow(_BlocksOutputBuffer *buffer,
+            char **next_out, uint32_t *avail_out)
+{
+    Py_ssize_t allocated;
+
+    allocated = _BlocksOutputBuffer_Grow(
+                    buffer, (void**) next_out, (Py_ssize_t) *avail_out);
+    *avail_out = (uint32_t) allocated;
+    return allocated;
+}
+
+static inline Py_ssize_t
+Buffer_GetDataSize(_BlocksOutputBuffer *buffer, uint32_t avail_out)
+{
+    return _BlocksOutputBuffer_GetDataSize(buffer, (Py_ssize_t) avail_out);
+}
+
+static inline PyObject *
+Buffer_Finish(_BlocksOutputBuffer *buffer, uint32_t avail_out)
+{
+    return _BlocksOutputBuffer_Finish(buffer, (Py_ssize_t) avail_out);
+}
+
+static inline void
+Buffer_OnError(_BlocksOutputBuffer *buffer)
+{
+    _BlocksOutputBuffer_OnError(buffer);
+}
+
 
 #ifndef BZ_CONFIG_ERROR
 #define BZ2_bzCompress bzCompress
@@ -115,52 +168,22 @@ catch_bz2_error(int bzerror)
     }
 }
 
-#if BUFSIZ < 8192
-#define INITIAL_BUFFER_SIZE 8192
-#else
-#define INITIAL_BUFFER_SIZE BUFSIZ
-#endif
-
-static int
-grow_buffer(PyObject **buf, Py_ssize_t max_length)
-{
-    /* Expand the buffer by an amount proportional to the current size,
-       giving us amortized linear-time behavior. Use a less-than-double
-       growth factor to avoid excessive allocation. */
-    size_t size = PyBytes_GET_SIZE(*buf);
-    size_t new_size = size + (size >> 3) + 6;
-
-    if (max_length > 0 && new_size > (size_t) max_length)
-        new_size = (size_t) max_length;
-
-    if (new_size > size) {
-        return _PyBytes_Resize(buf, new_size);
-    } else {  /* overflow */
-        PyErr_SetString(PyExc_OverflowError,
-                        "Unable to allocate buffer - output too large");
-        return -1;
-    }
-}
-
 
 /* BZ2Compressor class. */
 
 static PyObject *
 compress(BZ2Compressor *c, char *data, size_t len, int action)
 {
-    size_t data_size = 0;
     PyObject *result;
+    _BlocksOutputBuffer buffer = {.list = NULL};
 
-    result = PyBytes_FromStringAndSize(NULL, INITIAL_BUFFER_SIZE);
-    if (result == NULL)
-        return NULL;
-
+    if (Buffer_InitAndGrow(&buffer, -1, &c->bzs.next_out, &c->bzs.avail_out) < 0) {
+        goto error;
+    }
     c->bzs.next_in = data;
     c->bzs.avail_in = 0;
-    c->bzs.next_out = PyBytes_AS_STRING(result);
-    c->bzs.avail_out = INITIAL_BUFFER_SIZE;
+
     for (;;) {
-        char *this_out;
         int bzerror;
 
         /* On a 64-bit system, len might not fit in avail_in (an unsigned int).
@@ -175,21 +198,15 @@ compress(BZ2Compressor *c, char *data, size_t len, int action)
             break;
 
         if (c->bzs.avail_out == 0) {
-            size_t buffer_left = PyBytes_GET_SIZE(result) - data_size;
-            if (buffer_left == 0) {
-                if (grow_buffer(&result, -1) < 0)
-                    goto error;
-                c->bzs.next_out = PyBytes_AS_STRING(result) + data_size;
-                buffer_left = PyBytes_GET_SIZE(result) - data_size;
+            if (Buffer_Grow(&buffer, &c->bzs.next_out, &c->bzs.avail_out) < 0) {
+                goto error;
             }
-            c->bzs.avail_out = (unsigned int)Py_MIN(buffer_left, UINT_MAX);
         }
 
         Py_BEGIN_ALLOW_THREADS
-        this_out = c->bzs.next_out;
         bzerror = BZ2_bzCompress(&c->bzs, action);
-        data_size += c->bzs.next_out - this_out;
         Py_END_ALLOW_THREADS
+
         if (catch_bz2_error(bzerror))
             goto error;
 
@@ -197,13 +214,14 @@ compress(BZ2Compressor *c, char *data, size_t len, int action)
         if (action == BZ_FINISH && bzerror == BZ_STREAM_END)
             break;
     }
-    if (data_size != (size_t)PyBytes_GET_SIZE(result))
-        if (_PyBytes_Resize(&result, data_size) < 0)
-            goto error;
-    return result;
+
+    result = Buffer_Finish(&buffer, c->bzs.avail_out);
+    if (result != NULL) {
+        return result;
+    }
 
 error:
-    Py_XDECREF(result);
+    Buffer_OnError(&buffer);
     return NULL;
 }
 
@@ -420,36 +438,29 @@ decompress_buf(BZ2Decompressor *d, Py_ssize_t max_length)
     /* data_size is strictly positive, but because we repeatedly have to
        compare against max_length and PyBytes_GET_SIZE we declare it as
        signed */
-    Py_ssize_t data_size = 0;
     PyObject *result;
+    _BlocksOutputBuffer buffer = {.list = NULL};
     bz_stream *bzs = &d->bzs;
 
-    if (max_length < 0 || max_length >= INITIAL_BUFFER_SIZE)
-        result = PyBytes_FromStringAndSize(NULL, INITIAL_BUFFER_SIZE);
-    else
-        result = PyBytes_FromStringAndSize(NULL, max_length);
-    if (result == NULL)
-        return NULL;
+    if (Buffer_InitAndGrow(&buffer, max_length, &bzs->next_out, &bzs->avail_out) < 0) {
+        goto error;
+    }
 
-    bzs->next_out = PyBytes_AS_STRING(result);
     for (;;) {
         int bzret;
-        size_t avail;
-
         /* On a 64-bit system, buffer length might not fit in avail_out, so we
            do decompression in chunks of no more than UINT_MAX bytes
            each. Note that the expression for `avail` is guaranteed to be
            positive, so the cast is safe. */
-        avail = (size_t) (PyBytes_GET_SIZE(result) - data_size);
-        bzs->avail_out = (unsigned int)Py_MIN(avail, UINT_MAX);
         bzs->avail_in = (unsigned int)Py_MIN(d->bzs_avail_in_real, UINT_MAX);
         d->bzs_avail_in_real -= bzs->avail_in;
 
         Py_BEGIN_ALLOW_THREADS
         bzret = BZ2_bzDecompress(bzs);
-        data_size = bzs->next_out - PyBytes_AS_STRING(result);
-        d->bzs_avail_in_real += bzs->avail_in;
         Py_END_ALLOW_THREADS
+
+        d->bzs_avail_in_real += bzs->avail_in;
+
         if (catch_bz2_error(bzret))
             goto error;
         if (bzret == BZ_STREAM_END) {
@@ -458,22 +469,21 @@ decompress_buf(BZ2Decompressor *d, Py_ssize_t max_length)
         } else if (d->bzs_avail_in_real == 0) {
             break;
         } else if (bzs->avail_out == 0) {
-            if (data_size == max_length)
+            if (Buffer_GetDataSize(&buffer, bzs->avail_out) == max_length)
                 break;
-            if (data_size == PyBytes_GET_SIZE(result) &&
-                grow_buffer(&result, max_length) == -1)
+            if (Buffer_Grow(&buffer, &bzs->next_out, &bzs->avail_out) < 0) {
                 goto error;
-            bzs->next_out = PyBytes_AS_STRING(result) + data_size;
+            }
         }
     }
-    if (data_size != PyBytes_GET_SIZE(result))
-        if (_PyBytes_Resize(&result, data_size) == -1)
-            goto error;
 
-    return result;
+    result = Buffer_Finish(&buffer, bzs->avail_out);
+    if (result != NULL) {
+        return result;
+    }
 
 error:
-    Py_XDECREF(result);
+    Buffer_OnError(&buffer);
     return NULL;
 }
 
@@ -668,7 +678,7 @@ static int
 _bz2_BZ2Decompressor___init__(PyObject *self, PyObject *args, PyObject *kwargs)
 {
     int return_value = -1;
-    
+
     if (!_PyArg_NoPositional("BZ2Decompressor", args)) {
         goto exit;
     }
diff --git a/Modules/_lzmamodule.c b/Modules/_lzmamodule.c
index b01f630..0d62319 100644
--- a/Modules/_lzmamodule.c
+++ b/Modules/_lzmamodule.c
@@ -15,6 +15,60 @@
 
 #include <lzma.h>
 
+// Blocks output buffer wrappers
+#include "pycore_blocks_output_buffer.h"
+
+#if OUTPUT_BUFFER_MAX_BLOCK_SIZE > SIZE_MAX
+    #error "The maximum block size accepted by liblzma is SIZE_MAX."
+#endif
+
+/* On success, return value >= 0
+   On failure, return -1 */
+static inline Py_ssize_t
+Buffer_InitAndGrow(_BlocksOutputBuffer *buffer, Py_ssize_t max_length,
+                   uint8_t **next_out, size_t *avail_out)
+{
+    Py_ssize_t allocated;
+
+    allocated = _BlocksOutputBuffer_InitAndGrow(
+                    buffer, max_length, (void**) next_out);
+    *avail_out = (size_t) allocated;
+    return allocated;
+}
+
+/* On success, return value >= 0
+   On failure, return -1 */
+static inline Py_ssize_t
+Buffer_Grow(_BlocksOutputBuffer *buffer,
+            uint8_t **next_out, size_t *avail_out)
+{
+    Py_ssize_t allocated;
+
+    allocated = _BlocksOutputBuffer_Grow(
+                    buffer, (void**) next_out, (Py_ssize_t) *avail_out);
+    *avail_out = (size_t) allocated;
+    return allocated;
+}
+
+static inline Py_ssize_t
+Buffer_GetDataSize(_BlocksOutputBuffer *buffer, size_t avail_out)
+{
+    return _BlocksOutputBuffer_GetDataSize(buffer, (Py_ssize_t) avail_out);
+}
+
+static inline PyObject *
+Buffer_Finish(_BlocksOutputBuffer *buffer, size_t avail_out)
+{
+    return _BlocksOutputBuffer_Finish(buffer, (Py_ssize_t) avail_out);
+}
+
+static inline void
+Buffer_OnError(_BlocksOutputBuffer *buffer)
+{
+    _BlocksOutputBuffer_OnError(buffer);
+}
+
+
 #define ACQUIRE_LOCK(obj) do { \
     if (!PyThread_acquire_lock((obj)->lock, 0)) { \
         Py_BEGIN_ALLOW_THREADS \
@@ -128,25 +182,6 @@ PyLzma_Free(void *opaque, void *ptr)
     PyMem_RawFree(ptr);
 }
 
-#if BUFSIZ < 8192
-#define INITIAL_BUFFER_SIZE 8192
-#else
-#define INITIAL_BUFFER_SIZE BUFSIZ
-#endif
-
-static int
-grow_buffer(PyObject **buf, Py_ssize_t max_length)
-{
-    Py_ssize_t size = PyBytes_GET_SIZE(*buf);
-    Py_ssize_t newsize = size + (size >> 3) + 6;
-
-    if (max_length > 0 && newsize > max_length) {
-        newsize = max_length;
-    }
-
-    return _PyBytes_Resize(buf, newsize);
-}
-
 
 /* Some custom type conversions for PyArg_ParseTupleAndKeywords(),
    since the predefined conversion specifiers do not suit our needs:
@@ -510,29 +545,27 @@ class lzma_filter_converter(CConverter):
 static PyObject *
 compress(Compressor *c, uint8_t *data, size_t len, lzma_action action)
 {
-    Py_ssize_t data_size = 0;
     PyObject *result;
+    _BlocksOutputBuffer buffer = {.list = NULL};
     _lzma_state *state = PyType_GetModuleState(Py_TYPE(c));
     assert(state != NULL);
 
-    result = PyBytes_FromStringAndSize(NULL, INITIAL_BUFFER_SIZE);
-    if (result == NULL) {
-        return NULL;
+    if (Buffer_InitAndGrow(&buffer, -1, &c->lzs.next_out, &c->lzs.avail_out) < 0) {
+        goto error;
     }
     c->lzs.next_in = data;
     c->lzs.avail_in = len;
-    c->lzs.next_out = (uint8_t *)PyBytes_AS_STRING(result);
-    c->lzs.avail_out = PyBytes_GET_SIZE(result);
+
     for (;;) {
         lzma_ret lzret;
 
         Py_BEGIN_ALLOW_THREADS
         lzret = lzma_code(&c->lzs, action);
-        data_size = (char *)c->lzs.next_out - PyBytes_AS_STRING(result);
+        Py_END_ALLOW_THREADS
+
         if (lzret == LZMA_BUF_ERROR && len == 0 && c->lzs.avail_out > 0) {
             lzret = LZMA_OK; /* That wasn't a real error */
         }
-        Py_END_ALLOW_THREADS
         if (catch_lzma_error(state, lzret)) {
             goto error;
         }
@@ -540,20 +573,19 @@ compress(Compressor *c, uint8_t *data, size_t len, lzma_action action)
             (action == LZMA_FINISH && lzret == LZMA_STREAM_END)) {
             break;
         } else if (c->lzs.avail_out == 0) {
-            if (grow_buffer(&result, -1) == -1)
+            if (Buffer_Grow(&buffer, &c->lzs.next_out, &c->lzs.avail_out) < 0) {
                 goto error;
-            c->lzs.next_out = (uint8_t *)PyBytes_AS_STRING(result) + data_size;
-            c->lzs.avail_out = PyBytes_GET_SIZE(result) - data_size;
+            }
         }
     }
-    if (data_size != PyBytes_GET_SIZE(result))
-        if (_PyBytes_Resize(&result, data_size) == -1) {
-            goto error;
-        }
-    return result;
+
+    result = Buffer_Finish(&buffer, c->lzs.avail_out);
+    if (result != NULL) {
+        return result;
+    }
 
 error:
-    Py_XDECREF(result);
+    Buffer_OnError(&buffer);
     return NULL;
 }
 
@@ -896,36 +928,26 @@ static PyType_Spec lzma_compressor_type_spec = {
 static PyObject*
 decompress_buf(Decompressor *d, Py_ssize_t max_length)
 {
-    Py_ssize_t data_size = 0;
     PyObject *result;
     lzma_stream *lzs = &d->lzs;
+    _BlocksOutputBuffer buffer = {.list = NULL};
     _lzma_state *state = PyType_GetModuleState(Py_TYPE(d));
     assert(state != NULL);
 
-    if (max_length < 0 || max_length >= INITIAL_BUFFER_SIZE) {
-        result = PyBytes_FromStringAndSize(NULL, INITIAL_BUFFER_SIZE);
+    if (Buffer_InitAndGrow(&buffer, max_length, &lzs->next_out, &lzs->avail_out) < 0) {
+        goto error;
     }
-    else {
-        result = PyBytes_FromStringAndSize(NULL, max_length);
-    }
-    if (result == NULL) {
-        return NULL;
-    }
-
-    lzs->next_out = (uint8_t *)PyBytes_AS_STRING(result);
-    lzs->avail_out = PyBytes_GET_SIZE(result);
 
     for (;;) {
         lzma_ret lzret;
 
         Py_BEGIN_ALLOW_THREADS
         lzret = lzma_code(lzs, LZMA_RUN);
-        data_size = (char *)lzs->next_out - PyBytes_AS_STRING(result);
+        Py_END_ALLOW_THREADS
+
         if (lzret == LZMA_BUF_ERROR && lzs->avail_in == 0 && lzs->avail_out > 0) {
             lzret = LZMA_OK; /* That wasn't a real error */
         }
-        Py_END_ALLOW_THREADS
-
         if (catch_lzma_error(state, lzret)) {
             goto error;
         }
@@ -940,28 +962,24 @@ decompress_buf(Decompressor *d, Py_ssize_t max_length)
                Maybe lzs's internal state still have a few bytes
                can be output, grow the output buffer and continue
                if max_lengh < 0. */
-            if (data_size == max_length) {
+            if (Buffer_GetDataSize(&buffer, lzs->avail_out) == max_length) {
                 break;
             }
-            if (grow_buffer(&result, max_length) == -1) {
+            if (Buffer_Grow(&buffer, &lzs->next_out, &lzs->avail_out) < 0) {
                 goto error;
             }
-            lzs->next_out = (uint8_t *)PyBytes_AS_STRING(result) + data_size;
-            lzs->avail_out = PyBytes_GET_SIZE(result) - data_size;
         } else if (lzs->avail_in == 0) {
             break;
         }
     }
-    if (data_size != PyBytes_GET_SIZE(result)) {
-        if (_PyBytes_Resize(&result, data_size) == -1) {
-            goto error;
-        }
+
+    result = Buffer_Finish(&buffer, lzs->avail_out);
+    if (result != NULL) {
+        return result;
     }
 
-    return result;
-
 error:
-    Py_XDECREF(result);
+    Buffer_OnError(&buffer);
     return NULL;
 }
 
@@ -1042,7 +1060,7 @@ decompress(Decompressor *d, uint8_t *data, size_t len, Py_ssize_t max_length)
                be output, try to output them next time. */
             d->needs_input = 0;
 
-            /* if max_length < 0, lzs->avail_out always > 0 */
+            /* If max_length < 0, lzs->avail_out always > 0 */
             assert(max_length >= 0);
         } else {
             /* Input buffer exhausted, output buffer has space. */
diff --git a/Modules/zlibmodule.c b/Modules/zlibmodule.c
index 1ddaefd..8a20dfc 100644
--- a/Modules/zlibmodule.c
+++ b/Modules/zlibmodule.c
@@ -9,6 +9,79 @@
 #include "structmember.h"         // PyMemberDef
 #include "zlib.h"
 
+// Blocks output buffer wrappers
+#include "pycore_blocks_output_buffer.h"
+
+#if OUTPUT_BUFFER_MAX_BLOCK_SIZE > UINT32_MAX
+    #error "The maximum block size accepted by zlib is UINT32_MAX."
+#endif
+
+/* On success, return value >= 0
+   On failure, return -1 */
+static inline Py_ssize_t
+Buffer_InitAndGrow(_BlocksOutputBuffer *buffer, Py_ssize_t max_length,
+                   Bytef **next_out, uint32_t *avail_out)
+{
+    Py_ssize_t allocated;
+
+    allocated = _BlocksOutputBuffer_InitAndGrow(
+                    buffer, max_length, (void**) next_out);
+    *avail_out = (uint32_t) allocated;
+    return allocated;
+}
+
+/* On success, return value >= 0
+   On failure, return -1 */
+static inline Py_ssize_t
+Buffer_InitWithSize(_BlocksOutputBuffer *buffer, Py_ssize_t init_size,
+                    Bytef **next_out, uint32_t *avail_out)
+{
+    Py_ssize_t allocated;
+
+    if (init_size < 0 || (size_t)init_size > UINT32_MAX) {
+        PyErr_SetString(PyExc_ValueError,
+                        "Initial buffer size should (0 <= size <= UINT32_MAX)");
+        return -1;
+    }
+
+    allocated = _BlocksOutputBuffer_InitWithSize(
+                    buffer, init_size, (void**) next_out);
+    *avail_out = (uint32_t) allocated;
+    return allocated;
+}
+
+/* On success, return value >= 0
+   On failure, return -1 */
+static inline Py_ssize_t
+Buffer_Grow(_BlocksOutputBuffer *buffer,
+            Bytef **next_out, uint32_t *avail_out)
+{
+    Py_ssize_t allocated;
+
+    allocated = _BlocksOutputBuffer_Grow(
+                    buffer, (void**) next_out, (Py_ssize_t) *avail_out);
+    *avail_out = (uint32_t) allocated;
+    return allocated;
+}
+
+static inline Py_ssize_t
+Buffer_GetDataSize(_BlocksOutputBuffer *buffer, uint32_t avail_out)
+{
+    return _BlocksOutputBuffer_GetDataSize(buffer, (Py_ssize_t) avail_out);
+}
+
+static inline PyObject *
+Buffer_Finish(_BlocksOutputBuffer *buffer, uint32_t avail_out)
+{
+    return _BlocksOutputBuffer_Finish(buffer, (Py_ssize_t) avail_out);
+}
+
+static inline void
+Buffer_OnError(_BlocksOutputBuffer *buffer)
+{
+    _BlocksOutputBuffer_OnError(buffer);
+}
+
 
 #define ENTER_ZLIB(obj) do {                      \
     if (!PyThread_acquire_lock((obj)->lock, 0)) { \
@@ -149,56 +222,6 @@ arrange_input_buffer(z_stream *zst, Py_ssize_t *remains)
     *remains -= zst->avail_in;
 }
 
-static Py_ssize_t
-arrange_output_buffer_with_maximum(z_stream *zst, PyObject **buffer,
-                                   Py_ssize_t length,
-                                   Py_ssize_t max_length)
-{
-    Py_ssize_t occupied;
-
-    if (*buffer == NULL) {
-        if (!(*buffer = PyBytes_FromStringAndSize(NULL, length)))
-            return -1;
-        occupied = 0;
-    }
-    else {
-        occupied = zst->next_out - (Byte *)PyBytes_AS_STRING(*buffer);
-
-        if (length == occupied) {
-            Py_ssize_t new_length;
-            assert(length <= max_length);
-            /* can not scale the buffer over max_length */
-            if (length == max_length)
-                return -2;
-            if (length <= (max_length >> 1))
-                new_length = length << 1;
-            else
-                new_length = max_length;
-            if (_PyBytes_Resize(buffer, new_length) < 0)
-                return -1;
-            length = new_length;
-        }
-    }
-
-    zst->avail_out = (uInt)Py_MIN((size_t)(length - occupied), UINT_MAX);
-    zst->next_out = (Byte *)PyBytes_AS_STRING(*buffer) + occupied;
-
-    return length;
-}
-
-static Py_ssize_t
-arrange_output_buffer(z_stream *zst, PyObject **buffer, Py_ssize_t length)
-{
-    Py_ssize_t ret;
-
-    ret = arrange_output_buffer_with_maximum(zst, buffer, length,
-                                             PY_SSIZE_T_MAX);
-    if (ret == -2)
-        PyErr_NoMemory();
-
-    return ret;
-}
-
 /*[clinic input]
 zlib.compress
 
@@ -215,16 +238,20 @@ static PyObject *
 zlib_compress_impl(PyObject *module, Py_buffer *data, int level)
 /*[clinic end generated code: output=d80906d73f6294c8 input=638d54b6315dbed3]*/
 {
-    PyObject *RetVal = NULL;
-    Py_ssize_t obuflen = DEF_BUF_SIZE;
+    PyObject *RetVal;
     int flush;
     z_stream zst;
+    _BlocksOutputBuffer buffer = {.list = NULL};
 
     zlibstate *state = get_zlib_state(module);
 
     Byte *ibuf = data->buf;
     Py_ssize_t ibuflen = data->len;
 
+    if (Buffer_InitAndGrow(&buffer, -1, &zst.next_out, &zst.avail_out) < 0) {
+        goto error;
+    }
+
     zst.opaque = NULL;
     zst.zalloc = PyZlib_Malloc;
     zst.zfree = PyZlib_Free;
@@ -252,10 +279,11 @@ zlib_compress_impl(PyObject *module, Py_buffer *data, int level)
         flush = ibuflen == 0 ? Z_FINISH : Z_NO_FLUSH;
 
         do {
-            obuflen = arrange_output_buffer(&zst, &RetVal, obuflen);
-            if (obuflen < 0) {
-                deflateEnd(&zst);
-                goto error;
+            if (zst.avail_out == 0) {
+                if (Buffer_Grow(&buffer, &zst.next_out, &zst.avail_out) < 0) {
+                    deflateEnd(&zst);
+                    goto error;
+                }
             }
 
             Py_BEGIN_ALLOW_THREADS
@@ -276,15 +304,16 @@ zlib_compress_impl(PyObject *module, Py_buffer *data, int level)
 
     err = deflateEnd(&zst);
     if (err == Z_OK) {
-        if (_PyBytes_Resize(&RetVal, zst.next_out -
-                            (Byte *)PyBytes_AS_STRING(RetVal)) < 0)
+        RetVal = Buffer_Finish(&buffer, zst.avail_out);
+        if (RetVal == NULL) {
             goto error;
+        }
         return RetVal;
     }
     else
         zlib_error(state, zst, err, "while finishing compression");
  error:
-    Py_XDECREF(RetVal);
+    Buffer_OnError(&buffer);
     return NULL;
 }
 
@@ -307,11 +336,12 @@ zlib_decompress_impl(PyObject *module, Py_buffer *data, int wbits,
                      Py_ssize_t bufsize)
 /*[clinic end generated code: output=77c7e35111dc8c42 input=a9ac17beff1f893f]*/
 {
-    PyObject *RetVal = NULL;
+    PyObject *RetVal;
     Byte *ibuf;
     Py_ssize_t ibuflen;
     int err, flush;
     z_stream zst;
+    _BlocksOutputBuffer buffer = {.list = NULL};
 
     zlibstate *state = get_zlib_state(module);
 
@@ -322,6 +352,10 @@ zlib_decompress_impl(PyObject *module, Py_buffer *data, int wbits,
         bufsize = 1;
     }
 
+    if (Buffer_InitWithSize(&buffer, bufsize, &zst.next_out, &zst.avail_out) < 0) {
+        goto error;
+    }
+
     ibuf = data->buf;
     ibuflen = data->len;
 
@@ -350,10 +384,11 @@ zlib_decompress_impl(PyObject *module, Py_buffer *data, int wbits,
         flush = ibuflen == 0 ? Z_FINISH : Z_NO_FLUSH;
 
         do {
-            bufsize = arrange_output_buffer(&zst, &RetVal, bufsize);
-            if (bufsize < 0) {
-                inflateEnd(&zst);
-                goto error;
+            if (zst.avail_out == 0) {
+                if (Buffer_Grow(&buffer, &zst.next_out, &zst.avail_out) < 0) {
+                    inflateEnd(&zst);
+                    goto error;
+                }
             }
 
             Py_BEGIN_ALLOW_THREADS
@@ -393,14 +428,13 @@ zlib_decompress_impl(PyObject *module, Py_buffer *data, int wbits,
         goto error;
     }
 
-    if (_PyBytes_Resize(&RetVal, zst.next_out -
-                        (Byte *)PyBytes_AS_STRING(RetVal)) < 0)
-        goto error;
-
-    return RetVal;
+    RetVal = Buffer_Finish(&buffer, zst.avail_out);
+    if (RetVal != NULL) {
+        return RetVal;
+    }
 
  error:
-    Py_XDECREF(RetVal);
+    Buffer_OnError(&buffer);
     return NULL;
 }
 
@@ -633,9 +667,9 @@ zlib_Compress_compress_impl(compobject *self, PyTypeObject *cls,
                             Py_buffer *data)
 /*[clinic end generated code: output=6731b3f0ff357ca6 input=04d00f65ab01d260]*/
 {
-    PyObject *RetVal = NULL;
-    Py_ssize_t obuflen = DEF_BUF_SIZE;
+    PyObject *RetVal;
     int err;
+    _BlocksOutputBuffer buffer = {.list = NULL};
     zlibstate *state = PyType_GetModuleState(cls);
 
     ENTER_ZLIB(self);
@@ -643,13 +677,18 @@ zlib_Compress_compress_impl(compobject *self, PyTypeObject *cls,
     self->zst.next_in = data->buf;
     Py_ssize_t ibuflen = data->len;
 
+    if (Buffer_InitAndGrow(&buffer, -1, &self->zst.next_out, &self->zst.avail_out) < 0) {
+        goto error;
+    }
+
     do {
         arrange_input_buffer(&self->zst, &ibuflen);
 
         do {
-            obuflen = arrange_output_buffer(&self->zst, &RetVal, obuflen);
-            if (obuflen < 0)
-                goto error;
+            if (self->zst.avail_out == 0) {
+                if (Buffer_Grow(&buffer, &self->zst.next_out, &self->zst.avail_out) < 0)
+                    goto error;
+            }
 
             Py_BEGIN_ALLOW_THREADS
             err = deflate(&self->zst, Z_NO_FLUSH);
@@ -665,12 +704,14 @@ zlib_Compress_compress_impl(compobject *self, PyTypeObject *cls,
 
     } while (ibuflen != 0);
 
-    if (_PyBytes_Resize(&RetVal, self->zst.next_out -
-                        (Byte *)PyBytes_AS_STRING(RetVal)) == 0)
+    RetVal = Buffer_Finish(&buffer, self->zst.avail_out);
+    if (RetVal != NULL) {
         goto success;
+    }
 
  error:
-    Py_CLEAR(RetVal);
+    Buffer_OnError(&buffer);
+    RetVal = NULL;
  success:
     LEAVE_ZLIB(self);
     return RetVal;
@@ -746,8 +787,9 @@ zlib_Decompress_decompress_impl(compobject *self, PyTypeObject *cls,
 /*[clinic end generated code: output=b024a93c2c922d57 input=bfb37b3864cfb606]*/
 {
     int err = Z_OK;
-    Py_ssize_t ibuflen, obuflen = DEF_BUF_SIZE, hard_limit;
-    PyObject *RetVal = NULL;
+    Py_ssize_t ibuflen;
+    PyObject *RetVal;
+    _BlocksOutputBuffer buffer = {.list = NULL};
 
     PyObject *module = PyType_GetModule(cls);
     if (module == NULL)
@@ -758,33 +800,28 @@ zlib_Decompress_decompress_impl(compobject *self, PyTypeObject *cls,
         PyErr_SetString(PyExc_ValueError, "max_length must be non-negative");
         return NULL;
     } else if (max_length == 0)
-        hard_limit = PY_SSIZE_T_MAX;
-    else
-        hard_limit = max_length;
+        max_length = -1;
 
     ENTER_ZLIB(self);
 
     self->zst.next_in = data->buf;
     ibuflen = data->len;
 
-    /* limit amount of data allocated to max_length */
-    if (max_length && obuflen > max_length)
-        obuflen = max_length;
+    if (Buffer_InitAndGrow(&buffer, max_length, &self->zst.next_out, &self->zst.avail_out) < 0) {
+        goto abort;
+    }
 
     do {
         arrange_input_buffer(&self->zst, &ibuflen);
 
         do {
-            obuflen = arrange_output_buffer_with_maximum(&self->zst, &RetVal,
-                                                         obuflen, hard_limit);
-            if (obuflen == -2) {
-                if (max_length > 0) {
+            if (self->zst.avail_out == 0) {
+                if (Buffer_GetDataSize(&buffer, self->zst.avail_out) == max_length) {
                     goto save;
                 }
-                PyErr_NoMemory();
-            }
-            if (obuflen < 0) {
-                goto abort;
+                if (Buffer_Grow(&buffer, &self->zst.next_out, &self->zst.avail_out) < 0) {
+                    goto abort;
+                }
             }
 
             Py_BEGIN_ALLOW_THREADS
@@ -828,12 +865,14 @@ zlib_Decompress_decompress_impl(compobject *self, PyTypeObject *cls,
         goto abort;
     }
 
-    if (_PyBytes_Resize(&RetVal, self->zst.next_out -
-                        (Byte *)PyBytes_AS_STRING(RetVal)) == 0)
+    RetVal = Buffer_Finish(&buffer, self->zst.avail_out);
+    if (RetVal != NULL) {
         goto success;
+    }
 
  abort:
-    Py_CLEAR(RetVal);
+    Buffer_OnError(&buffer);
+    RetVal = NULL;
  success:
     LEAVE_ZLIB(self);
     return RetVal;
@@ -858,8 +897,8 @@ zlib_Compress_flush_impl(compobject *self, PyTypeObject *cls, int mode)
 /*[clinic end generated code: output=c7efd13efd62add2 input=286146e29442eb6c]*/
 {
     int err;
-    Py_ssize_t length = DEF_BUF_SIZE;
-    PyObject *RetVal = NULL;
+    PyObject *RetVal;
+    _BlocksOutputBuffer buffer = {.list = NULL};
 
     zlibstate *state = PyType_GetModuleState(cls);
     /* Flushing with Z_NO_FLUSH is a no-op, so there's no point in
@@ -872,11 +911,15 @@ zlib_Compress_flush_impl(compobject *self, PyTypeObject *cls, int mode)
 
     self->zst.avail_in = 0;
 
+    if (Buffer_InitAndGrow(&buffer, -1, &self->zst.next_out, &self->zst.avail_out) < 0) {
+        goto error;
+    }
+
     do {
-        length = arrange_output_buffer(&self->zst, &RetVal, length);
-        if (length < 0) {
-            Py_CLEAR(RetVal);
-            goto error;
+        if (self->zst.avail_out == 0) {
+            if (Buffer_Grow(&buffer, &self->zst.next_out, &self->zst.avail_out) < 0) {
+                goto error;
+            }
         }
 
         Py_BEGIN_ALLOW_THREADS
@@ -885,7 +928,6 @@ zlib_Compress_flush_impl(compobject *self, PyTypeObject *cls, int mode)
 
         if (err == Z_STREAM_ERROR) {
             zlib_error(state, self->zst, err, "while flushing");
-            Py_CLEAR(RetVal);
             goto error;
         }
     } while (self->zst.avail_out == 0);
@@ -898,7 +940,6 @@ zlib_Compress_flush_impl(compobject *self, PyTypeObject *cls, int mode)
         err = deflateEnd(&self->zst);
         if (err != Z_OK) {
             zlib_error(state, self->zst, err, "while finishing compression");
-            Py_CLEAR(RetVal);
             goto error;
         }
         else
@@ -910,15 +951,18 @@ zlib_Compress_flush_impl(compobject *self, PyTypeObject *cls, int mode)
         */
     } else if (err != Z_OK && err != Z_BUF_ERROR) {
         zlib_error(state, self->zst, err, "while flushing");
-        Py_CLEAR(RetVal);
         goto error;
     }
 
-    if (_PyBytes_Resize(&RetVal, self->zst.next_out -
-                        (Byte *)PyBytes_AS_STRING(RetVal)) < 0)
-        Py_CLEAR(RetVal);
+    RetVal = Buffer_Finish(&buffer, self->zst.avail_out);
+    if (RetVal != NULL) {
+        goto success;
+    }
 
- error:
+error:
+    Buffer_OnError(&buffer);
+    RetVal = NULL;
+success:
     LEAVE_ZLIB(self);
     return RetVal;
 }
@@ -1120,8 +1164,9 @@ zlib_Decompress_flush_impl(compobject *self, PyTypeObject *cls,
 {
     int err, flush;
     Py_buffer data;
-    PyObject *RetVal = NULL;
+    PyObject *RetVal;
     Py_ssize_t ibuflen;
+    _BlocksOutputBuffer buffer = {.list = NULL};
 
     PyObject *module = PyType_GetModule(cls);
     if (module == NULL) {
@@ -1144,14 +1189,19 @@ zlib_Decompress_flush_impl(compobject *self, PyTypeObject *cls,
     self->zst.next_in = data.buf;
     ibuflen = data.len;
 
+    if (Buffer_InitWithSize(&buffer, length, &self->zst.next_out, &self->zst.avail_out) < 0) {
+        goto abort;
+    }
+
     do {
         arrange_input_buffer(&self->zst, &ibuflen);
         flush = ibuflen == 0 ? Z_FINISH : Z_NO_FLUSH;
 
         do {
-            length = arrange_output_buffer(&self->zst, &RetVal, length);
-            if (length < 0)
-                goto abort;
+            if (self->zst.avail_out == 0) {
+                if (Buffer_Grow(&buffer, &self->zst.next_out, &self->zst.avail_out) < 0)
+                    goto abort;
+            }
 
             Py_BEGIN_ALLOW_THREADS
             err = inflate(&self->zst, flush);
@@ -1193,13 +1243,14 @@ zlib_Decompress_flush_impl(compobject *self, PyTypeObject *cls,
         }
     }
 
-    if (_PyBytes_Resize(&RetVal, self->zst.next_out -
-                        (Byte *)PyBytes_AS_STRING(RetVal)) == 0) {
+    RetVal = Buffer_Finish(&buffer, self->zst.avail_out);
+    if (RetVal != NULL) {
         goto success;
     }
 
  abort:
-    Py_CLEAR(RetVal);
+    Buffer_OnError(&buffer);
+    RetVal = NULL;
  success:
     PyBuffer_Release(&data);
     LEAVE_ZLIB(self);