| Antoine Pitrou | 37dc5f8 | 2011-04-03 17:05:46 +0200 | [diff] [blame] | 1 | /* _bz2 - Low-level Python interface to libbzip2. */ | 
 | 2 |  | 
 | 3 | #define PY_SSIZE_T_CLEAN | 
 | 4 |  | 
 | 5 | #include "Python.h" | 
 | 6 | #include "structmember.h" | 
 | 7 |  | 
 | 8 | #ifdef WITH_THREAD | 
 | 9 | #include "pythread.h" | 
 | 10 | #endif | 
 | 11 |  | 
 | 12 | #include <bzlib.h> | 
 | 13 | #include <stdio.h> | 
 | 14 |  | 
 | 15 |  | 
 | 16 | #ifndef BZ_CONFIG_ERROR | 
 | 17 | #define BZ2_bzCompress bzCompress | 
 | 18 | #define BZ2_bzCompressInit bzCompressInit | 
 | 19 | #define BZ2_bzCompressEnd bzCompressEnd | 
 | 20 | #define BZ2_bzDecompress bzDecompress | 
 | 21 | #define BZ2_bzDecompressInit bzDecompressInit | 
 | 22 | #define BZ2_bzDecompressEnd bzDecompressEnd | 
 | 23 | #endif  /* ! BZ_CONFIG_ERROR */ | 
 | 24 |  | 
 | 25 |  | 
#ifdef WITH_THREAD
/* Acquire obj->lock.  A non-blocking attempt (waitflag 0) is made first;
   only if that fails is a blocking acquire (waitflag 1) performed, wrapped
   in Py_BEGIN_ALLOW_THREADS / Py_END_ALLOW_THREADS so that other Python
   threads can run while this one waits. */
#define ACQUIRE_LOCK(obj) do { \
    if (!PyThread_acquire_lock((obj)->lock, 0)) { \
        Py_BEGIN_ALLOW_THREADS \
        PyThread_acquire_lock((obj)->lock, 1); \
        Py_END_ALLOW_THREADS \
    } } while (0)
/* Release obj->lock. */
#define RELEASE_LOCK(obj) PyThread_release_lock((obj)->lock)
#else
/* Built without thread support: both macros expand to nothing. */
#define ACQUIRE_LOCK(obj)
#define RELEASE_LOCK(obj)
#endif
 | 38 |  | 
/* Smaller of X and Y.  Each argument can be evaluated twice, so do not
   pass expressions with side effects. */
#define MIN(X, Y) (((X) < (Y)) ? (X) : (Y))
 | 40 |  | 
| Antoine Pitrou | 37dc5f8 | 2011-04-03 17:05:46 +0200 | [diff] [blame] | 41 |  | 
/* Instance layout of _bz2.BZ2Compressor. */
typedef struct {
    PyObject_HEAD
    bz_stream bzs;      /* libbzip2 compression stream state */
    int flushed;        /* set to 1 by flush(); compress()/flush() then
                           raise ValueError */
#ifdef WITH_THREAD
    PyThread_type_lock lock;    /* held by compress() and flush() while
                                   they use bzs */
#endif
} BZ2Compressor;
 | 50 |  | 
/* Instance layout of _bz2.BZ2Decompressor. */
typedef struct {
    PyObject_HEAD
    bz_stream bzs;      /* libbzip2 decompression stream state */
    char eof;           /* T_BOOL expects a char */
                        /* set to 1 once BZ_STREAM_END has been seen */
    PyObject *unused_data;  /* bytes object: input left over after the end
                               of the stream (initially b'') */
#ifdef WITH_THREAD
    PyThread_type_lock lock;    /* held by decompress() while it uses bzs */
#endif
} BZ2Decompressor;
 | 60 |  | 
 | 61 |  | 
 | 62 | /* Helper functions. */ | 
 | 63 |  | 
 | 64 | static int | 
 | 65 | catch_bz2_error(int bzerror) | 
 | 66 | { | 
 | 67 |     switch(bzerror) { | 
 | 68 |         case BZ_OK: | 
 | 69 |         case BZ_RUN_OK: | 
 | 70 |         case BZ_FLUSH_OK: | 
 | 71 |         case BZ_FINISH_OK: | 
 | 72 |         case BZ_STREAM_END: | 
 | 73 |             return 0; | 
 | 74 |  | 
 | 75 | #ifdef BZ_CONFIG_ERROR | 
 | 76 |         case BZ_CONFIG_ERROR: | 
 | 77 |             PyErr_SetString(PyExc_SystemError, | 
 | 78 |                             "libbzip2 was not compiled correctly"); | 
 | 79 |             return 1; | 
 | 80 | #endif | 
 | 81 |         case BZ_PARAM_ERROR: | 
 | 82 |             PyErr_SetString(PyExc_ValueError, | 
 | 83 |                             "Internal error - " | 
 | 84 |                             "invalid parameters passed to libbzip2"); | 
 | 85 |             return 1; | 
 | 86 |         case BZ_MEM_ERROR: | 
 | 87 |             PyErr_NoMemory(); | 
 | 88 |             return 1; | 
 | 89 |         case BZ_DATA_ERROR: | 
 | 90 |         case BZ_DATA_ERROR_MAGIC: | 
 | 91 |             PyErr_SetString(PyExc_IOError, "Invalid data stream"); | 
 | 92 |             return 1; | 
 | 93 |         case BZ_IO_ERROR: | 
 | 94 |             PyErr_SetString(PyExc_IOError, "Unknown I/O error"); | 
 | 95 |             return 1; | 
 | 96 |         case BZ_UNEXPECTED_EOF: | 
 | 97 |             PyErr_SetString(PyExc_EOFError, | 
 | 98 |                             "Compressed file ended before the logical " | 
 | 99 |                             "end-of-stream was detected"); | 
 | 100 |             return 1; | 
 | 101 |         case BZ_SEQUENCE_ERROR: | 
 | 102 |             PyErr_SetString(PyExc_RuntimeError, | 
 | 103 |                             "Internal error - " | 
 | 104 |                             "Invalid sequence of commands sent to libbzip2"); | 
 | 105 |             return 1; | 
 | 106 |         default: | 
 | 107 |             PyErr_Format(PyExc_IOError, | 
 | 108 |                          "Unrecognized error from libbzip2: %d", bzerror); | 
 | 109 |             return 1; | 
 | 110 |     } | 
 | 111 | } | 
 | 112 |  | 
/* Initial size, in bytes, of the output buffer allocated by compress()
   and decompress(): BUFSIZ, but never less than 8192. */
#if BUFSIZ < 8192
#define SMALLCHUNK 8192
#else
#define SMALLCHUNK BUFSIZ
#endif
 | 118 |  | 
| Antoine Pitrou | 37dc5f8 | 2011-04-03 17:05:46 +0200 | [diff] [blame] | 119 | static int | 
 | 120 | grow_buffer(PyObject **buf) | 
 | 121 | { | 
| Nadeem Vawda | 72d6a13 | 2011-10-13 13:38:14 +0200 | [diff] [blame] | 122 |     /* Expand the buffer by an amount proportional to the current size, | 
 | 123 |        giving us amortized linear-time behavior. Use a less-than-double | 
 | 124 |        growth factor to avoid excessive allocation. */ | 
| Antoine Pitrou | 37dc5f8 | 2011-04-03 17:05:46 +0200 | [diff] [blame] | 125 |     size_t size = PyBytes_GET_SIZE(*buf); | 
| Nadeem Vawda | 18b7fcc | 2012-10-21 21:16:58 +0200 | [diff] [blame] | 126 |     size_t new_size = size + (size >> 3) + 6; | 
 | 127 |     if (new_size > size) { | 
 | 128 |         return _PyBytes_Resize(buf, new_size); | 
 | 129 |     } else {  /* overflow */ | 
 | 130 |         PyErr_SetString(PyExc_OverflowError, | 
 | 131 |                         "Unable to allocate buffer - output too large"); | 
 | 132 |         return -1; | 
 | 133 |     } | 
| Antoine Pitrou | 37dc5f8 | 2011-04-03 17:05:46 +0200 | [diff] [blame] | 134 | } | 
 | 135 |  | 
 | 136 |  | 
 | 137 | /* BZ2Compressor class. */ | 
 | 138 |  | 
 | 139 | static PyObject * | 
 | 140 | compress(BZ2Compressor *c, char *data, size_t len, int action) | 
 | 141 | { | 
 | 142 |     size_t data_size = 0; | 
 | 143 |     PyObject *result; | 
 | 144 |  | 
 | 145 |     result = PyBytes_FromStringAndSize(NULL, SMALLCHUNK); | 
 | 146 |     if (result == NULL) | 
 | 147 |         return NULL; | 
| Nadeem Vawda | 57cb81d | 2013-01-02 23:05:56 +0100 | [diff] [blame] | 148 |  | 
| Antoine Pitrou | 37dc5f8 | 2011-04-03 17:05:46 +0200 | [diff] [blame] | 149 |     c->bzs.next_in = data; | 
| Nadeem Vawda | 57cb81d | 2013-01-02 23:05:56 +0100 | [diff] [blame] | 150 |     c->bzs.avail_in = 0; | 
| Antoine Pitrou | 37dc5f8 | 2011-04-03 17:05:46 +0200 | [diff] [blame] | 151 |     c->bzs.next_out = PyBytes_AS_STRING(result); | 
 | 152 |     c->bzs.avail_out = PyBytes_GET_SIZE(result); | 
 | 153 |     for (;;) { | 
 | 154 |         char *this_out; | 
 | 155 |         int bzerror; | 
 | 156 |  | 
| Nadeem Vawda | 57cb81d | 2013-01-02 23:05:56 +0100 | [diff] [blame] | 157 |         /* On a 64-bit system, len might not fit in avail_in (an unsigned int). | 
 | 158 |            Do compression in chunks of no more than UINT_MAX bytes each. */ | 
| Nadeem Vawda | ea4b46f | 2011-04-12 23:02:42 +0200 | [diff] [blame] | 159 |         if (c->bzs.avail_in == 0 && len > 0) { | 
 | 160 |             c->bzs.avail_in = MIN(len, UINT_MAX); | 
 | 161 |             len -= c->bzs.avail_in; | 
 | 162 |         } | 
 | 163 |  | 
| Nadeem Vawda | 57cb81d | 2013-01-02 23:05:56 +0100 | [diff] [blame] | 164 |         /* In regular compression mode, stop when input data is exhausted. */ | 
 | 165 |         if (action == BZ_RUN && c->bzs.avail_in == 0) | 
| Antoine Pitrou | 37dc5f8 | 2011-04-03 17:05:46 +0200 | [diff] [blame] | 166 |             break; | 
 | 167 |  | 
 | 168 |         if (c->bzs.avail_out == 0) { | 
| Nadeem Vawda | 18b7fcc | 2012-10-21 21:16:58 +0200 | [diff] [blame] | 169 |             size_t buffer_left = PyBytes_GET_SIZE(result) - data_size; | 
 | 170 |             if (buffer_left == 0) { | 
 | 171 |                 if (grow_buffer(&result) < 0) | 
 | 172 |                     goto error; | 
 | 173 |                 c->bzs.next_out = PyBytes_AS_STRING(result) + data_size; | 
 | 174 |                 buffer_left = PyBytes_GET_SIZE(result) - data_size; | 
 | 175 |             } | 
 | 176 |             c->bzs.avail_out = MIN(buffer_left, UINT_MAX); | 
| Antoine Pitrou | 37dc5f8 | 2011-04-03 17:05:46 +0200 | [diff] [blame] | 177 |         } | 
| Nadeem Vawda | 57cb81d | 2013-01-02 23:05:56 +0100 | [diff] [blame] | 178 |  | 
 | 179 |         Py_BEGIN_ALLOW_THREADS | 
 | 180 |         this_out = c->bzs.next_out; | 
 | 181 |         bzerror = BZ2_bzCompress(&c->bzs, action); | 
 | 182 |         data_size += c->bzs.next_out - this_out; | 
 | 183 |         Py_END_ALLOW_THREADS | 
 | 184 |         if (catch_bz2_error(bzerror)) | 
 | 185 |             goto error; | 
 | 186 |  | 
 | 187 |         /* In flushing mode, stop when all buffered data has been flushed. */ | 
 | 188 |         if (action == BZ_FINISH && bzerror == BZ_STREAM_END) | 
 | 189 |             break; | 
| Antoine Pitrou | 37dc5f8 | 2011-04-03 17:05:46 +0200 | [diff] [blame] | 190 |     } | 
 | 191 |     if (data_size != PyBytes_GET_SIZE(result)) | 
 | 192 |         if (_PyBytes_Resize(&result, data_size) < 0) | 
 | 193 |             goto error; | 
 | 194 |     return result; | 
 | 195 |  | 
 | 196 | error: | 
 | 197 |     Py_XDECREF(result); | 
 | 198 |     return NULL; | 
 | 199 | } | 
 | 200 |  | 
 | 201 | PyDoc_STRVAR(BZ2Compressor_compress__doc__, | 
 | 202 | "compress(data) -> bytes\n" | 
 | 203 | "\n" | 
 | 204 | "Provide data to the compressor object. Returns a chunk of\n" | 
 | 205 | "compressed data if possible, or b'' otherwise.\n" | 
 | 206 | "\n" | 
 | 207 | "When you have finished providing data to the compressor, call the\n" | 
 | 208 | "flush() method to finish the compression process.\n"); | 
 | 209 |  | 
 | 210 | static PyObject * | 
 | 211 | BZ2Compressor_compress(BZ2Compressor *self, PyObject *args) | 
 | 212 | { | 
 | 213 |     Py_buffer buffer; | 
 | 214 |     PyObject *result = NULL; | 
 | 215 |  | 
 | 216 |     if (!PyArg_ParseTuple(args, "y*:compress", &buffer)) | 
 | 217 |         return NULL; | 
 | 218 |  | 
 | 219 |     ACQUIRE_LOCK(self); | 
 | 220 |     if (self->flushed) | 
 | 221 |         PyErr_SetString(PyExc_ValueError, "Compressor has been flushed"); | 
 | 222 |     else | 
 | 223 |         result = compress(self, buffer.buf, buffer.len, BZ_RUN); | 
 | 224 |     RELEASE_LOCK(self); | 
 | 225 |     PyBuffer_Release(&buffer); | 
 | 226 |     return result; | 
 | 227 | } | 
 | 228 |  | 
 | 229 | PyDoc_STRVAR(BZ2Compressor_flush__doc__, | 
 | 230 | "flush() -> bytes\n" | 
 | 231 | "\n" | 
 | 232 | "Finish the compression process. Returns the compressed data left\n" | 
 | 233 | "in internal buffers.\n" | 
 | 234 | "\n" | 
 | 235 | "The compressor object may not be used after this method is called.\n"); | 
 | 236 |  | 
 | 237 | static PyObject * | 
 | 238 | BZ2Compressor_flush(BZ2Compressor *self, PyObject *noargs) | 
 | 239 | { | 
 | 240 |     PyObject *result = NULL; | 
 | 241 |  | 
 | 242 |     ACQUIRE_LOCK(self); | 
 | 243 |     if (self->flushed) | 
 | 244 |         PyErr_SetString(PyExc_ValueError, "Repeated call to flush()"); | 
 | 245 |     else { | 
 | 246 |         self->flushed = 1; | 
 | 247 |         result = compress(self, NULL, 0, BZ_FINISH); | 
 | 248 |     } | 
 | 249 |     RELEASE_LOCK(self); | 
 | 250 |     return result; | 
 | 251 | } | 
 | 252 |  | 
| Nadeem Vawda | 3797065 | 2013-10-28 21:35:23 +0100 | [diff] [blame] | 253 | static PyObject * | 
 | 254 | BZ2Compressor_getstate(BZ2Compressor *self, PyObject *noargs) | 
 | 255 | { | 
 | 256 |     PyErr_Format(PyExc_TypeError, "cannot serialize '%s' object", | 
 | 257 |                  Py_TYPE(self)->tp_name); | 
 | 258 |     return NULL; | 
 | 259 | } | 
 | 260 |  | 
| Antoine Pitrou | 37dc5f8 | 2011-04-03 17:05:46 +0200 | [diff] [blame] | 261 | static int | 
 | 262 | BZ2Compressor_init(BZ2Compressor *self, PyObject *args, PyObject *kwargs) | 
 | 263 | { | 
 | 264 |     int compresslevel = 9; | 
 | 265 |     int bzerror; | 
 | 266 |  | 
 | 267 |     if (!PyArg_ParseTuple(args, "|i:BZ2Compressor", &compresslevel)) | 
 | 268 |         return -1; | 
 | 269 |     if (!(1 <= compresslevel && compresslevel <= 9)) { | 
 | 270 |         PyErr_SetString(PyExc_ValueError, | 
 | 271 |                         "compresslevel must be between 1 and 9"); | 
 | 272 |         return -1; | 
 | 273 |     } | 
 | 274 |  | 
 | 275 | #ifdef WITH_THREAD | 
 | 276 |     self->lock = PyThread_allocate_lock(); | 
 | 277 |     if (self->lock == NULL) { | 
 | 278 |         PyErr_SetString(PyExc_MemoryError, "Unable to allocate lock"); | 
 | 279 |         return -1; | 
 | 280 |     } | 
 | 281 | #endif | 
 | 282 |  | 
 | 283 |     bzerror = BZ2_bzCompressInit(&self->bzs, compresslevel, 0, 0); | 
 | 284 |     if (catch_bz2_error(bzerror)) | 
 | 285 |         goto error; | 
 | 286 |  | 
 | 287 |     return 0; | 
 | 288 |  | 
 | 289 | error: | 
 | 290 | #ifdef WITH_THREAD | 
 | 291 |     PyThread_free_lock(self->lock); | 
 | 292 |     self->lock = NULL; | 
 | 293 | #endif | 
 | 294 |     return -1; | 
 | 295 | } | 
 | 296 |  | 
 | 297 | static void | 
 | 298 | BZ2Compressor_dealloc(BZ2Compressor *self) | 
 | 299 | { | 
 | 300 |     BZ2_bzCompressEnd(&self->bzs); | 
 | 301 | #ifdef WITH_THREAD | 
 | 302 |     if (self->lock != NULL) | 
 | 303 |         PyThread_free_lock(self->lock); | 
 | 304 | #endif | 
 | 305 |     Py_TYPE(self)->tp_free((PyObject *)self); | 
 | 306 | } | 
 | 307 |  | 
 | 308 | static PyMethodDef BZ2Compressor_methods[] = { | 
| Nadeem Vawda | 3797065 | 2013-10-28 21:35:23 +0100 | [diff] [blame] | 309 |     {"compress",     (PyCFunction)BZ2Compressor_compress, METH_VARARGS, | 
| Antoine Pitrou | 37dc5f8 | 2011-04-03 17:05:46 +0200 | [diff] [blame] | 310 |      BZ2Compressor_compress__doc__}, | 
| Nadeem Vawda | 3797065 | 2013-10-28 21:35:23 +0100 | [diff] [blame] | 311 |     {"flush",        (PyCFunction)BZ2Compressor_flush,    METH_NOARGS, | 
| Antoine Pitrou | 37dc5f8 | 2011-04-03 17:05:46 +0200 | [diff] [blame] | 312 |      BZ2Compressor_flush__doc__}, | 
| Nadeem Vawda | 3797065 | 2013-10-28 21:35:23 +0100 | [diff] [blame] | 313 |     {"__getstate__", (PyCFunction)BZ2Compressor_getstate, METH_NOARGS}, | 
| Antoine Pitrou | 37dc5f8 | 2011-04-03 17:05:46 +0200 | [diff] [blame] | 314 |     {NULL} | 
 | 315 | }; | 
 | 316 |  | 
 | 317 | PyDoc_STRVAR(BZ2Compressor__doc__, | 
 | 318 | "BZ2Compressor(compresslevel=9)\n" | 
 | 319 | "\n" | 
 | 320 | "Create a compressor object for compressing data incrementally.\n" | 
 | 321 | "\n" | 
 | 322 | "compresslevel, if given, must be a number between 1 and 9.\n" | 
 | 323 | "\n" | 
 | 324 | "For one-shot compression, use the compress() function instead.\n"); | 
 | 325 |  | 
 | 326 | static PyTypeObject BZ2Compressor_Type = { | 
 | 327 |     PyVarObject_HEAD_INIT(NULL, 0) | 
 | 328 |     "_bz2.BZ2Compressor",               /* tp_name */ | 
 | 329 |     sizeof(BZ2Compressor),              /* tp_basicsize */ | 
 | 330 |     0,                                  /* tp_itemsize */ | 
 | 331 |     (destructor)BZ2Compressor_dealloc,  /* tp_dealloc */ | 
 | 332 |     0,                                  /* tp_print */ | 
 | 333 |     0,                                  /* tp_getattr */ | 
 | 334 |     0,                                  /* tp_setattr */ | 
 | 335 |     0,                                  /* tp_reserved */ | 
 | 336 |     0,                                  /* tp_repr */ | 
 | 337 |     0,                                  /* tp_as_number */ | 
 | 338 |     0,                                  /* tp_as_sequence */ | 
 | 339 |     0,                                  /* tp_as_mapping */ | 
 | 340 |     0,                                  /* tp_hash  */ | 
 | 341 |     0,                                  /* tp_call */ | 
 | 342 |     0,                                  /* tp_str */ | 
 | 343 |     0,                                  /* tp_getattro */ | 
 | 344 |     0,                                  /* tp_setattro */ | 
 | 345 |     0,                                  /* tp_as_buffer */ | 
 | 346 |     Py_TPFLAGS_DEFAULT,                 /* tp_flags */ | 
 | 347 |     BZ2Compressor__doc__,               /* tp_doc */ | 
 | 348 |     0,                                  /* tp_traverse */ | 
 | 349 |     0,                                  /* tp_clear */ | 
 | 350 |     0,                                  /* tp_richcompare */ | 
 | 351 |     0,                                  /* tp_weaklistoffset */ | 
 | 352 |     0,                                  /* tp_iter */ | 
 | 353 |     0,                                  /* tp_iternext */ | 
 | 354 |     BZ2Compressor_methods,              /* tp_methods */ | 
 | 355 |     0,                                  /* tp_members */ | 
 | 356 |     0,                                  /* tp_getset */ | 
 | 357 |     0,                                  /* tp_base */ | 
 | 358 |     0,                                  /* tp_dict */ | 
 | 359 |     0,                                  /* tp_descr_get */ | 
 | 360 |     0,                                  /* tp_descr_set */ | 
 | 361 |     0,                                  /* tp_dictoffset */ | 
 | 362 |     (initproc)BZ2Compressor_init,       /* tp_init */ | 
 | 363 |     0,                                  /* tp_alloc */ | 
 | 364 |     PyType_GenericNew,                  /* tp_new */ | 
 | 365 | }; | 
 | 366 |  | 
 | 367 |  | 
 | 368 | /* BZ2Decompressor class. */ | 
 | 369 |  | 
 | 370 | static PyObject * | 
 | 371 | decompress(BZ2Decompressor *d, char *data, size_t len) | 
 | 372 | { | 
 | 373 |     size_t data_size = 0; | 
 | 374 |     PyObject *result; | 
 | 375 |  | 
 | 376 |     result = PyBytes_FromStringAndSize(NULL, SMALLCHUNK); | 
 | 377 |     if (result == NULL) | 
 | 378 |         return result; | 
 | 379 |     d->bzs.next_in = data; | 
| Nadeem Vawda | ea4b46f | 2011-04-12 23:02:42 +0200 | [diff] [blame] | 380 |     /* On a 64-bit system, len might not fit in avail_in (an unsigned int). | 
 | 381 |        Do decompression in chunks of no more than UINT_MAX bytes each. */ | 
 | 382 |     d->bzs.avail_in = MIN(len, UINT_MAX); | 
 | 383 |     len -= d->bzs.avail_in; | 
| Antoine Pitrou | 37dc5f8 | 2011-04-03 17:05:46 +0200 | [diff] [blame] | 384 |     d->bzs.next_out = PyBytes_AS_STRING(result); | 
 | 385 |     d->bzs.avail_out = PyBytes_GET_SIZE(result); | 
 | 386 |     for (;;) { | 
 | 387 |         char *this_out; | 
 | 388 |         int bzerror; | 
 | 389 |  | 
 | 390 |         Py_BEGIN_ALLOW_THREADS | 
 | 391 |         this_out = d->bzs.next_out; | 
 | 392 |         bzerror = BZ2_bzDecompress(&d->bzs); | 
 | 393 |         data_size += d->bzs.next_out - this_out; | 
 | 394 |         Py_END_ALLOW_THREADS | 
 | 395 |         if (catch_bz2_error(bzerror)) | 
 | 396 |             goto error; | 
 | 397 |         if (bzerror == BZ_STREAM_END) { | 
 | 398 |             d->eof = 1; | 
| Nadeem Vawda | ea4b46f | 2011-04-12 23:02:42 +0200 | [diff] [blame] | 399 |             len += d->bzs.avail_in; | 
 | 400 |             if (len > 0) { /* Save leftover input to unused_data */ | 
| Antoine Pitrou | 37dc5f8 | 2011-04-03 17:05:46 +0200 | [diff] [blame] | 401 |                 Py_CLEAR(d->unused_data); | 
| Nadeem Vawda | ea4b46f | 2011-04-12 23:02:42 +0200 | [diff] [blame] | 402 |                 d->unused_data = PyBytes_FromStringAndSize(d->bzs.next_in, len); | 
| Antoine Pitrou | 37dc5f8 | 2011-04-03 17:05:46 +0200 | [diff] [blame] | 403 |                 if (d->unused_data == NULL) | 
 | 404 |                     goto error; | 
 | 405 |             } | 
 | 406 |             break; | 
 | 407 |         } | 
| Nadeem Vawda | ea4b46f | 2011-04-12 23:02:42 +0200 | [diff] [blame] | 408 |         if (d->bzs.avail_in == 0) { | 
 | 409 |             if (len == 0) | 
 | 410 |                 break; | 
 | 411 |             d->bzs.avail_in = MIN(len, UINT_MAX); | 
 | 412 |             len -= d->bzs.avail_in; | 
 | 413 |         } | 
| Antoine Pitrou | 37dc5f8 | 2011-04-03 17:05:46 +0200 | [diff] [blame] | 414 |         if (d->bzs.avail_out == 0) { | 
| Nadeem Vawda | 18b7fcc | 2012-10-21 21:16:58 +0200 | [diff] [blame] | 415 |             size_t buffer_left = PyBytes_GET_SIZE(result) - data_size; | 
 | 416 |             if (buffer_left == 0) { | 
 | 417 |                 if (grow_buffer(&result) < 0) | 
 | 418 |                     goto error; | 
 | 419 |                 d->bzs.next_out = PyBytes_AS_STRING(result) + data_size; | 
 | 420 |                 buffer_left = PyBytes_GET_SIZE(result) - data_size; | 
 | 421 |             } | 
 | 422 |             d->bzs.avail_out = MIN(buffer_left, UINT_MAX); | 
| Antoine Pitrou | 37dc5f8 | 2011-04-03 17:05:46 +0200 | [diff] [blame] | 423 |         } | 
 | 424 |     } | 
 | 425 |     if (data_size != PyBytes_GET_SIZE(result)) | 
 | 426 |         if (_PyBytes_Resize(&result, data_size) < 0) | 
 | 427 |             goto error; | 
 | 428 |     return result; | 
 | 429 |  | 
 | 430 | error: | 
 | 431 |     Py_XDECREF(result); | 
 | 432 |     return NULL; | 
 | 433 | } | 
 | 434 |  | 
 | 435 | PyDoc_STRVAR(BZ2Decompressor_decompress__doc__, | 
 | 436 | "decompress(data) -> bytes\n" | 
 | 437 | "\n" | 
 | 438 | "Provide data to the decompressor object. Returns a chunk of\n" | 
 | 439 | "decompressed data if possible, or b'' otherwise.\n" | 
 | 440 | "\n" | 
 | 441 | "Attempting to decompress data after the end of stream is reached\n" | 
 | 442 | "raises an EOFError. Any data found after the end of the stream\n" | 
 | 443 | "is ignored and saved in the unused_data attribute.\n"); | 
 | 444 |  | 
 | 445 | static PyObject * | 
 | 446 | BZ2Decompressor_decompress(BZ2Decompressor *self, PyObject *args) | 
 | 447 | { | 
 | 448 |     Py_buffer buffer; | 
 | 449 |     PyObject *result = NULL; | 
 | 450 |  | 
 | 451 |     if (!PyArg_ParseTuple(args, "y*:decompress", &buffer)) | 
 | 452 |         return NULL; | 
 | 453 |  | 
 | 454 |     ACQUIRE_LOCK(self); | 
 | 455 |     if (self->eof) | 
 | 456 |         PyErr_SetString(PyExc_EOFError, "End of stream already reached"); | 
 | 457 |     else | 
 | 458 |         result = decompress(self, buffer.buf, buffer.len); | 
 | 459 |     RELEASE_LOCK(self); | 
 | 460 |     PyBuffer_Release(&buffer); | 
 | 461 |     return result; | 
 | 462 | } | 
 | 463 |  | 
| Nadeem Vawda | 3797065 | 2013-10-28 21:35:23 +0100 | [diff] [blame] | 464 | static PyObject * | 
 | 465 | BZ2Decompressor_getstate(BZ2Decompressor *self, PyObject *noargs) | 
 | 466 | { | 
 | 467 |     PyErr_Format(PyExc_TypeError, "cannot serialize '%s' object", | 
 | 468 |                  Py_TYPE(self)->tp_name); | 
 | 469 |     return NULL; | 
 | 470 | } | 
 | 471 |  | 
| Antoine Pitrou | 37dc5f8 | 2011-04-03 17:05:46 +0200 | [diff] [blame] | 472 | static int | 
 | 473 | BZ2Decompressor_init(BZ2Decompressor *self, PyObject *args, PyObject *kwargs) | 
 | 474 | { | 
 | 475 |     int bzerror; | 
 | 476 |  | 
 | 477 |     if (!PyArg_ParseTuple(args, ":BZ2Decompressor")) | 
 | 478 |         return -1; | 
 | 479 |  | 
 | 480 | #ifdef WITH_THREAD | 
 | 481 |     self->lock = PyThread_allocate_lock(); | 
 | 482 |     if (self->lock == NULL) { | 
 | 483 |         PyErr_SetString(PyExc_MemoryError, "Unable to allocate lock"); | 
 | 484 |         return -1; | 
 | 485 |     } | 
 | 486 | #endif | 
 | 487 |  | 
 | 488 |     self->unused_data = PyBytes_FromStringAndSize("", 0); | 
 | 489 |     if (self->unused_data == NULL) | 
 | 490 |         goto error; | 
 | 491 |  | 
 | 492 |     bzerror = BZ2_bzDecompressInit(&self->bzs, 0, 0); | 
 | 493 |     if (catch_bz2_error(bzerror)) | 
 | 494 |         goto error; | 
 | 495 |  | 
 | 496 |     return 0; | 
 | 497 |  | 
 | 498 | error: | 
 | 499 |     Py_CLEAR(self->unused_data); | 
 | 500 | #ifdef WITH_THREAD | 
 | 501 |     PyThread_free_lock(self->lock); | 
 | 502 |     self->lock = NULL; | 
 | 503 | #endif | 
 | 504 |     return -1; | 
 | 505 | } | 
 | 506 |  | 
 | 507 | static void | 
 | 508 | BZ2Decompressor_dealloc(BZ2Decompressor *self) | 
 | 509 | { | 
 | 510 |     BZ2_bzDecompressEnd(&self->bzs); | 
 | 511 |     Py_CLEAR(self->unused_data); | 
 | 512 | #ifdef WITH_THREAD | 
 | 513 |     if (self->lock != NULL) | 
 | 514 |         PyThread_free_lock(self->lock); | 
 | 515 | #endif | 
 | 516 |     Py_TYPE(self)->tp_free((PyObject *)self); | 
 | 517 | } | 
 | 518 |  | 
 | 519 | static PyMethodDef BZ2Decompressor_methods[] = { | 
 | 520 |     {"decompress", (PyCFunction)BZ2Decompressor_decompress, METH_VARARGS, | 
 | 521 |      BZ2Decompressor_decompress__doc__}, | 
| Nadeem Vawda | 3797065 | 2013-10-28 21:35:23 +0100 | [diff] [blame] | 522 |     {"__getstate__", (PyCFunction)BZ2Decompressor_getstate, METH_NOARGS}, | 
| Antoine Pitrou | 37dc5f8 | 2011-04-03 17:05:46 +0200 | [diff] [blame] | 523 |     {NULL} | 
 | 524 | }; | 
 | 525 |  | 
 | 526 | PyDoc_STRVAR(BZ2Decompressor_eof__doc__, | 
 | 527 | "True if the end-of-stream marker has been reached."); | 
 | 528 |  | 
 | 529 | PyDoc_STRVAR(BZ2Decompressor_unused_data__doc__, | 
 | 530 | "Data found after the end of the compressed stream."); | 
 | 531 |  | 
 | 532 | static PyMemberDef BZ2Decompressor_members[] = { | 
 | 533 |     {"eof", T_BOOL, offsetof(BZ2Decompressor, eof), | 
 | 534 |      READONLY, BZ2Decompressor_eof__doc__}, | 
 | 535 |     {"unused_data", T_OBJECT_EX, offsetof(BZ2Decompressor, unused_data), | 
 | 536 |      READONLY, BZ2Decompressor_unused_data__doc__}, | 
 | 537 |     {NULL} | 
 | 538 | }; | 
 | 539 |  | 
 | 540 | PyDoc_STRVAR(BZ2Decompressor__doc__, | 
 | 541 | "BZ2Decompressor()\n" | 
 | 542 | "\n" | 
 | 543 | "Create a decompressor object for decompressing data incrementally.\n" | 
 | 544 | "\n" | 
 | 545 | "For one-shot decompression, use the decompress() function instead.\n"); | 
 | 546 |  | 
 | 547 | static PyTypeObject BZ2Decompressor_Type = { | 
 | 548 |     PyVarObject_HEAD_INIT(NULL, 0) | 
 | 549 |     "_bz2.BZ2Decompressor",             /* tp_name */ | 
 | 550 |     sizeof(BZ2Decompressor),            /* tp_basicsize */ | 
 | 551 |     0,                                  /* tp_itemsize */ | 
 | 552 |     (destructor)BZ2Decompressor_dealloc,/* tp_dealloc */ | 
 | 553 |     0,                                  /* tp_print */ | 
 | 554 |     0,                                  /* tp_getattr */ | 
 | 555 |     0,                                  /* tp_setattr */ | 
 | 556 |     0,                                  /* tp_reserved */ | 
 | 557 |     0,                                  /* tp_repr */ | 
 | 558 |     0,                                  /* tp_as_number */ | 
 | 559 |     0,                                  /* tp_as_sequence */ | 
 | 560 |     0,                                  /* tp_as_mapping */ | 
 | 561 |     0,                                  /* tp_hash  */ | 
 | 562 |     0,                                  /* tp_call */ | 
 | 563 |     0,                                  /* tp_str */ | 
 | 564 |     0,                                  /* tp_getattro */ | 
 | 565 |     0,                                  /* tp_setattro */ | 
 | 566 |     0,                                  /* tp_as_buffer */ | 
 | 567 |     Py_TPFLAGS_DEFAULT,                 /* tp_flags */ | 
 | 568 |     BZ2Decompressor__doc__,             /* tp_doc */ | 
 | 569 |     0,                                  /* tp_traverse */ | 
 | 570 |     0,                                  /* tp_clear */ | 
 | 571 |     0,                                  /* tp_richcompare */ | 
 | 572 |     0,                                  /* tp_weaklistoffset */ | 
 | 573 |     0,                                  /* tp_iter */ | 
 | 574 |     0,                                  /* tp_iternext */ | 
 | 575 |     BZ2Decompressor_methods,            /* tp_methods */ | 
 | 576 |     BZ2Decompressor_members,            /* tp_members */ | 
 | 577 |     0,                                  /* tp_getset */ | 
 | 578 |     0,                                  /* tp_base */ | 
 | 579 |     0,                                  /* tp_dict */ | 
 | 580 |     0,                                  /* tp_descr_get */ | 
 | 581 |     0,                                  /* tp_descr_set */ | 
 | 582 |     0,                                  /* tp_dictoffset */ | 
 | 583 |     (initproc)BZ2Decompressor_init,     /* tp_init */ | 
 | 584 |     0,                                  /* tp_alloc */ | 
 | 585 |     PyType_GenericNew,                  /* tp_new */ | 
 | 586 | }; | 
 | 587 |  | 
 | 588 |  | 
 | 589 | /* Module initialization. */ | 
 | 590 |  | 
 | 591 | static struct PyModuleDef _bz2module = { | 
 | 592 |     PyModuleDef_HEAD_INIT, | 
 | 593 |     "_bz2", | 
 | 594 |     NULL, | 
 | 595 |     -1, | 
 | 596 |     NULL, | 
 | 597 |     NULL, | 
 | 598 |     NULL, | 
 | 599 |     NULL, | 
 | 600 |     NULL | 
 | 601 | }; | 
 | 602 |  | 
 | 603 | PyMODINIT_FUNC | 
 | 604 | PyInit__bz2(void) | 
 | 605 | { | 
 | 606 |     PyObject *m; | 
 | 607 |  | 
 | 608 |     if (PyType_Ready(&BZ2Compressor_Type) < 0) | 
 | 609 |         return NULL; | 
 | 610 |     if (PyType_Ready(&BZ2Decompressor_Type) < 0) | 
 | 611 |         return NULL; | 
 | 612 |  | 
 | 613 |     m = PyModule_Create(&_bz2module); | 
 | 614 |     if (m == NULL) | 
 | 615 |         return NULL; | 
 | 616 |  | 
 | 617 |     Py_INCREF(&BZ2Compressor_Type); | 
 | 618 |     PyModule_AddObject(m, "BZ2Compressor", (PyObject *)&BZ2Compressor_Type); | 
 | 619 |  | 
 | 620 |     Py_INCREF(&BZ2Decompressor_Type); | 
 | 621 |     PyModule_AddObject(m, "BZ2Decompressor", | 
 | 622 |                        (PyObject *)&BZ2Decompressor_Type); | 
 | 623 |  | 
 | 624 |     return m; | 
 | 625 | } |