blob: 2e99df9d867bfac5749ad51b0c7fc5b1228a6ce2 [file] [log] [blame]
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001/*
2
3python-bz2 - python bz2 library interface
4
5Copyright (c) 2002 Gustavo Niemeyer <niemeyer@conectiva.com>
6Copyright (c) 2002 Python Software Foundation; All Rights Reserved
7
8*/
9
Martin v. Löwise17af7b2002-11-23 09:16:19 +000010#include "Python.h"
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000011#include <stdio.h>
12#include <bzlib.h>
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000013#include "structmember.h"
14
15#ifdef WITH_THREAD
16#include "pythread.h"
17#endif
18
/* Credit string naming the module's author. */
static char __author__[] =
    "The bz2 python module was written by:\n"
    "\n"
    " Gustavo Niemeyer <niemeyer@conectiva.com>\n";
24
/* Our very own off_t-like type, 64-bit if possible.
 * (Copied from Objects/fileobject.c.)
 *
 * off_t is used as-is when large file support is not configured or when
 * off_t is already at least 8 bytes wide.  Otherwise fpos_t is used if it
 * is wide enough, and the build is rejected when neither type qualifies. */
#if !defined(HAVE_LARGEFILE_SUPPORT) || SIZEOF_OFF_T >= 8
typedef off_t Py_off_t;
#elif SIZEOF_FPOS_T >= 8
typedef fpos_t Py_off_t;
#else
#error "Large file support, but neither off_t nor fpos_t is large enough."
#endif
36
/* Shorthand for the character data of a bytes object. */
#define BUF(v) PyBytes_AS_STRING(v)

/* Values stored in BZ2FileObject.mode. */
enum {
    MODE_CLOSED   = 0,  /* I/O on the file raises ValueError */
    MODE_READ     = 1,  /* open for reading, end of stream not reached */
    MODE_READ_EOF = 2,  /* open for reading, BZ_STREAM_END already seen */
    MODE_WRITE    = 3   /* open for writing */
};
43
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000044
/* When bzlib.h does not define BZ_CONFIG_ERROR, map the BZ2_-prefixed names
 * used throughout this file onto their unprefixed equivalents (presumably
 * the API of older bzlib releases -- confirm against the bzlib headers in
 * use). */
#if !defined(BZ_CONFIG_ERROR)

/* File-oriented reading */
#define BZ2_bzReadOpen bzReadOpen
#define BZ2_bzRead bzRead
#define BZ2_bzReadClose bzReadClose

/* File-oriented writing */
#define BZ2_bzWriteOpen bzWriteOpen
#define BZ2_bzWrite bzWrite
#define BZ2_bzWriteClose bzWriteClose

/* Stream compression */
#define BZ2_bzCompressInit bzCompressInit
#define BZ2_bzCompress bzCompress
#define BZ2_bzCompressEnd bzCompressEnd

/* Stream decompression */
#define BZ2_bzDecompressInit bzDecompressInit
#define BZ2_bzDecompress bzDecompress
#define BZ2_bzDecompressEnd bzDecompressEnd

#endif /* ! BZ_CONFIG_ERROR */
61
62
/* Per-object locking helpers.
 *
 * Without thread support both macros expand to nothing.  With it,
 * ACQUIRE_LOCK first tries a non-blocking acquire; only if that fails does
 * it release the GIL (Py_BEGIN_ALLOW_THREADS) and block on the lock. */
#ifndef WITH_THREAD
#define ACQUIRE_LOCK(obj)
#define RELEASE_LOCK(obj)
#else
#define ACQUIRE_LOCK(obj) do { \
    if (!PyThread_acquire_lock((obj)->lock, 0)) { \
        Py_BEGIN_ALLOW_THREADS \
        PyThread_acquire_lock((obj)->lock, 1); \
        Py_END_ALLOW_THREADS \
    } } while(0)
#define RELEASE_LOCK(obj) PyThread_release_lock((obj)->lock)
#endif
75
/* Smaller of two values.  Each argument may be evaluated twice, so do not
 * pass expressions with side effects. */
#define MIN(X, Y) ((X) < (Y) ? (X) : (Y))
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000077
78/* ===================================================================== */
79/* Structure definitions. */
80
81typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000082 PyObject_HEAD
83 FILE *rawfp;
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +000084
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000085 char* f_buf; /* Allocated readahead buffer */
86 char* f_bufend; /* Points after last occupied position */
87 char* f_bufptr; /* Current buffer position */
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +000088
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000089 BZFILE *fp;
90 int mode;
91 Py_off_t pos;
92 Py_off_t size;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000093#ifdef WITH_THREAD
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000094 PyThread_type_lock lock;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000095#endif
96} BZ2FileObject;
97
98typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000099 PyObject_HEAD
100 bz_stream bzs;
101 int running;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000102#ifdef WITH_THREAD
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000103 PyThread_type_lock lock;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000104#endif
105} BZ2CompObject;
106
107typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000108 PyObject_HEAD
109 bz_stream bzs;
110 int running;
111 PyObject *unused_data;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000112#ifdef WITH_THREAD
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000113 PyThread_type_lock lock;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000114#endif
115} BZ2DecompObject;
116
117/* ===================================================================== */
118/* Utility functions. */
119
Antoine Pitrou7ffa1962010-08-01 20:08:46 +0000120/* Refuse regular I/O if there's data in the iteration-buffer.
121 * Mixing them would cause data to arrive out of order, as the read*
122 * methods don't use the iteration buffer. */
123static int
124check_iterbuffered(BZ2FileObject *f)
125{
126 if (f->f_buf != NULL &&
127 (f->f_bufend - f->f_bufptr) > 0 &&
128 f->f_buf[0] != '\0') {
129 PyErr_SetString(PyExc_ValueError,
130 "Mixing iteration and read methods would lose data");
131 return -1;
132 }
133 return 0;
134}
135
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000136static int
137Util_CatchBZ2Error(int bzerror)
138{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000139 int ret = 0;
140 switch(bzerror) {
141 case BZ_OK:
142 case BZ_STREAM_END:
143 break;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000144
Gustavo Niemeyer7628f1f2003-04-27 06:25:24 +0000145#ifdef BZ_CONFIG_ERROR
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000146 case BZ_CONFIG_ERROR:
147 PyErr_SetString(PyExc_SystemError,
148 "the bz2 library was not compiled "
149 "correctly");
150 ret = 1;
151 break;
Gustavo Niemeyer7628f1f2003-04-27 06:25:24 +0000152#endif
Tim Peterse3228092002-11-09 04:21:44 +0000153
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000154 case BZ_PARAM_ERROR:
155 PyErr_SetString(PyExc_ValueError,
156 "the bz2 library has received wrong "
157 "parameters");
158 ret = 1;
159 break;
Tim Peterse3228092002-11-09 04:21:44 +0000160
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000161 case BZ_MEM_ERROR:
162 PyErr_NoMemory();
163 ret = 1;
164 break;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000165
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000166 case BZ_DATA_ERROR:
167 case BZ_DATA_ERROR_MAGIC:
168 PyErr_SetString(PyExc_IOError, "invalid data stream");
169 ret = 1;
170 break;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000171
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000172 case BZ_IO_ERROR:
173 PyErr_SetString(PyExc_IOError, "unknown IO error");
174 ret = 1;
175 break;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000176
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000177 case BZ_UNEXPECTED_EOF:
178 PyErr_SetString(PyExc_EOFError,
179 "compressed file ended before the "
180 "logical end-of-stream was detected");
181 ret = 1;
182 break;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000183
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000184 case BZ_SEQUENCE_ERROR:
185 PyErr_SetString(PyExc_RuntimeError,
186 "wrong sequence of bz2 library "
187 "commands used");
188 ret = 1;
189 break;
190 }
191 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000192}
193
/* Size of the on-stack scratch buffers: BUFSIZ, but never below 8192. */
#if BUFSIZ >= 8192
#define SMALLCHUNK BUFSIZ
#else
#define SMALLCHUNK 8192
#endif
199
/* This is a hacked version of Python's fileobject.c:new_buffersize().
 *
 * Returns the next buffer size to use after `currentsize`.  The buffer
 * grows by one eighth of its current size plus 6 bytes: proportional
 * growth keeps the total copying cost amortized linear, and a factor well
 * below 2 avoids excessive over-allocation.  The arithmetic is unsigned
 * and is not checked for wrap-around here. */
static size_t
Util_NewBufferSize(size_t currentsize)
{
    size_t growth = (currentsize >> 3) + 6;

    return currentsize + growth;
}
209
Nadeem Vawda5f8f0d62012-10-21 21:13:27 +0200210static int
211Util_GrowBuffer(PyObject **buf)
212{
213 size_t size = PyBytes_GET_SIZE(*buf);
214 size_t new_size = Util_NewBufferSize(size);
215 if (new_size > size) {
216 return _PyBytes_Resize(buf, new_size);
217 } else { /* overflow */
218 PyErr_SetString(PyExc_OverflowError,
219 "Unable to allocate buffer - output too large");
220 return -1;
221 }
222}
223
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000224/* This is a hacked version of Python's fileobject.c:get_line(). */
225static PyObject *
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000226Util_GetLine(BZ2FileObject *f, int n)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000227{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000228 char c;
229 char *buf, *end;
230 size_t total_v_size; /* total # of slots in buffer */
231 size_t used_v_size; /* # used slots in buffer */
232 size_t increment; /* amount to increment the buffer */
233 PyObject *v;
234 int bzerror;
235 int bytes_read;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000236
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000237 total_v_size = n > 0 ? n : 100;
238 v = PyBytes_FromStringAndSize((char *)NULL, total_v_size);
239 if (v == NULL)
240 return NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000241
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000242 buf = BUF(v);
243 end = buf + total_v_size;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000244
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000245 for (;;) {
246 Py_BEGIN_ALLOW_THREADS
247 do {
248 bytes_read = BZ2_bzRead(&bzerror, f->fp, &c, 1);
249 f->pos++;
250 if (bytes_read == 0)
251 break;
252 *buf++ = c;
253 } while (bzerror == BZ_OK && c != '\n' && buf != end);
254 Py_END_ALLOW_THREADS
255 if (bzerror == BZ_STREAM_END) {
256 f->size = f->pos;
257 f->mode = MODE_READ_EOF;
258 break;
259 } else if (bzerror != BZ_OK) {
260 Util_CatchBZ2Error(bzerror);
261 Py_DECREF(v);
262 return NULL;
263 }
264 if (c == '\n')
265 break;
266 /* Must be because buf == end */
267 if (n > 0)
268 break;
269 used_v_size = total_v_size;
270 increment = total_v_size >> 2; /* mild exponential growth */
271 total_v_size += increment;
272 if (total_v_size > INT_MAX) {
273 PyErr_SetString(PyExc_OverflowError,
274 "line is longer than a Python string can hold");
275 Py_DECREF(v);
276 return NULL;
277 }
278 if (_PyBytes_Resize(&v, total_v_size) < 0) {
279 return NULL;
280 }
281 buf = BUF(v) + used_v_size;
282 end = BUF(v) + total_v_size;
283 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000284
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000285 used_v_size = buf - BUF(v);
286 if (used_v_size != total_v_size) {
287 if (_PyBytes_Resize(&v, used_v_size) < 0) {
288 v = NULL;
289 }
290 }
291 return v;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000292}
293
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000294/* This is a hacked version of Python's fileobject.c:drop_readahead(). */
295static void
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000296Util_DropReadAhead(BZ2FileObject *f)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000297{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000298 if (f->f_buf != NULL) {
299 PyMem_Free(f->f_buf);
300 f->f_buf = NULL;
301 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000302}
303
304/* This is a hacked version of Python's fileobject.c:readahead(). */
305static int
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000306Util_ReadAhead(BZ2FileObject *f, int bufsize)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000307{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000308 int chunksize;
309 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000310
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000311 if (f->f_buf != NULL) {
312 if((f->f_bufend - f->f_bufptr) >= 1)
313 return 0;
314 else
315 Util_DropReadAhead(f);
316 }
317 if (f->mode == MODE_READ_EOF) {
318 f->f_bufptr = f->f_buf;
319 f->f_bufend = f->f_buf;
320 return 0;
321 }
322 if ((f->f_buf = PyMem_Malloc(bufsize)) == NULL) {
323 PyErr_NoMemory();
324 return -1;
325 }
326 Py_BEGIN_ALLOW_THREADS
327 chunksize = BZ2_bzRead(&bzerror, f->fp, f->f_buf, bufsize);
328 Py_END_ALLOW_THREADS
329 f->pos += chunksize;
330 if (bzerror == BZ_STREAM_END) {
331 f->size = f->pos;
332 f->mode = MODE_READ_EOF;
333 } else if (bzerror != BZ_OK) {
334 Util_CatchBZ2Error(bzerror);
335 Util_DropReadAhead(f);
336 return -1;
337 }
338 f->f_bufptr = f->f_buf;
339 f->f_bufend = f->f_buf + chunksize;
340 return 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000341}
342
343/* This is a hacked version of Python's
344 * fileobject.c:readahead_get_line_skip(). */
Christian Heimes72b710a2008-05-26 13:28:38 +0000345static PyBytesObject *
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000346Util_ReadAheadGetLineSkip(BZ2FileObject *f, int skip, int bufsize)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000347{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000348 PyBytesObject* s;
349 char *bufptr;
350 char *buf;
351 int len;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000352
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000353 if (f->f_buf == NULL)
354 if (Util_ReadAhead(f, bufsize) < 0)
355 return NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000356
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000357 len = f->f_bufend - f->f_bufptr;
358 if (len == 0)
359 return (PyBytesObject *)
360 PyBytes_FromStringAndSize(NULL, skip);
361 bufptr = memchr(f->f_bufptr, '\n', len);
362 if (bufptr != NULL) {
363 bufptr++; /* Count the '\n' */
364 len = bufptr - f->f_bufptr;
365 s = (PyBytesObject *)
366 PyBytes_FromStringAndSize(NULL, skip+len);
367 if (s == NULL)
368 return NULL;
369 memcpy(PyBytes_AS_STRING(s)+skip, f->f_bufptr, len);
370 f->f_bufptr = bufptr;
371 if (bufptr == f->f_bufend)
372 Util_DropReadAhead(f);
373 } else {
374 bufptr = f->f_bufptr;
375 buf = f->f_buf;
376 f->f_buf = NULL; /* Force new readahead buffer */
377 s = Util_ReadAheadGetLineSkip(f, skip+len,
378 bufsize + (bufsize>>2));
379 if (s == NULL) {
380 PyMem_Free(buf);
381 return NULL;
382 }
383 memcpy(PyBytes_AS_STRING(s)+skip, bufptr, len);
384 PyMem_Free(buf);
385 }
386 return s;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000387}
388
389/* ===================================================================== */
390/* Methods of BZ2File. */
391
392PyDoc_STRVAR(BZ2File_read__doc__,
393"read([size]) -> string\n\
394\n\
395Read at most size uncompressed bytes, returned as a string. If the size\n\
396argument is negative or omitted, read until EOF is reached.\n\
397");
398
399/* This is a hacked version of Python's fileobject.c:file_read(). */
400static PyObject *
401BZ2File_read(BZ2FileObject *self, PyObject *args)
402{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000403 long bytesrequested = -1;
404 size_t bytesread, buffersize, chunksize;
405 int bzerror;
406 PyObject *ret = NULL;
Tim Peterse3228092002-11-09 04:21:44 +0000407
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000408 if (!PyArg_ParseTuple(args, "|l:read", &bytesrequested))
409 return NULL;
Tim Peterse3228092002-11-09 04:21:44 +0000410
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000411 ACQUIRE_LOCK(self);
412 switch (self->mode) {
413 case MODE_READ:
414 break;
415 case MODE_READ_EOF:
416 ret = PyBytes_FromStringAndSize("", 0);
417 goto cleanup;
418 case MODE_CLOSED:
419 PyErr_SetString(PyExc_ValueError,
420 "I/O operation on closed file");
421 goto cleanup;
422 default:
423 PyErr_SetString(PyExc_IOError,
424 "file is not ready for reading");
425 goto cleanup;
426 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000427
Antoine Pitrou7ffa1962010-08-01 20:08:46 +0000428 /* refuse to mix with f.next() */
429 if (check_iterbuffered(self))
430 goto cleanup;
431
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000432 if (bytesrequested < 0)
433 buffersize = Util_NewBufferSize((size_t)0);
434 else
435 buffersize = bytesrequested;
436 if (buffersize > INT_MAX) {
437 PyErr_SetString(PyExc_OverflowError,
438 "requested number of bytes is "
439 "more than a Python string can hold");
440 goto cleanup;
441 }
442 ret = PyBytes_FromStringAndSize((char *)NULL, buffersize);
443 if (ret == NULL || buffersize == 0)
444 goto cleanup;
445 bytesread = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000446
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000447 for (;;) {
448 Py_BEGIN_ALLOW_THREADS
449 chunksize = BZ2_bzRead(&bzerror, self->fp,
450 BUF(ret)+bytesread,
451 buffersize-bytesread);
452 self->pos += chunksize;
453 Py_END_ALLOW_THREADS
454 bytesread += chunksize;
455 if (bzerror == BZ_STREAM_END) {
456 self->size = self->pos;
457 self->mode = MODE_READ_EOF;
458 break;
459 } else if (bzerror != BZ_OK) {
460 Util_CatchBZ2Error(bzerror);
461 Py_DECREF(ret);
462 ret = NULL;
463 goto cleanup;
464 }
465 if (bytesrequested < 0) {
466 buffersize = Util_NewBufferSize(buffersize);
467 if (_PyBytes_Resize(&ret, buffersize) < 0) {
468 ret = NULL;
469 goto cleanup;
470 }
471 } else {
472 break;
473 }
474 }
475 if (bytesread != buffersize) {
476 if (_PyBytes_Resize(&ret, bytesread) < 0) {
477 ret = NULL;
478 }
479 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000480
481cleanup:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000482 RELEASE_LOCK(self);
483 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000484}
485
486PyDoc_STRVAR(BZ2File_readline__doc__,
487"readline([size]) -> string\n\
488\n\
489Return the next line from the file, as a string, retaining newline.\n\
490A non-negative size argument will limit the maximum number of bytes to\n\
491return (an incomplete line may be returned then). Return an empty\n\
492string at EOF.\n\
493");
494
495static PyObject *
496BZ2File_readline(BZ2FileObject *self, PyObject *args)
497{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000498 PyObject *ret = NULL;
499 int sizehint = -1;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000500
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000501 if (!PyArg_ParseTuple(args, "|i:readline", &sizehint))
502 return NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000503
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000504 ACQUIRE_LOCK(self);
505 switch (self->mode) {
506 case MODE_READ:
507 break;
508 case MODE_READ_EOF:
509 ret = PyBytes_FromStringAndSize("", 0);
510 goto cleanup;
511 case MODE_CLOSED:
512 PyErr_SetString(PyExc_ValueError,
513 "I/O operation on closed file");
514 goto cleanup;
515 default:
516 PyErr_SetString(PyExc_IOError,
517 "file is not ready for reading");
518 goto cleanup;
519 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000520
Antoine Pitrou7ffa1962010-08-01 20:08:46 +0000521 /* refuse to mix with f.next() */
522 if (check_iterbuffered(self))
523 goto cleanup;
524
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000525 if (sizehint == 0)
526 ret = PyBytes_FromStringAndSize("", 0);
527 else
528 ret = Util_GetLine(self, (sizehint < 0) ? 0 : sizehint);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000529
530cleanup:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000531 RELEASE_LOCK(self);
532 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000533}
534
535PyDoc_STRVAR(BZ2File_readlines__doc__,
536"readlines([size]) -> list\n\
537\n\
538Call readline() repeatedly and return a list of lines read.\n\
539The optional size argument, if given, is an approximate bound on the\n\
540total number of bytes in the lines returned.\n\
541");
542
543/* This is a hacked version of Python's fileobject.c:file_readlines(). */
544static PyObject *
545BZ2File_readlines(BZ2FileObject *self, PyObject *args)
546{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000547 long sizehint = 0;
548 PyObject *list = NULL;
549 PyObject *line;
550 char small_buffer[SMALLCHUNK];
551 char *buffer = small_buffer;
552 size_t buffersize = SMALLCHUNK;
553 PyObject *big_buffer = NULL;
554 size_t nfilled = 0;
555 size_t nread;
556 size_t totalread = 0;
557 char *p, *q, *end;
558 int err;
559 int shortread = 0;
560 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000561
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000562 if (!PyArg_ParseTuple(args, "|l:readlines", &sizehint))
563 return NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000564
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000565 ACQUIRE_LOCK(self);
566 switch (self->mode) {
567 case MODE_READ:
568 break;
569 case MODE_READ_EOF:
570 list = PyList_New(0);
571 goto cleanup;
572 case MODE_CLOSED:
573 PyErr_SetString(PyExc_ValueError,
574 "I/O operation on closed file");
575 goto cleanup;
576 default:
577 PyErr_SetString(PyExc_IOError,
578 "file is not ready for reading");
579 goto cleanup;
580 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000581
Antoine Pitrou7ffa1962010-08-01 20:08:46 +0000582 /* refuse to mix with f.next() */
583 if (check_iterbuffered(self))
584 goto cleanup;
585
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000586 if ((list = PyList_New(0)) == NULL)
587 goto cleanup;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000588
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000589 for (;;) {
590 Py_BEGIN_ALLOW_THREADS
591 nread = BZ2_bzRead(&bzerror, self->fp,
592 buffer+nfilled, buffersize-nfilled);
593 self->pos += nread;
594 Py_END_ALLOW_THREADS
595 if (bzerror == BZ_STREAM_END) {
596 self->size = self->pos;
597 self->mode = MODE_READ_EOF;
598 if (nread == 0) {
599 sizehint = 0;
600 break;
601 }
602 shortread = 1;
603 } else if (bzerror != BZ_OK) {
604 Util_CatchBZ2Error(bzerror);
605 error:
606 Py_DECREF(list);
607 list = NULL;
608 goto cleanup;
609 }
610 totalread += nread;
611 p = memchr(buffer+nfilled, '\n', nread);
612 if (!shortread && p == NULL) {
613 /* Need a larger buffer to fit this line */
614 nfilled += nread;
615 buffersize *= 2;
616 if (buffersize > INT_MAX) {
617 PyErr_SetString(PyExc_OverflowError,
618 "line is longer than a Python string can hold");
619 goto error;
620 }
621 if (big_buffer == NULL) {
622 /* Create the big buffer */
623 big_buffer = PyBytes_FromStringAndSize(
624 NULL, buffersize);
625 if (big_buffer == NULL)
626 goto error;
627 buffer = PyBytes_AS_STRING(big_buffer);
628 memcpy(buffer, small_buffer, nfilled);
629 }
630 else {
631 /* Grow the big buffer */
632 if (_PyBytes_Resize(&big_buffer, buffersize) < 0){
633 big_buffer = NULL;
634 goto error;
635 }
636 buffer = PyBytes_AS_STRING(big_buffer);
637 }
638 continue;
639 }
640 end = buffer+nfilled+nread;
641 q = buffer;
642 while (p != NULL) {
643 /* Process complete lines */
644 p++;
645 line = PyBytes_FromStringAndSize(q, p-q);
646 if (line == NULL)
647 goto error;
648 err = PyList_Append(list, line);
649 Py_DECREF(line);
650 if (err != 0)
651 goto error;
652 q = p;
653 p = memchr(q, '\n', end-q);
654 }
655 /* Move the remaining incomplete line to the start */
656 nfilled = end-q;
657 memmove(buffer, q, nfilled);
658 if (sizehint > 0)
659 if (totalread >= (size_t)sizehint)
660 break;
661 if (shortread) {
662 sizehint = 0;
663 break;
664 }
665 }
666 if (nfilled != 0) {
667 /* Partial last line */
668 line = PyBytes_FromStringAndSize(buffer, nfilled);
669 if (line == NULL)
670 goto error;
671 if (sizehint > 0) {
672 /* Need to complete the last line */
673 PyObject *rest = Util_GetLine(self, 0);
674 if (rest == NULL) {
675 Py_DECREF(line);
676 goto error;
677 }
678 PyBytes_Concat(&line, rest);
679 Py_DECREF(rest);
680 if (line == NULL)
681 goto error;
682 }
683 err = PyList_Append(list, line);
684 Py_DECREF(line);
685 if (err != 0)
686 goto error;
687 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000688
689 cleanup:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000690 RELEASE_LOCK(self);
691 if (big_buffer) {
692 Py_DECREF(big_buffer);
693 }
694 return list;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000695}
696
697PyDoc_STRVAR(BZ2File_write__doc__,
698"write(data) -> None\n\
699\n\
700Write the 'data' string to file. Note that due to buffering, close() may\n\
701be needed before the file on disk reflects the data written.\n\
702");
703
704/* This is a hacked version of Python's fileobject.c:file_write(). */
705static PyObject *
706BZ2File_write(BZ2FileObject *self, PyObject *args)
707{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000708 PyObject *ret = NULL;
709 Py_buffer pbuf;
710 char *buf;
711 int len;
712 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000713
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000714 if (!PyArg_ParseTuple(args, "y*:write", &pbuf))
715 return NULL;
716 buf = pbuf.buf;
717 len = pbuf.len;
Tim Peterse3228092002-11-09 04:21:44 +0000718
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000719 ACQUIRE_LOCK(self);
720 switch (self->mode) {
721 case MODE_WRITE:
722 break;
Tim Peterse3228092002-11-09 04:21:44 +0000723
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000724 case MODE_CLOSED:
725 PyErr_SetString(PyExc_ValueError,
726 "I/O operation on closed file");
727 goto cleanup;
Tim Peterse3228092002-11-09 04:21:44 +0000728
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000729 default:
730 PyErr_SetString(PyExc_IOError,
731 "file is not ready for writing");
732 goto cleanup;
733 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000734
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000735 Py_BEGIN_ALLOW_THREADS
736 BZ2_bzWrite (&bzerror, self->fp, buf, len);
737 self->pos += len;
738 Py_END_ALLOW_THREADS
Tim Peterse3228092002-11-09 04:21:44 +0000739
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000740 if (bzerror != BZ_OK) {
741 Util_CatchBZ2Error(bzerror);
742 goto cleanup;
743 }
Tim Peterse3228092002-11-09 04:21:44 +0000744
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000745 Py_INCREF(Py_None);
746 ret = Py_None;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000747
748cleanup:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000749 PyBuffer_Release(&pbuf);
750 RELEASE_LOCK(self);
751 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000752}
753
754PyDoc_STRVAR(BZ2File_writelines__doc__,
755"writelines(sequence_of_strings) -> None\n\
756\n\
757Write the sequence of strings to the file. Note that newlines are not\n\
758added. The sequence can be any iterable object producing strings. This is\n\
759equivalent to calling write() for each string.\n\
760");
761
762/* This is a hacked version of Python's fileobject.c:file_writelines(). */
763static PyObject *
764BZ2File_writelines(BZ2FileObject *self, PyObject *seq)
765{
766#define CHUNKSIZE 1000
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000767 PyObject *list = NULL;
768 PyObject *iter = NULL;
769 PyObject *ret = NULL;
770 PyObject *line;
771 int i, j, index, len, islist;
772 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000773
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000774 ACQUIRE_LOCK(self);
775 switch (self->mode) {
776 case MODE_WRITE:
777 break;
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000778
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000779 case MODE_CLOSED:
780 PyErr_SetString(PyExc_ValueError,
781 "I/O operation on closed file");
782 goto error;
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000783
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000784 default:
785 PyErr_SetString(PyExc_IOError,
786 "file is not ready for writing");
787 goto error;
788 }
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000789
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000790 islist = PyList_Check(seq);
791 if (!islist) {
792 iter = PyObject_GetIter(seq);
793 if (iter == NULL) {
794 PyErr_SetString(PyExc_TypeError,
795 "writelines() requires an iterable argument");
796 goto error;
797 }
798 list = PyList_New(CHUNKSIZE);
799 if (list == NULL)
800 goto error;
801 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000802
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000803 /* Strategy: slurp CHUNKSIZE lines into a private list,
804 checking that they are all strings, then write that list
805 without holding the interpreter lock, then come back for more. */
806 for (index = 0; ; index += CHUNKSIZE) {
807 if (islist) {
808 Py_XDECREF(list);
809 list = PyList_GetSlice(seq, index, index+CHUNKSIZE);
810 if (list == NULL)
811 goto error;
812 j = PyList_GET_SIZE(list);
813 }
814 else {
815 for (j = 0; j < CHUNKSIZE; j++) {
816 line = PyIter_Next(iter);
817 if (line == NULL) {
818 if (PyErr_Occurred())
819 goto error;
820 break;
821 }
822 PyList_SetItem(list, j, line);
823 }
824 }
825 if (j == 0)
826 break;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000827
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000828 /* Check that all entries are indeed byte strings. If not,
829 apply the same rules as for file.write() and
830 convert the rets to strings. This is slow, but
831 seems to be the only way since all conversion APIs
832 could potentially execute Python code. */
833 for (i = 0; i < j; i++) {
834 PyObject *v = PyList_GET_ITEM(list, i);
835 if (!PyBytes_Check(v)) {
836 const char *buffer;
837 Py_ssize_t len;
838 if (PyObject_AsCharBuffer(v, &buffer, &len)) {
839 PyErr_SetString(PyExc_TypeError,
840 "writelines() "
841 "argument must be "
842 "a sequence of "
843 "bytes objects");
844 goto error;
845 }
846 line = PyBytes_FromStringAndSize(buffer,
847 len);
848 if (line == NULL)
849 goto error;
850 Py_DECREF(v);
851 PyList_SET_ITEM(list, i, line);
852 }
853 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000854
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000855 /* Since we are releasing the global lock, the
856 following code may *not* execute Python code. */
857 Py_BEGIN_ALLOW_THREADS
858 for (i = 0; i < j; i++) {
859 line = PyList_GET_ITEM(list, i);
860 len = PyBytes_GET_SIZE(line);
861 BZ2_bzWrite (&bzerror, self->fp,
862 PyBytes_AS_STRING(line), len);
863 if (bzerror != BZ_OK) {
864 Py_BLOCK_THREADS
865 Util_CatchBZ2Error(bzerror);
866 goto error;
867 }
868 }
869 Py_END_ALLOW_THREADS
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000870
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000871 if (j < CHUNKSIZE)
872 break;
873 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000874
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000875 Py_INCREF(Py_None);
876 ret = Py_None;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000877
878 error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000879 RELEASE_LOCK(self);
880 Py_XDECREF(list);
881 Py_XDECREF(iter);
882 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000883#undef CHUNKSIZE
884}
885
886PyDoc_STRVAR(BZ2File_seek__doc__,
887"seek(offset [, whence]) -> None\n\
888\n\
889Move to new file position. Argument offset is a byte count. Optional\n\
890argument whence defaults to 0 (offset from start of file, offset\n\
891should be >= 0); other values are 1 (move relative to current position,\n\
892positive or negative), and 2 (move relative to end of file, usually\n\
893negative, although many platforms allow seeking beyond the end of a file).\n\
894\n\
895Note that seeking of bz2 files is emulated, and depending on the parameters\n\
896the operation may be extremely slow.\n\
897");
898
899static PyObject *
900BZ2File_seek(BZ2FileObject *self, PyObject *args)
901{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000902 int where = 0;
903 PyObject *offobj;
904 Py_off_t offset;
905 char small_buffer[SMALLCHUNK];
906 char *buffer = small_buffer;
907 size_t buffersize = SMALLCHUNK;
908 Py_off_t bytesread = 0;
909 size_t readsize;
910 int chunksize;
911 int bzerror;
912 PyObject *ret = NULL;
Tim Peterse3228092002-11-09 04:21:44 +0000913
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000914 if (!PyArg_ParseTuple(args, "O|i:seek", &offobj, &where))
915 return NULL;
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000916#if !defined(HAVE_LARGEFILE_SUPPORT)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000917 offset = PyLong_AsLong(offobj);
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000918#else
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000919 offset = PyLong_Check(offobj) ?
920 PyLong_AsLongLong(offobj) : PyLong_AsLong(offobj);
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000921#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000922 if (PyErr_Occurred())
923 return NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000924
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000925 ACQUIRE_LOCK(self);
926 Util_DropReadAhead(self);
927 switch (self->mode) {
928 case MODE_READ:
929 case MODE_READ_EOF:
930 break;
Tim Peterse3228092002-11-09 04:21:44 +0000931
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000932 case MODE_CLOSED:
933 PyErr_SetString(PyExc_ValueError,
934 "I/O operation on closed file");
935 goto cleanup;
Tim Peterse3228092002-11-09 04:21:44 +0000936
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000937 default:
938 PyErr_SetString(PyExc_IOError,
939 "seek works only while reading");
940 goto cleanup;
941 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000942
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000943 if (where == 2) {
944 if (self->size == -1) {
945 assert(self->mode != MODE_READ_EOF);
946 for (;;) {
947 Py_BEGIN_ALLOW_THREADS
948 chunksize = BZ2_bzRead(&bzerror, self->fp,
949 buffer, buffersize);
950 self->pos += chunksize;
951 Py_END_ALLOW_THREADS
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000952
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000953 bytesread += chunksize;
954 if (bzerror == BZ_STREAM_END) {
955 break;
956 } else if (bzerror != BZ_OK) {
957 Util_CatchBZ2Error(bzerror);
958 goto cleanup;
959 }
960 }
961 self->mode = MODE_READ_EOF;
962 self->size = self->pos;
963 bytesread = 0;
964 }
965 offset = self->size + offset;
966 } else if (where == 1) {
967 offset = self->pos + offset;
968 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000969
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000970 /* Before getting here, offset must be the absolute position the file
971 * pointer should be set to. */
Georg Brandl47fab922006-02-18 21:57:25 +0000972
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000973 if (offset >= self->pos) {
974 /* we can move forward */
975 offset -= self->pos;
976 } else {
977 /* we cannot move back, so rewind the stream */
978 BZ2_bzReadClose(&bzerror, self->fp);
979 if (bzerror != BZ_OK) {
980 Util_CatchBZ2Error(bzerror);
981 goto cleanup;
982 }
983 rewind(self->rawfp);
984 self->pos = 0;
985 self->fp = BZ2_bzReadOpen(&bzerror, self->rawfp,
986 0, 0, NULL, 0);
987 if (bzerror != BZ_OK) {
988 Util_CatchBZ2Error(bzerror);
989 goto cleanup;
990 }
991 self->mode = MODE_READ;
992 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000993
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000994 if (offset <= 0 || self->mode == MODE_READ_EOF)
995 goto exit;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000996
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000997 /* Before getting here, offset must be set to the number of bytes
998 * to walk forward. */
999 for (;;) {
1000 if (offset-bytesread > buffersize)
1001 readsize = buffersize;
1002 else
1003 /* offset might be wider that readsize, but the result
1004 * of the subtraction is bound by buffersize (see the
1005 * condition above). buffersize is 8192. */
1006 readsize = (size_t)(offset-bytesread);
1007 Py_BEGIN_ALLOW_THREADS
1008 chunksize = BZ2_bzRead(&bzerror, self->fp, buffer, readsize);
1009 self->pos += chunksize;
1010 Py_END_ALLOW_THREADS
1011 bytesread += chunksize;
1012 if (bzerror == BZ_STREAM_END) {
1013 self->size = self->pos;
1014 self->mode = MODE_READ_EOF;
1015 break;
1016 } else if (bzerror != BZ_OK) {
1017 Util_CatchBZ2Error(bzerror);
1018 goto cleanup;
1019 }
1020 if (bytesread == offset)
1021 break;
1022 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001023
1024exit:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001025 Py_INCREF(Py_None);
1026 ret = Py_None;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001027
1028cleanup:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001029 RELEASE_LOCK(self);
1030 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001031}
1032
1033PyDoc_STRVAR(BZ2File_tell__doc__,
1034"tell() -> int\n\
1035\n\
1036Return the current file position, an integer (may be a long integer).\n\
1037");
1038
1039static PyObject *
1040BZ2File_tell(BZ2FileObject *self, PyObject *args)
1041{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001042 PyObject *ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001043
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001044 if (self->mode == MODE_CLOSED) {
1045 PyErr_SetString(PyExc_ValueError,
1046 "I/O operation on closed file");
1047 goto cleanup;
1048 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001049
Georg Brandla8bcecc2005-09-03 07:49:53 +00001050#if !defined(HAVE_LARGEFILE_SUPPORT)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001051 ret = PyLong_FromLong(self->pos);
Georg Brandla8bcecc2005-09-03 07:49:53 +00001052#else
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001053 ret = PyLong_FromLongLong(self->pos);
Georg Brandla8bcecc2005-09-03 07:49:53 +00001054#endif
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001055
1056cleanup:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001057 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001058}
1059
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001060PyDoc_STRVAR(BZ2File_close__doc__,
1061"close() -> None or (perhaps) an integer\n\
1062\n\
1063Close the file. Sets data attribute .closed to true. A closed file\n\
1064cannot be used for further I/O operations. close() may be called more\n\
1065than once without error.\n\
1066");
1067
1068static PyObject *
1069BZ2File_close(BZ2FileObject *self)
1070{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001071 PyObject *ret = NULL;
1072 int bzerror = BZ_OK;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001073
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001074 if (self->mode == MODE_CLOSED) {
1075 Py_RETURN_NONE;
1076 }
Guido van Rossumf09ca142007-06-13 00:03:05 +00001077
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001078 ACQUIRE_LOCK(self);
1079 switch (self->mode) {
1080 case MODE_READ:
1081 case MODE_READ_EOF:
1082 BZ2_bzReadClose(&bzerror, self->fp);
1083 break;
1084 case MODE_WRITE:
1085 BZ2_bzWriteClose(&bzerror, self->fp,
1086 0, NULL, NULL);
1087 break;
1088 }
1089 self->mode = MODE_CLOSED;
1090 fclose(self->rawfp);
1091 self->rawfp = NULL;
1092 if (bzerror == BZ_OK) {
1093 Py_INCREF(Py_None);
1094 ret = Py_None;
1095 }
1096 else {
1097 Util_CatchBZ2Error(bzerror);
1098 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001099
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001100 RELEASE_LOCK(self);
1101 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001102}
1103
Antoine Pitrou308705e2009-01-10 16:22:51 +00001104PyDoc_STRVAR(BZ2File_enter_doc,
1105"__enter__() -> self.");
1106
1107static PyObject *
1108BZ2File_enter(BZ2FileObject *self)
1109{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001110 if (self->mode == MODE_CLOSED) {
1111 PyErr_SetString(PyExc_ValueError,
1112 "I/O operation on closed file");
1113 return NULL;
1114 }
1115 Py_INCREF(self);
1116 return (PyObject *) self;
Antoine Pitrou308705e2009-01-10 16:22:51 +00001117}
1118
1119PyDoc_STRVAR(BZ2File_exit_doc,
1120"__exit__(*excinfo) -> None. Closes the file.");
1121
1122static PyObject *
1123BZ2File_exit(BZ2FileObject *self, PyObject *args)
1124{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001125 PyObject *ret = PyObject_CallMethod((PyObject *) self, "close", NULL);
1126 if (!ret)
1127 /* If error occurred, pass through */
1128 return NULL;
1129 Py_DECREF(ret);
1130 Py_RETURN_NONE;
Antoine Pitrou308705e2009-01-10 16:22:51 +00001131}
1132
1133
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001134static PyObject *BZ2File_getiter(BZ2FileObject *self);
1135
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001136static PyMethodDef BZ2File_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001137 {"read", (PyCFunction)BZ2File_read, METH_VARARGS, BZ2File_read__doc__},
1138 {"readline", (PyCFunction)BZ2File_readline, METH_VARARGS, BZ2File_readline__doc__},
1139 {"readlines", (PyCFunction)BZ2File_readlines, METH_VARARGS, BZ2File_readlines__doc__},
1140 {"write", (PyCFunction)BZ2File_write, METH_VARARGS, BZ2File_write__doc__},
1141 {"writelines", (PyCFunction)BZ2File_writelines, METH_O, BZ2File_writelines__doc__},
1142 {"seek", (PyCFunction)BZ2File_seek, METH_VARARGS, BZ2File_seek__doc__},
1143 {"tell", (PyCFunction)BZ2File_tell, METH_NOARGS, BZ2File_tell__doc__},
1144 {"close", (PyCFunction)BZ2File_close, METH_NOARGS, BZ2File_close__doc__},
1145 {"__enter__", (PyCFunction)BZ2File_enter, METH_NOARGS, BZ2File_enter_doc},
1146 {"__exit__", (PyCFunction)BZ2File_exit, METH_VARARGS, BZ2File_exit_doc},
1147 {NULL, NULL} /* sentinel */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001148};
1149
1150
1151/* ===================================================================== */
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001152/* Getters and setters of BZ2File. */
1153
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001154static PyObject *
1155BZ2File_get_closed(BZ2FileObject *self, void *closure)
1156{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001157 return PyLong_FromLong(self->mode == MODE_CLOSED);
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001158}
1159
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001160static PyGetSetDef BZ2File_getset[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001161 {"closed", (getter)BZ2File_get_closed, NULL,
1162 "True if the file is closed"},
1163 {NULL} /* Sentinel */
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001164};
1165
1166
1167/* ===================================================================== */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001168/* Slot definitions for BZ2File_Type. */
1169
1170static int
1171BZ2File_init(BZ2FileObject *self, PyObject *args, PyObject *kwargs)
1172{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001173 static char *kwlist[] = {"filename", "mode", "buffering",
1174 "compresslevel", 0};
1175 PyObject *name_obj = NULL;
1176 char *name;
1177 char *mode = "r";
1178 int buffering = -1;
1179 int compresslevel = 9;
1180 int bzerror;
1181 int mode_char = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001182
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001183 self->size = -1;
Tim Peterse3228092002-11-09 04:21:44 +00001184
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001185 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O&|sii:BZ2File",
1186 kwlist, PyUnicode_FSConverter, &name_obj,
1187 &mode, &buffering,
1188 &compresslevel))
1189 return -1;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001190
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001191 name = PyBytes_AsString(name_obj);
1192 if (compresslevel < 1 || compresslevel > 9) {
1193 PyErr_SetString(PyExc_ValueError,
1194 "compresslevel must be between 1 and 9");
1195 Py_DECREF(name_obj);
1196 return -1;
1197 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001198
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001199 for (;;) {
1200 int error = 0;
1201 switch (*mode) {
1202 case 'r':
1203 case 'w':
1204 if (mode_char)
1205 error = 1;
1206 mode_char = *mode;
1207 break;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001208
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001209 case 'b':
1210 break;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001211
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001212 default:
1213 error = 1;
1214 break;
1215 }
1216 if (error) {
1217 PyErr_Format(PyExc_ValueError,
1218 "invalid mode char %c", *mode);
1219 Py_DECREF(name_obj);
1220 return -1;
1221 }
1222 mode++;
1223 if (*mode == '\0')
1224 break;
1225 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001226
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001227 if (mode_char == 0) {
1228 mode_char = 'r';
1229 }
Georg Brandl6b95f1d2005-06-03 19:47:00 +00001230
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001231 mode = (mode_char == 'r') ? "rb" : "wb";
Tim Peterse3228092002-11-09 04:21:44 +00001232
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001233 self->rawfp = fopen(name, mode);
1234 Py_DECREF(name_obj);
1235 if (self->rawfp == NULL) {
1236 PyErr_SetFromErrno(PyExc_IOError);
1237 return -1;
1238 }
1239 /* XXX Ignore buffering */
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001240
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001241 /* From now on, we have stuff to dealloc, so jump to error label
1242 * instead of returning */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001243
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001244#ifdef WITH_THREAD
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001245 self->lock = PyThread_allocate_lock();
1246 if (!self->lock) {
1247 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
1248 goto error;
1249 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001250#endif
1251
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001252 if (mode_char == 'r')
1253 self->fp = BZ2_bzReadOpen(&bzerror, self->rawfp,
1254 0, 0, NULL, 0);
1255 else
1256 self->fp = BZ2_bzWriteOpen(&bzerror, self->rawfp,
1257 compresslevel, 0, 0);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001258
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001259 if (bzerror != BZ_OK) {
1260 Util_CatchBZ2Error(bzerror);
1261 goto error;
1262 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001263
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001264 self->mode = (mode_char == 'r') ? MODE_READ : MODE_WRITE;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001265
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001266 return 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001267
1268error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001269 fclose(self->rawfp);
1270 self->rawfp = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001271#ifdef WITH_THREAD
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001272 if (self->lock) {
1273 PyThread_free_lock(self->lock);
1274 self->lock = NULL;
1275 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001276#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001277 return -1;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001278}
1279
1280static void
1281BZ2File_dealloc(BZ2FileObject *self)
1282{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001283 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001284#ifdef WITH_THREAD
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001285 if (self->lock)
1286 PyThread_free_lock(self->lock);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001287#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001288 switch (self->mode) {
1289 case MODE_READ:
1290 case MODE_READ_EOF:
1291 BZ2_bzReadClose(&bzerror, self->fp);
1292 break;
1293 case MODE_WRITE:
1294 BZ2_bzWriteClose(&bzerror, self->fp,
1295 0, NULL, NULL);
1296 break;
1297 }
1298 Util_DropReadAhead(self);
1299 if (self->rawfp != NULL)
1300 fclose(self->rawfp);
1301 Py_TYPE(self)->tp_free((PyObject *)self);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001302}
1303
1304/* This is a hacked version of Python's fileobject.c:file_getiter(). */
1305static PyObject *
1306BZ2File_getiter(BZ2FileObject *self)
1307{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001308 if (self->mode == MODE_CLOSED) {
1309 PyErr_SetString(PyExc_ValueError,
1310 "I/O operation on closed file");
1311 return NULL;
1312 }
1313 Py_INCREF((PyObject*)self);
1314 return (PyObject *)self;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001315}
1316
1317/* This is a hacked version of Python's fileobject.c:file_iternext(). */
1318#define READAHEAD_BUFSIZE 8192
1319static PyObject *
1320BZ2File_iternext(BZ2FileObject *self)
1321{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001322 PyBytesObject* ret;
1323 ACQUIRE_LOCK(self);
1324 if (self->mode == MODE_CLOSED) {
1325 RELEASE_LOCK(self);
1326 PyErr_SetString(PyExc_ValueError,
1327 "I/O operation on closed file");
1328 return NULL;
1329 }
1330 ret = Util_ReadAheadGetLineSkip(self, 0, READAHEAD_BUFSIZE);
1331 RELEASE_LOCK(self);
1332 if (ret == NULL || PyBytes_GET_SIZE(ret) == 0) {
1333 Py_XDECREF(ret);
1334 return NULL;
1335 }
1336 return (PyObject *)ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001337}
1338
1339/* ===================================================================== */
1340/* BZ2File_Type definition. */
1341
1342PyDoc_VAR(BZ2File__doc__) =
1343PyDoc_STR(
1344"BZ2File(name [, mode='r', buffering=0, compresslevel=9]) -> file object\n\
1345\n\
1346Open a bz2 file. The mode can be 'r' or 'w', for reading (default) or\n\
1347writing. When opened for writing, the file will be created if it doesn't\n\
1348exist, and truncated otherwise. If the buffering argument is given, 0 means\n\
1349unbuffered, and larger numbers specify the buffer size. If compresslevel\n\
1350is given, must be a number between 1 and 9.\n\
Guido van Rossum88e860c2007-06-13 01:46:31 +00001351Data read is always returned in bytes; data written ought to be bytes.\n\
1352");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001353
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001354static PyTypeObject BZ2File_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001355 PyVarObject_HEAD_INIT(NULL, 0)
1356 "bz2.BZ2File", /*tp_name*/
1357 sizeof(BZ2FileObject), /*tp_basicsize*/
1358 0, /*tp_itemsize*/
1359 (destructor)BZ2File_dealloc, /*tp_dealloc*/
1360 0, /*tp_print*/
1361 0, /*tp_getattr*/
1362 0, /*tp_setattr*/
1363 0, /*tp_reserved*/
1364 0, /*tp_repr*/
1365 0, /*tp_as_number*/
1366 0, /*tp_as_sequence*/
1367 0, /*tp_as_mapping*/
1368 0, /*tp_hash*/
1369 0, /*tp_call*/
1370 0, /*tp_str*/
1371 PyObject_GenericGetAttr,/*tp_getattro*/
1372 PyObject_GenericSetAttr,/*tp_setattro*/
1373 0, /*tp_as_buffer*/
1374 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1375 BZ2File__doc__, /*tp_doc*/
1376 0, /*tp_traverse*/
1377 0, /*tp_clear*/
1378 0, /*tp_richcompare*/
1379 0, /*tp_weaklistoffset*/
1380 (getiterfunc)BZ2File_getiter, /*tp_iter*/
1381 (iternextfunc)BZ2File_iternext, /*tp_iternext*/
1382 BZ2File_methods, /*tp_methods*/
1383 0, /*tp_members*/
1384 BZ2File_getset, /*tp_getset*/
1385 0, /*tp_base*/
1386 0, /*tp_dict*/
1387 0, /*tp_descr_get*/
1388 0, /*tp_descr_set*/
1389 0, /*tp_dictoffset*/
1390 (initproc)BZ2File_init, /*tp_init*/
1391 PyType_GenericAlloc, /*tp_alloc*/
1392 PyType_GenericNew, /*tp_new*/
1393 PyObject_Free, /*tp_free*/
1394 0, /*tp_is_gc*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001395};
1396
1397
1398/* ===================================================================== */
1399/* Methods of BZ2Comp. */
1400
1401PyDoc_STRVAR(BZ2Comp_compress__doc__,
1402"compress(data) -> string\n\
1403\n\
1404Provide more data to the compressor object. It will return chunks of\n\
1405compressed data whenever possible. When you've finished providing data\n\
1406to compress, call the flush() method to finish the compression process,\n\
1407and return what is left in the internal buffers.\n\
1408");
1409
1410static PyObject *
1411BZ2Comp_compress(BZ2CompObject *self, PyObject *args)
1412{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001413 Py_buffer pdata;
Nadeem Vawda5f8f0d62012-10-21 21:13:27 +02001414 size_t input_left;
1415 size_t output_size = 0;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001416 PyObject *ret = NULL;
1417 bz_stream *bzs = &self->bzs;
1418 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001419
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001420 if (!PyArg_ParseTuple(args, "y*:compress", &pdata))
1421 return NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001422
Nadeem Vawda5f8f0d62012-10-21 21:13:27 +02001423 if (pdata.len == 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001424 PyBuffer_Release(&pdata);
1425 return PyBytes_FromStringAndSize("", 0);
1426 }
Gustavo Niemeyera6e436e2004-02-14 00:02:45 +00001427
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001428 ACQUIRE_LOCK(self);
1429 if (!self->running) {
1430 PyErr_SetString(PyExc_ValueError,
1431 "this object was already flushed");
1432 goto error;
1433 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001434
Nadeem Vawda5f8f0d62012-10-21 21:13:27 +02001435 ret = PyBytes_FromStringAndSize(NULL, SMALLCHUNK);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001436 if (!ret)
1437 goto error;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001438
Nadeem Vawda5f8f0d62012-10-21 21:13:27 +02001439 bzs->next_in = pdata.buf;
1440 bzs->avail_in = MIN(pdata.len, UINT_MAX);
1441 input_left = pdata.len - bzs->avail_in;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001442
Nadeem Vawda5f8f0d62012-10-21 21:13:27 +02001443 bzs->next_out = BUF(ret);
1444 bzs->avail_out = PyBytes_GET_SIZE(ret);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001445
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001446 for (;;) {
Nadeem Vawda5f8f0d62012-10-21 21:13:27 +02001447 char *saved_next_out;
1448
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001449 Py_BEGIN_ALLOW_THREADS
Nadeem Vawda5f8f0d62012-10-21 21:13:27 +02001450 saved_next_out = bzs->next_out;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001451 bzerror = BZ2_bzCompress(bzs, BZ_RUN);
Nadeem Vawda5f8f0d62012-10-21 21:13:27 +02001452 output_size += bzs->next_out - saved_next_out;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001453 Py_END_ALLOW_THREADS
Nadeem Vawda5f8f0d62012-10-21 21:13:27 +02001454
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001455 if (bzerror != BZ_RUN_OK) {
1456 Util_CatchBZ2Error(bzerror);
1457 goto error;
1458 }
Nadeem Vawda5f8f0d62012-10-21 21:13:27 +02001459 if (bzs->avail_in == 0) {
1460 if (input_left == 0)
1461 break; /* no more input data */
1462 bzs->avail_in = MIN(input_left, UINT_MAX);
1463 input_left -= bzs->avail_in;
1464 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001465 if (bzs->avail_out == 0) {
Nadeem Vawda5f8f0d62012-10-21 21:13:27 +02001466 size_t buffer_left = PyBytes_GET_SIZE(ret) - output_size;
1467 if (buffer_left == 0) {
1468 if (Util_GrowBuffer(&ret) < 0) {
1469 BZ2_bzCompressEnd(bzs);
1470 goto error;
1471 }
1472 bzs->next_out = BUF(ret) + output_size;
1473 buffer_left = PyBytes_GET_SIZE(ret) - output_size;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001474 }
Nadeem Vawda5f8f0d62012-10-21 21:13:27 +02001475 bzs->avail_out = MIN(buffer_left, UINT_MAX);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001476 }
1477 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001478
Nadeem Vawda5f8f0d62012-10-21 21:13:27 +02001479 if (_PyBytes_Resize(&ret, output_size) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001480 goto error;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001481
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001482 RELEASE_LOCK(self);
1483 PyBuffer_Release(&pdata);
1484 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001485
1486error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001487 RELEASE_LOCK(self);
1488 PyBuffer_Release(&pdata);
1489 Py_XDECREF(ret);
1490 return NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001491}
1492
1493PyDoc_STRVAR(BZ2Comp_flush__doc__,
1494"flush() -> string\n\
1495\n\
1496Finish the compression process and return what is left in internal buffers.\n\
1497You must not use the compressor object after calling this method.\n\
1498");
1499
1500static PyObject *
1501BZ2Comp_flush(BZ2CompObject *self)
1502{
Nadeem Vawda5f8f0d62012-10-21 21:13:27 +02001503 size_t output_size = 0;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001504 PyObject *ret = NULL;
1505 bz_stream *bzs = &self->bzs;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001506 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001507
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001508 ACQUIRE_LOCK(self);
1509 if (!self->running) {
Nadeem Vawda5f8f0d62012-10-21 21:13:27 +02001510 PyErr_SetString(PyExc_ValueError, "object was already flushed");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001511 goto error;
1512 }
1513 self->running = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001514
Nadeem Vawda5f8f0d62012-10-21 21:13:27 +02001515 ret = PyBytes_FromStringAndSize(NULL, SMALLCHUNK);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001516 if (!ret)
1517 goto error;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001518
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001519 bzs->next_out = BUF(ret);
Nadeem Vawda5f8f0d62012-10-21 21:13:27 +02001520 bzs->avail_out = PyBytes_GET_SIZE(ret);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001521
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001522 for (;;) {
Nadeem Vawda5f8f0d62012-10-21 21:13:27 +02001523 char *saved_next_out;
1524
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001525 Py_BEGIN_ALLOW_THREADS
Nadeem Vawda5f8f0d62012-10-21 21:13:27 +02001526 saved_next_out = bzs->next_out;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001527 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
Nadeem Vawda5f8f0d62012-10-21 21:13:27 +02001528 output_size += bzs->next_out - saved_next_out;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001529 Py_END_ALLOW_THREADS
Nadeem Vawda5f8f0d62012-10-21 21:13:27 +02001530
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001531 if (bzerror == BZ_STREAM_END) {
1532 break;
1533 } else if (bzerror != BZ_FINISH_OK) {
1534 Util_CatchBZ2Error(bzerror);
1535 goto error;
1536 }
1537 if (bzs->avail_out == 0) {
Nadeem Vawda5f8f0d62012-10-21 21:13:27 +02001538 size_t buffer_left = PyBytes_GET_SIZE(ret) - output_size;
1539 if (buffer_left == 0) {
1540 if (Util_GrowBuffer(&ret) < 0)
1541 goto error;
1542 bzs->next_out = BUF(ret) + output_size;
1543 buffer_left = PyBytes_GET_SIZE(ret) - output_size;
1544 }
1545 bzs->avail_out = MIN(buffer_left, UINT_MAX);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001546 }
1547 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001548
Nadeem Vawda5f8f0d62012-10-21 21:13:27 +02001549 if (output_size != PyBytes_GET_SIZE(ret))
1550 if (_PyBytes_Resize(&ret, output_size) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001551 goto error;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001552
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001553 RELEASE_LOCK(self);
1554 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001555
1556error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001557 RELEASE_LOCK(self);
1558 Py_XDECREF(ret);
1559 return NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001560}
1561
1562static PyMethodDef BZ2Comp_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001563 {"compress", (PyCFunction)BZ2Comp_compress, METH_VARARGS,
1564 BZ2Comp_compress__doc__},
1565 {"flush", (PyCFunction)BZ2Comp_flush, METH_NOARGS,
1566 BZ2Comp_flush__doc__},
1567 {NULL, NULL} /* sentinel */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001568};
1569
1570
1571/* ===================================================================== */
1572/* Slot definitions for BZ2Comp_Type. */
1573
1574static int
1575BZ2Comp_init(BZ2CompObject *self, PyObject *args, PyObject *kwargs)
1576{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001577 int compresslevel = 9;
1578 int bzerror;
1579 static char *kwlist[] = {"compresslevel", 0};
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001580
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001581 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i:BZ2Compressor",
1582 kwlist, &compresslevel))
1583 return -1;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001584
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001585 if (compresslevel < 1 || compresslevel > 9) {
1586 PyErr_SetString(PyExc_ValueError,
1587 "compresslevel must be between 1 and 9");
1588 goto error;
1589 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001590
1591#ifdef WITH_THREAD
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001592 self->lock = PyThread_allocate_lock();
1593 if (!self->lock) {
1594 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
1595 goto error;
1596 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001597#endif
1598
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001599 memset(&self->bzs, 0, sizeof(bz_stream));
1600 bzerror = BZ2_bzCompressInit(&self->bzs, compresslevel, 0, 0);
1601 if (bzerror != BZ_OK) {
1602 Util_CatchBZ2Error(bzerror);
1603 goto error;
1604 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001605
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001606 self->running = 1;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001607
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001608 return 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001609error:
1610#ifdef WITH_THREAD
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001611 if (self->lock) {
1612 PyThread_free_lock(self->lock);
1613 self->lock = NULL;
1614 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001615#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001616 return -1;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001617}
1618
1619static void
1620BZ2Comp_dealloc(BZ2CompObject *self)
1621{
1622#ifdef WITH_THREAD
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001623 if (self->lock)
1624 PyThread_free_lock(self->lock);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001625#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001626 BZ2_bzCompressEnd(&self->bzs);
1627 Py_TYPE(self)->tp_free((PyObject *)self);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001628}
1629
1630
1631/* ===================================================================== */
1632/* BZ2Comp_Type definition. */
1633
1634PyDoc_STRVAR(BZ2Comp__doc__,
1635"BZ2Compressor([compresslevel=9]) -> compressor object\n\
1636\n\
1637Create a new compressor object. This object may be used to compress\n\
1638data sequentially. If you want to compress data in one shot, use the\n\
1639compress() function instead. The compresslevel parameter, if given,\n\
1640must be a number between 1 and 9.\n\
1641");
1642
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001643static PyTypeObject BZ2Comp_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001644 PyVarObject_HEAD_INIT(NULL, 0)
1645 "bz2.BZ2Compressor", /*tp_name*/
1646 sizeof(BZ2CompObject), /*tp_basicsize*/
1647 0, /*tp_itemsize*/
1648 (destructor)BZ2Comp_dealloc, /*tp_dealloc*/
1649 0, /*tp_print*/
1650 0, /*tp_getattr*/
1651 0, /*tp_setattr*/
1652 0, /*tp_reserved*/
1653 0, /*tp_repr*/
1654 0, /*tp_as_number*/
1655 0, /*tp_as_sequence*/
1656 0, /*tp_as_mapping*/
1657 0, /*tp_hash*/
1658 0, /*tp_call*/
1659 0, /*tp_str*/
1660 PyObject_GenericGetAttr,/*tp_getattro*/
1661 PyObject_GenericSetAttr,/*tp_setattro*/
1662 0, /*tp_as_buffer*/
1663 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1664 BZ2Comp__doc__, /*tp_doc*/
1665 0, /*tp_traverse*/
1666 0, /*tp_clear*/
1667 0, /*tp_richcompare*/
1668 0, /*tp_weaklistoffset*/
1669 0, /*tp_iter*/
1670 0, /*tp_iternext*/
1671 BZ2Comp_methods, /*tp_methods*/
1672 0, /*tp_members*/
1673 0, /*tp_getset*/
1674 0, /*tp_base*/
1675 0, /*tp_dict*/
1676 0, /*tp_descr_get*/
1677 0, /*tp_descr_set*/
1678 0, /*tp_dictoffset*/
1679 (initproc)BZ2Comp_init, /*tp_init*/
1680 PyType_GenericAlloc, /*tp_alloc*/
1681 PyType_GenericNew, /*tp_new*/
1682 PyObject_Free, /*tp_free*/
1683 0, /*tp_is_gc*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001684};
1685
1686
1687/* ===================================================================== */
1688/* Members of BZ2Decomp. */
1689
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001690#undef OFF
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001691#define OFF(x) offsetof(BZ2DecompObject, x)
1692
1693static PyMemberDef BZ2Decomp_members[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001694 {"unused_data", T_OBJECT, OFF(unused_data), READONLY},
1695 {NULL} /* Sentinel */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001696};
1697
1698
1699/* ===================================================================== */
1700/* Methods of BZ2Decomp. */
1701
1702PyDoc_STRVAR(BZ2Decomp_decompress__doc__,
1703"decompress(data) -> string\n\
1704\n\
1705Provide more data to the decompressor object. It will return chunks\n\
1706of decompressed data whenever possible. If you try to decompress data\n\
1707after the end of stream is found, EOFError will be raised. If any data\n\
1708was found after the end of stream, it'll be ignored and saved in\n\
1709unused_data attribute.\n\
1710");
1711
1712static PyObject *
1713BZ2Decomp_decompress(BZ2DecompObject *self, PyObject *args)
1714{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001715 Py_buffer pdata;
Nadeem Vawda5f8f0d62012-10-21 21:13:27 +02001716 size_t input_left;
1717 size_t output_size = 0;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001718 PyObject *ret = NULL;
1719 bz_stream *bzs = &self->bzs;
1720 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001721
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001722 if (!PyArg_ParseTuple(args, "y*:decompress", &pdata))
1723 return NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001724
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001725 ACQUIRE_LOCK(self);
1726 if (!self->running) {
1727 PyErr_SetString(PyExc_EOFError, "end of stream was "
1728 "already found");
1729 goto error;
1730 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001731
Nadeem Vawda5f8f0d62012-10-21 21:13:27 +02001732 ret = PyBytes_FromStringAndSize(NULL, SMALLCHUNK);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001733 if (!ret)
1734 goto error;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001735
Nadeem Vawda5f8f0d62012-10-21 21:13:27 +02001736 bzs->next_in = pdata.buf;
1737 bzs->avail_in = MIN(pdata.len, UINT_MAX);
1738 input_left = pdata.len - bzs->avail_in;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001739
Nadeem Vawda5f8f0d62012-10-21 21:13:27 +02001740 bzs->next_out = BUF(ret);
1741 bzs->avail_out = PyBytes_GET_SIZE(ret);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001742
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001743 for (;;) {
Nadeem Vawda5f8f0d62012-10-21 21:13:27 +02001744 char *saved_next_out;
1745
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001746 Py_BEGIN_ALLOW_THREADS
Nadeem Vawda5f8f0d62012-10-21 21:13:27 +02001747 saved_next_out = bzs->next_out;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001748 bzerror = BZ2_bzDecompress(bzs);
Nadeem Vawda5f8f0d62012-10-21 21:13:27 +02001749 output_size += bzs->next_out - saved_next_out;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001750 Py_END_ALLOW_THREADS
Nadeem Vawda5f8f0d62012-10-21 21:13:27 +02001751
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001752 if (bzerror == BZ_STREAM_END) {
Nadeem Vawda5f8f0d62012-10-21 21:13:27 +02001753 self->running = 0;
1754 input_left += bzs->avail_in;
1755 if (input_left != 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001756 Py_DECREF(self->unused_data);
1757 self->unused_data =
Nadeem Vawda5f8f0d62012-10-21 21:13:27 +02001758 PyBytes_FromStringAndSize(bzs->next_in, input_left);
1759 if (self->unused_data == NULL)
1760 goto error;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001761 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001762 break;
1763 }
1764 if (bzerror != BZ_OK) {
1765 Util_CatchBZ2Error(bzerror);
1766 goto error;
1767 }
Nadeem Vawda5f8f0d62012-10-21 21:13:27 +02001768 if (bzs->avail_in == 0) {
1769 if (input_left == 0)
1770 break; /* no more input data */
1771 bzs->avail_in = MIN(input_left, UINT_MAX);
1772 input_left -= bzs->avail_in;
1773 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001774 if (bzs->avail_out == 0) {
Nadeem Vawda5f8f0d62012-10-21 21:13:27 +02001775 size_t buffer_left = PyBytes_GET_SIZE(ret) - output_size;
1776 if (buffer_left == 0) {
1777 if (Util_GrowBuffer(&ret) < 0) {
1778 BZ2_bzDecompressEnd(bzs);
1779 goto error;
1780 }
1781 bzs->next_out = BUF(ret) + output_size;
1782 buffer_left = PyBytes_GET_SIZE(ret) - output_size;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001783 }
Nadeem Vawda5f8f0d62012-10-21 21:13:27 +02001784 bzs->avail_out = MIN(buffer_left, UINT_MAX);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001785 }
1786 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001787
Nadeem Vawda5f8f0d62012-10-21 21:13:27 +02001788 if (output_size != PyBytes_GET_SIZE(ret))
1789 if (_PyBytes_Resize(&ret, output_size) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001790 goto error;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001791
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001792 RELEASE_LOCK(self);
1793 PyBuffer_Release(&pdata);
1794 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001795
1796error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001797 RELEASE_LOCK(self);
1798 PyBuffer_Release(&pdata);
1799 Py_XDECREF(ret);
1800 return NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001801}
1802
/* Method table for BZ2Decompressor: a single decompress() method taking
   positional arguments only (METH_VARARGS). */
static PyMethodDef BZ2Decomp_methods[] = {
    {"decompress", (PyCFunction)BZ2Decomp_decompress, METH_VARARGS, BZ2Decomp_decompress__doc__},
    {NULL, NULL}        /* sentinel */
};
1807
1808
1809/* ===================================================================== */
1810/* Slot definitions for BZ2Decomp_Type. */
1811
1812static int
1813BZ2Decomp_init(BZ2DecompObject *self, PyObject *args, PyObject *kwargs)
1814{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001815 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001816
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001817 if (!PyArg_ParseTuple(args, ":BZ2Decompressor"))
1818 return -1;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001819
1820#ifdef WITH_THREAD
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001821 self->lock = PyThread_allocate_lock();
1822 if (!self->lock) {
1823 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
1824 goto error;
1825 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001826#endif
1827
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001828 self->unused_data = PyBytes_FromStringAndSize("", 0);
1829 if (!self->unused_data)
1830 goto error;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001831
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001832 memset(&self->bzs, 0, sizeof(bz_stream));
1833 bzerror = BZ2_bzDecompressInit(&self->bzs, 0, 0);
1834 if (bzerror != BZ_OK) {
1835 Util_CatchBZ2Error(bzerror);
1836 goto error;
1837 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001838
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001839 self->running = 1;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001840
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001841 return 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001842
1843error:
1844#ifdef WITH_THREAD
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001845 if (self->lock) {
1846 PyThread_free_lock(self->lock);
1847 self->lock = NULL;
1848 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001849#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001850 Py_CLEAR(self->unused_data);
1851 return -1;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001852}
1853
1854static void
1855BZ2Decomp_dealloc(BZ2DecompObject *self)
1856{
1857#ifdef WITH_THREAD
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001858 if (self->lock)
1859 PyThread_free_lock(self->lock);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001860#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001861 Py_XDECREF(self->unused_data);
1862 BZ2_bzDecompressEnd(&self->bzs);
1863 Py_TYPE(self)->tp_free((PyObject *)self);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001864}
1865
1866
1867/* ===================================================================== */
1868/* BZ2Decomp_Type definition. */
1869
PyDoc_STRVAR(BZ2Decomp__doc__,
"BZ2Decompressor() -> decompressor object\n\
\n\
Create a new decompressor object. This object may be used to decompress\n\
data sequentially. If you want to decompress data in one shot, use the\n\
decompress() function instead.\n\
");

/* Type object for bz2.BZ2Decompressor.  Slots are filled positionally, so
   the order of the entries below is significant; each trailing comment
   names the slot it occupies.  Only deallocation, generic attribute
   access, the method and member tables, init/alloc/new/free and the
   docstring are provided -- every other slot is 0. */
static PyTypeObject BZ2Decomp_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "bz2.BZ2Decompressor",  /*tp_name*/
    sizeof(BZ2DecompObject), /*tp_basicsize*/
    0,                      /*tp_itemsize*/
    (destructor)BZ2Decomp_dealloc, /*tp_dealloc*/
    0,                      /*tp_print*/
    0,                      /*tp_getattr*/
    0,                      /*tp_setattr*/
    0,                      /*tp_reserved*/
    0,                      /*tp_repr*/
    0,                      /*tp_as_number*/
    0,                      /*tp_as_sequence*/
    0,                      /*tp_as_mapping*/
    0,                      /*tp_hash*/
    0,                      /*tp_call*/
    0,                      /*tp_str*/
    PyObject_GenericGetAttr,/*tp_getattro*/
    PyObject_GenericSetAttr,/*tp_setattro*/
    0,                      /*tp_as_buffer*/
    /* Py_TPFLAGS_BASETYPE is set in addition to the default flags. */
    Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
    BZ2Decomp__doc__,       /*tp_doc*/
    0,                      /*tp_traverse*/
    0,                      /*tp_clear*/
    0,                      /*tp_richcompare*/
    0,                      /*tp_weaklistoffset*/
    0,                      /*tp_iter*/
    0,                      /*tp_iternext*/
    BZ2Decomp_methods,      /*tp_methods*/
    BZ2Decomp_members,      /*tp_members*/
    0,                      /*tp_getset*/
    0,                      /*tp_base*/
    0,                      /*tp_dict*/
    0,                      /*tp_descr_get*/
    0,                      /*tp_descr_set*/
    0,                      /*tp_dictoffset*/
    (initproc)BZ2Decomp_init, /*tp_init*/
    PyType_GenericAlloc,    /*tp_alloc*/
    PyType_GenericNew,      /*tp_new*/
    PyObject_Free,          /*tp_free*/
    0,                      /*tp_is_gc*/
};
1920
1921
1922/* ===================================================================== */
1923/* Module functions. */
1924
1925PyDoc_STRVAR(bz2_compress__doc__,
1926"compress(data [, compresslevel=9]) -> string\n\
1927\n\
1928Compress data in one shot. If you want to compress data sequentially,\n\
1929use an instance of BZ2Compressor instead. The compresslevel parameter, if\n\
1930given, must be a number between 1 and 9.\n\
1931");
1932
1933static PyObject *
1934bz2_compress(PyObject *self, PyObject *args, PyObject *kwargs)
1935{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001936 int compresslevel=9;
Nadeem Vawda5f8f0d62012-10-21 21:13:27 +02001937 int action;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001938 Py_buffer pdata;
Nadeem Vawda5f8f0d62012-10-21 21:13:27 +02001939 size_t input_left;
1940 size_t output_size = 0;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001941 PyObject *ret = NULL;
1942 bz_stream _bzs;
1943 bz_stream *bzs = &_bzs;
1944 int bzerror;
1945 static char *kwlist[] = {"data", "compresslevel", 0};
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001946
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001947 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*|i",
1948 kwlist, &pdata,
1949 &compresslevel))
1950 return NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001951
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001952 if (compresslevel < 1 || compresslevel > 9) {
1953 PyErr_SetString(PyExc_ValueError,
1954 "compresslevel must be between 1 and 9");
1955 PyBuffer_Release(&pdata);
1956 return NULL;
1957 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001958
Nadeem Vawda5f8f0d62012-10-21 21:13:27 +02001959 ret = PyBytes_FromStringAndSize(NULL, SMALLCHUNK);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001960 if (!ret) {
1961 PyBuffer_Release(&pdata);
1962 return NULL;
1963 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001964
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001965 memset(bzs, 0, sizeof(bz_stream));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001966
Nadeem Vawda5f8f0d62012-10-21 21:13:27 +02001967 bzs->next_in = pdata.buf;
1968 bzs->avail_in = MIN(pdata.len, UINT_MAX);
1969 input_left = pdata.len - bzs->avail_in;
1970
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001971 bzs->next_out = BUF(ret);
Nadeem Vawda5f8f0d62012-10-21 21:13:27 +02001972 bzs->avail_out = PyBytes_GET_SIZE(ret);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001973
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001974 bzerror = BZ2_bzCompressInit(bzs, compresslevel, 0, 0);
1975 if (bzerror != BZ_OK) {
1976 Util_CatchBZ2Error(bzerror);
1977 PyBuffer_Release(&pdata);
1978 Py_DECREF(ret);
1979 return NULL;
1980 }
Tim Peterse3228092002-11-09 04:21:44 +00001981
Nadeem Vawda638fb9b2013-01-02 23:02:00 +01001982 action = input_left > 0 ? BZ_RUN : BZ_FINISH;
Nadeem Vawda5f8f0d62012-10-21 21:13:27 +02001983
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001984 for (;;) {
Nadeem Vawda5f8f0d62012-10-21 21:13:27 +02001985 char *saved_next_out;
1986
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001987 Py_BEGIN_ALLOW_THREADS
Nadeem Vawda5f8f0d62012-10-21 21:13:27 +02001988 saved_next_out = bzs->next_out;
1989 bzerror = BZ2_bzCompress(bzs, action);
1990 output_size += bzs->next_out - saved_next_out;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001991 Py_END_ALLOW_THREADS
Nadeem Vawda5f8f0d62012-10-21 21:13:27 +02001992
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001993 if (bzerror == BZ_STREAM_END) {
1994 break;
Nadeem Vawda5f8f0d62012-10-21 21:13:27 +02001995 } else if (bzerror != BZ_RUN_OK && bzerror != BZ_FINISH_OK) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001996 BZ2_bzCompressEnd(bzs);
1997 Util_CatchBZ2Error(bzerror);
1998 PyBuffer_Release(&pdata);
1999 Py_DECREF(ret);
2000 return NULL;
2001 }
Nadeem Vawda5f8f0d62012-10-21 21:13:27 +02002002 if (action == BZ_RUN && bzs->avail_in == 0) {
2003 if (input_left == 0) {
2004 action = BZ_FINISH;
2005 } else {
2006 bzs->avail_in = MIN(input_left, UINT_MAX);
2007 input_left -= bzs->avail_in;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002008 }
Nadeem Vawda5f8f0d62012-10-21 21:13:27 +02002009 }
2010 if (bzs->avail_out == 0) {
2011 size_t buffer_left = PyBytes_GET_SIZE(ret) - output_size;
2012 if (buffer_left == 0) {
2013 if (Util_GrowBuffer(&ret) < 0) {
2014 BZ2_bzCompressEnd(bzs);
2015 PyBuffer_Release(&pdata);
2016 return NULL;
2017 }
2018 bzs->next_out = BUF(ret) + output_size;
2019 buffer_left = PyBytes_GET_SIZE(ret) - output_size;
2020 }
2021 bzs->avail_out = MIN(buffer_left, UINT_MAX);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002022 }
2023 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002024
Nadeem Vawda5f8f0d62012-10-21 21:13:27 +02002025 if (output_size != PyBytes_GET_SIZE(ret))
2026 _PyBytes_Resize(&ret, output_size); /* Sets ret to NULL on failure. */
2027
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002028 BZ2_bzCompressEnd(bzs);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002029 PyBuffer_Release(&pdata);
2030 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002031}
2032
2033PyDoc_STRVAR(bz2_decompress__doc__,
2034"decompress(data) -> decompressed data\n\
2035\n\
2036Decompress data in one shot. If you want to decompress data sequentially,\n\
2037use an instance of BZ2Decompressor instead.\n\
2038");
2039
2040static PyObject *
2041bz2_decompress(PyObject *self, PyObject *args)
2042{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002043 Py_buffer pdata;
Nadeem Vawda5f8f0d62012-10-21 21:13:27 +02002044 size_t input_left;
2045 size_t output_size = 0;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002046 PyObject *ret;
2047 bz_stream _bzs;
2048 bz_stream *bzs = &_bzs;
2049 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002050
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002051 if (!PyArg_ParseTuple(args, "y*:decompress", &pdata))
2052 return NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002053
Nadeem Vawda5f8f0d62012-10-21 21:13:27 +02002054 if (pdata.len == 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002055 PyBuffer_Release(&pdata);
2056 return PyBytes_FromStringAndSize("", 0);
2057 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002058
Nadeem Vawda5f8f0d62012-10-21 21:13:27 +02002059 ret = PyBytes_FromStringAndSize(NULL, SMALLCHUNK);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002060 if (!ret) {
2061 PyBuffer_Release(&pdata);
2062 return NULL;
2063 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002064
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002065 memset(bzs, 0, sizeof(bz_stream));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002066
Nadeem Vawda5f8f0d62012-10-21 21:13:27 +02002067 bzs->next_in = pdata.buf;
2068 bzs->avail_in = MIN(pdata.len, UINT_MAX);
2069 input_left = pdata.len - bzs->avail_in;
2070
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002071 bzs->next_out = BUF(ret);
Nadeem Vawda5f8f0d62012-10-21 21:13:27 +02002072 bzs->avail_out = PyBytes_GET_SIZE(ret);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002073
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002074 bzerror = BZ2_bzDecompressInit(bzs, 0, 0);
2075 if (bzerror != BZ_OK) {
2076 Util_CatchBZ2Error(bzerror);
2077 Py_DECREF(ret);
2078 PyBuffer_Release(&pdata);
2079 return NULL;
2080 }
Tim Peterse3228092002-11-09 04:21:44 +00002081
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002082 for (;;) {
Nadeem Vawda5f8f0d62012-10-21 21:13:27 +02002083 char *saved_next_out;
2084
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002085 Py_BEGIN_ALLOW_THREADS
Nadeem Vawda5f8f0d62012-10-21 21:13:27 +02002086 saved_next_out = bzs->next_out;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002087 bzerror = BZ2_bzDecompress(bzs);
Nadeem Vawda5f8f0d62012-10-21 21:13:27 +02002088 output_size += bzs->next_out - saved_next_out;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002089 Py_END_ALLOW_THREADS
Nadeem Vawda5f8f0d62012-10-21 21:13:27 +02002090
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002091 if (bzerror == BZ_STREAM_END) {
2092 break;
2093 } else if (bzerror != BZ_OK) {
2094 BZ2_bzDecompressEnd(bzs);
2095 Util_CatchBZ2Error(bzerror);
2096 PyBuffer_Release(&pdata);
2097 Py_DECREF(ret);
2098 return NULL;
2099 }
2100 if (bzs->avail_in == 0) {
Nadeem Vawda5f8f0d62012-10-21 21:13:27 +02002101 if (input_left == 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002102 BZ2_bzDecompressEnd(bzs);
Nadeem Vawda5f8f0d62012-10-21 21:13:27 +02002103 PyErr_SetString(PyExc_ValueError,
2104 "couldn't find end of stream");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002105 PyBuffer_Release(&pdata);
Nadeem Vawda5f8f0d62012-10-21 21:13:27 +02002106 Py_DECREF(ret);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002107 return NULL;
2108 }
Nadeem Vawda5f8f0d62012-10-21 21:13:27 +02002109 bzs->avail_in = MIN(input_left, UINT_MAX);
2110 input_left -= bzs->avail_in;
2111 }
2112 if (bzs->avail_out == 0) {
2113 size_t buffer_left = PyBytes_GET_SIZE(ret) - output_size;
2114 if (buffer_left == 0) {
2115 if (Util_GrowBuffer(&ret) < 0) {
2116 BZ2_bzDecompressEnd(bzs);
2117 PyBuffer_Release(&pdata);
2118 return NULL;
2119 }
2120 bzs->next_out = BUF(ret) + output_size;
2121 buffer_left = PyBytes_GET_SIZE(ret) - output_size;
2122 }
2123 bzs->avail_out = MIN(buffer_left, UINT_MAX);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002124 }
2125 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002126
Nadeem Vawda5f8f0d62012-10-21 21:13:27 +02002127 if (output_size != PyBytes_GET_SIZE(ret))
2128 _PyBytes_Resize(&ret, output_size); /* Sets ret to NULL on failure. */
2129
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002130 BZ2_bzDecompressEnd(bzs);
2131 PyBuffer_Release(&pdata);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002132 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002133}
2134
/* Module-level functions: compress() accepts keyword arguments
   (METH_VARARGS|METH_KEYWORDS), decompress() is positional only. */
static PyMethodDef bz2_methods[] = {
    {"compress", (PyCFunction) bz2_compress, METH_VARARGS|METH_KEYWORDS,
        bz2_compress__doc__},
    {"decompress", (PyCFunction) bz2_decompress, METH_VARARGS,
        bz2_decompress__doc__},
    {NULL,              NULL}           /* sentinel */
};
2142
2143/* ===================================================================== */
2144/* Initialization function. */
2145
PyDoc_STRVAR(bz2__doc__,
"The python bz2 module provides a comprehensive interface for\n\
the bz2 compression library. It implements a complete file\n\
interface, one shot (de)compression functions, and types for\n\
sequential (de)compression.\n\
");


/* Module definition passed to PyModule_Create() in PyInit_bz2(). */
static struct PyModuleDef bz2module = {
    PyModuleDef_HEAD_INIT,
    "bz2",              /* module name */
    bz2__doc__,         /* module docstring */
    -1,                 /* module state size */
    bz2_methods,        /* module-level functions */
    /* The remaining optional slots are left unset. */
    NULL,
    NULL,
    NULL,
    NULL
};
2165
Neal Norwitz21d896c2003-07-01 20:15:21 +00002166PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00002167PyInit_bz2(void)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002168{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002169 PyObject *m;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002170
Antoine Pitrou70c60442010-09-23 19:51:39 +00002171 if (PyType_Ready(&BZ2File_Type) < 0)
2172 return NULL;
2173 if (PyType_Ready(&BZ2Comp_Type) < 0)
2174 return NULL;
2175 if (PyType_Ready(&BZ2Decomp_Type) < 0)
2176 return NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002177
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002178 m = PyModule_Create(&bz2module);
2179 if (m == NULL)
2180 return NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002181
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002182 PyModule_AddObject(m, "__author__", PyUnicode_FromString(__author__));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002183
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002184 Py_INCREF(&BZ2File_Type);
2185 PyModule_AddObject(m, "BZ2File", (PyObject *)&BZ2File_Type);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002186
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002187 Py_INCREF(&BZ2Comp_Type);
2188 PyModule_AddObject(m, "BZ2Compressor", (PyObject *)&BZ2Comp_Type);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002189
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002190 Py_INCREF(&BZ2Decomp_Type);
2191 PyModule_AddObject(m, "BZ2Decompressor", (PyObject *)&BZ2Decomp_Type);
2192 return m;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002193}