blob: 550f1cf9999102252e3c88e83ff2cfe6a98607a1 [file] [log] [blame]
/*

python-bz2 - python bz2 library interface

Copyright (c) 2002 Gustavo Niemeyer <niemeyer@conectiva.com>
Copyright (c) 2002 Python Software Foundation; All Rights Reserved

*/
9
#include "Python.h"
#include <limits.h>
#include <stdio.h>
#include <bzlib.h>
#include "structmember.h"

#ifdef WITH_THREAD
#include "pythread.h"
#endif
18
/* Credits string for the module. */
static char __author__[] =
"The bz2 python module was written by:\n\
\n\
 Gustavo Niemeyer <niemeyer@conectiva.com>\n\
";
24
/* Our very own off_t-like type, 64-bit if possible */
/* copied from Objects/fileobject.c */
/* Without large file support plain off_t is used as-is; with it, the first
 * of off_t / fpos_t that is at least 8 bytes wide is chosen, and the build
 * fails if neither qualifies. */
#if !defined(HAVE_LARGEFILE_SUPPORT)
typedef off_t Py_off_t;
#elif SIZEOF_OFF_T >= 8
typedef off_t Py_off_t;
#elif SIZEOF_FPOS_T >= 8
typedef fpos_t Py_off_t;
#else
#error "Large file support, but neither off_t nor fpos_t is large enough."
#endif
36
/* Raw character buffer of a bytes object. */
#define BUF(v) PyBytes_AS_STRING(v)

/* Values stored in BZ2FileObject.mode. */
#define MODE_CLOSED   0
#define MODE_READ     1
#define MODE_READ_EOF 2
#define MODE_WRITE    3

/* Exact type test against BZ2File_Type. */
#define BZ2FileObject_Check(v)  (Py_TYPE(v) == &BZ2File_Type)
Gustavo Niemeyer7628f1f2003-04-27 06:25:24 +000046
/* BZ_CONFIG_ERROR is used here as the feature test that separates the two
 * bzlib API flavours handled by this file: when it is defined the BZ2_
 * prefixed entry points and the split total_out_hi32/total_out_lo32 counters
 * are used, otherwise the unprefixed names are aliased and a single
 * total_out field is read.
 *
 * BZS_TOTAL_OUT(bzs) takes a pointer to a bz_stream. The argument and the
 * whole expansion are parenthesized so that expressions such as
 * BZS_TOTAL_OUT(&stream) expand correctly. */
#ifdef BZ_CONFIG_ERROR

#if SIZEOF_LONG >= 8
#define BZS_TOTAL_OUT(bzs) \
    (((long)(bzs)->total_out_hi32 << 32) + (bzs)->total_out_lo32)
#elif SIZEOF_LONG_LONG >= 8
#define BZS_TOTAL_OUT(bzs) \
    (((PY_LONG_LONG)(bzs)->total_out_hi32 << 32) + (bzs)->total_out_lo32)
#else
/* No 64-bit integer type available: only the low 32 bits are reported. */
#define BZS_TOTAL_OUT(bzs) \
    ((bzs)->total_out_lo32)
#endif

#else /* ! BZ_CONFIG_ERROR */

#define BZ2_bzRead bzRead
#define BZ2_bzReadOpen bzReadOpen
#define BZ2_bzReadClose bzReadClose
#define BZ2_bzWrite bzWrite
#define BZ2_bzWriteOpen bzWriteOpen
#define BZ2_bzWriteClose bzWriteClose
#define BZ2_bzCompress bzCompress
#define BZ2_bzCompressInit bzCompressInit
#define BZ2_bzCompressEnd bzCompressEnd
#define BZ2_bzDecompress bzDecompress
#define BZ2_bzDecompressInit bzDecompressInit
#define BZ2_bzDecompressEnd bzDecompressEnd

#define BZS_TOTAL_OUT(bzs) ((bzs)->total_out)

#endif /* ! BZ_CONFIG_ERROR */
78
79
/* Per-object locking helpers; `obj` is a pointer to a struct with a `lock`
 * member.
 *
 * ACQUIRE_LOCK first tries a non-blocking acquire; only if that fails does
 * it wrap a blocking acquire in Py_BEGIN/END_ALLOW_THREADS.
 * Both macros expand to nothing when WITH_THREAD is not defined. */
#ifdef WITH_THREAD
#define ACQUIRE_LOCK(obj) do { \
    if (!PyThread_acquire_lock((obj)->lock, 0)) { \
        Py_BEGIN_ALLOW_THREADS \
        PyThread_acquire_lock((obj)->lock, 1); \
        Py_END_ALLOW_THREADS \
    } } while (0)
#define RELEASE_LOCK(obj) PyThread_release_lock((obj)->lock)
#else
#define ACQUIRE_LOCK(obj)
#define RELEASE_LOCK(obj)
#endif
92
/* Bits in f_newlinetypes */
#define NEWLINE_UNKNOWN 0       /* No newline seen, yet */
#define NEWLINE_CR      1       /* \r newline seen */
#define NEWLINE_LF      2       /* \n newline seen */
#define NEWLINE_CRLF    4       /* \r\n newline seen */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000098
99/* ===================================================================== */
100/* Structure definitions. */
101
102typedef struct {
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000103 PyObject_HEAD
Guido van Rossumf09ca142007-06-13 00:03:05 +0000104 FILE *rawfp;
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000105
106 char* f_buf; /* Allocated readahead buffer */
107 char* f_bufend; /* Points after last occupied position */
108 char* f_bufptr; /* Current buffer position */
109
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000110 BZFILE *fp;
111 int mode;
Georg Brandla8bcecc2005-09-03 07:49:53 +0000112 Py_off_t pos;
113 Py_off_t size;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000114#ifdef WITH_THREAD
115 PyThread_type_lock lock;
116#endif
117} BZ2FileObject;
118
119typedef struct {
120 PyObject_HEAD
121 bz_stream bzs;
122 int running;
123#ifdef WITH_THREAD
124 PyThread_type_lock lock;
125#endif
126} BZ2CompObject;
127
128typedef struct {
129 PyObject_HEAD
130 bz_stream bzs;
131 int running;
132 PyObject *unused_data;
133#ifdef WITH_THREAD
134 PyThread_type_lock lock;
135#endif
136} BZ2DecompObject;
137
138/* ===================================================================== */
139/* Utility functions. */
140
141static int
142Util_CatchBZ2Error(int bzerror)
143{
144 int ret = 0;
145 switch(bzerror) {
146 case BZ_OK:
147 case BZ_STREAM_END:
148 break;
149
Gustavo Niemeyer7628f1f2003-04-27 06:25:24 +0000150#ifdef BZ_CONFIG_ERROR
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000151 case BZ_CONFIG_ERROR:
152 PyErr_SetString(PyExc_SystemError,
153 "the bz2 library was not compiled "
154 "correctly");
155 ret = 1;
156 break;
Gustavo Niemeyer7628f1f2003-04-27 06:25:24 +0000157#endif
Tim Peterse3228092002-11-09 04:21:44 +0000158
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000159 case BZ_PARAM_ERROR:
160 PyErr_SetString(PyExc_ValueError,
161 "the bz2 library has received wrong "
162 "parameters");
163 ret = 1;
164 break;
Tim Peterse3228092002-11-09 04:21:44 +0000165
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000166 case BZ_MEM_ERROR:
167 PyErr_NoMemory();
168 ret = 1;
169 break;
170
171 case BZ_DATA_ERROR:
172 case BZ_DATA_ERROR_MAGIC:
173 PyErr_SetString(PyExc_IOError, "invalid data stream");
174 ret = 1;
175 break;
176
177 case BZ_IO_ERROR:
178 PyErr_SetString(PyExc_IOError, "unknown IO error");
179 ret = 1;
180 break;
181
182 case BZ_UNEXPECTED_EOF:
183 PyErr_SetString(PyExc_EOFError,
184 "compressed file ended before the "
185 "logical end-of-stream was detected");
186 ret = 1;
187 break;
188
189 case BZ_SEQUENCE_ERROR:
190 PyErr_SetString(PyExc_RuntimeError,
191 "wrong sequence of bz2 library "
192 "commands used");
193 ret = 1;
194 break;
195 }
196 return ret;
197}
198
/* Step sizes used when growing read buffers: SMALLCHUNK is BUFSIZ but never
 * less than 8192; BIGCHUNK is 512K, or 16K where int is narrower than
 * 4 bytes. */
#if BUFSIZ < 8192
#define SMALLCHUNK 8192
#else
#define SMALLCHUNK BUFSIZ
#endif

#if SIZEOF_INT < 4
#define BIGCHUNK (512 * 32)
#else
#define BIGCHUNK (512 * 1024)
#endif

/* This is a hacked version of Python's fileobject.c:new_buffersize(). */
/* Return the next buffer size to use after `currentsize`:
 *   - up to and including SMALLCHUNK: grow by SMALLCHUNK;
 *   - above SMALLCHUNK and up to BIGCHUNK: double;
 *   - above BIGCHUNK: grow by BIGCHUNK. */
static size_t
Util_NewBufferSize(size_t currentsize)
{
    if (currentsize > SMALLCHUNK) {
        /* Keep doubling until we reach BIGCHUNK;
           then keep adding BIGCHUNK. */
        if (currentsize <= BIGCHUNK)
            return currentsize + currentsize;
        else
            return currentsize + BIGCHUNK;
    }
    return currentsize + SMALLCHUNK;
}
225
226/* This is a hacked version of Python's fileobject.c:get_line(). */
227static PyObject *
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000228Util_GetLine(BZ2FileObject *f, int n)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000229{
230 char c;
231 char *buf, *end;
232 size_t total_v_size; /* total # of slots in buffer */
233 size_t used_v_size; /* # used slots in buffer */
234 size_t increment; /* amount to increment the buffer */
235 PyObject *v;
236 int bzerror;
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000237 int bytes_read;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000238
239 total_v_size = n > 0 ? n : 100;
Christian Heimes72b710a2008-05-26 13:28:38 +0000240 v = PyBytes_FromStringAndSize((char *)NULL, total_v_size);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000241 if (v == NULL)
242 return NULL;
243
244 buf = BUF(v);
245 end = buf + total_v_size;
246
247 for (;;) {
248 Py_BEGIN_ALLOW_THREADS
Guido van Rossumf09ca142007-06-13 00:03:05 +0000249 do {
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000250 bytes_read = BZ2_bzRead(&bzerror, f->fp, &c, 1);
Guido van Rossumf09ca142007-06-13 00:03:05 +0000251 f->pos++;
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000252 if (bytes_read == 0)
253 break;
Guido van Rossumf09ca142007-06-13 00:03:05 +0000254 *buf++ = c;
255 } while (bzerror == BZ_OK && c != '\n' && buf != end);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000256 Py_END_ALLOW_THREADS
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000257 if (bzerror == BZ_STREAM_END) {
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000258 f->size = f->pos;
259 f->mode = MODE_READ_EOF;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000260 break;
261 } else if (bzerror != BZ_OK) {
262 Util_CatchBZ2Error(bzerror);
263 Py_DECREF(v);
264 return NULL;
265 }
266 if (c == '\n')
267 break;
268 /* Must be because buf == end */
269 if (n > 0)
270 break;
271 used_v_size = total_v_size;
272 increment = total_v_size >> 2; /* mild exponential growth */
273 total_v_size += increment;
274 if (total_v_size > INT_MAX) {
275 PyErr_SetString(PyExc_OverflowError,
276 "line is longer than a Python string can hold");
277 Py_DECREF(v);
278 return NULL;
279 }
Christian Heimes72b710a2008-05-26 13:28:38 +0000280 if (_PyBytes_Resize(&v, total_v_size) < 0) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000281 return NULL;
Guido van Rossum522a6c62007-05-22 23:13:45 +0000282 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000283 buf = BUF(v) + used_v_size;
284 end = BUF(v) + total_v_size;
285 }
286
287 used_v_size = buf - BUF(v);
Guido van Rossum522a6c62007-05-22 23:13:45 +0000288 if (used_v_size != total_v_size) {
Christian Heimes72b710a2008-05-26 13:28:38 +0000289 if (_PyBytes_Resize(&v, used_v_size) < 0) {
Guido van Rossum522a6c62007-05-22 23:13:45 +0000290 v = NULL;
291 }
292 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000293 return v;
294}
295
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000296/* This is a hacked version of Python's fileobject.c:drop_readahead(). */
297static void
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000298Util_DropReadAhead(BZ2FileObject *f)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000299{
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000300 if (f->f_buf != NULL) {
301 PyMem_Free(f->f_buf);
302 f->f_buf = NULL;
303 }
304}
305
306/* This is a hacked version of Python's fileobject.c:readahead(). */
307static int
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000308Util_ReadAhead(BZ2FileObject *f, int bufsize)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000309{
310 int chunksize;
311 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000312
313 if (f->f_buf != NULL) {
Tim Peterse3228092002-11-09 04:21:44 +0000314 if((f->f_bufend - f->f_bufptr) >= 1)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000315 return 0;
316 else
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000317 Util_DropReadAhead(f);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000318 }
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000319 if (f->mode == MODE_READ_EOF) {
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000320 f->f_bufptr = f->f_buf;
321 f->f_bufend = f->f_buf;
322 return 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000323 }
324 if ((f->f_buf = PyMem_Malloc(bufsize)) == NULL) {
Georg Brandl6aa2d1f2008-08-12 08:35:52 +0000325 PyErr_NoMemory();
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000326 return -1;
327 }
328 Py_BEGIN_ALLOW_THREADS
Guido van Rossumf09ca142007-06-13 00:03:05 +0000329 chunksize = BZ2_bzRead(&bzerror, f->fp, f->f_buf, bufsize);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000330 Py_END_ALLOW_THREADS
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000331 f->pos += chunksize;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000332 if (bzerror == BZ_STREAM_END) {
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000333 f->size = f->pos;
334 f->mode = MODE_READ_EOF;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000335 } else if (bzerror != BZ_OK) {
336 Util_CatchBZ2Error(bzerror);
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000337 Util_DropReadAhead(f);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000338 return -1;
339 }
340 f->f_bufptr = f->f_buf;
341 f->f_bufend = f->f_buf + chunksize;
342 return 0;
343}
344
345/* This is a hacked version of Python's
346 * fileobject.c:readahead_get_line_skip(). */
Christian Heimes72b710a2008-05-26 13:28:38 +0000347static PyBytesObject *
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000348Util_ReadAheadGetLineSkip(BZ2FileObject *f, int skip, int bufsize)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000349{
Christian Heimes72b710a2008-05-26 13:28:38 +0000350 PyBytesObject* s;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000351 char *bufptr;
352 char *buf;
353 int len;
354
355 if (f->f_buf == NULL)
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000356 if (Util_ReadAhead(f, bufsize) < 0)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000357 return NULL;
358
359 len = f->f_bufend - f->f_bufptr;
Tim Peterse3228092002-11-09 04:21:44 +0000360 if (len == 0)
Christian Heimes72b710a2008-05-26 13:28:38 +0000361 return (PyBytesObject *)
362 PyBytes_FromStringAndSize(NULL, skip);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000363 bufptr = memchr(f->f_bufptr, '\n', len);
364 if (bufptr != NULL) {
365 bufptr++; /* Count the '\n' */
366 len = bufptr - f->f_bufptr;
Christian Heimes72b710a2008-05-26 13:28:38 +0000367 s = (PyBytesObject *)
368 PyBytes_FromStringAndSize(NULL, skip+len);
Tim Peterse3228092002-11-09 04:21:44 +0000369 if (s == NULL)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000370 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +0000371 memcpy(PyBytes_AS_STRING(s)+skip, f->f_bufptr, len);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000372 f->f_bufptr = bufptr;
373 if (bufptr == f->f_bufend)
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000374 Util_DropReadAhead(f);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000375 } else {
376 bufptr = f->f_bufptr;
377 buf = f->f_buf;
378 f->f_buf = NULL; /* Force new readahead buffer */
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000379 s = Util_ReadAheadGetLineSkip(f, skip+len,
380 bufsize + (bufsize>>2));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000381 if (s == NULL) {
382 PyMem_Free(buf);
383 return NULL;
384 }
Christian Heimes72b710a2008-05-26 13:28:38 +0000385 memcpy(PyBytes_AS_STRING(s)+skip, bufptr, len);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000386 PyMem_Free(buf);
387 }
388 return s;
389}
390
391/* ===================================================================== */
392/* Methods of BZ2File. */
393
394PyDoc_STRVAR(BZ2File_read__doc__,
395"read([size]) -> string\n\
396\n\
397Read at most size uncompressed bytes, returned as a string. If the size\n\
398argument is negative or omitted, read until EOF is reached.\n\
399");
400
401/* This is a hacked version of Python's fileobject.c:file_read(). */
402static PyObject *
403BZ2File_read(BZ2FileObject *self, PyObject *args)
404{
405 long bytesrequested = -1;
406 size_t bytesread, buffersize, chunksize;
407 int bzerror;
408 PyObject *ret = NULL;
Tim Peterse3228092002-11-09 04:21:44 +0000409
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000410 if (!PyArg_ParseTuple(args, "|l:read", &bytesrequested))
411 return NULL;
Tim Peterse3228092002-11-09 04:21:44 +0000412
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000413 ACQUIRE_LOCK(self);
414 switch (self->mode) {
415 case MODE_READ:
416 break;
417 case MODE_READ_EOF:
Christian Heimes72b710a2008-05-26 13:28:38 +0000418 ret = PyBytes_FromStringAndSize("", 0);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000419 goto cleanup;
420 case MODE_CLOSED:
421 PyErr_SetString(PyExc_ValueError,
422 "I/O operation on closed file");
423 goto cleanup;
424 default:
425 PyErr_SetString(PyExc_IOError,
426 "file is not ready for reading");
427 goto cleanup;
428 }
429
430 if (bytesrequested < 0)
431 buffersize = Util_NewBufferSize((size_t)0);
432 else
433 buffersize = bytesrequested;
434 if (buffersize > INT_MAX) {
435 PyErr_SetString(PyExc_OverflowError,
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +0000436 "requested number of bytes is "
437 "more than a Python string can hold");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000438 goto cleanup;
439 }
Christian Heimes72b710a2008-05-26 13:28:38 +0000440 ret = PyBytes_FromStringAndSize((char *)NULL, buffersize);
Guido van Rossum75c26bc2007-08-07 23:29:20 +0000441 if (ret == NULL || buffersize == 0)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000442 goto cleanup;
443 bytesread = 0;
444
445 for (;;) {
446 Py_BEGIN_ALLOW_THREADS
Guido van Rossumf09ca142007-06-13 00:03:05 +0000447 chunksize = BZ2_bzRead(&bzerror, self->fp,
448 BUF(ret)+bytesread,
449 buffersize-bytesread);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000450 self->pos += chunksize;
451 Py_END_ALLOW_THREADS
452 bytesread += chunksize;
453 if (bzerror == BZ_STREAM_END) {
454 self->size = self->pos;
455 self->mode = MODE_READ_EOF;
456 break;
457 } else if (bzerror != BZ_OK) {
458 Util_CatchBZ2Error(bzerror);
459 Py_DECREF(ret);
460 ret = NULL;
461 goto cleanup;
462 }
463 if (bytesrequested < 0) {
464 buffersize = Util_NewBufferSize(buffersize);
Christian Heimes72b710a2008-05-26 13:28:38 +0000465 if (_PyBytes_Resize(&ret, buffersize) < 0) {
Guido van Rossum522a6c62007-05-22 23:13:45 +0000466 ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000467 goto cleanup;
Guido van Rossum522a6c62007-05-22 23:13:45 +0000468 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000469 } else {
470 break;
471 }
472 }
Guido van Rossum522a6c62007-05-22 23:13:45 +0000473 if (bytesread != buffersize) {
Christian Heimes72b710a2008-05-26 13:28:38 +0000474 if (_PyBytes_Resize(&ret, bytesread) < 0) {
Guido van Rossum522a6c62007-05-22 23:13:45 +0000475 ret = NULL;
476 }
477 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000478
479cleanup:
480 RELEASE_LOCK(self);
481 return ret;
482}
483
484PyDoc_STRVAR(BZ2File_readline__doc__,
485"readline([size]) -> string\n\
486\n\
487Return the next line from the file, as a string, retaining newline.\n\
488A non-negative size argument will limit the maximum number of bytes to\n\
489return (an incomplete line may be returned then). Return an empty\n\
490string at EOF.\n\
491");
492
493static PyObject *
494BZ2File_readline(BZ2FileObject *self, PyObject *args)
495{
496 PyObject *ret = NULL;
497 int sizehint = -1;
498
499 if (!PyArg_ParseTuple(args, "|i:readline", &sizehint))
500 return NULL;
501
502 ACQUIRE_LOCK(self);
503 switch (self->mode) {
504 case MODE_READ:
505 break;
506 case MODE_READ_EOF:
Christian Heimes72b710a2008-05-26 13:28:38 +0000507 ret = PyBytes_FromStringAndSize("", 0);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000508 goto cleanup;
509 case MODE_CLOSED:
510 PyErr_SetString(PyExc_ValueError,
511 "I/O operation on closed file");
512 goto cleanup;
513 default:
514 PyErr_SetString(PyExc_IOError,
515 "file is not ready for reading");
516 goto cleanup;
517 }
518
519 if (sizehint == 0)
Christian Heimes72b710a2008-05-26 13:28:38 +0000520 ret = PyBytes_FromStringAndSize("", 0);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000521 else
522 ret = Util_GetLine(self, (sizehint < 0) ? 0 : sizehint);
523
524cleanup:
525 RELEASE_LOCK(self);
526 return ret;
527}
528
529PyDoc_STRVAR(BZ2File_readlines__doc__,
530"readlines([size]) -> list\n\
531\n\
532Call readline() repeatedly and return a list of lines read.\n\
533The optional size argument, if given, is an approximate bound on the\n\
534total number of bytes in the lines returned.\n\
535");
536
537/* This is a hacked version of Python's fileobject.c:file_readlines(). */
538static PyObject *
539BZ2File_readlines(BZ2FileObject *self, PyObject *args)
540{
541 long sizehint = 0;
542 PyObject *list = NULL;
543 PyObject *line;
544 char small_buffer[SMALLCHUNK];
545 char *buffer = small_buffer;
546 size_t buffersize = SMALLCHUNK;
547 PyObject *big_buffer = NULL;
548 size_t nfilled = 0;
549 size_t nread;
550 size_t totalread = 0;
551 char *p, *q, *end;
552 int err;
553 int shortread = 0;
554 int bzerror;
555
556 if (!PyArg_ParseTuple(args, "|l:readlines", &sizehint))
557 return NULL;
558
559 ACQUIRE_LOCK(self);
560 switch (self->mode) {
561 case MODE_READ:
562 break;
563 case MODE_READ_EOF:
564 list = PyList_New(0);
565 goto cleanup;
566 case MODE_CLOSED:
567 PyErr_SetString(PyExc_ValueError,
568 "I/O operation on closed file");
569 goto cleanup;
570 default:
571 PyErr_SetString(PyExc_IOError,
572 "file is not ready for reading");
573 goto cleanup;
574 }
575
576 if ((list = PyList_New(0)) == NULL)
577 goto cleanup;
578
579 for (;;) {
580 Py_BEGIN_ALLOW_THREADS
Guido van Rossumf09ca142007-06-13 00:03:05 +0000581 nread = BZ2_bzRead(&bzerror, self->fp,
582 buffer+nfilled, buffersize-nfilled);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000583 self->pos += nread;
584 Py_END_ALLOW_THREADS
585 if (bzerror == BZ_STREAM_END) {
586 self->size = self->pos;
587 self->mode = MODE_READ_EOF;
588 if (nread == 0) {
589 sizehint = 0;
590 break;
591 }
592 shortread = 1;
593 } else if (bzerror != BZ_OK) {
594 Util_CatchBZ2Error(bzerror);
595 error:
596 Py_DECREF(list);
597 list = NULL;
598 goto cleanup;
599 }
600 totalread += nread;
601 p = memchr(buffer+nfilled, '\n', nread);
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000602 if (!shortread && p == NULL) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000603 /* Need a larger buffer to fit this line */
604 nfilled += nread;
605 buffersize *= 2;
606 if (buffersize > INT_MAX) {
607 PyErr_SetString(PyExc_OverflowError,
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000608 "line is longer than a Python string can hold");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000609 goto error;
610 }
611 if (big_buffer == NULL) {
612 /* Create the big buffer */
Christian Heimes72b710a2008-05-26 13:28:38 +0000613 big_buffer = PyBytes_FromStringAndSize(
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000614 NULL, buffersize);
615 if (big_buffer == NULL)
616 goto error;
Christian Heimes72b710a2008-05-26 13:28:38 +0000617 buffer = PyBytes_AS_STRING(big_buffer);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000618 memcpy(buffer, small_buffer, nfilled);
619 }
620 else {
621 /* Grow the big buffer */
Christian Heimes72b710a2008-05-26 13:28:38 +0000622 if (_PyBytes_Resize(&big_buffer, buffersize) < 0){
Guido van Rossum522a6c62007-05-22 23:13:45 +0000623 big_buffer = NULL;
624 goto error;
625 }
Christian Heimes72b710a2008-05-26 13:28:38 +0000626 buffer = PyBytes_AS_STRING(big_buffer);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000627 }
Guido van Rossum522a6c62007-05-22 23:13:45 +0000628 continue;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000629 }
630 end = buffer+nfilled+nread;
631 q = buffer;
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000632 while (p != NULL) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000633 /* Process complete lines */
634 p++;
Christian Heimes72b710a2008-05-26 13:28:38 +0000635 line = PyBytes_FromStringAndSize(q, p-q);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000636 if (line == NULL)
637 goto error;
638 err = PyList_Append(list, line);
639 Py_DECREF(line);
640 if (err != 0)
641 goto error;
642 q = p;
643 p = memchr(q, '\n', end-q);
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000644 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000645 /* Move the remaining incomplete line to the start */
646 nfilled = end-q;
647 memmove(buffer, q, nfilled);
648 if (sizehint > 0)
649 if (totalread >= (size_t)sizehint)
650 break;
651 if (shortread) {
652 sizehint = 0;
653 break;
654 }
655 }
656 if (nfilled != 0) {
657 /* Partial last line */
Christian Heimes72b710a2008-05-26 13:28:38 +0000658 line = PyBytes_FromStringAndSize(buffer, nfilled);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000659 if (line == NULL)
660 goto error;
661 if (sizehint > 0) {
662 /* Need to complete the last line */
663 PyObject *rest = Util_GetLine(self, 0);
664 if (rest == NULL) {
665 Py_DECREF(line);
666 goto error;
667 }
Christian Heimes72b710a2008-05-26 13:28:38 +0000668 PyBytes_Concat(&line, rest);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000669 Py_DECREF(rest);
670 if (line == NULL)
671 goto error;
672 }
673 err = PyList_Append(list, line);
674 Py_DECREF(line);
675 if (err != 0)
676 goto error;
677 }
678
679 cleanup:
680 RELEASE_LOCK(self);
681 if (big_buffer) {
682 Py_DECREF(big_buffer);
683 }
684 return list;
685}
686
687PyDoc_STRVAR(BZ2File_write__doc__,
688"write(data) -> None\n\
689\n\
690Write the 'data' string to file. Note that due to buffering, close() may\n\
691be needed before the file on disk reflects the data written.\n\
692");
693
694/* This is a hacked version of Python's fileobject.c:file_write(). */
695static PyObject *
696BZ2File_write(BZ2FileObject *self, PyObject *args)
697{
698 PyObject *ret = NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000699 Py_buffer pbuf;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000700 char *buf;
701 int len;
702 int bzerror;
703
Martin v. Löwis423be952008-08-13 15:53:07 +0000704 if (!PyArg_ParseTuple(args, "y*:write", &pbuf))
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000705 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000706 buf = pbuf.buf;
707 len = pbuf.len;
Tim Peterse3228092002-11-09 04:21:44 +0000708
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000709 ACQUIRE_LOCK(self);
710 switch (self->mode) {
711 case MODE_WRITE:
712 break;
Tim Peterse3228092002-11-09 04:21:44 +0000713
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000714 case MODE_CLOSED:
715 PyErr_SetString(PyExc_ValueError,
716 "I/O operation on closed file");
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000717 goto cleanup;
Tim Peterse3228092002-11-09 04:21:44 +0000718
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000719 default:
720 PyErr_SetString(PyExc_IOError,
721 "file is not ready for writing");
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000722 goto cleanup;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000723 }
724
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000725 Py_BEGIN_ALLOW_THREADS
726 BZ2_bzWrite (&bzerror, self->fp, buf, len);
727 self->pos += len;
728 Py_END_ALLOW_THREADS
Tim Peterse3228092002-11-09 04:21:44 +0000729
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000730 if (bzerror != BZ_OK) {
731 Util_CatchBZ2Error(bzerror);
732 goto cleanup;
733 }
Tim Peterse3228092002-11-09 04:21:44 +0000734
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000735 Py_INCREF(Py_None);
736 ret = Py_None;
737
738cleanup:
Martin v. Löwis423be952008-08-13 15:53:07 +0000739 PyBuffer_Release(&pbuf);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000740 RELEASE_LOCK(self);
741 return ret;
742}
743
744PyDoc_STRVAR(BZ2File_writelines__doc__,
745"writelines(sequence_of_strings) -> None\n\
746\n\
747Write the sequence of strings to the file. Note that newlines are not\n\
748added. The sequence can be any iterable object producing strings. This is\n\
749equivalent to calling write() for each string.\n\
750");
751
752/* This is a hacked version of Python's fileobject.c:file_writelines(). */
753static PyObject *
754BZ2File_writelines(BZ2FileObject *self, PyObject *seq)
755{
756#define CHUNKSIZE 1000
757 PyObject *list = NULL;
758 PyObject *iter = NULL;
759 PyObject *ret = NULL;
760 PyObject *line;
761 int i, j, index, len, islist;
762 int bzerror;
763
764 ACQUIRE_LOCK(self);
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000765 switch (self->mode) {
766 case MODE_WRITE:
767 break;
768
769 case MODE_CLOSED:
770 PyErr_SetString(PyExc_ValueError,
771 "I/O operation on closed file");
772 goto error;
773
774 default:
775 PyErr_SetString(PyExc_IOError,
776 "file is not ready for writing");
777 goto error;
778 }
779
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000780 islist = PyList_Check(seq);
781 if (!islist) {
782 iter = PyObject_GetIter(seq);
783 if (iter == NULL) {
784 PyErr_SetString(PyExc_TypeError,
785 "writelines() requires an iterable argument");
786 goto error;
787 }
788 list = PyList_New(CHUNKSIZE);
789 if (list == NULL)
790 goto error;
791 }
792
793 /* Strategy: slurp CHUNKSIZE lines into a private list,
794 checking that they are all strings, then write that list
795 without holding the interpreter lock, then come back for more. */
796 for (index = 0; ; index += CHUNKSIZE) {
797 if (islist) {
798 Py_XDECREF(list);
799 list = PyList_GetSlice(seq, index, index+CHUNKSIZE);
800 if (list == NULL)
801 goto error;
802 j = PyList_GET_SIZE(list);
803 }
804 else {
805 for (j = 0; j < CHUNKSIZE; j++) {
806 line = PyIter_Next(iter);
807 if (line == NULL) {
808 if (PyErr_Occurred())
809 goto error;
810 break;
811 }
812 PyList_SetItem(list, j, line);
813 }
814 }
815 if (j == 0)
816 break;
817
Guido van Rossum522a6c62007-05-22 23:13:45 +0000818 /* Check that all entries are indeed byte strings. If not,
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000819 apply the same rules as for file.write() and
820 convert the rets to strings. This is slow, but
821 seems to be the only way since all conversion APIs
822 could potentially execute Python code. */
823 for (i = 0; i < j; i++) {
824 PyObject *v = PyList_GET_ITEM(list, i);
Christian Heimes72b710a2008-05-26 13:28:38 +0000825 if (!PyBytes_Check(v)) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000826 const char *buffer;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000827 Py_ssize_t len;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000828 if (PyObject_AsCharBuffer(v, &buffer, &len)) {
829 PyErr_SetString(PyExc_TypeError,
830 "writelines() "
831 "argument must be "
832 "a sequence of "
Guido van Rossum522a6c62007-05-22 23:13:45 +0000833 "bytes objects");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000834 goto error;
835 }
Christian Heimes72b710a2008-05-26 13:28:38 +0000836 line = PyBytes_FromStringAndSize(buffer,
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000837 len);
838 if (line == NULL)
839 goto error;
840 Py_DECREF(v);
841 PyList_SET_ITEM(list, i, line);
842 }
843 }
844
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000845 /* Since we are releasing the global lock, the
846 following code may *not* execute Python code. */
847 Py_BEGIN_ALLOW_THREADS
848 for (i = 0; i < j; i++) {
849 line = PyList_GET_ITEM(list, i);
Christian Heimes72b710a2008-05-26 13:28:38 +0000850 len = PyBytes_GET_SIZE(line);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000851 BZ2_bzWrite (&bzerror, self->fp,
Christian Heimes72b710a2008-05-26 13:28:38 +0000852 PyBytes_AS_STRING(line), len);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000853 if (bzerror != BZ_OK) {
854 Py_BLOCK_THREADS
855 Util_CatchBZ2Error(bzerror);
856 goto error;
857 }
858 }
859 Py_END_ALLOW_THREADS
860
861 if (j < CHUNKSIZE)
862 break;
863 }
864
865 Py_INCREF(Py_None);
866 ret = Py_None;
867
868 error:
869 RELEASE_LOCK(self);
870 Py_XDECREF(list);
871 Py_XDECREF(iter);
872 return ret;
873#undef CHUNKSIZE
874}
875
876PyDoc_STRVAR(BZ2File_seek__doc__,
877"seek(offset [, whence]) -> None\n\
878\n\
879Move to new file position. Argument offset is a byte count. Optional\n\
880argument whence defaults to 0 (offset from start of file, offset\n\
881should be >= 0); other values are 1 (move relative to current position,\n\
882positive or negative), and 2 (move relative to end of file, usually\n\
883negative, although many platforms allow seeking beyond the end of a file).\n\
884\n\
885Note that seeking of bz2 files is emulated, and depending on the parameters\n\
886the operation may be extremely slow.\n\
887");
888
889static PyObject *
890BZ2File_seek(BZ2FileObject *self, PyObject *args)
891{
892 int where = 0;
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000893 PyObject *offobj;
894 Py_off_t offset;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000895 char small_buffer[SMALLCHUNK];
896 char *buffer = small_buffer;
897 size_t buffersize = SMALLCHUNK;
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000898 Py_off_t bytesread = 0;
Georg Brandla8bcecc2005-09-03 07:49:53 +0000899 size_t readsize;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000900 int chunksize;
901 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000902 PyObject *ret = NULL;
Tim Peterse3228092002-11-09 04:21:44 +0000903
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000904 if (!PyArg_ParseTuple(args, "O|i:seek", &offobj, &where))
905 return NULL;
906#if !defined(HAVE_LARGEFILE_SUPPORT)
Christian Heimes217cfd12007-12-02 14:31:20 +0000907 offset = PyLong_AsLong(offobj);
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000908#else
909 offset = PyLong_Check(offobj) ?
Christian Heimes217cfd12007-12-02 14:31:20 +0000910 PyLong_AsLongLong(offobj) : PyLong_AsLong(offobj);
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000911#endif
912 if (PyErr_Occurred())
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000913 return NULL;
914
915 ACQUIRE_LOCK(self);
916 Util_DropReadAhead(self);
917 switch (self->mode) {
918 case MODE_READ:
919 case MODE_READ_EOF:
920 break;
Tim Peterse3228092002-11-09 04:21:44 +0000921
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000922 case MODE_CLOSED:
923 PyErr_SetString(PyExc_ValueError,
924 "I/O operation on closed file");
Thomas Wouters89f507f2006-12-13 04:49:30 +0000925 goto cleanup;
Tim Peterse3228092002-11-09 04:21:44 +0000926
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000927 default:
928 PyErr_SetString(PyExc_IOError,
929 "seek works only while reading");
Thomas Wouters89f507f2006-12-13 04:49:30 +0000930 goto cleanup;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000931 }
932
Georg Brandl47fab922006-02-18 21:57:25 +0000933 if (where == 2) {
934 if (self->size == -1) {
935 assert(self->mode != MODE_READ_EOF);
936 for (;;) {
937 Py_BEGIN_ALLOW_THREADS
Guido van Rossumf09ca142007-06-13 00:03:05 +0000938 chunksize = BZ2_bzRead(&bzerror, self->fp,
939 buffer, buffersize);
Georg Brandl47fab922006-02-18 21:57:25 +0000940 self->pos += chunksize;
941 Py_END_ALLOW_THREADS
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000942
Georg Brandl47fab922006-02-18 21:57:25 +0000943 bytesread += chunksize;
944 if (bzerror == BZ_STREAM_END) {
945 break;
946 } else if (bzerror != BZ_OK) {
947 Util_CatchBZ2Error(bzerror);
948 goto cleanup;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000949 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000950 }
Georg Brandl47fab922006-02-18 21:57:25 +0000951 self->mode = MODE_READ_EOF;
952 self->size = self->pos;
953 bytesread = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000954 }
Georg Brandl47fab922006-02-18 21:57:25 +0000955 offset = self->size + offset;
956 } else if (where == 1) {
957 offset = self->pos + offset;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000958 }
959
Guido van Rossum522a6c62007-05-22 23:13:45 +0000960 /* Before getting here, offset must be the absolute position the file
Georg Brandl47fab922006-02-18 21:57:25 +0000961 * pointer should be set to. */
962
963 if (offset >= self->pos) {
964 /* we can move forward */
965 offset -= self->pos;
966 } else {
967 /* we cannot move back, so rewind the stream */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000968 BZ2_bzReadClose(&bzerror, self->fp);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000969 if (bzerror != BZ_OK) {
970 Util_CatchBZ2Error(bzerror);
971 goto cleanup;
972 }
Guido van Rossumf09ca142007-06-13 00:03:05 +0000973 rewind(self->rawfp);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000974 self->pos = 0;
Guido van Rossumf09ca142007-06-13 00:03:05 +0000975 self->fp = BZ2_bzReadOpen(&bzerror, self->rawfp,
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000976 0, 0, NULL, 0);
977 if (bzerror != BZ_OK) {
978 Util_CatchBZ2Error(bzerror);
979 goto cleanup;
980 }
981 self->mode = MODE_READ;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000982 }
983
Georg Brandl47fab922006-02-18 21:57:25 +0000984 if (offset <= 0 || self->mode == MODE_READ_EOF)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000985 goto exit;
986
987 /* Before getting here, offset must be set to the number of bytes
988 * to walk forward. */
989 for (;;) {
Georg Brandla8bcecc2005-09-03 07:49:53 +0000990 if (offset-bytesread > buffersize)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000991 readsize = buffersize;
992 else
Georg Brandla8bcecc2005-09-03 07:49:53 +0000993 /* offset might be wider that readsize, but the result
994 * of the subtraction is bound by buffersize (see the
995 * condition above). buffersize is 8192. */
996 readsize = (size_t)(offset-bytesread);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000997 Py_BEGIN_ALLOW_THREADS
Guido van Rossumf09ca142007-06-13 00:03:05 +0000998 chunksize = BZ2_bzRead(&bzerror, self->fp, buffer, readsize);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000999 self->pos += chunksize;
1000 Py_END_ALLOW_THREADS
1001 bytesread += chunksize;
1002 if (bzerror == BZ_STREAM_END) {
1003 self->size = self->pos;
1004 self->mode = MODE_READ_EOF;
1005 break;
1006 } else if (bzerror != BZ_OK) {
1007 Util_CatchBZ2Error(bzerror);
1008 goto cleanup;
1009 }
1010 if (bytesread == offset)
1011 break;
1012 }
1013
1014exit:
1015 Py_INCREF(Py_None);
1016 ret = Py_None;
1017
1018cleanup:
1019 RELEASE_LOCK(self);
1020 return ret;
1021}
1022
1023PyDoc_STRVAR(BZ2File_tell__doc__,
1024"tell() -> int\n\
1025\n\
1026Return the current file position, an integer (may be a long integer).\n\
1027");
1028
1029static PyObject *
1030BZ2File_tell(BZ2FileObject *self, PyObject *args)
1031{
1032 PyObject *ret = NULL;
1033
1034 if (self->mode == MODE_CLOSED) {
1035 PyErr_SetString(PyExc_ValueError,
1036 "I/O operation on closed file");
1037 goto cleanup;
1038 }
1039
Georg Brandla8bcecc2005-09-03 07:49:53 +00001040#if !defined(HAVE_LARGEFILE_SUPPORT)
Christian Heimes217cfd12007-12-02 14:31:20 +00001041 ret = PyLong_FromLong(self->pos);
Georg Brandla8bcecc2005-09-03 07:49:53 +00001042#else
1043 ret = PyLong_FromLongLong(self->pos);
1044#endif
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001045
1046cleanup:
1047 return ret;
1048}
1049
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001050PyDoc_STRVAR(BZ2File_close__doc__,
1051"close() -> None or (perhaps) an integer\n\
1052\n\
1053Close the file. Sets data attribute .closed to true. A closed file\n\
1054cannot be used for further I/O operations. close() may be called more\n\
1055than once without error.\n\
1056");
1057
1058static PyObject *
1059BZ2File_close(BZ2FileObject *self)
1060{
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001061 PyObject *ret = NULL;
1062 int bzerror = BZ_OK;
1063
Guido van Rossumf09ca142007-06-13 00:03:05 +00001064 if (self->mode == MODE_CLOSED) {
1065 Py_RETURN_NONE;
1066 }
1067
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001068 ACQUIRE_LOCK(self);
1069 switch (self->mode) {
1070 case MODE_READ:
1071 case MODE_READ_EOF:
1072 BZ2_bzReadClose(&bzerror, self->fp);
1073 break;
1074 case MODE_WRITE:
1075 BZ2_bzWriteClose(&bzerror, self->fp,
1076 0, NULL, NULL);
1077 break;
1078 }
1079 self->mode = MODE_CLOSED;
Guido van Rossumf09ca142007-06-13 00:03:05 +00001080 fclose(self->rawfp);
1081 self->rawfp = NULL;
1082 if (bzerror == BZ_OK) {
1083 Py_INCREF(Py_None);
1084 ret = Py_None;
1085 }
1086 else {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001087 Util_CatchBZ2Error(bzerror);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001088 }
1089
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001090 RELEASE_LOCK(self);
1091 return ret;
1092}
1093
Antoine Pitrou308705e2009-01-10 16:22:51 +00001094PyDoc_STRVAR(BZ2File_enter_doc,
1095"__enter__() -> self.");
1096
1097static PyObject *
1098BZ2File_enter(BZ2FileObject *self)
1099{
1100 if (self->mode == MODE_CLOSED) {
1101 PyErr_SetString(PyExc_ValueError,
1102 "I/O operation on closed file");
1103 return NULL;
1104 }
1105 Py_INCREF(self);
1106 return (PyObject *) self;
1107}
1108
1109PyDoc_STRVAR(BZ2File_exit_doc,
1110"__exit__(*excinfo) -> None. Closes the file.");
1111
1112static PyObject *
1113BZ2File_exit(BZ2FileObject *self, PyObject *args)
1114{
1115 PyObject *ret = PyObject_CallMethod((PyObject *) self, "close", NULL);
1116 if (!ret)
1117 /* If error occurred, pass through */
1118 return NULL;
1119 Py_DECREF(ret);
1120 Py_RETURN_NONE;
1121}
1122
1123
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001124static PyObject *BZ2File_getiter(BZ2FileObject *self);
1125
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001126static PyMethodDef BZ2File_methods[] = {
1127 {"read", (PyCFunction)BZ2File_read, METH_VARARGS, BZ2File_read__doc__},
1128 {"readline", (PyCFunction)BZ2File_readline, METH_VARARGS, BZ2File_readline__doc__},
1129 {"readlines", (PyCFunction)BZ2File_readlines, METH_VARARGS, BZ2File_readlines__doc__},
1130 {"write", (PyCFunction)BZ2File_write, METH_VARARGS, BZ2File_write__doc__},
1131 {"writelines", (PyCFunction)BZ2File_writelines, METH_O, BZ2File_writelines__doc__},
1132 {"seek", (PyCFunction)BZ2File_seek, METH_VARARGS, BZ2File_seek__doc__},
1133 {"tell", (PyCFunction)BZ2File_tell, METH_NOARGS, BZ2File_tell__doc__},
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001134 {"close", (PyCFunction)BZ2File_close, METH_NOARGS, BZ2File_close__doc__},
Antoine Pitrou308705e2009-01-10 16:22:51 +00001135 {"__enter__", (PyCFunction)BZ2File_enter, METH_NOARGS, BZ2File_enter_doc},
1136 {"__exit__", (PyCFunction)BZ2File_exit, METH_VARARGS, BZ2File_exit_doc},
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001137 {NULL, NULL} /* sentinel */
1138};
1139
1140
1141/* ===================================================================== */
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001142/* Getters and setters of BZ2File. */
1143
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001144static PyObject *
1145BZ2File_get_closed(BZ2FileObject *self, void *closure)
1146{
Christian Heimes217cfd12007-12-02 14:31:20 +00001147 return PyLong_FromLong(self->mode == MODE_CLOSED);
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001148}
1149
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001150static PyGetSetDef BZ2File_getset[] = {
1151 {"closed", (getter)BZ2File_get_closed, NULL,
1152 "True if the file is closed"},
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001153 {NULL} /* Sentinel */
1154};
1155
1156
1157/* ===================================================================== */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001158/* Slot definitions for BZ2File_Type. */
1159
1160static int
1161BZ2File_init(BZ2FileObject *self, PyObject *args, PyObject *kwargs)
1162{
Martin v. Löwis15e62742006-02-27 16:46:16 +00001163 static char *kwlist[] = {"filename", "mode", "buffering",
Guido van Rossumf09ca142007-06-13 00:03:05 +00001164 "compresslevel", 0};
1165 char *name;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001166 char *mode = "r";
1167 int buffering = -1;
1168 int compresslevel = 9;
1169 int bzerror;
1170 int mode_char = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001171
1172 self->size = -1;
Tim Peterse3228092002-11-09 04:21:44 +00001173
Guido van Rossumf09ca142007-06-13 00:03:05 +00001174 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s|sii:BZ2File",
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001175 kwlist, &name, &mode, &buffering,
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001176 &compresslevel))
1177 return -1;
1178
1179 if (compresslevel < 1 || compresslevel > 9) {
1180 PyErr_SetString(PyExc_ValueError,
1181 "compresslevel must be between 1 and 9");
1182 return -1;
1183 }
1184
1185 for (;;) {
1186 int error = 0;
1187 switch (*mode) {
1188 case 'r':
1189 case 'w':
1190 if (mode_char)
1191 error = 1;
1192 mode_char = *mode;
1193 break;
1194
1195 case 'b':
1196 break;
1197
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001198 default:
1199 error = 1;
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001200 break;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001201 }
1202 if (error) {
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001203 PyErr_Format(PyExc_ValueError,
1204 "invalid mode char %c", *mode);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001205 return -1;
1206 }
1207 mode++;
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001208 if (*mode == '\0')
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001209 break;
1210 }
1211
Georg Brandl6b95f1d2005-06-03 19:47:00 +00001212 if (mode_char == 0) {
1213 mode_char = 'r';
1214 }
1215
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001216 mode = (mode_char == 'r') ? "rb" : "wb";
Tim Peterse3228092002-11-09 04:21:44 +00001217
Guido van Rossumf09ca142007-06-13 00:03:05 +00001218 self->rawfp = fopen(name, mode);
1219 if (self->rawfp == NULL) {
1220 PyErr_SetFromErrno(PyExc_IOError);
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001221 return -1;
Guido van Rossumf09ca142007-06-13 00:03:05 +00001222 }
1223 /* XXX Ignore buffering */
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001224
1225 /* From now on, we have stuff to dealloc, so jump to error label
1226 * instead of returning */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001227
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001228#ifdef WITH_THREAD
1229 self->lock = PyThread_allocate_lock();
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001230 if (!self->lock) {
1231 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001232 goto error;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001233 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001234#endif
1235
1236 if (mode_char == 'r')
Guido van Rossumf09ca142007-06-13 00:03:05 +00001237 self->fp = BZ2_bzReadOpen(&bzerror, self->rawfp,
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001238 0, 0, NULL, 0);
1239 else
Guido van Rossumf09ca142007-06-13 00:03:05 +00001240 self->fp = BZ2_bzWriteOpen(&bzerror, self->rawfp,
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001241 compresslevel, 0, 0);
1242
1243 if (bzerror != BZ_OK) {
1244 Util_CatchBZ2Error(bzerror);
1245 goto error;
1246 }
1247
1248 self->mode = (mode_char == 'r') ? MODE_READ : MODE_WRITE;
1249
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001250 return 0;
1251
1252error:
Guido van Rossumf09ca142007-06-13 00:03:05 +00001253 fclose(self->rawfp);
1254 self->rawfp = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001255#ifdef WITH_THREAD
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001256 if (self->lock) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001257 PyThread_free_lock(self->lock);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001258 self->lock = NULL;
1259 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001260#endif
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001261 return -1;
1262}
1263
1264static void
1265BZ2File_dealloc(BZ2FileObject *self)
1266{
1267 int bzerror;
1268#ifdef WITH_THREAD
1269 if (self->lock)
1270 PyThread_free_lock(self->lock);
1271#endif
1272 switch (self->mode) {
1273 case MODE_READ:
1274 case MODE_READ_EOF:
1275 BZ2_bzReadClose(&bzerror, self->fp);
1276 break;
1277 case MODE_WRITE:
1278 BZ2_bzWriteClose(&bzerror, self->fp,
1279 0, NULL, NULL);
1280 break;
1281 }
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001282 Util_DropReadAhead(self);
Guido van Rossumf09ca142007-06-13 00:03:05 +00001283 if (self->rawfp != NULL)
1284 fclose(self->rawfp);
Christian Heimes90aa7642007-12-19 02:45:37 +00001285 Py_TYPE(self)->tp_free((PyObject *)self);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001286}
1287
1288/* This is a hacked version of Python's fileobject.c:file_getiter(). */
1289static PyObject *
1290BZ2File_getiter(BZ2FileObject *self)
1291{
1292 if (self->mode == MODE_CLOSED) {
1293 PyErr_SetString(PyExc_ValueError,
1294 "I/O operation on closed file");
1295 return NULL;
1296 }
1297 Py_INCREF((PyObject*)self);
1298 return (PyObject *)self;
1299}
1300
1301/* This is a hacked version of Python's fileobject.c:file_iternext(). */
1302#define READAHEAD_BUFSIZE 8192
1303static PyObject *
1304BZ2File_iternext(BZ2FileObject *self)
1305{
Christian Heimes72b710a2008-05-26 13:28:38 +00001306 PyBytesObject* ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001307 ACQUIRE_LOCK(self);
1308 if (self->mode == MODE_CLOSED) {
Georg Brandl86b2fb92008-07-16 03:43:04 +00001309 RELEASE_LOCK(self);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001310 PyErr_SetString(PyExc_ValueError,
1311 "I/O operation on closed file");
1312 return NULL;
1313 }
1314 ret = Util_ReadAheadGetLineSkip(self, 0, READAHEAD_BUFSIZE);
1315 RELEASE_LOCK(self);
Christian Heimes72b710a2008-05-26 13:28:38 +00001316 if (ret == NULL || PyBytes_GET_SIZE(ret) == 0) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001317 Py_XDECREF(ret);
1318 return NULL;
1319 }
1320 return (PyObject *)ret;
1321}
1322
1323/* ===================================================================== */
1324/* BZ2File_Type definition. */
1325
1326PyDoc_VAR(BZ2File__doc__) =
1327PyDoc_STR(
1328"BZ2File(name [, mode='r', buffering=0, compresslevel=9]) -> file object\n\
1329\n\
1330Open a bz2 file. The mode can be 'r' or 'w', for reading (default) or\n\
1331writing. When opened for writing, the file will be created if it doesn't\n\
1332exist, and truncated otherwise. If the buffering argument is given, 0 means\n\
1333unbuffered, and larger numbers specify the buffer size. If compresslevel\n\
1334is given, must be a number between 1 and 9.\n\
Guido van Rossum88e860c2007-06-13 01:46:31 +00001335Data read is always returned in bytes; data written ought to be bytes.\n\
1336");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001337
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001338static PyTypeObject BZ2File_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001339 PyVarObject_HEAD_INIT(NULL, 0)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001340 "bz2.BZ2File", /*tp_name*/
1341 sizeof(BZ2FileObject), /*tp_basicsize*/
1342 0, /*tp_itemsize*/
1343 (destructor)BZ2File_dealloc, /*tp_dealloc*/
1344 0, /*tp_print*/
1345 0, /*tp_getattr*/
1346 0, /*tp_setattr*/
Mark Dickinsone94c6792009-02-02 20:36:42 +00001347 0, /*tp_reserved*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001348 0, /*tp_repr*/
1349 0, /*tp_as_number*/
1350 0, /*tp_as_sequence*/
1351 0, /*tp_as_mapping*/
1352 0, /*tp_hash*/
1353 0, /*tp_call*/
1354 0, /*tp_str*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00001355 PyObject_GenericGetAttr,/*tp_getattro*/
1356 PyObject_GenericSetAttr,/*tp_setattro*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001357 0, /*tp_as_buffer*/
1358 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1359 BZ2File__doc__, /*tp_doc*/
1360 0, /*tp_traverse*/
1361 0, /*tp_clear*/
1362 0, /*tp_richcompare*/
1363 0, /*tp_weaklistoffset*/
1364 (getiterfunc)BZ2File_getiter, /*tp_iter*/
1365 (iternextfunc)BZ2File_iternext, /*tp_iternext*/
1366 BZ2File_methods, /*tp_methods*/
Guido van Rossum79139b22007-02-09 23:20:19 +00001367 0, /*tp_members*/
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001368 BZ2File_getset, /*tp_getset*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001369 0, /*tp_base*/
1370 0, /*tp_dict*/
1371 0, /*tp_descr_get*/
1372 0, /*tp_descr_set*/
1373 0, /*tp_dictoffset*/
1374 (initproc)BZ2File_init, /*tp_init*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00001375 PyType_GenericAlloc, /*tp_alloc*/
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001376 PyType_GenericNew, /*tp_new*/
Neal Norwitz30d1c512007-08-19 22:48:23 +00001377 PyObject_Free, /*tp_free*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001378 0, /*tp_is_gc*/
1379};
1380
1381
1382/* ===================================================================== */
1383/* Methods of BZ2Comp. */
1384
1385PyDoc_STRVAR(BZ2Comp_compress__doc__,
1386"compress(data) -> string\n\
1387\n\
1388Provide more data to the compressor object. It will return chunks of\n\
1389compressed data whenever possible. When you've finished providing data\n\
1390to compress, call the flush() method to finish the compression process,\n\
1391and return what is left in the internal buffers.\n\
1392");
1393
1394static PyObject *
1395BZ2Comp_compress(BZ2CompObject *self, PyObject *args)
1396{
Martin v. Löwis423be952008-08-13 15:53:07 +00001397 Py_buffer pdata;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001398 char *data;
1399 int datasize;
1400 int bufsize = SMALLCHUNK;
Martin v. Löwisb9a0f912003-03-29 10:06:18 +00001401 PY_LONG_LONG totalout;
Gustavo Niemeyer7d7930b2002-11-05 18:41:53 +00001402 PyObject *ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001403 bz_stream *bzs = &self->bzs;
1404 int bzerror;
1405
Martin v. Löwis423be952008-08-13 15:53:07 +00001406 if (!PyArg_ParseTuple(args, "y*:compress", &pdata))
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001407 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +00001408 data = pdata.buf;
1409 datasize = pdata.len;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001410
Martin v. Löwis423be952008-08-13 15:53:07 +00001411 if (datasize == 0) {
1412 PyBuffer_Release(&pdata);
Christian Heimes72b710a2008-05-26 13:28:38 +00001413 return PyBytes_FromStringAndSize("", 0);
Martin v. Löwis423be952008-08-13 15:53:07 +00001414 }
Gustavo Niemeyera6e436e2004-02-14 00:02:45 +00001415
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001416 ACQUIRE_LOCK(self);
1417 if (!self->running) {
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001418 PyErr_SetString(PyExc_ValueError,
1419 "this object was already flushed");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001420 goto error;
1421 }
1422
Christian Heimes72b710a2008-05-26 13:28:38 +00001423 ret = PyBytes_FromStringAndSize(NULL, bufsize);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001424 if (!ret)
1425 goto error;
1426
1427 bzs->next_in = data;
1428 bzs->avail_in = datasize;
1429 bzs->next_out = BUF(ret);
1430 bzs->avail_out = bufsize;
1431
1432 totalout = BZS_TOTAL_OUT(bzs);
1433
1434 for (;;) {
1435 Py_BEGIN_ALLOW_THREADS
1436 bzerror = BZ2_bzCompress(bzs, BZ_RUN);
1437 Py_END_ALLOW_THREADS
1438 if (bzerror != BZ_RUN_OK) {
1439 Util_CatchBZ2Error(bzerror);
1440 goto error;
1441 }
Guido van Rossumd8faa362007-04-27 19:54:29 +00001442 if (bzs->avail_in == 0)
1443 break; /* no more input data */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001444 if (bzs->avail_out == 0) {
1445 bufsize = Util_NewBufferSize(bufsize);
Christian Heimes72b710a2008-05-26 13:28:38 +00001446 if (_PyBytes_Resize(&ret, bufsize) < 0) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001447 BZ2_bzCompressEnd(bzs);
1448 goto error;
1449 }
1450 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1451 - totalout);
1452 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001453 }
1454 }
1455
Christian Heimes72b710a2008-05-26 13:28:38 +00001456 if (_PyBytes_Resize(&ret,
Guido van Rossum522a6c62007-05-22 23:13:45 +00001457 (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout)) < 0)
1458 goto error;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001459
1460 RELEASE_LOCK(self);
Martin v. Löwis423be952008-08-13 15:53:07 +00001461 PyBuffer_Release(&pdata);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001462 return ret;
1463
1464error:
1465 RELEASE_LOCK(self);
Martin v. Löwis423be952008-08-13 15:53:07 +00001466 PyBuffer_Release(&pdata);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001467 Py_XDECREF(ret);
1468 return NULL;
1469}
1470
1471PyDoc_STRVAR(BZ2Comp_flush__doc__,
1472"flush() -> string\n\
1473\n\
1474Finish the compression process and return what is left in internal buffers.\n\
1475You must not use the compressor object after calling this method.\n\
1476");
1477
1478static PyObject *
1479BZ2Comp_flush(BZ2CompObject *self)
1480{
1481 int bufsize = SMALLCHUNK;
Gustavo Niemeyer7d7930b2002-11-05 18:41:53 +00001482 PyObject *ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001483 bz_stream *bzs = &self->bzs;
Martin v. Löwisb9a0f912003-03-29 10:06:18 +00001484 PY_LONG_LONG totalout;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001485 int bzerror;
1486
1487 ACQUIRE_LOCK(self);
1488 if (!self->running) {
1489 PyErr_SetString(PyExc_ValueError, "object was already "
1490 "flushed");
1491 goto error;
1492 }
1493 self->running = 0;
1494
Christian Heimes72b710a2008-05-26 13:28:38 +00001495 ret = PyBytes_FromStringAndSize(NULL, bufsize);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001496 if (!ret)
1497 goto error;
1498
1499 bzs->next_out = BUF(ret);
1500 bzs->avail_out = bufsize;
1501
1502 totalout = BZS_TOTAL_OUT(bzs);
1503
1504 for (;;) {
1505 Py_BEGIN_ALLOW_THREADS
1506 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
1507 Py_END_ALLOW_THREADS
1508 if (bzerror == BZ_STREAM_END) {
1509 break;
1510 } else if (bzerror != BZ_FINISH_OK) {
1511 Util_CatchBZ2Error(bzerror);
1512 goto error;
1513 }
1514 if (bzs->avail_out == 0) {
1515 bufsize = Util_NewBufferSize(bufsize);
Christian Heimes72b710a2008-05-26 13:28:38 +00001516 if (_PyBytes_Resize(&ret, bufsize) < 0)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001517 goto error;
1518 bzs->next_out = BUF(ret);
1519 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1520 - totalout);
1521 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1522 }
1523 }
1524
Guido van Rossum522a6c62007-05-22 23:13:45 +00001525 if (bzs->avail_out != 0) {
Christian Heimes72b710a2008-05-26 13:28:38 +00001526 if (_PyBytes_Resize(&ret,
Guido van Rossum522a6c62007-05-22 23:13:45 +00001527 (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout)) < 0)
1528 goto error;
1529 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001530
1531 RELEASE_LOCK(self);
1532 return ret;
1533
1534error:
1535 RELEASE_LOCK(self);
1536 Py_XDECREF(ret);
1537 return NULL;
1538}
1539
1540static PyMethodDef BZ2Comp_methods[] = {
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001541 {"compress", (PyCFunction)BZ2Comp_compress, METH_VARARGS,
1542 BZ2Comp_compress__doc__},
1543 {"flush", (PyCFunction)BZ2Comp_flush, METH_NOARGS,
1544 BZ2Comp_flush__doc__},
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001545 {NULL, NULL} /* sentinel */
1546};
1547
1548
1549/* ===================================================================== */
1550/* Slot definitions for BZ2Comp_Type. */
1551
1552static int
1553BZ2Comp_init(BZ2CompObject *self, PyObject *args, PyObject *kwargs)
1554{
1555 int compresslevel = 9;
1556 int bzerror;
Martin v. Löwis15e62742006-02-27 16:46:16 +00001557 static char *kwlist[] = {"compresslevel", 0};
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001558
1559 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i:BZ2Compressor",
1560 kwlist, &compresslevel))
1561 return -1;
1562
1563 if (compresslevel < 1 || compresslevel > 9) {
1564 PyErr_SetString(PyExc_ValueError,
1565 "compresslevel must be between 1 and 9");
1566 goto error;
1567 }
1568
1569#ifdef WITH_THREAD
1570 self->lock = PyThread_allocate_lock();
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001571 if (!self->lock) {
1572 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001573 goto error;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001574 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001575#endif
1576
1577 memset(&self->bzs, 0, sizeof(bz_stream));
1578 bzerror = BZ2_bzCompressInit(&self->bzs, compresslevel, 0, 0);
1579 if (bzerror != BZ_OK) {
1580 Util_CatchBZ2Error(bzerror);
1581 goto error;
1582 }
1583
1584 self->running = 1;
1585
1586 return 0;
1587error:
1588#ifdef WITH_THREAD
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001589 if (self->lock) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001590 PyThread_free_lock(self->lock);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001591 self->lock = NULL;
1592 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001593#endif
1594 return -1;
1595}
1596
1597static void
1598BZ2Comp_dealloc(BZ2CompObject *self)
1599{
1600#ifdef WITH_THREAD
1601 if (self->lock)
1602 PyThread_free_lock(self->lock);
1603#endif
1604 BZ2_bzCompressEnd(&self->bzs);
Christian Heimes90aa7642007-12-19 02:45:37 +00001605 Py_TYPE(self)->tp_free((PyObject *)self);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001606}
1607
1608
1609/* ===================================================================== */
1610/* BZ2Comp_Type definition. */
1611
1612PyDoc_STRVAR(BZ2Comp__doc__,
1613"BZ2Compressor([compresslevel=9]) -> compressor object\n\
1614\n\
1615Create a new compressor object. This object may be used to compress\n\
1616data sequentially. If you want to compress data in one shot, use the\n\
1617compress() function instead. The compresslevel parameter, if given,\n\
1618must be a number between 1 and 9.\n\
1619");
1620
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001621static PyTypeObject BZ2Comp_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001622 PyVarObject_HEAD_INIT(NULL, 0)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001623 "bz2.BZ2Compressor", /*tp_name*/
1624 sizeof(BZ2CompObject), /*tp_basicsize*/
1625 0, /*tp_itemsize*/
1626 (destructor)BZ2Comp_dealloc, /*tp_dealloc*/
1627 0, /*tp_print*/
1628 0, /*tp_getattr*/
1629 0, /*tp_setattr*/
Mark Dickinsone94c6792009-02-02 20:36:42 +00001630 0, /*tp_reserved*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001631 0, /*tp_repr*/
1632 0, /*tp_as_number*/
1633 0, /*tp_as_sequence*/
1634 0, /*tp_as_mapping*/
1635 0, /*tp_hash*/
1636 0, /*tp_call*/
1637 0, /*tp_str*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00001638 PyObject_GenericGetAttr,/*tp_getattro*/
1639 PyObject_GenericSetAttr,/*tp_setattro*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001640 0, /*tp_as_buffer*/
1641 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1642 BZ2Comp__doc__, /*tp_doc*/
1643 0, /*tp_traverse*/
1644 0, /*tp_clear*/
1645 0, /*tp_richcompare*/
1646 0, /*tp_weaklistoffset*/
1647 0, /*tp_iter*/
1648 0, /*tp_iternext*/
1649 BZ2Comp_methods, /*tp_methods*/
1650 0, /*tp_members*/
1651 0, /*tp_getset*/
1652 0, /*tp_base*/
1653 0, /*tp_dict*/
1654 0, /*tp_descr_get*/
1655 0, /*tp_descr_set*/
1656 0, /*tp_dictoffset*/
1657 (initproc)BZ2Comp_init, /*tp_init*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00001658 PyType_GenericAlloc, /*tp_alloc*/
1659 PyType_GenericNew, /*tp_new*/
Neal Norwitz30d1c512007-08-19 22:48:23 +00001660 PyObject_Free, /*tp_free*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001661 0, /*tp_is_gc*/
1662};
1663
1664
1665/* ===================================================================== */
1666/* Members of BZ2Decomp. */
1667
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001668#undef OFF
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001669#define OFF(x) offsetof(BZ2DecompObject, x)
1670
1671static PyMemberDef BZ2Decomp_members[] = {
Guido van Rossum33d26892007-08-05 15:29:28 +00001672 {"unused_data", T_OBJECT, OFF(unused_data), READONLY},
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001673 {NULL} /* Sentinel */
1674};
1675
1676
1677/* ===================================================================== */
1678/* Methods of BZ2Decomp. */
1679
1680PyDoc_STRVAR(BZ2Decomp_decompress__doc__,
1681"decompress(data) -> string\n\
1682\n\
1683Provide more data to the decompressor object. It will return chunks\n\
1684of decompressed data whenever possible. If you try to decompress data\n\
1685after the end of stream is found, EOFError will be raised. If any data\n\
1686was found after the end of stream, it'll be ignored and saved in\n\
1687unused_data attribute.\n\
1688");
1689
1690static PyObject *
1691BZ2Decomp_decompress(BZ2DecompObject *self, PyObject *args)
1692{
Martin v. Löwis423be952008-08-13 15:53:07 +00001693 Py_buffer pdata;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001694 char *data;
1695 int datasize;
1696 int bufsize = SMALLCHUNK;
Martin v. Löwisb9a0f912003-03-29 10:06:18 +00001697 PY_LONG_LONG totalout;
Neal Norwitz18142c02002-11-05 18:17:32 +00001698 PyObject *ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001699 bz_stream *bzs = &self->bzs;
1700 int bzerror;
1701
Martin v. Löwis423be952008-08-13 15:53:07 +00001702 if (!PyArg_ParseTuple(args, "y*:decompress", &pdata))
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001703 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +00001704 data = pdata.buf;
1705 datasize = pdata.len;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001706
1707 ACQUIRE_LOCK(self);
1708 if (!self->running) {
1709 PyErr_SetString(PyExc_EOFError, "end of stream was "
1710 "already found");
1711 goto error;
1712 }
1713
Christian Heimes72b710a2008-05-26 13:28:38 +00001714 ret = PyBytes_FromStringAndSize(NULL, bufsize);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001715 if (!ret)
1716 goto error;
1717
1718 bzs->next_in = data;
1719 bzs->avail_in = datasize;
1720 bzs->next_out = BUF(ret);
1721 bzs->avail_out = bufsize;
1722
1723 totalout = BZS_TOTAL_OUT(bzs);
1724
1725 for (;;) {
1726 Py_BEGIN_ALLOW_THREADS
1727 bzerror = BZ2_bzDecompress(bzs);
1728 Py_END_ALLOW_THREADS
1729 if (bzerror == BZ_STREAM_END) {
1730 if (bzs->avail_in != 0) {
1731 Py_DECREF(self->unused_data);
1732 self->unused_data =
Christian Heimes72b710a2008-05-26 13:28:38 +00001733 PyBytes_FromStringAndSize(bzs->next_in,
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001734 bzs->avail_in);
1735 }
1736 self->running = 0;
1737 break;
1738 }
1739 if (bzerror != BZ_OK) {
1740 Util_CatchBZ2Error(bzerror);
1741 goto error;
1742 }
Guido van Rossumd8faa362007-04-27 19:54:29 +00001743 if (bzs->avail_in == 0)
1744 break; /* no more input data */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001745 if (bzs->avail_out == 0) {
1746 bufsize = Util_NewBufferSize(bufsize);
Christian Heimes72b710a2008-05-26 13:28:38 +00001747 if (_PyBytes_Resize(&ret, bufsize) < 0) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001748 BZ2_bzDecompressEnd(bzs);
1749 goto error;
1750 }
1751 bzs->next_out = BUF(ret);
1752 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1753 - totalout);
1754 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001755 }
1756 }
1757
Guido van Rossum522a6c62007-05-22 23:13:45 +00001758 if (bzs->avail_out != 0) {
Christian Heimes72b710a2008-05-26 13:28:38 +00001759 if (_PyBytes_Resize(&ret,
Guido van Rossum522a6c62007-05-22 23:13:45 +00001760 (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout)) < 0)
1761 goto error;
1762 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001763
1764 RELEASE_LOCK(self);
Martin v. Löwis423be952008-08-13 15:53:07 +00001765 PyBuffer_Release(&pdata);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001766 return ret;
1767
1768error:
1769 RELEASE_LOCK(self);
Martin v. Löwis423be952008-08-13 15:53:07 +00001770 PyBuffer_Release(&pdata);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001771 Py_XDECREF(ret);
1772 return NULL;
1773}
1774
1775static PyMethodDef BZ2Decomp_methods[] = {
1776 {"decompress", (PyCFunction)BZ2Decomp_decompress, METH_VARARGS, BZ2Decomp_decompress__doc__},
1777 {NULL, NULL} /* sentinel */
1778};
1779
1780
1781/* ===================================================================== */
1782/* Slot definitions for BZ2Decomp_Type. */
1783
1784static int
1785BZ2Decomp_init(BZ2DecompObject *self, PyObject *args, PyObject *kwargs)
1786{
1787 int bzerror;
1788
1789 if (!PyArg_ParseTuple(args, ":BZ2Decompressor"))
1790 return -1;
1791
1792#ifdef WITH_THREAD
1793 self->lock = PyThread_allocate_lock();
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001794 if (!self->lock) {
1795 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001796 goto error;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001797 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001798#endif
1799
Christian Heimes72b710a2008-05-26 13:28:38 +00001800 self->unused_data = PyBytes_FromStringAndSize("", 0);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001801 if (!self->unused_data)
1802 goto error;
1803
1804 memset(&self->bzs, 0, sizeof(bz_stream));
1805 bzerror = BZ2_bzDecompressInit(&self->bzs, 0, 0);
1806 if (bzerror != BZ_OK) {
1807 Util_CatchBZ2Error(bzerror);
1808 goto error;
1809 }
1810
1811 self->running = 1;
1812
1813 return 0;
1814
1815error:
1816#ifdef WITH_THREAD
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001817 if (self->lock) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001818 PyThread_free_lock(self->lock);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001819 self->lock = NULL;
1820 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001821#endif
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001822 Py_CLEAR(self->unused_data);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001823 return -1;
1824}
1825
1826static void
1827BZ2Decomp_dealloc(BZ2DecompObject *self)
1828{
1829#ifdef WITH_THREAD
1830 if (self->lock)
1831 PyThread_free_lock(self->lock);
1832#endif
1833 Py_XDECREF(self->unused_data);
1834 BZ2_bzDecompressEnd(&self->bzs);
Christian Heimes90aa7642007-12-19 02:45:37 +00001835 Py_TYPE(self)->tp_free((PyObject *)self);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001836}
1837
1838
1839/* ===================================================================== */
1840/* BZ2Decomp_Type definition. */
1841
1842PyDoc_STRVAR(BZ2Decomp__doc__,
1843"BZ2Decompressor() -> decompressor object\n\
1844\n\
1845Create a new decompressor object. This object may be used to decompress\n\
1846data sequentially. If you want to decompress data in one shot, use the\n\
1847decompress() function instead.\n\
1848");
1849
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001850static PyTypeObject BZ2Decomp_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001851 PyVarObject_HEAD_INIT(NULL, 0)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001852 "bz2.BZ2Decompressor", /*tp_name*/
1853 sizeof(BZ2DecompObject), /*tp_basicsize*/
1854 0, /*tp_itemsize*/
1855 (destructor)BZ2Decomp_dealloc, /*tp_dealloc*/
1856 0, /*tp_print*/
1857 0, /*tp_getattr*/
1858 0, /*tp_setattr*/
Mark Dickinsone94c6792009-02-02 20:36:42 +00001859 0, /*tp_reserved*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001860 0, /*tp_repr*/
1861 0, /*tp_as_number*/
1862 0, /*tp_as_sequence*/
1863 0, /*tp_as_mapping*/
1864 0, /*tp_hash*/
1865 0, /*tp_call*/
1866 0, /*tp_str*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00001867 PyObject_GenericGetAttr,/*tp_getattro*/
1868 PyObject_GenericSetAttr,/*tp_setattro*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001869 0, /*tp_as_buffer*/
1870 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1871 BZ2Decomp__doc__, /*tp_doc*/
1872 0, /*tp_traverse*/
1873 0, /*tp_clear*/
1874 0, /*tp_richcompare*/
1875 0, /*tp_weaklistoffset*/
1876 0, /*tp_iter*/
1877 0, /*tp_iternext*/
1878 BZ2Decomp_methods, /*tp_methods*/
1879 BZ2Decomp_members, /*tp_members*/
1880 0, /*tp_getset*/
1881 0, /*tp_base*/
1882 0, /*tp_dict*/
1883 0, /*tp_descr_get*/
1884 0, /*tp_descr_set*/
1885 0, /*tp_dictoffset*/
1886 (initproc)BZ2Decomp_init, /*tp_init*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00001887 PyType_GenericAlloc, /*tp_alloc*/
1888 PyType_GenericNew, /*tp_new*/
Neal Norwitz30d1c512007-08-19 22:48:23 +00001889 PyObject_Free, /*tp_free*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001890 0, /*tp_is_gc*/
1891};
1892
1893
1894/* ===================================================================== */
1895/* Module functions. */
1896
1897PyDoc_STRVAR(bz2_compress__doc__,
1898"compress(data [, compresslevel=9]) -> string\n\
1899\n\
1900Compress data in one shot. If you want to compress data sequentially,\n\
1901use an instance of BZ2Compressor instead. The compresslevel parameter, if\n\
1902given, must be a number between 1 and 9.\n\
1903");
1904
1905static PyObject *
1906bz2_compress(PyObject *self, PyObject *args, PyObject *kwargs)
1907{
1908 int compresslevel=9;
Martin v. Löwis423be952008-08-13 15:53:07 +00001909 Py_buffer pdata;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001910 char *data;
1911 int datasize;
1912 int bufsize;
Gustavo Niemeyer7d7930b2002-11-05 18:41:53 +00001913 PyObject *ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001914 bz_stream _bzs;
1915 bz_stream *bzs = &_bzs;
1916 int bzerror;
Martin v. Löwis15e62742006-02-27 16:46:16 +00001917 static char *kwlist[] = {"data", "compresslevel", 0};
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001918
Martin v. Löwis423be952008-08-13 15:53:07 +00001919 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*|i",
1920 kwlist, &pdata,
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001921 &compresslevel))
1922 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +00001923 data = pdata.buf;
1924 datasize = pdata.len;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001925
1926 if (compresslevel < 1 || compresslevel > 9) {
1927 PyErr_SetString(PyExc_ValueError,
1928 "compresslevel must be between 1 and 9");
Martin v. Löwis423be952008-08-13 15:53:07 +00001929 PyBuffer_Release(&pdata);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001930 return NULL;
1931 }
1932
1933 /* Conforming to bz2 manual, this is large enough to fit compressed
1934 * data in one shot. We will check it later anyway. */
1935 bufsize = datasize + (datasize/100+1) + 600;
1936
Christian Heimes72b710a2008-05-26 13:28:38 +00001937 ret = PyBytes_FromStringAndSize(NULL, bufsize);
Martin v. Löwis423be952008-08-13 15:53:07 +00001938 if (!ret) {
1939 PyBuffer_Release(&pdata);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001940 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +00001941 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001942
1943 memset(bzs, 0, sizeof(bz_stream));
1944
1945 bzs->next_in = data;
1946 bzs->avail_in = datasize;
1947 bzs->next_out = BUF(ret);
1948 bzs->avail_out = bufsize;
1949
1950 bzerror = BZ2_bzCompressInit(bzs, compresslevel, 0, 0);
1951 if (bzerror != BZ_OK) {
1952 Util_CatchBZ2Error(bzerror);
Martin v. Löwis423be952008-08-13 15:53:07 +00001953 PyBuffer_Release(&pdata);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001954 Py_DECREF(ret);
1955 return NULL;
1956 }
Tim Peterse3228092002-11-09 04:21:44 +00001957
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001958 for (;;) {
1959 Py_BEGIN_ALLOW_THREADS
1960 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
1961 Py_END_ALLOW_THREADS
1962 if (bzerror == BZ_STREAM_END) {
1963 break;
1964 } else if (bzerror != BZ_FINISH_OK) {
1965 BZ2_bzCompressEnd(bzs);
1966 Util_CatchBZ2Error(bzerror);
Martin v. Löwis423be952008-08-13 15:53:07 +00001967 PyBuffer_Release(&pdata);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001968 Py_DECREF(ret);
1969 return NULL;
1970 }
1971 if (bzs->avail_out == 0) {
1972 bufsize = Util_NewBufferSize(bufsize);
Christian Heimes72b710a2008-05-26 13:28:38 +00001973 if (_PyBytes_Resize(&ret, bufsize) < 0) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001974 BZ2_bzCompressEnd(bzs);
Martin v. Löwis423be952008-08-13 15:53:07 +00001975 PyBuffer_Release(&pdata);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001976 return NULL;
1977 }
1978 bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
1979 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1980 }
1981 }
1982
Guido van Rossum522a6c62007-05-22 23:13:45 +00001983 if (bzs->avail_out != 0) {
Christian Heimes72b710a2008-05-26 13:28:38 +00001984 if (_PyBytes_Resize(&ret, (Py_ssize_t)BZS_TOTAL_OUT(bzs)) < 0) {
Guido van Rossum522a6c62007-05-22 23:13:45 +00001985 ret = NULL;
1986 }
1987 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001988 BZ2_bzCompressEnd(bzs);
1989
Martin v. Löwis423be952008-08-13 15:53:07 +00001990 PyBuffer_Release(&pdata);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001991 return ret;
1992}
1993
1994PyDoc_STRVAR(bz2_decompress__doc__,
1995"decompress(data) -> decompressed data\n\
1996\n\
1997Decompress data in one shot. If you want to decompress data sequentially,\n\
1998use an instance of BZ2Decompressor instead.\n\
1999");
2000
2001static PyObject *
2002bz2_decompress(PyObject *self, PyObject *args)
2003{
Martin v. Löwis423be952008-08-13 15:53:07 +00002004 Py_buffer pdata;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002005 char *data;
2006 int datasize;
2007 int bufsize = SMALLCHUNK;
2008 PyObject *ret;
2009 bz_stream _bzs;
2010 bz_stream *bzs = &_bzs;
2011 int bzerror;
2012
Martin v. Löwis423be952008-08-13 15:53:07 +00002013 if (!PyArg_ParseTuple(args, "y*:decompress", &pdata))
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002014 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +00002015 data = pdata.buf;
2016 datasize = pdata.len;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002017
Martin v. Löwis423be952008-08-13 15:53:07 +00002018 if (datasize == 0) {
2019 PyBuffer_Release(&pdata);
Christian Heimes72b710a2008-05-26 13:28:38 +00002020 return PyBytes_FromStringAndSize("", 0);
Martin v. Löwis423be952008-08-13 15:53:07 +00002021 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002022
Christian Heimes72b710a2008-05-26 13:28:38 +00002023 ret = PyBytes_FromStringAndSize(NULL, bufsize);
Martin v. Löwis423be952008-08-13 15:53:07 +00002024 if (!ret) {
2025 PyBuffer_Release(&pdata);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002026 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +00002027 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002028
2029 memset(bzs, 0, sizeof(bz_stream));
2030
2031 bzs->next_in = data;
2032 bzs->avail_in = datasize;
2033 bzs->next_out = BUF(ret);
2034 bzs->avail_out = bufsize;
2035
2036 bzerror = BZ2_bzDecompressInit(bzs, 0, 0);
2037 if (bzerror != BZ_OK) {
2038 Util_CatchBZ2Error(bzerror);
2039 Py_DECREF(ret);
Martin v. Löwis423be952008-08-13 15:53:07 +00002040 PyBuffer_Release(&pdata);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002041 return NULL;
2042 }
Tim Peterse3228092002-11-09 04:21:44 +00002043
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002044 for (;;) {
2045 Py_BEGIN_ALLOW_THREADS
2046 bzerror = BZ2_bzDecompress(bzs);
2047 Py_END_ALLOW_THREADS
2048 if (bzerror == BZ_STREAM_END) {
2049 break;
2050 } else if (bzerror != BZ_OK) {
2051 BZ2_bzDecompressEnd(bzs);
2052 Util_CatchBZ2Error(bzerror);
Martin v. Löwis423be952008-08-13 15:53:07 +00002053 PyBuffer_Release(&pdata);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002054 Py_DECREF(ret);
2055 return NULL;
2056 }
Guido van Rossumd8faa362007-04-27 19:54:29 +00002057 if (bzs->avail_in == 0) {
2058 BZ2_bzDecompressEnd(bzs);
2059 PyErr_SetString(PyExc_ValueError,
2060 "couldn't find end of stream");
Martin v. Löwis423be952008-08-13 15:53:07 +00002061 PyBuffer_Release(&pdata);
Guido van Rossumd8faa362007-04-27 19:54:29 +00002062 Py_DECREF(ret);
2063 return NULL;
2064 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002065 if (bzs->avail_out == 0) {
2066 bufsize = Util_NewBufferSize(bufsize);
Christian Heimes72b710a2008-05-26 13:28:38 +00002067 if (_PyBytes_Resize(&ret, bufsize) < 0) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002068 BZ2_bzDecompressEnd(bzs);
Martin v. Löwis423be952008-08-13 15:53:07 +00002069 PyBuffer_Release(&pdata);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002070 return NULL;
2071 }
2072 bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
2073 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002074 }
2075 }
2076
Guido van Rossum522a6c62007-05-22 23:13:45 +00002077 if (bzs->avail_out != 0) {
Christian Heimes72b710a2008-05-26 13:28:38 +00002078 if (_PyBytes_Resize(&ret, (Py_ssize_t)BZS_TOTAL_OUT(bzs)) < 0) {
Guido van Rossum522a6c62007-05-22 23:13:45 +00002079 ret = NULL;
2080 }
2081 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002082 BZ2_bzDecompressEnd(bzs);
Martin v. Löwis423be952008-08-13 15:53:07 +00002083 PyBuffer_Release(&pdata);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002084
2085 return ret;
2086}
2087
2088static PyMethodDef bz2_methods[] = {
2089 {"compress", (PyCFunction) bz2_compress, METH_VARARGS|METH_KEYWORDS,
2090 bz2_compress__doc__},
2091 {"decompress", (PyCFunction) bz2_decompress, METH_VARARGS,
2092 bz2_decompress__doc__},
2093 {NULL, NULL} /* sentinel */
2094};
2095
2096/* ===================================================================== */
2097/* Initialization function. */
2098
2099PyDoc_STRVAR(bz2__doc__,
2100"The python bz2 module provides a comprehensive interface for\n\
2101the bz2 compression library. It implements a complete file\n\
2102interface, one shot (de)compression functions, and types for\n\
2103sequential (de)compression.\n\
2104");
2105
Martin v. Löwis1a214512008-06-11 05:26:20 +00002106
2107static struct PyModuleDef bz2module = {
2108 PyModuleDef_HEAD_INIT,
2109 "bz2",
2110 bz2__doc__,
2111 -1,
2112 bz2_methods,
2113 NULL,
2114 NULL,
2115 NULL,
2116 NULL
2117};
2118
Neal Norwitz21d896c2003-07-01 20:15:21 +00002119PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00002120PyInit_bz2(void)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002121{
2122 PyObject *m;
2123
Christian Heimes90aa7642007-12-19 02:45:37 +00002124 Py_TYPE(&BZ2File_Type) = &PyType_Type;
2125 Py_TYPE(&BZ2Comp_Type) = &PyType_Type;
2126 Py_TYPE(&BZ2Decomp_Type) = &PyType_Type;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002127
Martin v. Löwis1a214512008-06-11 05:26:20 +00002128 m = PyModule_Create(&bz2module);
Neal Norwitz1ac754f2006-01-19 06:09:39 +00002129 if (m == NULL)
Martin v. Löwis1a214512008-06-11 05:26:20 +00002130 return NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002131
Neal Norwitz53cbdaa2007-08-23 21:42:55 +00002132 PyModule_AddObject(m, "__author__", PyUnicode_FromString(__author__));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002133
2134 Py_INCREF(&BZ2File_Type);
2135 PyModule_AddObject(m, "BZ2File", (PyObject *)&BZ2File_Type);
2136
2137 Py_INCREF(&BZ2Comp_Type);
2138 PyModule_AddObject(m, "BZ2Compressor", (PyObject *)&BZ2Comp_Type);
2139
2140 Py_INCREF(&BZ2Decomp_Type);
2141 PyModule_AddObject(m, "BZ2Decompressor", (PyObject *)&BZ2Decomp_Type);
Martin v. Löwis1a214512008-06-11 05:26:20 +00002142 return m;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002143}