blob: 562198be05b21129cce3024709e73be36fbcc76e [file] [log] [blame]
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001/*
2
3python-bz2 - python bz2 library interface
4
5Copyright (c) 2002 Gustavo Niemeyer <niemeyer@conectiva.com>
6Copyright (c) 2002 Python Software Foundation; All Rights Reserved
7
8*/
9
Martin v. Löwise17af7b2002-11-23 09:16:19 +000010#include "Python.h"
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000011#include <stdio.h>
12#include <bzlib.h>
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000013#include "structmember.h"
14
15#ifdef WITH_THREAD
16#include "pythread.h"
17#endif
18
19static char __author__[] =
20"The bz2 python module was written by:\n\
21\n\
22 Gustavo Niemeyer <niemeyer@conectiva.com>\n\
23";
24
Georg Brandl33a5f2a2005-08-21 14:16:04 +000025/* Our very own off_t-like type, 64-bit if possible */
26/* copied from Objects/fileobject.c */
#if !defined(HAVE_LARGEFILE_SUPPORT) || SIZEOF_OFF_T >= 8
/* Without large-file support, or when off_t is already at least 8 bytes,
   off_t itself is used. */
typedef off_t Py_off_t;
#elif SIZEOF_FPOS_T >= 8
/* off_t is too narrow, but fpos_t is wide enough. */
typedef fpos_t Py_off_t;
#else
#error "Large file support, but neither off_t nor fpos_t is large enough."
#endif
36
Christian Heimes72b710a2008-05-26 13:28:38 +000037#define BUF(v) PyBytes_AS_STRING(v)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000038
39#define MODE_CLOSED 0
40#define MODE_READ 1
41#define MODE_READ_EOF 2
42#define MODE_WRITE 3
43
Christian Heimes90aa7642007-12-19 02:45:37 +000044#define BZ2FileObject_Check(v) (Py_TYPE(v) == &BZ2File_Type)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000045
Gustavo Niemeyer7628f1f2003-04-27 06:25:24 +000046
/* Compatibility layer between two flavours of the bzip2 headers.
 *
 * When bzlib.h defines BZ_CONFIG_ERROR, the BZ2_-prefixed entry points are
 * used as-is and the output byte counter of a bz_stream is read from the
 * two 32-bit halves total_out_hi32 / total_out_lo32.  Otherwise the BZ2_
 * names are mapped onto the unprefixed ones and the counter is the single
 * field total_out.
 *
 * BZS_TOTAL_OUT(bzs) takes a pointer to a bz_stream.  The argument is
 * parenthesized so that expressions such as &obj->bzs expand correctly.
 */
#ifdef BZ_CONFIG_ERROR

#if SIZEOF_LONG >= 8
#define BZS_TOTAL_OUT(bzs) \
    (((long)(bzs)->total_out_hi32 << 32) + (bzs)->total_out_lo32)
#elif SIZEOF_LONG_LONG >= 8
#define BZS_TOTAL_OUT(bzs) \
    (((PY_LONG_LONG)(bzs)->total_out_hi32 << 32) + (bzs)->total_out_lo32)
#else
/* No 64-bit integer type available: only the low 32 bits are reported. */
#define BZS_TOTAL_OUT(bzs) \
    ((bzs)->total_out_lo32)
#endif

#else /* ! BZ_CONFIG_ERROR */

#define BZ2_bzRead bzRead
#define BZ2_bzReadOpen bzReadOpen
#define BZ2_bzReadClose bzReadClose
#define BZ2_bzWrite bzWrite
#define BZ2_bzWriteOpen bzWriteOpen
#define BZ2_bzWriteClose bzWriteClose
#define BZ2_bzCompress bzCompress
#define BZ2_bzCompressInit bzCompressInit
#define BZ2_bzCompressEnd bzCompressEnd
#define BZ2_bzDecompress bzDecompress
#define BZ2_bzDecompressInit bzDecompressInit
#define BZ2_bzDecompressEnd bzDecompressEnd

#define BZS_TOTAL_OUT(bzs) ((bzs)->total_out)

#endif /* ! BZ_CONFIG_ERROR */
78
79
/* Per-object locking helpers.  With thread support they acquire (blocking,
   waitflag 1) and release the object's `lock` member; without thread
   support they expand to nothing.  The argument is parenthesized so any
   pointer expression can be passed. */
#ifdef WITH_THREAD
#define ACQUIRE_LOCK(obj) PyThread_acquire_lock((obj)->lock, 1)
#define RELEASE_LOCK(obj) PyThread_release_lock((obj)->lock)
#else
#define ACQUIRE_LOCK(obj)
#define RELEASE_LOCK(obj)
#endif
87
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000088/* Bits in f_newlinetypes */
89#define NEWLINE_UNKNOWN 0 /* No newline seen, yet */
90#define NEWLINE_CR 1 /* \r newline seen */
91#define NEWLINE_LF 2 /* \n newline seen */
92#define NEWLINE_CRLF 4 /* \r\n newline seen */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000093
94/* ===================================================================== */
95/* Structure definitions. */
96
97typedef struct {
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +000098 PyObject_HEAD
Guido van Rossumf09ca142007-06-13 00:03:05 +000099 FILE *rawfp;
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000100
101 char* f_buf; /* Allocated readahead buffer */
102 char* f_bufend; /* Points after last occupied position */
103 char* f_bufptr; /* Current buffer position */
104
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000105 BZFILE *fp;
106 int mode;
Georg Brandla8bcecc2005-09-03 07:49:53 +0000107 Py_off_t pos;
108 Py_off_t size;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000109#ifdef WITH_THREAD
110 PyThread_type_lock lock;
111#endif
112} BZ2FileObject;
113
114typedef struct {
115 PyObject_HEAD
116 bz_stream bzs;
117 int running;
118#ifdef WITH_THREAD
119 PyThread_type_lock lock;
120#endif
121} BZ2CompObject;
122
123typedef struct {
124 PyObject_HEAD
125 bz_stream bzs;
126 int running;
127 PyObject *unused_data;
128#ifdef WITH_THREAD
129 PyThread_type_lock lock;
130#endif
131} BZ2DecompObject;
132
133/* ===================================================================== */
134/* Utility functions. */
135
136static int
137Util_CatchBZ2Error(int bzerror)
138{
139 int ret = 0;
140 switch(bzerror) {
141 case BZ_OK:
142 case BZ_STREAM_END:
143 break;
144
Gustavo Niemeyer7628f1f2003-04-27 06:25:24 +0000145#ifdef BZ_CONFIG_ERROR
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000146 case BZ_CONFIG_ERROR:
147 PyErr_SetString(PyExc_SystemError,
148 "the bz2 library was not compiled "
149 "correctly");
150 ret = 1;
151 break;
Gustavo Niemeyer7628f1f2003-04-27 06:25:24 +0000152#endif
Tim Peterse3228092002-11-09 04:21:44 +0000153
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000154 case BZ_PARAM_ERROR:
155 PyErr_SetString(PyExc_ValueError,
156 "the bz2 library has received wrong "
157 "parameters");
158 ret = 1;
159 break;
Tim Peterse3228092002-11-09 04:21:44 +0000160
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000161 case BZ_MEM_ERROR:
162 PyErr_NoMemory();
163 ret = 1;
164 break;
165
166 case BZ_DATA_ERROR:
167 case BZ_DATA_ERROR_MAGIC:
168 PyErr_SetString(PyExc_IOError, "invalid data stream");
169 ret = 1;
170 break;
171
172 case BZ_IO_ERROR:
173 PyErr_SetString(PyExc_IOError, "unknown IO error");
174 ret = 1;
175 break;
176
177 case BZ_UNEXPECTED_EOF:
178 PyErr_SetString(PyExc_EOFError,
179 "compressed file ended before the "
180 "logical end-of-stream was detected");
181 ret = 1;
182 break;
183
184 case BZ_SEQUENCE_ERROR:
185 PyErr_SetString(PyExc_RuntimeError,
186 "wrong sequence of bz2 library "
187 "commands used");
188 ret = 1;
189 break;
190 }
191 return ret;
192}
193
#if BUFSIZ < 8192
#define SMALLCHUNK 8192
#else
#define SMALLCHUNK BUFSIZ
#endif

#if SIZEOF_INT < 4
#define BIGCHUNK (512 * 32)
#else
#define BIGCHUNK (512 * 1024)
#endif

/* This is a hacked version of Python's fileobject.c:new_buffersize().
 *
 * Given the current size of a read buffer, return the size it should be
 * grown to:
 *   - up to and including SMALLCHUNK, grow by SMALLCHUNK;
 *   - above SMALLCHUNK and up to BIGCHUNK, double;
 *   - above BIGCHUNK, grow by BIGCHUNK.
 */
static size_t
Util_NewBufferSize(size_t currentsize)
{
    size_t growth;

    if (currentsize <= SMALLCHUNK)
        growth = SMALLCHUNK;
    else if (currentsize <= BIGCHUNK)
        growth = currentsize;
    else
        growth = BIGCHUNK;

    return currentsize + growth;
}
220
221/* This is a hacked version of Python's fileobject.c:get_line(). */
222static PyObject *
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000223Util_GetLine(BZ2FileObject *f, int n)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000224{
225 char c;
226 char *buf, *end;
227 size_t total_v_size; /* total # of slots in buffer */
228 size_t used_v_size; /* # used slots in buffer */
229 size_t increment; /* amount to increment the buffer */
230 PyObject *v;
231 int bzerror;
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000232 int bytes_read;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000233
234 total_v_size = n > 0 ? n : 100;
Christian Heimes72b710a2008-05-26 13:28:38 +0000235 v = PyBytes_FromStringAndSize((char *)NULL, total_v_size);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000236 if (v == NULL)
237 return NULL;
238
239 buf = BUF(v);
240 end = buf + total_v_size;
241
242 for (;;) {
243 Py_BEGIN_ALLOW_THREADS
Guido van Rossumf09ca142007-06-13 00:03:05 +0000244 do {
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000245 bytes_read = BZ2_bzRead(&bzerror, f->fp, &c, 1);
Guido van Rossumf09ca142007-06-13 00:03:05 +0000246 f->pos++;
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000247 if (bytes_read == 0)
248 break;
Guido van Rossumf09ca142007-06-13 00:03:05 +0000249 *buf++ = c;
250 } while (bzerror == BZ_OK && c != '\n' && buf != end);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000251 Py_END_ALLOW_THREADS
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000252 if (bzerror == BZ_STREAM_END) {
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000253 f->size = f->pos;
254 f->mode = MODE_READ_EOF;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000255 break;
256 } else if (bzerror != BZ_OK) {
257 Util_CatchBZ2Error(bzerror);
258 Py_DECREF(v);
259 return NULL;
260 }
261 if (c == '\n')
262 break;
263 /* Must be because buf == end */
264 if (n > 0)
265 break;
266 used_v_size = total_v_size;
267 increment = total_v_size >> 2; /* mild exponential growth */
268 total_v_size += increment;
269 if (total_v_size > INT_MAX) {
270 PyErr_SetString(PyExc_OverflowError,
271 "line is longer than a Python string can hold");
272 Py_DECREF(v);
273 return NULL;
274 }
Christian Heimes72b710a2008-05-26 13:28:38 +0000275 if (_PyBytes_Resize(&v, total_v_size) < 0) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000276 return NULL;
Guido van Rossum522a6c62007-05-22 23:13:45 +0000277 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000278 buf = BUF(v) + used_v_size;
279 end = BUF(v) + total_v_size;
280 }
281
282 used_v_size = buf - BUF(v);
Guido van Rossum522a6c62007-05-22 23:13:45 +0000283 if (used_v_size != total_v_size) {
Christian Heimes72b710a2008-05-26 13:28:38 +0000284 if (_PyBytes_Resize(&v, used_v_size) < 0) {
Guido van Rossum522a6c62007-05-22 23:13:45 +0000285 v = NULL;
286 }
287 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000288 return v;
289}
290
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000291/* This is a hacked version of Python's fileobject.c:drop_readahead(). */
292static void
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000293Util_DropReadAhead(BZ2FileObject *f)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000294{
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000295 if (f->f_buf != NULL) {
296 PyMem_Free(f->f_buf);
297 f->f_buf = NULL;
298 }
299}
300
301/* This is a hacked version of Python's fileobject.c:readahead(). */
302static int
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000303Util_ReadAhead(BZ2FileObject *f, int bufsize)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000304{
305 int chunksize;
306 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000307
308 if (f->f_buf != NULL) {
Tim Peterse3228092002-11-09 04:21:44 +0000309 if((f->f_bufend - f->f_bufptr) >= 1)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000310 return 0;
311 else
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000312 Util_DropReadAhead(f);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000313 }
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000314 if (f->mode == MODE_READ_EOF) {
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000315 f->f_bufptr = f->f_buf;
316 f->f_bufend = f->f_buf;
317 return 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000318 }
319 if ((f->f_buf = PyMem_Malloc(bufsize)) == NULL) {
320 return -1;
321 }
322 Py_BEGIN_ALLOW_THREADS
Guido van Rossumf09ca142007-06-13 00:03:05 +0000323 chunksize = BZ2_bzRead(&bzerror, f->fp, f->f_buf, bufsize);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000324 Py_END_ALLOW_THREADS
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000325 f->pos += chunksize;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000326 if (bzerror == BZ_STREAM_END) {
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000327 f->size = f->pos;
328 f->mode = MODE_READ_EOF;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000329 } else if (bzerror != BZ_OK) {
330 Util_CatchBZ2Error(bzerror);
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000331 Util_DropReadAhead(f);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000332 return -1;
333 }
334 f->f_bufptr = f->f_buf;
335 f->f_bufend = f->f_buf + chunksize;
336 return 0;
337}
338
339/* This is a hacked version of Python's
340 * fileobject.c:readahead_get_line_skip(). */
Christian Heimes72b710a2008-05-26 13:28:38 +0000341static PyBytesObject *
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000342Util_ReadAheadGetLineSkip(BZ2FileObject *f, int skip, int bufsize)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000343{
Christian Heimes72b710a2008-05-26 13:28:38 +0000344 PyBytesObject* s;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000345 char *bufptr;
346 char *buf;
347 int len;
348
349 if (f->f_buf == NULL)
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000350 if (Util_ReadAhead(f, bufsize) < 0)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000351 return NULL;
352
353 len = f->f_bufend - f->f_bufptr;
Tim Peterse3228092002-11-09 04:21:44 +0000354 if (len == 0)
Christian Heimes72b710a2008-05-26 13:28:38 +0000355 return (PyBytesObject *)
356 PyBytes_FromStringAndSize(NULL, skip);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000357 bufptr = memchr(f->f_bufptr, '\n', len);
358 if (bufptr != NULL) {
359 bufptr++; /* Count the '\n' */
360 len = bufptr - f->f_bufptr;
Christian Heimes72b710a2008-05-26 13:28:38 +0000361 s = (PyBytesObject *)
362 PyBytes_FromStringAndSize(NULL, skip+len);
Tim Peterse3228092002-11-09 04:21:44 +0000363 if (s == NULL)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000364 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +0000365 memcpy(PyBytes_AS_STRING(s)+skip, f->f_bufptr, len);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000366 f->f_bufptr = bufptr;
367 if (bufptr == f->f_bufend)
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000368 Util_DropReadAhead(f);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000369 } else {
370 bufptr = f->f_bufptr;
371 buf = f->f_buf;
372 f->f_buf = NULL; /* Force new readahead buffer */
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000373 s = Util_ReadAheadGetLineSkip(f, skip+len,
374 bufsize + (bufsize>>2));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000375 if (s == NULL) {
376 PyMem_Free(buf);
377 return NULL;
378 }
Christian Heimes72b710a2008-05-26 13:28:38 +0000379 memcpy(PyBytes_AS_STRING(s)+skip, bufptr, len);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000380 PyMem_Free(buf);
381 }
382 return s;
383}
384
385/* ===================================================================== */
386/* Methods of BZ2File. */
387
388PyDoc_STRVAR(BZ2File_read__doc__,
389"read([size]) -> string\n\
390\n\
391Read at most size uncompressed bytes, returned as a string. If the size\n\
392argument is negative or omitted, read until EOF is reached.\n\
393");
394
395/* This is a hacked version of Python's fileobject.c:file_read(). */
396static PyObject *
397BZ2File_read(BZ2FileObject *self, PyObject *args)
398{
399 long bytesrequested = -1;
400 size_t bytesread, buffersize, chunksize;
401 int bzerror;
402 PyObject *ret = NULL;
Tim Peterse3228092002-11-09 04:21:44 +0000403
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000404 if (!PyArg_ParseTuple(args, "|l:read", &bytesrequested))
405 return NULL;
Tim Peterse3228092002-11-09 04:21:44 +0000406
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000407 ACQUIRE_LOCK(self);
408 switch (self->mode) {
409 case MODE_READ:
410 break;
411 case MODE_READ_EOF:
Christian Heimes72b710a2008-05-26 13:28:38 +0000412 ret = PyBytes_FromStringAndSize("", 0);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000413 goto cleanup;
414 case MODE_CLOSED:
415 PyErr_SetString(PyExc_ValueError,
416 "I/O operation on closed file");
417 goto cleanup;
418 default:
419 PyErr_SetString(PyExc_IOError,
420 "file is not ready for reading");
421 goto cleanup;
422 }
423
424 if (bytesrequested < 0)
425 buffersize = Util_NewBufferSize((size_t)0);
426 else
427 buffersize = bytesrequested;
428 if (buffersize > INT_MAX) {
429 PyErr_SetString(PyExc_OverflowError,
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +0000430 "requested number of bytes is "
431 "more than a Python string can hold");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000432 goto cleanup;
433 }
Christian Heimes72b710a2008-05-26 13:28:38 +0000434 ret = PyBytes_FromStringAndSize((char *)NULL, buffersize);
Guido van Rossum75c26bc2007-08-07 23:29:20 +0000435 if (ret == NULL || buffersize == 0)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000436 goto cleanup;
437 bytesread = 0;
438
439 for (;;) {
440 Py_BEGIN_ALLOW_THREADS
Guido van Rossumf09ca142007-06-13 00:03:05 +0000441 chunksize = BZ2_bzRead(&bzerror, self->fp,
442 BUF(ret)+bytesread,
443 buffersize-bytesread);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000444 self->pos += chunksize;
445 Py_END_ALLOW_THREADS
446 bytesread += chunksize;
447 if (bzerror == BZ_STREAM_END) {
448 self->size = self->pos;
449 self->mode = MODE_READ_EOF;
450 break;
451 } else if (bzerror != BZ_OK) {
452 Util_CatchBZ2Error(bzerror);
453 Py_DECREF(ret);
454 ret = NULL;
455 goto cleanup;
456 }
457 if (bytesrequested < 0) {
458 buffersize = Util_NewBufferSize(buffersize);
Christian Heimes72b710a2008-05-26 13:28:38 +0000459 if (_PyBytes_Resize(&ret, buffersize) < 0) {
Guido van Rossum522a6c62007-05-22 23:13:45 +0000460 ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000461 goto cleanup;
Guido van Rossum522a6c62007-05-22 23:13:45 +0000462 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000463 } else {
464 break;
465 }
466 }
Guido van Rossum522a6c62007-05-22 23:13:45 +0000467 if (bytesread != buffersize) {
Christian Heimes72b710a2008-05-26 13:28:38 +0000468 if (_PyBytes_Resize(&ret, bytesread) < 0) {
Guido van Rossum522a6c62007-05-22 23:13:45 +0000469 ret = NULL;
470 }
471 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000472
473cleanup:
474 RELEASE_LOCK(self);
475 return ret;
476}
477
478PyDoc_STRVAR(BZ2File_readline__doc__,
479"readline([size]) -> string\n\
480\n\
481Return the next line from the file, as a string, retaining newline.\n\
482A non-negative size argument will limit the maximum number of bytes to\n\
483return (an incomplete line may be returned then). Return an empty\n\
484string at EOF.\n\
485");
486
487static PyObject *
488BZ2File_readline(BZ2FileObject *self, PyObject *args)
489{
490 PyObject *ret = NULL;
491 int sizehint = -1;
492
493 if (!PyArg_ParseTuple(args, "|i:readline", &sizehint))
494 return NULL;
495
496 ACQUIRE_LOCK(self);
497 switch (self->mode) {
498 case MODE_READ:
499 break;
500 case MODE_READ_EOF:
Christian Heimes72b710a2008-05-26 13:28:38 +0000501 ret = PyBytes_FromStringAndSize("", 0);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000502 goto cleanup;
503 case MODE_CLOSED:
504 PyErr_SetString(PyExc_ValueError,
505 "I/O operation on closed file");
506 goto cleanup;
507 default:
508 PyErr_SetString(PyExc_IOError,
509 "file is not ready for reading");
510 goto cleanup;
511 }
512
513 if (sizehint == 0)
Christian Heimes72b710a2008-05-26 13:28:38 +0000514 ret = PyBytes_FromStringAndSize("", 0);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000515 else
516 ret = Util_GetLine(self, (sizehint < 0) ? 0 : sizehint);
517
518cleanup:
519 RELEASE_LOCK(self);
520 return ret;
521}
522
523PyDoc_STRVAR(BZ2File_readlines__doc__,
524"readlines([size]) -> list\n\
525\n\
526Call readline() repeatedly and return a list of lines read.\n\
527The optional size argument, if given, is an approximate bound on the\n\
528total number of bytes in the lines returned.\n\
529");
530
531/* This is a hacked version of Python's fileobject.c:file_readlines(). */
532static PyObject *
533BZ2File_readlines(BZ2FileObject *self, PyObject *args)
534{
535 long sizehint = 0;
536 PyObject *list = NULL;
537 PyObject *line;
538 char small_buffer[SMALLCHUNK];
539 char *buffer = small_buffer;
540 size_t buffersize = SMALLCHUNK;
541 PyObject *big_buffer = NULL;
542 size_t nfilled = 0;
543 size_t nread;
544 size_t totalread = 0;
545 char *p, *q, *end;
546 int err;
547 int shortread = 0;
548 int bzerror;
549
550 if (!PyArg_ParseTuple(args, "|l:readlines", &sizehint))
551 return NULL;
552
553 ACQUIRE_LOCK(self);
554 switch (self->mode) {
555 case MODE_READ:
556 break;
557 case MODE_READ_EOF:
558 list = PyList_New(0);
559 goto cleanup;
560 case MODE_CLOSED:
561 PyErr_SetString(PyExc_ValueError,
562 "I/O operation on closed file");
563 goto cleanup;
564 default:
565 PyErr_SetString(PyExc_IOError,
566 "file is not ready for reading");
567 goto cleanup;
568 }
569
570 if ((list = PyList_New(0)) == NULL)
571 goto cleanup;
572
573 for (;;) {
574 Py_BEGIN_ALLOW_THREADS
Guido van Rossumf09ca142007-06-13 00:03:05 +0000575 nread = BZ2_bzRead(&bzerror, self->fp,
576 buffer+nfilled, buffersize-nfilled);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000577 self->pos += nread;
578 Py_END_ALLOW_THREADS
579 if (bzerror == BZ_STREAM_END) {
580 self->size = self->pos;
581 self->mode = MODE_READ_EOF;
582 if (nread == 0) {
583 sizehint = 0;
584 break;
585 }
586 shortread = 1;
587 } else if (bzerror != BZ_OK) {
588 Util_CatchBZ2Error(bzerror);
589 error:
590 Py_DECREF(list);
591 list = NULL;
592 goto cleanup;
593 }
594 totalread += nread;
595 p = memchr(buffer+nfilled, '\n', nread);
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000596 if (!shortread && p == NULL) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000597 /* Need a larger buffer to fit this line */
598 nfilled += nread;
599 buffersize *= 2;
600 if (buffersize > INT_MAX) {
601 PyErr_SetString(PyExc_OverflowError,
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000602 "line is longer than a Python string can hold");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000603 goto error;
604 }
605 if (big_buffer == NULL) {
606 /* Create the big buffer */
Christian Heimes72b710a2008-05-26 13:28:38 +0000607 big_buffer = PyBytes_FromStringAndSize(
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000608 NULL, buffersize);
609 if (big_buffer == NULL)
610 goto error;
Christian Heimes72b710a2008-05-26 13:28:38 +0000611 buffer = PyBytes_AS_STRING(big_buffer);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000612 memcpy(buffer, small_buffer, nfilled);
613 }
614 else {
615 /* Grow the big buffer */
Christian Heimes72b710a2008-05-26 13:28:38 +0000616 if (_PyBytes_Resize(&big_buffer, buffersize) < 0){
Guido van Rossum522a6c62007-05-22 23:13:45 +0000617 big_buffer = NULL;
618 goto error;
619 }
Christian Heimes72b710a2008-05-26 13:28:38 +0000620 buffer = PyBytes_AS_STRING(big_buffer);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000621 }
Guido van Rossum522a6c62007-05-22 23:13:45 +0000622 continue;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000623 }
624 end = buffer+nfilled+nread;
625 q = buffer;
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000626 while (p != NULL) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000627 /* Process complete lines */
628 p++;
Christian Heimes72b710a2008-05-26 13:28:38 +0000629 line = PyBytes_FromStringAndSize(q, p-q);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000630 if (line == NULL)
631 goto error;
632 err = PyList_Append(list, line);
633 Py_DECREF(line);
634 if (err != 0)
635 goto error;
636 q = p;
637 p = memchr(q, '\n', end-q);
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000638 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000639 /* Move the remaining incomplete line to the start */
640 nfilled = end-q;
641 memmove(buffer, q, nfilled);
642 if (sizehint > 0)
643 if (totalread >= (size_t)sizehint)
644 break;
645 if (shortread) {
646 sizehint = 0;
647 break;
648 }
649 }
650 if (nfilled != 0) {
651 /* Partial last line */
Christian Heimes72b710a2008-05-26 13:28:38 +0000652 line = PyBytes_FromStringAndSize(buffer, nfilled);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000653 if (line == NULL)
654 goto error;
655 if (sizehint > 0) {
656 /* Need to complete the last line */
657 PyObject *rest = Util_GetLine(self, 0);
658 if (rest == NULL) {
659 Py_DECREF(line);
660 goto error;
661 }
Christian Heimes72b710a2008-05-26 13:28:38 +0000662 PyBytes_Concat(&line, rest);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000663 Py_DECREF(rest);
664 if (line == NULL)
665 goto error;
666 }
667 err = PyList_Append(list, line);
668 Py_DECREF(line);
669 if (err != 0)
670 goto error;
671 }
672
673 cleanup:
674 RELEASE_LOCK(self);
675 if (big_buffer) {
676 Py_DECREF(big_buffer);
677 }
678 return list;
679}
680
681PyDoc_STRVAR(BZ2File_write__doc__,
682"write(data) -> None\n\
683\n\
684Write the 'data' string to file. Note that due to buffering, close() may\n\
685be needed before the file on disk reflects the data written.\n\
686");
687
688/* This is a hacked version of Python's fileobject.c:file_write(). */
689static PyObject *
690BZ2File_write(BZ2FileObject *self, PyObject *args)
691{
692 PyObject *ret = NULL;
693 char *buf;
694 int len;
695 int bzerror;
696
Guido van Rossum98297ee2007-11-06 21:34:58 +0000697 if (!PyArg_ParseTuple(args, "y#:write", &buf, &len))
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000698 return NULL;
Tim Peterse3228092002-11-09 04:21:44 +0000699
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000700 ACQUIRE_LOCK(self);
701 switch (self->mode) {
702 case MODE_WRITE:
703 break;
Tim Peterse3228092002-11-09 04:21:44 +0000704
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000705 case MODE_CLOSED:
706 PyErr_SetString(PyExc_ValueError,
707 "I/O operation on closed file");
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000708 goto cleanup;
Tim Peterse3228092002-11-09 04:21:44 +0000709
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000710 default:
711 PyErr_SetString(PyExc_IOError,
712 "file is not ready for writing");
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000713 goto cleanup;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000714 }
715
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000716 Py_BEGIN_ALLOW_THREADS
717 BZ2_bzWrite (&bzerror, self->fp, buf, len);
718 self->pos += len;
719 Py_END_ALLOW_THREADS
Tim Peterse3228092002-11-09 04:21:44 +0000720
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000721 if (bzerror != BZ_OK) {
722 Util_CatchBZ2Error(bzerror);
723 goto cleanup;
724 }
Tim Peterse3228092002-11-09 04:21:44 +0000725
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000726 Py_INCREF(Py_None);
727 ret = Py_None;
728
729cleanup:
730 RELEASE_LOCK(self);
731 return ret;
732}
733
734PyDoc_STRVAR(BZ2File_writelines__doc__,
735"writelines(sequence_of_strings) -> None\n\
736\n\
737Write the sequence of strings to the file. Note that newlines are not\n\
738added. The sequence can be any iterable object producing strings. This is\n\
739equivalent to calling write() for each string.\n\
740");
741
742/* This is a hacked version of Python's fileobject.c:file_writelines(). */
743static PyObject *
744BZ2File_writelines(BZ2FileObject *self, PyObject *seq)
745{
746#define CHUNKSIZE 1000
747 PyObject *list = NULL;
748 PyObject *iter = NULL;
749 PyObject *ret = NULL;
750 PyObject *line;
751 int i, j, index, len, islist;
752 int bzerror;
753
754 ACQUIRE_LOCK(self);
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000755 switch (self->mode) {
756 case MODE_WRITE:
757 break;
758
759 case MODE_CLOSED:
760 PyErr_SetString(PyExc_ValueError,
761 "I/O operation on closed file");
762 goto error;
763
764 default:
765 PyErr_SetString(PyExc_IOError,
766 "file is not ready for writing");
767 goto error;
768 }
769
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000770 islist = PyList_Check(seq);
771 if (!islist) {
772 iter = PyObject_GetIter(seq);
773 if (iter == NULL) {
774 PyErr_SetString(PyExc_TypeError,
775 "writelines() requires an iterable argument");
776 goto error;
777 }
778 list = PyList_New(CHUNKSIZE);
779 if (list == NULL)
780 goto error;
781 }
782
783 /* Strategy: slurp CHUNKSIZE lines into a private list,
784 checking that they are all strings, then write that list
785 without holding the interpreter lock, then come back for more. */
786 for (index = 0; ; index += CHUNKSIZE) {
787 if (islist) {
788 Py_XDECREF(list);
789 list = PyList_GetSlice(seq, index, index+CHUNKSIZE);
790 if (list == NULL)
791 goto error;
792 j = PyList_GET_SIZE(list);
793 }
794 else {
795 for (j = 0; j < CHUNKSIZE; j++) {
796 line = PyIter_Next(iter);
797 if (line == NULL) {
798 if (PyErr_Occurred())
799 goto error;
800 break;
801 }
802 PyList_SetItem(list, j, line);
803 }
804 }
805 if (j == 0)
806 break;
807
Guido van Rossum522a6c62007-05-22 23:13:45 +0000808 /* Check that all entries are indeed byte strings. If not,
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000809 apply the same rules as for file.write() and
810 convert the rets to strings. This is slow, but
811 seems to be the only way since all conversion APIs
812 could potentially execute Python code. */
813 for (i = 0; i < j; i++) {
814 PyObject *v = PyList_GET_ITEM(list, i);
Christian Heimes72b710a2008-05-26 13:28:38 +0000815 if (!PyBytes_Check(v)) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000816 const char *buffer;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000817 Py_ssize_t len;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000818 if (PyObject_AsCharBuffer(v, &buffer, &len)) {
819 PyErr_SetString(PyExc_TypeError,
820 "writelines() "
821 "argument must be "
822 "a sequence of "
Guido van Rossum522a6c62007-05-22 23:13:45 +0000823 "bytes objects");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000824 goto error;
825 }
Christian Heimes72b710a2008-05-26 13:28:38 +0000826 line = PyBytes_FromStringAndSize(buffer,
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000827 len);
828 if (line == NULL)
829 goto error;
830 Py_DECREF(v);
831 PyList_SET_ITEM(list, i, line);
832 }
833 }
834
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000835 /* Since we are releasing the global lock, the
836 following code may *not* execute Python code. */
837 Py_BEGIN_ALLOW_THREADS
838 for (i = 0; i < j; i++) {
839 line = PyList_GET_ITEM(list, i);
Christian Heimes72b710a2008-05-26 13:28:38 +0000840 len = PyBytes_GET_SIZE(line);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000841 BZ2_bzWrite (&bzerror, self->fp,
Christian Heimes72b710a2008-05-26 13:28:38 +0000842 PyBytes_AS_STRING(line), len);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000843 if (bzerror != BZ_OK) {
844 Py_BLOCK_THREADS
845 Util_CatchBZ2Error(bzerror);
846 goto error;
847 }
848 }
849 Py_END_ALLOW_THREADS
850
851 if (j < CHUNKSIZE)
852 break;
853 }
854
855 Py_INCREF(Py_None);
856 ret = Py_None;
857
858 error:
859 RELEASE_LOCK(self);
860 Py_XDECREF(list);
861 Py_XDECREF(iter);
862 return ret;
863#undef CHUNKSIZE
864}
865
866PyDoc_STRVAR(BZ2File_seek__doc__,
867"seek(offset [, whence]) -> None\n\
868\n\
869Move to new file position. Argument offset is a byte count. Optional\n\
870argument whence defaults to 0 (offset from start of file, offset\n\
871should be >= 0); other values are 1 (move relative to current position,\n\
872positive or negative), and 2 (move relative to end of file, usually\n\
873negative, although many platforms allow seeking beyond the end of a file).\n\
874\n\
875Note that seeking of bz2 files is emulated, and depending on the parameters\n\
876the operation may be extremely slow.\n\
877");
878
879static PyObject *
880BZ2File_seek(BZ2FileObject *self, PyObject *args)
881{
882 int where = 0;
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000883 PyObject *offobj;
884 Py_off_t offset;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000885 char small_buffer[SMALLCHUNK];
886 char *buffer = small_buffer;
887 size_t buffersize = SMALLCHUNK;
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000888 Py_off_t bytesread = 0;
Georg Brandla8bcecc2005-09-03 07:49:53 +0000889 size_t readsize;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000890 int chunksize;
891 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000892 PyObject *ret = NULL;
Tim Peterse3228092002-11-09 04:21:44 +0000893
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000894 if (!PyArg_ParseTuple(args, "O|i:seek", &offobj, &where))
895 return NULL;
896#if !defined(HAVE_LARGEFILE_SUPPORT)
Christian Heimes217cfd12007-12-02 14:31:20 +0000897 offset = PyLong_AsLong(offobj);
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000898#else
899 offset = PyLong_Check(offobj) ?
Christian Heimes217cfd12007-12-02 14:31:20 +0000900 PyLong_AsLongLong(offobj) : PyLong_AsLong(offobj);
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000901#endif
902 if (PyErr_Occurred())
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000903 return NULL;
904
905 ACQUIRE_LOCK(self);
906 Util_DropReadAhead(self);
907 switch (self->mode) {
908 case MODE_READ:
909 case MODE_READ_EOF:
910 break;
Tim Peterse3228092002-11-09 04:21:44 +0000911
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000912 case MODE_CLOSED:
913 PyErr_SetString(PyExc_ValueError,
914 "I/O operation on closed file");
Thomas Wouters89f507f2006-12-13 04:49:30 +0000915 goto cleanup;
Tim Peterse3228092002-11-09 04:21:44 +0000916
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000917 default:
918 PyErr_SetString(PyExc_IOError,
919 "seek works only while reading");
Thomas Wouters89f507f2006-12-13 04:49:30 +0000920 goto cleanup;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000921 }
922
Georg Brandl47fab922006-02-18 21:57:25 +0000923 if (where == 2) {
924 if (self->size == -1) {
925 assert(self->mode != MODE_READ_EOF);
926 for (;;) {
927 Py_BEGIN_ALLOW_THREADS
Guido van Rossumf09ca142007-06-13 00:03:05 +0000928 chunksize = BZ2_bzRead(&bzerror, self->fp,
929 buffer, buffersize);
Georg Brandl47fab922006-02-18 21:57:25 +0000930 self->pos += chunksize;
931 Py_END_ALLOW_THREADS
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000932
Georg Brandl47fab922006-02-18 21:57:25 +0000933 bytesread += chunksize;
934 if (bzerror == BZ_STREAM_END) {
935 break;
936 } else if (bzerror != BZ_OK) {
937 Util_CatchBZ2Error(bzerror);
938 goto cleanup;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000939 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000940 }
Georg Brandl47fab922006-02-18 21:57:25 +0000941 self->mode = MODE_READ_EOF;
942 self->size = self->pos;
943 bytesread = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000944 }
Georg Brandl47fab922006-02-18 21:57:25 +0000945 offset = self->size + offset;
946 } else if (where == 1) {
947 offset = self->pos + offset;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000948 }
949
Guido van Rossum522a6c62007-05-22 23:13:45 +0000950 /* Before getting here, offset must be the absolute position the file
Georg Brandl47fab922006-02-18 21:57:25 +0000951 * pointer should be set to. */
952
953 if (offset >= self->pos) {
954 /* we can move forward */
955 offset -= self->pos;
956 } else {
957 /* we cannot move back, so rewind the stream */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000958 BZ2_bzReadClose(&bzerror, self->fp);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000959 if (bzerror != BZ_OK) {
960 Util_CatchBZ2Error(bzerror);
961 goto cleanup;
962 }
Guido van Rossumf09ca142007-06-13 00:03:05 +0000963 rewind(self->rawfp);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000964 self->pos = 0;
Guido van Rossumf09ca142007-06-13 00:03:05 +0000965 self->fp = BZ2_bzReadOpen(&bzerror, self->rawfp,
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000966 0, 0, NULL, 0);
967 if (bzerror != BZ_OK) {
968 Util_CatchBZ2Error(bzerror);
969 goto cleanup;
970 }
971 self->mode = MODE_READ;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000972 }
973
Georg Brandl47fab922006-02-18 21:57:25 +0000974 if (offset <= 0 || self->mode == MODE_READ_EOF)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000975 goto exit;
976
977 /* Before getting here, offset must be set to the number of bytes
978 * to walk forward. */
979 for (;;) {
Georg Brandla8bcecc2005-09-03 07:49:53 +0000980 if (offset-bytesread > buffersize)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000981 readsize = buffersize;
982 else
Georg Brandla8bcecc2005-09-03 07:49:53 +0000983 /* offset might be wider that readsize, but the result
984 * of the subtraction is bound by buffersize (see the
985 * condition above). buffersize is 8192. */
986 readsize = (size_t)(offset-bytesread);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000987 Py_BEGIN_ALLOW_THREADS
Guido van Rossumf09ca142007-06-13 00:03:05 +0000988 chunksize = BZ2_bzRead(&bzerror, self->fp, buffer, readsize);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000989 self->pos += chunksize;
990 Py_END_ALLOW_THREADS
991 bytesread += chunksize;
992 if (bzerror == BZ_STREAM_END) {
993 self->size = self->pos;
994 self->mode = MODE_READ_EOF;
995 break;
996 } else if (bzerror != BZ_OK) {
997 Util_CatchBZ2Error(bzerror);
998 goto cleanup;
999 }
1000 if (bytesread == offset)
1001 break;
1002 }
1003
1004exit:
1005 Py_INCREF(Py_None);
1006 ret = Py_None;
1007
1008cleanup:
1009 RELEASE_LOCK(self);
1010 return ret;
1011}
1012
1013PyDoc_STRVAR(BZ2File_tell__doc__,
1014"tell() -> int\n\
1015\n\
1016Return the current file position, an integer (may be a long integer).\n\
1017");
1018
1019static PyObject *
1020BZ2File_tell(BZ2FileObject *self, PyObject *args)
1021{
1022 PyObject *ret = NULL;
1023
1024 if (self->mode == MODE_CLOSED) {
1025 PyErr_SetString(PyExc_ValueError,
1026 "I/O operation on closed file");
1027 goto cleanup;
1028 }
1029
Georg Brandla8bcecc2005-09-03 07:49:53 +00001030#if !defined(HAVE_LARGEFILE_SUPPORT)
Christian Heimes217cfd12007-12-02 14:31:20 +00001031 ret = PyLong_FromLong(self->pos);
Georg Brandla8bcecc2005-09-03 07:49:53 +00001032#else
1033 ret = PyLong_FromLongLong(self->pos);
1034#endif
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001035
1036cleanup:
1037 return ret;
1038}
1039
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001040PyDoc_STRVAR(BZ2File_close__doc__,
1041"close() -> None or (perhaps) an integer\n\
1042\n\
1043Close the file. Sets data attribute .closed to true. A closed file\n\
1044cannot be used for further I/O operations. close() may be called more\n\
1045than once without error.\n\
1046");
1047
1048static PyObject *
1049BZ2File_close(BZ2FileObject *self)
1050{
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001051 PyObject *ret = NULL;
1052 int bzerror = BZ_OK;
1053
Guido van Rossumf09ca142007-06-13 00:03:05 +00001054 if (self->mode == MODE_CLOSED) {
1055 Py_RETURN_NONE;
1056 }
1057
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001058 ACQUIRE_LOCK(self);
1059 switch (self->mode) {
1060 case MODE_READ:
1061 case MODE_READ_EOF:
1062 BZ2_bzReadClose(&bzerror, self->fp);
1063 break;
1064 case MODE_WRITE:
1065 BZ2_bzWriteClose(&bzerror, self->fp,
1066 0, NULL, NULL);
1067 break;
1068 }
1069 self->mode = MODE_CLOSED;
Guido van Rossumf09ca142007-06-13 00:03:05 +00001070 fclose(self->rawfp);
1071 self->rawfp = NULL;
1072 if (bzerror == BZ_OK) {
1073 Py_INCREF(Py_None);
1074 ret = Py_None;
1075 }
1076 else {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001077 Util_CatchBZ2Error(bzerror);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001078 }
1079
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001080 RELEASE_LOCK(self);
1081 return ret;
1082}
1083
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001084static PyObject *BZ2File_getiter(BZ2FileObject *self);
1085
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001086static PyMethodDef BZ2File_methods[] = {
1087 {"read", (PyCFunction)BZ2File_read, METH_VARARGS, BZ2File_read__doc__},
1088 {"readline", (PyCFunction)BZ2File_readline, METH_VARARGS, BZ2File_readline__doc__},
1089 {"readlines", (PyCFunction)BZ2File_readlines, METH_VARARGS, BZ2File_readlines__doc__},
1090 {"write", (PyCFunction)BZ2File_write, METH_VARARGS, BZ2File_write__doc__},
1091 {"writelines", (PyCFunction)BZ2File_writelines, METH_O, BZ2File_writelines__doc__},
1092 {"seek", (PyCFunction)BZ2File_seek, METH_VARARGS, BZ2File_seek__doc__},
1093 {"tell", (PyCFunction)BZ2File_tell, METH_NOARGS, BZ2File_tell__doc__},
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001094 {"close", (PyCFunction)BZ2File_close, METH_NOARGS, BZ2File_close__doc__},
1095 {NULL, NULL} /* sentinel */
1096};
1097
1098
1099/* ===================================================================== */
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001100/* Getters and setters of BZ2File. */
1101
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001102static PyObject *
1103BZ2File_get_closed(BZ2FileObject *self, void *closure)
1104{
Christian Heimes217cfd12007-12-02 14:31:20 +00001105 return PyLong_FromLong(self->mode == MODE_CLOSED);
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001106}
1107
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001108static PyGetSetDef BZ2File_getset[] = {
1109 {"closed", (getter)BZ2File_get_closed, NULL,
1110 "True if the file is closed"},
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001111 {NULL} /* Sentinel */
1112};
1113
1114
1115/* ===================================================================== */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001116/* Slot definitions for BZ2File_Type. */
1117
1118static int
1119BZ2File_init(BZ2FileObject *self, PyObject *args, PyObject *kwargs)
1120{
Martin v. Löwis15e62742006-02-27 16:46:16 +00001121 static char *kwlist[] = {"filename", "mode", "buffering",
Guido van Rossumf09ca142007-06-13 00:03:05 +00001122 "compresslevel", 0};
1123 char *name;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001124 char *mode = "r";
1125 int buffering = -1;
1126 int compresslevel = 9;
1127 int bzerror;
1128 int mode_char = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001129
1130 self->size = -1;
Tim Peterse3228092002-11-09 04:21:44 +00001131
Guido van Rossumf09ca142007-06-13 00:03:05 +00001132 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s|sii:BZ2File",
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001133 kwlist, &name, &mode, &buffering,
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001134 &compresslevel))
1135 return -1;
1136
1137 if (compresslevel < 1 || compresslevel > 9) {
1138 PyErr_SetString(PyExc_ValueError,
1139 "compresslevel must be between 1 and 9");
1140 return -1;
1141 }
1142
1143 for (;;) {
1144 int error = 0;
1145 switch (*mode) {
1146 case 'r':
1147 case 'w':
1148 if (mode_char)
1149 error = 1;
1150 mode_char = *mode;
1151 break;
1152
1153 case 'b':
1154 break;
1155
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001156 default:
1157 error = 1;
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001158 break;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001159 }
1160 if (error) {
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001161 PyErr_Format(PyExc_ValueError,
1162 "invalid mode char %c", *mode);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001163 return -1;
1164 }
1165 mode++;
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001166 if (*mode == '\0')
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001167 break;
1168 }
1169
Georg Brandl6b95f1d2005-06-03 19:47:00 +00001170 if (mode_char == 0) {
1171 mode_char = 'r';
1172 }
1173
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001174 mode = (mode_char == 'r') ? "rb" : "wb";
Tim Peterse3228092002-11-09 04:21:44 +00001175
Guido van Rossumf09ca142007-06-13 00:03:05 +00001176 self->rawfp = fopen(name, mode);
1177 if (self->rawfp == NULL) {
1178 PyErr_SetFromErrno(PyExc_IOError);
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001179 return -1;
Guido van Rossumf09ca142007-06-13 00:03:05 +00001180 }
1181 /* XXX Ignore buffering */
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001182
1183 /* From now on, we have stuff to dealloc, so jump to error label
1184 * instead of returning */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001185
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001186#ifdef WITH_THREAD
1187 self->lock = PyThread_allocate_lock();
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001188 if (!self->lock) {
1189 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001190 goto error;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001191 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001192#endif
1193
1194 if (mode_char == 'r')
Guido van Rossumf09ca142007-06-13 00:03:05 +00001195 self->fp = BZ2_bzReadOpen(&bzerror, self->rawfp,
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001196 0, 0, NULL, 0);
1197 else
Guido van Rossumf09ca142007-06-13 00:03:05 +00001198 self->fp = BZ2_bzWriteOpen(&bzerror, self->rawfp,
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001199 compresslevel, 0, 0);
1200
1201 if (bzerror != BZ_OK) {
1202 Util_CatchBZ2Error(bzerror);
1203 goto error;
1204 }
1205
1206 self->mode = (mode_char == 'r') ? MODE_READ : MODE_WRITE;
1207
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001208 return 0;
1209
1210error:
Guido van Rossumf09ca142007-06-13 00:03:05 +00001211 fclose(self->rawfp);
1212 self->rawfp = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001213#ifdef WITH_THREAD
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001214 if (self->lock) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001215 PyThread_free_lock(self->lock);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001216 self->lock = NULL;
1217 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001218#endif
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001219 return -1;
1220}
1221
1222static void
1223BZ2File_dealloc(BZ2FileObject *self)
1224{
1225 int bzerror;
1226#ifdef WITH_THREAD
1227 if (self->lock)
1228 PyThread_free_lock(self->lock);
1229#endif
1230 switch (self->mode) {
1231 case MODE_READ:
1232 case MODE_READ_EOF:
1233 BZ2_bzReadClose(&bzerror, self->fp);
1234 break;
1235 case MODE_WRITE:
1236 BZ2_bzWriteClose(&bzerror, self->fp,
1237 0, NULL, NULL);
1238 break;
1239 }
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001240 Util_DropReadAhead(self);
Guido van Rossumf09ca142007-06-13 00:03:05 +00001241 if (self->rawfp != NULL)
1242 fclose(self->rawfp);
Christian Heimes90aa7642007-12-19 02:45:37 +00001243 Py_TYPE(self)->tp_free((PyObject *)self);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001244}
1245
1246/* This is a hacked version of Python's fileobject.c:file_getiter(). */
1247static PyObject *
1248BZ2File_getiter(BZ2FileObject *self)
1249{
1250 if (self->mode == MODE_CLOSED) {
1251 PyErr_SetString(PyExc_ValueError,
1252 "I/O operation on closed file");
1253 return NULL;
1254 }
1255 Py_INCREF((PyObject*)self);
1256 return (PyObject *)self;
1257}
1258
1259/* This is a hacked version of Python's fileobject.c:file_iternext(). */
1260#define READAHEAD_BUFSIZE 8192
1261static PyObject *
1262BZ2File_iternext(BZ2FileObject *self)
1263{
Christian Heimes72b710a2008-05-26 13:28:38 +00001264 PyBytesObject* ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001265 ACQUIRE_LOCK(self);
1266 if (self->mode == MODE_CLOSED) {
Georg Brandl86b2fb92008-07-16 03:43:04 +00001267 RELEASE_LOCK(self);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001268 PyErr_SetString(PyExc_ValueError,
1269 "I/O operation on closed file");
1270 return NULL;
1271 }
1272 ret = Util_ReadAheadGetLineSkip(self, 0, READAHEAD_BUFSIZE);
1273 RELEASE_LOCK(self);
Christian Heimes72b710a2008-05-26 13:28:38 +00001274 if (ret == NULL || PyBytes_GET_SIZE(ret) == 0) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001275 Py_XDECREF(ret);
1276 return NULL;
1277 }
1278 return (PyObject *)ret;
1279}
1280
1281/* ===================================================================== */
1282/* BZ2File_Type definition. */
1283
1284PyDoc_VAR(BZ2File__doc__) =
1285PyDoc_STR(
1286"BZ2File(name [, mode='r', buffering=0, compresslevel=9]) -> file object\n\
1287\n\
1288Open a bz2 file. The mode can be 'r' or 'w', for reading (default) or\n\
1289writing. When opened for writing, the file will be created if it doesn't\n\
1290exist, and truncated otherwise. If the buffering argument is given, 0 means\n\
1291unbuffered, and larger numbers specify the buffer size. If compresslevel\n\
1292is given, must be a number between 1 and 9.\n\
Guido van Rossum88e860c2007-06-13 01:46:31 +00001293Data read is always returned in bytes; data written ought to be bytes.\n\
1294");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001295
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001296static PyTypeObject BZ2File_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001297 PyVarObject_HEAD_INIT(NULL, 0)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001298 "bz2.BZ2File", /*tp_name*/
1299 sizeof(BZ2FileObject), /*tp_basicsize*/
1300 0, /*tp_itemsize*/
1301 (destructor)BZ2File_dealloc, /*tp_dealloc*/
1302 0, /*tp_print*/
1303 0, /*tp_getattr*/
1304 0, /*tp_setattr*/
1305 0, /*tp_compare*/
1306 0, /*tp_repr*/
1307 0, /*tp_as_number*/
1308 0, /*tp_as_sequence*/
1309 0, /*tp_as_mapping*/
1310 0, /*tp_hash*/
1311 0, /*tp_call*/
1312 0, /*tp_str*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00001313 PyObject_GenericGetAttr,/*tp_getattro*/
1314 PyObject_GenericSetAttr,/*tp_setattro*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001315 0, /*tp_as_buffer*/
1316 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1317 BZ2File__doc__, /*tp_doc*/
1318 0, /*tp_traverse*/
1319 0, /*tp_clear*/
1320 0, /*tp_richcompare*/
1321 0, /*tp_weaklistoffset*/
1322 (getiterfunc)BZ2File_getiter, /*tp_iter*/
1323 (iternextfunc)BZ2File_iternext, /*tp_iternext*/
1324 BZ2File_methods, /*tp_methods*/
Guido van Rossum79139b22007-02-09 23:20:19 +00001325 0, /*tp_members*/
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001326 BZ2File_getset, /*tp_getset*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001327 0, /*tp_base*/
1328 0, /*tp_dict*/
1329 0, /*tp_descr_get*/
1330 0, /*tp_descr_set*/
1331 0, /*tp_dictoffset*/
1332 (initproc)BZ2File_init, /*tp_init*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00001333 PyType_GenericAlloc, /*tp_alloc*/
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001334 PyType_GenericNew, /*tp_new*/
Neal Norwitz30d1c512007-08-19 22:48:23 +00001335 PyObject_Free, /*tp_free*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001336 0, /*tp_is_gc*/
1337};
1338
1339
1340/* ===================================================================== */
1341/* Methods of BZ2Comp. */
1342
1343PyDoc_STRVAR(BZ2Comp_compress__doc__,
1344"compress(data) -> string\n\
1345\n\
1346Provide more data to the compressor object. It will return chunks of\n\
1347compressed data whenever possible. When you've finished providing data\n\
1348to compress, call the flush() method to finish the compression process,\n\
1349and return what is left in the internal buffers.\n\
1350");
1351
1352static PyObject *
1353BZ2Comp_compress(BZ2CompObject *self, PyObject *args)
1354{
1355 char *data;
1356 int datasize;
1357 int bufsize = SMALLCHUNK;
Martin v. Löwisb9a0f912003-03-29 10:06:18 +00001358 PY_LONG_LONG totalout;
Gustavo Niemeyer7d7930b2002-11-05 18:41:53 +00001359 PyObject *ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001360 bz_stream *bzs = &self->bzs;
1361 int bzerror;
1362
Guido van Rossum98297ee2007-11-06 21:34:58 +00001363 if (!PyArg_ParseTuple(args, "y#:compress", &data, &datasize))
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001364 return NULL;
1365
Gustavo Niemeyera6e436e2004-02-14 00:02:45 +00001366 if (datasize == 0)
Christian Heimes72b710a2008-05-26 13:28:38 +00001367 return PyBytes_FromStringAndSize("", 0);
Gustavo Niemeyera6e436e2004-02-14 00:02:45 +00001368
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001369 ACQUIRE_LOCK(self);
1370 if (!self->running) {
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001371 PyErr_SetString(PyExc_ValueError,
1372 "this object was already flushed");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001373 goto error;
1374 }
1375
Christian Heimes72b710a2008-05-26 13:28:38 +00001376 ret = PyBytes_FromStringAndSize(NULL, bufsize);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001377 if (!ret)
1378 goto error;
1379
1380 bzs->next_in = data;
1381 bzs->avail_in = datasize;
1382 bzs->next_out = BUF(ret);
1383 bzs->avail_out = bufsize;
1384
1385 totalout = BZS_TOTAL_OUT(bzs);
1386
1387 for (;;) {
1388 Py_BEGIN_ALLOW_THREADS
1389 bzerror = BZ2_bzCompress(bzs, BZ_RUN);
1390 Py_END_ALLOW_THREADS
1391 if (bzerror != BZ_RUN_OK) {
1392 Util_CatchBZ2Error(bzerror);
1393 goto error;
1394 }
Guido van Rossumd8faa362007-04-27 19:54:29 +00001395 if (bzs->avail_in == 0)
1396 break; /* no more input data */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001397 if (bzs->avail_out == 0) {
1398 bufsize = Util_NewBufferSize(bufsize);
Christian Heimes72b710a2008-05-26 13:28:38 +00001399 if (_PyBytes_Resize(&ret, bufsize) < 0) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001400 BZ2_bzCompressEnd(bzs);
1401 goto error;
1402 }
1403 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1404 - totalout);
1405 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001406 }
1407 }
1408
Christian Heimes72b710a2008-05-26 13:28:38 +00001409 if (_PyBytes_Resize(&ret,
Guido van Rossum522a6c62007-05-22 23:13:45 +00001410 (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout)) < 0)
1411 goto error;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001412
1413 RELEASE_LOCK(self);
1414 return ret;
1415
1416error:
1417 RELEASE_LOCK(self);
1418 Py_XDECREF(ret);
1419 return NULL;
1420}
1421
1422PyDoc_STRVAR(BZ2Comp_flush__doc__,
1423"flush() -> string\n\
1424\n\
1425Finish the compression process and return what is left in internal buffers.\n\
1426You must not use the compressor object after calling this method.\n\
1427");
1428
1429static PyObject *
1430BZ2Comp_flush(BZ2CompObject *self)
1431{
1432 int bufsize = SMALLCHUNK;
Gustavo Niemeyer7d7930b2002-11-05 18:41:53 +00001433 PyObject *ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001434 bz_stream *bzs = &self->bzs;
Martin v. Löwisb9a0f912003-03-29 10:06:18 +00001435 PY_LONG_LONG totalout;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001436 int bzerror;
1437
1438 ACQUIRE_LOCK(self);
1439 if (!self->running) {
1440 PyErr_SetString(PyExc_ValueError, "object was already "
1441 "flushed");
1442 goto error;
1443 }
1444 self->running = 0;
1445
Christian Heimes72b710a2008-05-26 13:28:38 +00001446 ret = PyBytes_FromStringAndSize(NULL, bufsize);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001447 if (!ret)
1448 goto error;
1449
1450 bzs->next_out = BUF(ret);
1451 bzs->avail_out = bufsize;
1452
1453 totalout = BZS_TOTAL_OUT(bzs);
1454
1455 for (;;) {
1456 Py_BEGIN_ALLOW_THREADS
1457 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
1458 Py_END_ALLOW_THREADS
1459 if (bzerror == BZ_STREAM_END) {
1460 break;
1461 } else if (bzerror != BZ_FINISH_OK) {
1462 Util_CatchBZ2Error(bzerror);
1463 goto error;
1464 }
1465 if (bzs->avail_out == 0) {
1466 bufsize = Util_NewBufferSize(bufsize);
Christian Heimes72b710a2008-05-26 13:28:38 +00001467 if (_PyBytes_Resize(&ret, bufsize) < 0)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001468 goto error;
1469 bzs->next_out = BUF(ret);
1470 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1471 - totalout);
1472 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1473 }
1474 }
1475
Guido van Rossum522a6c62007-05-22 23:13:45 +00001476 if (bzs->avail_out != 0) {
Christian Heimes72b710a2008-05-26 13:28:38 +00001477 if (_PyBytes_Resize(&ret,
Guido van Rossum522a6c62007-05-22 23:13:45 +00001478 (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout)) < 0)
1479 goto error;
1480 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001481
1482 RELEASE_LOCK(self);
1483 return ret;
1484
1485error:
1486 RELEASE_LOCK(self);
1487 Py_XDECREF(ret);
1488 return NULL;
1489}
1490
1491static PyMethodDef BZ2Comp_methods[] = {
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001492 {"compress", (PyCFunction)BZ2Comp_compress, METH_VARARGS,
1493 BZ2Comp_compress__doc__},
1494 {"flush", (PyCFunction)BZ2Comp_flush, METH_NOARGS,
1495 BZ2Comp_flush__doc__},
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001496 {NULL, NULL} /* sentinel */
1497};
1498
1499
1500/* ===================================================================== */
1501/* Slot definitions for BZ2Comp_Type. */
1502
1503static int
1504BZ2Comp_init(BZ2CompObject *self, PyObject *args, PyObject *kwargs)
1505{
1506 int compresslevel = 9;
1507 int bzerror;
Martin v. Löwis15e62742006-02-27 16:46:16 +00001508 static char *kwlist[] = {"compresslevel", 0};
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001509
1510 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i:BZ2Compressor",
1511 kwlist, &compresslevel))
1512 return -1;
1513
1514 if (compresslevel < 1 || compresslevel > 9) {
1515 PyErr_SetString(PyExc_ValueError,
1516 "compresslevel must be between 1 and 9");
1517 goto error;
1518 }
1519
1520#ifdef WITH_THREAD
1521 self->lock = PyThread_allocate_lock();
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001522 if (!self->lock) {
1523 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001524 goto error;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001525 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001526#endif
1527
1528 memset(&self->bzs, 0, sizeof(bz_stream));
1529 bzerror = BZ2_bzCompressInit(&self->bzs, compresslevel, 0, 0);
1530 if (bzerror != BZ_OK) {
1531 Util_CatchBZ2Error(bzerror);
1532 goto error;
1533 }
1534
1535 self->running = 1;
1536
1537 return 0;
1538error:
1539#ifdef WITH_THREAD
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001540 if (self->lock) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001541 PyThread_free_lock(self->lock);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001542 self->lock = NULL;
1543 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001544#endif
1545 return -1;
1546}
1547
1548static void
1549BZ2Comp_dealloc(BZ2CompObject *self)
1550{
1551#ifdef WITH_THREAD
1552 if (self->lock)
1553 PyThread_free_lock(self->lock);
1554#endif
1555 BZ2_bzCompressEnd(&self->bzs);
Christian Heimes90aa7642007-12-19 02:45:37 +00001556 Py_TYPE(self)->tp_free((PyObject *)self);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001557}
1558
1559
1560/* ===================================================================== */
1561/* BZ2Comp_Type definition. */
1562
1563PyDoc_STRVAR(BZ2Comp__doc__,
1564"BZ2Compressor([compresslevel=9]) -> compressor object\n\
1565\n\
1566Create a new compressor object. This object may be used to compress\n\
1567data sequentially. If you want to compress data in one shot, use the\n\
1568compress() function instead. The compresslevel parameter, if given,\n\
1569must be a number between 1 and 9.\n\
1570");
1571
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001572static PyTypeObject BZ2Comp_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001573 PyVarObject_HEAD_INIT(NULL, 0)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001574 "bz2.BZ2Compressor", /*tp_name*/
1575 sizeof(BZ2CompObject), /*tp_basicsize*/
1576 0, /*tp_itemsize*/
1577 (destructor)BZ2Comp_dealloc, /*tp_dealloc*/
1578 0, /*tp_print*/
1579 0, /*tp_getattr*/
1580 0, /*tp_setattr*/
1581 0, /*tp_compare*/
1582 0, /*tp_repr*/
1583 0, /*tp_as_number*/
1584 0, /*tp_as_sequence*/
1585 0, /*tp_as_mapping*/
1586 0, /*tp_hash*/
1587 0, /*tp_call*/
1588 0, /*tp_str*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00001589 PyObject_GenericGetAttr,/*tp_getattro*/
1590 PyObject_GenericSetAttr,/*tp_setattro*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001591 0, /*tp_as_buffer*/
1592 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1593 BZ2Comp__doc__, /*tp_doc*/
1594 0, /*tp_traverse*/
1595 0, /*tp_clear*/
1596 0, /*tp_richcompare*/
1597 0, /*tp_weaklistoffset*/
1598 0, /*tp_iter*/
1599 0, /*tp_iternext*/
1600 BZ2Comp_methods, /*tp_methods*/
1601 0, /*tp_members*/
1602 0, /*tp_getset*/
1603 0, /*tp_base*/
1604 0, /*tp_dict*/
1605 0, /*tp_descr_get*/
1606 0, /*tp_descr_set*/
1607 0, /*tp_dictoffset*/
1608 (initproc)BZ2Comp_init, /*tp_init*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00001609 PyType_GenericAlloc, /*tp_alloc*/
1610 PyType_GenericNew, /*tp_new*/
Neal Norwitz30d1c512007-08-19 22:48:23 +00001611 PyObject_Free, /*tp_free*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001612 0, /*tp_is_gc*/
1613};
1614
1615
1616/* ===================================================================== */
1617/* Members of BZ2Decomp. */
1618
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001619#undef OFF
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001620#define OFF(x) offsetof(BZ2DecompObject, x)
1621
1622static PyMemberDef BZ2Decomp_members[] = {
Guido van Rossum33d26892007-08-05 15:29:28 +00001623 {"unused_data", T_OBJECT, OFF(unused_data), READONLY},
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001624 {NULL} /* Sentinel */
1625};
1626
1627
1628/* ===================================================================== */
1629/* Methods of BZ2Decomp. */
1630
1631PyDoc_STRVAR(BZ2Decomp_decompress__doc__,
1632"decompress(data) -> string\n\
1633\n\
1634Provide more data to the decompressor object. It will return chunks\n\
1635of decompressed data whenever possible. If you try to decompress data\n\
1636after the end of stream is found, EOFError will be raised. If any data\n\
1637was found after the end of stream, it'll be ignored and saved in\n\
1638unused_data attribute.\n\
1639");
1640
1641static PyObject *
1642BZ2Decomp_decompress(BZ2DecompObject *self, PyObject *args)
1643{
1644 char *data;
1645 int datasize;
1646 int bufsize = SMALLCHUNK;
Martin v. Löwisb9a0f912003-03-29 10:06:18 +00001647 PY_LONG_LONG totalout;
Neal Norwitz18142c02002-11-05 18:17:32 +00001648 PyObject *ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001649 bz_stream *bzs = &self->bzs;
1650 int bzerror;
1651
Guido van Rossum98297ee2007-11-06 21:34:58 +00001652 if (!PyArg_ParseTuple(args, "y#:decompress", &data, &datasize))
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001653 return NULL;
1654
1655 ACQUIRE_LOCK(self);
1656 if (!self->running) {
1657 PyErr_SetString(PyExc_EOFError, "end of stream was "
1658 "already found");
1659 goto error;
1660 }
1661
Christian Heimes72b710a2008-05-26 13:28:38 +00001662 ret = PyBytes_FromStringAndSize(NULL, bufsize);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001663 if (!ret)
1664 goto error;
1665
1666 bzs->next_in = data;
1667 bzs->avail_in = datasize;
1668 bzs->next_out = BUF(ret);
1669 bzs->avail_out = bufsize;
1670
1671 totalout = BZS_TOTAL_OUT(bzs);
1672
1673 for (;;) {
1674 Py_BEGIN_ALLOW_THREADS
1675 bzerror = BZ2_bzDecompress(bzs);
1676 Py_END_ALLOW_THREADS
1677 if (bzerror == BZ_STREAM_END) {
1678 if (bzs->avail_in != 0) {
1679 Py_DECREF(self->unused_data);
1680 self->unused_data =
Christian Heimes72b710a2008-05-26 13:28:38 +00001681 PyBytes_FromStringAndSize(bzs->next_in,
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001682 bzs->avail_in);
1683 }
1684 self->running = 0;
1685 break;
1686 }
1687 if (bzerror != BZ_OK) {
1688 Util_CatchBZ2Error(bzerror);
1689 goto error;
1690 }
Guido van Rossumd8faa362007-04-27 19:54:29 +00001691 if (bzs->avail_in == 0)
1692 break; /* no more input data */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001693 if (bzs->avail_out == 0) {
1694 bufsize = Util_NewBufferSize(bufsize);
Christian Heimes72b710a2008-05-26 13:28:38 +00001695 if (_PyBytes_Resize(&ret, bufsize) < 0) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001696 BZ2_bzDecompressEnd(bzs);
1697 goto error;
1698 }
1699 bzs->next_out = BUF(ret);
1700 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1701 - totalout);
1702 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001703 }
1704 }
1705
Guido van Rossum522a6c62007-05-22 23:13:45 +00001706 if (bzs->avail_out != 0) {
Christian Heimes72b710a2008-05-26 13:28:38 +00001707 if (_PyBytes_Resize(&ret,
Guido van Rossum522a6c62007-05-22 23:13:45 +00001708 (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout)) < 0)
1709 goto error;
1710 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001711
1712 RELEASE_LOCK(self);
1713 return ret;
1714
1715error:
1716 RELEASE_LOCK(self);
1717 Py_XDECREF(ret);
1718 return NULL;
1719}
1720
1721static PyMethodDef BZ2Decomp_methods[] = {
1722 {"decompress", (PyCFunction)BZ2Decomp_decompress, METH_VARARGS, BZ2Decomp_decompress__doc__},
1723 {NULL, NULL} /* sentinel */
1724};
1725
1726
1727/* ===================================================================== */
1728/* Slot definitions for BZ2Decomp_Type. */
1729
1730static int
1731BZ2Decomp_init(BZ2DecompObject *self, PyObject *args, PyObject *kwargs)
1732{
1733 int bzerror;
1734
1735 if (!PyArg_ParseTuple(args, ":BZ2Decompressor"))
1736 return -1;
1737
1738#ifdef WITH_THREAD
1739 self->lock = PyThread_allocate_lock();
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001740 if (!self->lock) {
1741 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001742 goto error;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001743 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001744#endif
1745
Christian Heimes72b710a2008-05-26 13:28:38 +00001746 self->unused_data = PyBytes_FromStringAndSize("", 0);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001747 if (!self->unused_data)
1748 goto error;
1749
1750 memset(&self->bzs, 0, sizeof(bz_stream));
1751 bzerror = BZ2_bzDecompressInit(&self->bzs, 0, 0);
1752 if (bzerror != BZ_OK) {
1753 Util_CatchBZ2Error(bzerror);
1754 goto error;
1755 }
1756
1757 self->running = 1;
1758
1759 return 0;
1760
1761error:
1762#ifdef WITH_THREAD
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001763 if (self->lock) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001764 PyThread_free_lock(self->lock);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001765 self->lock = NULL;
1766 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001767#endif
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001768 Py_CLEAR(self->unused_data);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001769 return -1;
1770}
1771
1772static void
1773BZ2Decomp_dealloc(BZ2DecompObject *self)
1774{
1775#ifdef WITH_THREAD
1776 if (self->lock)
1777 PyThread_free_lock(self->lock);
1778#endif
1779 Py_XDECREF(self->unused_data);
1780 BZ2_bzDecompressEnd(&self->bzs);
Christian Heimes90aa7642007-12-19 02:45:37 +00001781 Py_TYPE(self)->tp_free((PyObject *)self);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001782}
1783
1784
1785/* ===================================================================== */
1786/* BZ2Decomp_Type definition. */
1787
1788PyDoc_STRVAR(BZ2Decomp__doc__,
1789"BZ2Decompressor() -> decompressor object\n\
1790\n\
1791Create a new decompressor object. This object may be used to decompress\n\
1792data sequentially. If you want to decompress data in one shot, use the\n\
1793decompress() function instead.\n\
1794");
1795
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001796static PyTypeObject BZ2Decomp_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001797 PyVarObject_HEAD_INIT(NULL, 0)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001798 "bz2.BZ2Decompressor", /*tp_name*/
1799 sizeof(BZ2DecompObject), /*tp_basicsize*/
1800 0, /*tp_itemsize*/
1801 (destructor)BZ2Decomp_dealloc, /*tp_dealloc*/
1802 0, /*tp_print*/
1803 0, /*tp_getattr*/
1804 0, /*tp_setattr*/
1805 0, /*tp_compare*/
1806 0, /*tp_repr*/
1807 0, /*tp_as_number*/
1808 0, /*tp_as_sequence*/
1809 0, /*tp_as_mapping*/
1810 0, /*tp_hash*/
1811 0, /*tp_call*/
1812 0, /*tp_str*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00001813 PyObject_GenericGetAttr,/*tp_getattro*/
1814 PyObject_GenericSetAttr,/*tp_setattro*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001815 0, /*tp_as_buffer*/
1816 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1817 BZ2Decomp__doc__, /*tp_doc*/
1818 0, /*tp_traverse*/
1819 0, /*tp_clear*/
1820 0, /*tp_richcompare*/
1821 0, /*tp_weaklistoffset*/
1822 0, /*tp_iter*/
1823 0, /*tp_iternext*/
1824 BZ2Decomp_methods, /*tp_methods*/
1825 BZ2Decomp_members, /*tp_members*/
1826 0, /*tp_getset*/
1827 0, /*tp_base*/
1828 0, /*tp_dict*/
1829 0, /*tp_descr_get*/
1830 0, /*tp_descr_set*/
1831 0, /*tp_dictoffset*/
1832 (initproc)BZ2Decomp_init, /*tp_init*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00001833 PyType_GenericAlloc, /*tp_alloc*/
1834 PyType_GenericNew, /*tp_new*/
Neal Norwitz30d1c512007-08-19 22:48:23 +00001835 PyObject_Free, /*tp_free*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001836 0, /*tp_is_gc*/
1837};
1838
1839
1840/* ===================================================================== */
1841/* Module functions. */
1842
1843PyDoc_STRVAR(bz2_compress__doc__,
1844"compress(data [, compresslevel=9]) -> string\n\
1845\n\
1846Compress data in one shot. If you want to compress data sequentially,\n\
1847use an instance of BZ2Compressor instead. The compresslevel parameter, if\n\
1848given, must be a number between 1 and 9.\n\
1849");
1850
1851static PyObject *
1852bz2_compress(PyObject *self, PyObject *args, PyObject *kwargs)
1853{
1854 int compresslevel=9;
1855 char *data;
1856 int datasize;
1857 int bufsize;
Gustavo Niemeyer7d7930b2002-11-05 18:41:53 +00001858 PyObject *ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001859 bz_stream _bzs;
1860 bz_stream *bzs = &_bzs;
1861 int bzerror;
Martin v. Löwis15e62742006-02-27 16:46:16 +00001862 static char *kwlist[] = {"data", "compresslevel", 0};
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001863
Guido van Rossum98297ee2007-11-06 21:34:58 +00001864 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y#|i",
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001865 kwlist, &data, &datasize,
1866 &compresslevel))
1867 return NULL;
1868
1869 if (compresslevel < 1 || compresslevel > 9) {
1870 PyErr_SetString(PyExc_ValueError,
1871 "compresslevel must be between 1 and 9");
1872 return NULL;
1873 }
1874
1875 /* Conforming to bz2 manual, this is large enough to fit compressed
1876 * data in one shot. We will check it later anyway. */
1877 bufsize = datasize + (datasize/100+1) + 600;
1878
Christian Heimes72b710a2008-05-26 13:28:38 +00001879 ret = PyBytes_FromStringAndSize(NULL, bufsize);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001880 if (!ret)
1881 return NULL;
1882
1883 memset(bzs, 0, sizeof(bz_stream));
1884
1885 bzs->next_in = data;
1886 bzs->avail_in = datasize;
1887 bzs->next_out = BUF(ret);
1888 bzs->avail_out = bufsize;
1889
1890 bzerror = BZ2_bzCompressInit(bzs, compresslevel, 0, 0);
1891 if (bzerror != BZ_OK) {
1892 Util_CatchBZ2Error(bzerror);
1893 Py_DECREF(ret);
1894 return NULL;
1895 }
Tim Peterse3228092002-11-09 04:21:44 +00001896
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001897 for (;;) {
1898 Py_BEGIN_ALLOW_THREADS
1899 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
1900 Py_END_ALLOW_THREADS
1901 if (bzerror == BZ_STREAM_END) {
1902 break;
1903 } else if (bzerror != BZ_FINISH_OK) {
1904 BZ2_bzCompressEnd(bzs);
1905 Util_CatchBZ2Error(bzerror);
1906 Py_DECREF(ret);
1907 return NULL;
1908 }
1909 if (bzs->avail_out == 0) {
1910 bufsize = Util_NewBufferSize(bufsize);
Christian Heimes72b710a2008-05-26 13:28:38 +00001911 if (_PyBytes_Resize(&ret, bufsize) < 0) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001912 BZ2_bzCompressEnd(bzs);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001913 return NULL;
1914 }
1915 bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
1916 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1917 }
1918 }
1919
Guido van Rossum522a6c62007-05-22 23:13:45 +00001920 if (bzs->avail_out != 0) {
Christian Heimes72b710a2008-05-26 13:28:38 +00001921 if (_PyBytes_Resize(&ret, (Py_ssize_t)BZS_TOTAL_OUT(bzs)) < 0) {
Guido van Rossum522a6c62007-05-22 23:13:45 +00001922 ret = NULL;
1923 }
1924 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001925 BZ2_bzCompressEnd(bzs);
1926
1927 return ret;
1928}
1929
1930PyDoc_STRVAR(bz2_decompress__doc__,
1931"decompress(data) -> decompressed data\n\
1932\n\
1933Decompress data in one shot. If you want to decompress data sequentially,\n\
1934use an instance of BZ2Decompressor instead.\n\
1935");
1936
1937static PyObject *
1938bz2_decompress(PyObject *self, PyObject *args)
1939{
1940 char *data;
1941 int datasize;
1942 int bufsize = SMALLCHUNK;
1943 PyObject *ret;
1944 bz_stream _bzs;
1945 bz_stream *bzs = &_bzs;
1946 int bzerror;
1947
Guido van Rossum98297ee2007-11-06 21:34:58 +00001948 if (!PyArg_ParseTuple(args, "y#:decompress", &data, &datasize))
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001949 return NULL;
1950
1951 if (datasize == 0)
Christian Heimes72b710a2008-05-26 13:28:38 +00001952 return PyBytes_FromStringAndSize("", 0);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001953
Christian Heimes72b710a2008-05-26 13:28:38 +00001954 ret = PyBytes_FromStringAndSize(NULL, bufsize);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001955 if (!ret)
1956 return NULL;
1957
1958 memset(bzs, 0, sizeof(bz_stream));
1959
1960 bzs->next_in = data;
1961 bzs->avail_in = datasize;
1962 bzs->next_out = BUF(ret);
1963 bzs->avail_out = bufsize;
1964
1965 bzerror = BZ2_bzDecompressInit(bzs, 0, 0);
1966 if (bzerror != BZ_OK) {
1967 Util_CatchBZ2Error(bzerror);
1968 Py_DECREF(ret);
1969 return NULL;
1970 }
Tim Peterse3228092002-11-09 04:21:44 +00001971
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001972 for (;;) {
1973 Py_BEGIN_ALLOW_THREADS
1974 bzerror = BZ2_bzDecompress(bzs);
1975 Py_END_ALLOW_THREADS
1976 if (bzerror == BZ_STREAM_END) {
1977 break;
1978 } else if (bzerror != BZ_OK) {
1979 BZ2_bzDecompressEnd(bzs);
1980 Util_CatchBZ2Error(bzerror);
1981 Py_DECREF(ret);
1982 return NULL;
1983 }
Guido van Rossumd8faa362007-04-27 19:54:29 +00001984 if (bzs->avail_in == 0) {
1985 BZ2_bzDecompressEnd(bzs);
1986 PyErr_SetString(PyExc_ValueError,
1987 "couldn't find end of stream");
1988 Py_DECREF(ret);
1989 return NULL;
1990 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001991 if (bzs->avail_out == 0) {
1992 bufsize = Util_NewBufferSize(bufsize);
Christian Heimes72b710a2008-05-26 13:28:38 +00001993 if (_PyBytes_Resize(&ret, bufsize) < 0) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001994 BZ2_bzDecompressEnd(bzs);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001995 return NULL;
1996 }
1997 bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
1998 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001999 }
2000 }
2001
Guido van Rossum522a6c62007-05-22 23:13:45 +00002002 if (bzs->avail_out != 0) {
Christian Heimes72b710a2008-05-26 13:28:38 +00002003 if (_PyBytes_Resize(&ret, (Py_ssize_t)BZS_TOTAL_OUT(bzs)) < 0) {
Guido van Rossum522a6c62007-05-22 23:13:45 +00002004 ret = NULL;
2005 }
2006 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002007 BZ2_bzDecompressEnd(bzs);
2008
2009 return ret;
2010}
2011
2012static PyMethodDef bz2_methods[] = {
2013 {"compress", (PyCFunction) bz2_compress, METH_VARARGS|METH_KEYWORDS,
2014 bz2_compress__doc__},
2015 {"decompress", (PyCFunction) bz2_decompress, METH_VARARGS,
2016 bz2_decompress__doc__},
2017 {NULL, NULL} /* sentinel */
2018};
2019
2020/* ===================================================================== */
2021/* Initialization function. */
2022
2023PyDoc_STRVAR(bz2__doc__,
2024"The python bz2 module provides a comprehensive interface for\n\
2025the bz2 compression library. It implements a complete file\n\
2026interface, one shot (de)compression functions, and types for\n\
2027sequential (de)compression.\n\
2028");
2029
Martin v. Löwis1a214512008-06-11 05:26:20 +00002030
2031static struct PyModuleDef bz2module = {
2032 PyModuleDef_HEAD_INIT,
2033 "bz2",
2034 bz2__doc__,
2035 -1,
2036 bz2_methods,
2037 NULL,
2038 NULL,
2039 NULL,
2040 NULL
2041};
2042
Neal Norwitz21d896c2003-07-01 20:15:21 +00002043PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00002044PyInit_bz2(void)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002045{
2046 PyObject *m;
2047
Christian Heimes90aa7642007-12-19 02:45:37 +00002048 Py_TYPE(&BZ2File_Type) = &PyType_Type;
2049 Py_TYPE(&BZ2Comp_Type) = &PyType_Type;
2050 Py_TYPE(&BZ2Decomp_Type) = &PyType_Type;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002051
Martin v. Löwis1a214512008-06-11 05:26:20 +00002052 m = PyModule_Create(&bz2module);
Neal Norwitz1ac754f2006-01-19 06:09:39 +00002053 if (m == NULL)
Martin v. Löwis1a214512008-06-11 05:26:20 +00002054 return NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002055
Neal Norwitz53cbdaa2007-08-23 21:42:55 +00002056 PyModule_AddObject(m, "__author__", PyUnicode_FromString(__author__));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002057
2058 Py_INCREF(&BZ2File_Type);
2059 PyModule_AddObject(m, "BZ2File", (PyObject *)&BZ2File_Type);
2060
2061 Py_INCREF(&BZ2Comp_Type);
2062 PyModule_AddObject(m, "BZ2Compressor", (PyObject *)&BZ2Comp_Type);
2063
2064 Py_INCREF(&BZ2Decomp_Type);
2065 PyModule_AddObject(m, "BZ2Decompressor", (PyObject *)&BZ2Decomp_Type);
Martin v. Löwis1a214512008-06-11 05:26:20 +00002066 return m;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002067}