blob: c6819914153dfe41cbdc767b1783958afe6d5590 [file] [log] [blame]
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001/*
2
3python-bz2 - python bz2 library interface
4
5Copyright (c) 2002 Gustavo Niemeyer <niemeyer@conectiva.com>
6Copyright (c) 2002 Python Software Foundation; All Rights Reserved
7
8*/
9
Martin v. Löwise17af7b2002-11-23 09:16:19 +000010#include "Python.h"
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000011#include <stdio.h>
12#include <bzlib.h>
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000013#include "structmember.h"
14
15#ifdef WITH_THREAD
16#include "pythread.h"
17#endif
18
/* Author credit for the module, kept as a plain NUL-terminated C string. */
static char __author__[] =
    "The bz2 python module was written by:\n"
    "\n"
    " Gustavo Niemeyer <niemeyer@conectiva.com>\n";
24
/* Py_off_t: our own off_t-like type, 64-bit whenever the platform allows.
 * (Same selection logic as Objects/fileobject.c.)
 *
 * off_t is used both when large-file support is not configured and when
 * off_t itself is already at least 8 bytes; otherwise fpos_t is tried. */
#if !defined(HAVE_LARGEFILE_SUPPORT) || SIZEOF_OFF_T >= 8
typedef off_t Py_off_t;
#elif SIZEOF_FPOS_T >= 8
typedef fpos_t Py_off_t;
#else
#error "Large file support, but neither off_t nor fpos_t is large enough."
#endif
36
/* Shorthand for the raw character buffer of a bytes object. */
#define BUF(v)  PyBytes_AS_STRING(v)

/* Values taken by the `mode` field of a BZ2FileObject. */
#define MODE_CLOSED     0
#define MODE_READ       1
#define MODE_READ_EOF   2   /* reading, and the end of the stream was seen */
#define MODE_WRITE      3

/* True when v's type is exactly BZ2File_Type. */
#define BZ2FileObject_Check(v)  (Py_TYPE(v) == &BZ2File_Type)
Gustavo Niemeyer7628f1f2003-04-27 06:25:24 +000046
/* Compatibility layer over two flavours of the bzlib header.
 *
 * When the header defines BZ_CONFIG_ERROR, the BZ2_-prefixed API is used
 * directly and the output counter is split into total_out_hi32 /
 * total_out_lo32.  Otherwise the unprefixed names are mapped onto the
 * BZ2_ spellings used throughout this file and the counter is the single
 * field total_out.
 *
 * BZS_TOTAL_OUT(bzs) takes a pointer to a bz_stream.  The argument and the
 * whole expansion are parenthesized so that any pointer expression
 * (e.g. `&self->bzs`) can be passed safely. */
#ifdef BZ_CONFIG_ERROR

#if SIZEOF_LONG >= 8
#define BZS_TOTAL_OUT(bzs) \
    (((long)(bzs)->total_out_hi32 << 32) + (bzs)->total_out_lo32)
#elif SIZEOF_LONG_LONG >= 8
#define BZS_TOTAL_OUT(bzs) \
    (((PY_LONG_LONG)(bzs)->total_out_hi32 << 32) + (bzs)->total_out_lo32)
#else
/* No 64-bit integer type available: only the low 32 bits are reported. */
#define BZS_TOTAL_OUT(bzs) \
    ((bzs)->total_out_lo32)
#endif

#else /* ! BZ_CONFIG_ERROR */

#define BZ2_bzRead bzRead
#define BZ2_bzReadOpen bzReadOpen
#define BZ2_bzReadClose bzReadClose
#define BZ2_bzWrite bzWrite
#define BZ2_bzWriteOpen bzWriteOpen
#define BZ2_bzWriteClose bzWriteClose
#define BZ2_bzCompress bzCompress
#define BZ2_bzCompressInit bzCompressInit
#define BZ2_bzCompressEnd bzCompressEnd
#define BZ2_bzDecompress bzDecompress
#define BZ2_bzDecompressInit bzDecompressInit
#define BZ2_bzDecompressEnd bzDecompressEnd

#define BZS_TOTAL_OUT(bzs) ((bzs)->total_out)

#endif /* ! BZ_CONFIG_ERROR */
78
79
/* Per-object locking.  `obj` is a pointer to an object with a `lock`
 * field; it is parenthesized so any pointer expression may be passed.
 * Without WITH_THREAD both macros expand to nothing. */
#ifdef WITH_THREAD
#define ACQUIRE_LOCK(obj) PyThread_acquire_lock((obj)->lock, 1)
#define RELEASE_LOCK(obj) PyThread_release_lock((obj)->lock)
#else
#define ACQUIRE_LOCK(obj)
#define RELEASE_LOCK(obj)
#endif
87
/* Bit flags recorded in f_newlinetypes; each newline style has its own bit. */
#define NEWLINE_UNKNOWN 0   /* no newline seen yet */
#define NEWLINE_CR      1   /* "\r" seen */
#define NEWLINE_LF      2   /* "\n" seen */
#define NEWLINE_CRLF    4   /* "\r\n" seen */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000093
94/* ===================================================================== */
95/* Structure definitions. */
96
97typedef struct {
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +000098 PyObject_HEAD
Guido van Rossumf09ca142007-06-13 00:03:05 +000099 FILE *rawfp;
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000100
101 char* f_buf; /* Allocated readahead buffer */
102 char* f_bufend; /* Points after last occupied position */
103 char* f_bufptr; /* Current buffer position */
104
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000105 BZFILE *fp;
106 int mode;
Georg Brandla8bcecc2005-09-03 07:49:53 +0000107 Py_off_t pos;
108 Py_off_t size;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000109#ifdef WITH_THREAD
110 PyThread_type_lock lock;
111#endif
112} BZ2FileObject;
113
114typedef struct {
115 PyObject_HEAD
116 bz_stream bzs;
117 int running;
118#ifdef WITH_THREAD
119 PyThread_type_lock lock;
120#endif
121} BZ2CompObject;
122
123typedef struct {
124 PyObject_HEAD
125 bz_stream bzs;
126 int running;
127 PyObject *unused_data;
128#ifdef WITH_THREAD
129 PyThread_type_lock lock;
130#endif
131} BZ2DecompObject;
132
133/* ===================================================================== */
134/* Utility functions. */
135
136static int
137Util_CatchBZ2Error(int bzerror)
138{
139 int ret = 0;
140 switch(bzerror) {
141 case BZ_OK:
142 case BZ_STREAM_END:
143 break;
144
Gustavo Niemeyer7628f1f2003-04-27 06:25:24 +0000145#ifdef BZ_CONFIG_ERROR
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000146 case BZ_CONFIG_ERROR:
147 PyErr_SetString(PyExc_SystemError,
148 "the bz2 library was not compiled "
149 "correctly");
150 ret = 1;
151 break;
Gustavo Niemeyer7628f1f2003-04-27 06:25:24 +0000152#endif
Tim Peterse3228092002-11-09 04:21:44 +0000153
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000154 case BZ_PARAM_ERROR:
155 PyErr_SetString(PyExc_ValueError,
156 "the bz2 library has received wrong "
157 "parameters");
158 ret = 1;
159 break;
Tim Peterse3228092002-11-09 04:21:44 +0000160
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000161 case BZ_MEM_ERROR:
162 PyErr_NoMemory();
163 ret = 1;
164 break;
165
166 case BZ_DATA_ERROR:
167 case BZ_DATA_ERROR_MAGIC:
168 PyErr_SetString(PyExc_IOError, "invalid data stream");
169 ret = 1;
170 break;
171
172 case BZ_IO_ERROR:
173 PyErr_SetString(PyExc_IOError, "unknown IO error");
174 ret = 1;
175 break;
176
177 case BZ_UNEXPECTED_EOF:
178 PyErr_SetString(PyExc_EOFError,
179 "compressed file ended before the "
180 "logical end-of-stream was detected");
181 ret = 1;
182 break;
183
184 case BZ_SEQUENCE_ERROR:
185 PyErr_SetString(PyExc_RuntimeError,
186 "wrong sequence of bz2 library "
187 "commands used");
188 ret = 1;
189 break;
190 }
191 return ret;
192}
193
/* Chunk sizes used when growing read buffers: SMALLCHUNK is the stdio
 * buffer size but never less than 8192; BIGCHUNK is the step used once
 * a buffer has grown past it. */
#if BUFSIZ < 8192
#define SMALLCHUNK 8192
#else
#define SMALLCHUNK BUFSIZ
#endif

#if SIZEOF_INT < 4
#define BIGCHUNK (512 * 32)
#else
#define BIGCHUNK (512 * 1024)
#endif

/* Pick the next buffer size (adapted from fileobject.c:new_buffersize()).
 *
 *   currentsize <= SMALLCHUNK             -> grow by SMALLCHUNK
 *   SMALLCHUNK < currentsize <= BIGCHUNK  -> double
 *   currentsize > BIGCHUNK                -> grow by BIGCHUNK
 */
static size_t
Util_NewBufferSize(size_t currentsize)
{
    if (currentsize <= SMALLCHUNK)
        return currentsize + SMALLCHUNK;
    if (currentsize > BIGCHUNK)
        return currentsize + BIGCHUNK;
    return currentsize + currentsize;
}
220
221/* This is a hacked version of Python's fileobject.c:get_line(). */
222static PyObject *
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000223Util_GetLine(BZ2FileObject *f, int n)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000224{
225 char c;
226 char *buf, *end;
227 size_t total_v_size; /* total # of slots in buffer */
228 size_t used_v_size; /* # used slots in buffer */
229 size_t increment; /* amount to increment the buffer */
230 PyObject *v;
231 int bzerror;
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000232 int bytes_read;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000233
234 total_v_size = n > 0 ? n : 100;
Christian Heimes72b710a2008-05-26 13:28:38 +0000235 v = PyBytes_FromStringAndSize((char *)NULL, total_v_size);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000236 if (v == NULL)
237 return NULL;
238
239 buf = BUF(v);
240 end = buf + total_v_size;
241
242 for (;;) {
243 Py_BEGIN_ALLOW_THREADS
Guido van Rossumf09ca142007-06-13 00:03:05 +0000244 do {
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000245 bytes_read = BZ2_bzRead(&bzerror, f->fp, &c, 1);
Guido van Rossumf09ca142007-06-13 00:03:05 +0000246 f->pos++;
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000247 if (bytes_read == 0)
248 break;
Guido van Rossumf09ca142007-06-13 00:03:05 +0000249 *buf++ = c;
250 } while (bzerror == BZ_OK && c != '\n' && buf != end);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000251 Py_END_ALLOW_THREADS
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000252 if (bzerror == BZ_STREAM_END) {
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000253 f->size = f->pos;
254 f->mode = MODE_READ_EOF;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000255 break;
256 } else if (bzerror != BZ_OK) {
257 Util_CatchBZ2Error(bzerror);
258 Py_DECREF(v);
259 return NULL;
260 }
261 if (c == '\n')
262 break;
263 /* Must be because buf == end */
264 if (n > 0)
265 break;
266 used_v_size = total_v_size;
267 increment = total_v_size >> 2; /* mild exponential growth */
268 total_v_size += increment;
269 if (total_v_size > INT_MAX) {
270 PyErr_SetString(PyExc_OverflowError,
271 "line is longer than a Python string can hold");
272 Py_DECREF(v);
273 return NULL;
274 }
Christian Heimes72b710a2008-05-26 13:28:38 +0000275 if (_PyBytes_Resize(&v, total_v_size) < 0) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000276 return NULL;
Guido van Rossum522a6c62007-05-22 23:13:45 +0000277 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000278 buf = BUF(v) + used_v_size;
279 end = BUF(v) + total_v_size;
280 }
281
282 used_v_size = buf - BUF(v);
Guido van Rossum522a6c62007-05-22 23:13:45 +0000283 if (used_v_size != total_v_size) {
Christian Heimes72b710a2008-05-26 13:28:38 +0000284 if (_PyBytes_Resize(&v, used_v_size) < 0) {
Guido van Rossum522a6c62007-05-22 23:13:45 +0000285 v = NULL;
286 }
287 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000288 return v;
289}
290
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000291/* This is a hacked version of Python's fileobject.c:drop_readahead(). */
292static void
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000293Util_DropReadAhead(BZ2FileObject *f)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000294{
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000295 if (f->f_buf != NULL) {
296 PyMem_Free(f->f_buf);
297 f->f_buf = NULL;
298 }
299}
300
301/* This is a hacked version of Python's fileobject.c:readahead(). */
302static int
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000303Util_ReadAhead(BZ2FileObject *f, int bufsize)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000304{
305 int chunksize;
306 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000307
308 if (f->f_buf != NULL) {
Tim Peterse3228092002-11-09 04:21:44 +0000309 if((f->f_bufend - f->f_bufptr) >= 1)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000310 return 0;
311 else
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000312 Util_DropReadAhead(f);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000313 }
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000314 if (f->mode == MODE_READ_EOF) {
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000315 f->f_bufptr = f->f_buf;
316 f->f_bufend = f->f_buf;
317 return 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000318 }
319 if ((f->f_buf = PyMem_Malloc(bufsize)) == NULL) {
Georg Brandl6aa2d1f2008-08-12 08:35:52 +0000320 PyErr_NoMemory();
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000321 return -1;
322 }
323 Py_BEGIN_ALLOW_THREADS
Guido van Rossumf09ca142007-06-13 00:03:05 +0000324 chunksize = BZ2_bzRead(&bzerror, f->fp, f->f_buf, bufsize);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000325 Py_END_ALLOW_THREADS
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000326 f->pos += chunksize;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000327 if (bzerror == BZ_STREAM_END) {
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000328 f->size = f->pos;
329 f->mode = MODE_READ_EOF;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000330 } else if (bzerror != BZ_OK) {
331 Util_CatchBZ2Error(bzerror);
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000332 Util_DropReadAhead(f);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000333 return -1;
334 }
335 f->f_bufptr = f->f_buf;
336 f->f_bufend = f->f_buf + chunksize;
337 return 0;
338}
339
340/* This is a hacked version of Python's
341 * fileobject.c:readahead_get_line_skip(). */
Christian Heimes72b710a2008-05-26 13:28:38 +0000342static PyBytesObject *
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000343Util_ReadAheadGetLineSkip(BZ2FileObject *f, int skip, int bufsize)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000344{
Christian Heimes72b710a2008-05-26 13:28:38 +0000345 PyBytesObject* s;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000346 char *bufptr;
347 char *buf;
348 int len;
349
350 if (f->f_buf == NULL)
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000351 if (Util_ReadAhead(f, bufsize) < 0)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000352 return NULL;
353
354 len = f->f_bufend - f->f_bufptr;
Tim Peterse3228092002-11-09 04:21:44 +0000355 if (len == 0)
Christian Heimes72b710a2008-05-26 13:28:38 +0000356 return (PyBytesObject *)
357 PyBytes_FromStringAndSize(NULL, skip);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000358 bufptr = memchr(f->f_bufptr, '\n', len);
359 if (bufptr != NULL) {
360 bufptr++; /* Count the '\n' */
361 len = bufptr - f->f_bufptr;
Christian Heimes72b710a2008-05-26 13:28:38 +0000362 s = (PyBytesObject *)
363 PyBytes_FromStringAndSize(NULL, skip+len);
Tim Peterse3228092002-11-09 04:21:44 +0000364 if (s == NULL)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000365 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +0000366 memcpy(PyBytes_AS_STRING(s)+skip, f->f_bufptr, len);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000367 f->f_bufptr = bufptr;
368 if (bufptr == f->f_bufend)
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000369 Util_DropReadAhead(f);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000370 } else {
371 bufptr = f->f_bufptr;
372 buf = f->f_buf;
373 f->f_buf = NULL; /* Force new readahead buffer */
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000374 s = Util_ReadAheadGetLineSkip(f, skip+len,
375 bufsize + (bufsize>>2));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000376 if (s == NULL) {
377 PyMem_Free(buf);
378 return NULL;
379 }
Christian Heimes72b710a2008-05-26 13:28:38 +0000380 memcpy(PyBytes_AS_STRING(s)+skip, bufptr, len);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000381 PyMem_Free(buf);
382 }
383 return s;
384}
385
386/* ===================================================================== */
387/* Methods of BZ2File. */
388
389PyDoc_STRVAR(BZ2File_read__doc__,
390"read([size]) -> string\n\
391\n\
392Read at most size uncompressed bytes, returned as a string. If the size\n\
393argument is negative or omitted, read until EOF is reached.\n\
394");
395
396/* This is a hacked version of Python's fileobject.c:file_read(). */
397static PyObject *
398BZ2File_read(BZ2FileObject *self, PyObject *args)
399{
400 long bytesrequested = -1;
401 size_t bytesread, buffersize, chunksize;
402 int bzerror;
403 PyObject *ret = NULL;
Tim Peterse3228092002-11-09 04:21:44 +0000404
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000405 if (!PyArg_ParseTuple(args, "|l:read", &bytesrequested))
406 return NULL;
Tim Peterse3228092002-11-09 04:21:44 +0000407
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000408 ACQUIRE_LOCK(self);
409 switch (self->mode) {
410 case MODE_READ:
411 break;
412 case MODE_READ_EOF:
Christian Heimes72b710a2008-05-26 13:28:38 +0000413 ret = PyBytes_FromStringAndSize("", 0);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000414 goto cleanup;
415 case MODE_CLOSED:
416 PyErr_SetString(PyExc_ValueError,
417 "I/O operation on closed file");
418 goto cleanup;
419 default:
420 PyErr_SetString(PyExc_IOError,
421 "file is not ready for reading");
422 goto cleanup;
423 }
424
425 if (bytesrequested < 0)
426 buffersize = Util_NewBufferSize((size_t)0);
427 else
428 buffersize = bytesrequested;
429 if (buffersize > INT_MAX) {
430 PyErr_SetString(PyExc_OverflowError,
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +0000431 "requested number of bytes is "
432 "more than a Python string can hold");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000433 goto cleanup;
434 }
Christian Heimes72b710a2008-05-26 13:28:38 +0000435 ret = PyBytes_FromStringAndSize((char *)NULL, buffersize);
Guido van Rossum75c26bc2007-08-07 23:29:20 +0000436 if (ret == NULL || buffersize == 0)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000437 goto cleanup;
438 bytesread = 0;
439
440 for (;;) {
441 Py_BEGIN_ALLOW_THREADS
Guido van Rossumf09ca142007-06-13 00:03:05 +0000442 chunksize = BZ2_bzRead(&bzerror, self->fp,
443 BUF(ret)+bytesread,
444 buffersize-bytesread);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000445 self->pos += chunksize;
446 Py_END_ALLOW_THREADS
447 bytesread += chunksize;
448 if (bzerror == BZ_STREAM_END) {
449 self->size = self->pos;
450 self->mode = MODE_READ_EOF;
451 break;
452 } else if (bzerror != BZ_OK) {
453 Util_CatchBZ2Error(bzerror);
454 Py_DECREF(ret);
455 ret = NULL;
456 goto cleanup;
457 }
458 if (bytesrequested < 0) {
459 buffersize = Util_NewBufferSize(buffersize);
Christian Heimes72b710a2008-05-26 13:28:38 +0000460 if (_PyBytes_Resize(&ret, buffersize) < 0) {
Guido van Rossum522a6c62007-05-22 23:13:45 +0000461 ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000462 goto cleanup;
Guido van Rossum522a6c62007-05-22 23:13:45 +0000463 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000464 } else {
465 break;
466 }
467 }
Guido van Rossum522a6c62007-05-22 23:13:45 +0000468 if (bytesread != buffersize) {
Christian Heimes72b710a2008-05-26 13:28:38 +0000469 if (_PyBytes_Resize(&ret, bytesread) < 0) {
Guido van Rossum522a6c62007-05-22 23:13:45 +0000470 ret = NULL;
471 }
472 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000473
474cleanup:
475 RELEASE_LOCK(self);
476 return ret;
477}
478
479PyDoc_STRVAR(BZ2File_readline__doc__,
480"readline([size]) -> string\n\
481\n\
482Return the next line from the file, as a string, retaining newline.\n\
483A non-negative size argument will limit the maximum number of bytes to\n\
484return (an incomplete line may be returned then). Return an empty\n\
485string at EOF.\n\
486");
487
488static PyObject *
489BZ2File_readline(BZ2FileObject *self, PyObject *args)
490{
491 PyObject *ret = NULL;
492 int sizehint = -1;
493
494 if (!PyArg_ParseTuple(args, "|i:readline", &sizehint))
495 return NULL;
496
497 ACQUIRE_LOCK(self);
498 switch (self->mode) {
499 case MODE_READ:
500 break;
501 case MODE_READ_EOF:
Christian Heimes72b710a2008-05-26 13:28:38 +0000502 ret = PyBytes_FromStringAndSize("", 0);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000503 goto cleanup;
504 case MODE_CLOSED:
505 PyErr_SetString(PyExc_ValueError,
506 "I/O operation on closed file");
507 goto cleanup;
508 default:
509 PyErr_SetString(PyExc_IOError,
510 "file is not ready for reading");
511 goto cleanup;
512 }
513
514 if (sizehint == 0)
Christian Heimes72b710a2008-05-26 13:28:38 +0000515 ret = PyBytes_FromStringAndSize("", 0);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000516 else
517 ret = Util_GetLine(self, (sizehint < 0) ? 0 : sizehint);
518
519cleanup:
520 RELEASE_LOCK(self);
521 return ret;
522}
523
524PyDoc_STRVAR(BZ2File_readlines__doc__,
525"readlines([size]) -> list\n\
526\n\
527Call readline() repeatedly and return a list of lines read.\n\
528The optional size argument, if given, is an approximate bound on the\n\
529total number of bytes in the lines returned.\n\
530");
531
532/* This is a hacked version of Python's fileobject.c:file_readlines(). */
533static PyObject *
534BZ2File_readlines(BZ2FileObject *self, PyObject *args)
535{
536 long sizehint = 0;
537 PyObject *list = NULL;
538 PyObject *line;
539 char small_buffer[SMALLCHUNK];
540 char *buffer = small_buffer;
541 size_t buffersize = SMALLCHUNK;
542 PyObject *big_buffer = NULL;
543 size_t nfilled = 0;
544 size_t nread;
545 size_t totalread = 0;
546 char *p, *q, *end;
547 int err;
548 int shortread = 0;
549 int bzerror;
550
551 if (!PyArg_ParseTuple(args, "|l:readlines", &sizehint))
552 return NULL;
553
554 ACQUIRE_LOCK(self);
555 switch (self->mode) {
556 case MODE_READ:
557 break;
558 case MODE_READ_EOF:
559 list = PyList_New(0);
560 goto cleanup;
561 case MODE_CLOSED:
562 PyErr_SetString(PyExc_ValueError,
563 "I/O operation on closed file");
564 goto cleanup;
565 default:
566 PyErr_SetString(PyExc_IOError,
567 "file is not ready for reading");
568 goto cleanup;
569 }
570
571 if ((list = PyList_New(0)) == NULL)
572 goto cleanup;
573
574 for (;;) {
575 Py_BEGIN_ALLOW_THREADS
Guido van Rossumf09ca142007-06-13 00:03:05 +0000576 nread = BZ2_bzRead(&bzerror, self->fp,
577 buffer+nfilled, buffersize-nfilled);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000578 self->pos += nread;
579 Py_END_ALLOW_THREADS
580 if (bzerror == BZ_STREAM_END) {
581 self->size = self->pos;
582 self->mode = MODE_READ_EOF;
583 if (nread == 0) {
584 sizehint = 0;
585 break;
586 }
587 shortread = 1;
588 } else if (bzerror != BZ_OK) {
589 Util_CatchBZ2Error(bzerror);
590 error:
591 Py_DECREF(list);
592 list = NULL;
593 goto cleanup;
594 }
595 totalread += nread;
596 p = memchr(buffer+nfilled, '\n', nread);
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000597 if (!shortread && p == NULL) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000598 /* Need a larger buffer to fit this line */
599 nfilled += nread;
600 buffersize *= 2;
601 if (buffersize > INT_MAX) {
602 PyErr_SetString(PyExc_OverflowError,
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000603 "line is longer than a Python string can hold");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000604 goto error;
605 }
606 if (big_buffer == NULL) {
607 /* Create the big buffer */
Christian Heimes72b710a2008-05-26 13:28:38 +0000608 big_buffer = PyBytes_FromStringAndSize(
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000609 NULL, buffersize);
610 if (big_buffer == NULL)
611 goto error;
Christian Heimes72b710a2008-05-26 13:28:38 +0000612 buffer = PyBytes_AS_STRING(big_buffer);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000613 memcpy(buffer, small_buffer, nfilled);
614 }
615 else {
616 /* Grow the big buffer */
Christian Heimes72b710a2008-05-26 13:28:38 +0000617 if (_PyBytes_Resize(&big_buffer, buffersize) < 0){
Guido van Rossum522a6c62007-05-22 23:13:45 +0000618 big_buffer = NULL;
619 goto error;
620 }
Christian Heimes72b710a2008-05-26 13:28:38 +0000621 buffer = PyBytes_AS_STRING(big_buffer);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000622 }
Guido van Rossum522a6c62007-05-22 23:13:45 +0000623 continue;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000624 }
625 end = buffer+nfilled+nread;
626 q = buffer;
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000627 while (p != NULL) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000628 /* Process complete lines */
629 p++;
Christian Heimes72b710a2008-05-26 13:28:38 +0000630 line = PyBytes_FromStringAndSize(q, p-q);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000631 if (line == NULL)
632 goto error;
633 err = PyList_Append(list, line);
634 Py_DECREF(line);
635 if (err != 0)
636 goto error;
637 q = p;
638 p = memchr(q, '\n', end-q);
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000639 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000640 /* Move the remaining incomplete line to the start */
641 nfilled = end-q;
642 memmove(buffer, q, nfilled);
643 if (sizehint > 0)
644 if (totalread >= (size_t)sizehint)
645 break;
646 if (shortread) {
647 sizehint = 0;
648 break;
649 }
650 }
651 if (nfilled != 0) {
652 /* Partial last line */
Christian Heimes72b710a2008-05-26 13:28:38 +0000653 line = PyBytes_FromStringAndSize(buffer, nfilled);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000654 if (line == NULL)
655 goto error;
656 if (sizehint > 0) {
657 /* Need to complete the last line */
658 PyObject *rest = Util_GetLine(self, 0);
659 if (rest == NULL) {
660 Py_DECREF(line);
661 goto error;
662 }
Christian Heimes72b710a2008-05-26 13:28:38 +0000663 PyBytes_Concat(&line, rest);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000664 Py_DECREF(rest);
665 if (line == NULL)
666 goto error;
667 }
668 err = PyList_Append(list, line);
669 Py_DECREF(line);
670 if (err != 0)
671 goto error;
672 }
673
674 cleanup:
675 RELEASE_LOCK(self);
676 if (big_buffer) {
677 Py_DECREF(big_buffer);
678 }
679 return list;
680}
681
682PyDoc_STRVAR(BZ2File_write__doc__,
683"write(data) -> None\n\
684\n\
685Write the 'data' string to file. Note that due to buffering, close() may\n\
686be needed before the file on disk reflects the data written.\n\
687");
688
689/* This is a hacked version of Python's fileobject.c:file_write(). */
690static PyObject *
691BZ2File_write(BZ2FileObject *self, PyObject *args)
692{
693 PyObject *ret = NULL;
694 char *buf;
695 int len;
696 int bzerror;
697
Guido van Rossum98297ee2007-11-06 21:34:58 +0000698 if (!PyArg_ParseTuple(args, "y#:write", &buf, &len))
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000699 return NULL;
Tim Peterse3228092002-11-09 04:21:44 +0000700
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000701 ACQUIRE_LOCK(self);
702 switch (self->mode) {
703 case MODE_WRITE:
704 break;
Tim Peterse3228092002-11-09 04:21:44 +0000705
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000706 case MODE_CLOSED:
707 PyErr_SetString(PyExc_ValueError,
708 "I/O operation on closed file");
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000709 goto cleanup;
Tim Peterse3228092002-11-09 04:21:44 +0000710
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000711 default:
712 PyErr_SetString(PyExc_IOError,
713 "file is not ready for writing");
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000714 goto cleanup;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000715 }
716
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000717 Py_BEGIN_ALLOW_THREADS
718 BZ2_bzWrite (&bzerror, self->fp, buf, len);
719 self->pos += len;
720 Py_END_ALLOW_THREADS
Tim Peterse3228092002-11-09 04:21:44 +0000721
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000722 if (bzerror != BZ_OK) {
723 Util_CatchBZ2Error(bzerror);
724 goto cleanup;
725 }
Tim Peterse3228092002-11-09 04:21:44 +0000726
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000727 Py_INCREF(Py_None);
728 ret = Py_None;
729
730cleanup:
731 RELEASE_LOCK(self);
732 return ret;
733}
734
735PyDoc_STRVAR(BZ2File_writelines__doc__,
736"writelines(sequence_of_strings) -> None\n\
737\n\
738Write the sequence of strings to the file. Note that newlines are not\n\
739added. The sequence can be any iterable object producing strings. This is\n\
740equivalent to calling write() for each string.\n\
741");
742
743/* This is a hacked version of Python's fileobject.c:file_writelines(). */
744static PyObject *
745BZ2File_writelines(BZ2FileObject *self, PyObject *seq)
746{
747#define CHUNKSIZE 1000
748 PyObject *list = NULL;
749 PyObject *iter = NULL;
750 PyObject *ret = NULL;
751 PyObject *line;
752 int i, j, index, len, islist;
753 int bzerror;
754
755 ACQUIRE_LOCK(self);
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000756 switch (self->mode) {
757 case MODE_WRITE:
758 break;
759
760 case MODE_CLOSED:
761 PyErr_SetString(PyExc_ValueError,
762 "I/O operation on closed file");
763 goto error;
764
765 default:
766 PyErr_SetString(PyExc_IOError,
767 "file is not ready for writing");
768 goto error;
769 }
770
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000771 islist = PyList_Check(seq);
772 if (!islist) {
773 iter = PyObject_GetIter(seq);
774 if (iter == NULL) {
775 PyErr_SetString(PyExc_TypeError,
776 "writelines() requires an iterable argument");
777 goto error;
778 }
779 list = PyList_New(CHUNKSIZE);
780 if (list == NULL)
781 goto error;
782 }
783
784 /* Strategy: slurp CHUNKSIZE lines into a private list,
785 checking that they are all strings, then write that list
786 without holding the interpreter lock, then come back for more. */
787 for (index = 0; ; index += CHUNKSIZE) {
788 if (islist) {
789 Py_XDECREF(list);
790 list = PyList_GetSlice(seq, index, index+CHUNKSIZE);
791 if (list == NULL)
792 goto error;
793 j = PyList_GET_SIZE(list);
794 }
795 else {
796 for (j = 0; j < CHUNKSIZE; j++) {
797 line = PyIter_Next(iter);
798 if (line == NULL) {
799 if (PyErr_Occurred())
800 goto error;
801 break;
802 }
803 PyList_SetItem(list, j, line);
804 }
805 }
806 if (j == 0)
807 break;
808
Guido van Rossum522a6c62007-05-22 23:13:45 +0000809 /* Check that all entries are indeed byte strings. If not,
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000810 apply the same rules as for file.write() and
811 convert the rets to strings. This is slow, but
812 seems to be the only way since all conversion APIs
813 could potentially execute Python code. */
814 for (i = 0; i < j; i++) {
815 PyObject *v = PyList_GET_ITEM(list, i);
Christian Heimes72b710a2008-05-26 13:28:38 +0000816 if (!PyBytes_Check(v)) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000817 const char *buffer;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000818 Py_ssize_t len;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000819 if (PyObject_AsCharBuffer(v, &buffer, &len)) {
820 PyErr_SetString(PyExc_TypeError,
821 "writelines() "
822 "argument must be "
823 "a sequence of "
Guido van Rossum522a6c62007-05-22 23:13:45 +0000824 "bytes objects");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000825 goto error;
826 }
Christian Heimes72b710a2008-05-26 13:28:38 +0000827 line = PyBytes_FromStringAndSize(buffer,
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000828 len);
829 if (line == NULL)
830 goto error;
831 Py_DECREF(v);
832 PyList_SET_ITEM(list, i, line);
833 }
834 }
835
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000836 /* Since we are releasing the global lock, the
837 following code may *not* execute Python code. */
838 Py_BEGIN_ALLOW_THREADS
839 for (i = 0; i < j; i++) {
840 line = PyList_GET_ITEM(list, i);
Christian Heimes72b710a2008-05-26 13:28:38 +0000841 len = PyBytes_GET_SIZE(line);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000842 BZ2_bzWrite (&bzerror, self->fp,
Christian Heimes72b710a2008-05-26 13:28:38 +0000843 PyBytes_AS_STRING(line), len);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000844 if (bzerror != BZ_OK) {
845 Py_BLOCK_THREADS
846 Util_CatchBZ2Error(bzerror);
847 goto error;
848 }
849 }
850 Py_END_ALLOW_THREADS
851
852 if (j < CHUNKSIZE)
853 break;
854 }
855
856 Py_INCREF(Py_None);
857 ret = Py_None;
858
859 error:
860 RELEASE_LOCK(self);
861 Py_XDECREF(list);
862 Py_XDECREF(iter);
863 return ret;
864#undef CHUNKSIZE
865}
866
867PyDoc_STRVAR(BZ2File_seek__doc__,
868"seek(offset [, whence]) -> None\n\
869\n\
870Move to new file position. Argument offset is a byte count. Optional\n\
871argument whence defaults to 0 (offset from start of file, offset\n\
872should be >= 0); other values are 1 (move relative to current position,\n\
873positive or negative), and 2 (move relative to end of file, usually\n\
874negative, although many platforms allow seeking beyond the end of a file).\n\
875\n\
876Note that seeking of bz2 files is emulated, and depending on the parameters\n\
877the operation may be extremely slow.\n\
878");
879
880static PyObject *
881BZ2File_seek(BZ2FileObject *self, PyObject *args)
882{
883 int where = 0;
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000884 PyObject *offobj;
885 Py_off_t offset;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000886 char small_buffer[SMALLCHUNK];
887 char *buffer = small_buffer;
888 size_t buffersize = SMALLCHUNK;
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000889 Py_off_t bytesread = 0;
Georg Brandla8bcecc2005-09-03 07:49:53 +0000890 size_t readsize;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000891 int chunksize;
892 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000893 PyObject *ret = NULL;
Tim Peterse3228092002-11-09 04:21:44 +0000894
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000895 if (!PyArg_ParseTuple(args, "O|i:seek", &offobj, &where))
896 return NULL;
897#if !defined(HAVE_LARGEFILE_SUPPORT)
Christian Heimes217cfd12007-12-02 14:31:20 +0000898 offset = PyLong_AsLong(offobj);
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000899#else
900 offset = PyLong_Check(offobj) ?
Christian Heimes217cfd12007-12-02 14:31:20 +0000901 PyLong_AsLongLong(offobj) : PyLong_AsLong(offobj);
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000902#endif
903 if (PyErr_Occurred())
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000904 return NULL;
905
906 ACQUIRE_LOCK(self);
907 Util_DropReadAhead(self);
908 switch (self->mode) {
909 case MODE_READ:
910 case MODE_READ_EOF:
911 break;
Tim Peterse3228092002-11-09 04:21:44 +0000912
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000913 case MODE_CLOSED:
914 PyErr_SetString(PyExc_ValueError,
915 "I/O operation on closed file");
Thomas Wouters89f507f2006-12-13 04:49:30 +0000916 goto cleanup;
Tim Peterse3228092002-11-09 04:21:44 +0000917
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000918 default:
919 PyErr_SetString(PyExc_IOError,
920 "seek works only while reading");
Thomas Wouters89f507f2006-12-13 04:49:30 +0000921 goto cleanup;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000922 }
923
Georg Brandl47fab922006-02-18 21:57:25 +0000924 if (where == 2) {
925 if (self->size == -1) {
926 assert(self->mode != MODE_READ_EOF);
927 for (;;) {
928 Py_BEGIN_ALLOW_THREADS
Guido van Rossumf09ca142007-06-13 00:03:05 +0000929 chunksize = BZ2_bzRead(&bzerror, self->fp,
930 buffer, buffersize);
Georg Brandl47fab922006-02-18 21:57:25 +0000931 self->pos += chunksize;
932 Py_END_ALLOW_THREADS
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000933
Georg Brandl47fab922006-02-18 21:57:25 +0000934 bytesread += chunksize;
935 if (bzerror == BZ_STREAM_END) {
936 break;
937 } else if (bzerror != BZ_OK) {
938 Util_CatchBZ2Error(bzerror);
939 goto cleanup;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000940 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000941 }
Georg Brandl47fab922006-02-18 21:57:25 +0000942 self->mode = MODE_READ_EOF;
943 self->size = self->pos;
944 bytesread = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000945 }
Georg Brandl47fab922006-02-18 21:57:25 +0000946 offset = self->size + offset;
947 } else if (where == 1) {
948 offset = self->pos + offset;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000949 }
950
Guido van Rossum522a6c62007-05-22 23:13:45 +0000951 /* Before getting here, offset must be the absolute position the file
Georg Brandl47fab922006-02-18 21:57:25 +0000952 * pointer should be set to. */
953
954 if (offset >= self->pos) {
955 /* we can move forward */
956 offset -= self->pos;
957 } else {
958 /* we cannot move back, so rewind the stream */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000959 BZ2_bzReadClose(&bzerror, self->fp);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000960 if (bzerror != BZ_OK) {
961 Util_CatchBZ2Error(bzerror);
962 goto cleanup;
963 }
Guido van Rossumf09ca142007-06-13 00:03:05 +0000964 rewind(self->rawfp);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000965 self->pos = 0;
Guido van Rossumf09ca142007-06-13 00:03:05 +0000966 self->fp = BZ2_bzReadOpen(&bzerror, self->rawfp,
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000967 0, 0, NULL, 0);
968 if (bzerror != BZ_OK) {
969 Util_CatchBZ2Error(bzerror);
970 goto cleanup;
971 }
972 self->mode = MODE_READ;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000973 }
974
Georg Brandl47fab922006-02-18 21:57:25 +0000975 if (offset <= 0 || self->mode == MODE_READ_EOF)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000976 goto exit;
977
978 /* Before getting here, offset must be set to the number of bytes
979 * to walk forward. */
980 for (;;) {
Georg Brandla8bcecc2005-09-03 07:49:53 +0000981 if (offset-bytesread > buffersize)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000982 readsize = buffersize;
983 else
Georg Brandla8bcecc2005-09-03 07:49:53 +0000984 /* offset might be wider that readsize, but the result
985 * of the subtraction is bound by buffersize (see the
986 * condition above). buffersize is 8192. */
987 readsize = (size_t)(offset-bytesread);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000988 Py_BEGIN_ALLOW_THREADS
Guido van Rossumf09ca142007-06-13 00:03:05 +0000989 chunksize = BZ2_bzRead(&bzerror, self->fp, buffer, readsize);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000990 self->pos += chunksize;
991 Py_END_ALLOW_THREADS
992 bytesread += chunksize;
993 if (bzerror == BZ_STREAM_END) {
994 self->size = self->pos;
995 self->mode = MODE_READ_EOF;
996 break;
997 } else if (bzerror != BZ_OK) {
998 Util_CatchBZ2Error(bzerror);
999 goto cleanup;
1000 }
1001 if (bytesread == offset)
1002 break;
1003 }
1004
1005exit:
1006 Py_INCREF(Py_None);
1007 ret = Py_None;
1008
1009cleanup:
1010 RELEASE_LOCK(self);
1011 return ret;
1012}
1013
1014PyDoc_STRVAR(BZ2File_tell__doc__,
1015"tell() -> int\n\
1016\n\
1017Return the current file position, an integer (may be a long integer).\n\
1018");
1019
1020static PyObject *
1021BZ2File_tell(BZ2FileObject *self, PyObject *args)
1022{
1023 PyObject *ret = NULL;
1024
1025 if (self->mode == MODE_CLOSED) {
1026 PyErr_SetString(PyExc_ValueError,
1027 "I/O operation on closed file");
1028 goto cleanup;
1029 }
1030
Georg Brandla8bcecc2005-09-03 07:49:53 +00001031#if !defined(HAVE_LARGEFILE_SUPPORT)
Christian Heimes217cfd12007-12-02 14:31:20 +00001032 ret = PyLong_FromLong(self->pos);
Georg Brandla8bcecc2005-09-03 07:49:53 +00001033#else
1034 ret = PyLong_FromLongLong(self->pos);
1035#endif
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001036
1037cleanup:
1038 return ret;
1039}
1040
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001041PyDoc_STRVAR(BZ2File_close__doc__,
1042"close() -> None or (perhaps) an integer\n\
1043\n\
1044Close the file. Sets data attribute .closed to true. A closed file\n\
1045cannot be used for further I/O operations. close() may be called more\n\
1046than once without error.\n\
1047");
1048
1049static PyObject *
1050BZ2File_close(BZ2FileObject *self)
1051{
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001052 PyObject *ret = NULL;
1053 int bzerror = BZ_OK;
1054
Guido van Rossumf09ca142007-06-13 00:03:05 +00001055 if (self->mode == MODE_CLOSED) {
1056 Py_RETURN_NONE;
1057 }
1058
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001059 ACQUIRE_LOCK(self);
1060 switch (self->mode) {
1061 case MODE_READ:
1062 case MODE_READ_EOF:
1063 BZ2_bzReadClose(&bzerror, self->fp);
1064 break;
1065 case MODE_WRITE:
1066 BZ2_bzWriteClose(&bzerror, self->fp,
1067 0, NULL, NULL);
1068 break;
1069 }
1070 self->mode = MODE_CLOSED;
Guido van Rossumf09ca142007-06-13 00:03:05 +00001071 fclose(self->rawfp);
1072 self->rawfp = NULL;
1073 if (bzerror == BZ_OK) {
1074 Py_INCREF(Py_None);
1075 ret = Py_None;
1076 }
1077 else {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001078 Util_CatchBZ2Error(bzerror);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001079 }
1080
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001081 RELEASE_LOCK(self);
1082 return ret;
1083}
1084
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001085static PyObject *BZ2File_getiter(BZ2FileObject *self);
1086
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001087static PyMethodDef BZ2File_methods[] = {
1088 {"read", (PyCFunction)BZ2File_read, METH_VARARGS, BZ2File_read__doc__},
1089 {"readline", (PyCFunction)BZ2File_readline, METH_VARARGS, BZ2File_readline__doc__},
1090 {"readlines", (PyCFunction)BZ2File_readlines, METH_VARARGS, BZ2File_readlines__doc__},
1091 {"write", (PyCFunction)BZ2File_write, METH_VARARGS, BZ2File_write__doc__},
1092 {"writelines", (PyCFunction)BZ2File_writelines, METH_O, BZ2File_writelines__doc__},
1093 {"seek", (PyCFunction)BZ2File_seek, METH_VARARGS, BZ2File_seek__doc__},
1094 {"tell", (PyCFunction)BZ2File_tell, METH_NOARGS, BZ2File_tell__doc__},
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001095 {"close", (PyCFunction)BZ2File_close, METH_NOARGS, BZ2File_close__doc__},
1096 {NULL, NULL} /* sentinel */
1097};
1098
1099
1100/* ===================================================================== */
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001101/* Getters and setters of BZ2File. */
1102
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001103static PyObject *
1104BZ2File_get_closed(BZ2FileObject *self, void *closure)
1105{
Christian Heimes217cfd12007-12-02 14:31:20 +00001106 return PyLong_FromLong(self->mode == MODE_CLOSED);
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001107}
1108
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001109static PyGetSetDef BZ2File_getset[] = {
1110 {"closed", (getter)BZ2File_get_closed, NULL,
1111 "True if the file is closed"},
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001112 {NULL} /* Sentinel */
1113};
1114
1115
1116/* ===================================================================== */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001117/* Slot definitions for BZ2File_Type. */
1118
1119static int
1120BZ2File_init(BZ2FileObject *self, PyObject *args, PyObject *kwargs)
1121{
Martin v. Löwis15e62742006-02-27 16:46:16 +00001122 static char *kwlist[] = {"filename", "mode", "buffering",
Guido van Rossumf09ca142007-06-13 00:03:05 +00001123 "compresslevel", 0};
1124 char *name;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001125 char *mode = "r";
1126 int buffering = -1;
1127 int compresslevel = 9;
1128 int bzerror;
1129 int mode_char = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001130
1131 self->size = -1;
Tim Peterse3228092002-11-09 04:21:44 +00001132
Guido van Rossumf09ca142007-06-13 00:03:05 +00001133 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s|sii:BZ2File",
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001134 kwlist, &name, &mode, &buffering,
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001135 &compresslevel))
1136 return -1;
1137
1138 if (compresslevel < 1 || compresslevel > 9) {
1139 PyErr_SetString(PyExc_ValueError,
1140 "compresslevel must be between 1 and 9");
1141 return -1;
1142 }
1143
1144 for (;;) {
1145 int error = 0;
1146 switch (*mode) {
1147 case 'r':
1148 case 'w':
1149 if (mode_char)
1150 error = 1;
1151 mode_char = *mode;
1152 break;
1153
1154 case 'b':
1155 break;
1156
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001157 default:
1158 error = 1;
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001159 break;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001160 }
1161 if (error) {
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001162 PyErr_Format(PyExc_ValueError,
1163 "invalid mode char %c", *mode);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001164 return -1;
1165 }
1166 mode++;
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001167 if (*mode == '\0')
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001168 break;
1169 }
1170
Georg Brandl6b95f1d2005-06-03 19:47:00 +00001171 if (mode_char == 0) {
1172 mode_char = 'r';
1173 }
1174
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001175 mode = (mode_char == 'r') ? "rb" : "wb";
Tim Peterse3228092002-11-09 04:21:44 +00001176
Guido van Rossumf09ca142007-06-13 00:03:05 +00001177 self->rawfp = fopen(name, mode);
1178 if (self->rawfp == NULL) {
1179 PyErr_SetFromErrno(PyExc_IOError);
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001180 return -1;
Guido van Rossumf09ca142007-06-13 00:03:05 +00001181 }
1182 /* XXX Ignore buffering */
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001183
1184 /* From now on, we have stuff to dealloc, so jump to error label
1185 * instead of returning */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001186
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001187#ifdef WITH_THREAD
1188 self->lock = PyThread_allocate_lock();
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001189 if (!self->lock) {
1190 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001191 goto error;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001192 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001193#endif
1194
1195 if (mode_char == 'r')
Guido van Rossumf09ca142007-06-13 00:03:05 +00001196 self->fp = BZ2_bzReadOpen(&bzerror, self->rawfp,
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001197 0, 0, NULL, 0);
1198 else
Guido van Rossumf09ca142007-06-13 00:03:05 +00001199 self->fp = BZ2_bzWriteOpen(&bzerror, self->rawfp,
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001200 compresslevel, 0, 0);
1201
1202 if (bzerror != BZ_OK) {
1203 Util_CatchBZ2Error(bzerror);
1204 goto error;
1205 }
1206
1207 self->mode = (mode_char == 'r') ? MODE_READ : MODE_WRITE;
1208
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001209 return 0;
1210
1211error:
Guido van Rossumf09ca142007-06-13 00:03:05 +00001212 fclose(self->rawfp);
1213 self->rawfp = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001214#ifdef WITH_THREAD
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001215 if (self->lock) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001216 PyThread_free_lock(self->lock);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001217 self->lock = NULL;
1218 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001219#endif
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001220 return -1;
1221}
1222
1223static void
1224BZ2File_dealloc(BZ2FileObject *self)
1225{
1226 int bzerror;
1227#ifdef WITH_THREAD
1228 if (self->lock)
1229 PyThread_free_lock(self->lock);
1230#endif
1231 switch (self->mode) {
1232 case MODE_READ:
1233 case MODE_READ_EOF:
1234 BZ2_bzReadClose(&bzerror, self->fp);
1235 break;
1236 case MODE_WRITE:
1237 BZ2_bzWriteClose(&bzerror, self->fp,
1238 0, NULL, NULL);
1239 break;
1240 }
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001241 Util_DropReadAhead(self);
Guido van Rossumf09ca142007-06-13 00:03:05 +00001242 if (self->rawfp != NULL)
1243 fclose(self->rawfp);
Christian Heimes90aa7642007-12-19 02:45:37 +00001244 Py_TYPE(self)->tp_free((PyObject *)self);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001245}
1246
1247/* This is a hacked version of Python's fileobject.c:file_getiter(). */
1248static PyObject *
1249BZ2File_getiter(BZ2FileObject *self)
1250{
1251 if (self->mode == MODE_CLOSED) {
1252 PyErr_SetString(PyExc_ValueError,
1253 "I/O operation on closed file");
1254 return NULL;
1255 }
1256 Py_INCREF((PyObject*)self);
1257 return (PyObject *)self;
1258}
1259
1260/* This is a hacked version of Python's fileobject.c:file_iternext(). */
1261#define READAHEAD_BUFSIZE 8192
1262static PyObject *
1263BZ2File_iternext(BZ2FileObject *self)
1264{
Christian Heimes72b710a2008-05-26 13:28:38 +00001265 PyBytesObject* ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001266 ACQUIRE_LOCK(self);
1267 if (self->mode == MODE_CLOSED) {
Georg Brandl86b2fb92008-07-16 03:43:04 +00001268 RELEASE_LOCK(self);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001269 PyErr_SetString(PyExc_ValueError,
1270 "I/O operation on closed file");
1271 return NULL;
1272 }
1273 ret = Util_ReadAheadGetLineSkip(self, 0, READAHEAD_BUFSIZE);
1274 RELEASE_LOCK(self);
Christian Heimes72b710a2008-05-26 13:28:38 +00001275 if (ret == NULL || PyBytes_GET_SIZE(ret) == 0) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001276 Py_XDECREF(ret);
1277 return NULL;
1278 }
1279 return (PyObject *)ret;
1280}
1281
1282/* ===================================================================== */
1283/* BZ2File_Type definition. */
1284
1285PyDoc_VAR(BZ2File__doc__) =
1286PyDoc_STR(
1287"BZ2File(name [, mode='r', buffering=0, compresslevel=9]) -> file object\n\
1288\n\
1289Open a bz2 file. The mode can be 'r' or 'w', for reading (default) or\n\
1290writing. When opened for writing, the file will be created if it doesn't\n\
1291exist, and truncated otherwise. If the buffering argument is given, 0 means\n\
1292unbuffered, and larger numbers specify the buffer size. If compresslevel\n\
1293is given, must be a number between 1 and 9.\n\
Guido van Rossum88e860c2007-06-13 01:46:31 +00001294Data read is always returned in bytes; data written ought to be bytes.\n\
1295");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001296
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001297static PyTypeObject BZ2File_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001298 PyVarObject_HEAD_INIT(NULL, 0)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001299 "bz2.BZ2File", /*tp_name*/
1300 sizeof(BZ2FileObject), /*tp_basicsize*/
1301 0, /*tp_itemsize*/
1302 (destructor)BZ2File_dealloc, /*tp_dealloc*/
1303 0, /*tp_print*/
1304 0, /*tp_getattr*/
1305 0, /*tp_setattr*/
1306 0, /*tp_compare*/
1307 0, /*tp_repr*/
1308 0, /*tp_as_number*/
1309 0, /*tp_as_sequence*/
1310 0, /*tp_as_mapping*/
1311 0, /*tp_hash*/
1312 0, /*tp_call*/
1313 0, /*tp_str*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00001314 PyObject_GenericGetAttr,/*tp_getattro*/
1315 PyObject_GenericSetAttr,/*tp_setattro*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001316 0, /*tp_as_buffer*/
1317 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1318 BZ2File__doc__, /*tp_doc*/
1319 0, /*tp_traverse*/
1320 0, /*tp_clear*/
1321 0, /*tp_richcompare*/
1322 0, /*tp_weaklistoffset*/
1323 (getiterfunc)BZ2File_getiter, /*tp_iter*/
1324 (iternextfunc)BZ2File_iternext, /*tp_iternext*/
1325 BZ2File_methods, /*tp_methods*/
Guido van Rossum79139b22007-02-09 23:20:19 +00001326 0, /*tp_members*/
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001327 BZ2File_getset, /*tp_getset*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001328 0, /*tp_base*/
1329 0, /*tp_dict*/
1330 0, /*tp_descr_get*/
1331 0, /*tp_descr_set*/
1332 0, /*tp_dictoffset*/
1333 (initproc)BZ2File_init, /*tp_init*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00001334 PyType_GenericAlloc, /*tp_alloc*/
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001335 PyType_GenericNew, /*tp_new*/
Neal Norwitz30d1c512007-08-19 22:48:23 +00001336 PyObject_Free, /*tp_free*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001337 0, /*tp_is_gc*/
1338};
1339
1340
1341/* ===================================================================== */
1342/* Methods of BZ2Comp. */
1343
1344PyDoc_STRVAR(BZ2Comp_compress__doc__,
1345"compress(data) -> string\n\
1346\n\
1347Provide more data to the compressor object. It will return chunks of\n\
1348compressed data whenever possible. When you've finished providing data\n\
1349to compress, call the flush() method to finish the compression process,\n\
1350and return what is left in the internal buffers.\n\
1351");
1352
1353static PyObject *
1354BZ2Comp_compress(BZ2CompObject *self, PyObject *args)
1355{
1356 char *data;
1357 int datasize;
1358 int bufsize = SMALLCHUNK;
Martin v. Löwisb9a0f912003-03-29 10:06:18 +00001359 PY_LONG_LONG totalout;
Gustavo Niemeyer7d7930b2002-11-05 18:41:53 +00001360 PyObject *ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001361 bz_stream *bzs = &self->bzs;
1362 int bzerror;
1363
Guido van Rossum98297ee2007-11-06 21:34:58 +00001364 if (!PyArg_ParseTuple(args, "y#:compress", &data, &datasize))
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001365 return NULL;
1366
Gustavo Niemeyera6e436e2004-02-14 00:02:45 +00001367 if (datasize == 0)
Christian Heimes72b710a2008-05-26 13:28:38 +00001368 return PyBytes_FromStringAndSize("", 0);
Gustavo Niemeyera6e436e2004-02-14 00:02:45 +00001369
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001370 ACQUIRE_LOCK(self);
1371 if (!self->running) {
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001372 PyErr_SetString(PyExc_ValueError,
1373 "this object was already flushed");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001374 goto error;
1375 }
1376
Christian Heimes72b710a2008-05-26 13:28:38 +00001377 ret = PyBytes_FromStringAndSize(NULL, bufsize);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001378 if (!ret)
1379 goto error;
1380
1381 bzs->next_in = data;
1382 bzs->avail_in = datasize;
1383 bzs->next_out = BUF(ret);
1384 bzs->avail_out = bufsize;
1385
1386 totalout = BZS_TOTAL_OUT(bzs);
1387
1388 for (;;) {
1389 Py_BEGIN_ALLOW_THREADS
1390 bzerror = BZ2_bzCompress(bzs, BZ_RUN);
1391 Py_END_ALLOW_THREADS
1392 if (bzerror != BZ_RUN_OK) {
1393 Util_CatchBZ2Error(bzerror);
1394 goto error;
1395 }
Guido van Rossumd8faa362007-04-27 19:54:29 +00001396 if (bzs->avail_in == 0)
1397 break; /* no more input data */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001398 if (bzs->avail_out == 0) {
1399 bufsize = Util_NewBufferSize(bufsize);
Christian Heimes72b710a2008-05-26 13:28:38 +00001400 if (_PyBytes_Resize(&ret, bufsize) < 0) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001401 BZ2_bzCompressEnd(bzs);
1402 goto error;
1403 }
1404 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1405 - totalout);
1406 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001407 }
1408 }
1409
Christian Heimes72b710a2008-05-26 13:28:38 +00001410 if (_PyBytes_Resize(&ret,
Guido van Rossum522a6c62007-05-22 23:13:45 +00001411 (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout)) < 0)
1412 goto error;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001413
1414 RELEASE_LOCK(self);
1415 return ret;
1416
1417error:
1418 RELEASE_LOCK(self);
1419 Py_XDECREF(ret);
1420 return NULL;
1421}
1422
1423PyDoc_STRVAR(BZ2Comp_flush__doc__,
1424"flush() -> string\n\
1425\n\
1426Finish the compression process and return what is left in internal buffers.\n\
1427You must not use the compressor object after calling this method.\n\
1428");
1429
1430static PyObject *
1431BZ2Comp_flush(BZ2CompObject *self)
1432{
1433 int bufsize = SMALLCHUNK;
Gustavo Niemeyer7d7930b2002-11-05 18:41:53 +00001434 PyObject *ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001435 bz_stream *bzs = &self->bzs;
Martin v. Löwisb9a0f912003-03-29 10:06:18 +00001436 PY_LONG_LONG totalout;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001437 int bzerror;
1438
1439 ACQUIRE_LOCK(self);
1440 if (!self->running) {
1441 PyErr_SetString(PyExc_ValueError, "object was already "
1442 "flushed");
1443 goto error;
1444 }
1445 self->running = 0;
1446
Christian Heimes72b710a2008-05-26 13:28:38 +00001447 ret = PyBytes_FromStringAndSize(NULL, bufsize);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001448 if (!ret)
1449 goto error;
1450
1451 bzs->next_out = BUF(ret);
1452 bzs->avail_out = bufsize;
1453
1454 totalout = BZS_TOTAL_OUT(bzs);
1455
1456 for (;;) {
1457 Py_BEGIN_ALLOW_THREADS
1458 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
1459 Py_END_ALLOW_THREADS
1460 if (bzerror == BZ_STREAM_END) {
1461 break;
1462 } else if (bzerror != BZ_FINISH_OK) {
1463 Util_CatchBZ2Error(bzerror);
1464 goto error;
1465 }
1466 if (bzs->avail_out == 0) {
1467 bufsize = Util_NewBufferSize(bufsize);
Christian Heimes72b710a2008-05-26 13:28:38 +00001468 if (_PyBytes_Resize(&ret, bufsize) < 0)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001469 goto error;
1470 bzs->next_out = BUF(ret);
1471 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1472 - totalout);
1473 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1474 }
1475 }
1476
Guido van Rossum522a6c62007-05-22 23:13:45 +00001477 if (bzs->avail_out != 0) {
Christian Heimes72b710a2008-05-26 13:28:38 +00001478 if (_PyBytes_Resize(&ret,
Guido van Rossum522a6c62007-05-22 23:13:45 +00001479 (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout)) < 0)
1480 goto error;
1481 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001482
1483 RELEASE_LOCK(self);
1484 return ret;
1485
1486error:
1487 RELEASE_LOCK(self);
1488 Py_XDECREF(ret);
1489 return NULL;
1490}
1491
1492static PyMethodDef BZ2Comp_methods[] = {
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001493 {"compress", (PyCFunction)BZ2Comp_compress, METH_VARARGS,
1494 BZ2Comp_compress__doc__},
1495 {"flush", (PyCFunction)BZ2Comp_flush, METH_NOARGS,
1496 BZ2Comp_flush__doc__},
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001497 {NULL, NULL} /* sentinel */
1498};
1499
1500
1501/* ===================================================================== */
1502/* Slot definitions for BZ2Comp_Type. */
1503
1504static int
1505BZ2Comp_init(BZ2CompObject *self, PyObject *args, PyObject *kwargs)
1506{
1507 int compresslevel = 9;
1508 int bzerror;
Martin v. Löwis15e62742006-02-27 16:46:16 +00001509 static char *kwlist[] = {"compresslevel", 0};
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001510
1511 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i:BZ2Compressor",
1512 kwlist, &compresslevel))
1513 return -1;
1514
1515 if (compresslevel < 1 || compresslevel > 9) {
1516 PyErr_SetString(PyExc_ValueError,
1517 "compresslevel must be between 1 and 9");
1518 goto error;
1519 }
1520
1521#ifdef WITH_THREAD
1522 self->lock = PyThread_allocate_lock();
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001523 if (!self->lock) {
1524 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001525 goto error;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001526 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001527#endif
1528
1529 memset(&self->bzs, 0, sizeof(bz_stream));
1530 bzerror = BZ2_bzCompressInit(&self->bzs, compresslevel, 0, 0);
1531 if (bzerror != BZ_OK) {
1532 Util_CatchBZ2Error(bzerror);
1533 goto error;
1534 }
1535
1536 self->running = 1;
1537
1538 return 0;
1539error:
1540#ifdef WITH_THREAD
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001541 if (self->lock) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001542 PyThread_free_lock(self->lock);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001543 self->lock = NULL;
1544 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001545#endif
1546 return -1;
1547}
1548
1549static void
1550BZ2Comp_dealloc(BZ2CompObject *self)
1551{
1552#ifdef WITH_THREAD
1553 if (self->lock)
1554 PyThread_free_lock(self->lock);
1555#endif
1556 BZ2_bzCompressEnd(&self->bzs);
Christian Heimes90aa7642007-12-19 02:45:37 +00001557 Py_TYPE(self)->tp_free((PyObject *)self);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001558}
1559
1560
1561/* ===================================================================== */
1562/* BZ2Comp_Type definition. */
1563
1564PyDoc_STRVAR(BZ2Comp__doc__,
1565"BZ2Compressor([compresslevel=9]) -> compressor object\n\
1566\n\
1567Create a new compressor object. This object may be used to compress\n\
1568data sequentially. If you want to compress data in one shot, use the\n\
1569compress() function instead. The compresslevel parameter, if given,\n\
1570must be a number between 1 and 9.\n\
1571");
1572
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001573static PyTypeObject BZ2Comp_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001574 PyVarObject_HEAD_INIT(NULL, 0)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001575 "bz2.BZ2Compressor", /*tp_name*/
1576 sizeof(BZ2CompObject), /*tp_basicsize*/
1577 0, /*tp_itemsize*/
1578 (destructor)BZ2Comp_dealloc, /*tp_dealloc*/
1579 0, /*tp_print*/
1580 0, /*tp_getattr*/
1581 0, /*tp_setattr*/
1582 0, /*tp_compare*/
1583 0, /*tp_repr*/
1584 0, /*tp_as_number*/
1585 0, /*tp_as_sequence*/
1586 0, /*tp_as_mapping*/
1587 0, /*tp_hash*/
1588 0, /*tp_call*/
1589 0, /*tp_str*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00001590 PyObject_GenericGetAttr,/*tp_getattro*/
1591 PyObject_GenericSetAttr,/*tp_setattro*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001592 0, /*tp_as_buffer*/
1593 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1594 BZ2Comp__doc__, /*tp_doc*/
1595 0, /*tp_traverse*/
1596 0, /*tp_clear*/
1597 0, /*tp_richcompare*/
1598 0, /*tp_weaklistoffset*/
1599 0, /*tp_iter*/
1600 0, /*tp_iternext*/
1601 BZ2Comp_methods, /*tp_methods*/
1602 0, /*tp_members*/
1603 0, /*tp_getset*/
1604 0, /*tp_base*/
1605 0, /*tp_dict*/
1606 0, /*tp_descr_get*/
1607 0, /*tp_descr_set*/
1608 0, /*tp_dictoffset*/
1609 (initproc)BZ2Comp_init, /*tp_init*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00001610 PyType_GenericAlloc, /*tp_alloc*/
1611 PyType_GenericNew, /*tp_new*/
Neal Norwitz30d1c512007-08-19 22:48:23 +00001612 PyObject_Free, /*tp_free*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001613 0, /*tp_is_gc*/
1614};
1615
1616
1617/* ===================================================================== */
1618/* Members of BZ2Decomp. */
1619
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001620#undef OFF
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001621#define OFF(x) offsetof(BZ2DecompObject, x)
1622
1623static PyMemberDef BZ2Decomp_members[] = {
Guido van Rossum33d26892007-08-05 15:29:28 +00001624 {"unused_data", T_OBJECT, OFF(unused_data), READONLY},
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001625 {NULL} /* Sentinel */
1626};
1627
1628
1629/* ===================================================================== */
1630/* Methods of BZ2Decomp. */
1631
1632PyDoc_STRVAR(BZ2Decomp_decompress__doc__,
1633"decompress(data) -> string\n\
1634\n\
1635Provide more data to the decompressor object. It will return chunks\n\
1636of decompressed data whenever possible. If you try to decompress data\n\
1637after the end of stream is found, EOFError will be raised. If any data\n\
1638was found after the end of stream, it'll be ignored and saved in\n\
1639unused_data attribute.\n\
1640");
1641
1642static PyObject *
1643BZ2Decomp_decompress(BZ2DecompObject *self, PyObject *args)
1644{
1645 char *data;
1646 int datasize;
1647 int bufsize = SMALLCHUNK;
Martin v. Löwisb9a0f912003-03-29 10:06:18 +00001648 PY_LONG_LONG totalout;
Neal Norwitz18142c02002-11-05 18:17:32 +00001649 PyObject *ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001650 bz_stream *bzs = &self->bzs;
1651 int bzerror;
1652
Guido van Rossum98297ee2007-11-06 21:34:58 +00001653 if (!PyArg_ParseTuple(args, "y#:decompress", &data, &datasize))
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001654 return NULL;
1655
1656 ACQUIRE_LOCK(self);
1657 if (!self->running) {
1658 PyErr_SetString(PyExc_EOFError, "end of stream was "
1659 "already found");
1660 goto error;
1661 }
1662
Christian Heimes72b710a2008-05-26 13:28:38 +00001663 ret = PyBytes_FromStringAndSize(NULL, bufsize);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001664 if (!ret)
1665 goto error;
1666
1667 bzs->next_in = data;
1668 bzs->avail_in = datasize;
1669 bzs->next_out = BUF(ret);
1670 bzs->avail_out = bufsize;
1671
1672 totalout = BZS_TOTAL_OUT(bzs);
1673
1674 for (;;) {
1675 Py_BEGIN_ALLOW_THREADS
1676 bzerror = BZ2_bzDecompress(bzs);
1677 Py_END_ALLOW_THREADS
1678 if (bzerror == BZ_STREAM_END) {
1679 if (bzs->avail_in != 0) {
1680 Py_DECREF(self->unused_data);
1681 self->unused_data =
Christian Heimes72b710a2008-05-26 13:28:38 +00001682 PyBytes_FromStringAndSize(bzs->next_in,
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001683 bzs->avail_in);
1684 }
1685 self->running = 0;
1686 break;
1687 }
1688 if (bzerror != BZ_OK) {
1689 Util_CatchBZ2Error(bzerror);
1690 goto error;
1691 }
Guido van Rossumd8faa362007-04-27 19:54:29 +00001692 if (bzs->avail_in == 0)
1693 break; /* no more input data */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001694 if (bzs->avail_out == 0) {
1695 bufsize = Util_NewBufferSize(bufsize);
Christian Heimes72b710a2008-05-26 13:28:38 +00001696 if (_PyBytes_Resize(&ret, bufsize) < 0) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001697 BZ2_bzDecompressEnd(bzs);
1698 goto error;
1699 }
1700 bzs->next_out = BUF(ret);
1701 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1702 - totalout);
1703 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001704 }
1705 }
1706
Guido van Rossum522a6c62007-05-22 23:13:45 +00001707 if (bzs->avail_out != 0) {
Christian Heimes72b710a2008-05-26 13:28:38 +00001708 if (_PyBytes_Resize(&ret,
Guido van Rossum522a6c62007-05-22 23:13:45 +00001709 (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout)) < 0)
1710 goto error;
1711 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001712
1713 RELEASE_LOCK(self);
1714 return ret;
1715
1716error:
1717 RELEASE_LOCK(self);
1718 Py_XDECREF(ret);
1719 return NULL;
1720}
1721
1722static PyMethodDef BZ2Decomp_methods[] = {
1723 {"decompress", (PyCFunction)BZ2Decomp_decompress, METH_VARARGS, BZ2Decomp_decompress__doc__},
1724 {NULL, NULL} /* sentinel */
1725};
1726
1727
1728/* ===================================================================== */
1729/* Slot definitions for BZ2Decomp_Type. */
1730
1731static int
1732BZ2Decomp_init(BZ2DecompObject *self, PyObject *args, PyObject *kwargs)
1733{
1734 int bzerror;
1735
1736 if (!PyArg_ParseTuple(args, ":BZ2Decompressor"))
1737 return -1;
1738
1739#ifdef WITH_THREAD
1740 self->lock = PyThread_allocate_lock();
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001741 if (!self->lock) {
1742 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001743 goto error;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001744 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001745#endif
1746
Christian Heimes72b710a2008-05-26 13:28:38 +00001747 self->unused_data = PyBytes_FromStringAndSize("", 0);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001748 if (!self->unused_data)
1749 goto error;
1750
1751 memset(&self->bzs, 0, sizeof(bz_stream));
1752 bzerror = BZ2_bzDecompressInit(&self->bzs, 0, 0);
1753 if (bzerror != BZ_OK) {
1754 Util_CatchBZ2Error(bzerror);
1755 goto error;
1756 }
1757
1758 self->running = 1;
1759
1760 return 0;
1761
1762error:
1763#ifdef WITH_THREAD
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001764 if (self->lock) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001765 PyThread_free_lock(self->lock);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001766 self->lock = NULL;
1767 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001768#endif
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001769 Py_CLEAR(self->unused_data);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001770 return -1;
1771}
1772
1773static void
1774BZ2Decomp_dealloc(BZ2DecompObject *self)
1775{
1776#ifdef WITH_THREAD
1777 if (self->lock)
1778 PyThread_free_lock(self->lock);
1779#endif
1780 Py_XDECREF(self->unused_data);
1781 BZ2_bzDecompressEnd(&self->bzs);
Christian Heimes90aa7642007-12-19 02:45:37 +00001782 Py_TYPE(self)->tp_free((PyObject *)self);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001783}
1784
1785
1786/* ===================================================================== */
1787/* BZ2Decomp_Type definition. */
1788
1789PyDoc_STRVAR(BZ2Decomp__doc__,
1790"BZ2Decompressor() -> decompressor object\n\
1791\n\
1792Create a new decompressor object. This object may be used to decompress\n\
1793data sequentially. If you want to decompress data in one shot, use the\n\
1794decompress() function instead.\n\
1795");
1796
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001797static PyTypeObject BZ2Decomp_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001798 PyVarObject_HEAD_INIT(NULL, 0)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001799 "bz2.BZ2Decompressor", /*tp_name*/
1800 sizeof(BZ2DecompObject), /*tp_basicsize*/
1801 0, /*tp_itemsize*/
1802 (destructor)BZ2Decomp_dealloc, /*tp_dealloc*/
1803 0, /*tp_print*/
1804 0, /*tp_getattr*/
1805 0, /*tp_setattr*/
1806 0, /*tp_compare*/
1807 0, /*tp_repr*/
1808 0, /*tp_as_number*/
1809 0, /*tp_as_sequence*/
1810 0, /*tp_as_mapping*/
1811 0, /*tp_hash*/
1812 0, /*tp_call*/
1813 0, /*tp_str*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00001814 PyObject_GenericGetAttr,/*tp_getattro*/
1815 PyObject_GenericSetAttr,/*tp_setattro*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001816 0, /*tp_as_buffer*/
1817 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1818 BZ2Decomp__doc__, /*tp_doc*/
1819 0, /*tp_traverse*/
1820 0, /*tp_clear*/
1821 0, /*tp_richcompare*/
1822 0, /*tp_weaklistoffset*/
1823 0, /*tp_iter*/
1824 0, /*tp_iternext*/
1825 BZ2Decomp_methods, /*tp_methods*/
1826 BZ2Decomp_members, /*tp_members*/
1827 0, /*tp_getset*/
1828 0, /*tp_base*/
1829 0, /*tp_dict*/
1830 0, /*tp_descr_get*/
1831 0, /*tp_descr_set*/
1832 0, /*tp_dictoffset*/
1833 (initproc)BZ2Decomp_init, /*tp_init*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00001834 PyType_GenericAlloc, /*tp_alloc*/
1835 PyType_GenericNew, /*tp_new*/
Neal Norwitz30d1c512007-08-19 22:48:23 +00001836 PyObject_Free, /*tp_free*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001837 0, /*tp_is_gc*/
1838};
1839
1840
1841/* ===================================================================== */
1842/* Module functions. */
1843
1844PyDoc_STRVAR(bz2_compress__doc__,
1845"compress(data [, compresslevel=9]) -> string\n\
1846\n\
1847Compress data in one shot. If you want to compress data sequentially,\n\
1848use an instance of BZ2Compressor instead. The compresslevel parameter, if\n\
1849given, must be a number between 1 and 9.\n\
1850");
1851
1852static PyObject *
1853bz2_compress(PyObject *self, PyObject *args, PyObject *kwargs)
1854{
1855 int compresslevel=9;
1856 char *data;
1857 int datasize;
1858 int bufsize;
Gustavo Niemeyer7d7930b2002-11-05 18:41:53 +00001859 PyObject *ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001860 bz_stream _bzs;
1861 bz_stream *bzs = &_bzs;
1862 int bzerror;
Martin v. Löwis15e62742006-02-27 16:46:16 +00001863 static char *kwlist[] = {"data", "compresslevel", 0};
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001864
Guido van Rossum98297ee2007-11-06 21:34:58 +00001865 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y#|i",
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001866 kwlist, &data, &datasize,
1867 &compresslevel))
1868 return NULL;
1869
1870 if (compresslevel < 1 || compresslevel > 9) {
1871 PyErr_SetString(PyExc_ValueError,
1872 "compresslevel must be between 1 and 9");
1873 return NULL;
1874 }
1875
1876 /* Conforming to bz2 manual, this is large enough to fit compressed
1877 * data in one shot. We will check it later anyway. */
1878 bufsize = datasize + (datasize/100+1) + 600;
1879
Christian Heimes72b710a2008-05-26 13:28:38 +00001880 ret = PyBytes_FromStringAndSize(NULL, bufsize);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001881 if (!ret)
1882 return NULL;
1883
1884 memset(bzs, 0, sizeof(bz_stream));
1885
1886 bzs->next_in = data;
1887 bzs->avail_in = datasize;
1888 bzs->next_out = BUF(ret);
1889 bzs->avail_out = bufsize;
1890
1891 bzerror = BZ2_bzCompressInit(bzs, compresslevel, 0, 0);
1892 if (bzerror != BZ_OK) {
1893 Util_CatchBZ2Error(bzerror);
1894 Py_DECREF(ret);
1895 return NULL;
1896 }
Tim Peterse3228092002-11-09 04:21:44 +00001897
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001898 for (;;) {
1899 Py_BEGIN_ALLOW_THREADS
1900 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
1901 Py_END_ALLOW_THREADS
1902 if (bzerror == BZ_STREAM_END) {
1903 break;
1904 } else if (bzerror != BZ_FINISH_OK) {
1905 BZ2_bzCompressEnd(bzs);
1906 Util_CatchBZ2Error(bzerror);
1907 Py_DECREF(ret);
1908 return NULL;
1909 }
1910 if (bzs->avail_out == 0) {
1911 bufsize = Util_NewBufferSize(bufsize);
Christian Heimes72b710a2008-05-26 13:28:38 +00001912 if (_PyBytes_Resize(&ret, bufsize) < 0) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001913 BZ2_bzCompressEnd(bzs);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001914 return NULL;
1915 }
1916 bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
1917 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1918 }
1919 }
1920
Guido van Rossum522a6c62007-05-22 23:13:45 +00001921 if (bzs->avail_out != 0) {
Christian Heimes72b710a2008-05-26 13:28:38 +00001922 if (_PyBytes_Resize(&ret, (Py_ssize_t)BZS_TOTAL_OUT(bzs)) < 0) {
Guido van Rossum522a6c62007-05-22 23:13:45 +00001923 ret = NULL;
1924 }
1925 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001926 BZ2_bzCompressEnd(bzs);
1927
1928 return ret;
1929}
1930
1931PyDoc_STRVAR(bz2_decompress__doc__,
1932"decompress(data) -> decompressed data\n\
1933\n\
1934Decompress data in one shot. If you want to decompress data sequentially,\n\
1935use an instance of BZ2Decompressor instead.\n\
1936");
1937
1938static PyObject *
1939bz2_decompress(PyObject *self, PyObject *args)
1940{
1941 char *data;
1942 int datasize;
1943 int bufsize = SMALLCHUNK;
1944 PyObject *ret;
1945 bz_stream _bzs;
1946 bz_stream *bzs = &_bzs;
1947 int bzerror;
1948
Guido van Rossum98297ee2007-11-06 21:34:58 +00001949 if (!PyArg_ParseTuple(args, "y#:decompress", &data, &datasize))
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001950 return NULL;
1951
1952 if (datasize == 0)
Christian Heimes72b710a2008-05-26 13:28:38 +00001953 return PyBytes_FromStringAndSize("", 0);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001954
Christian Heimes72b710a2008-05-26 13:28:38 +00001955 ret = PyBytes_FromStringAndSize(NULL, bufsize);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001956 if (!ret)
1957 return NULL;
1958
1959 memset(bzs, 0, sizeof(bz_stream));
1960
1961 bzs->next_in = data;
1962 bzs->avail_in = datasize;
1963 bzs->next_out = BUF(ret);
1964 bzs->avail_out = bufsize;
1965
1966 bzerror = BZ2_bzDecompressInit(bzs, 0, 0);
1967 if (bzerror != BZ_OK) {
1968 Util_CatchBZ2Error(bzerror);
1969 Py_DECREF(ret);
1970 return NULL;
1971 }
Tim Peterse3228092002-11-09 04:21:44 +00001972
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001973 for (;;) {
1974 Py_BEGIN_ALLOW_THREADS
1975 bzerror = BZ2_bzDecompress(bzs);
1976 Py_END_ALLOW_THREADS
1977 if (bzerror == BZ_STREAM_END) {
1978 break;
1979 } else if (bzerror != BZ_OK) {
1980 BZ2_bzDecompressEnd(bzs);
1981 Util_CatchBZ2Error(bzerror);
1982 Py_DECREF(ret);
1983 return NULL;
1984 }
Guido van Rossumd8faa362007-04-27 19:54:29 +00001985 if (bzs->avail_in == 0) {
1986 BZ2_bzDecompressEnd(bzs);
1987 PyErr_SetString(PyExc_ValueError,
1988 "couldn't find end of stream");
1989 Py_DECREF(ret);
1990 return NULL;
1991 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001992 if (bzs->avail_out == 0) {
1993 bufsize = Util_NewBufferSize(bufsize);
Christian Heimes72b710a2008-05-26 13:28:38 +00001994 if (_PyBytes_Resize(&ret, bufsize) < 0) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001995 BZ2_bzDecompressEnd(bzs);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001996 return NULL;
1997 }
1998 bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
1999 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002000 }
2001 }
2002
Guido van Rossum522a6c62007-05-22 23:13:45 +00002003 if (bzs->avail_out != 0) {
Christian Heimes72b710a2008-05-26 13:28:38 +00002004 if (_PyBytes_Resize(&ret, (Py_ssize_t)BZS_TOTAL_OUT(bzs)) < 0) {
Guido van Rossum522a6c62007-05-22 23:13:45 +00002005 ret = NULL;
2006 }
2007 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002008 BZ2_bzDecompressEnd(bzs);
2009
2010 return ret;
2011}
2012
2013static PyMethodDef bz2_methods[] = {
2014 {"compress", (PyCFunction) bz2_compress, METH_VARARGS|METH_KEYWORDS,
2015 bz2_compress__doc__},
2016 {"decompress", (PyCFunction) bz2_decompress, METH_VARARGS,
2017 bz2_decompress__doc__},
2018 {NULL, NULL} /* sentinel */
2019};
2020
2021/* ===================================================================== */
2022/* Initialization function. */
2023
2024PyDoc_STRVAR(bz2__doc__,
2025"The python bz2 module provides a comprehensive interface for\n\
2026the bz2 compression library. It implements a complete file\n\
2027interface, one shot (de)compression functions, and types for\n\
2028sequential (de)compression.\n\
2029");
2030
Martin v. Löwis1a214512008-06-11 05:26:20 +00002031
2032static struct PyModuleDef bz2module = {
2033 PyModuleDef_HEAD_INIT,
2034 "bz2",
2035 bz2__doc__,
2036 -1,
2037 bz2_methods,
2038 NULL,
2039 NULL,
2040 NULL,
2041 NULL
2042};
2043
Neal Norwitz21d896c2003-07-01 20:15:21 +00002044PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00002045PyInit_bz2(void)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002046{
2047 PyObject *m;
2048
Christian Heimes90aa7642007-12-19 02:45:37 +00002049 Py_TYPE(&BZ2File_Type) = &PyType_Type;
2050 Py_TYPE(&BZ2Comp_Type) = &PyType_Type;
2051 Py_TYPE(&BZ2Decomp_Type) = &PyType_Type;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002052
Martin v. Löwis1a214512008-06-11 05:26:20 +00002053 m = PyModule_Create(&bz2module);
Neal Norwitz1ac754f2006-01-19 06:09:39 +00002054 if (m == NULL)
Martin v. Löwis1a214512008-06-11 05:26:20 +00002055 return NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002056
Neal Norwitz53cbdaa2007-08-23 21:42:55 +00002057 PyModule_AddObject(m, "__author__", PyUnicode_FromString(__author__));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002058
2059 Py_INCREF(&BZ2File_Type);
2060 PyModule_AddObject(m, "BZ2File", (PyObject *)&BZ2File_Type);
2061
2062 Py_INCREF(&BZ2Comp_Type);
2063 PyModule_AddObject(m, "BZ2Compressor", (PyObject *)&BZ2Comp_Type);
2064
2065 Py_INCREF(&BZ2Decomp_Type);
2066 PyModule_AddObject(m, "BZ2Decompressor", (PyObject *)&BZ2Decomp_Type);
Martin v. Löwis1a214512008-06-11 05:26:20 +00002067 return m;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002068}