blob: 5f1d01b7751a5bbccf6a4f94bf06645c3e426b5e [file] [log] [blame]
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001/*
2
3python-bz2 - python bz2 library interface
4
5Copyright (c) 2002 Gustavo Niemeyer <niemeyer@conectiva.com>
6Copyright (c) 2002 Python Software Foundation; All Rights Reserved
7
8*/
9
Martin v. Löwise17af7b2002-11-23 09:16:19 +000010#include "Python.h"
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000011#include <stdio.h>
12#include <bzlib.h>
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000013#include "structmember.h"
14
15#ifdef WITH_THREAD
16#include "pythread.h"
17#endif
18
/* Author credit text for the module. */
static char __author__[] =
    "The bz2 python module was written by:\n"
    "\n"
    " Gustavo Niemeyer <niemeyer@conectiva.com>\n";
24
/* Py_off_t: the module's own off_t-like offset type, 64-bit if possible
 * (selection logic copied from Objects/fileobject.c).
 *
 * Without large-file support plain off_t is used whatever its width; with
 * it, off_t is preferred when wide enough, then fpos_t, and otherwise the
 * build is rejected. */
#if !defined(HAVE_LARGEFILE_SUPPORT) || SIZEOF_OFF_T >= 8
typedef off_t Py_off_t;
#elif SIZEOF_FPOS_T >= 8
typedef fpos_t Py_off_t;
#else
#error "Large file support, but neither off_t nor fpos_t is large enough."
#endif
36
/* Raw character data of a bytes object. */
#define BUF(v) PyBytes_AS_STRING(v)

/* Values stored in BZ2FileObject.mode. */
#define MODE_CLOSED     0   /* I/O attempts raise "I/O operation on closed file" */
#define MODE_READ       1   /* open for reading, end of stream not yet seen */
#define MODE_READ_EOF   2   /* open for reading, BZ_STREAM_END already seen */
#define MODE_WRITE      3   /* open for writing */

/* Exact type test (subclasses do not match). */
#define BZ2FileObject_Check(v) (Py_TYPE(v) == &BZ2File_Type)
Gustavo Niemeyer7628f1f2003-04-27 06:25:24 +000046
/* BZS_TOTAL_OUT(bzs): number of bytes produced so far by the bz_stream that
 * `bzs` points to.
 *
 * When bzlib defines BZ_CONFIG_ERROR the counter is split into two 32-bit
 * halves (total_out_hi32 / total_out_lo32); they are combined into the widest
 * integer type available, falling back to the low half alone if no 64-bit
 * type exists.  Otherwise the library is assumed to export the unprefixed
 * names and a single total_out field, and the BZ2_-prefixed names used by
 * this file are mapped onto them.
 *
 * The macro argument and each expansion are fully parenthesized so that any
 * pointer expression (e.g. `&stream`, `array + i`) can be passed safely. */
#ifdef BZ_CONFIG_ERROR

#if SIZEOF_LONG >= 8
#define BZS_TOTAL_OUT(bzs) \
    (((long)(bzs)->total_out_hi32 << 32) + (bzs)->total_out_lo32)
#elif SIZEOF_LONG_LONG >= 8
#define BZS_TOTAL_OUT(bzs) \
    (((PY_LONG_LONG)(bzs)->total_out_hi32 << 32) + (bzs)->total_out_lo32)
#else
#define BZS_TOTAL_OUT(bzs) \
    ((bzs)->total_out_lo32)
#endif

#else /* ! BZ_CONFIG_ERROR */

#define BZ2_bzRead bzRead
#define BZ2_bzReadOpen bzReadOpen
#define BZ2_bzReadClose bzReadClose
#define BZ2_bzWrite bzWrite
#define BZ2_bzWriteOpen bzWriteOpen
#define BZ2_bzWriteClose bzWriteClose
#define BZ2_bzCompress bzCompress
#define BZ2_bzCompressInit bzCompressInit
#define BZ2_bzCompressEnd bzCompressEnd
#define BZ2_bzDecompress bzDecompress
#define BZ2_bzDecompressInit bzDecompressInit
#define BZ2_bzDecompressEnd bzDecompressEnd

#define BZS_TOTAL_OUT(bzs) ((bzs)->total_out)

#endif /* ! BZ_CONFIG_ERROR */
78
79
/* Per-object locking helpers.  `obj` is a pointer to any of this module's
 * object structs that carries a `lock` member.  In builds without thread
 * support both macros expand to nothing.
 *
 * ACQUIRE_LOCK passes 1 as the second argument of PyThread_acquire_lock
 * (i.e. it waits for the lock).  The argument is parenthesized so arbitrary
 * pointer expressions can be passed. */
#ifdef WITH_THREAD
#define ACQUIRE_LOCK(obj) PyThread_acquire_lock((obj)->lock, 1)
#define RELEASE_LOCK(obj) PyThread_release_lock((obj)->lock)
#else
#define ACQUIRE_LOCK(obj)
#define RELEASE_LOCK(obj)
#endif
87
/* Bits in f_newlinetypes: one flag per newline convention seen so far. */
#define NEWLINE_UNKNOWN 0x0     /* No newline seen, yet */
#define NEWLINE_CR      0x1     /* \r newline seen */
#define NEWLINE_LF      0x2     /* \n newline seen */
#define NEWLINE_CRLF    0x4     /* \r\n newline seen */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000093
94/* ===================================================================== */
95/* Structure definitions. */
96
97typedef struct {
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +000098 PyObject_HEAD
Guido van Rossumf09ca142007-06-13 00:03:05 +000099 FILE *rawfp;
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000100
101 char* f_buf; /* Allocated readahead buffer */
102 char* f_bufend; /* Points after last occupied position */
103 char* f_bufptr; /* Current buffer position */
104
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000105 BZFILE *fp;
106 int mode;
Georg Brandla8bcecc2005-09-03 07:49:53 +0000107 Py_off_t pos;
108 Py_off_t size;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000109#ifdef WITH_THREAD
110 PyThread_type_lock lock;
111#endif
112} BZ2FileObject;
113
114typedef struct {
115 PyObject_HEAD
116 bz_stream bzs;
117 int running;
118#ifdef WITH_THREAD
119 PyThread_type_lock lock;
120#endif
121} BZ2CompObject;
122
123typedef struct {
124 PyObject_HEAD
125 bz_stream bzs;
126 int running;
127 PyObject *unused_data;
128#ifdef WITH_THREAD
129 PyThread_type_lock lock;
130#endif
131} BZ2DecompObject;
132
133/* ===================================================================== */
134/* Utility functions. */
135
136static int
137Util_CatchBZ2Error(int bzerror)
138{
139 int ret = 0;
140 switch(bzerror) {
141 case BZ_OK:
142 case BZ_STREAM_END:
143 break;
144
Gustavo Niemeyer7628f1f2003-04-27 06:25:24 +0000145#ifdef BZ_CONFIG_ERROR
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000146 case BZ_CONFIG_ERROR:
147 PyErr_SetString(PyExc_SystemError,
148 "the bz2 library was not compiled "
149 "correctly");
150 ret = 1;
151 break;
Gustavo Niemeyer7628f1f2003-04-27 06:25:24 +0000152#endif
Tim Peterse3228092002-11-09 04:21:44 +0000153
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000154 case BZ_PARAM_ERROR:
155 PyErr_SetString(PyExc_ValueError,
156 "the bz2 library has received wrong "
157 "parameters");
158 ret = 1;
159 break;
Tim Peterse3228092002-11-09 04:21:44 +0000160
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000161 case BZ_MEM_ERROR:
162 PyErr_NoMemory();
163 ret = 1;
164 break;
165
166 case BZ_DATA_ERROR:
167 case BZ_DATA_ERROR_MAGIC:
168 PyErr_SetString(PyExc_IOError, "invalid data stream");
169 ret = 1;
170 break;
171
172 case BZ_IO_ERROR:
173 PyErr_SetString(PyExc_IOError, "unknown IO error");
174 ret = 1;
175 break;
176
177 case BZ_UNEXPECTED_EOF:
178 PyErr_SetString(PyExc_EOFError,
179 "compressed file ended before the "
180 "logical end-of-stream was detected");
181 ret = 1;
182 break;
183
184 case BZ_SEQUENCE_ERROR:
185 PyErr_SetString(PyExc_RuntimeError,
186 "wrong sequence of bz2 library "
187 "commands used");
188 ret = 1;
189 break;
190 }
191 return ret;
192}
193
/* SMALLCHUNK: linear growth step for small buffers; never below 8192. */
#if BUFSIZ < 8192
#define SMALLCHUNK 8192
#else
#define SMALLCHUNK BUFSIZ
#endif

/* BIGCHUNK: size up to which buffers double, and the linear step beyond. */
#if SIZEOF_INT < 4
#define BIGCHUNK (512 * 32)
#else
#define BIGCHUNK (512 * 1024)
#endif

/* This is a hacked version of Python's fileobject.c:new_buffersize().
 *
 * Given the current buffer size, return the size to grow to:
 *   - up to and including SMALLCHUNK: add SMALLCHUNK;
 *   - above SMALLCHUNK and up to BIGCHUNK: double;
 *   - above BIGCHUNK: add BIGCHUNK. */
static size_t
Util_NewBufferSize(size_t currentsize)
{
    size_t growth;

    if (currentsize <= SMALLCHUNK)
        growth = SMALLCHUNK;
    else if (currentsize <= BIGCHUNK)
        growth = currentsize;
    else
        growth = BIGCHUNK;
    return currentsize + growth;
}
220
221/* This is a hacked version of Python's fileobject.c:get_line(). */
222static PyObject *
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000223Util_GetLine(BZ2FileObject *f, int n)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000224{
225 char c;
226 char *buf, *end;
227 size_t total_v_size; /* total # of slots in buffer */
228 size_t used_v_size; /* # used slots in buffer */
229 size_t increment; /* amount to increment the buffer */
230 PyObject *v;
231 int bzerror;
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000232 int bytes_read;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000233
234 total_v_size = n > 0 ? n : 100;
Christian Heimes72b710a2008-05-26 13:28:38 +0000235 v = PyBytes_FromStringAndSize((char *)NULL, total_v_size);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000236 if (v == NULL)
237 return NULL;
238
239 buf = BUF(v);
240 end = buf + total_v_size;
241
242 for (;;) {
243 Py_BEGIN_ALLOW_THREADS
Guido van Rossumf09ca142007-06-13 00:03:05 +0000244 do {
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000245 bytes_read = BZ2_bzRead(&bzerror, f->fp, &c, 1);
Guido van Rossumf09ca142007-06-13 00:03:05 +0000246 f->pos++;
Thomas Wouters1b7f8912007-09-19 03:06:30 +0000247 if (bytes_read == 0)
248 break;
Guido van Rossumf09ca142007-06-13 00:03:05 +0000249 *buf++ = c;
250 } while (bzerror == BZ_OK && c != '\n' && buf != end);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000251 Py_END_ALLOW_THREADS
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000252 if (bzerror == BZ_STREAM_END) {
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000253 f->size = f->pos;
254 f->mode = MODE_READ_EOF;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000255 break;
256 } else if (bzerror != BZ_OK) {
257 Util_CatchBZ2Error(bzerror);
258 Py_DECREF(v);
259 return NULL;
260 }
261 if (c == '\n')
262 break;
263 /* Must be because buf == end */
264 if (n > 0)
265 break;
266 used_v_size = total_v_size;
267 increment = total_v_size >> 2; /* mild exponential growth */
268 total_v_size += increment;
269 if (total_v_size > INT_MAX) {
270 PyErr_SetString(PyExc_OverflowError,
271 "line is longer than a Python string can hold");
272 Py_DECREF(v);
273 return NULL;
274 }
Christian Heimes72b710a2008-05-26 13:28:38 +0000275 if (_PyBytes_Resize(&v, total_v_size) < 0) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000276 return NULL;
Guido van Rossum522a6c62007-05-22 23:13:45 +0000277 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000278 buf = BUF(v) + used_v_size;
279 end = BUF(v) + total_v_size;
280 }
281
282 used_v_size = buf - BUF(v);
Guido van Rossum522a6c62007-05-22 23:13:45 +0000283 if (used_v_size != total_v_size) {
Christian Heimes72b710a2008-05-26 13:28:38 +0000284 if (_PyBytes_Resize(&v, used_v_size) < 0) {
Guido van Rossum522a6c62007-05-22 23:13:45 +0000285 v = NULL;
286 }
287 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000288 return v;
289}
290
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000291/* This is a hacked version of Python's fileobject.c:drop_readahead(). */
292static void
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000293Util_DropReadAhead(BZ2FileObject *f)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000294{
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000295 if (f->f_buf != NULL) {
296 PyMem_Free(f->f_buf);
297 f->f_buf = NULL;
298 }
299}
300
301/* This is a hacked version of Python's fileobject.c:readahead(). */
302static int
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000303Util_ReadAhead(BZ2FileObject *f, int bufsize)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000304{
305 int chunksize;
306 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000307
308 if (f->f_buf != NULL) {
Tim Peterse3228092002-11-09 04:21:44 +0000309 if((f->f_bufend - f->f_bufptr) >= 1)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000310 return 0;
311 else
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000312 Util_DropReadAhead(f);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000313 }
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000314 if (f->mode == MODE_READ_EOF) {
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000315 f->f_bufptr = f->f_buf;
316 f->f_bufend = f->f_buf;
317 return 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000318 }
319 if ((f->f_buf = PyMem_Malloc(bufsize)) == NULL) {
Georg Brandl6aa2d1f2008-08-12 08:35:52 +0000320 PyErr_NoMemory();
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000321 return -1;
322 }
323 Py_BEGIN_ALLOW_THREADS
Guido van Rossumf09ca142007-06-13 00:03:05 +0000324 chunksize = BZ2_bzRead(&bzerror, f->fp, f->f_buf, bufsize);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000325 Py_END_ALLOW_THREADS
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000326 f->pos += chunksize;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000327 if (bzerror == BZ_STREAM_END) {
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000328 f->size = f->pos;
329 f->mode = MODE_READ_EOF;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000330 } else if (bzerror != BZ_OK) {
331 Util_CatchBZ2Error(bzerror);
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000332 Util_DropReadAhead(f);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000333 return -1;
334 }
335 f->f_bufptr = f->f_buf;
336 f->f_bufend = f->f_buf + chunksize;
337 return 0;
338}
339
340/* This is a hacked version of Python's
341 * fileobject.c:readahead_get_line_skip(). */
Christian Heimes72b710a2008-05-26 13:28:38 +0000342static PyBytesObject *
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000343Util_ReadAheadGetLineSkip(BZ2FileObject *f, int skip, int bufsize)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000344{
Christian Heimes72b710a2008-05-26 13:28:38 +0000345 PyBytesObject* s;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000346 char *bufptr;
347 char *buf;
348 int len;
349
350 if (f->f_buf == NULL)
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000351 if (Util_ReadAhead(f, bufsize) < 0)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000352 return NULL;
353
354 len = f->f_bufend - f->f_bufptr;
Tim Peterse3228092002-11-09 04:21:44 +0000355 if (len == 0)
Christian Heimes72b710a2008-05-26 13:28:38 +0000356 return (PyBytesObject *)
357 PyBytes_FromStringAndSize(NULL, skip);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000358 bufptr = memchr(f->f_bufptr, '\n', len);
359 if (bufptr != NULL) {
360 bufptr++; /* Count the '\n' */
361 len = bufptr - f->f_bufptr;
Christian Heimes72b710a2008-05-26 13:28:38 +0000362 s = (PyBytesObject *)
363 PyBytes_FromStringAndSize(NULL, skip+len);
Tim Peterse3228092002-11-09 04:21:44 +0000364 if (s == NULL)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000365 return NULL;
Christian Heimes72b710a2008-05-26 13:28:38 +0000366 memcpy(PyBytes_AS_STRING(s)+skip, f->f_bufptr, len);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000367 f->f_bufptr = bufptr;
368 if (bufptr == f->f_bufend)
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000369 Util_DropReadAhead(f);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000370 } else {
371 bufptr = f->f_bufptr;
372 buf = f->f_buf;
373 f->f_buf = NULL; /* Force new readahead buffer */
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000374 s = Util_ReadAheadGetLineSkip(f, skip+len,
375 bufsize + (bufsize>>2));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000376 if (s == NULL) {
377 PyMem_Free(buf);
378 return NULL;
379 }
Christian Heimes72b710a2008-05-26 13:28:38 +0000380 memcpy(PyBytes_AS_STRING(s)+skip, bufptr, len);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000381 PyMem_Free(buf);
382 }
383 return s;
384}
385
386/* ===================================================================== */
387/* Methods of BZ2File. */
388
389PyDoc_STRVAR(BZ2File_read__doc__,
390"read([size]) -> string\n\
391\n\
392Read at most size uncompressed bytes, returned as a string. If the size\n\
393argument is negative or omitted, read until EOF is reached.\n\
394");
395
396/* This is a hacked version of Python's fileobject.c:file_read(). */
397static PyObject *
398BZ2File_read(BZ2FileObject *self, PyObject *args)
399{
400 long bytesrequested = -1;
401 size_t bytesread, buffersize, chunksize;
402 int bzerror;
403 PyObject *ret = NULL;
Tim Peterse3228092002-11-09 04:21:44 +0000404
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000405 if (!PyArg_ParseTuple(args, "|l:read", &bytesrequested))
406 return NULL;
Tim Peterse3228092002-11-09 04:21:44 +0000407
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000408 ACQUIRE_LOCK(self);
409 switch (self->mode) {
410 case MODE_READ:
411 break;
412 case MODE_READ_EOF:
Christian Heimes72b710a2008-05-26 13:28:38 +0000413 ret = PyBytes_FromStringAndSize("", 0);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000414 goto cleanup;
415 case MODE_CLOSED:
416 PyErr_SetString(PyExc_ValueError,
417 "I/O operation on closed file");
418 goto cleanup;
419 default:
420 PyErr_SetString(PyExc_IOError,
421 "file is not ready for reading");
422 goto cleanup;
423 }
424
425 if (bytesrequested < 0)
426 buffersize = Util_NewBufferSize((size_t)0);
427 else
428 buffersize = bytesrequested;
429 if (buffersize > INT_MAX) {
430 PyErr_SetString(PyExc_OverflowError,
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +0000431 "requested number of bytes is "
432 "more than a Python string can hold");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000433 goto cleanup;
434 }
Christian Heimes72b710a2008-05-26 13:28:38 +0000435 ret = PyBytes_FromStringAndSize((char *)NULL, buffersize);
Guido van Rossum75c26bc2007-08-07 23:29:20 +0000436 if (ret == NULL || buffersize == 0)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000437 goto cleanup;
438 bytesread = 0;
439
440 for (;;) {
441 Py_BEGIN_ALLOW_THREADS
Guido van Rossumf09ca142007-06-13 00:03:05 +0000442 chunksize = BZ2_bzRead(&bzerror, self->fp,
443 BUF(ret)+bytesread,
444 buffersize-bytesread);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000445 self->pos += chunksize;
446 Py_END_ALLOW_THREADS
447 bytesread += chunksize;
448 if (bzerror == BZ_STREAM_END) {
449 self->size = self->pos;
450 self->mode = MODE_READ_EOF;
451 break;
452 } else if (bzerror != BZ_OK) {
453 Util_CatchBZ2Error(bzerror);
454 Py_DECREF(ret);
455 ret = NULL;
456 goto cleanup;
457 }
458 if (bytesrequested < 0) {
459 buffersize = Util_NewBufferSize(buffersize);
Christian Heimes72b710a2008-05-26 13:28:38 +0000460 if (_PyBytes_Resize(&ret, buffersize) < 0) {
Guido van Rossum522a6c62007-05-22 23:13:45 +0000461 ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000462 goto cleanup;
Guido van Rossum522a6c62007-05-22 23:13:45 +0000463 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000464 } else {
465 break;
466 }
467 }
Guido van Rossum522a6c62007-05-22 23:13:45 +0000468 if (bytesread != buffersize) {
Christian Heimes72b710a2008-05-26 13:28:38 +0000469 if (_PyBytes_Resize(&ret, bytesread) < 0) {
Guido van Rossum522a6c62007-05-22 23:13:45 +0000470 ret = NULL;
471 }
472 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000473
474cleanup:
475 RELEASE_LOCK(self);
476 return ret;
477}
478
479PyDoc_STRVAR(BZ2File_readline__doc__,
480"readline([size]) -> string\n\
481\n\
482Return the next line from the file, as a string, retaining newline.\n\
483A non-negative size argument will limit the maximum number of bytes to\n\
484return (an incomplete line may be returned then). Return an empty\n\
485string at EOF.\n\
486");
487
488static PyObject *
489BZ2File_readline(BZ2FileObject *self, PyObject *args)
490{
491 PyObject *ret = NULL;
492 int sizehint = -1;
493
494 if (!PyArg_ParseTuple(args, "|i:readline", &sizehint))
495 return NULL;
496
497 ACQUIRE_LOCK(self);
498 switch (self->mode) {
499 case MODE_READ:
500 break;
501 case MODE_READ_EOF:
Christian Heimes72b710a2008-05-26 13:28:38 +0000502 ret = PyBytes_FromStringAndSize("", 0);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000503 goto cleanup;
504 case MODE_CLOSED:
505 PyErr_SetString(PyExc_ValueError,
506 "I/O operation on closed file");
507 goto cleanup;
508 default:
509 PyErr_SetString(PyExc_IOError,
510 "file is not ready for reading");
511 goto cleanup;
512 }
513
514 if (sizehint == 0)
Christian Heimes72b710a2008-05-26 13:28:38 +0000515 ret = PyBytes_FromStringAndSize("", 0);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000516 else
517 ret = Util_GetLine(self, (sizehint < 0) ? 0 : sizehint);
518
519cleanup:
520 RELEASE_LOCK(self);
521 return ret;
522}
523
524PyDoc_STRVAR(BZ2File_readlines__doc__,
525"readlines([size]) -> list\n\
526\n\
527Call readline() repeatedly and return a list of lines read.\n\
528The optional size argument, if given, is an approximate bound on the\n\
529total number of bytes in the lines returned.\n\
530");
531
532/* This is a hacked version of Python's fileobject.c:file_readlines(). */
533static PyObject *
534BZ2File_readlines(BZ2FileObject *self, PyObject *args)
535{
536 long sizehint = 0;
537 PyObject *list = NULL;
538 PyObject *line;
539 char small_buffer[SMALLCHUNK];
540 char *buffer = small_buffer;
541 size_t buffersize = SMALLCHUNK;
542 PyObject *big_buffer = NULL;
543 size_t nfilled = 0;
544 size_t nread;
545 size_t totalread = 0;
546 char *p, *q, *end;
547 int err;
548 int shortread = 0;
549 int bzerror;
550
551 if (!PyArg_ParseTuple(args, "|l:readlines", &sizehint))
552 return NULL;
553
554 ACQUIRE_LOCK(self);
555 switch (self->mode) {
556 case MODE_READ:
557 break;
558 case MODE_READ_EOF:
559 list = PyList_New(0);
560 goto cleanup;
561 case MODE_CLOSED:
562 PyErr_SetString(PyExc_ValueError,
563 "I/O operation on closed file");
564 goto cleanup;
565 default:
566 PyErr_SetString(PyExc_IOError,
567 "file is not ready for reading");
568 goto cleanup;
569 }
570
571 if ((list = PyList_New(0)) == NULL)
572 goto cleanup;
573
574 for (;;) {
575 Py_BEGIN_ALLOW_THREADS
Guido van Rossumf09ca142007-06-13 00:03:05 +0000576 nread = BZ2_bzRead(&bzerror, self->fp,
577 buffer+nfilled, buffersize-nfilled);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000578 self->pos += nread;
579 Py_END_ALLOW_THREADS
580 if (bzerror == BZ_STREAM_END) {
581 self->size = self->pos;
582 self->mode = MODE_READ_EOF;
583 if (nread == 0) {
584 sizehint = 0;
585 break;
586 }
587 shortread = 1;
588 } else if (bzerror != BZ_OK) {
589 Util_CatchBZ2Error(bzerror);
590 error:
591 Py_DECREF(list);
592 list = NULL;
593 goto cleanup;
594 }
595 totalread += nread;
596 p = memchr(buffer+nfilled, '\n', nread);
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000597 if (!shortread && p == NULL) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000598 /* Need a larger buffer to fit this line */
599 nfilled += nread;
600 buffersize *= 2;
601 if (buffersize > INT_MAX) {
602 PyErr_SetString(PyExc_OverflowError,
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000603 "line is longer than a Python string can hold");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000604 goto error;
605 }
606 if (big_buffer == NULL) {
607 /* Create the big buffer */
Christian Heimes72b710a2008-05-26 13:28:38 +0000608 big_buffer = PyBytes_FromStringAndSize(
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000609 NULL, buffersize);
610 if (big_buffer == NULL)
611 goto error;
Christian Heimes72b710a2008-05-26 13:28:38 +0000612 buffer = PyBytes_AS_STRING(big_buffer);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000613 memcpy(buffer, small_buffer, nfilled);
614 }
615 else {
616 /* Grow the big buffer */
Christian Heimes72b710a2008-05-26 13:28:38 +0000617 if (_PyBytes_Resize(&big_buffer, buffersize) < 0){
Guido van Rossum522a6c62007-05-22 23:13:45 +0000618 big_buffer = NULL;
619 goto error;
620 }
Christian Heimes72b710a2008-05-26 13:28:38 +0000621 buffer = PyBytes_AS_STRING(big_buffer);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000622 }
Guido van Rossum522a6c62007-05-22 23:13:45 +0000623 continue;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000624 }
625 end = buffer+nfilled+nread;
626 q = buffer;
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000627 while (p != NULL) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000628 /* Process complete lines */
629 p++;
Christian Heimes72b710a2008-05-26 13:28:38 +0000630 line = PyBytes_FromStringAndSize(q, p-q);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000631 if (line == NULL)
632 goto error;
633 err = PyList_Append(list, line);
634 Py_DECREF(line);
635 if (err != 0)
636 goto error;
637 q = p;
638 p = memchr(q, '\n', end-q);
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000639 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000640 /* Move the remaining incomplete line to the start */
641 nfilled = end-q;
642 memmove(buffer, q, nfilled);
643 if (sizehint > 0)
644 if (totalread >= (size_t)sizehint)
645 break;
646 if (shortread) {
647 sizehint = 0;
648 break;
649 }
650 }
651 if (nfilled != 0) {
652 /* Partial last line */
Christian Heimes72b710a2008-05-26 13:28:38 +0000653 line = PyBytes_FromStringAndSize(buffer, nfilled);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000654 if (line == NULL)
655 goto error;
656 if (sizehint > 0) {
657 /* Need to complete the last line */
658 PyObject *rest = Util_GetLine(self, 0);
659 if (rest == NULL) {
660 Py_DECREF(line);
661 goto error;
662 }
Christian Heimes72b710a2008-05-26 13:28:38 +0000663 PyBytes_Concat(&line, rest);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000664 Py_DECREF(rest);
665 if (line == NULL)
666 goto error;
667 }
668 err = PyList_Append(list, line);
669 Py_DECREF(line);
670 if (err != 0)
671 goto error;
672 }
673
674 cleanup:
675 RELEASE_LOCK(self);
676 if (big_buffer) {
677 Py_DECREF(big_buffer);
678 }
679 return list;
680}
681
682PyDoc_STRVAR(BZ2File_write__doc__,
683"write(data) -> None\n\
684\n\
685Write the 'data' string to file. Note that due to buffering, close() may\n\
686be needed before the file on disk reflects the data written.\n\
687");
688
689/* This is a hacked version of Python's fileobject.c:file_write(). */
690static PyObject *
691BZ2File_write(BZ2FileObject *self, PyObject *args)
692{
693 PyObject *ret = NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000694 Py_buffer pbuf;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000695 char *buf;
696 int len;
697 int bzerror;
698
Martin v. Löwis423be952008-08-13 15:53:07 +0000699 if (!PyArg_ParseTuple(args, "y*:write", &pbuf))
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000700 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +0000701 buf = pbuf.buf;
702 len = pbuf.len;
Tim Peterse3228092002-11-09 04:21:44 +0000703
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000704 ACQUIRE_LOCK(self);
705 switch (self->mode) {
706 case MODE_WRITE:
707 break;
Tim Peterse3228092002-11-09 04:21:44 +0000708
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000709 case MODE_CLOSED:
710 PyErr_SetString(PyExc_ValueError,
711 "I/O operation on closed file");
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000712 goto cleanup;
Tim Peterse3228092002-11-09 04:21:44 +0000713
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000714 default:
715 PyErr_SetString(PyExc_IOError,
716 "file is not ready for writing");
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000717 goto cleanup;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000718 }
719
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000720 Py_BEGIN_ALLOW_THREADS
721 BZ2_bzWrite (&bzerror, self->fp, buf, len);
722 self->pos += len;
723 Py_END_ALLOW_THREADS
Tim Peterse3228092002-11-09 04:21:44 +0000724
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000725 if (bzerror != BZ_OK) {
726 Util_CatchBZ2Error(bzerror);
727 goto cleanup;
728 }
Tim Peterse3228092002-11-09 04:21:44 +0000729
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000730 Py_INCREF(Py_None);
731 ret = Py_None;
732
733cleanup:
Martin v. Löwis423be952008-08-13 15:53:07 +0000734 PyBuffer_Release(&pbuf);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000735 RELEASE_LOCK(self);
736 return ret;
737}
738
739PyDoc_STRVAR(BZ2File_writelines__doc__,
740"writelines(sequence_of_strings) -> None\n\
741\n\
742Write the sequence of strings to the file. Note that newlines are not\n\
743added. The sequence can be any iterable object producing strings. This is\n\
744equivalent to calling write() for each string.\n\
745");
746
747/* This is a hacked version of Python's fileobject.c:file_writelines(). */
748static PyObject *
749BZ2File_writelines(BZ2FileObject *self, PyObject *seq)
750{
751#define CHUNKSIZE 1000
752 PyObject *list = NULL;
753 PyObject *iter = NULL;
754 PyObject *ret = NULL;
755 PyObject *line;
756 int i, j, index, len, islist;
757 int bzerror;
758
759 ACQUIRE_LOCK(self);
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000760 switch (self->mode) {
761 case MODE_WRITE:
762 break;
763
764 case MODE_CLOSED:
765 PyErr_SetString(PyExc_ValueError,
766 "I/O operation on closed file");
767 goto error;
768
769 default:
770 PyErr_SetString(PyExc_IOError,
771 "file is not ready for writing");
772 goto error;
773 }
774
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000775 islist = PyList_Check(seq);
776 if (!islist) {
777 iter = PyObject_GetIter(seq);
778 if (iter == NULL) {
779 PyErr_SetString(PyExc_TypeError,
780 "writelines() requires an iterable argument");
781 goto error;
782 }
783 list = PyList_New(CHUNKSIZE);
784 if (list == NULL)
785 goto error;
786 }
787
788 /* Strategy: slurp CHUNKSIZE lines into a private list,
789 checking that they are all strings, then write that list
790 without holding the interpreter lock, then come back for more. */
791 for (index = 0; ; index += CHUNKSIZE) {
792 if (islist) {
793 Py_XDECREF(list);
794 list = PyList_GetSlice(seq, index, index+CHUNKSIZE);
795 if (list == NULL)
796 goto error;
797 j = PyList_GET_SIZE(list);
798 }
799 else {
800 for (j = 0; j < CHUNKSIZE; j++) {
801 line = PyIter_Next(iter);
802 if (line == NULL) {
803 if (PyErr_Occurred())
804 goto error;
805 break;
806 }
807 PyList_SetItem(list, j, line);
808 }
809 }
810 if (j == 0)
811 break;
812
Guido van Rossum522a6c62007-05-22 23:13:45 +0000813 /* Check that all entries are indeed byte strings. If not,
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000814 apply the same rules as for file.write() and
815 convert the rets to strings. This is slow, but
816 seems to be the only way since all conversion APIs
817 could potentially execute Python code. */
818 for (i = 0; i < j; i++) {
819 PyObject *v = PyList_GET_ITEM(list, i);
Christian Heimes72b710a2008-05-26 13:28:38 +0000820 if (!PyBytes_Check(v)) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000821 const char *buffer;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000822 Py_ssize_t len;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000823 if (PyObject_AsCharBuffer(v, &buffer, &len)) {
824 PyErr_SetString(PyExc_TypeError,
825 "writelines() "
826 "argument must be "
827 "a sequence of "
Guido van Rossum522a6c62007-05-22 23:13:45 +0000828 "bytes objects");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000829 goto error;
830 }
Christian Heimes72b710a2008-05-26 13:28:38 +0000831 line = PyBytes_FromStringAndSize(buffer,
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000832 len);
833 if (line == NULL)
834 goto error;
835 Py_DECREF(v);
836 PyList_SET_ITEM(list, i, line);
837 }
838 }
839
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000840 /* Since we are releasing the global lock, the
841 following code may *not* execute Python code. */
842 Py_BEGIN_ALLOW_THREADS
843 for (i = 0; i < j; i++) {
844 line = PyList_GET_ITEM(list, i);
Christian Heimes72b710a2008-05-26 13:28:38 +0000845 len = PyBytes_GET_SIZE(line);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000846 BZ2_bzWrite (&bzerror, self->fp,
Christian Heimes72b710a2008-05-26 13:28:38 +0000847 PyBytes_AS_STRING(line), len);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000848 if (bzerror != BZ_OK) {
849 Py_BLOCK_THREADS
850 Util_CatchBZ2Error(bzerror);
851 goto error;
852 }
853 }
854 Py_END_ALLOW_THREADS
855
856 if (j < CHUNKSIZE)
857 break;
858 }
859
860 Py_INCREF(Py_None);
861 ret = Py_None;
862
863 error:
864 RELEASE_LOCK(self);
865 Py_XDECREF(list);
866 Py_XDECREF(iter);
867 return ret;
868#undef CHUNKSIZE
869}
870
871PyDoc_STRVAR(BZ2File_seek__doc__,
872"seek(offset [, whence]) -> None\n\
873\n\
874Move to new file position. Argument offset is a byte count. Optional\n\
875argument whence defaults to 0 (offset from start of file, offset\n\
876should be >= 0); other values are 1 (move relative to current position,\n\
877positive or negative), and 2 (move relative to end of file, usually\n\
878negative, although many platforms allow seeking beyond the end of a file).\n\
879\n\
880Note that seeking of bz2 files is emulated, and depending on the parameters\n\
881the operation may be extremely slow.\n\
882");
883
884static PyObject *
885BZ2File_seek(BZ2FileObject *self, PyObject *args)
886{
887 int where = 0;
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000888 PyObject *offobj;
889 Py_off_t offset;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000890 char small_buffer[SMALLCHUNK];
891 char *buffer = small_buffer;
892 size_t buffersize = SMALLCHUNK;
Thomas Wouters902d6eb2007-01-09 23:18:33 +0000893 Py_off_t bytesread = 0;
Georg Brandla8bcecc2005-09-03 07:49:53 +0000894 size_t readsize;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000895 int chunksize;
896 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000897 PyObject *ret = NULL;
Tim Peterse3228092002-11-09 04:21:44 +0000898
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000899 if (!PyArg_ParseTuple(args, "O|i:seek", &offobj, &where))
900 return NULL;
901#if !defined(HAVE_LARGEFILE_SUPPORT)
Christian Heimes217cfd12007-12-02 14:31:20 +0000902 offset = PyLong_AsLong(offobj);
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000903#else
904 offset = PyLong_Check(offobj) ?
Christian Heimes217cfd12007-12-02 14:31:20 +0000905 PyLong_AsLongLong(offobj) : PyLong_AsLong(offobj);
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000906#endif
907 if (PyErr_Occurred())
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000908 return NULL;
909
910 ACQUIRE_LOCK(self);
911 Util_DropReadAhead(self);
912 switch (self->mode) {
913 case MODE_READ:
914 case MODE_READ_EOF:
915 break;
Tim Peterse3228092002-11-09 04:21:44 +0000916
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000917 case MODE_CLOSED:
918 PyErr_SetString(PyExc_ValueError,
919 "I/O operation on closed file");
Thomas Wouters89f507f2006-12-13 04:49:30 +0000920 goto cleanup;
Tim Peterse3228092002-11-09 04:21:44 +0000921
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000922 default:
923 PyErr_SetString(PyExc_IOError,
924 "seek works only while reading");
Thomas Wouters89f507f2006-12-13 04:49:30 +0000925 goto cleanup;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000926 }
927
Georg Brandl47fab922006-02-18 21:57:25 +0000928 if (where == 2) {
929 if (self->size == -1) {
930 assert(self->mode != MODE_READ_EOF);
931 for (;;) {
932 Py_BEGIN_ALLOW_THREADS
Guido van Rossumf09ca142007-06-13 00:03:05 +0000933 chunksize = BZ2_bzRead(&bzerror, self->fp,
934 buffer, buffersize);
Georg Brandl47fab922006-02-18 21:57:25 +0000935 self->pos += chunksize;
936 Py_END_ALLOW_THREADS
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000937
Georg Brandl47fab922006-02-18 21:57:25 +0000938 bytesread += chunksize;
939 if (bzerror == BZ_STREAM_END) {
940 break;
941 } else if (bzerror != BZ_OK) {
942 Util_CatchBZ2Error(bzerror);
943 goto cleanup;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000944 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000945 }
Georg Brandl47fab922006-02-18 21:57:25 +0000946 self->mode = MODE_READ_EOF;
947 self->size = self->pos;
948 bytesread = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000949 }
Georg Brandl47fab922006-02-18 21:57:25 +0000950 offset = self->size + offset;
951 } else if (where == 1) {
952 offset = self->pos + offset;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000953 }
954
Guido van Rossum522a6c62007-05-22 23:13:45 +0000955 /* Before getting here, offset must be the absolute position the file
Georg Brandl47fab922006-02-18 21:57:25 +0000956 * pointer should be set to. */
957
958 if (offset >= self->pos) {
959 /* we can move forward */
960 offset -= self->pos;
961 } else {
962 /* we cannot move back, so rewind the stream */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000963 BZ2_bzReadClose(&bzerror, self->fp);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000964 if (bzerror != BZ_OK) {
965 Util_CatchBZ2Error(bzerror);
966 goto cleanup;
967 }
Guido van Rossumf09ca142007-06-13 00:03:05 +0000968 rewind(self->rawfp);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000969 self->pos = 0;
Guido van Rossumf09ca142007-06-13 00:03:05 +0000970 self->fp = BZ2_bzReadOpen(&bzerror, self->rawfp,
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000971 0, 0, NULL, 0);
972 if (bzerror != BZ_OK) {
973 Util_CatchBZ2Error(bzerror);
974 goto cleanup;
975 }
976 self->mode = MODE_READ;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000977 }
978
Georg Brandl47fab922006-02-18 21:57:25 +0000979 if (offset <= 0 || self->mode == MODE_READ_EOF)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000980 goto exit;
981
982 /* Before getting here, offset must be set to the number of bytes
983 * to walk forward. */
984 for (;;) {
Georg Brandla8bcecc2005-09-03 07:49:53 +0000985 if (offset-bytesread > buffersize)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000986 readsize = buffersize;
987 else
Georg Brandla8bcecc2005-09-03 07:49:53 +0000988 /* offset might be wider that readsize, but the result
989 * of the subtraction is bound by buffersize (see the
990 * condition above). buffersize is 8192. */
991 readsize = (size_t)(offset-bytesread);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000992 Py_BEGIN_ALLOW_THREADS
Guido van Rossumf09ca142007-06-13 00:03:05 +0000993 chunksize = BZ2_bzRead(&bzerror, self->fp, buffer, readsize);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000994 self->pos += chunksize;
995 Py_END_ALLOW_THREADS
996 bytesread += chunksize;
997 if (bzerror == BZ_STREAM_END) {
998 self->size = self->pos;
999 self->mode = MODE_READ_EOF;
1000 break;
1001 } else if (bzerror != BZ_OK) {
1002 Util_CatchBZ2Error(bzerror);
1003 goto cleanup;
1004 }
1005 if (bytesread == offset)
1006 break;
1007 }
1008
1009exit:
1010 Py_INCREF(Py_None);
1011 ret = Py_None;
1012
1013cleanup:
1014 RELEASE_LOCK(self);
1015 return ret;
1016}
1017
1018PyDoc_STRVAR(BZ2File_tell__doc__,
1019"tell() -> int\n\
1020\n\
1021Return the current file position, an integer (may be a long integer).\n\
1022");
1023
1024static PyObject *
1025BZ2File_tell(BZ2FileObject *self, PyObject *args)
1026{
1027 PyObject *ret = NULL;
1028
1029 if (self->mode == MODE_CLOSED) {
1030 PyErr_SetString(PyExc_ValueError,
1031 "I/O operation on closed file");
1032 goto cleanup;
1033 }
1034
Georg Brandla8bcecc2005-09-03 07:49:53 +00001035#if !defined(HAVE_LARGEFILE_SUPPORT)
Christian Heimes217cfd12007-12-02 14:31:20 +00001036 ret = PyLong_FromLong(self->pos);
Georg Brandla8bcecc2005-09-03 07:49:53 +00001037#else
1038 ret = PyLong_FromLongLong(self->pos);
1039#endif
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001040
1041cleanup:
1042 return ret;
1043}
1044
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001045PyDoc_STRVAR(BZ2File_close__doc__,
1046"close() -> None or (perhaps) an integer\n\
1047\n\
1048Close the file. Sets data attribute .closed to true. A closed file\n\
1049cannot be used for further I/O operations. close() may be called more\n\
1050than once without error.\n\
1051");
1052
1053static PyObject *
1054BZ2File_close(BZ2FileObject *self)
1055{
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001056 PyObject *ret = NULL;
1057 int bzerror = BZ_OK;
1058
Guido van Rossumf09ca142007-06-13 00:03:05 +00001059 if (self->mode == MODE_CLOSED) {
1060 Py_RETURN_NONE;
1061 }
1062
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001063 ACQUIRE_LOCK(self);
1064 switch (self->mode) {
1065 case MODE_READ:
1066 case MODE_READ_EOF:
1067 BZ2_bzReadClose(&bzerror, self->fp);
1068 break;
1069 case MODE_WRITE:
1070 BZ2_bzWriteClose(&bzerror, self->fp,
1071 0, NULL, NULL);
1072 break;
1073 }
1074 self->mode = MODE_CLOSED;
Guido van Rossumf09ca142007-06-13 00:03:05 +00001075 fclose(self->rawfp);
1076 self->rawfp = NULL;
1077 if (bzerror == BZ_OK) {
1078 Py_INCREF(Py_None);
1079 ret = Py_None;
1080 }
1081 else {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001082 Util_CatchBZ2Error(bzerror);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001083 }
1084
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001085 RELEASE_LOCK(self);
1086 return ret;
1087}
1088
Antoine Pitrou308705e2009-01-10 16:22:51 +00001089PyDoc_STRVAR(BZ2File_enter_doc,
1090"__enter__() -> self.");
1091
1092static PyObject *
1093BZ2File_enter(BZ2FileObject *self)
1094{
1095 if (self->mode == MODE_CLOSED) {
1096 PyErr_SetString(PyExc_ValueError,
1097 "I/O operation on closed file");
1098 return NULL;
1099 }
1100 Py_INCREF(self);
1101 return (PyObject *) self;
1102}
1103
1104PyDoc_STRVAR(BZ2File_exit_doc,
1105"__exit__(*excinfo) -> None. Closes the file.");
1106
1107static PyObject *
1108BZ2File_exit(BZ2FileObject *self, PyObject *args)
1109{
1110 PyObject *ret = PyObject_CallMethod((PyObject *) self, "close", NULL);
1111 if (!ret)
1112 /* If error occurred, pass through */
1113 return NULL;
1114 Py_DECREF(ret);
1115 Py_RETURN_NONE;
1116}
1117
1118
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001119static PyObject *BZ2File_getiter(BZ2FileObject *self);
1120
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001121static PyMethodDef BZ2File_methods[] = {
1122 {"read", (PyCFunction)BZ2File_read, METH_VARARGS, BZ2File_read__doc__},
1123 {"readline", (PyCFunction)BZ2File_readline, METH_VARARGS, BZ2File_readline__doc__},
1124 {"readlines", (PyCFunction)BZ2File_readlines, METH_VARARGS, BZ2File_readlines__doc__},
1125 {"write", (PyCFunction)BZ2File_write, METH_VARARGS, BZ2File_write__doc__},
1126 {"writelines", (PyCFunction)BZ2File_writelines, METH_O, BZ2File_writelines__doc__},
1127 {"seek", (PyCFunction)BZ2File_seek, METH_VARARGS, BZ2File_seek__doc__},
1128 {"tell", (PyCFunction)BZ2File_tell, METH_NOARGS, BZ2File_tell__doc__},
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001129 {"close", (PyCFunction)BZ2File_close, METH_NOARGS, BZ2File_close__doc__},
Antoine Pitrou308705e2009-01-10 16:22:51 +00001130 {"__enter__", (PyCFunction)BZ2File_enter, METH_NOARGS, BZ2File_enter_doc},
1131 {"__exit__", (PyCFunction)BZ2File_exit, METH_VARARGS, BZ2File_exit_doc},
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001132 {NULL, NULL} /* sentinel */
1133};
1134
1135
1136/* ===================================================================== */
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001137/* Getters and setters of BZ2File. */
1138
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001139static PyObject *
1140BZ2File_get_closed(BZ2FileObject *self, void *closure)
1141{
Christian Heimes217cfd12007-12-02 14:31:20 +00001142 return PyLong_FromLong(self->mode == MODE_CLOSED);
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001143}
1144
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001145static PyGetSetDef BZ2File_getset[] = {
1146 {"closed", (getter)BZ2File_get_closed, NULL,
1147 "True if the file is closed"},
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001148 {NULL} /* Sentinel */
1149};
1150
1151
1152/* ===================================================================== */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001153/* Slot definitions for BZ2File_Type. */
1154
1155static int
1156BZ2File_init(BZ2FileObject *self, PyObject *args, PyObject *kwargs)
1157{
Martin v. Löwis15e62742006-02-27 16:46:16 +00001158 static char *kwlist[] = {"filename", "mode", "buffering",
Guido van Rossumf09ca142007-06-13 00:03:05 +00001159 "compresslevel", 0};
1160 char *name;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001161 char *mode = "r";
1162 int buffering = -1;
1163 int compresslevel = 9;
1164 int bzerror;
1165 int mode_char = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001166
1167 self->size = -1;
Tim Peterse3228092002-11-09 04:21:44 +00001168
Guido van Rossumf09ca142007-06-13 00:03:05 +00001169 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s|sii:BZ2File",
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001170 kwlist, &name, &mode, &buffering,
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001171 &compresslevel))
1172 return -1;
1173
1174 if (compresslevel < 1 || compresslevel > 9) {
1175 PyErr_SetString(PyExc_ValueError,
1176 "compresslevel must be between 1 and 9");
1177 return -1;
1178 }
1179
1180 for (;;) {
1181 int error = 0;
1182 switch (*mode) {
1183 case 'r':
1184 case 'w':
1185 if (mode_char)
1186 error = 1;
1187 mode_char = *mode;
1188 break;
1189
1190 case 'b':
1191 break;
1192
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001193 default:
1194 error = 1;
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001195 break;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001196 }
1197 if (error) {
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001198 PyErr_Format(PyExc_ValueError,
1199 "invalid mode char %c", *mode);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001200 return -1;
1201 }
1202 mode++;
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001203 if (*mode == '\0')
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001204 break;
1205 }
1206
Georg Brandl6b95f1d2005-06-03 19:47:00 +00001207 if (mode_char == 0) {
1208 mode_char = 'r';
1209 }
1210
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001211 mode = (mode_char == 'r') ? "rb" : "wb";
Tim Peterse3228092002-11-09 04:21:44 +00001212
Guido van Rossumf09ca142007-06-13 00:03:05 +00001213 self->rawfp = fopen(name, mode);
1214 if (self->rawfp == NULL) {
1215 PyErr_SetFromErrno(PyExc_IOError);
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001216 return -1;
Guido van Rossumf09ca142007-06-13 00:03:05 +00001217 }
1218 /* XXX Ignore buffering */
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001219
1220 /* From now on, we have stuff to dealloc, so jump to error label
1221 * instead of returning */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001222
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001223#ifdef WITH_THREAD
1224 self->lock = PyThread_allocate_lock();
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001225 if (!self->lock) {
1226 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001227 goto error;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001228 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001229#endif
1230
1231 if (mode_char == 'r')
Guido van Rossumf09ca142007-06-13 00:03:05 +00001232 self->fp = BZ2_bzReadOpen(&bzerror, self->rawfp,
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001233 0, 0, NULL, 0);
1234 else
Guido van Rossumf09ca142007-06-13 00:03:05 +00001235 self->fp = BZ2_bzWriteOpen(&bzerror, self->rawfp,
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001236 compresslevel, 0, 0);
1237
1238 if (bzerror != BZ_OK) {
1239 Util_CatchBZ2Error(bzerror);
1240 goto error;
1241 }
1242
1243 self->mode = (mode_char == 'r') ? MODE_READ : MODE_WRITE;
1244
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001245 return 0;
1246
1247error:
Guido van Rossumf09ca142007-06-13 00:03:05 +00001248 fclose(self->rawfp);
1249 self->rawfp = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001250#ifdef WITH_THREAD
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001251 if (self->lock) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001252 PyThread_free_lock(self->lock);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001253 self->lock = NULL;
1254 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001255#endif
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001256 return -1;
1257}
1258
1259static void
1260BZ2File_dealloc(BZ2FileObject *self)
1261{
1262 int bzerror;
1263#ifdef WITH_THREAD
1264 if (self->lock)
1265 PyThread_free_lock(self->lock);
1266#endif
1267 switch (self->mode) {
1268 case MODE_READ:
1269 case MODE_READ_EOF:
1270 BZ2_bzReadClose(&bzerror, self->fp);
1271 break;
1272 case MODE_WRITE:
1273 BZ2_bzWriteClose(&bzerror, self->fp,
1274 0, NULL, NULL);
1275 break;
1276 }
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001277 Util_DropReadAhead(self);
Guido van Rossumf09ca142007-06-13 00:03:05 +00001278 if (self->rawfp != NULL)
1279 fclose(self->rawfp);
Christian Heimes90aa7642007-12-19 02:45:37 +00001280 Py_TYPE(self)->tp_free((PyObject *)self);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001281}
1282
1283/* This is a hacked version of Python's fileobject.c:file_getiter(). */
1284static PyObject *
1285BZ2File_getiter(BZ2FileObject *self)
1286{
1287 if (self->mode == MODE_CLOSED) {
1288 PyErr_SetString(PyExc_ValueError,
1289 "I/O operation on closed file");
1290 return NULL;
1291 }
1292 Py_INCREF((PyObject*)self);
1293 return (PyObject *)self;
1294}
1295
1296/* This is a hacked version of Python's fileobject.c:file_iternext(). */
1297#define READAHEAD_BUFSIZE 8192
1298static PyObject *
1299BZ2File_iternext(BZ2FileObject *self)
1300{
Christian Heimes72b710a2008-05-26 13:28:38 +00001301 PyBytesObject* ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001302 ACQUIRE_LOCK(self);
1303 if (self->mode == MODE_CLOSED) {
Georg Brandl86b2fb92008-07-16 03:43:04 +00001304 RELEASE_LOCK(self);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001305 PyErr_SetString(PyExc_ValueError,
1306 "I/O operation on closed file");
1307 return NULL;
1308 }
1309 ret = Util_ReadAheadGetLineSkip(self, 0, READAHEAD_BUFSIZE);
1310 RELEASE_LOCK(self);
Christian Heimes72b710a2008-05-26 13:28:38 +00001311 if (ret == NULL || PyBytes_GET_SIZE(ret) == 0) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001312 Py_XDECREF(ret);
1313 return NULL;
1314 }
1315 return (PyObject *)ret;
1316}
1317
1318/* ===================================================================== */
1319/* BZ2File_Type definition. */
1320
1321PyDoc_VAR(BZ2File__doc__) =
1322PyDoc_STR(
1323"BZ2File(name [, mode='r', buffering=0, compresslevel=9]) -> file object\n\
1324\n\
1325Open a bz2 file. The mode can be 'r' or 'w', for reading (default) or\n\
1326writing. When opened for writing, the file will be created if it doesn't\n\
1327exist, and truncated otherwise. If the buffering argument is given, 0 means\n\
1328unbuffered, and larger numbers specify the buffer size. If compresslevel\n\
1329is given, must be a number between 1 and 9.\n\
Guido van Rossum88e860c2007-06-13 01:46:31 +00001330Data read is always returned in bytes; data written ought to be bytes.\n\
1331");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001332
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001333static PyTypeObject BZ2File_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001334 PyVarObject_HEAD_INIT(NULL, 0)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001335 "bz2.BZ2File", /*tp_name*/
1336 sizeof(BZ2FileObject), /*tp_basicsize*/
1337 0, /*tp_itemsize*/
1338 (destructor)BZ2File_dealloc, /*tp_dealloc*/
1339 0, /*tp_print*/
1340 0, /*tp_getattr*/
1341 0, /*tp_setattr*/
Mark Dickinsone94c6792009-02-02 20:36:42 +00001342 0, /*tp_reserved*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001343 0, /*tp_repr*/
1344 0, /*tp_as_number*/
1345 0, /*tp_as_sequence*/
1346 0, /*tp_as_mapping*/
1347 0, /*tp_hash*/
1348 0, /*tp_call*/
1349 0, /*tp_str*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00001350 PyObject_GenericGetAttr,/*tp_getattro*/
1351 PyObject_GenericSetAttr,/*tp_setattro*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001352 0, /*tp_as_buffer*/
1353 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1354 BZ2File__doc__, /*tp_doc*/
1355 0, /*tp_traverse*/
1356 0, /*tp_clear*/
1357 0, /*tp_richcompare*/
1358 0, /*tp_weaklistoffset*/
1359 (getiterfunc)BZ2File_getiter, /*tp_iter*/
1360 (iternextfunc)BZ2File_iternext, /*tp_iternext*/
1361 BZ2File_methods, /*tp_methods*/
Guido van Rossum79139b22007-02-09 23:20:19 +00001362 0, /*tp_members*/
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001363 BZ2File_getset, /*tp_getset*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001364 0, /*tp_base*/
1365 0, /*tp_dict*/
1366 0, /*tp_descr_get*/
1367 0, /*tp_descr_set*/
1368 0, /*tp_dictoffset*/
1369 (initproc)BZ2File_init, /*tp_init*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00001370 PyType_GenericAlloc, /*tp_alloc*/
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001371 PyType_GenericNew, /*tp_new*/
Neal Norwitz30d1c512007-08-19 22:48:23 +00001372 PyObject_Free, /*tp_free*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001373 0, /*tp_is_gc*/
1374};
1375
1376
1377/* ===================================================================== */
1378/* Methods of BZ2Comp. */
1379
1380PyDoc_STRVAR(BZ2Comp_compress__doc__,
1381"compress(data) -> string\n\
1382\n\
1383Provide more data to the compressor object. It will return chunks of\n\
1384compressed data whenever possible. When you've finished providing data\n\
1385to compress, call the flush() method to finish the compression process,\n\
1386and return what is left in the internal buffers.\n\
1387");
1388
1389static PyObject *
1390BZ2Comp_compress(BZ2CompObject *self, PyObject *args)
1391{
Martin v. Löwis423be952008-08-13 15:53:07 +00001392 Py_buffer pdata;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001393 char *data;
1394 int datasize;
1395 int bufsize = SMALLCHUNK;
Martin v. Löwisb9a0f912003-03-29 10:06:18 +00001396 PY_LONG_LONG totalout;
Gustavo Niemeyer7d7930b2002-11-05 18:41:53 +00001397 PyObject *ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001398 bz_stream *bzs = &self->bzs;
1399 int bzerror;
1400
Martin v. Löwis423be952008-08-13 15:53:07 +00001401 if (!PyArg_ParseTuple(args, "y*:compress", &pdata))
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001402 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +00001403 data = pdata.buf;
1404 datasize = pdata.len;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001405
Martin v. Löwis423be952008-08-13 15:53:07 +00001406 if (datasize == 0) {
1407 PyBuffer_Release(&pdata);
Christian Heimes72b710a2008-05-26 13:28:38 +00001408 return PyBytes_FromStringAndSize("", 0);
Martin v. Löwis423be952008-08-13 15:53:07 +00001409 }
Gustavo Niemeyera6e436e2004-02-14 00:02:45 +00001410
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001411 ACQUIRE_LOCK(self);
1412 if (!self->running) {
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001413 PyErr_SetString(PyExc_ValueError,
1414 "this object was already flushed");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001415 goto error;
1416 }
1417
Christian Heimes72b710a2008-05-26 13:28:38 +00001418 ret = PyBytes_FromStringAndSize(NULL, bufsize);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001419 if (!ret)
1420 goto error;
1421
1422 bzs->next_in = data;
1423 bzs->avail_in = datasize;
1424 bzs->next_out = BUF(ret);
1425 bzs->avail_out = bufsize;
1426
1427 totalout = BZS_TOTAL_OUT(bzs);
1428
1429 for (;;) {
1430 Py_BEGIN_ALLOW_THREADS
1431 bzerror = BZ2_bzCompress(bzs, BZ_RUN);
1432 Py_END_ALLOW_THREADS
1433 if (bzerror != BZ_RUN_OK) {
1434 Util_CatchBZ2Error(bzerror);
1435 goto error;
1436 }
Guido van Rossumd8faa362007-04-27 19:54:29 +00001437 if (bzs->avail_in == 0)
1438 break; /* no more input data */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001439 if (bzs->avail_out == 0) {
1440 bufsize = Util_NewBufferSize(bufsize);
Christian Heimes72b710a2008-05-26 13:28:38 +00001441 if (_PyBytes_Resize(&ret, bufsize) < 0) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001442 BZ2_bzCompressEnd(bzs);
1443 goto error;
1444 }
1445 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1446 - totalout);
1447 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001448 }
1449 }
1450
Christian Heimes72b710a2008-05-26 13:28:38 +00001451 if (_PyBytes_Resize(&ret,
Guido van Rossum522a6c62007-05-22 23:13:45 +00001452 (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout)) < 0)
1453 goto error;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001454
1455 RELEASE_LOCK(self);
Martin v. Löwis423be952008-08-13 15:53:07 +00001456 PyBuffer_Release(&pdata);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001457 return ret;
1458
1459error:
1460 RELEASE_LOCK(self);
Martin v. Löwis423be952008-08-13 15:53:07 +00001461 PyBuffer_Release(&pdata);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001462 Py_XDECREF(ret);
1463 return NULL;
1464}
1465
1466PyDoc_STRVAR(BZ2Comp_flush__doc__,
1467"flush() -> string\n\
1468\n\
1469Finish the compression process and return what is left in internal buffers.\n\
1470You must not use the compressor object after calling this method.\n\
1471");
1472
1473static PyObject *
1474BZ2Comp_flush(BZ2CompObject *self)
1475{
1476 int bufsize = SMALLCHUNK;
Gustavo Niemeyer7d7930b2002-11-05 18:41:53 +00001477 PyObject *ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001478 bz_stream *bzs = &self->bzs;
Martin v. Löwisb9a0f912003-03-29 10:06:18 +00001479 PY_LONG_LONG totalout;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001480 int bzerror;
1481
1482 ACQUIRE_LOCK(self);
1483 if (!self->running) {
1484 PyErr_SetString(PyExc_ValueError, "object was already "
1485 "flushed");
1486 goto error;
1487 }
1488 self->running = 0;
1489
Christian Heimes72b710a2008-05-26 13:28:38 +00001490 ret = PyBytes_FromStringAndSize(NULL, bufsize);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001491 if (!ret)
1492 goto error;
1493
1494 bzs->next_out = BUF(ret);
1495 bzs->avail_out = bufsize;
1496
1497 totalout = BZS_TOTAL_OUT(bzs);
1498
1499 for (;;) {
1500 Py_BEGIN_ALLOW_THREADS
1501 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
1502 Py_END_ALLOW_THREADS
1503 if (bzerror == BZ_STREAM_END) {
1504 break;
1505 } else if (bzerror != BZ_FINISH_OK) {
1506 Util_CatchBZ2Error(bzerror);
1507 goto error;
1508 }
1509 if (bzs->avail_out == 0) {
1510 bufsize = Util_NewBufferSize(bufsize);
Christian Heimes72b710a2008-05-26 13:28:38 +00001511 if (_PyBytes_Resize(&ret, bufsize) < 0)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001512 goto error;
1513 bzs->next_out = BUF(ret);
1514 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1515 - totalout);
1516 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1517 }
1518 }
1519
Guido van Rossum522a6c62007-05-22 23:13:45 +00001520 if (bzs->avail_out != 0) {
Christian Heimes72b710a2008-05-26 13:28:38 +00001521 if (_PyBytes_Resize(&ret,
Guido van Rossum522a6c62007-05-22 23:13:45 +00001522 (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout)) < 0)
1523 goto error;
1524 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001525
1526 RELEASE_LOCK(self);
1527 return ret;
1528
1529error:
1530 RELEASE_LOCK(self);
1531 Py_XDECREF(ret);
1532 return NULL;
1533}
1534
1535static PyMethodDef BZ2Comp_methods[] = {
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001536 {"compress", (PyCFunction)BZ2Comp_compress, METH_VARARGS,
1537 BZ2Comp_compress__doc__},
1538 {"flush", (PyCFunction)BZ2Comp_flush, METH_NOARGS,
1539 BZ2Comp_flush__doc__},
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001540 {NULL, NULL} /* sentinel */
1541};
1542
1543
1544/* ===================================================================== */
1545/* Slot definitions for BZ2Comp_Type. */
1546
1547static int
1548BZ2Comp_init(BZ2CompObject *self, PyObject *args, PyObject *kwargs)
1549{
1550 int compresslevel = 9;
1551 int bzerror;
Martin v. Löwis15e62742006-02-27 16:46:16 +00001552 static char *kwlist[] = {"compresslevel", 0};
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001553
1554 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i:BZ2Compressor",
1555 kwlist, &compresslevel))
1556 return -1;
1557
1558 if (compresslevel < 1 || compresslevel > 9) {
1559 PyErr_SetString(PyExc_ValueError,
1560 "compresslevel must be between 1 and 9");
1561 goto error;
1562 }
1563
1564#ifdef WITH_THREAD
1565 self->lock = PyThread_allocate_lock();
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001566 if (!self->lock) {
1567 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001568 goto error;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001569 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001570#endif
1571
1572 memset(&self->bzs, 0, sizeof(bz_stream));
1573 bzerror = BZ2_bzCompressInit(&self->bzs, compresslevel, 0, 0);
1574 if (bzerror != BZ_OK) {
1575 Util_CatchBZ2Error(bzerror);
1576 goto error;
1577 }
1578
1579 self->running = 1;
1580
1581 return 0;
1582error:
1583#ifdef WITH_THREAD
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001584 if (self->lock) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001585 PyThread_free_lock(self->lock);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001586 self->lock = NULL;
1587 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001588#endif
1589 return -1;
1590}
1591
1592static void
1593BZ2Comp_dealloc(BZ2CompObject *self)
1594{
1595#ifdef WITH_THREAD
1596 if (self->lock)
1597 PyThread_free_lock(self->lock);
1598#endif
1599 BZ2_bzCompressEnd(&self->bzs);
Christian Heimes90aa7642007-12-19 02:45:37 +00001600 Py_TYPE(self)->tp_free((PyObject *)self);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001601}
1602
1603
1604/* ===================================================================== */
1605/* BZ2Comp_Type definition. */
1606
1607PyDoc_STRVAR(BZ2Comp__doc__,
1608"BZ2Compressor([compresslevel=9]) -> compressor object\n\
1609\n\
1610Create a new compressor object. This object may be used to compress\n\
1611data sequentially. If you want to compress data in one shot, use the\n\
1612compress() function instead. The compresslevel parameter, if given,\n\
1613must be a number between 1 and 9.\n\
1614");
1615
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001616static PyTypeObject BZ2Comp_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001617 PyVarObject_HEAD_INIT(NULL, 0)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001618 "bz2.BZ2Compressor", /*tp_name*/
1619 sizeof(BZ2CompObject), /*tp_basicsize*/
1620 0, /*tp_itemsize*/
1621 (destructor)BZ2Comp_dealloc, /*tp_dealloc*/
1622 0, /*tp_print*/
1623 0, /*tp_getattr*/
1624 0, /*tp_setattr*/
Mark Dickinsone94c6792009-02-02 20:36:42 +00001625 0, /*tp_reserved*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001626 0, /*tp_repr*/
1627 0, /*tp_as_number*/
1628 0, /*tp_as_sequence*/
1629 0, /*tp_as_mapping*/
1630 0, /*tp_hash*/
1631 0, /*tp_call*/
1632 0, /*tp_str*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00001633 PyObject_GenericGetAttr,/*tp_getattro*/
1634 PyObject_GenericSetAttr,/*tp_setattro*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001635 0, /*tp_as_buffer*/
1636 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1637 BZ2Comp__doc__, /*tp_doc*/
1638 0, /*tp_traverse*/
1639 0, /*tp_clear*/
1640 0, /*tp_richcompare*/
1641 0, /*tp_weaklistoffset*/
1642 0, /*tp_iter*/
1643 0, /*tp_iternext*/
1644 BZ2Comp_methods, /*tp_methods*/
1645 0, /*tp_members*/
1646 0, /*tp_getset*/
1647 0, /*tp_base*/
1648 0, /*tp_dict*/
1649 0, /*tp_descr_get*/
1650 0, /*tp_descr_set*/
1651 0, /*tp_dictoffset*/
1652 (initproc)BZ2Comp_init, /*tp_init*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00001653 PyType_GenericAlloc, /*tp_alloc*/
1654 PyType_GenericNew, /*tp_new*/
Neal Norwitz30d1c512007-08-19 22:48:23 +00001655 PyObject_Free, /*tp_free*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001656 0, /*tp_is_gc*/
1657};
1658
1659
1660/* ===================================================================== */
1661/* Members of BZ2Decomp. */
1662
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001663#undef OFF
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001664#define OFF(x) offsetof(BZ2DecompObject, x)
1665
1666static PyMemberDef BZ2Decomp_members[] = {
Guido van Rossum33d26892007-08-05 15:29:28 +00001667 {"unused_data", T_OBJECT, OFF(unused_data), READONLY},
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001668 {NULL} /* Sentinel */
1669};
1670
1671
1672/* ===================================================================== */
1673/* Methods of BZ2Decomp. */
1674
1675PyDoc_STRVAR(BZ2Decomp_decompress__doc__,
1676"decompress(data) -> string\n\
1677\n\
1678Provide more data to the decompressor object. It will return chunks\n\
1679of decompressed data whenever possible. If you try to decompress data\n\
1680after the end of stream is found, EOFError will be raised. If any data\n\
1681was found after the end of stream, it'll be ignored and saved in\n\
1682unused_data attribute.\n\
1683");
1684
1685static PyObject *
1686BZ2Decomp_decompress(BZ2DecompObject *self, PyObject *args)
1687{
Martin v. Löwis423be952008-08-13 15:53:07 +00001688 Py_buffer pdata;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001689 char *data;
1690 int datasize;
1691 int bufsize = SMALLCHUNK;
Martin v. Löwisb9a0f912003-03-29 10:06:18 +00001692 PY_LONG_LONG totalout;
Neal Norwitz18142c02002-11-05 18:17:32 +00001693 PyObject *ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001694 bz_stream *bzs = &self->bzs;
1695 int bzerror;
1696
Martin v. Löwis423be952008-08-13 15:53:07 +00001697 if (!PyArg_ParseTuple(args, "y*:decompress", &pdata))
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001698 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +00001699 data = pdata.buf;
1700 datasize = pdata.len;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001701
1702 ACQUIRE_LOCK(self);
1703 if (!self->running) {
1704 PyErr_SetString(PyExc_EOFError, "end of stream was "
1705 "already found");
1706 goto error;
1707 }
1708
Christian Heimes72b710a2008-05-26 13:28:38 +00001709 ret = PyBytes_FromStringAndSize(NULL, bufsize);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001710 if (!ret)
1711 goto error;
1712
1713 bzs->next_in = data;
1714 bzs->avail_in = datasize;
1715 bzs->next_out = BUF(ret);
1716 bzs->avail_out = bufsize;
1717
1718 totalout = BZS_TOTAL_OUT(bzs);
1719
1720 for (;;) {
1721 Py_BEGIN_ALLOW_THREADS
1722 bzerror = BZ2_bzDecompress(bzs);
1723 Py_END_ALLOW_THREADS
1724 if (bzerror == BZ_STREAM_END) {
1725 if (bzs->avail_in != 0) {
1726 Py_DECREF(self->unused_data);
1727 self->unused_data =
Christian Heimes72b710a2008-05-26 13:28:38 +00001728 PyBytes_FromStringAndSize(bzs->next_in,
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001729 bzs->avail_in);
1730 }
1731 self->running = 0;
1732 break;
1733 }
1734 if (bzerror != BZ_OK) {
1735 Util_CatchBZ2Error(bzerror);
1736 goto error;
1737 }
Guido van Rossumd8faa362007-04-27 19:54:29 +00001738 if (bzs->avail_in == 0)
1739 break; /* no more input data */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001740 if (bzs->avail_out == 0) {
1741 bufsize = Util_NewBufferSize(bufsize);
Christian Heimes72b710a2008-05-26 13:28:38 +00001742 if (_PyBytes_Resize(&ret, bufsize) < 0) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001743 BZ2_bzDecompressEnd(bzs);
1744 goto error;
1745 }
1746 bzs->next_out = BUF(ret);
1747 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1748 - totalout);
1749 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001750 }
1751 }
1752
Guido van Rossum522a6c62007-05-22 23:13:45 +00001753 if (bzs->avail_out != 0) {
Christian Heimes72b710a2008-05-26 13:28:38 +00001754 if (_PyBytes_Resize(&ret,
Guido van Rossum522a6c62007-05-22 23:13:45 +00001755 (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout)) < 0)
1756 goto error;
1757 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001758
1759 RELEASE_LOCK(self);
Martin v. Löwis423be952008-08-13 15:53:07 +00001760 PyBuffer_Release(&pdata);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001761 return ret;
1762
1763error:
1764 RELEASE_LOCK(self);
Martin v. Löwis423be952008-08-13 15:53:07 +00001765 PyBuffer_Release(&pdata);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001766 Py_XDECREF(ret);
1767 return NULL;
1768}
1769
1770static PyMethodDef BZ2Decomp_methods[] = {
1771 {"decompress", (PyCFunction)BZ2Decomp_decompress, METH_VARARGS, BZ2Decomp_decompress__doc__},
1772 {NULL, NULL} /* sentinel */
1773};
1774
1775
1776/* ===================================================================== */
1777/* Slot definitions for BZ2Decomp_Type. */
1778
1779static int
1780BZ2Decomp_init(BZ2DecompObject *self, PyObject *args, PyObject *kwargs)
1781{
1782 int bzerror;
1783
1784 if (!PyArg_ParseTuple(args, ":BZ2Decompressor"))
1785 return -1;
1786
1787#ifdef WITH_THREAD
1788 self->lock = PyThread_allocate_lock();
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001789 if (!self->lock) {
1790 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001791 goto error;
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001792 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001793#endif
1794
Christian Heimes72b710a2008-05-26 13:28:38 +00001795 self->unused_data = PyBytes_FromStringAndSize("", 0);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001796 if (!self->unused_data)
1797 goto error;
1798
1799 memset(&self->bzs, 0, sizeof(bz_stream));
1800 bzerror = BZ2_bzDecompressInit(&self->bzs, 0, 0);
1801 if (bzerror != BZ_OK) {
1802 Util_CatchBZ2Error(bzerror);
1803 goto error;
1804 }
1805
1806 self->running = 1;
1807
1808 return 0;
1809
1810error:
1811#ifdef WITH_THREAD
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001812 if (self->lock) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001813 PyThread_free_lock(self->lock);
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001814 self->lock = NULL;
1815 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001816#endif
Thomas Wouters0e3f5912006-08-11 14:57:12 +00001817 Py_CLEAR(self->unused_data);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001818 return -1;
1819}
1820
1821static void
1822BZ2Decomp_dealloc(BZ2DecompObject *self)
1823{
1824#ifdef WITH_THREAD
1825 if (self->lock)
1826 PyThread_free_lock(self->lock);
1827#endif
1828 Py_XDECREF(self->unused_data);
1829 BZ2_bzDecompressEnd(&self->bzs);
Christian Heimes90aa7642007-12-19 02:45:37 +00001830 Py_TYPE(self)->tp_free((PyObject *)self);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001831}
1832
1833
1834/* ===================================================================== */
1835/* BZ2Decomp_Type definition. */
1836
1837PyDoc_STRVAR(BZ2Decomp__doc__,
1838"BZ2Decompressor() -> decompressor object\n\
1839\n\
1840Create a new decompressor object. This object may be used to decompress\n\
1841data sequentially. If you want to decompress data in one shot, use the\n\
1842decompress() function instead.\n\
1843");
1844
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001845static PyTypeObject BZ2Decomp_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001846 PyVarObject_HEAD_INIT(NULL, 0)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001847 "bz2.BZ2Decompressor", /*tp_name*/
1848 sizeof(BZ2DecompObject), /*tp_basicsize*/
1849 0, /*tp_itemsize*/
1850 (destructor)BZ2Decomp_dealloc, /*tp_dealloc*/
1851 0, /*tp_print*/
1852 0, /*tp_getattr*/
1853 0, /*tp_setattr*/
Mark Dickinsone94c6792009-02-02 20:36:42 +00001854 0, /*tp_reserved*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001855 0, /*tp_repr*/
1856 0, /*tp_as_number*/
1857 0, /*tp_as_sequence*/
1858 0, /*tp_as_mapping*/
1859 0, /*tp_hash*/
1860 0, /*tp_call*/
1861 0, /*tp_str*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00001862 PyObject_GenericGetAttr,/*tp_getattro*/
1863 PyObject_GenericSetAttr,/*tp_setattro*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001864 0, /*tp_as_buffer*/
1865 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1866 BZ2Decomp__doc__, /*tp_doc*/
1867 0, /*tp_traverse*/
1868 0, /*tp_clear*/
1869 0, /*tp_richcompare*/
1870 0, /*tp_weaklistoffset*/
1871 0, /*tp_iter*/
1872 0, /*tp_iternext*/
1873 BZ2Decomp_methods, /*tp_methods*/
1874 BZ2Decomp_members, /*tp_members*/
1875 0, /*tp_getset*/
1876 0, /*tp_base*/
1877 0, /*tp_dict*/
1878 0, /*tp_descr_get*/
1879 0, /*tp_descr_set*/
1880 0, /*tp_dictoffset*/
1881 (initproc)BZ2Decomp_init, /*tp_init*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00001882 PyType_GenericAlloc, /*tp_alloc*/
1883 PyType_GenericNew, /*tp_new*/
Neal Norwitz30d1c512007-08-19 22:48:23 +00001884 PyObject_Free, /*tp_free*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001885 0, /*tp_is_gc*/
1886};
1887
1888
1889/* ===================================================================== */
1890/* Module functions. */
1891
1892PyDoc_STRVAR(bz2_compress__doc__,
1893"compress(data [, compresslevel=9]) -> string\n\
1894\n\
1895Compress data in one shot. If you want to compress data sequentially,\n\
1896use an instance of BZ2Compressor instead. The compresslevel parameter, if\n\
1897given, must be a number between 1 and 9.\n\
1898");
1899
1900static PyObject *
1901bz2_compress(PyObject *self, PyObject *args, PyObject *kwargs)
1902{
1903 int compresslevel=9;
Martin v. Löwis423be952008-08-13 15:53:07 +00001904 Py_buffer pdata;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001905 char *data;
1906 int datasize;
1907 int bufsize;
Gustavo Niemeyer7d7930b2002-11-05 18:41:53 +00001908 PyObject *ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001909 bz_stream _bzs;
1910 bz_stream *bzs = &_bzs;
1911 int bzerror;
Martin v. Löwis15e62742006-02-27 16:46:16 +00001912 static char *kwlist[] = {"data", "compresslevel", 0};
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001913
Martin v. Löwis423be952008-08-13 15:53:07 +00001914 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*|i",
1915 kwlist, &pdata,
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001916 &compresslevel))
1917 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +00001918 data = pdata.buf;
1919 datasize = pdata.len;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001920
1921 if (compresslevel < 1 || compresslevel > 9) {
1922 PyErr_SetString(PyExc_ValueError,
1923 "compresslevel must be between 1 and 9");
Martin v. Löwis423be952008-08-13 15:53:07 +00001924 PyBuffer_Release(&pdata);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001925 return NULL;
1926 }
1927
1928 /* Conforming to bz2 manual, this is large enough to fit compressed
1929 * data in one shot. We will check it later anyway. */
1930 bufsize = datasize + (datasize/100+1) + 600;
1931
Christian Heimes72b710a2008-05-26 13:28:38 +00001932 ret = PyBytes_FromStringAndSize(NULL, bufsize);
Martin v. Löwis423be952008-08-13 15:53:07 +00001933 if (!ret) {
1934 PyBuffer_Release(&pdata);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001935 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +00001936 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001937
1938 memset(bzs, 0, sizeof(bz_stream));
1939
1940 bzs->next_in = data;
1941 bzs->avail_in = datasize;
1942 bzs->next_out = BUF(ret);
1943 bzs->avail_out = bufsize;
1944
1945 bzerror = BZ2_bzCompressInit(bzs, compresslevel, 0, 0);
1946 if (bzerror != BZ_OK) {
1947 Util_CatchBZ2Error(bzerror);
Martin v. Löwis423be952008-08-13 15:53:07 +00001948 PyBuffer_Release(&pdata);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001949 Py_DECREF(ret);
1950 return NULL;
1951 }
Tim Peterse3228092002-11-09 04:21:44 +00001952
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001953 for (;;) {
1954 Py_BEGIN_ALLOW_THREADS
1955 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
1956 Py_END_ALLOW_THREADS
1957 if (bzerror == BZ_STREAM_END) {
1958 break;
1959 } else if (bzerror != BZ_FINISH_OK) {
1960 BZ2_bzCompressEnd(bzs);
1961 Util_CatchBZ2Error(bzerror);
Martin v. Löwis423be952008-08-13 15:53:07 +00001962 PyBuffer_Release(&pdata);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001963 Py_DECREF(ret);
1964 return NULL;
1965 }
1966 if (bzs->avail_out == 0) {
1967 bufsize = Util_NewBufferSize(bufsize);
Christian Heimes72b710a2008-05-26 13:28:38 +00001968 if (_PyBytes_Resize(&ret, bufsize) < 0) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001969 BZ2_bzCompressEnd(bzs);
Martin v. Löwis423be952008-08-13 15:53:07 +00001970 PyBuffer_Release(&pdata);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001971 return NULL;
1972 }
1973 bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
1974 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1975 }
1976 }
1977
Guido van Rossum522a6c62007-05-22 23:13:45 +00001978 if (bzs->avail_out != 0) {
Christian Heimes72b710a2008-05-26 13:28:38 +00001979 if (_PyBytes_Resize(&ret, (Py_ssize_t)BZS_TOTAL_OUT(bzs)) < 0) {
Guido van Rossum522a6c62007-05-22 23:13:45 +00001980 ret = NULL;
1981 }
1982 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001983 BZ2_bzCompressEnd(bzs);
1984
Martin v. Löwis423be952008-08-13 15:53:07 +00001985 PyBuffer_Release(&pdata);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001986 return ret;
1987}
1988
1989PyDoc_STRVAR(bz2_decompress__doc__,
1990"decompress(data) -> decompressed data\n\
1991\n\
1992Decompress data in one shot. If you want to decompress data sequentially,\n\
1993use an instance of BZ2Decompressor instead.\n\
1994");
1995
1996static PyObject *
1997bz2_decompress(PyObject *self, PyObject *args)
1998{
Martin v. Löwis423be952008-08-13 15:53:07 +00001999 Py_buffer pdata;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002000 char *data;
2001 int datasize;
2002 int bufsize = SMALLCHUNK;
2003 PyObject *ret;
2004 bz_stream _bzs;
2005 bz_stream *bzs = &_bzs;
2006 int bzerror;
2007
Martin v. Löwis423be952008-08-13 15:53:07 +00002008 if (!PyArg_ParseTuple(args, "y*:decompress", &pdata))
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002009 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +00002010 data = pdata.buf;
2011 datasize = pdata.len;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002012
Martin v. Löwis423be952008-08-13 15:53:07 +00002013 if (datasize == 0) {
2014 PyBuffer_Release(&pdata);
Christian Heimes72b710a2008-05-26 13:28:38 +00002015 return PyBytes_FromStringAndSize("", 0);
Martin v. Löwis423be952008-08-13 15:53:07 +00002016 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002017
Christian Heimes72b710a2008-05-26 13:28:38 +00002018 ret = PyBytes_FromStringAndSize(NULL, bufsize);
Martin v. Löwis423be952008-08-13 15:53:07 +00002019 if (!ret) {
2020 PyBuffer_Release(&pdata);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002021 return NULL;
Martin v. Löwis423be952008-08-13 15:53:07 +00002022 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002023
2024 memset(bzs, 0, sizeof(bz_stream));
2025
2026 bzs->next_in = data;
2027 bzs->avail_in = datasize;
2028 bzs->next_out = BUF(ret);
2029 bzs->avail_out = bufsize;
2030
2031 bzerror = BZ2_bzDecompressInit(bzs, 0, 0);
2032 if (bzerror != BZ_OK) {
2033 Util_CatchBZ2Error(bzerror);
2034 Py_DECREF(ret);
Martin v. Löwis423be952008-08-13 15:53:07 +00002035 PyBuffer_Release(&pdata);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002036 return NULL;
2037 }
Tim Peterse3228092002-11-09 04:21:44 +00002038
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002039 for (;;) {
2040 Py_BEGIN_ALLOW_THREADS
2041 bzerror = BZ2_bzDecompress(bzs);
2042 Py_END_ALLOW_THREADS
2043 if (bzerror == BZ_STREAM_END) {
2044 break;
2045 } else if (bzerror != BZ_OK) {
2046 BZ2_bzDecompressEnd(bzs);
2047 Util_CatchBZ2Error(bzerror);
Martin v. Löwis423be952008-08-13 15:53:07 +00002048 PyBuffer_Release(&pdata);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002049 Py_DECREF(ret);
2050 return NULL;
2051 }
Guido van Rossumd8faa362007-04-27 19:54:29 +00002052 if (bzs->avail_in == 0) {
2053 BZ2_bzDecompressEnd(bzs);
2054 PyErr_SetString(PyExc_ValueError,
2055 "couldn't find end of stream");
Martin v. Löwis423be952008-08-13 15:53:07 +00002056 PyBuffer_Release(&pdata);
Guido van Rossumd8faa362007-04-27 19:54:29 +00002057 Py_DECREF(ret);
2058 return NULL;
2059 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002060 if (bzs->avail_out == 0) {
2061 bufsize = Util_NewBufferSize(bufsize);
Christian Heimes72b710a2008-05-26 13:28:38 +00002062 if (_PyBytes_Resize(&ret, bufsize) < 0) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002063 BZ2_bzDecompressEnd(bzs);
Martin v. Löwis423be952008-08-13 15:53:07 +00002064 PyBuffer_Release(&pdata);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002065 return NULL;
2066 }
2067 bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
2068 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002069 }
2070 }
2071
Guido van Rossum522a6c62007-05-22 23:13:45 +00002072 if (bzs->avail_out != 0) {
Christian Heimes72b710a2008-05-26 13:28:38 +00002073 if (_PyBytes_Resize(&ret, (Py_ssize_t)BZS_TOTAL_OUT(bzs)) < 0) {
Guido van Rossum522a6c62007-05-22 23:13:45 +00002074 ret = NULL;
2075 }
2076 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002077 BZ2_bzDecompressEnd(bzs);
Martin v. Löwis423be952008-08-13 15:53:07 +00002078 PyBuffer_Release(&pdata);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002079
2080 return ret;
2081}
2082
2083static PyMethodDef bz2_methods[] = {
2084 {"compress", (PyCFunction) bz2_compress, METH_VARARGS|METH_KEYWORDS,
2085 bz2_compress__doc__},
2086 {"decompress", (PyCFunction) bz2_decompress, METH_VARARGS,
2087 bz2_decompress__doc__},
2088 {NULL, NULL} /* sentinel */
2089};
2090
2091/* ===================================================================== */
2092/* Initialization function. */
2093
2094PyDoc_STRVAR(bz2__doc__,
2095"The python bz2 module provides a comprehensive interface for\n\
2096the bz2 compression library. It implements a complete file\n\
2097interface, one shot (de)compression functions, and types for\n\
2098sequential (de)compression.\n\
2099");
2100
Martin v. Löwis1a214512008-06-11 05:26:20 +00002101
2102static struct PyModuleDef bz2module = {
2103 PyModuleDef_HEAD_INIT,
2104 "bz2",
2105 bz2__doc__,
2106 -1,
2107 bz2_methods,
2108 NULL,
2109 NULL,
2110 NULL,
2111 NULL
2112};
2113
Neal Norwitz21d896c2003-07-01 20:15:21 +00002114PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00002115PyInit_bz2(void)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002116{
2117 PyObject *m;
2118
Christian Heimes90aa7642007-12-19 02:45:37 +00002119 Py_TYPE(&BZ2File_Type) = &PyType_Type;
2120 Py_TYPE(&BZ2Comp_Type) = &PyType_Type;
2121 Py_TYPE(&BZ2Decomp_Type) = &PyType_Type;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002122
Martin v. Löwis1a214512008-06-11 05:26:20 +00002123 m = PyModule_Create(&bz2module);
Neal Norwitz1ac754f2006-01-19 06:09:39 +00002124 if (m == NULL)
Martin v. Löwis1a214512008-06-11 05:26:20 +00002125 return NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002126
Neal Norwitz53cbdaa2007-08-23 21:42:55 +00002127 PyModule_AddObject(m, "__author__", PyUnicode_FromString(__author__));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002128
2129 Py_INCREF(&BZ2File_Type);
2130 PyModule_AddObject(m, "BZ2File", (PyObject *)&BZ2File_Type);
2131
2132 Py_INCREF(&BZ2Comp_Type);
2133 PyModule_AddObject(m, "BZ2Compressor", (PyObject *)&BZ2Comp_Type);
2134
2135 Py_INCREF(&BZ2Decomp_Type);
2136 PyModule_AddObject(m, "BZ2Decompressor", (PyObject *)&BZ2Decomp_Type);
Martin v. Löwis1a214512008-06-11 05:26:20 +00002137 return m;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002138}