blob: 29f85927f43d20e13c6cc91fa144ad1fb1151632 [file] [log] [blame]
/*

python-bz2 - python bz2 library interface

Copyright (c) 2002 Gustavo Niemeyer <niemeyer@conectiva.com>
Copyright (c) 2002 Python Software Foundation; All Rights Reserved

*/
9
Martin v. Löwise17af7b2002-11-23 09:16:19 +000010#include "Python.h"
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000011#include <stdio.h>
12#include <bzlib.h>
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000013#include "structmember.h"
14
15#ifdef WITH_THREAD
16#include "pythread.h"
17#endif
18
19static char __author__[] =
20"The bz2 python module was written by:\n\
21\n\
22 Gustavo Niemeyer <niemeyer@conectiva.com>\n\
23";
24
Georg Brandl33a5f2a2005-08-21 14:16:04 +000025/* Our very own off_t-like type, 64-bit if possible */
26/* copied from Objects/fileobject.c */
27#if !defined(HAVE_LARGEFILE_SUPPORT)
28typedef off_t Py_off_t;
29#elif SIZEOF_OFF_T >= 8
30typedef off_t Py_off_t;
31#elif SIZEOF_FPOS_T >= 8
32typedef fpos_t Py_off_t;
33#else
34#error "Large file support, but neither off_t nor fpos_t is large enough."
35#endif
36
Christian Heimes72b710a2008-05-26 13:28:38 +000037#define BUF(v) PyBytes_AS_STRING(v)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000038
39#define MODE_CLOSED 0
40#define MODE_READ 1
41#define MODE_READ_EOF 2
42#define MODE_WRITE 3
43
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000044#define BZ2FileObject_Check(v) (Py_TYPE(v) == &BZ2File_Type)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000045
Gustavo Niemeyer7628f1f2003-04-27 06:25:24 +000046
47#ifdef BZ_CONFIG_ERROR
48
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000049#if SIZEOF_LONG >= 8
50#define BZS_TOTAL_OUT(bzs) \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000051 (((long)bzs->total_out_hi32 << 32) + bzs->total_out_lo32)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000052#elif SIZEOF_LONG_LONG >= 8
53#define BZS_TOTAL_OUT(bzs) \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000054 (((PY_LONG_LONG)bzs->total_out_hi32 << 32) + bzs->total_out_lo32)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000055#else
56#define BZS_TOTAL_OUT(bzs) \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000057 bzs->total_out_lo32
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000058#endif
59
Gustavo Niemeyer7628f1f2003-04-27 06:25:24 +000060#else /* ! BZ_CONFIG_ERROR */
61
62#define BZ2_bzRead bzRead
63#define BZ2_bzReadOpen bzReadOpen
64#define BZ2_bzReadClose bzReadClose
65#define BZ2_bzWrite bzWrite
66#define BZ2_bzWriteOpen bzWriteOpen
67#define BZ2_bzWriteClose bzWriteClose
68#define BZ2_bzCompress bzCompress
69#define BZ2_bzCompressInit bzCompressInit
70#define BZ2_bzCompressEnd bzCompressEnd
71#define BZ2_bzDecompress bzDecompress
72#define BZ2_bzDecompressInit bzDecompressInit
73#define BZ2_bzDecompressEnd bzDecompressEnd
74
75#define BZS_TOTAL_OUT(bzs) bzs->total_out
76
77#endif /* ! BZ_CONFIG_ERROR */
78
79
#ifdef WITH_THREAD
/* Take (obj)->lock.  A non-blocking attempt is made first; only if that
   fails is the interpreter lock released (Py_BEGIN_ALLOW_THREADS) while
   waiting for the object lock in blocking mode.  The macro argument is
   parenthesized so that any pointer expression can be passed safely. */
#define ACQUIRE_LOCK(obj) do { \
    if (!PyThread_acquire_lock((obj)->lock, 0)) { \
        Py_BEGIN_ALLOW_THREADS \
        PyThread_acquire_lock((obj)->lock, 1); \
        Py_END_ALLOW_THREADS \
    } } while(0)
/* Release (obj)->lock. */
#define RELEASE_LOCK(obj) PyThread_release_lock((obj)->lock)
#else
/* Built without thread support: the objects have no lock member, so both
   macros expand to nothing. */
#define ACQUIRE_LOCK(obj)
#define RELEASE_LOCK(obj)
#endif
92
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000093/* Bits in f_newlinetypes */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000094#define NEWLINE_UNKNOWN 0 /* No newline seen, yet */
95#define NEWLINE_CR 1 /* \r newline seen */
96#define NEWLINE_LF 2 /* \n newline seen */
97#define NEWLINE_CRLF 4 /* \r\n newline seen */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000098
99/* ===================================================================== */
100/* Structure definitions. */
101
102typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000103 PyObject_HEAD
104 FILE *rawfp;
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000105
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000106 char* f_buf; /* Allocated readahead buffer */
107 char* f_bufend; /* Points after last occupied position */
108 char* f_bufptr; /* Current buffer position */
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000109
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000110 BZFILE *fp;
111 int mode;
112 Py_off_t pos;
113 Py_off_t size;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000114#ifdef WITH_THREAD
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000115 PyThread_type_lock lock;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000116#endif
117} BZ2FileObject;
118
119typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000120 PyObject_HEAD
121 bz_stream bzs;
122 int running;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000123#ifdef WITH_THREAD
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000124 PyThread_type_lock lock;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000125#endif
126} BZ2CompObject;
127
128typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000129 PyObject_HEAD
130 bz_stream bzs;
131 int running;
132 PyObject *unused_data;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000133#ifdef WITH_THREAD
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000134 PyThread_type_lock lock;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000135#endif
136} BZ2DecompObject;
137
138/* ===================================================================== */
139/* Utility functions. */
140
141static int
142Util_CatchBZ2Error(int bzerror)
143{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000144 int ret = 0;
145 switch(bzerror) {
146 case BZ_OK:
147 case BZ_STREAM_END:
148 break;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000149
Gustavo Niemeyer7628f1f2003-04-27 06:25:24 +0000150#ifdef BZ_CONFIG_ERROR
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000151 case BZ_CONFIG_ERROR:
152 PyErr_SetString(PyExc_SystemError,
153 "the bz2 library was not compiled "
154 "correctly");
155 ret = 1;
156 break;
Gustavo Niemeyer7628f1f2003-04-27 06:25:24 +0000157#endif
Tim Peterse3228092002-11-09 04:21:44 +0000158
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000159 case BZ_PARAM_ERROR:
160 PyErr_SetString(PyExc_ValueError,
161 "the bz2 library has received wrong "
162 "parameters");
163 ret = 1;
164 break;
Tim Peterse3228092002-11-09 04:21:44 +0000165
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000166 case BZ_MEM_ERROR:
167 PyErr_NoMemory();
168 ret = 1;
169 break;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000170
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000171 case BZ_DATA_ERROR:
172 case BZ_DATA_ERROR_MAGIC:
173 PyErr_SetString(PyExc_IOError, "invalid data stream");
174 ret = 1;
175 break;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000176
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000177 case BZ_IO_ERROR:
178 PyErr_SetString(PyExc_IOError, "unknown IO error");
179 ret = 1;
180 break;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000181
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000182 case BZ_UNEXPECTED_EOF:
183 PyErr_SetString(PyExc_EOFError,
184 "compressed file ended before the "
185 "logical end-of-stream was detected");
186 ret = 1;
187 break;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000188
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000189 case BZ_SEQUENCE_ERROR:
190 PyErr_SetString(PyExc_RuntimeError,
191 "wrong sequence of bz2 library "
192 "commands used");
193 ret = 1;
194 break;
195 }
196 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000197}
198
#if BUFSIZ < 8192
#define SMALLCHUNK 8192
#else
#define SMALLCHUNK BUFSIZ
#endif

#if SIZEOF_INT < 4
#define BIGCHUNK (512 * 32)
#else
#define BIGCHUNK (512 * 1024)
#endif

/* This is a hacked version of Python's fileobject.c:new_buffersize().
 *
 * Given the current buffer size, return the size to grow to next:
 *   - up to and including SMALLCHUNK, grow by SMALLCHUNK;
 *   - above SMALLCHUNK and up to BIGCHUNK, double;
 *   - above BIGCHUNK, grow by BIGCHUNK.
 */
static size_t
Util_NewBufferSize(size_t currentsize)
{
    size_t growth;

    if (currentsize <= SMALLCHUNK)
        growth = SMALLCHUNK;
    else if (currentsize <= BIGCHUNK)
        growth = currentsize;
    else
        growth = BIGCHUNK;

    return currentsize + growth;
}
225
226/* This is a hacked version of Python's fileobject.c:get_line(). */
227static PyObject *
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000228Util_GetLine(BZ2FileObject *f, int n)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000229{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000230 char c;
231 char *buf, *end;
232 size_t total_v_size; /* total # of slots in buffer */
233 size_t used_v_size; /* # used slots in buffer */
234 size_t increment; /* amount to increment the buffer */
235 PyObject *v;
236 int bzerror;
237 int bytes_read;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000238
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000239 total_v_size = n > 0 ? n : 100;
240 v = PyBytes_FromStringAndSize((char *)NULL, total_v_size);
241 if (v == NULL)
242 return NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000243
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000244 buf = BUF(v);
245 end = buf + total_v_size;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000246
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000247 for (;;) {
248 Py_BEGIN_ALLOW_THREADS
249 do {
250 bytes_read = BZ2_bzRead(&bzerror, f->fp, &c, 1);
251 f->pos++;
252 if (bytes_read == 0)
253 break;
254 *buf++ = c;
255 } while (bzerror == BZ_OK && c != '\n' && buf != end);
256 Py_END_ALLOW_THREADS
257 if (bzerror == BZ_STREAM_END) {
258 f->size = f->pos;
259 f->mode = MODE_READ_EOF;
260 break;
261 } else if (bzerror != BZ_OK) {
262 Util_CatchBZ2Error(bzerror);
263 Py_DECREF(v);
264 return NULL;
265 }
266 if (c == '\n')
267 break;
268 /* Must be because buf == end */
269 if (n > 0)
270 break;
271 used_v_size = total_v_size;
272 increment = total_v_size >> 2; /* mild exponential growth */
273 total_v_size += increment;
274 if (total_v_size > INT_MAX) {
275 PyErr_SetString(PyExc_OverflowError,
276 "line is longer than a Python string can hold");
277 Py_DECREF(v);
278 return NULL;
279 }
280 if (_PyBytes_Resize(&v, total_v_size) < 0) {
281 return NULL;
282 }
283 buf = BUF(v) + used_v_size;
284 end = BUF(v) + total_v_size;
285 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000286
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000287 used_v_size = buf - BUF(v);
288 if (used_v_size != total_v_size) {
289 if (_PyBytes_Resize(&v, used_v_size) < 0) {
290 v = NULL;
291 }
292 }
293 return v;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000294}
295
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000296/* This is a hacked version of Python's fileobject.c:drop_readahead(). */
297static void
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000298Util_DropReadAhead(BZ2FileObject *f)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000299{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000300 if (f->f_buf != NULL) {
301 PyMem_Free(f->f_buf);
302 f->f_buf = NULL;
303 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000304}
305
306/* This is a hacked version of Python's fileobject.c:readahead(). */
307static int
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000308Util_ReadAhead(BZ2FileObject *f, int bufsize)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000309{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000310 int chunksize;
311 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000312
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000313 if (f->f_buf != NULL) {
314 if((f->f_bufend - f->f_bufptr) >= 1)
315 return 0;
316 else
317 Util_DropReadAhead(f);
318 }
319 if (f->mode == MODE_READ_EOF) {
320 f->f_bufptr = f->f_buf;
321 f->f_bufend = f->f_buf;
322 return 0;
323 }
324 if ((f->f_buf = PyMem_Malloc(bufsize)) == NULL) {
325 PyErr_NoMemory();
326 return -1;
327 }
328 Py_BEGIN_ALLOW_THREADS
329 chunksize = BZ2_bzRead(&bzerror, f->fp, f->f_buf, bufsize);
330 Py_END_ALLOW_THREADS
331 f->pos += chunksize;
332 if (bzerror == BZ_STREAM_END) {
333 f->size = f->pos;
334 f->mode = MODE_READ_EOF;
335 } else if (bzerror != BZ_OK) {
336 Util_CatchBZ2Error(bzerror);
337 Util_DropReadAhead(f);
338 return -1;
339 }
340 f->f_bufptr = f->f_buf;
341 f->f_bufend = f->f_buf + chunksize;
342 return 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000343}
344
345/* This is a hacked version of Python's
346 * fileobject.c:readahead_get_line_skip(). */
Christian Heimes72b710a2008-05-26 13:28:38 +0000347static PyBytesObject *
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000348Util_ReadAheadGetLineSkip(BZ2FileObject *f, int skip, int bufsize)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000349{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000350 PyBytesObject* s;
351 char *bufptr;
352 char *buf;
353 int len;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000354
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000355 if (f->f_buf == NULL)
356 if (Util_ReadAhead(f, bufsize) < 0)
357 return NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000358
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000359 len = f->f_bufend - f->f_bufptr;
360 if (len == 0)
361 return (PyBytesObject *)
362 PyBytes_FromStringAndSize(NULL, skip);
363 bufptr = memchr(f->f_bufptr, '\n', len);
364 if (bufptr != NULL) {
365 bufptr++; /* Count the '\n' */
366 len = bufptr - f->f_bufptr;
367 s = (PyBytesObject *)
368 PyBytes_FromStringAndSize(NULL, skip+len);
369 if (s == NULL)
370 return NULL;
371 memcpy(PyBytes_AS_STRING(s)+skip, f->f_bufptr, len);
372 f->f_bufptr = bufptr;
373 if (bufptr == f->f_bufend)
374 Util_DropReadAhead(f);
375 } else {
376 bufptr = f->f_bufptr;
377 buf = f->f_buf;
378 f->f_buf = NULL; /* Force new readahead buffer */
379 s = Util_ReadAheadGetLineSkip(f, skip+len,
380 bufsize + (bufsize>>2));
381 if (s == NULL) {
382 PyMem_Free(buf);
383 return NULL;
384 }
385 memcpy(PyBytes_AS_STRING(s)+skip, bufptr, len);
386 PyMem_Free(buf);
387 }
388 return s;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000389}
390
391/* ===================================================================== */
392/* Methods of BZ2File. */
393
394PyDoc_STRVAR(BZ2File_read__doc__,
395"read([size]) -> string\n\
396\n\
397Read at most size uncompressed bytes, returned as a string. If the size\n\
398argument is negative or omitted, read until EOF is reached.\n\
399");
400
401/* This is a hacked version of Python's fileobject.c:file_read(). */
402static PyObject *
403BZ2File_read(BZ2FileObject *self, PyObject *args)
404{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000405 long bytesrequested = -1;
406 size_t bytesread, buffersize, chunksize;
407 int bzerror;
408 PyObject *ret = NULL;
Tim Peterse3228092002-11-09 04:21:44 +0000409
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000410 if (!PyArg_ParseTuple(args, "|l:read", &bytesrequested))
411 return NULL;
Tim Peterse3228092002-11-09 04:21:44 +0000412
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000413 ACQUIRE_LOCK(self);
414 switch (self->mode) {
415 case MODE_READ:
416 break;
417 case MODE_READ_EOF:
418 ret = PyBytes_FromStringAndSize("", 0);
419 goto cleanup;
420 case MODE_CLOSED:
421 PyErr_SetString(PyExc_ValueError,
422 "I/O operation on closed file");
423 goto cleanup;
424 default:
425 PyErr_SetString(PyExc_IOError,
426 "file is not ready for reading");
427 goto cleanup;
428 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000429
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000430 if (bytesrequested < 0)
431 buffersize = Util_NewBufferSize((size_t)0);
432 else
433 buffersize = bytesrequested;
434 if (buffersize > INT_MAX) {
435 PyErr_SetString(PyExc_OverflowError,
436 "requested number of bytes is "
437 "more than a Python string can hold");
438 goto cleanup;
439 }
440 ret = PyBytes_FromStringAndSize((char *)NULL, buffersize);
441 if (ret == NULL || buffersize == 0)
442 goto cleanup;
443 bytesread = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000444
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000445 for (;;) {
446 Py_BEGIN_ALLOW_THREADS
447 chunksize = BZ2_bzRead(&bzerror, self->fp,
448 BUF(ret)+bytesread,
449 buffersize-bytesread);
450 self->pos += chunksize;
451 Py_END_ALLOW_THREADS
452 bytesread += chunksize;
453 if (bzerror == BZ_STREAM_END) {
454 self->size = self->pos;
455 self->mode = MODE_READ_EOF;
456 break;
457 } else if (bzerror != BZ_OK) {
458 Util_CatchBZ2Error(bzerror);
459 Py_DECREF(ret);
460 ret = NULL;
461 goto cleanup;
462 }
463 if (bytesrequested < 0) {
464 buffersize = Util_NewBufferSize(buffersize);
465 if (_PyBytes_Resize(&ret, buffersize) < 0) {
466 ret = NULL;
467 goto cleanup;
468 }
469 } else {
470 break;
471 }
472 }
473 if (bytesread != buffersize) {
474 if (_PyBytes_Resize(&ret, bytesread) < 0) {
475 ret = NULL;
476 }
477 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000478
479cleanup:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000480 RELEASE_LOCK(self);
481 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000482}
483
484PyDoc_STRVAR(BZ2File_readline__doc__,
485"readline([size]) -> string\n\
486\n\
487Return the next line from the file, as a string, retaining newline.\n\
488A non-negative size argument will limit the maximum number of bytes to\n\
489return (an incomplete line may be returned then). Return an empty\n\
490string at EOF.\n\
491");
492
493static PyObject *
494BZ2File_readline(BZ2FileObject *self, PyObject *args)
495{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000496 PyObject *ret = NULL;
497 int sizehint = -1;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000498
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000499 if (!PyArg_ParseTuple(args, "|i:readline", &sizehint))
500 return NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000501
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000502 ACQUIRE_LOCK(self);
503 switch (self->mode) {
504 case MODE_READ:
505 break;
506 case MODE_READ_EOF:
507 ret = PyBytes_FromStringAndSize("", 0);
508 goto cleanup;
509 case MODE_CLOSED:
510 PyErr_SetString(PyExc_ValueError,
511 "I/O operation on closed file");
512 goto cleanup;
513 default:
514 PyErr_SetString(PyExc_IOError,
515 "file is not ready for reading");
516 goto cleanup;
517 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000518
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000519 if (sizehint == 0)
520 ret = PyBytes_FromStringAndSize("", 0);
521 else
522 ret = Util_GetLine(self, (sizehint < 0) ? 0 : sizehint);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000523
524cleanup:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000525 RELEASE_LOCK(self);
526 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000527}
528
529PyDoc_STRVAR(BZ2File_readlines__doc__,
530"readlines([size]) -> list\n\
531\n\
532Call readline() repeatedly and return a list of lines read.\n\
533The optional size argument, if given, is an approximate bound on the\n\
534total number of bytes in the lines returned.\n\
535");
536
537/* This is a hacked version of Python's fileobject.c:file_readlines(). */
538static PyObject *
539BZ2File_readlines(BZ2FileObject *self, PyObject *args)
540{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000541 long sizehint = 0;
542 PyObject *list = NULL;
543 PyObject *line;
544 char small_buffer[SMALLCHUNK];
545 char *buffer = small_buffer;
546 size_t buffersize = SMALLCHUNK;
547 PyObject *big_buffer = NULL;
548 size_t nfilled = 0;
549 size_t nread;
550 size_t totalread = 0;
551 char *p, *q, *end;
552 int err;
553 int shortread = 0;
554 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000555
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000556 if (!PyArg_ParseTuple(args, "|l:readlines", &sizehint))
557 return NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000558
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000559 ACQUIRE_LOCK(self);
560 switch (self->mode) {
561 case MODE_READ:
562 break;
563 case MODE_READ_EOF:
564 list = PyList_New(0);
565 goto cleanup;
566 case MODE_CLOSED:
567 PyErr_SetString(PyExc_ValueError,
568 "I/O operation on closed file");
569 goto cleanup;
570 default:
571 PyErr_SetString(PyExc_IOError,
572 "file is not ready for reading");
573 goto cleanup;
574 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000575
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000576 if ((list = PyList_New(0)) == NULL)
577 goto cleanup;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000578
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000579 for (;;) {
580 Py_BEGIN_ALLOW_THREADS
581 nread = BZ2_bzRead(&bzerror, self->fp,
582 buffer+nfilled, buffersize-nfilled);
583 self->pos += nread;
584 Py_END_ALLOW_THREADS
585 if (bzerror == BZ_STREAM_END) {
586 self->size = self->pos;
587 self->mode = MODE_READ_EOF;
588 if (nread == 0) {
589 sizehint = 0;
590 break;
591 }
592 shortread = 1;
593 } else if (bzerror != BZ_OK) {
594 Util_CatchBZ2Error(bzerror);
595 error:
596 Py_DECREF(list);
597 list = NULL;
598 goto cleanup;
599 }
600 totalread += nread;
601 p = memchr(buffer+nfilled, '\n', nread);
602 if (!shortread && p == NULL) {
603 /* Need a larger buffer to fit this line */
604 nfilled += nread;
605 buffersize *= 2;
606 if (buffersize > INT_MAX) {
607 PyErr_SetString(PyExc_OverflowError,
608 "line is longer than a Python string can hold");
609 goto error;
610 }
611 if (big_buffer == NULL) {
612 /* Create the big buffer */
613 big_buffer = PyBytes_FromStringAndSize(
614 NULL, buffersize);
615 if (big_buffer == NULL)
616 goto error;
617 buffer = PyBytes_AS_STRING(big_buffer);
618 memcpy(buffer, small_buffer, nfilled);
619 }
620 else {
621 /* Grow the big buffer */
622 if (_PyBytes_Resize(&big_buffer, buffersize) < 0){
623 big_buffer = NULL;
624 goto error;
625 }
626 buffer = PyBytes_AS_STRING(big_buffer);
627 }
628 continue;
629 }
630 end = buffer+nfilled+nread;
631 q = buffer;
632 while (p != NULL) {
633 /* Process complete lines */
634 p++;
635 line = PyBytes_FromStringAndSize(q, p-q);
636 if (line == NULL)
637 goto error;
638 err = PyList_Append(list, line);
639 Py_DECREF(line);
640 if (err != 0)
641 goto error;
642 q = p;
643 p = memchr(q, '\n', end-q);
644 }
645 /* Move the remaining incomplete line to the start */
646 nfilled = end-q;
647 memmove(buffer, q, nfilled);
648 if (sizehint > 0)
649 if (totalread >= (size_t)sizehint)
650 break;
651 if (shortread) {
652 sizehint = 0;
653 break;
654 }
655 }
656 if (nfilled != 0) {
657 /* Partial last line */
658 line = PyBytes_FromStringAndSize(buffer, nfilled);
659 if (line == NULL)
660 goto error;
661 if (sizehint > 0) {
662 /* Need to complete the last line */
663 PyObject *rest = Util_GetLine(self, 0);
664 if (rest == NULL) {
665 Py_DECREF(line);
666 goto error;
667 }
668 PyBytes_Concat(&line, rest);
669 Py_DECREF(rest);
670 if (line == NULL)
671 goto error;
672 }
673 err = PyList_Append(list, line);
674 Py_DECREF(line);
675 if (err != 0)
676 goto error;
677 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000678
679 cleanup:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000680 RELEASE_LOCK(self);
681 if (big_buffer) {
682 Py_DECREF(big_buffer);
683 }
684 return list;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000685}
686
687PyDoc_STRVAR(BZ2File_write__doc__,
688"write(data) -> None\n\
689\n\
690Write the 'data' string to file. Note that due to buffering, close() may\n\
691be needed before the file on disk reflects the data written.\n\
692");
693
694/* This is a hacked version of Python's fileobject.c:file_write(). */
695static PyObject *
696BZ2File_write(BZ2FileObject *self, PyObject *args)
697{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000698 PyObject *ret = NULL;
699 Py_buffer pbuf;
700 char *buf;
701 int len;
702 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000703
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000704 if (!PyArg_ParseTuple(args, "y*:write", &pbuf))
705 return NULL;
706 buf = pbuf.buf;
707 len = pbuf.len;
Tim Peterse3228092002-11-09 04:21:44 +0000708
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000709 ACQUIRE_LOCK(self);
710 switch (self->mode) {
711 case MODE_WRITE:
712 break;
Tim Peterse3228092002-11-09 04:21:44 +0000713
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000714 case MODE_CLOSED:
715 PyErr_SetString(PyExc_ValueError,
716 "I/O operation on closed file");
717 goto cleanup;
Tim Peterse3228092002-11-09 04:21:44 +0000718
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000719 default:
720 PyErr_SetString(PyExc_IOError,
721 "file is not ready for writing");
722 goto cleanup;
723 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000724
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000725 Py_BEGIN_ALLOW_THREADS
726 BZ2_bzWrite (&bzerror, self->fp, buf, len);
727 self->pos += len;
728 Py_END_ALLOW_THREADS
Tim Peterse3228092002-11-09 04:21:44 +0000729
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000730 if (bzerror != BZ_OK) {
731 Util_CatchBZ2Error(bzerror);
732 goto cleanup;
733 }
Tim Peterse3228092002-11-09 04:21:44 +0000734
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000735 Py_INCREF(Py_None);
736 ret = Py_None;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000737
738cleanup:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000739 PyBuffer_Release(&pbuf);
740 RELEASE_LOCK(self);
741 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000742}
743
744PyDoc_STRVAR(BZ2File_writelines__doc__,
745"writelines(sequence_of_strings) -> None\n\
746\n\
747Write the sequence of strings to the file. Note that newlines are not\n\
748added. The sequence can be any iterable object producing strings. This is\n\
749equivalent to calling write() for each string.\n\
750");
751
752/* This is a hacked version of Python's fileobject.c:file_writelines(). */
753static PyObject *
754BZ2File_writelines(BZ2FileObject *self, PyObject *seq)
755{
756#define CHUNKSIZE 1000
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000757 PyObject *list = NULL;
758 PyObject *iter = NULL;
759 PyObject *ret = NULL;
760 PyObject *line;
761 int i, j, index, len, islist;
762 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000763
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000764 ACQUIRE_LOCK(self);
765 switch (self->mode) {
766 case MODE_WRITE:
767 break;
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000768
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000769 case MODE_CLOSED:
770 PyErr_SetString(PyExc_ValueError,
771 "I/O operation on closed file");
772 goto error;
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000773
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000774 default:
775 PyErr_SetString(PyExc_IOError,
776 "file is not ready for writing");
777 goto error;
778 }
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000779
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000780 islist = PyList_Check(seq);
781 if (!islist) {
782 iter = PyObject_GetIter(seq);
783 if (iter == NULL) {
784 PyErr_SetString(PyExc_TypeError,
785 "writelines() requires an iterable argument");
786 goto error;
787 }
788 list = PyList_New(CHUNKSIZE);
789 if (list == NULL)
790 goto error;
791 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000792
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000793 /* Strategy: slurp CHUNKSIZE lines into a private list,
794 checking that they are all strings, then write that list
795 without holding the interpreter lock, then come back for more. */
796 for (index = 0; ; index += CHUNKSIZE) {
797 if (islist) {
798 Py_XDECREF(list);
799 list = PyList_GetSlice(seq, index, index+CHUNKSIZE);
800 if (list == NULL)
801 goto error;
802 j = PyList_GET_SIZE(list);
803 }
804 else {
805 for (j = 0; j < CHUNKSIZE; j++) {
806 line = PyIter_Next(iter);
807 if (line == NULL) {
808 if (PyErr_Occurred())
809 goto error;
810 break;
811 }
812 PyList_SetItem(list, j, line);
813 }
814 }
815 if (j == 0)
816 break;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000817
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000818 /* Check that all entries are indeed byte strings. If not,
819 apply the same rules as for file.write() and
820 convert the rets to strings. This is slow, but
821 seems to be the only way since all conversion APIs
822 could potentially execute Python code. */
823 for (i = 0; i < j; i++) {
824 PyObject *v = PyList_GET_ITEM(list, i);
825 if (!PyBytes_Check(v)) {
826 const char *buffer;
827 Py_ssize_t len;
828 if (PyObject_AsCharBuffer(v, &buffer, &len)) {
829 PyErr_SetString(PyExc_TypeError,
830 "writelines() "
831 "argument must be "
832 "a sequence of "
833 "bytes objects");
834 goto error;
835 }
836 line = PyBytes_FromStringAndSize(buffer,
837 len);
838 if (line == NULL)
839 goto error;
840 Py_DECREF(v);
841 PyList_SET_ITEM(list, i, line);
842 }
843 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000844
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000845 /* Since we are releasing the global lock, the
846 following code may *not* execute Python code. */
847 Py_BEGIN_ALLOW_THREADS
848 for (i = 0; i < j; i++) {
849 line = PyList_GET_ITEM(list, i);
850 len = PyBytes_GET_SIZE(line);
851 BZ2_bzWrite (&bzerror, self->fp,
852 PyBytes_AS_STRING(line), len);
853 if (bzerror != BZ_OK) {
854 Py_BLOCK_THREADS
855 Util_CatchBZ2Error(bzerror);
856 goto error;
857 }
858 }
859 Py_END_ALLOW_THREADS
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000860
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000861 if (j < CHUNKSIZE)
862 break;
863 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000864
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000865 Py_INCREF(Py_None);
866 ret = Py_None;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000867
868 error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000869 RELEASE_LOCK(self);
870 Py_XDECREF(list);
871 Py_XDECREF(iter);
872 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000873#undef CHUNKSIZE
874}
875
876PyDoc_STRVAR(BZ2File_seek__doc__,
877"seek(offset [, whence]) -> None\n\
878\n\
879Move to new file position. Argument offset is a byte count. Optional\n\
880argument whence defaults to 0 (offset from start of file, offset\n\
881should be >= 0); other values are 1 (move relative to current position,\n\
882positive or negative), and 2 (move relative to end of file, usually\n\
883negative, although many platforms allow seeking beyond the end of a file).\n\
884\n\
885Note that seeking of bz2 files is emulated, and depending on the parameters\n\
886the operation may be extremely slow.\n\
887");
888
889static PyObject *
890BZ2File_seek(BZ2FileObject *self, PyObject *args)
891{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000892 int where = 0;
893 PyObject *offobj;
894 Py_off_t offset;
895 char small_buffer[SMALLCHUNK];
896 char *buffer = small_buffer;
897 size_t buffersize = SMALLCHUNK;
898 Py_off_t bytesread = 0;
899 size_t readsize;
900 int chunksize;
901 int bzerror;
902 PyObject *ret = NULL;
Tim Peterse3228092002-11-09 04:21:44 +0000903
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000904 if (!PyArg_ParseTuple(args, "O|i:seek", &offobj, &where))
905 return NULL;
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000906#if !defined(HAVE_LARGEFILE_SUPPORT)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000907 offset = PyLong_AsLong(offobj);
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000908#else
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000909 offset = PyLong_Check(offobj) ?
910 PyLong_AsLongLong(offobj) : PyLong_AsLong(offobj);
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000911#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000912 if (PyErr_Occurred())
913 return NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000914
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000915 ACQUIRE_LOCK(self);
916 Util_DropReadAhead(self);
917 switch (self->mode) {
918 case MODE_READ:
919 case MODE_READ_EOF:
920 break;
Tim Peterse3228092002-11-09 04:21:44 +0000921
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000922 case MODE_CLOSED:
923 PyErr_SetString(PyExc_ValueError,
924 "I/O operation on closed file");
925 goto cleanup;
Tim Peterse3228092002-11-09 04:21:44 +0000926
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000927 default:
928 PyErr_SetString(PyExc_IOError,
929 "seek works only while reading");
930 goto cleanup;
931 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000932
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000933 if (where == 2) {
934 if (self->size == -1) {
935 assert(self->mode != MODE_READ_EOF);
936 for (;;) {
937 Py_BEGIN_ALLOW_THREADS
938 chunksize = BZ2_bzRead(&bzerror, self->fp,
939 buffer, buffersize);
940 self->pos += chunksize;
941 Py_END_ALLOW_THREADS
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000942
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000943 bytesread += chunksize;
944 if (bzerror == BZ_STREAM_END) {
945 break;
946 } else if (bzerror != BZ_OK) {
947 Util_CatchBZ2Error(bzerror);
948 goto cleanup;
949 }
950 }
951 self->mode = MODE_READ_EOF;
952 self->size = self->pos;
953 bytesread = 0;
954 }
955 offset = self->size + offset;
956 } else if (where == 1) {
957 offset = self->pos + offset;
958 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000959
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000960 /* Before getting here, offset must be the absolute position the file
961 * pointer should be set to. */
Georg Brandl47fab922006-02-18 21:57:25 +0000962
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000963 if (offset >= self->pos) {
964 /* we can move forward */
965 offset -= self->pos;
966 } else {
967 /* we cannot move back, so rewind the stream */
968 BZ2_bzReadClose(&bzerror, self->fp);
969 if (bzerror != BZ_OK) {
970 Util_CatchBZ2Error(bzerror);
971 goto cleanup;
972 }
973 rewind(self->rawfp);
974 self->pos = 0;
975 self->fp = BZ2_bzReadOpen(&bzerror, self->rawfp,
976 0, 0, NULL, 0);
977 if (bzerror != BZ_OK) {
978 Util_CatchBZ2Error(bzerror);
979 goto cleanup;
980 }
981 self->mode = MODE_READ;
982 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000983
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000984 if (offset <= 0 || self->mode == MODE_READ_EOF)
985 goto exit;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000986
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000987 /* Before getting here, offset must be set to the number of bytes
988 * to walk forward. */
989 for (;;) {
990 if (offset-bytesread > buffersize)
991 readsize = buffersize;
992 else
993 /* offset might be wider that readsize, but the result
994 * of the subtraction is bound by buffersize (see the
995 * condition above). buffersize is 8192. */
996 readsize = (size_t)(offset-bytesread);
997 Py_BEGIN_ALLOW_THREADS
998 chunksize = BZ2_bzRead(&bzerror, self->fp, buffer, readsize);
999 self->pos += chunksize;
1000 Py_END_ALLOW_THREADS
1001 bytesread += chunksize;
1002 if (bzerror == BZ_STREAM_END) {
1003 self->size = self->pos;
1004 self->mode = MODE_READ_EOF;
1005 break;
1006 } else if (bzerror != BZ_OK) {
1007 Util_CatchBZ2Error(bzerror);
1008 goto cleanup;
1009 }
1010 if (bytesread == offset)
1011 break;
1012 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001013
1014exit:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001015 Py_INCREF(Py_None);
1016 ret = Py_None;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001017
1018cleanup:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001019 RELEASE_LOCK(self);
1020 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001021}
1022
1023PyDoc_STRVAR(BZ2File_tell__doc__,
1024"tell() -> int\n\
1025\n\
1026Return the current file position, an integer (may be a long integer).\n\
1027");
1028
1029static PyObject *
1030BZ2File_tell(BZ2FileObject *self, PyObject *args)
1031{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001032 PyObject *ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001033
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001034 if (self->mode == MODE_CLOSED) {
1035 PyErr_SetString(PyExc_ValueError,
1036 "I/O operation on closed file");
1037 goto cleanup;
1038 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001039
Georg Brandla8bcecc2005-09-03 07:49:53 +00001040#if !defined(HAVE_LARGEFILE_SUPPORT)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001041 ret = PyLong_FromLong(self->pos);
Georg Brandla8bcecc2005-09-03 07:49:53 +00001042#else
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001043 ret = PyLong_FromLongLong(self->pos);
Georg Brandla8bcecc2005-09-03 07:49:53 +00001044#endif
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001045
1046cleanup:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001047 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001048}
1049
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001050PyDoc_STRVAR(BZ2File_close__doc__,
1051"close() -> None or (perhaps) an integer\n\
1052\n\
1053Close the file. Sets data attribute .closed to true. A closed file\n\
1054cannot be used for further I/O operations. close() may be called more\n\
1055than once without error.\n\
1056");
1057
1058static PyObject *
1059BZ2File_close(BZ2FileObject *self)
1060{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001061 PyObject *ret = NULL;
1062 int bzerror = BZ_OK;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001063
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001064 if (self->mode == MODE_CLOSED) {
1065 Py_RETURN_NONE;
1066 }
Guido van Rossumf09ca142007-06-13 00:03:05 +00001067
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001068 ACQUIRE_LOCK(self);
1069 switch (self->mode) {
1070 case MODE_READ:
1071 case MODE_READ_EOF:
1072 BZ2_bzReadClose(&bzerror, self->fp);
1073 break;
1074 case MODE_WRITE:
1075 BZ2_bzWriteClose(&bzerror, self->fp,
1076 0, NULL, NULL);
1077 break;
1078 }
1079 self->mode = MODE_CLOSED;
1080 fclose(self->rawfp);
1081 self->rawfp = NULL;
1082 if (bzerror == BZ_OK) {
1083 Py_INCREF(Py_None);
1084 ret = Py_None;
1085 }
1086 else {
1087 Util_CatchBZ2Error(bzerror);
1088 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001089
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001090 RELEASE_LOCK(self);
1091 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001092}
1093
Antoine Pitrou308705e2009-01-10 16:22:51 +00001094PyDoc_STRVAR(BZ2File_enter_doc,
1095"__enter__() -> self.");
1096
1097static PyObject *
1098BZ2File_enter(BZ2FileObject *self)
1099{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001100 if (self->mode == MODE_CLOSED) {
1101 PyErr_SetString(PyExc_ValueError,
1102 "I/O operation on closed file");
1103 return NULL;
1104 }
1105 Py_INCREF(self);
1106 return (PyObject *) self;
Antoine Pitrou308705e2009-01-10 16:22:51 +00001107}
1108
1109PyDoc_STRVAR(BZ2File_exit_doc,
1110"__exit__(*excinfo) -> None. Closes the file.");
1111
1112static PyObject *
1113BZ2File_exit(BZ2FileObject *self, PyObject *args)
1114{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001115 PyObject *ret = PyObject_CallMethod((PyObject *) self, "close", NULL);
1116 if (!ret)
1117 /* If error occurred, pass through */
1118 return NULL;
1119 Py_DECREF(ret);
1120 Py_RETURN_NONE;
Antoine Pitrou308705e2009-01-10 16:22:51 +00001121}
1122
1123
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001124static PyObject *BZ2File_getiter(BZ2FileObject *self);
1125
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001126static PyMethodDef BZ2File_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001127 {"read", (PyCFunction)BZ2File_read, METH_VARARGS, BZ2File_read__doc__},
1128 {"readline", (PyCFunction)BZ2File_readline, METH_VARARGS, BZ2File_readline__doc__},
1129 {"readlines", (PyCFunction)BZ2File_readlines, METH_VARARGS, BZ2File_readlines__doc__},
1130 {"write", (PyCFunction)BZ2File_write, METH_VARARGS, BZ2File_write__doc__},
1131 {"writelines", (PyCFunction)BZ2File_writelines, METH_O, BZ2File_writelines__doc__},
1132 {"seek", (PyCFunction)BZ2File_seek, METH_VARARGS, BZ2File_seek__doc__},
1133 {"tell", (PyCFunction)BZ2File_tell, METH_NOARGS, BZ2File_tell__doc__},
1134 {"close", (PyCFunction)BZ2File_close, METH_NOARGS, BZ2File_close__doc__},
1135 {"__enter__", (PyCFunction)BZ2File_enter, METH_NOARGS, BZ2File_enter_doc},
1136 {"__exit__", (PyCFunction)BZ2File_exit, METH_VARARGS, BZ2File_exit_doc},
1137 {NULL, NULL} /* sentinel */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001138};
1139
1140
1141/* ===================================================================== */
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001142/* Getters and setters of BZ2File. */
1143
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001144static PyObject *
1145BZ2File_get_closed(BZ2FileObject *self, void *closure)
1146{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001147 return PyLong_FromLong(self->mode == MODE_CLOSED);
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001148}
1149
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001150static PyGetSetDef BZ2File_getset[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001151 {"closed", (getter)BZ2File_get_closed, NULL,
1152 "True if the file is closed"},
1153 {NULL} /* Sentinel */
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001154};
1155
1156
1157/* ===================================================================== */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001158/* Slot definitions for BZ2File_Type. */
1159
1160static int
1161BZ2File_init(BZ2FileObject *self, PyObject *args, PyObject *kwargs)
1162{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001163 static char *kwlist[] = {"filename", "mode", "buffering",
1164 "compresslevel", 0};
1165 PyObject *name_obj = NULL;
1166 char *name;
1167 char *mode = "r";
1168 int buffering = -1;
1169 int compresslevel = 9;
1170 int bzerror;
1171 int mode_char = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001172
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001173 self->size = -1;
Tim Peterse3228092002-11-09 04:21:44 +00001174
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001175 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O&|sii:BZ2File",
1176 kwlist, PyUnicode_FSConverter, &name_obj,
1177 &mode, &buffering,
1178 &compresslevel))
1179 return -1;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001180
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001181 name = PyBytes_AsString(name_obj);
1182 if (compresslevel < 1 || compresslevel > 9) {
1183 PyErr_SetString(PyExc_ValueError,
1184 "compresslevel must be between 1 and 9");
1185 Py_DECREF(name_obj);
1186 return -1;
1187 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001188
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001189 for (;;) {
1190 int error = 0;
1191 switch (*mode) {
1192 case 'r':
1193 case 'w':
1194 if (mode_char)
1195 error = 1;
1196 mode_char = *mode;
1197 break;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001198
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001199 case 'b':
1200 break;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001201
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001202 default:
1203 error = 1;
1204 break;
1205 }
1206 if (error) {
1207 PyErr_Format(PyExc_ValueError,
1208 "invalid mode char %c", *mode);
1209 Py_DECREF(name_obj);
1210 return -1;
1211 }
1212 mode++;
1213 if (*mode == '\0')
1214 break;
1215 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001216
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001217 if (mode_char == 0) {
1218 mode_char = 'r';
1219 }
Georg Brandl6b95f1d2005-06-03 19:47:00 +00001220
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001221 mode = (mode_char == 'r') ? "rb" : "wb";
Tim Peterse3228092002-11-09 04:21:44 +00001222
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001223 self->rawfp = fopen(name, mode);
1224 Py_DECREF(name_obj);
1225 if (self->rawfp == NULL) {
1226 PyErr_SetFromErrno(PyExc_IOError);
1227 return -1;
1228 }
1229 /* XXX Ignore buffering */
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001230
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001231 /* From now on, we have stuff to dealloc, so jump to error label
1232 * instead of returning */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001233
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001234#ifdef WITH_THREAD
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001235 self->lock = PyThread_allocate_lock();
1236 if (!self->lock) {
1237 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
1238 goto error;
1239 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001240#endif
1241
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001242 if (mode_char == 'r')
1243 self->fp = BZ2_bzReadOpen(&bzerror, self->rawfp,
1244 0, 0, NULL, 0);
1245 else
1246 self->fp = BZ2_bzWriteOpen(&bzerror, self->rawfp,
1247 compresslevel, 0, 0);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001248
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001249 if (bzerror != BZ_OK) {
1250 Util_CatchBZ2Error(bzerror);
1251 goto error;
1252 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001253
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001254 self->mode = (mode_char == 'r') ? MODE_READ : MODE_WRITE;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001255
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001256 return 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001257
1258error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001259 fclose(self->rawfp);
1260 self->rawfp = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001261#ifdef WITH_THREAD
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001262 if (self->lock) {
1263 PyThread_free_lock(self->lock);
1264 self->lock = NULL;
1265 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001266#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001267 return -1;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001268}
1269
1270static void
1271BZ2File_dealloc(BZ2FileObject *self)
1272{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001273 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001274#ifdef WITH_THREAD
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001275 if (self->lock)
1276 PyThread_free_lock(self->lock);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001277#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001278 switch (self->mode) {
1279 case MODE_READ:
1280 case MODE_READ_EOF:
1281 BZ2_bzReadClose(&bzerror, self->fp);
1282 break;
1283 case MODE_WRITE:
1284 BZ2_bzWriteClose(&bzerror, self->fp,
1285 0, NULL, NULL);
1286 break;
1287 }
1288 Util_DropReadAhead(self);
1289 if (self->rawfp != NULL)
1290 fclose(self->rawfp);
1291 Py_TYPE(self)->tp_free((PyObject *)self);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001292}
1293
1294/* This is a hacked version of Python's fileobject.c:file_getiter(). */
1295static PyObject *
1296BZ2File_getiter(BZ2FileObject *self)
1297{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001298 if (self->mode == MODE_CLOSED) {
1299 PyErr_SetString(PyExc_ValueError,
1300 "I/O operation on closed file");
1301 return NULL;
1302 }
1303 Py_INCREF((PyObject*)self);
1304 return (PyObject *)self;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001305}
1306
1307/* This is a hacked version of Python's fileobject.c:file_iternext(). */
1308#define READAHEAD_BUFSIZE 8192
1309static PyObject *
1310BZ2File_iternext(BZ2FileObject *self)
1311{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001312 PyBytesObject* ret;
1313 ACQUIRE_LOCK(self);
1314 if (self->mode == MODE_CLOSED) {
1315 RELEASE_LOCK(self);
1316 PyErr_SetString(PyExc_ValueError,
1317 "I/O operation on closed file");
1318 return NULL;
1319 }
1320 ret = Util_ReadAheadGetLineSkip(self, 0, READAHEAD_BUFSIZE);
1321 RELEASE_LOCK(self);
1322 if (ret == NULL || PyBytes_GET_SIZE(ret) == 0) {
1323 Py_XDECREF(ret);
1324 return NULL;
1325 }
1326 return (PyObject *)ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001327}
1328
1329/* ===================================================================== */
1330/* BZ2File_Type definition. */
1331
1332PyDoc_VAR(BZ2File__doc__) =
1333PyDoc_STR(
1334"BZ2File(name [, mode='r', buffering=0, compresslevel=9]) -> file object\n\
1335\n\
1336Open a bz2 file. The mode can be 'r' or 'w', for reading (default) or\n\
1337writing. When opened for writing, the file will be created if it doesn't\n\
1338exist, and truncated otherwise. If the buffering argument is given, 0 means\n\
1339unbuffered, and larger numbers specify the buffer size. If compresslevel\n\
1340is given, must be a number between 1 and 9.\n\
Guido van Rossum88e860c2007-06-13 01:46:31 +00001341Data read is always returned in bytes; data written ought to be bytes.\n\
1342");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001343
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001344static PyTypeObject BZ2File_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001345 PyVarObject_HEAD_INIT(NULL, 0)
1346 "bz2.BZ2File", /*tp_name*/
1347 sizeof(BZ2FileObject), /*tp_basicsize*/
1348 0, /*tp_itemsize*/
1349 (destructor)BZ2File_dealloc, /*tp_dealloc*/
1350 0, /*tp_print*/
1351 0, /*tp_getattr*/
1352 0, /*tp_setattr*/
1353 0, /*tp_reserved*/
1354 0, /*tp_repr*/
1355 0, /*tp_as_number*/
1356 0, /*tp_as_sequence*/
1357 0, /*tp_as_mapping*/
1358 0, /*tp_hash*/
1359 0, /*tp_call*/
1360 0, /*tp_str*/
1361 PyObject_GenericGetAttr,/*tp_getattro*/
1362 PyObject_GenericSetAttr,/*tp_setattro*/
1363 0, /*tp_as_buffer*/
1364 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1365 BZ2File__doc__, /*tp_doc*/
1366 0, /*tp_traverse*/
1367 0, /*tp_clear*/
1368 0, /*tp_richcompare*/
1369 0, /*tp_weaklistoffset*/
1370 (getiterfunc)BZ2File_getiter, /*tp_iter*/
1371 (iternextfunc)BZ2File_iternext, /*tp_iternext*/
1372 BZ2File_methods, /*tp_methods*/
1373 0, /*tp_members*/
1374 BZ2File_getset, /*tp_getset*/
1375 0, /*tp_base*/
1376 0, /*tp_dict*/
1377 0, /*tp_descr_get*/
1378 0, /*tp_descr_set*/
1379 0, /*tp_dictoffset*/
1380 (initproc)BZ2File_init, /*tp_init*/
1381 PyType_GenericAlloc, /*tp_alloc*/
1382 PyType_GenericNew, /*tp_new*/
1383 PyObject_Free, /*tp_free*/
1384 0, /*tp_is_gc*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001385};
1386
1387
1388/* ===================================================================== */
1389/* Methods of BZ2Comp. */
1390
1391PyDoc_STRVAR(BZ2Comp_compress__doc__,
1392"compress(data) -> string\n\
1393\n\
1394Provide more data to the compressor object. It will return chunks of\n\
1395compressed data whenever possible. When you've finished providing data\n\
1396to compress, call the flush() method to finish the compression process,\n\
1397and return what is left in the internal buffers.\n\
1398");
1399
1400static PyObject *
1401BZ2Comp_compress(BZ2CompObject *self, PyObject *args)
1402{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001403 Py_buffer pdata;
1404 char *data;
1405 int datasize;
1406 int bufsize = SMALLCHUNK;
1407 PY_LONG_LONG totalout;
1408 PyObject *ret = NULL;
1409 bz_stream *bzs = &self->bzs;
1410 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001411
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001412 if (!PyArg_ParseTuple(args, "y*:compress", &pdata))
1413 return NULL;
1414 data = pdata.buf;
1415 datasize = pdata.len;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001416
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001417 if (datasize == 0) {
1418 PyBuffer_Release(&pdata);
1419 return PyBytes_FromStringAndSize("", 0);
1420 }
Gustavo Niemeyera6e436e2004-02-14 00:02:45 +00001421
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001422 ACQUIRE_LOCK(self);
1423 if (!self->running) {
1424 PyErr_SetString(PyExc_ValueError,
1425 "this object was already flushed");
1426 goto error;
1427 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001428
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001429 ret = PyBytes_FromStringAndSize(NULL, bufsize);
1430 if (!ret)
1431 goto error;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001432
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001433 bzs->next_in = data;
1434 bzs->avail_in = datasize;
1435 bzs->next_out = BUF(ret);
1436 bzs->avail_out = bufsize;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001437
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001438 totalout = BZS_TOTAL_OUT(bzs);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001439
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001440 for (;;) {
1441 Py_BEGIN_ALLOW_THREADS
1442 bzerror = BZ2_bzCompress(bzs, BZ_RUN);
1443 Py_END_ALLOW_THREADS
1444 if (bzerror != BZ_RUN_OK) {
1445 Util_CatchBZ2Error(bzerror);
1446 goto error;
1447 }
1448 if (bzs->avail_in == 0)
1449 break; /* no more input data */
1450 if (bzs->avail_out == 0) {
1451 bufsize = Util_NewBufferSize(bufsize);
1452 if (_PyBytes_Resize(&ret, bufsize) < 0) {
1453 BZ2_bzCompressEnd(bzs);
1454 goto error;
1455 }
1456 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1457 - totalout);
1458 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1459 }
1460 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001461
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001462 if (_PyBytes_Resize(&ret,
1463 (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout)) < 0)
1464 goto error;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001465
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001466 RELEASE_LOCK(self);
1467 PyBuffer_Release(&pdata);
1468 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001469
1470error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001471 RELEASE_LOCK(self);
1472 PyBuffer_Release(&pdata);
1473 Py_XDECREF(ret);
1474 return NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001475}
1476
1477PyDoc_STRVAR(BZ2Comp_flush__doc__,
1478"flush() -> string\n\
1479\n\
1480Finish the compression process and return what is left in internal buffers.\n\
1481You must not use the compressor object after calling this method.\n\
1482");
1483
1484static PyObject *
1485BZ2Comp_flush(BZ2CompObject *self)
1486{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001487 int bufsize = SMALLCHUNK;
1488 PyObject *ret = NULL;
1489 bz_stream *bzs = &self->bzs;
1490 PY_LONG_LONG totalout;
1491 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001492
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001493 ACQUIRE_LOCK(self);
1494 if (!self->running) {
1495 PyErr_SetString(PyExc_ValueError, "object was already "
1496 "flushed");
1497 goto error;
1498 }
1499 self->running = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001500
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001501 ret = PyBytes_FromStringAndSize(NULL, bufsize);
1502 if (!ret)
1503 goto error;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001504
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001505 bzs->next_out = BUF(ret);
1506 bzs->avail_out = bufsize;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001507
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001508 totalout = BZS_TOTAL_OUT(bzs);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001509
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001510 for (;;) {
1511 Py_BEGIN_ALLOW_THREADS
1512 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
1513 Py_END_ALLOW_THREADS
1514 if (bzerror == BZ_STREAM_END) {
1515 break;
1516 } else if (bzerror != BZ_FINISH_OK) {
1517 Util_CatchBZ2Error(bzerror);
1518 goto error;
1519 }
1520 if (bzs->avail_out == 0) {
1521 bufsize = Util_NewBufferSize(bufsize);
1522 if (_PyBytes_Resize(&ret, bufsize) < 0)
1523 goto error;
1524 bzs->next_out = BUF(ret);
1525 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1526 - totalout);
1527 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1528 }
1529 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001530
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001531 if (bzs->avail_out != 0) {
1532 if (_PyBytes_Resize(&ret,
1533 (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout)) < 0)
1534 goto error;
1535 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001536
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001537 RELEASE_LOCK(self);
1538 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001539
1540error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001541 RELEASE_LOCK(self);
1542 Py_XDECREF(ret);
1543 return NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001544}
1545
1546static PyMethodDef BZ2Comp_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001547 {"compress", (PyCFunction)BZ2Comp_compress, METH_VARARGS,
1548 BZ2Comp_compress__doc__},
1549 {"flush", (PyCFunction)BZ2Comp_flush, METH_NOARGS,
1550 BZ2Comp_flush__doc__},
1551 {NULL, NULL} /* sentinel */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001552};
1553
1554
1555/* ===================================================================== */
1556/* Slot definitions for BZ2Comp_Type. */
1557
1558static int
1559BZ2Comp_init(BZ2CompObject *self, PyObject *args, PyObject *kwargs)
1560{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001561 int compresslevel = 9;
1562 int bzerror;
1563 static char *kwlist[] = {"compresslevel", 0};
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001564
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001565 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i:BZ2Compressor",
1566 kwlist, &compresslevel))
1567 return -1;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001568
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001569 if (compresslevel < 1 || compresslevel > 9) {
1570 PyErr_SetString(PyExc_ValueError,
1571 "compresslevel must be between 1 and 9");
1572 goto error;
1573 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001574
1575#ifdef WITH_THREAD
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001576 self->lock = PyThread_allocate_lock();
1577 if (!self->lock) {
1578 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
1579 goto error;
1580 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001581#endif
1582
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001583 memset(&self->bzs, 0, sizeof(bz_stream));
1584 bzerror = BZ2_bzCompressInit(&self->bzs, compresslevel, 0, 0);
1585 if (bzerror != BZ_OK) {
1586 Util_CatchBZ2Error(bzerror);
1587 goto error;
1588 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001589
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001590 self->running = 1;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001591
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001592 return 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001593error:
1594#ifdef WITH_THREAD
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001595 if (self->lock) {
1596 PyThread_free_lock(self->lock);
1597 self->lock = NULL;
1598 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001599#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001600 return -1;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001601}
1602
1603static void
1604BZ2Comp_dealloc(BZ2CompObject *self)
1605{
1606#ifdef WITH_THREAD
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001607 if (self->lock)
1608 PyThread_free_lock(self->lock);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001609#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001610 BZ2_bzCompressEnd(&self->bzs);
1611 Py_TYPE(self)->tp_free((PyObject *)self);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001612}
1613
1614
1615/* ===================================================================== */
1616/* BZ2Comp_Type definition. */
1617
1618PyDoc_STRVAR(BZ2Comp__doc__,
1619"BZ2Compressor([compresslevel=9]) -> compressor object\n\
1620\n\
1621Create a new compressor object. This object may be used to compress\n\
1622data sequentially. If you want to compress data in one shot, use the\n\
1623compress() function instead. The compresslevel parameter, if given,\n\
1624must be a number between 1 and 9.\n\
1625");
1626
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001627static PyTypeObject BZ2Comp_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001628 PyVarObject_HEAD_INIT(NULL, 0)
1629 "bz2.BZ2Compressor", /*tp_name*/
1630 sizeof(BZ2CompObject), /*tp_basicsize*/
1631 0, /*tp_itemsize*/
1632 (destructor)BZ2Comp_dealloc, /*tp_dealloc*/
1633 0, /*tp_print*/
1634 0, /*tp_getattr*/
1635 0, /*tp_setattr*/
1636 0, /*tp_reserved*/
1637 0, /*tp_repr*/
1638 0, /*tp_as_number*/
1639 0, /*tp_as_sequence*/
1640 0, /*tp_as_mapping*/
1641 0, /*tp_hash*/
1642 0, /*tp_call*/
1643 0, /*tp_str*/
1644 PyObject_GenericGetAttr,/*tp_getattro*/
1645 PyObject_GenericSetAttr,/*tp_setattro*/
1646 0, /*tp_as_buffer*/
1647 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1648 BZ2Comp__doc__, /*tp_doc*/
1649 0, /*tp_traverse*/
1650 0, /*tp_clear*/
1651 0, /*tp_richcompare*/
1652 0, /*tp_weaklistoffset*/
1653 0, /*tp_iter*/
1654 0, /*tp_iternext*/
1655 BZ2Comp_methods, /*tp_methods*/
1656 0, /*tp_members*/
1657 0, /*tp_getset*/
1658 0, /*tp_base*/
1659 0, /*tp_dict*/
1660 0, /*tp_descr_get*/
1661 0, /*tp_descr_set*/
1662 0, /*tp_dictoffset*/
1663 (initproc)BZ2Comp_init, /*tp_init*/
1664 PyType_GenericAlloc, /*tp_alloc*/
1665 PyType_GenericNew, /*tp_new*/
1666 PyObject_Free, /*tp_free*/
1667 0, /*tp_is_gc*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001668};
1669
1670
1671/* ===================================================================== */
1672/* Members of BZ2Decomp. */
1673
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001674#undef OFF
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001675#define OFF(x) offsetof(BZ2DecompObject, x)
1676
1677static PyMemberDef BZ2Decomp_members[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001678 {"unused_data", T_OBJECT, OFF(unused_data), READONLY},
1679 {NULL} /* Sentinel */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001680};
1681
1682
1683/* ===================================================================== */
1684/* Methods of BZ2Decomp. */
1685
1686PyDoc_STRVAR(BZ2Decomp_decompress__doc__,
1687"decompress(data) -> string\n\
1688\n\
1689Provide more data to the decompressor object. It will return chunks\n\
1690of decompressed data whenever possible. If you try to decompress data\n\
1691after the end of stream is found, EOFError will be raised. If any data\n\
1692was found after the end of stream, it'll be ignored and saved in\n\
1693unused_data attribute.\n\
1694");
1695
1696static PyObject *
1697BZ2Decomp_decompress(BZ2DecompObject *self, PyObject *args)
1698{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001699 Py_buffer pdata;
1700 char *data;
1701 int datasize;
1702 int bufsize = SMALLCHUNK;
1703 PY_LONG_LONG totalout;
1704 PyObject *ret = NULL;
1705 bz_stream *bzs = &self->bzs;
1706 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001707
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001708 if (!PyArg_ParseTuple(args, "y*:decompress", &pdata))
1709 return NULL;
1710 data = pdata.buf;
1711 datasize = pdata.len;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001712
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001713 ACQUIRE_LOCK(self);
1714 if (!self->running) {
1715 PyErr_SetString(PyExc_EOFError, "end of stream was "
1716 "already found");
1717 goto error;
1718 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001719
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001720 ret = PyBytes_FromStringAndSize(NULL, bufsize);
1721 if (!ret)
1722 goto error;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001723
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001724 bzs->next_in = data;
1725 bzs->avail_in = datasize;
1726 bzs->next_out = BUF(ret);
1727 bzs->avail_out = bufsize;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001728
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001729 totalout = BZS_TOTAL_OUT(bzs);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001730
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001731 for (;;) {
1732 Py_BEGIN_ALLOW_THREADS
1733 bzerror = BZ2_bzDecompress(bzs);
1734 Py_END_ALLOW_THREADS
1735 if (bzerror == BZ_STREAM_END) {
1736 if (bzs->avail_in != 0) {
1737 Py_DECREF(self->unused_data);
1738 self->unused_data =
1739 PyBytes_FromStringAndSize(bzs->next_in,
1740 bzs->avail_in);
1741 }
1742 self->running = 0;
1743 break;
1744 }
1745 if (bzerror != BZ_OK) {
1746 Util_CatchBZ2Error(bzerror);
1747 goto error;
1748 }
1749 if (bzs->avail_in == 0)
1750 break; /* no more input data */
1751 if (bzs->avail_out == 0) {
1752 bufsize = Util_NewBufferSize(bufsize);
1753 if (_PyBytes_Resize(&ret, bufsize) < 0) {
1754 BZ2_bzDecompressEnd(bzs);
1755 goto error;
1756 }
1757 bzs->next_out = BUF(ret);
1758 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1759 - totalout);
1760 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1761 }
1762 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001763
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001764 if (bzs->avail_out != 0) {
1765 if (_PyBytes_Resize(&ret,
1766 (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout)) < 0)
1767 goto error;
1768 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001769
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001770 RELEASE_LOCK(self);
1771 PyBuffer_Release(&pdata);
1772 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001773
1774error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001775 RELEASE_LOCK(self);
1776 PyBuffer_Release(&pdata);
1777 Py_XDECREF(ret);
1778 return NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001779}
1780
1781static PyMethodDef BZ2Decomp_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001782 {"decompress", (PyCFunction)BZ2Decomp_decompress, METH_VARARGS, BZ2Decomp_decompress__doc__},
1783 {NULL, NULL} /* sentinel */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001784};
1785
1786
/* ===================================================================== */
/* Slot definitions for BZ2Decomp_Type. */
1789
1790static int
1791BZ2Decomp_init(BZ2DecompObject *self, PyObject *args, PyObject *kwargs)
1792{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001793 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001794
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001795 if (!PyArg_ParseTuple(args, ":BZ2Decompressor"))
1796 return -1;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001797
1798#ifdef WITH_THREAD
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001799 self->lock = PyThread_allocate_lock();
1800 if (!self->lock) {
1801 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
1802 goto error;
1803 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001804#endif
1805
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001806 self->unused_data = PyBytes_FromStringAndSize("", 0);
1807 if (!self->unused_data)
1808 goto error;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001809
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001810 memset(&self->bzs, 0, sizeof(bz_stream));
1811 bzerror = BZ2_bzDecompressInit(&self->bzs, 0, 0);
1812 if (bzerror != BZ_OK) {
1813 Util_CatchBZ2Error(bzerror);
1814 goto error;
1815 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001816
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001817 self->running = 1;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001818
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001819 return 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001820
1821error:
1822#ifdef WITH_THREAD
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001823 if (self->lock) {
1824 PyThread_free_lock(self->lock);
1825 self->lock = NULL;
1826 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001827#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001828 Py_CLEAR(self->unused_data);
1829 return -1;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001830}
1831
1832static void
1833BZ2Decomp_dealloc(BZ2DecompObject *self)
1834{
1835#ifdef WITH_THREAD
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001836 if (self->lock)
1837 PyThread_free_lock(self->lock);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001838#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001839 Py_XDECREF(self->unused_data);
1840 BZ2_bzDecompressEnd(&self->bzs);
1841 Py_TYPE(self)->tp_free((PyObject *)self);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001842}
1843
1844
/* ===================================================================== */
/* BZ2Decomp_Type definition. */
1847
1848PyDoc_STRVAR(BZ2Decomp__doc__,
1849"BZ2Decompressor() -> decompressor object\n\
1850\n\
1851Create a new decompressor object. This object may be used to decompress\n\
1852data sequentially. If you want to decompress data in one shot, use the\n\
1853decompress() function instead.\n\
1854");
1855
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001856static PyTypeObject BZ2Decomp_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001857 PyVarObject_HEAD_INIT(NULL, 0)
1858 "bz2.BZ2Decompressor", /*tp_name*/
1859 sizeof(BZ2DecompObject), /*tp_basicsize*/
1860 0, /*tp_itemsize*/
1861 (destructor)BZ2Decomp_dealloc, /*tp_dealloc*/
1862 0, /*tp_print*/
1863 0, /*tp_getattr*/
1864 0, /*tp_setattr*/
1865 0, /*tp_reserved*/
1866 0, /*tp_repr*/
1867 0, /*tp_as_number*/
1868 0, /*tp_as_sequence*/
1869 0, /*tp_as_mapping*/
1870 0, /*tp_hash*/
1871 0, /*tp_call*/
1872 0, /*tp_str*/
1873 PyObject_GenericGetAttr,/*tp_getattro*/
1874 PyObject_GenericSetAttr,/*tp_setattro*/
1875 0, /*tp_as_buffer*/
1876 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1877 BZ2Decomp__doc__, /*tp_doc*/
1878 0, /*tp_traverse*/
1879 0, /*tp_clear*/
1880 0, /*tp_richcompare*/
1881 0, /*tp_weaklistoffset*/
1882 0, /*tp_iter*/
1883 0, /*tp_iternext*/
1884 BZ2Decomp_methods, /*tp_methods*/
1885 BZ2Decomp_members, /*tp_members*/
1886 0, /*tp_getset*/
1887 0, /*tp_base*/
1888 0, /*tp_dict*/
1889 0, /*tp_descr_get*/
1890 0, /*tp_descr_set*/
1891 0, /*tp_dictoffset*/
1892 (initproc)BZ2Decomp_init, /*tp_init*/
1893 PyType_GenericAlloc, /*tp_alloc*/
1894 PyType_GenericNew, /*tp_new*/
1895 PyObject_Free, /*tp_free*/
1896 0, /*tp_is_gc*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001897};
1898
1899
/* ===================================================================== */
/* Module functions. */
1902
1903PyDoc_STRVAR(bz2_compress__doc__,
1904"compress(data [, compresslevel=9]) -> string\n\
1905\n\
1906Compress data in one shot. If you want to compress data sequentially,\n\
1907use an instance of BZ2Compressor instead. The compresslevel parameter, if\n\
1908given, must be a number between 1 and 9.\n\
1909");
1910
1911static PyObject *
1912bz2_compress(PyObject *self, PyObject *args, PyObject *kwargs)
1913{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001914 int compresslevel=9;
1915 Py_buffer pdata;
1916 char *data;
1917 int datasize;
1918 int bufsize;
1919 PyObject *ret = NULL;
1920 bz_stream _bzs;
1921 bz_stream *bzs = &_bzs;
1922 int bzerror;
1923 static char *kwlist[] = {"data", "compresslevel", 0};
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001924
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001925 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*|i",
1926 kwlist, &pdata,
1927 &compresslevel))
1928 return NULL;
1929 data = pdata.buf;
1930 datasize = pdata.len;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001931
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001932 if (compresslevel < 1 || compresslevel > 9) {
1933 PyErr_SetString(PyExc_ValueError,
1934 "compresslevel must be between 1 and 9");
1935 PyBuffer_Release(&pdata);
1936 return NULL;
1937 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001938
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001939 /* Conforming to bz2 manual, this is large enough to fit compressed
1940 * data in one shot. We will check it later anyway. */
1941 bufsize = datasize + (datasize/100+1) + 600;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001942
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001943 ret = PyBytes_FromStringAndSize(NULL, bufsize);
1944 if (!ret) {
1945 PyBuffer_Release(&pdata);
1946 return NULL;
1947 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001948
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001949 memset(bzs, 0, sizeof(bz_stream));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001950
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001951 bzs->next_in = data;
1952 bzs->avail_in = datasize;
1953 bzs->next_out = BUF(ret);
1954 bzs->avail_out = bufsize;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001955
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001956 bzerror = BZ2_bzCompressInit(bzs, compresslevel, 0, 0);
1957 if (bzerror != BZ_OK) {
1958 Util_CatchBZ2Error(bzerror);
1959 PyBuffer_Release(&pdata);
1960 Py_DECREF(ret);
1961 return NULL;
1962 }
Tim Peterse3228092002-11-09 04:21:44 +00001963
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001964 for (;;) {
1965 Py_BEGIN_ALLOW_THREADS
1966 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
1967 Py_END_ALLOW_THREADS
1968 if (bzerror == BZ_STREAM_END) {
1969 break;
1970 } else if (bzerror != BZ_FINISH_OK) {
1971 BZ2_bzCompressEnd(bzs);
1972 Util_CatchBZ2Error(bzerror);
1973 PyBuffer_Release(&pdata);
1974 Py_DECREF(ret);
1975 return NULL;
1976 }
1977 if (bzs->avail_out == 0) {
1978 bufsize = Util_NewBufferSize(bufsize);
1979 if (_PyBytes_Resize(&ret, bufsize) < 0) {
1980 BZ2_bzCompressEnd(bzs);
1981 PyBuffer_Release(&pdata);
1982 return NULL;
1983 }
1984 bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
1985 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1986 }
1987 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001988
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001989 if (bzs->avail_out != 0) {
1990 if (_PyBytes_Resize(&ret, (Py_ssize_t)BZS_TOTAL_OUT(bzs)) < 0) {
1991 ret = NULL;
1992 }
1993 }
1994 BZ2_bzCompressEnd(bzs);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001995
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001996 PyBuffer_Release(&pdata);
1997 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001998}
1999
2000PyDoc_STRVAR(bz2_decompress__doc__,
2001"decompress(data) -> decompressed data\n\
2002\n\
2003Decompress data in one shot. If you want to decompress data sequentially,\n\
2004use an instance of BZ2Decompressor instead.\n\
2005");
2006
2007static PyObject *
2008bz2_decompress(PyObject *self, PyObject *args)
2009{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002010 Py_buffer pdata;
2011 char *data;
2012 int datasize;
2013 int bufsize = SMALLCHUNK;
2014 PyObject *ret;
2015 bz_stream _bzs;
2016 bz_stream *bzs = &_bzs;
2017 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002018
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002019 if (!PyArg_ParseTuple(args, "y*:decompress", &pdata))
2020 return NULL;
2021 data = pdata.buf;
2022 datasize = pdata.len;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002023
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002024 if (datasize == 0) {
2025 PyBuffer_Release(&pdata);
2026 return PyBytes_FromStringAndSize("", 0);
2027 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002028
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002029 ret = PyBytes_FromStringAndSize(NULL, bufsize);
2030 if (!ret) {
2031 PyBuffer_Release(&pdata);
2032 return NULL;
2033 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002034
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002035 memset(bzs, 0, sizeof(bz_stream));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002036
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002037 bzs->next_in = data;
2038 bzs->avail_in = datasize;
2039 bzs->next_out = BUF(ret);
2040 bzs->avail_out = bufsize;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002041
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002042 bzerror = BZ2_bzDecompressInit(bzs, 0, 0);
2043 if (bzerror != BZ_OK) {
2044 Util_CatchBZ2Error(bzerror);
2045 Py_DECREF(ret);
2046 PyBuffer_Release(&pdata);
2047 return NULL;
2048 }
Tim Peterse3228092002-11-09 04:21:44 +00002049
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002050 for (;;) {
2051 Py_BEGIN_ALLOW_THREADS
2052 bzerror = BZ2_bzDecompress(bzs);
2053 Py_END_ALLOW_THREADS
2054 if (bzerror == BZ_STREAM_END) {
2055 break;
2056 } else if (bzerror != BZ_OK) {
2057 BZ2_bzDecompressEnd(bzs);
2058 Util_CatchBZ2Error(bzerror);
2059 PyBuffer_Release(&pdata);
2060 Py_DECREF(ret);
2061 return NULL;
2062 }
2063 if (bzs->avail_in == 0) {
2064 BZ2_bzDecompressEnd(bzs);
2065 PyErr_SetString(PyExc_ValueError,
2066 "couldn't find end of stream");
2067 PyBuffer_Release(&pdata);
2068 Py_DECREF(ret);
2069 return NULL;
2070 }
2071 if (bzs->avail_out == 0) {
2072 bufsize = Util_NewBufferSize(bufsize);
2073 if (_PyBytes_Resize(&ret, bufsize) < 0) {
2074 BZ2_bzDecompressEnd(bzs);
2075 PyBuffer_Release(&pdata);
2076 return NULL;
2077 }
2078 bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
2079 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
2080 }
2081 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002082
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002083 if (bzs->avail_out != 0) {
2084 if (_PyBytes_Resize(&ret, (Py_ssize_t)BZS_TOTAL_OUT(bzs)) < 0) {
2085 ret = NULL;
2086 }
2087 }
2088 BZ2_bzDecompressEnd(bzs);
2089 PyBuffer_Release(&pdata);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002090
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002091 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002092}
2093
2094static PyMethodDef bz2_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002095 {"compress", (PyCFunction) bz2_compress, METH_VARARGS|METH_KEYWORDS,
2096 bz2_compress__doc__},
2097 {"decompress", (PyCFunction) bz2_decompress, METH_VARARGS,
2098 bz2_decompress__doc__},
2099 {NULL, NULL} /* sentinel */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002100};
2101
/* ===================================================================== */
/* Initialization function. */
2104
2105PyDoc_STRVAR(bz2__doc__,
2106"The python bz2 module provides a comprehensive interface for\n\
2107the bz2 compression library. It implements a complete file\n\
2108interface, one shot (de)compression functions, and types for\n\
2109sequential (de)compression.\n\
2110");
2111
Martin v. Löwis1a214512008-06-11 05:26:20 +00002112
2113static struct PyModuleDef bz2module = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002114 PyModuleDef_HEAD_INIT,
2115 "bz2",
2116 bz2__doc__,
2117 -1,
2118 bz2_methods,
2119 NULL,
2120 NULL,
2121 NULL,
2122 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00002123};
2124
Neal Norwitz21d896c2003-07-01 20:15:21 +00002125PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00002126PyInit_bz2(void)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002127{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002128 PyObject *m;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002129
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002130 Py_TYPE(&BZ2File_Type) = &PyType_Type;
2131 Py_TYPE(&BZ2Comp_Type) = &PyType_Type;
2132 Py_TYPE(&BZ2Decomp_Type) = &PyType_Type;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002133
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002134 m = PyModule_Create(&bz2module);
2135 if (m == NULL)
2136 return NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002137
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002138 PyModule_AddObject(m, "__author__", PyUnicode_FromString(__author__));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002139
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002140 Py_INCREF(&BZ2File_Type);
2141 PyModule_AddObject(m, "BZ2File", (PyObject *)&BZ2File_Type);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002142
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002143 Py_INCREF(&BZ2Comp_Type);
2144 PyModule_AddObject(m, "BZ2Compressor", (PyObject *)&BZ2Comp_Type);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002145
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002146 Py_INCREF(&BZ2Decomp_Type);
2147 PyModule_AddObject(m, "BZ2Decompressor", (PyObject *)&BZ2Decomp_Type);
2148 return m;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002149}