blob: f41896998d9befda0e2cab1588628ea86a65e6e2 [file] [log] [blame]
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001/*
2
3python-bz2 - python bz2 library interface
4
5Copyright (c) 2002 Gustavo Niemeyer <niemeyer@conectiva.com>
6Copyright (c) 2002 Python Software Foundation; All Rights Reserved
7
8*/
9
Martin v. Löwise17af7b2002-11-23 09:16:19 +000010#include "Python.h"
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000011#include <stdio.h>
12#include <bzlib.h>
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000013#include "structmember.h"
14
15#ifdef WITH_THREAD
16#include "pythread.h"
17#endif
18
19static char __author__[] =
20"The bz2 python module was written by:\n\
21\n\
22 Gustavo Niemeyer <niemeyer@conectiva.com>\n\
23";
24
/* Py_off_t: our very own off_t-like type, 64-bit if possible
 * (selection logic copied from Objects/fileobject.c).
 *
 * Without large-file support plain off_t is used as-is.  With it, off_t is
 * used when it is at least 8 bytes wide, otherwise fpos_t if that one is;
 * if neither qualifies the build is aborted. */
#if !defined(HAVE_LARGEFILE_SUPPORT) || SIZEOF_OFF_T >= 8
typedef off_t Py_off_t;
#elif SIZEOF_FPOS_T >= 8
typedef fpos_t Py_off_t;
#else
#error "Large file support, but neither off_t nor fpos_t is large enough."
#endif
36
/* Shorthand for the character buffer of a bytes object. */
#define BUF(v) PyBytes_AS_STRING(v)

/* Values stored in BZ2FileObject.mode. */
#define MODE_CLOSED   0   /* closed: methods raise "I/O operation on closed file" */
#define MODE_READ     1   /* reading, end of stream not reached yet */
#define MODE_READ_EOF 2   /* reading, BZ_STREAM_END has been seen */
#define MODE_WRITE    3   /* writing */

/* True only when v's type is exactly BZ2File_Type. */
#define BZ2FileObject_Check(v) (Py_TYPE(v) == &BZ2File_Type)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000045
Gustavo Niemeyer7628f1f2003-04-27 06:25:24 +000046
#ifdef BZ_CONFIG_ERROR

/* The library provides the BZ2_-prefixed API and keeps the output byte
 * count as two 32-bit halves.  BZS_TOTAL_OUT(bzs) combines them when a
 * 64-bit integer type is available and otherwise yields the low half only.
 * The argument and the whole expansion are parenthesized so the macro is
 * safe with any pointer expression and inside larger expressions. */
#if SIZEOF_LONG >= 8
#define BZS_TOTAL_OUT(bzs) \
    (((long)(bzs)->total_out_hi32 << 32) + (bzs)->total_out_lo32)
#elif SIZEOF_LONG_LONG >= 8
#define BZS_TOTAL_OUT(bzs) \
    (((PY_LONG_LONG)(bzs)->total_out_hi32 << 32) + (bzs)->total_out_lo32)
#else
#define BZS_TOTAL_OUT(bzs) \
    ((bzs)->total_out_lo32)
#endif

#else /* ! BZ_CONFIG_ERROR */

/* No BZ_CONFIG_ERROR: map the BZ2_-prefixed names used throughout this
 * file onto the library's unprefixed entry points. */
#define BZ2_bzRead bzRead
#define BZ2_bzReadOpen bzReadOpen
#define BZ2_bzReadClose bzReadClose
#define BZ2_bzWrite bzWrite
#define BZ2_bzWriteOpen bzWriteOpen
#define BZ2_bzWriteClose bzWriteClose
#define BZ2_bzCompress bzCompress
#define BZ2_bzCompressInit bzCompressInit
#define BZ2_bzCompressEnd bzCompressEnd
#define BZ2_bzDecompress bzDecompress
#define BZ2_bzDecompressInit bzDecompressInit
#define BZ2_bzDecompressEnd bzDecompressEnd

/* Here the stream keeps a single total_out counter. */
#define BZS_TOTAL_OUT(bzs) ((bzs)->total_out)

#endif /* ! BZ_CONFIG_ERROR */
78
79
#ifdef WITH_THREAD
/* Take obj->lock.  A non-blocking attempt is made first; only when that
 * fails is the blocking acquire performed, wrapped in
 * Py_BEGIN_ALLOW_THREADS / Py_END_ALLOW_THREADS so other Python threads can
 * run while this one waits.  `obj` is parenthesized so any pointer
 * expression may be passed. */
#define ACQUIRE_LOCK(obj) do { \
    if (!PyThread_acquire_lock((obj)->lock, 0)) { \
        Py_BEGIN_ALLOW_THREADS \
        PyThread_acquire_lock((obj)->lock, 1); \
        Py_END_ALLOW_THREADS \
    } } while(0)
/* Release the lock taken by ACQUIRE_LOCK. */
#define RELEASE_LOCK(obj) PyThread_release_lock((obj)->lock)
#else
/* Built without thread support: locking is a no-op. */
#define ACQUIRE_LOCK(obj)
#define RELEASE_LOCK(obj)
#endif
92
/* Bits in f_newlinetypes.
 * NOTE(review): BZ2FileObject as declared in this file has no
 * f_newlinetypes member; these flags may be leftovers -- confirm. */
#define NEWLINE_UNKNOWN 0x0     /* No newline seen, yet */
#define NEWLINE_CR      0x1     /* \r newline seen */
#define NEWLINE_LF      0x2     /* \n newline seen */
#define NEWLINE_CRLF    0x4     /* \r\n newline seen */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000098
99/* ===================================================================== */
100/* Structure definitions. */
101
102typedef struct {
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000103 PyObject_HEAD
104 FILE *rawfp;
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000105
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000106 char* f_buf; /* Allocated readahead buffer */
107 char* f_bufend; /* Points after last occupied position */
108 char* f_bufptr; /* Current buffer position */
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000109
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000110 BZFILE *fp;
111 int mode;
112 Py_off_t pos;
113 Py_off_t size;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000114#ifdef WITH_THREAD
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000115 PyThread_type_lock lock;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000116#endif
117} BZ2FileObject;
118
119typedef struct {
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000120 PyObject_HEAD
121 bz_stream bzs;
122 int running;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000123#ifdef WITH_THREAD
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000124 PyThread_type_lock lock;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000125#endif
126} BZ2CompObject;
127
128typedef struct {
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000129 PyObject_HEAD
130 bz_stream bzs;
131 int running;
132 PyObject *unused_data;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000133#ifdef WITH_THREAD
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000134 PyThread_type_lock lock;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000135#endif
136} BZ2DecompObject;
137
138/* ===================================================================== */
139/* Utility functions. */
140
Antoine Pitrou1e2abe72010-08-01 20:12:04 +0000141/* Refuse regular I/O if there's data in the iteration-buffer.
142 * Mixing them would cause data to arrive out of order, as the read*
143 * methods don't use the iteration buffer. */
144static int
145check_iterbuffered(BZ2FileObject *f)
146{
147 if (f->f_buf != NULL &&
148 (f->f_bufend - f->f_bufptr) > 0 &&
149 f->f_buf[0] != '\0') {
150 PyErr_SetString(PyExc_ValueError,
151 "Mixing iteration and read methods would lose data");
152 return -1;
153 }
154 return 0;
155}
156
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000157static int
158Util_CatchBZ2Error(int bzerror)
159{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000160 int ret = 0;
161 switch(bzerror) {
162 case BZ_OK:
163 case BZ_STREAM_END:
164 break;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000165
Gustavo Niemeyer7628f1f2003-04-27 06:25:24 +0000166#ifdef BZ_CONFIG_ERROR
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000167 case BZ_CONFIG_ERROR:
168 PyErr_SetString(PyExc_SystemError,
169 "the bz2 library was not compiled "
170 "correctly");
171 ret = 1;
172 break;
Gustavo Niemeyer7628f1f2003-04-27 06:25:24 +0000173#endif
Tim Peterse3228092002-11-09 04:21:44 +0000174
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000175 case BZ_PARAM_ERROR:
176 PyErr_SetString(PyExc_ValueError,
177 "the bz2 library has received wrong "
178 "parameters");
179 ret = 1;
180 break;
Tim Peterse3228092002-11-09 04:21:44 +0000181
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000182 case BZ_MEM_ERROR:
183 PyErr_NoMemory();
184 ret = 1;
185 break;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000186
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000187 case BZ_DATA_ERROR:
188 case BZ_DATA_ERROR_MAGIC:
189 PyErr_SetString(PyExc_IOError, "invalid data stream");
190 ret = 1;
191 break;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000192
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000193 case BZ_IO_ERROR:
194 PyErr_SetString(PyExc_IOError, "unknown IO error");
195 ret = 1;
196 break;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000197
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000198 case BZ_UNEXPECTED_EOF:
199 PyErr_SetString(PyExc_EOFError,
200 "compressed file ended before the "
201 "logical end-of-stream was detected");
202 ret = 1;
203 break;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000204
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000205 case BZ_SEQUENCE_ERROR:
206 PyErr_SetString(PyExc_RuntimeError,
207 "wrong sequence of bz2 library "
208 "commands used");
209 ret = 1;
210 break;
211 }
212 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000213}
214
/* SMALLCHUNK: the larger of BUFSIZ and 8192. */
#if BUFSIZ >= 8192
#define SMALLCHUNK BUFSIZ
#else
#define SMALLCHUNK 8192
#endif

/* BIGCHUNK: 512 KiB, reduced to 16 KiB when int is narrower than 4 bytes. */
#if SIZEOF_INT >= 4
#define BIGCHUNK (512 * 1024)
#else
#define BIGCHUNK (512 * 32)
#endif

/* This is a hacked version of Python's fileobject.c:new_buffersize().
 *
 * Given the current buffer size, return the size to grow it to:
 *   - up to SMALLCHUNK: grow by SMALLCHUNK;
 *   - above SMALLCHUNK and up to BIGCHUNK: double;
 *   - above BIGCHUNK: grow by BIGCHUNK. */
static size_t
Util_NewBufferSize(size_t currentsize)
{
    size_t step;

    if (currentsize <= SMALLCHUNK)
        step = SMALLCHUNK;
    else if (currentsize <= BIGCHUNK)
        step = currentsize;
    else
        step = BIGCHUNK;

    return currentsize + step;
}
241
242/* This is a hacked version of Python's fileobject.c:get_line(). */
243static PyObject *
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000244Util_GetLine(BZ2FileObject *f, int n)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000245{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000246 char c;
247 char *buf, *end;
248 size_t total_v_size; /* total # of slots in buffer */
249 size_t used_v_size; /* # used slots in buffer */
250 size_t increment; /* amount to increment the buffer */
251 PyObject *v;
252 int bzerror;
253 int bytes_read;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000254
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000255 total_v_size = n > 0 ? n : 100;
256 v = PyBytes_FromStringAndSize((char *)NULL, total_v_size);
257 if (v == NULL)
258 return NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000259
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000260 buf = BUF(v);
261 end = buf + total_v_size;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000262
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000263 for (;;) {
264 Py_BEGIN_ALLOW_THREADS
265 do {
266 bytes_read = BZ2_bzRead(&bzerror, f->fp, &c, 1);
267 f->pos++;
268 if (bytes_read == 0)
269 break;
270 *buf++ = c;
271 } while (bzerror == BZ_OK && c != '\n' && buf != end);
272 Py_END_ALLOW_THREADS
273 if (bzerror == BZ_STREAM_END) {
274 f->size = f->pos;
275 f->mode = MODE_READ_EOF;
276 break;
277 } else if (bzerror != BZ_OK) {
278 Util_CatchBZ2Error(bzerror);
279 Py_DECREF(v);
280 return NULL;
281 }
282 if (c == '\n')
283 break;
284 /* Must be because buf == end */
285 if (n > 0)
286 break;
287 used_v_size = total_v_size;
288 increment = total_v_size >> 2; /* mild exponential growth */
289 total_v_size += increment;
290 if (total_v_size > INT_MAX) {
291 PyErr_SetString(PyExc_OverflowError,
292 "line is longer than a Python string can hold");
293 Py_DECREF(v);
294 return NULL;
295 }
296 if (_PyBytes_Resize(&v, total_v_size) < 0) {
297 return NULL;
298 }
299 buf = BUF(v) + used_v_size;
300 end = BUF(v) + total_v_size;
301 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000302
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000303 used_v_size = buf - BUF(v);
304 if (used_v_size != total_v_size) {
305 if (_PyBytes_Resize(&v, used_v_size) < 0) {
306 v = NULL;
307 }
308 }
309 return v;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000310}
311
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000312/* This is a hacked version of Python's fileobject.c:drop_readahead(). */
313static void
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000314Util_DropReadAhead(BZ2FileObject *f)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000315{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000316 if (f->f_buf != NULL) {
317 PyMem_Free(f->f_buf);
318 f->f_buf = NULL;
319 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000320}
321
322/* This is a hacked version of Python's fileobject.c:readahead(). */
323static int
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000324Util_ReadAhead(BZ2FileObject *f, int bufsize)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000325{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000326 int chunksize;
327 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000328
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000329 if (f->f_buf != NULL) {
330 if((f->f_bufend - f->f_bufptr) >= 1)
331 return 0;
332 else
333 Util_DropReadAhead(f);
334 }
335 if (f->mode == MODE_READ_EOF) {
336 f->f_bufptr = f->f_buf;
337 f->f_bufend = f->f_buf;
338 return 0;
339 }
340 if ((f->f_buf = PyMem_Malloc(bufsize)) == NULL) {
341 PyErr_NoMemory();
342 return -1;
343 }
344 Py_BEGIN_ALLOW_THREADS
345 chunksize = BZ2_bzRead(&bzerror, f->fp, f->f_buf, bufsize);
346 Py_END_ALLOW_THREADS
347 f->pos += chunksize;
348 if (bzerror == BZ_STREAM_END) {
349 f->size = f->pos;
350 f->mode = MODE_READ_EOF;
351 } else if (bzerror != BZ_OK) {
352 Util_CatchBZ2Error(bzerror);
353 Util_DropReadAhead(f);
354 return -1;
355 }
356 f->f_bufptr = f->f_buf;
357 f->f_bufend = f->f_buf + chunksize;
358 return 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000359}
360
361/* This is a hacked version of Python's
362 * fileobject.c:readahead_get_line_skip(). */
Christian Heimes72b710a2008-05-26 13:28:38 +0000363static PyBytesObject *
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000364Util_ReadAheadGetLineSkip(BZ2FileObject *f, int skip, int bufsize)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000365{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000366 PyBytesObject* s;
367 char *bufptr;
368 char *buf;
369 int len;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000370
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000371 if (f->f_buf == NULL)
372 if (Util_ReadAhead(f, bufsize) < 0)
373 return NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000374
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000375 len = f->f_bufend - f->f_bufptr;
376 if (len == 0)
377 return (PyBytesObject *)
378 PyBytes_FromStringAndSize(NULL, skip);
379 bufptr = memchr(f->f_bufptr, '\n', len);
380 if (bufptr != NULL) {
381 bufptr++; /* Count the '\n' */
382 len = bufptr - f->f_bufptr;
383 s = (PyBytesObject *)
384 PyBytes_FromStringAndSize(NULL, skip+len);
385 if (s == NULL)
386 return NULL;
387 memcpy(PyBytes_AS_STRING(s)+skip, f->f_bufptr, len);
388 f->f_bufptr = bufptr;
389 if (bufptr == f->f_bufend)
390 Util_DropReadAhead(f);
391 } else {
392 bufptr = f->f_bufptr;
393 buf = f->f_buf;
394 f->f_buf = NULL; /* Force new readahead buffer */
395 s = Util_ReadAheadGetLineSkip(f, skip+len,
396 bufsize + (bufsize>>2));
397 if (s == NULL) {
398 PyMem_Free(buf);
399 return NULL;
400 }
401 memcpy(PyBytes_AS_STRING(s)+skip, bufptr, len);
402 PyMem_Free(buf);
403 }
404 return s;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000405}
406
407/* ===================================================================== */
408/* Methods of BZ2File. */
409
410PyDoc_STRVAR(BZ2File_read__doc__,
411"read([size]) -> string\n\
412\n\
413Read at most size uncompressed bytes, returned as a string. If the size\n\
414argument is negative or omitted, read until EOF is reached.\n\
415");
416
417/* This is a hacked version of Python's fileobject.c:file_read(). */
418static PyObject *
419BZ2File_read(BZ2FileObject *self, PyObject *args)
420{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000421 long bytesrequested = -1;
422 size_t bytesread, buffersize, chunksize;
423 int bzerror;
424 PyObject *ret = NULL;
Tim Peterse3228092002-11-09 04:21:44 +0000425
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000426 if (!PyArg_ParseTuple(args, "|l:read", &bytesrequested))
427 return NULL;
Tim Peterse3228092002-11-09 04:21:44 +0000428
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000429 ACQUIRE_LOCK(self);
430 switch (self->mode) {
431 case MODE_READ:
432 break;
433 case MODE_READ_EOF:
434 ret = PyBytes_FromStringAndSize("", 0);
435 goto cleanup;
436 case MODE_CLOSED:
437 PyErr_SetString(PyExc_ValueError,
438 "I/O operation on closed file");
439 goto cleanup;
440 default:
441 PyErr_SetString(PyExc_IOError,
442 "file is not ready for reading");
443 goto cleanup;
444 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000445
Antoine Pitrou1e2abe72010-08-01 20:12:04 +0000446 /* refuse to mix with f.next() */
447 if (check_iterbuffered(self))
448 goto cleanup;
449
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000450 if (bytesrequested < 0)
451 buffersize = Util_NewBufferSize((size_t)0);
452 else
453 buffersize = bytesrequested;
454 if (buffersize > INT_MAX) {
455 PyErr_SetString(PyExc_OverflowError,
456 "requested number of bytes is "
457 "more than a Python string can hold");
458 goto cleanup;
459 }
460 ret = PyBytes_FromStringAndSize((char *)NULL, buffersize);
461 if (ret == NULL || buffersize == 0)
462 goto cleanup;
463 bytesread = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000464
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000465 for (;;) {
466 Py_BEGIN_ALLOW_THREADS
467 chunksize = BZ2_bzRead(&bzerror, self->fp,
468 BUF(ret)+bytesread,
469 buffersize-bytesread);
470 self->pos += chunksize;
471 Py_END_ALLOW_THREADS
472 bytesread += chunksize;
473 if (bzerror == BZ_STREAM_END) {
474 self->size = self->pos;
475 self->mode = MODE_READ_EOF;
476 break;
477 } else if (bzerror != BZ_OK) {
478 Util_CatchBZ2Error(bzerror);
479 Py_DECREF(ret);
480 ret = NULL;
481 goto cleanup;
482 }
483 if (bytesrequested < 0) {
484 buffersize = Util_NewBufferSize(buffersize);
485 if (_PyBytes_Resize(&ret, buffersize) < 0) {
486 ret = NULL;
487 goto cleanup;
488 }
489 } else {
490 break;
491 }
492 }
493 if (bytesread != buffersize) {
494 if (_PyBytes_Resize(&ret, bytesread) < 0) {
495 ret = NULL;
496 }
497 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000498
499cleanup:
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000500 RELEASE_LOCK(self);
501 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000502}
503
504PyDoc_STRVAR(BZ2File_readline__doc__,
505"readline([size]) -> string\n\
506\n\
507Return the next line from the file, as a string, retaining newline.\n\
508A non-negative size argument will limit the maximum number of bytes to\n\
509return (an incomplete line may be returned then). Return an empty\n\
510string at EOF.\n\
511");
512
513static PyObject *
514BZ2File_readline(BZ2FileObject *self, PyObject *args)
515{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000516 PyObject *ret = NULL;
517 int sizehint = -1;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000518
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000519 if (!PyArg_ParseTuple(args, "|i:readline", &sizehint))
520 return NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000521
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000522 ACQUIRE_LOCK(self);
523 switch (self->mode) {
524 case MODE_READ:
525 break;
526 case MODE_READ_EOF:
527 ret = PyBytes_FromStringAndSize("", 0);
528 goto cleanup;
529 case MODE_CLOSED:
530 PyErr_SetString(PyExc_ValueError,
531 "I/O operation on closed file");
532 goto cleanup;
533 default:
534 PyErr_SetString(PyExc_IOError,
535 "file is not ready for reading");
536 goto cleanup;
537 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000538
Antoine Pitrou1e2abe72010-08-01 20:12:04 +0000539 /* refuse to mix with f.next() */
540 if (check_iterbuffered(self))
541 goto cleanup;
542
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000543 if (sizehint == 0)
544 ret = PyBytes_FromStringAndSize("", 0);
545 else
546 ret = Util_GetLine(self, (sizehint < 0) ? 0 : sizehint);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000547
548cleanup:
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000549 RELEASE_LOCK(self);
550 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000551}
552
553PyDoc_STRVAR(BZ2File_readlines__doc__,
554"readlines([size]) -> list\n\
555\n\
556Call readline() repeatedly and return a list of lines read.\n\
557The optional size argument, if given, is an approximate bound on the\n\
558total number of bytes in the lines returned.\n\
559");
560
561/* This is a hacked version of Python's fileobject.c:file_readlines(). */
562static PyObject *
563BZ2File_readlines(BZ2FileObject *self, PyObject *args)
564{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000565 long sizehint = 0;
566 PyObject *list = NULL;
567 PyObject *line;
568 char small_buffer[SMALLCHUNK];
569 char *buffer = small_buffer;
570 size_t buffersize = SMALLCHUNK;
571 PyObject *big_buffer = NULL;
572 size_t nfilled = 0;
573 size_t nread;
574 size_t totalread = 0;
575 char *p, *q, *end;
576 int err;
577 int shortread = 0;
578 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000579
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000580 if (!PyArg_ParseTuple(args, "|l:readlines", &sizehint))
581 return NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000582
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000583 ACQUIRE_LOCK(self);
584 switch (self->mode) {
585 case MODE_READ:
586 break;
587 case MODE_READ_EOF:
588 list = PyList_New(0);
589 goto cleanup;
590 case MODE_CLOSED:
591 PyErr_SetString(PyExc_ValueError,
592 "I/O operation on closed file");
593 goto cleanup;
594 default:
595 PyErr_SetString(PyExc_IOError,
596 "file is not ready for reading");
597 goto cleanup;
598 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000599
Antoine Pitrou1e2abe72010-08-01 20:12:04 +0000600 /* refuse to mix with f.next() */
601 if (check_iterbuffered(self))
602 goto cleanup;
603
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000604 if ((list = PyList_New(0)) == NULL)
605 goto cleanup;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000606
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000607 for (;;) {
608 Py_BEGIN_ALLOW_THREADS
609 nread = BZ2_bzRead(&bzerror, self->fp,
610 buffer+nfilled, buffersize-nfilled);
611 self->pos += nread;
612 Py_END_ALLOW_THREADS
613 if (bzerror == BZ_STREAM_END) {
614 self->size = self->pos;
615 self->mode = MODE_READ_EOF;
616 if (nread == 0) {
617 sizehint = 0;
618 break;
619 }
620 shortread = 1;
621 } else if (bzerror != BZ_OK) {
622 Util_CatchBZ2Error(bzerror);
623 error:
624 Py_DECREF(list);
625 list = NULL;
626 goto cleanup;
627 }
628 totalread += nread;
629 p = memchr(buffer+nfilled, '\n', nread);
630 if (!shortread && p == NULL) {
631 /* Need a larger buffer to fit this line */
632 nfilled += nread;
633 buffersize *= 2;
634 if (buffersize > INT_MAX) {
635 PyErr_SetString(PyExc_OverflowError,
636 "line is longer than a Python string can hold");
637 goto error;
638 }
639 if (big_buffer == NULL) {
640 /* Create the big buffer */
641 big_buffer = PyBytes_FromStringAndSize(
642 NULL, buffersize);
643 if (big_buffer == NULL)
644 goto error;
645 buffer = PyBytes_AS_STRING(big_buffer);
646 memcpy(buffer, small_buffer, nfilled);
647 }
648 else {
649 /* Grow the big buffer */
650 if (_PyBytes_Resize(&big_buffer, buffersize) < 0){
651 big_buffer = NULL;
652 goto error;
653 }
654 buffer = PyBytes_AS_STRING(big_buffer);
655 }
656 continue;
657 }
658 end = buffer+nfilled+nread;
659 q = buffer;
660 while (p != NULL) {
661 /* Process complete lines */
662 p++;
663 line = PyBytes_FromStringAndSize(q, p-q);
664 if (line == NULL)
665 goto error;
666 err = PyList_Append(list, line);
667 Py_DECREF(line);
668 if (err != 0)
669 goto error;
670 q = p;
671 p = memchr(q, '\n', end-q);
672 }
673 /* Move the remaining incomplete line to the start */
674 nfilled = end-q;
675 memmove(buffer, q, nfilled);
676 if (sizehint > 0)
677 if (totalread >= (size_t)sizehint)
678 break;
679 if (shortread) {
680 sizehint = 0;
681 break;
682 }
683 }
684 if (nfilled != 0) {
685 /* Partial last line */
686 line = PyBytes_FromStringAndSize(buffer, nfilled);
687 if (line == NULL)
688 goto error;
689 if (sizehint > 0) {
690 /* Need to complete the last line */
691 PyObject *rest = Util_GetLine(self, 0);
692 if (rest == NULL) {
693 Py_DECREF(line);
694 goto error;
695 }
696 PyBytes_Concat(&line, rest);
697 Py_DECREF(rest);
698 if (line == NULL)
699 goto error;
700 }
701 err = PyList_Append(list, line);
702 Py_DECREF(line);
703 if (err != 0)
704 goto error;
705 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000706
707 cleanup:
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000708 RELEASE_LOCK(self);
709 if (big_buffer) {
710 Py_DECREF(big_buffer);
711 }
712 return list;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000713}
714
715PyDoc_STRVAR(BZ2File_write__doc__,
716"write(data) -> None\n\
717\n\
718Write the 'data' string to file. Note that due to buffering, close() may\n\
719be needed before the file on disk reflects the data written.\n\
720");
721
722/* This is a hacked version of Python's fileobject.c:file_write(). */
723static PyObject *
724BZ2File_write(BZ2FileObject *self, PyObject *args)
725{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000726 PyObject *ret = NULL;
727 Py_buffer pbuf;
728 char *buf;
729 int len;
730 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000731
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000732 if (!PyArg_ParseTuple(args, "y*:write", &pbuf))
733 return NULL;
734 buf = pbuf.buf;
735 len = pbuf.len;
Tim Peterse3228092002-11-09 04:21:44 +0000736
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000737 ACQUIRE_LOCK(self);
738 switch (self->mode) {
739 case MODE_WRITE:
740 break;
Tim Peterse3228092002-11-09 04:21:44 +0000741
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000742 case MODE_CLOSED:
743 PyErr_SetString(PyExc_ValueError,
744 "I/O operation on closed file");
745 goto cleanup;
Tim Peterse3228092002-11-09 04:21:44 +0000746
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000747 default:
748 PyErr_SetString(PyExc_IOError,
749 "file is not ready for writing");
750 goto cleanup;
751 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000752
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000753 Py_BEGIN_ALLOW_THREADS
754 BZ2_bzWrite (&bzerror, self->fp, buf, len);
755 self->pos += len;
756 Py_END_ALLOW_THREADS
Tim Peterse3228092002-11-09 04:21:44 +0000757
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000758 if (bzerror != BZ_OK) {
759 Util_CatchBZ2Error(bzerror);
760 goto cleanup;
761 }
Tim Peterse3228092002-11-09 04:21:44 +0000762
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000763 Py_INCREF(Py_None);
764 ret = Py_None;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000765
766cleanup:
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000767 PyBuffer_Release(&pbuf);
768 RELEASE_LOCK(self);
769 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000770}
771
772PyDoc_STRVAR(BZ2File_writelines__doc__,
773"writelines(sequence_of_strings) -> None\n\
774\n\
775Write the sequence of strings to the file. Note that newlines are not\n\
776added. The sequence can be any iterable object producing strings. This is\n\
777equivalent to calling write() for each string.\n\
778");
779
780/* This is a hacked version of Python's fileobject.c:file_writelines(). */
781static PyObject *
782BZ2File_writelines(BZ2FileObject *self, PyObject *seq)
783{
784#define CHUNKSIZE 1000
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000785 PyObject *list = NULL;
786 PyObject *iter = NULL;
787 PyObject *ret = NULL;
788 PyObject *line;
789 int i, j, index, len, islist;
790 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000791
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000792 ACQUIRE_LOCK(self);
793 switch (self->mode) {
794 case MODE_WRITE:
795 break;
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000796
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000797 case MODE_CLOSED:
798 PyErr_SetString(PyExc_ValueError,
799 "I/O operation on closed file");
800 goto error;
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000801
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000802 default:
803 PyErr_SetString(PyExc_IOError,
804 "file is not ready for writing");
805 goto error;
806 }
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000807
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000808 islist = PyList_Check(seq);
809 if (!islist) {
810 iter = PyObject_GetIter(seq);
811 if (iter == NULL) {
812 PyErr_SetString(PyExc_TypeError,
813 "writelines() requires an iterable argument");
814 goto error;
815 }
816 list = PyList_New(CHUNKSIZE);
817 if (list == NULL)
818 goto error;
819 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000820
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000821 /* Strategy: slurp CHUNKSIZE lines into a private list,
822 checking that they are all strings, then write that list
823 without holding the interpreter lock, then come back for more. */
824 for (index = 0; ; index += CHUNKSIZE) {
825 if (islist) {
826 Py_XDECREF(list);
827 list = PyList_GetSlice(seq, index, index+CHUNKSIZE);
828 if (list == NULL)
829 goto error;
830 j = PyList_GET_SIZE(list);
831 }
832 else {
833 for (j = 0; j < CHUNKSIZE; j++) {
834 line = PyIter_Next(iter);
835 if (line == NULL) {
836 if (PyErr_Occurred())
837 goto error;
838 break;
839 }
840 PyList_SetItem(list, j, line);
841 }
842 }
843 if (j == 0)
844 break;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000845
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000846 /* Check that all entries are indeed byte strings. If not,
847 apply the same rules as for file.write() and
848 convert the rets to strings. This is slow, but
849 seems to be the only way since all conversion APIs
850 could potentially execute Python code. */
851 for (i = 0; i < j; i++) {
852 PyObject *v = PyList_GET_ITEM(list, i);
853 if (!PyBytes_Check(v)) {
854 const char *buffer;
855 Py_ssize_t len;
856 if (PyObject_AsCharBuffer(v, &buffer, &len)) {
857 PyErr_SetString(PyExc_TypeError,
858 "writelines() "
859 "argument must be "
860 "a sequence of "
861 "bytes objects");
862 goto error;
863 }
864 line = PyBytes_FromStringAndSize(buffer,
865 len);
866 if (line == NULL)
867 goto error;
868 Py_DECREF(v);
869 PyList_SET_ITEM(list, i, line);
870 }
871 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000872
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000873 /* Since we are releasing the global lock, the
874 following code may *not* execute Python code. */
875 Py_BEGIN_ALLOW_THREADS
876 for (i = 0; i < j; i++) {
877 line = PyList_GET_ITEM(list, i);
878 len = PyBytes_GET_SIZE(line);
879 BZ2_bzWrite (&bzerror, self->fp,
880 PyBytes_AS_STRING(line), len);
881 if (bzerror != BZ_OK) {
882 Py_BLOCK_THREADS
883 Util_CatchBZ2Error(bzerror);
884 goto error;
885 }
886 }
887 Py_END_ALLOW_THREADS
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000888
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000889 if (j < CHUNKSIZE)
890 break;
891 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000892
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000893 Py_INCREF(Py_None);
894 ret = Py_None;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000895
896 error:
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000897 RELEASE_LOCK(self);
898 Py_XDECREF(list);
899 Py_XDECREF(iter);
900 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000901#undef CHUNKSIZE
902}
903
904PyDoc_STRVAR(BZ2File_seek__doc__,
905"seek(offset [, whence]) -> None\n\
906\n\
907Move to new file position. Argument offset is a byte count. Optional\n\
908argument whence defaults to 0 (offset from start of file, offset\n\
909should be >= 0); other values are 1 (move relative to current position,\n\
910positive or negative), and 2 (move relative to end of file, usually\n\
911negative, although many platforms allow seeking beyond the end of a file).\n\
912\n\
913Note that seeking of bz2 files is emulated, and depending on the parameters\n\
914the operation may be extremely slow.\n\
915");
916
917static PyObject *
918BZ2File_seek(BZ2FileObject *self, PyObject *args)
919{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000920 int where = 0;
921 PyObject *offobj;
922 Py_off_t offset;
923 char small_buffer[SMALLCHUNK];
924 char *buffer = small_buffer;
925 size_t buffersize = SMALLCHUNK;
926 Py_off_t bytesread = 0;
927 size_t readsize;
928 int chunksize;
929 int bzerror;
930 PyObject *ret = NULL;
Tim Peterse3228092002-11-09 04:21:44 +0000931
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000932 if (!PyArg_ParseTuple(args, "O|i:seek", &offobj, &where))
933 return NULL;
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000934#if !defined(HAVE_LARGEFILE_SUPPORT)
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000935 offset = PyLong_AsLong(offobj);
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000936#else
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000937 offset = PyLong_Check(offobj) ?
938 PyLong_AsLongLong(offobj) : PyLong_AsLong(offobj);
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000939#endif
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000940 if (PyErr_Occurred())
941 return NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000942
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000943 ACQUIRE_LOCK(self);
944 Util_DropReadAhead(self);
945 switch (self->mode) {
946 case MODE_READ:
947 case MODE_READ_EOF:
948 break;
Tim Peterse3228092002-11-09 04:21:44 +0000949
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000950 case MODE_CLOSED:
951 PyErr_SetString(PyExc_ValueError,
952 "I/O operation on closed file");
953 goto cleanup;
Tim Peterse3228092002-11-09 04:21:44 +0000954
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000955 default:
956 PyErr_SetString(PyExc_IOError,
957 "seek works only while reading");
958 goto cleanup;
959 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000960
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000961 if (where == 2) {
962 if (self->size == -1) {
963 assert(self->mode != MODE_READ_EOF);
964 for (;;) {
965 Py_BEGIN_ALLOW_THREADS
966 chunksize = BZ2_bzRead(&bzerror, self->fp,
967 buffer, buffersize);
968 self->pos += chunksize;
969 Py_END_ALLOW_THREADS
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000970
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000971 bytesread += chunksize;
972 if (bzerror == BZ_STREAM_END) {
973 break;
974 } else if (bzerror != BZ_OK) {
975 Util_CatchBZ2Error(bzerror);
976 goto cleanup;
977 }
978 }
979 self->mode = MODE_READ_EOF;
980 self->size = self->pos;
981 bytesread = 0;
982 }
983 offset = self->size + offset;
984 } else if (where == 1) {
985 offset = self->pos + offset;
986 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000987
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000988 /* Before getting here, offset must be the absolute position the file
989 * pointer should be set to. */
Georg Brandl47fab922006-02-18 21:57:25 +0000990
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +0000991 if (offset >= self->pos) {
992 /* we can move forward */
993 offset -= self->pos;
994 } else {
995 /* we cannot move back, so rewind the stream */
996 BZ2_bzReadClose(&bzerror, self->fp);
997 if (bzerror != BZ_OK) {
998 Util_CatchBZ2Error(bzerror);
999 goto cleanup;
1000 }
1001 rewind(self->rawfp);
1002 self->pos = 0;
1003 self->fp = BZ2_bzReadOpen(&bzerror, self->rawfp,
1004 0, 0, NULL, 0);
1005 if (bzerror != BZ_OK) {
1006 Util_CatchBZ2Error(bzerror);
1007 goto cleanup;
1008 }
1009 self->mode = MODE_READ;
1010 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001011
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001012 if (offset <= 0 || self->mode == MODE_READ_EOF)
1013 goto exit;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001014
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001015 /* Before getting here, offset must be set to the number of bytes
1016 * to walk forward. */
1017 for (;;) {
1018 if (offset-bytesread > buffersize)
1019 readsize = buffersize;
1020 else
1021 /* offset might be wider that readsize, but the result
1022 * of the subtraction is bound by buffersize (see the
1023 * condition above). buffersize is 8192. */
1024 readsize = (size_t)(offset-bytesread);
1025 Py_BEGIN_ALLOW_THREADS
1026 chunksize = BZ2_bzRead(&bzerror, self->fp, buffer, readsize);
1027 self->pos += chunksize;
1028 Py_END_ALLOW_THREADS
1029 bytesread += chunksize;
1030 if (bzerror == BZ_STREAM_END) {
1031 self->size = self->pos;
1032 self->mode = MODE_READ_EOF;
1033 break;
1034 } else if (bzerror != BZ_OK) {
1035 Util_CatchBZ2Error(bzerror);
1036 goto cleanup;
1037 }
1038 if (bytesread == offset)
1039 break;
1040 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001041
1042exit:
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001043 Py_INCREF(Py_None);
1044 ret = Py_None;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001045
1046cleanup:
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001047 RELEASE_LOCK(self);
1048 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001049}
1050
1051PyDoc_STRVAR(BZ2File_tell__doc__,
1052"tell() -> int\n\
1053\n\
1054Return the current file position, an integer (may be a long integer).\n\
1055");
1056
1057static PyObject *
1058BZ2File_tell(BZ2FileObject *self, PyObject *args)
1059{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001060 PyObject *ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001061
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001062 if (self->mode == MODE_CLOSED) {
1063 PyErr_SetString(PyExc_ValueError,
1064 "I/O operation on closed file");
1065 goto cleanup;
1066 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001067
Georg Brandla8bcecc2005-09-03 07:49:53 +00001068#if !defined(HAVE_LARGEFILE_SUPPORT)
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001069 ret = PyLong_FromLong(self->pos);
Georg Brandla8bcecc2005-09-03 07:49:53 +00001070#else
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001071 ret = PyLong_FromLongLong(self->pos);
Georg Brandla8bcecc2005-09-03 07:49:53 +00001072#endif
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001073
1074cleanup:
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001075 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001076}
1077
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001078PyDoc_STRVAR(BZ2File_close__doc__,
1079"close() -> None or (perhaps) an integer\n\
1080\n\
1081Close the file. Sets data attribute .closed to true. A closed file\n\
1082cannot be used for further I/O operations. close() may be called more\n\
1083than once without error.\n\
1084");
1085
1086static PyObject *
1087BZ2File_close(BZ2FileObject *self)
1088{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001089 PyObject *ret = NULL;
1090 int bzerror = BZ_OK;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001091
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001092 if (self->mode == MODE_CLOSED) {
1093 Py_RETURN_NONE;
1094 }
Guido van Rossumf09ca142007-06-13 00:03:05 +00001095
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001096 ACQUIRE_LOCK(self);
1097 switch (self->mode) {
1098 case MODE_READ:
1099 case MODE_READ_EOF:
1100 BZ2_bzReadClose(&bzerror, self->fp);
1101 break;
1102 case MODE_WRITE:
1103 BZ2_bzWriteClose(&bzerror, self->fp,
1104 0, NULL, NULL);
1105 break;
1106 }
1107 self->mode = MODE_CLOSED;
1108 fclose(self->rawfp);
1109 self->rawfp = NULL;
1110 if (bzerror == BZ_OK) {
1111 Py_INCREF(Py_None);
1112 ret = Py_None;
1113 }
1114 else {
1115 Util_CatchBZ2Error(bzerror);
1116 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001117
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001118 RELEASE_LOCK(self);
1119 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001120}
1121
Antoine Pitrou308705e2009-01-10 16:22:51 +00001122PyDoc_STRVAR(BZ2File_enter_doc,
1123"__enter__() -> self.");
1124
1125static PyObject *
1126BZ2File_enter(BZ2FileObject *self)
1127{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001128 if (self->mode == MODE_CLOSED) {
1129 PyErr_SetString(PyExc_ValueError,
1130 "I/O operation on closed file");
1131 return NULL;
1132 }
1133 Py_INCREF(self);
1134 return (PyObject *) self;
Antoine Pitrou308705e2009-01-10 16:22:51 +00001135}
1136
1137PyDoc_STRVAR(BZ2File_exit_doc,
1138"__exit__(*excinfo) -> None. Closes the file.");
1139
1140static PyObject *
1141BZ2File_exit(BZ2FileObject *self, PyObject *args)
1142{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001143 PyObject *ret = PyObject_CallMethod((PyObject *) self, "close", NULL);
1144 if (!ret)
1145 /* If error occurred, pass through */
1146 return NULL;
1147 Py_DECREF(ret);
1148 Py_RETURN_NONE;
Antoine Pitrou308705e2009-01-10 16:22:51 +00001149}
1150
1151
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001152static PyObject *BZ2File_getiter(BZ2FileObject *self);
1153
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001154static PyMethodDef BZ2File_methods[] = {
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001155 {"read", (PyCFunction)BZ2File_read, METH_VARARGS, BZ2File_read__doc__},
1156 {"readline", (PyCFunction)BZ2File_readline, METH_VARARGS, BZ2File_readline__doc__},
1157 {"readlines", (PyCFunction)BZ2File_readlines, METH_VARARGS, BZ2File_readlines__doc__},
1158 {"write", (PyCFunction)BZ2File_write, METH_VARARGS, BZ2File_write__doc__},
1159 {"writelines", (PyCFunction)BZ2File_writelines, METH_O, BZ2File_writelines__doc__},
1160 {"seek", (PyCFunction)BZ2File_seek, METH_VARARGS, BZ2File_seek__doc__},
1161 {"tell", (PyCFunction)BZ2File_tell, METH_NOARGS, BZ2File_tell__doc__},
1162 {"close", (PyCFunction)BZ2File_close, METH_NOARGS, BZ2File_close__doc__},
1163 {"__enter__", (PyCFunction)BZ2File_enter, METH_NOARGS, BZ2File_enter_doc},
1164 {"__exit__", (PyCFunction)BZ2File_exit, METH_VARARGS, BZ2File_exit_doc},
1165 {NULL, NULL} /* sentinel */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001166};
1167
1168
1169/* ===================================================================== */
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001170/* Getters and setters of BZ2File. */
1171
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001172static PyObject *
1173BZ2File_get_closed(BZ2FileObject *self, void *closure)
1174{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001175 return PyLong_FromLong(self->mode == MODE_CLOSED);
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001176}
1177
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001178static PyGetSetDef BZ2File_getset[] = {
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001179 {"closed", (getter)BZ2File_get_closed, NULL,
1180 "True if the file is closed"},
1181 {NULL} /* Sentinel */
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001182};
1183
1184
1185/* ===================================================================== */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001186/* Slot definitions for BZ2File_Type. */
1187
1188static int
1189BZ2File_init(BZ2FileObject *self, PyObject *args, PyObject *kwargs)
1190{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001191 static char *kwlist[] = {"filename", "mode", "buffering",
1192 "compresslevel", 0};
1193 PyObject *name_obj = NULL;
1194 char *name;
1195 char *mode = "r";
1196 int buffering = -1;
1197 int compresslevel = 9;
1198 int bzerror;
1199 int mode_char = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001200
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001201 self->size = -1;
Tim Peterse3228092002-11-09 04:21:44 +00001202
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001203 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O&|sii:BZ2File",
1204 kwlist, PyUnicode_FSConverter, &name_obj,
1205 &mode, &buffering,
1206 &compresslevel))
1207 return -1;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001208
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001209 if (PyBytes_Check(name_obj))
1210 name = PyBytes_AsString(name_obj);
1211 else
1212 name = PyByteArray_AsString(name_obj);
1213 if (compresslevel < 1 || compresslevel > 9) {
1214 PyErr_SetString(PyExc_ValueError,
1215 "compresslevel must be between 1 and 9");
1216 Py_DECREF(name_obj);
1217 return -1;
1218 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001219
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001220 for (;;) {
1221 int error = 0;
1222 switch (*mode) {
1223 case 'r':
1224 case 'w':
1225 if (mode_char)
1226 error = 1;
1227 mode_char = *mode;
1228 break;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001229
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001230 case 'b':
1231 break;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001232
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001233 default:
1234 error = 1;
1235 break;
1236 }
1237 if (error) {
1238 PyErr_Format(PyExc_ValueError,
1239 "invalid mode char %c", *mode);
1240 Py_DECREF(name_obj);
1241 return -1;
1242 }
1243 mode++;
1244 if (*mode == '\0')
1245 break;
1246 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001247
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001248 if (mode_char == 0) {
1249 mode_char = 'r';
1250 }
Georg Brandl6b95f1d2005-06-03 19:47:00 +00001251
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001252 mode = (mode_char == 'r') ? "rb" : "wb";
Tim Peterse3228092002-11-09 04:21:44 +00001253
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001254 self->rawfp = fopen(name, mode);
1255 Py_DECREF(name_obj);
1256 if (self->rawfp == NULL) {
1257 PyErr_SetFromErrno(PyExc_IOError);
1258 return -1;
1259 }
1260 /* XXX Ignore buffering */
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001261
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001262 /* From now on, we have stuff to dealloc, so jump to error label
1263 * instead of returning */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001264
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001265#ifdef WITH_THREAD
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001266 self->lock = PyThread_allocate_lock();
1267 if (!self->lock) {
1268 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
1269 goto error;
1270 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001271#endif
1272
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001273 if (mode_char == 'r')
1274 self->fp = BZ2_bzReadOpen(&bzerror, self->rawfp,
1275 0, 0, NULL, 0);
1276 else
1277 self->fp = BZ2_bzWriteOpen(&bzerror, self->rawfp,
1278 compresslevel, 0, 0);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001279
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001280 if (bzerror != BZ_OK) {
1281 Util_CatchBZ2Error(bzerror);
1282 goto error;
1283 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001284
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001285 self->mode = (mode_char == 'r') ? MODE_READ : MODE_WRITE;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001286
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001287 return 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001288
1289error:
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001290 fclose(self->rawfp);
1291 self->rawfp = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001292#ifdef WITH_THREAD
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001293 if (self->lock) {
1294 PyThread_free_lock(self->lock);
1295 self->lock = NULL;
1296 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001297#endif
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001298 return -1;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001299}
1300
1301static void
1302BZ2File_dealloc(BZ2FileObject *self)
1303{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001304 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001305#ifdef WITH_THREAD
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001306 if (self->lock)
1307 PyThread_free_lock(self->lock);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001308#endif
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001309 switch (self->mode) {
1310 case MODE_READ:
1311 case MODE_READ_EOF:
1312 BZ2_bzReadClose(&bzerror, self->fp);
1313 break;
1314 case MODE_WRITE:
1315 BZ2_bzWriteClose(&bzerror, self->fp,
1316 0, NULL, NULL);
1317 break;
1318 }
1319 Util_DropReadAhead(self);
1320 if (self->rawfp != NULL)
1321 fclose(self->rawfp);
1322 Py_TYPE(self)->tp_free((PyObject *)self);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001323}
1324
1325/* This is a hacked version of Python's fileobject.c:file_getiter(). */
1326static PyObject *
1327BZ2File_getiter(BZ2FileObject *self)
1328{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001329 if (self->mode == MODE_CLOSED) {
1330 PyErr_SetString(PyExc_ValueError,
1331 "I/O operation on closed file");
1332 return NULL;
1333 }
1334 Py_INCREF((PyObject*)self);
1335 return (PyObject *)self;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001336}
1337
1338/* This is a hacked version of Python's fileobject.c:file_iternext(). */
1339#define READAHEAD_BUFSIZE 8192
1340static PyObject *
1341BZ2File_iternext(BZ2FileObject *self)
1342{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001343 PyBytesObject* ret;
1344 ACQUIRE_LOCK(self);
1345 if (self->mode == MODE_CLOSED) {
1346 RELEASE_LOCK(self);
1347 PyErr_SetString(PyExc_ValueError,
1348 "I/O operation on closed file");
1349 return NULL;
1350 }
1351 ret = Util_ReadAheadGetLineSkip(self, 0, READAHEAD_BUFSIZE);
1352 RELEASE_LOCK(self);
1353 if (ret == NULL || PyBytes_GET_SIZE(ret) == 0) {
1354 Py_XDECREF(ret);
1355 return NULL;
1356 }
1357 return (PyObject *)ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001358}
1359
1360/* ===================================================================== */
1361/* BZ2File_Type definition. */
1362
1363PyDoc_VAR(BZ2File__doc__) =
1364PyDoc_STR(
1365"BZ2File(name [, mode='r', buffering=0, compresslevel=9]) -> file object\n\
1366\n\
1367Open a bz2 file. The mode can be 'r' or 'w', for reading (default) or\n\
1368writing. When opened for writing, the file will be created if it doesn't\n\
1369exist, and truncated otherwise. If the buffering argument is given, 0 means\n\
1370unbuffered, and larger numbers specify the buffer size. If compresslevel\n\
1371is given, must be a number between 1 and 9.\n\
Guido van Rossum88e860c2007-06-13 01:46:31 +00001372Data read is always returned in bytes; data written ought to be bytes.\n\
1373");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001374
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001375static PyTypeObject BZ2File_Type = {
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001376 PyVarObject_HEAD_INIT(NULL, 0)
1377 "bz2.BZ2File", /*tp_name*/
1378 sizeof(BZ2FileObject), /*tp_basicsize*/
1379 0, /*tp_itemsize*/
1380 (destructor)BZ2File_dealloc, /*tp_dealloc*/
1381 0, /*tp_print*/
1382 0, /*tp_getattr*/
1383 0, /*tp_setattr*/
1384 0, /*tp_reserved*/
1385 0, /*tp_repr*/
1386 0, /*tp_as_number*/
1387 0, /*tp_as_sequence*/
1388 0, /*tp_as_mapping*/
1389 0, /*tp_hash*/
1390 0, /*tp_call*/
1391 0, /*tp_str*/
1392 PyObject_GenericGetAttr,/*tp_getattro*/
1393 PyObject_GenericSetAttr,/*tp_setattro*/
1394 0, /*tp_as_buffer*/
1395 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1396 BZ2File__doc__, /*tp_doc*/
1397 0, /*tp_traverse*/
1398 0, /*tp_clear*/
1399 0, /*tp_richcompare*/
1400 0, /*tp_weaklistoffset*/
1401 (getiterfunc)BZ2File_getiter, /*tp_iter*/
1402 (iternextfunc)BZ2File_iternext, /*tp_iternext*/
1403 BZ2File_methods, /*tp_methods*/
1404 0, /*tp_members*/
1405 BZ2File_getset, /*tp_getset*/
1406 0, /*tp_base*/
1407 0, /*tp_dict*/
1408 0, /*tp_descr_get*/
1409 0, /*tp_descr_set*/
1410 0, /*tp_dictoffset*/
1411 (initproc)BZ2File_init, /*tp_init*/
1412 PyType_GenericAlloc, /*tp_alloc*/
1413 PyType_GenericNew, /*tp_new*/
1414 PyObject_Free, /*tp_free*/
1415 0, /*tp_is_gc*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001416};
1417
1418
1419/* ===================================================================== */
1420/* Methods of BZ2Comp. */
1421
1422PyDoc_STRVAR(BZ2Comp_compress__doc__,
1423"compress(data) -> string\n\
1424\n\
1425Provide more data to the compressor object. It will return chunks of\n\
1426compressed data whenever possible. When you've finished providing data\n\
1427to compress, call the flush() method to finish the compression process,\n\
1428and return what is left in the internal buffers.\n\
1429");
1430
1431static PyObject *
1432BZ2Comp_compress(BZ2CompObject *self, PyObject *args)
1433{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001434 Py_buffer pdata;
1435 char *data;
1436 int datasize;
1437 int bufsize = SMALLCHUNK;
1438 PY_LONG_LONG totalout;
1439 PyObject *ret = NULL;
1440 bz_stream *bzs = &self->bzs;
1441 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001442
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001443 if (!PyArg_ParseTuple(args, "y*:compress", &pdata))
1444 return NULL;
1445 data = pdata.buf;
1446 datasize = pdata.len;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001447
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001448 if (datasize == 0) {
1449 PyBuffer_Release(&pdata);
1450 return PyBytes_FromStringAndSize("", 0);
1451 }
Gustavo Niemeyera6e436e2004-02-14 00:02:45 +00001452
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001453 ACQUIRE_LOCK(self);
1454 if (!self->running) {
1455 PyErr_SetString(PyExc_ValueError,
1456 "this object was already flushed");
1457 goto error;
1458 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001459
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001460 ret = PyBytes_FromStringAndSize(NULL, bufsize);
1461 if (!ret)
1462 goto error;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001463
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001464 bzs->next_in = data;
1465 bzs->avail_in = datasize;
1466 bzs->next_out = BUF(ret);
1467 bzs->avail_out = bufsize;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001468
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001469 totalout = BZS_TOTAL_OUT(bzs);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001470
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001471 for (;;) {
1472 Py_BEGIN_ALLOW_THREADS
1473 bzerror = BZ2_bzCompress(bzs, BZ_RUN);
1474 Py_END_ALLOW_THREADS
1475 if (bzerror != BZ_RUN_OK) {
1476 Util_CatchBZ2Error(bzerror);
1477 goto error;
1478 }
1479 if (bzs->avail_in == 0)
1480 break; /* no more input data */
1481 if (bzs->avail_out == 0) {
1482 bufsize = Util_NewBufferSize(bufsize);
1483 if (_PyBytes_Resize(&ret, bufsize) < 0) {
1484 BZ2_bzCompressEnd(bzs);
1485 goto error;
1486 }
1487 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1488 - totalout);
1489 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1490 }
1491 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001492
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001493 if (_PyBytes_Resize(&ret,
1494 (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout)) < 0)
1495 goto error;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001496
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001497 RELEASE_LOCK(self);
1498 PyBuffer_Release(&pdata);
1499 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001500
1501error:
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001502 RELEASE_LOCK(self);
1503 PyBuffer_Release(&pdata);
1504 Py_XDECREF(ret);
1505 return NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001506}
1507
1508PyDoc_STRVAR(BZ2Comp_flush__doc__,
1509"flush() -> string\n\
1510\n\
1511Finish the compression process and return what is left in internal buffers.\n\
1512You must not use the compressor object after calling this method.\n\
1513");
1514
1515static PyObject *
1516BZ2Comp_flush(BZ2CompObject *self)
1517{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001518 int bufsize = SMALLCHUNK;
1519 PyObject *ret = NULL;
1520 bz_stream *bzs = &self->bzs;
1521 PY_LONG_LONG totalout;
1522 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001523
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001524 ACQUIRE_LOCK(self);
1525 if (!self->running) {
1526 PyErr_SetString(PyExc_ValueError, "object was already "
1527 "flushed");
1528 goto error;
1529 }
1530 self->running = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001531
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001532 ret = PyBytes_FromStringAndSize(NULL, bufsize);
1533 if (!ret)
1534 goto error;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001535
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001536 bzs->next_out = BUF(ret);
1537 bzs->avail_out = bufsize;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001538
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001539 totalout = BZS_TOTAL_OUT(bzs);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001540
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001541 for (;;) {
1542 Py_BEGIN_ALLOW_THREADS
1543 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
1544 Py_END_ALLOW_THREADS
1545 if (bzerror == BZ_STREAM_END) {
1546 break;
1547 } else if (bzerror != BZ_FINISH_OK) {
1548 Util_CatchBZ2Error(bzerror);
1549 goto error;
1550 }
1551 if (bzs->avail_out == 0) {
1552 bufsize = Util_NewBufferSize(bufsize);
1553 if (_PyBytes_Resize(&ret, bufsize) < 0)
1554 goto error;
1555 bzs->next_out = BUF(ret);
1556 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1557 - totalout);
1558 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1559 }
1560 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001561
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001562 if (bzs->avail_out != 0) {
1563 if (_PyBytes_Resize(&ret,
1564 (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout)) < 0)
1565 goto error;
1566 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001567
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001568 RELEASE_LOCK(self);
1569 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001570
1571error:
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001572 RELEASE_LOCK(self);
1573 Py_XDECREF(ret);
1574 return NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001575}
1576
1577static PyMethodDef BZ2Comp_methods[] = {
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001578 {"compress", (PyCFunction)BZ2Comp_compress, METH_VARARGS,
1579 BZ2Comp_compress__doc__},
1580 {"flush", (PyCFunction)BZ2Comp_flush, METH_NOARGS,
1581 BZ2Comp_flush__doc__},
1582 {NULL, NULL} /* sentinel */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001583};
1584
1585
1586/* ===================================================================== */
1587/* Slot definitions for BZ2Comp_Type. */
1588
1589static int
1590BZ2Comp_init(BZ2CompObject *self, PyObject *args, PyObject *kwargs)
1591{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001592 int compresslevel = 9;
1593 int bzerror;
1594 static char *kwlist[] = {"compresslevel", 0};
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001595
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001596 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i:BZ2Compressor",
1597 kwlist, &compresslevel))
1598 return -1;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001599
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001600 if (compresslevel < 1 || compresslevel > 9) {
1601 PyErr_SetString(PyExc_ValueError,
1602 "compresslevel must be between 1 and 9");
1603 goto error;
1604 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001605
1606#ifdef WITH_THREAD
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001607 self->lock = PyThread_allocate_lock();
1608 if (!self->lock) {
1609 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
1610 goto error;
1611 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001612#endif
1613
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001614 memset(&self->bzs, 0, sizeof(bz_stream));
1615 bzerror = BZ2_bzCompressInit(&self->bzs, compresslevel, 0, 0);
1616 if (bzerror != BZ_OK) {
1617 Util_CatchBZ2Error(bzerror);
1618 goto error;
1619 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001620
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001621 self->running = 1;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001622
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001623 return 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001624error:
1625#ifdef WITH_THREAD
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001626 if (self->lock) {
1627 PyThread_free_lock(self->lock);
1628 self->lock = NULL;
1629 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001630#endif
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001631 return -1;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001632}
1633
1634static void
1635BZ2Comp_dealloc(BZ2CompObject *self)
1636{
1637#ifdef WITH_THREAD
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001638 if (self->lock)
1639 PyThread_free_lock(self->lock);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001640#endif
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001641 BZ2_bzCompressEnd(&self->bzs);
1642 Py_TYPE(self)->tp_free((PyObject *)self);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001643}
1644
1645
1646/* ===================================================================== */
1647/* BZ2Comp_Type definition. */
1648
1649PyDoc_STRVAR(BZ2Comp__doc__,
1650"BZ2Compressor([compresslevel=9]) -> compressor object\n\
1651\n\
1652Create a new compressor object. This object may be used to compress\n\
1653data sequentially. If you want to compress data in one shot, use the\n\
1654compress() function instead. The compresslevel parameter, if given,\n\
1655must be a number between 1 and 9.\n\
1656");
1657
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001658static PyTypeObject BZ2Comp_Type = {
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001659 PyVarObject_HEAD_INIT(NULL, 0)
1660 "bz2.BZ2Compressor", /*tp_name*/
1661 sizeof(BZ2CompObject), /*tp_basicsize*/
1662 0, /*tp_itemsize*/
1663 (destructor)BZ2Comp_dealloc, /*tp_dealloc*/
1664 0, /*tp_print*/
1665 0, /*tp_getattr*/
1666 0, /*tp_setattr*/
1667 0, /*tp_reserved*/
1668 0, /*tp_repr*/
1669 0, /*tp_as_number*/
1670 0, /*tp_as_sequence*/
1671 0, /*tp_as_mapping*/
1672 0, /*tp_hash*/
1673 0, /*tp_call*/
1674 0, /*tp_str*/
1675 PyObject_GenericGetAttr,/*tp_getattro*/
1676 PyObject_GenericSetAttr,/*tp_setattro*/
1677 0, /*tp_as_buffer*/
1678 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1679 BZ2Comp__doc__, /*tp_doc*/
1680 0, /*tp_traverse*/
1681 0, /*tp_clear*/
1682 0, /*tp_richcompare*/
1683 0, /*tp_weaklistoffset*/
1684 0, /*tp_iter*/
1685 0, /*tp_iternext*/
1686 BZ2Comp_methods, /*tp_methods*/
1687 0, /*tp_members*/
1688 0, /*tp_getset*/
1689 0, /*tp_base*/
1690 0, /*tp_dict*/
1691 0, /*tp_descr_get*/
1692 0, /*tp_descr_set*/
1693 0, /*tp_dictoffset*/
1694 (initproc)BZ2Comp_init, /*tp_init*/
1695 PyType_GenericAlloc, /*tp_alloc*/
1696 PyType_GenericNew, /*tp_new*/
1697 PyObject_Free, /*tp_free*/
1698 0, /*tp_is_gc*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001699};
1700
1701
1702/* ===================================================================== */
1703/* Members of BZ2Decomp. */
1704
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001705#undef OFF
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001706#define OFF(x) offsetof(BZ2DecompObject, x)
1707
1708static PyMemberDef BZ2Decomp_members[] = {
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001709 {"unused_data", T_OBJECT, OFF(unused_data), READONLY},
1710 {NULL} /* Sentinel */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001711};
1712
1713
1714/* ===================================================================== */
1715/* Methods of BZ2Decomp. */
1716
1717PyDoc_STRVAR(BZ2Decomp_decompress__doc__,
1718"decompress(data) -> string\n\
1719\n\
1720Provide more data to the decompressor object. It will return chunks\n\
1721of decompressed data whenever possible. If you try to decompress data\n\
1722after the end of stream is found, EOFError will be raised. If any data\n\
1723was found after the end of stream, it'll be ignored and saved in\n\
1724unused_data attribute.\n\
1725");
1726
1727static PyObject *
1728BZ2Decomp_decompress(BZ2DecompObject *self, PyObject *args)
1729{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001730 Py_buffer pdata;
1731 char *data;
1732 int datasize;
1733 int bufsize = SMALLCHUNK;
1734 PY_LONG_LONG totalout;
1735 PyObject *ret = NULL;
1736 bz_stream *bzs = &self->bzs;
1737 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001738
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001739 if (!PyArg_ParseTuple(args, "y*:decompress", &pdata))
1740 return NULL;
1741 data = pdata.buf;
1742 datasize = pdata.len;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001743
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001744 ACQUIRE_LOCK(self);
1745 if (!self->running) {
1746 PyErr_SetString(PyExc_EOFError, "end of stream was "
1747 "already found");
1748 goto error;
1749 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001750
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001751 ret = PyBytes_FromStringAndSize(NULL, bufsize);
1752 if (!ret)
1753 goto error;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001754
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001755 bzs->next_in = data;
1756 bzs->avail_in = datasize;
1757 bzs->next_out = BUF(ret);
1758 bzs->avail_out = bufsize;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001759
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001760 totalout = BZS_TOTAL_OUT(bzs);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001761
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001762 for (;;) {
1763 Py_BEGIN_ALLOW_THREADS
1764 bzerror = BZ2_bzDecompress(bzs);
1765 Py_END_ALLOW_THREADS
1766 if (bzerror == BZ_STREAM_END) {
1767 if (bzs->avail_in != 0) {
1768 Py_DECREF(self->unused_data);
1769 self->unused_data =
1770 PyBytes_FromStringAndSize(bzs->next_in,
1771 bzs->avail_in);
1772 }
1773 self->running = 0;
1774 break;
1775 }
1776 if (bzerror != BZ_OK) {
1777 Util_CatchBZ2Error(bzerror);
1778 goto error;
1779 }
1780 if (bzs->avail_in == 0)
1781 break; /* no more input data */
1782 if (bzs->avail_out == 0) {
1783 bufsize = Util_NewBufferSize(bufsize);
1784 if (_PyBytes_Resize(&ret, bufsize) < 0) {
1785 BZ2_bzDecompressEnd(bzs);
1786 goto error;
1787 }
1788 bzs->next_out = BUF(ret);
1789 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1790 - totalout);
1791 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1792 }
1793 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001794
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001795 if (bzs->avail_out != 0) {
1796 if (_PyBytes_Resize(&ret,
1797 (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout)) < 0)
1798 goto error;
1799 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001800
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001801 RELEASE_LOCK(self);
1802 PyBuffer_Release(&pdata);
1803 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001804
1805error:
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001806 RELEASE_LOCK(self);
1807 PyBuffer_Release(&pdata);
1808 Py_XDECREF(ret);
1809 return NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001810}
1811
1812static PyMethodDef BZ2Decomp_methods[] = {
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001813 {"decompress", (PyCFunction)BZ2Decomp_decompress, METH_VARARGS, BZ2Decomp_decompress__doc__},
1814 {NULL, NULL} /* sentinel */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001815};
1816
1817
1818/* ===================================================================== */
1819/* Slot definitions for BZ2Decomp_Type. */
1820
1821static int
1822BZ2Decomp_init(BZ2DecompObject *self, PyObject *args, PyObject *kwargs)
1823{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001824 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001825
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001826 if (!PyArg_ParseTuple(args, ":BZ2Decompressor"))
1827 return -1;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001828
1829#ifdef WITH_THREAD
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001830 self->lock = PyThread_allocate_lock();
1831 if (!self->lock) {
1832 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
1833 goto error;
1834 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001835#endif
1836
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001837 self->unused_data = PyBytes_FromStringAndSize("", 0);
1838 if (!self->unused_data)
1839 goto error;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001840
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001841 memset(&self->bzs, 0, sizeof(bz_stream));
1842 bzerror = BZ2_bzDecompressInit(&self->bzs, 0, 0);
1843 if (bzerror != BZ_OK) {
1844 Util_CatchBZ2Error(bzerror);
1845 goto error;
1846 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001847
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001848 self->running = 1;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001849
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001850 return 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001851
1852error:
1853#ifdef WITH_THREAD
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001854 if (self->lock) {
1855 PyThread_free_lock(self->lock);
1856 self->lock = NULL;
1857 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001858#endif
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001859 Py_CLEAR(self->unused_data);
1860 return -1;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001861}
1862
1863static void
1864BZ2Decomp_dealloc(BZ2DecompObject *self)
1865{
1866#ifdef WITH_THREAD
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001867 if (self->lock)
1868 PyThread_free_lock(self->lock);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001869#endif
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001870 Py_XDECREF(self->unused_data);
1871 BZ2_bzDecompressEnd(&self->bzs);
1872 Py_TYPE(self)->tp_free((PyObject *)self);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001873}
1874
1875
1876/* ===================================================================== */
1877/* BZ2Decomp_Type definition. */
1878
1879PyDoc_STRVAR(BZ2Decomp__doc__,
1880"BZ2Decompressor() -> decompressor object\n\
1881\n\
1882Create a new decompressor object. This object may be used to decompress\n\
1883data sequentially. If you want to decompress data in one shot, use the\n\
1884decompress() function instead.\n\
1885");
1886
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001887static PyTypeObject BZ2Decomp_Type = {
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001888 PyVarObject_HEAD_INIT(NULL, 0)
1889 "bz2.BZ2Decompressor", /*tp_name*/
1890 sizeof(BZ2DecompObject), /*tp_basicsize*/
1891 0, /*tp_itemsize*/
1892 (destructor)BZ2Decomp_dealloc, /*tp_dealloc*/
1893 0, /*tp_print*/
1894 0, /*tp_getattr*/
1895 0, /*tp_setattr*/
1896 0, /*tp_reserved*/
1897 0, /*tp_repr*/
1898 0, /*tp_as_number*/
1899 0, /*tp_as_sequence*/
1900 0, /*tp_as_mapping*/
1901 0, /*tp_hash*/
1902 0, /*tp_call*/
1903 0, /*tp_str*/
1904 PyObject_GenericGetAttr,/*tp_getattro*/
1905 PyObject_GenericSetAttr,/*tp_setattro*/
1906 0, /*tp_as_buffer*/
1907 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1908 BZ2Decomp__doc__, /*tp_doc*/
1909 0, /*tp_traverse*/
1910 0, /*tp_clear*/
1911 0, /*tp_richcompare*/
1912 0, /*tp_weaklistoffset*/
1913 0, /*tp_iter*/
1914 0, /*tp_iternext*/
1915 BZ2Decomp_methods, /*tp_methods*/
1916 BZ2Decomp_members, /*tp_members*/
1917 0, /*tp_getset*/
1918 0, /*tp_base*/
1919 0, /*tp_dict*/
1920 0, /*tp_descr_get*/
1921 0, /*tp_descr_set*/
1922 0, /*tp_dictoffset*/
1923 (initproc)BZ2Decomp_init, /*tp_init*/
1924 PyType_GenericAlloc, /*tp_alloc*/
1925 PyType_GenericNew, /*tp_new*/
1926 PyObject_Free, /*tp_free*/
1927 0, /*tp_is_gc*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001928};
1929
1930
1931/* ===================================================================== */
1932/* Module functions. */
1933
1934PyDoc_STRVAR(bz2_compress__doc__,
1935"compress(data [, compresslevel=9]) -> string\n\
1936\n\
1937Compress data in one shot. If you want to compress data sequentially,\n\
1938use an instance of BZ2Compressor instead. The compresslevel parameter, if\n\
1939given, must be a number between 1 and 9.\n\
1940");
1941
1942static PyObject *
1943bz2_compress(PyObject *self, PyObject *args, PyObject *kwargs)
1944{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001945 int compresslevel=9;
1946 Py_buffer pdata;
1947 char *data;
1948 int datasize;
1949 int bufsize;
1950 PyObject *ret = NULL;
1951 bz_stream _bzs;
1952 bz_stream *bzs = &_bzs;
1953 int bzerror;
1954 static char *kwlist[] = {"data", "compresslevel", 0};
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001955
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001956 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*|i",
1957 kwlist, &pdata,
1958 &compresslevel))
1959 return NULL;
1960 data = pdata.buf;
1961 datasize = pdata.len;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001962
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001963 if (compresslevel < 1 || compresslevel > 9) {
1964 PyErr_SetString(PyExc_ValueError,
1965 "compresslevel must be between 1 and 9");
1966 PyBuffer_Release(&pdata);
1967 return NULL;
1968 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001969
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001970 /* Conforming to bz2 manual, this is large enough to fit compressed
1971 * data in one shot. We will check it later anyway. */
1972 bufsize = datasize + (datasize/100+1) + 600;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001973
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001974 ret = PyBytes_FromStringAndSize(NULL, bufsize);
1975 if (!ret) {
1976 PyBuffer_Release(&pdata);
1977 return NULL;
1978 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001979
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001980 memset(bzs, 0, sizeof(bz_stream));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001981
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001982 bzs->next_in = data;
1983 bzs->avail_in = datasize;
1984 bzs->next_out = BUF(ret);
1985 bzs->avail_out = bufsize;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001986
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001987 bzerror = BZ2_bzCompressInit(bzs, compresslevel, 0, 0);
1988 if (bzerror != BZ_OK) {
1989 Util_CatchBZ2Error(bzerror);
1990 PyBuffer_Release(&pdata);
1991 Py_DECREF(ret);
1992 return NULL;
1993 }
Tim Peterse3228092002-11-09 04:21:44 +00001994
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00001995 for (;;) {
1996 Py_BEGIN_ALLOW_THREADS
1997 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
1998 Py_END_ALLOW_THREADS
1999 if (bzerror == BZ_STREAM_END) {
2000 break;
2001 } else if (bzerror != BZ_FINISH_OK) {
2002 BZ2_bzCompressEnd(bzs);
2003 Util_CatchBZ2Error(bzerror);
2004 PyBuffer_Release(&pdata);
2005 Py_DECREF(ret);
2006 return NULL;
2007 }
2008 if (bzs->avail_out == 0) {
2009 bufsize = Util_NewBufferSize(bufsize);
2010 if (_PyBytes_Resize(&ret, bufsize) < 0) {
2011 BZ2_bzCompressEnd(bzs);
2012 PyBuffer_Release(&pdata);
2013 return NULL;
2014 }
2015 bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
2016 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
2017 }
2018 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002019
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002020 if (bzs->avail_out != 0) {
2021 if (_PyBytes_Resize(&ret, (Py_ssize_t)BZS_TOTAL_OUT(bzs)) < 0) {
2022 ret = NULL;
2023 }
2024 }
2025 BZ2_bzCompressEnd(bzs);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002026
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002027 PyBuffer_Release(&pdata);
2028 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002029}
2030
2031PyDoc_STRVAR(bz2_decompress__doc__,
2032"decompress(data) -> decompressed data\n\
2033\n\
2034Decompress data in one shot. If you want to decompress data sequentially,\n\
2035use an instance of BZ2Decompressor instead.\n\
2036");
2037
2038static PyObject *
2039bz2_decompress(PyObject *self, PyObject *args)
2040{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002041 Py_buffer pdata;
2042 char *data;
2043 int datasize;
2044 int bufsize = SMALLCHUNK;
2045 PyObject *ret;
2046 bz_stream _bzs;
2047 bz_stream *bzs = &_bzs;
2048 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002049
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002050 if (!PyArg_ParseTuple(args, "y*:decompress", &pdata))
2051 return NULL;
2052 data = pdata.buf;
2053 datasize = pdata.len;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002054
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002055 if (datasize == 0) {
2056 PyBuffer_Release(&pdata);
2057 return PyBytes_FromStringAndSize("", 0);
2058 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002059
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002060 ret = PyBytes_FromStringAndSize(NULL, bufsize);
2061 if (!ret) {
2062 PyBuffer_Release(&pdata);
2063 return NULL;
2064 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002065
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002066 memset(bzs, 0, sizeof(bz_stream));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002067
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002068 bzs->next_in = data;
2069 bzs->avail_in = datasize;
2070 bzs->next_out = BUF(ret);
2071 bzs->avail_out = bufsize;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002072
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002073 bzerror = BZ2_bzDecompressInit(bzs, 0, 0);
2074 if (bzerror != BZ_OK) {
2075 Util_CatchBZ2Error(bzerror);
2076 Py_DECREF(ret);
2077 PyBuffer_Release(&pdata);
2078 return NULL;
2079 }
Tim Peterse3228092002-11-09 04:21:44 +00002080
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002081 for (;;) {
2082 Py_BEGIN_ALLOW_THREADS
2083 bzerror = BZ2_bzDecompress(bzs);
2084 Py_END_ALLOW_THREADS
2085 if (bzerror == BZ_STREAM_END) {
2086 break;
2087 } else if (bzerror != BZ_OK) {
2088 BZ2_bzDecompressEnd(bzs);
2089 Util_CatchBZ2Error(bzerror);
2090 PyBuffer_Release(&pdata);
2091 Py_DECREF(ret);
2092 return NULL;
2093 }
2094 if (bzs->avail_in == 0) {
2095 BZ2_bzDecompressEnd(bzs);
2096 PyErr_SetString(PyExc_ValueError,
2097 "couldn't find end of stream");
2098 PyBuffer_Release(&pdata);
2099 Py_DECREF(ret);
2100 return NULL;
2101 }
2102 if (bzs->avail_out == 0) {
2103 bufsize = Util_NewBufferSize(bufsize);
2104 if (_PyBytes_Resize(&ret, bufsize) < 0) {
2105 BZ2_bzDecompressEnd(bzs);
2106 PyBuffer_Release(&pdata);
2107 return NULL;
2108 }
2109 bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
2110 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
2111 }
2112 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002113
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002114 if (bzs->avail_out != 0) {
2115 if (_PyBytes_Resize(&ret, (Py_ssize_t)BZS_TOTAL_OUT(bzs)) < 0) {
2116 ret = NULL;
2117 }
2118 }
2119 BZ2_bzDecompressEnd(bzs);
2120 PyBuffer_Release(&pdata);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002121
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002122 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002123}
2124
2125static PyMethodDef bz2_methods[] = {
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002126 {"compress", (PyCFunction) bz2_compress, METH_VARARGS|METH_KEYWORDS,
2127 bz2_compress__doc__},
2128 {"decompress", (PyCFunction) bz2_decompress, METH_VARARGS,
2129 bz2_decompress__doc__},
2130 {NULL, NULL} /* sentinel */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002131};
2132
2133/* ===================================================================== */
2134/* Initialization function. */
2135
2136PyDoc_STRVAR(bz2__doc__,
2137"The python bz2 module provides a comprehensive interface for\n\
2138the bz2 compression library. It implements a complete file\n\
2139interface, one shot (de)compression functions, and types for\n\
2140sequential (de)compression.\n\
2141");
2142
Martin v. Löwis1a214512008-06-11 05:26:20 +00002143
2144static struct PyModuleDef bz2module = {
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002145 PyModuleDef_HEAD_INIT,
2146 "bz2",
2147 bz2__doc__,
2148 -1,
2149 bz2_methods,
2150 NULL,
2151 NULL,
2152 NULL,
2153 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00002154};
2155
Neal Norwitz21d896c2003-07-01 20:15:21 +00002156PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00002157PyInit_bz2(void)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002158{
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002159 PyObject *m;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002160
Antoine Pitrou92275082010-09-23 19:54:28 +00002161 if (PyType_Ready(&BZ2File_Type) < 0)
2162 return NULL;
2163 if (PyType_Ready(&BZ2Comp_Type) < 0)
2164 return NULL;
2165 if (PyType_Ready(&BZ2Decomp_Type) < 0)
2166 return NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002167
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002168 m = PyModule_Create(&bz2module);
2169 if (m == NULL)
2170 return NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002171
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002172 PyModule_AddObject(m, "__author__", PyUnicode_FromString(__author__));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002173
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002174 Py_INCREF(&BZ2File_Type);
2175 PyModule_AddObject(m, "BZ2File", (PyObject *)&BZ2File_Type);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002176
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002177 Py_INCREF(&BZ2Comp_Type);
2178 PyModule_AddObject(m, "BZ2Compressor", (PyObject *)&BZ2Comp_Type);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002179
Antoine Pitrou7f14f0d2010-05-09 16:14:21 +00002180 Py_INCREF(&BZ2Decomp_Type);
2181 PyModule_AddObject(m, "BZ2Decompressor", (PyObject *)&BZ2Decomp_Type);
2182 return m;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002183}