blob: a671e8d9e6b87e2c2c56f20e42038f8450b8ec14 [file] [log] [blame]
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001/*
2
3python-bz2 - python bz2 library interface
4
5Copyright (c) 2002 Gustavo Niemeyer <niemeyer@conectiva.com>
6Copyright (c) 2002 Python Software Foundation; All Rights Reserved
7
8*/
9
Martin v. Löwise17af7b2002-11-23 09:16:19 +000010#include "Python.h"
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000011#include <stdio.h>
12#include <bzlib.h>
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000013#include "structmember.h"
14
15#ifdef WITH_THREAD
16#include "pythread.h"
17#endif
18
/* Author credit text for the module. */
static char __author__[] =
    "The bz2 python module was written by:\n"
    "\n"
    " Gustavo Niemeyer <niemeyer@conectiva.com>\n";
24
/* Py_off_t is the file-offset type used for stream positions: 64 bits wide
 * whenever the platform offers such a type.  The selection logic mirrors
 * the one in Objects/fileobject.c. */
#if !defined(HAVE_LARGEFILE_SUPPORT) || SIZEOF_OFF_T >= 8
/* Either no large-file support was requested, or off_t is already wide
 * enough. */
typedef off_t Py_off_t;
#elif SIZEOF_FPOS_T >= 8
typedef fpos_t Py_off_t;
#else
#error "Large file support, but neither off_t nor fpos_t is large enough."
#endif
36
/* Shorthand for the raw character buffer of a bytes object. */
#define BUF(v) PyBytes_AS_STRING(v)

/* Values stored in BZ2FileObject.mode. */
#define MODE_CLOSED   0  /* I/O is rejected with "closed file" errors */
#define MODE_READ     1  /* open for reading, end of stream not yet seen */
#define MODE_READ_EOF 2  /* open for reading, BZ_STREAM_END already seen */
#define MODE_WRITE    3  /* open for writing */

/* True when v's type is exactly BZ2File_Type. */
#define BZ2FileObject_Check(v) (Py_TYPE(v) == &BZ2File_Type)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000045
Gustavo Niemeyer7628f1f2003-04-27 06:25:24 +000046
/* Compatibility layer over the bzlib API.
 *
 * When <bzlib.h> defines BZ_CONFIG_ERROR the BZ2_-prefixed functions are
 * used as-is and the output counter of a bz_stream is split into two 32-bit
 * halves.  Otherwise (presumably an older bzlib release -- confirm against
 * the bzlib headers) the BZ2_ names are aliased to unprefixed functions and
 * the counter is a single field.
 *
 * BZS_TOTAL_OUT(bzs) yields the total number of bytes produced so far for
 * the bz_stream pointed to by bzs.  The argument is parenthesized so that
 * any pointer expression (e.g. &obj->bzs) expands correctly. */
#ifdef BZ_CONFIG_ERROR

#if SIZEOF_LONG >= 8
#define BZS_TOTAL_OUT(bzs) \
    (((long)(bzs)->total_out_hi32 << 32) + (bzs)->total_out_lo32)
#elif SIZEOF_LONG_LONG >= 8
#define BZS_TOTAL_OUT(bzs) \
    (((PY_LONG_LONG)(bzs)->total_out_hi32 << 32) + (bzs)->total_out_lo32)
#else
/* No 64-bit integer type available: only the low half can be reported. */
#define BZS_TOTAL_OUT(bzs) \
    ((bzs)->total_out_lo32)
#endif

#else /* ! BZ_CONFIG_ERROR */

#define BZ2_bzRead bzRead
#define BZ2_bzReadOpen bzReadOpen
#define BZ2_bzReadClose bzReadClose
#define BZ2_bzWrite bzWrite
#define BZ2_bzWriteOpen bzWriteOpen
#define BZ2_bzWriteClose bzWriteClose
#define BZ2_bzCompress bzCompress
#define BZ2_bzCompressInit bzCompressInit
#define BZ2_bzCompressEnd bzCompressEnd
#define BZ2_bzDecompress bzDecompress
#define BZ2_bzDecompressInit bzDecompressInit
#define BZ2_bzDecompressEnd bzDecompressEnd

#define BZS_TOTAL_OUT(bzs) ((bzs)->total_out)

#endif /* ! BZ_CONFIG_ERROR */
78
79
/* Per-object locking helpers.
 *
 * ACQUIRE_LOCK first tries to take obj->lock without blocking.  Only when
 * that fails does it release the interpreter lock
 * (Py_BEGIN_ALLOW_THREADS) and block, so other Python threads can run while
 * this one waits.  RELEASE_LOCK releases obj->lock.
 *
 * Without WITH_THREAD both macros expand to nothing.  The argument is
 * parenthesized so that any pointer expression is safe to pass. */
#ifdef WITH_THREAD
#define ACQUIRE_LOCK(obj) do { \
    if (!PyThread_acquire_lock((obj)->lock, 0)) { \
        Py_BEGIN_ALLOW_THREADS \
        PyThread_acquire_lock((obj)->lock, 1); \
        Py_END_ALLOW_THREADS \
    } } while(0)
#define RELEASE_LOCK(obj) PyThread_release_lock((obj)->lock)
#else
#define ACQUIRE_LOCK(obj)
#define RELEASE_LOCK(obj)
#endif
92
/* Bit flags for f_newlinetypes: which newline conventions were seen. */
#define NEWLINE_UNKNOWN 0       /* nothing seen so far */
#define NEWLINE_CR      1       /* a lone \r */
#define NEWLINE_LF      2       /* a lone \n */
#define NEWLINE_CRLF    4       /* a \r\n pair */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000098
99/* ===================================================================== */
100/* Structure definitions. */
101
102typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000103 PyObject_HEAD
104 FILE *rawfp;
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000105
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000106 char* f_buf; /* Allocated readahead buffer */
107 char* f_bufend; /* Points after last occupied position */
108 char* f_bufptr; /* Current buffer position */
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000109
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000110 BZFILE *fp;
111 int mode;
112 Py_off_t pos;
113 Py_off_t size;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000114#ifdef WITH_THREAD
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000115 PyThread_type_lock lock;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000116#endif
117} BZ2FileObject;
118
119typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000120 PyObject_HEAD
121 bz_stream bzs;
122 int running;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000123#ifdef WITH_THREAD
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000124 PyThread_type_lock lock;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000125#endif
126} BZ2CompObject;
127
128typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000129 PyObject_HEAD
130 bz_stream bzs;
131 int running;
132 PyObject *unused_data;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000133#ifdef WITH_THREAD
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000134 PyThread_type_lock lock;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000135#endif
136} BZ2DecompObject;
137
138/* ===================================================================== */
139/* Utility functions. */
140
Antoine Pitrou7ffa1962010-08-01 20:08:46 +0000141/* Refuse regular I/O if there's data in the iteration-buffer.
142 * Mixing them would cause data to arrive out of order, as the read*
143 * methods don't use the iteration buffer. */
144static int
145check_iterbuffered(BZ2FileObject *f)
146{
147 if (f->f_buf != NULL &&
148 (f->f_bufend - f->f_bufptr) > 0 &&
149 f->f_buf[0] != '\0') {
150 PyErr_SetString(PyExc_ValueError,
151 "Mixing iteration and read methods would lose data");
152 return -1;
153 }
154 return 0;
155}
156
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000157static int
158Util_CatchBZ2Error(int bzerror)
159{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000160 int ret = 0;
161 switch(bzerror) {
162 case BZ_OK:
163 case BZ_STREAM_END:
164 break;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000165
Gustavo Niemeyer7628f1f2003-04-27 06:25:24 +0000166#ifdef BZ_CONFIG_ERROR
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000167 case BZ_CONFIG_ERROR:
168 PyErr_SetString(PyExc_SystemError,
169 "the bz2 library was not compiled "
170 "correctly");
171 ret = 1;
172 break;
Gustavo Niemeyer7628f1f2003-04-27 06:25:24 +0000173#endif
Tim Peterse3228092002-11-09 04:21:44 +0000174
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000175 case BZ_PARAM_ERROR:
176 PyErr_SetString(PyExc_ValueError,
177 "the bz2 library has received wrong "
178 "parameters");
179 ret = 1;
180 break;
Tim Peterse3228092002-11-09 04:21:44 +0000181
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000182 case BZ_MEM_ERROR:
183 PyErr_NoMemory();
184 ret = 1;
185 break;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000186
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000187 case BZ_DATA_ERROR:
188 case BZ_DATA_ERROR_MAGIC:
189 PyErr_SetString(PyExc_IOError, "invalid data stream");
190 ret = 1;
191 break;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000192
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000193 case BZ_IO_ERROR:
194 PyErr_SetString(PyExc_IOError, "unknown IO error");
195 ret = 1;
196 break;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000197
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000198 case BZ_UNEXPECTED_EOF:
199 PyErr_SetString(PyExc_EOFError,
200 "compressed file ended before the "
201 "logical end-of-stream was detected");
202 ret = 1;
203 break;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000204
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000205 case BZ_SEQUENCE_ERROR:
206 PyErr_SetString(PyExc_RuntimeError,
207 "wrong sequence of bz2 library "
208 "commands used");
209 ret = 1;
210 break;
211 }
212 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000213}
214
/* Size of the on-stack scratch buffers: BUFSIZ, but never below 8192. */
#if BUFSIZ >= 8192
#define SMALLCHUNK BUFSIZ
#else
#define SMALLCHUNK 8192
#endif
220
/* Adapted from fileobject.c:new_buffersize().
 *
 * Return the next buffer size to try after `currentsize`: the current size
 * plus one eighth of it plus a constant 6.  Growing proportionally keeps
 * repeated resizing amortized linear, while staying well below doubling
 * limits over-allocation. */
static size_t
Util_NewBufferSize(size_t currentsize)
{
    size_t growth = (currentsize >> 3) + 6;

    return currentsize + growth;
}
230
231/* This is a hacked version of Python's fileobject.c:get_line(). */
232static PyObject *
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000233Util_GetLine(BZ2FileObject *f, int n)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000234{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000235 char c;
236 char *buf, *end;
237 size_t total_v_size; /* total # of slots in buffer */
238 size_t used_v_size; /* # used slots in buffer */
239 size_t increment; /* amount to increment the buffer */
240 PyObject *v;
241 int bzerror;
242 int bytes_read;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000243
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000244 total_v_size = n > 0 ? n : 100;
245 v = PyBytes_FromStringAndSize((char *)NULL, total_v_size);
246 if (v == NULL)
247 return NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000248
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000249 buf = BUF(v);
250 end = buf + total_v_size;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000251
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000252 for (;;) {
253 Py_BEGIN_ALLOW_THREADS
254 do {
255 bytes_read = BZ2_bzRead(&bzerror, f->fp, &c, 1);
256 f->pos++;
257 if (bytes_read == 0)
258 break;
259 *buf++ = c;
260 } while (bzerror == BZ_OK && c != '\n' && buf != end);
261 Py_END_ALLOW_THREADS
262 if (bzerror == BZ_STREAM_END) {
263 f->size = f->pos;
264 f->mode = MODE_READ_EOF;
265 break;
266 } else if (bzerror != BZ_OK) {
267 Util_CatchBZ2Error(bzerror);
268 Py_DECREF(v);
269 return NULL;
270 }
271 if (c == '\n')
272 break;
273 /* Must be because buf == end */
274 if (n > 0)
275 break;
276 used_v_size = total_v_size;
277 increment = total_v_size >> 2; /* mild exponential growth */
278 total_v_size += increment;
279 if (total_v_size > INT_MAX) {
280 PyErr_SetString(PyExc_OverflowError,
281 "line is longer than a Python string can hold");
282 Py_DECREF(v);
283 return NULL;
284 }
285 if (_PyBytes_Resize(&v, total_v_size) < 0) {
286 return NULL;
287 }
288 buf = BUF(v) + used_v_size;
289 end = BUF(v) + total_v_size;
290 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000291
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000292 used_v_size = buf - BUF(v);
293 if (used_v_size != total_v_size) {
294 if (_PyBytes_Resize(&v, used_v_size) < 0) {
295 v = NULL;
296 }
297 }
298 return v;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000299}
300
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000301/* This is a hacked version of Python's fileobject.c:drop_readahead(). */
302static void
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000303Util_DropReadAhead(BZ2FileObject *f)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000304{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000305 if (f->f_buf != NULL) {
306 PyMem_Free(f->f_buf);
307 f->f_buf = NULL;
308 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000309}
310
311/* This is a hacked version of Python's fileobject.c:readahead(). */
312static int
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000313Util_ReadAhead(BZ2FileObject *f, int bufsize)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000314{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000315 int chunksize;
316 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000317
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000318 if (f->f_buf != NULL) {
319 if((f->f_bufend - f->f_bufptr) >= 1)
320 return 0;
321 else
322 Util_DropReadAhead(f);
323 }
324 if (f->mode == MODE_READ_EOF) {
325 f->f_bufptr = f->f_buf;
326 f->f_bufend = f->f_buf;
327 return 0;
328 }
329 if ((f->f_buf = PyMem_Malloc(bufsize)) == NULL) {
330 PyErr_NoMemory();
331 return -1;
332 }
333 Py_BEGIN_ALLOW_THREADS
334 chunksize = BZ2_bzRead(&bzerror, f->fp, f->f_buf, bufsize);
335 Py_END_ALLOW_THREADS
336 f->pos += chunksize;
337 if (bzerror == BZ_STREAM_END) {
338 f->size = f->pos;
339 f->mode = MODE_READ_EOF;
340 } else if (bzerror != BZ_OK) {
341 Util_CatchBZ2Error(bzerror);
342 Util_DropReadAhead(f);
343 return -1;
344 }
345 f->f_bufptr = f->f_buf;
346 f->f_bufend = f->f_buf + chunksize;
347 return 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000348}
349
350/* This is a hacked version of Python's
351 * fileobject.c:readahead_get_line_skip(). */
Christian Heimes72b710a2008-05-26 13:28:38 +0000352static PyBytesObject *
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000353Util_ReadAheadGetLineSkip(BZ2FileObject *f, int skip, int bufsize)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000354{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000355 PyBytesObject* s;
356 char *bufptr;
357 char *buf;
358 int len;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000359
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000360 if (f->f_buf == NULL)
361 if (Util_ReadAhead(f, bufsize) < 0)
362 return NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000363
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000364 len = f->f_bufend - f->f_bufptr;
365 if (len == 0)
366 return (PyBytesObject *)
367 PyBytes_FromStringAndSize(NULL, skip);
368 bufptr = memchr(f->f_bufptr, '\n', len);
369 if (bufptr != NULL) {
370 bufptr++; /* Count the '\n' */
371 len = bufptr - f->f_bufptr;
372 s = (PyBytesObject *)
373 PyBytes_FromStringAndSize(NULL, skip+len);
374 if (s == NULL)
375 return NULL;
376 memcpy(PyBytes_AS_STRING(s)+skip, f->f_bufptr, len);
377 f->f_bufptr = bufptr;
378 if (bufptr == f->f_bufend)
379 Util_DropReadAhead(f);
380 } else {
381 bufptr = f->f_bufptr;
382 buf = f->f_buf;
383 f->f_buf = NULL; /* Force new readahead buffer */
384 s = Util_ReadAheadGetLineSkip(f, skip+len,
385 bufsize + (bufsize>>2));
386 if (s == NULL) {
387 PyMem_Free(buf);
388 return NULL;
389 }
390 memcpy(PyBytes_AS_STRING(s)+skip, bufptr, len);
391 PyMem_Free(buf);
392 }
393 return s;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000394}
395
396/* ===================================================================== */
397/* Methods of BZ2File. */
398
399PyDoc_STRVAR(BZ2File_read__doc__,
400"read([size]) -> string\n\
401\n\
402Read at most size uncompressed bytes, returned as a string. If the size\n\
403argument is negative or omitted, read until EOF is reached.\n\
404");
405
406/* This is a hacked version of Python's fileobject.c:file_read(). */
407static PyObject *
408BZ2File_read(BZ2FileObject *self, PyObject *args)
409{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000410 long bytesrequested = -1;
411 size_t bytesread, buffersize, chunksize;
412 int bzerror;
413 PyObject *ret = NULL;
Tim Peterse3228092002-11-09 04:21:44 +0000414
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000415 if (!PyArg_ParseTuple(args, "|l:read", &bytesrequested))
416 return NULL;
Tim Peterse3228092002-11-09 04:21:44 +0000417
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000418 ACQUIRE_LOCK(self);
419 switch (self->mode) {
420 case MODE_READ:
421 break;
422 case MODE_READ_EOF:
423 ret = PyBytes_FromStringAndSize("", 0);
424 goto cleanup;
425 case MODE_CLOSED:
426 PyErr_SetString(PyExc_ValueError,
427 "I/O operation on closed file");
428 goto cleanup;
429 default:
430 PyErr_SetString(PyExc_IOError,
431 "file is not ready for reading");
432 goto cleanup;
433 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000434
Antoine Pitrou7ffa1962010-08-01 20:08:46 +0000435 /* refuse to mix with f.next() */
436 if (check_iterbuffered(self))
437 goto cleanup;
438
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000439 if (bytesrequested < 0)
440 buffersize = Util_NewBufferSize((size_t)0);
441 else
442 buffersize = bytesrequested;
443 if (buffersize > INT_MAX) {
444 PyErr_SetString(PyExc_OverflowError,
445 "requested number of bytes is "
446 "more than a Python string can hold");
447 goto cleanup;
448 }
449 ret = PyBytes_FromStringAndSize((char *)NULL, buffersize);
450 if (ret == NULL || buffersize == 0)
451 goto cleanup;
452 bytesread = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000453
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000454 for (;;) {
455 Py_BEGIN_ALLOW_THREADS
456 chunksize = BZ2_bzRead(&bzerror, self->fp,
457 BUF(ret)+bytesread,
458 buffersize-bytesread);
459 self->pos += chunksize;
460 Py_END_ALLOW_THREADS
461 bytesread += chunksize;
462 if (bzerror == BZ_STREAM_END) {
463 self->size = self->pos;
464 self->mode = MODE_READ_EOF;
465 break;
466 } else if (bzerror != BZ_OK) {
467 Util_CatchBZ2Error(bzerror);
468 Py_DECREF(ret);
469 ret = NULL;
470 goto cleanup;
471 }
472 if (bytesrequested < 0) {
473 buffersize = Util_NewBufferSize(buffersize);
474 if (_PyBytes_Resize(&ret, buffersize) < 0) {
475 ret = NULL;
476 goto cleanup;
477 }
478 } else {
479 break;
480 }
481 }
482 if (bytesread != buffersize) {
483 if (_PyBytes_Resize(&ret, bytesread) < 0) {
484 ret = NULL;
485 }
486 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000487
488cleanup:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000489 RELEASE_LOCK(self);
490 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000491}
492
493PyDoc_STRVAR(BZ2File_readline__doc__,
494"readline([size]) -> string\n\
495\n\
496Return the next line from the file, as a string, retaining newline.\n\
497A non-negative size argument will limit the maximum number of bytes to\n\
498return (an incomplete line may be returned then). Return an empty\n\
499string at EOF.\n\
500");
501
502static PyObject *
503BZ2File_readline(BZ2FileObject *self, PyObject *args)
504{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000505 PyObject *ret = NULL;
506 int sizehint = -1;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000507
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000508 if (!PyArg_ParseTuple(args, "|i:readline", &sizehint))
509 return NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000510
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000511 ACQUIRE_LOCK(self);
512 switch (self->mode) {
513 case MODE_READ:
514 break;
515 case MODE_READ_EOF:
516 ret = PyBytes_FromStringAndSize("", 0);
517 goto cleanup;
518 case MODE_CLOSED:
519 PyErr_SetString(PyExc_ValueError,
520 "I/O operation on closed file");
521 goto cleanup;
522 default:
523 PyErr_SetString(PyExc_IOError,
524 "file is not ready for reading");
525 goto cleanup;
526 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000527
Antoine Pitrou7ffa1962010-08-01 20:08:46 +0000528 /* refuse to mix with f.next() */
529 if (check_iterbuffered(self))
530 goto cleanup;
531
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000532 if (sizehint == 0)
533 ret = PyBytes_FromStringAndSize("", 0);
534 else
535 ret = Util_GetLine(self, (sizehint < 0) ? 0 : sizehint);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000536
537cleanup:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000538 RELEASE_LOCK(self);
539 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000540}
541
542PyDoc_STRVAR(BZ2File_readlines__doc__,
543"readlines([size]) -> list\n\
544\n\
545Call readline() repeatedly and return a list of lines read.\n\
546The optional size argument, if given, is an approximate bound on the\n\
547total number of bytes in the lines returned.\n\
548");
549
550/* This is a hacked version of Python's fileobject.c:file_readlines(). */
551static PyObject *
552BZ2File_readlines(BZ2FileObject *self, PyObject *args)
553{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000554 long sizehint = 0;
555 PyObject *list = NULL;
556 PyObject *line;
557 char small_buffer[SMALLCHUNK];
558 char *buffer = small_buffer;
559 size_t buffersize = SMALLCHUNK;
560 PyObject *big_buffer = NULL;
561 size_t nfilled = 0;
562 size_t nread;
563 size_t totalread = 0;
564 char *p, *q, *end;
565 int err;
566 int shortread = 0;
567 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000568
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000569 if (!PyArg_ParseTuple(args, "|l:readlines", &sizehint))
570 return NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000571
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000572 ACQUIRE_LOCK(self);
573 switch (self->mode) {
574 case MODE_READ:
575 break;
576 case MODE_READ_EOF:
577 list = PyList_New(0);
578 goto cleanup;
579 case MODE_CLOSED:
580 PyErr_SetString(PyExc_ValueError,
581 "I/O operation on closed file");
582 goto cleanup;
583 default:
584 PyErr_SetString(PyExc_IOError,
585 "file is not ready for reading");
586 goto cleanup;
587 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000588
Antoine Pitrou7ffa1962010-08-01 20:08:46 +0000589 /* refuse to mix with f.next() */
590 if (check_iterbuffered(self))
591 goto cleanup;
592
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000593 if ((list = PyList_New(0)) == NULL)
594 goto cleanup;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000595
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000596 for (;;) {
597 Py_BEGIN_ALLOW_THREADS
598 nread = BZ2_bzRead(&bzerror, self->fp,
599 buffer+nfilled, buffersize-nfilled);
600 self->pos += nread;
601 Py_END_ALLOW_THREADS
602 if (bzerror == BZ_STREAM_END) {
603 self->size = self->pos;
604 self->mode = MODE_READ_EOF;
605 if (nread == 0) {
606 sizehint = 0;
607 break;
608 }
609 shortread = 1;
610 } else if (bzerror != BZ_OK) {
611 Util_CatchBZ2Error(bzerror);
612 error:
613 Py_DECREF(list);
614 list = NULL;
615 goto cleanup;
616 }
617 totalread += nread;
618 p = memchr(buffer+nfilled, '\n', nread);
619 if (!shortread && p == NULL) {
620 /* Need a larger buffer to fit this line */
621 nfilled += nread;
622 buffersize *= 2;
623 if (buffersize > INT_MAX) {
624 PyErr_SetString(PyExc_OverflowError,
625 "line is longer than a Python string can hold");
626 goto error;
627 }
628 if (big_buffer == NULL) {
629 /* Create the big buffer */
630 big_buffer = PyBytes_FromStringAndSize(
631 NULL, buffersize);
632 if (big_buffer == NULL)
633 goto error;
634 buffer = PyBytes_AS_STRING(big_buffer);
635 memcpy(buffer, small_buffer, nfilled);
636 }
637 else {
638 /* Grow the big buffer */
639 if (_PyBytes_Resize(&big_buffer, buffersize) < 0){
640 big_buffer = NULL;
641 goto error;
642 }
643 buffer = PyBytes_AS_STRING(big_buffer);
644 }
645 continue;
646 }
647 end = buffer+nfilled+nread;
648 q = buffer;
649 while (p != NULL) {
650 /* Process complete lines */
651 p++;
652 line = PyBytes_FromStringAndSize(q, p-q);
653 if (line == NULL)
654 goto error;
655 err = PyList_Append(list, line);
656 Py_DECREF(line);
657 if (err != 0)
658 goto error;
659 q = p;
660 p = memchr(q, '\n', end-q);
661 }
662 /* Move the remaining incomplete line to the start */
663 nfilled = end-q;
664 memmove(buffer, q, nfilled);
665 if (sizehint > 0)
666 if (totalread >= (size_t)sizehint)
667 break;
668 if (shortread) {
669 sizehint = 0;
670 break;
671 }
672 }
673 if (nfilled != 0) {
674 /* Partial last line */
675 line = PyBytes_FromStringAndSize(buffer, nfilled);
676 if (line == NULL)
677 goto error;
678 if (sizehint > 0) {
679 /* Need to complete the last line */
680 PyObject *rest = Util_GetLine(self, 0);
681 if (rest == NULL) {
682 Py_DECREF(line);
683 goto error;
684 }
685 PyBytes_Concat(&line, rest);
686 Py_DECREF(rest);
687 if (line == NULL)
688 goto error;
689 }
690 err = PyList_Append(list, line);
691 Py_DECREF(line);
692 if (err != 0)
693 goto error;
694 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000695
696 cleanup:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000697 RELEASE_LOCK(self);
698 if (big_buffer) {
699 Py_DECREF(big_buffer);
700 }
701 return list;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000702}
703
704PyDoc_STRVAR(BZ2File_write__doc__,
705"write(data) -> None\n\
706\n\
707Write the 'data' string to file. Note that due to buffering, close() may\n\
708be needed before the file on disk reflects the data written.\n\
709");
710
711/* This is a hacked version of Python's fileobject.c:file_write(). */
712static PyObject *
713BZ2File_write(BZ2FileObject *self, PyObject *args)
714{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000715 PyObject *ret = NULL;
716 Py_buffer pbuf;
717 char *buf;
718 int len;
719 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000720
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000721 if (!PyArg_ParseTuple(args, "y*:write", &pbuf))
722 return NULL;
723 buf = pbuf.buf;
724 len = pbuf.len;
Tim Peterse3228092002-11-09 04:21:44 +0000725
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000726 ACQUIRE_LOCK(self);
727 switch (self->mode) {
728 case MODE_WRITE:
729 break;
Tim Peterse3228092002-11-09 04:21:44 +0000730
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000731 case MODE_CLOSED:
732 PyErr_SetString(PyExc_ValueError,
733 "I/O operation on closed file");
734 goto cleanup;
Tim Peterse3228092002-11-09 04:21:44 +0000735
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000736 default:
737 PyErr_SetString(PyExc_IOError,
738 "file is not ready for writing");
739 goto cleanup;
740 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000741
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000742 Py_BEGIN_ALLOW_THREADS
743 BZ2_bzWrite (&bzerror, self->fp, buf, len);
744 self->pos += len;
745 Py_END_ALLOW_THREADS
Tim Peterse3228092002-11-09 04:21:44 +0000746
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000747 if (bzerror != BZ_OK) {
748 Util_CatchBZ2Error(bzerror);
749 goto cleanup;
750 }
Tim Peterse3228092002-11-09 04:21:44 +0000751
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000752 Py_INCREF(Py_None);
753 ret = Py_None;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000754
755cleanup:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000756 PyBuffer_Release(&pbuf);
757 RELEASE_LOCK(self);
758 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000759}
760
761PyDoc_STRVAR(BZ2File_writelines__doc__,
762"writelines(sequence_of_strings) -> None\n\
763\n\
764Write the sequence of strings to the file. Note that newlines are not\n\
765added. The sequence can be any iterable object producing strings. This is\n\
766equivalent to calling write() for each string.\n\
767");
768
769/* This is a hacked version of Python's fileobject.c:file_writelines(). */
770static PyObject *
771BZ2File_writelines(BZ2FileObject *self, PyObject *seq)
772{
773#define CHUNKSIZE 1000
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000774 PyObject *list = NULL;
775 PyObject *iter = NULL;
776 PyObject *ret = NULL;
777 PyObject *line;
778 int i, j, index, len, islist;
779 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000780
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000781 ACQUIRE_LOCK(self);
782 switch (self->mode) {
783 case MODE_WRITE:
784 break;
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000785
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000786 case MODE_CLOSED:
787 PyErr_SetString(PyExc_ValueError,
788 "I/O operation on closed file");
789 goto error;
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000790
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000791 default:
792 PyErr_SetString(PyExc_IOError,
793 "file is not ready for writing");
794 goto error;
795 }
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000796
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000797 islist = PyList_Check(seq);
798 if (!islist) {
799 iter = PyObject_GetIter(seq);
800 if (iter == NULL) {
801 PyErr_SetString(PyExc_TypeError,
802 "writelines() requires an iterable argument");
803 goto error;
804 }
805 list = PyList_New(CHUNKSIZE);
806 if (list == NULL)
807 goto error;
808 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000809
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000810 /* Strategy: slurp CHUNKSIZE lines into a private list,
811 checking that they are all strings, then write that list
812 without holding the interpreter lock, then come back for more. */
813 for (index = 0; ; index += CHUNKSIZE) {
814 if (islist) {
815 Py_XDECREF(list);
816 list = PyList_GetSlice(seq, index, index+CHUNKSIZE);
817 if (list == NULL)
818 goto error;
819 j = PyList_GET_SIZE(list);
820 }
821 else {
822 for (j = 0; j < CHUNKSIZE; j++) {
823 line = PyIter_Next(iter);
824 if (line == NULL) {
825 if (PyErr_Occurred())
826 goto error;
827 break;
828 }
829 PyList_SetItem(list, j, line);
830 }
831 }
832 if (j == 0)
833 break;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000834
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000835 /* Check that all entries are indeed byte strings. If not,
836 apply the same rules as for file.write() and
837 convert the rets to strings. This is slow, but
838 seems to be the only way since all conversion APIs
839 could potentially execute Python code. */
840 for (i = 0; i < j; i++) {
841 PyObject *v = PyList_GET_ITEM(list, i);
842 if (!PyBytes_Check(v)) {
843 const char *buffer;
844 Py_ssize_t len;
845 if (PyObject_AsCharBuffer(v, &buffer, &len)) {
846 PyErr_SetString(PyExc_TypeError,
847 "writelines() "
848 "argument must be "
849 "a sequence of "
850 "bytes objects");
851 goto error;
852 }
853 line = PyBytes_FromStringAndSize(buffer,
854 len);
855 if (line == NULL)
856 goto error;
857 Py_DECREF(v);
858 PyList_SET_ITEM(list, i, line);
859 }
860 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000861
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000862 /* Since we are releasing the global lock, the
863 following code may *not* execute Python code. */
864 Py_BEGIN_ALLOW_THREADS
865 for (i = 0; i < j; i++) {
866 line = PyList_GET_ITEM(list, i);
867 len = PyBytes_GET_SIZE(line);
868 BZ2_bzWrite (&bzerror, self->fp,
869 PyBytes_AS_STRING(line), len);
870 if (bzerror != BZ_OK) {
871 Py_BLOCK_THREADS
872 Util_CatchBZ2Error(bzerror);
873 goto error;
874 }
875 }
876 Py_END_ALLOW_THREADS
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000877
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000878 if (j < CHUNKSIZE)
879 break;
880 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000881
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000882 Py_INCREF(Py_None);
883 ret = Py_None;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000884
885 error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000886 RELEASE_LOCK(self);
887 Py_XDECREF(list);
888 Py_XDECREF(iter);
889 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000890#undef CHUNKSIZE
891}
892
893PyDoc_STRVAR(BZ2File_seek__doc__,
894"seek(offset [, whence]) -> None\n\
895\n\
896Move to new file position. Argument offset is a byte count. Optional\n\
897argument whence defaults to 0 (offset from start of file, offset\n\
898should be >= 0); other values are 1 (move relative to current position,\n\
899positive or negative), and 2 (move relative to end of file, usually\n\
900negative, although many platforms allow seeking beyond the end of a file).\n\
901\n\
902Note that seeking of bz2 files is emulated, and depending on the parameters\n\
903the operation may be extremely slow.\n\
904");
905
906static PyObject *
907BZ2File_seek(BZ2FileObject *self, PyObject *args)
908{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000909 int where = 0;
910 PyObject *offobj;
911 Py_off_t offset;
912 char small_buffer[SMALLCHUNK];
913 char *buffer = small_buffer;
914 size_t buffersize = SMALLCHUNK;
915 Py_off_t bytesread = 0;
916 size_t readsize;
917 int chunksize;
918 int bzerror;
919 PyObject *ret = NULL;
Tim Peterse3228092002-11-09 04:21:44 +0000920
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000921 if (!PyArg_ParseTuple(args, "O|i:seek", &offobj, &where))
922 return NULL;
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000923#if !defined(HAVE_LARGEFILE_SUPPORT)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000924 offset = PyLong_AsLong(offobj);
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000925#else
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000926 offset = PyLong_Check(offobj) ?
927 PyLong_AsLongLong(offobj) : PyLong_AsLong(offobj);
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000928#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000929 if (PyErr_Occurred())
930 return NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000931
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000932 ACQUIRE_LOCK(self);
933 Util_DropReadAhead(self);
934 switch (self->mode) {
935 case MODE_READ:
936 case MODE_READ_EOF:
937 break;
Tim Peterse3228092002-11-09 04:21:44 +0000938
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000939 case MODE_CLOSED:
940 PyErr_SetString(PyExc_ValueError,
941 "I/O operation on closed file");
942 goto cleanup;
Tim Peterse3228092002-11-09 04:21:44 +0000943
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000944 default:
945 PyErr_SetString(PyExc_IOError,
946 "seek works only while reading");
947 goto cleanup;
948 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000949
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000950 if (where == 2) {
951 if (self->size == -1) {
952 assert(self->mode != MODE_READ_EOF);
953 for (;;) {
954 Py_BEGIN_ALLOW_THREADS
955 chunksize = BZ2_bzRead(&bzerror, self->fp,
956 buffer, buffersize);
957 self->pos += chunksize;
958 Py_END_ALLOW_THREADS
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000959
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000960 bytesread += chunksize;
961 if (bzerror == BZ_STREAM_END) {
962 break;
963 } else if (bzerror != BZ_OK) {
964 Util_CatchBZ2Error(bzerror);
965 goto cleanup;
966 }
967 }
968 self->mode = MODE_READ_EOF;
969 self->size = self->pos;
970 bytesread = 0;
971 }
972 offset = self->size + offset;
973 } else if (where == 1) {
974 offset = self->pos + offset;
975 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000976
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000977 /* Before getting here, offset must be the absolute position the file
978 * pointer should be set to. */
Georg Brandl47fab922006-02-18 21:57:25 +0000979
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000980 if (offset >= self->pos) {
981 /* we can move forward */
982 offset -= self->pos;
983 } else {
984 /* we cannot move back, so rewind the stream */
985 BZ2_bzReadClose(&bzerror, self->fp);
986 if (bzerror != BZ_OK) {
987 Util_CatchBZ2Error(bzerror);
988 goto cleanup;
989 }
990 rewind(self->rawfp);
991 self->pos = 0;
992 self->fp = BZ2_bzReadOpen(&bzerror, self->rawfp,
993 0, 0, NULL, 0);
994 if (bzerror != BZ_OK) {
995 Util_CatchBZ2Error(bzerror);
996 goto cleanup;
997 }
998 self->mode = MODE_READ;
999 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001000
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001001 if (offset <= 0 || self->mode == MODE_READ_EOF)
1002 goto exit;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001003
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001004 /* Before getting here, offset must be set to the number of bytes
1005 * to walk forward. */
1006 for (;;) {
1007 if (offset-bytesread > buffersize)
1008 readsize = buffersize;
1009 else
1010 /* offset might be wider that readsize, but the result
1011 * of the subtraction is bound by buffersize (see the
1012 * condition above). buffersize is 8192. */
1013 readsize = (size_t)(offset-bytesread);
1014 Py_BEGIN_ALLOW_THREADS
1015 chunksize = BZ2_bzRead(&bzerror, self->fp, buffer, readsize);
1016 self->pos += chunksize;
1017 Py_END_ALLOW_THREADS
1018 bytesread += chunksize;
1019 if (bzerror == BZ_STREAM_END) {
1020 self->size = self->pos;
1021 self->mode = MODE_READ_EOF;
1022 break;
1023 } else if (bzerror != BZ_OK) {
1024 Util_CatchBZ2Error(bzerror);
1025 goto cleanup;
1026 }
1027 if (bytesread == offset)
1028 break;
1029 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001030
1031exit:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001032 Py_INCREF(Py_None);
1033 ret = Py_None;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001034
1035cleanup:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001036 RELEASE_LOCK(self);
1037 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001038}
1039
1040PyDoc_STRVAR(BZ2File_tell__doc__,
1041"tell() -> int\n\
1042\n\
1043Return the current file position, an integer (may be a long integer).\n\
1044");
1045
1046static PyObject *
1047BZ2File_tell(BZ2FileObject *self, PyObject *args)
1048{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001049 PyObject *ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001050
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001051 if (self->mode == MODE_CLOSED) {
1052 PyErr_SetString(PyExc_ValueError,
1053 "I/O operation on closed file");
1054 goto cleanup;
1055 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001056
Georg Brandla8bcecc2005-09-03 07:49:53 +00001057#if !defined(HAVE_LARGEFILE_SUPPORT)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001058 ret = PyLong_FromLong(self->pos);
Georg Brandla8bcecc2005-09-03 07:49:53 +00001059#else
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001060 ret = PyLong_FromLongLong(self->pos);
Georg Brandla8bcecc2005-09-03 07:49:53 +00001061#endif
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001062
1063cleanup:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001064 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001065}
1066
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001067PyDoc_STRVAR(BZ2File_close__doc__,
1068"close() -> None or (perhaps) an integer\n\
1069\n\
1070Close the file. Sets data attribute .closed to true. A closed file\n\
1071cannot be used for further I/O operations. close() may be called more\n\
1072than once without error.\n\
1073");
1074
1075static PyObject *
1076BZ2File_close(BZ2FileObject *self)
1077{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001078 PyObject *ret = NULL;
1079 int bzerror = BZ_OK;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001080
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001081 if (self->mode == MODE_CLOSED) {
1082 Py_RETURN_NONE;
1083 }
Guido van Rossumf09ca142007-06-13 00:03:05 +00001084
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001085 ACQUIRE_LOCK(self);
1086 switch (self->mode) {
1087 case MODE_READ:
1088 case MODE_READ_EOF:
1089 BZ2_bzReadClose(&bzerror, self->fp);
1090 break;
1091 case MODE_WRITE:
1092 BZ2_bzWriteClose(&bzerror, self->fp,
1093 0, NULL, NULL);
1094 break;
1095 }
1096 self->mode = MODE_CLOSED;
1097 fclose(self->rawfp);
1098 self->rawfp = NULL;
1099 if (bzerror == BZ_OK) {
1100 Py_INCREF(Py_None);
1101 ret = Py_None;
1102 }
1103 else {
1104 Util_CatchBZ2Error(bzerror);
1105 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001106
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001107 RELEASE_LOCK(self);
1108 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001109}
1110
Antoine Pitrou308705e2009-01-10 16:22:51 +00001111PyDoc_STRVAR(BZ2File_enter_doc,
1112"__enter__() -> self.");
1113
1114static PyObject *
1115BZ2File_enter(BZ2FileObject *self)
1116{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001117 if (self->mode == MODE_CLOSED) {
1118 PyErr_SetString(PyExc_ValueError,
1119 "I/O operation on closed file");
1120 return NULL;
1121 }
1122 Py_INCREF(self);
1123 return (PyObject *) self;
Antoine Pitrou308705e2009-01-10 16:22:51 +00001124}
1125
1126PyDoc_STRVAR(BZ2File_exit_doc,
1127"__exit__(*excinfo) -> None. Closes the file.");
1128
1129static PyObject *
1130BZ2File_exit(BZ2FileObject *self, PyObject *args)
1131{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001132 PyObject *ret = PyObject_CallMethod((PyObject *) self, "close", NULL);
1133 if (!ret)
1134 /* If error occurred, pass through */
1135 return NULL;
1136 Py_DECREF(ret);
1137 Py_RETURN_NONE;
Antoine Pitrou308705e2009-01-10 16:22:51 +00001138}
1139
1140
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001141static PyObject *BZ2File_getiter(BZ2FileObject *self);
1142
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001143static PyMethodDef BZ2File_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001144 {"read", (PyCFunction)BZ2File_read, METH_VARARGS, BZ2File_read__doc__},
1145 {"readline", (PyCFunction)BZ2File_readline, METH_VARARGS, BZ2File_readline__doc__},
1146 {"readlines", (PyCFunction)BZ2File_readlines, METH_VARARGS, BZ2File_readlines__doc__},
1147 {"write", (PyCFunction)BZ2File_write, METH_VARARGS, BZ2File_write__doc__},
1148 {"writelines", (PyCFunction)BZ2File_writelines, METH_O, BZ2File_writelines__doc__},
1149 {"seek", (PyCFunction)BZ2File_seek, METH_VARARGS, BZ2File_seek__doc__},
1150 {"tell", (PyCFunction)BZ2File_tell, METH_NOARGS, BZ2File_tell__doc__},
1151 {"close", (PyCFunction)BZ2File_close, METH_NOARGS, BZ2File_close__doc__},
1152 {"__enter__", (PyCFunction)BZ2File_enter, METH_NOARGS, BZ2File_enter_doc},
1153 {"__exit__", (PyCFunction)BZ2File_exit, METH_VARARGS, BZ2File_exit_doc},
1154 {NULL, NULL} /* sentinel */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001155};
1156
1157
1158/* ===================================================================== */
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001159/* Getters and setters of BZ2File. */
1160
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001161static PyObject *
1162BZ2File_get_closed(BZ2FileObject *self, void *closure)
1163{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001164 return PyLong_FromLong(self->mode == MODE_CLOSED);
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001165}
1166
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001167static PyGetSetDef BZ2File_getset[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001168 {"closed", (getter)BZ2File_get_closed, NULL,
1169 "True if the file is closed"},
1170 {NULL} /* Sentinel */
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001171};
1172
1173
1174/* ===================================================================== */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001175/* Slot definitions for BZ2File_Type. */
1176
1177static int
1178BZ2File_init(BZ2FileObject *self, PyObject *args, PyObject *kwargs)
1179{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001180 static char *kwlist[] = {"filename", "mode", "buffering",
1181 "compresslevel", 0};
1182 PyObject *name_obj = NULL;
1183 char *name;
1184 char *mode = "r";
1185 int buffering = -1;
1186 int compresslevel = 9;
1187 int bzerror;
1188 int mode_char = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001189
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001190 self->size = -1;
Tim Peterse3228092002-11-09 04:21:44 +00001191
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001192 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O&|sii:BZ2File",
1193 kwlist, PyUnicode_FSConverter, &name_obj,
1194 &mode, &buffering,
1195 &compresslevel))
1196 return -1;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001197
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001198 name = PyBytes_AsString(name_obj);
1199 if (compresslevel < 1 || compresslevel > 9) {
1200 PyErr_SetString(PyExc_ValueError,
1201 "compresslevel must be between 1 and 9");
1202 Py_DECREF(name_obj);
1203 return -1;
1204 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001205
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001206 for (;;) {
1207 int error = 0;
1208 switch (*mode) {
1209 case 'r':
1210 case 'w':
1211 if (mode_char)
1212 error = 1;
1213 mode_char = *mode;
1214 break;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001215
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001216 case 'b':
1217 break;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001218
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001219 default:
1220 error = 1;
1221 break;
1222 }
1223 if (error) {
1224 PyErr_Format(PyExc_ValueError,
1225 "invalid mode char %c", *mode);
1226 Py_DECREF(name_obj);
1227 return -1;
1228 }
1229 mode++;
1230 if (*mode == '\0')
1231 break;
1232 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001233
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001234 if (mode_char == 0) {
1235 mode_char = 'r';
1236 }
Georg Brandl6b95f1d2005-06-03 19:47:00 +00001237
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001238 mode = (mode_char == 'r') ? "rb" : "wb";
Tim Peterse3228092002-11-09 04:21:44 +00001239
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001240 self->rawfp = fopen(name, mode);
1241 Py_DECREF(name_obj);
1242 if (self->rawfp == NULL) {
1243 PyErr_SetFromErrno(PyExc_IOError);
1244 return -1;
1245 }
1246 /* XXX Ignore buffering */
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001247
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001248 /* From now on, we have stuff to dealloc, so jump to error label
1249 * instead of returning */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001250
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001251#ifdef WITH_THREAD
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001252 self->lock = PyThread_allocate_lock();
1253 if (!self->lock) {
1254 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
1255 goto error;
1256 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001257#endif
1258
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001259 if (mode_char == 'r')
1260 self->fp = BZ2_bzReadOpen(&bzerror, self->rawfp,
1261 0, 0, NULL, 0);
1262 else
1263 self->fp = BZ2_bzWriteOpen(&bzerror, self->rawfp,
1264 compresslevel, 0, 0);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001265
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001266 if (bzerror != BZ_OK) {
1267 Util_CatchBZ2Error(bzerror);
1268 goto error;
1269 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001270
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001271 self->mode = (mode_char == 'r') ? MODE_READ : MODE_WRITE;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001272
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001273 return 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001274
1275error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001276 fclose(self->rawfp);
1277 self->rawfp = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001278#ifdef WITH_THREAD
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001279 if (self->lock) {
1280 PyThread_free_lock(self->lock);
1281 self->lock = NULL;
1282 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001283#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001284 return -1;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001285}
1286
1287static void
1288BZ2File_dealloc(BZ2FileObject *self)
1289{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001290 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001291#ifdef WITH_THREAD
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001292 if (self->lock)
1293 PyThread_free_lock(self->lock);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001294#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001295 switch (self->mode) {
1296 case MODE_READ:
1297 case MODE_READ_EOF:
1298 BZ2_bzReadClose(&bzerror, self->fp);
1299 break;
1300 case MODE_WRITE:
1301 BZ2_bzWriteClose(&bzerror, self->fp,
1302 0, NULL, NULL);
1303 break;
1304 }
1305 Util_DropReadAhead(self);
1306 if (self->rawfp != NULL)
1307 fclose(self->rawfp);
1308 Py_TYPE(self)->tp_free((PyObject *)self);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001309}
1310
1311/* This is a hacked version of Python's fileobject.c:file_getiter(). */
1312static PyObject *
1313BZ2File_getiter(BZ2FileObject *self)
1314{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001315 if (self->mode == MODE_CLOSED) {
1316 PyErr_SetString(PyExc_ValueError,
1317 "I/O operation on closed file");
1318 return NULL;
1319 }
1320 Py_INCREF((PyObject*)self);
1321 return (PyObject *)self;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001322}
1323
1324/* This is a hacked version of Python's fileobject.c:file_iternext(). */
1325#define READAHEAD_BUFSIZE 8192
1326static PyObject *
1327BZ2File_iternext(BZ2FileObject *self)
1328{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001329 PyBytesObject* ret;
1330 ACQUIRE_LOCK(self);
1331 if (self->mode == MODE_CLOSED) {
1332 RELEASE_LOCK(self);
1333 PyErr_SetString(PyExc_ValueError,
1334 "I/O operation on closed file");
1335 return NULL;
1336 }
1337 ret = Util_ReadAheadGetLineSkip(self, 0, READAHEAD_BUFSIZE);
1338 RELEASE_LOCK(self);
1339 if (ret == NULL || PyBytes_GET_SIZE(ret) == 0) {
1340 Py_XDECREF(ret);
1341 return NULL;
1342 }
1343 return (PyObject *)ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001344}
1345
1346/* ===================================================================== */
1347/* BZ2File_Type definition. */
1348
1349PyDoc_VAR(BZ2File__doc__) =
1350PyDoc_STR(
1351"BZ2File(name [, mode='r', buffering=0, compresslevel=9]) -> file object\n\
1352\n\
1353Open a bz2 file. The mode can be 'r' or 'w', for reading (default) or\n\
1354writing. When opened for writing, the file will be created if it doesn't\n\
1355exist, and truncated otherwise. If the buffering argument is given, 0 means\n\
1356unbuffered, and larger numbers specify the buffer size. If compresslevel\n\
1357is given, must be a number between 1 and 9.\n\
Guido van Rossum88e860c2007-06-13 01:46:31 +00001358Data read is always returned in bytes; data written ought to be bytes.\n\
1359");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001360
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001361static PyTypeObject BZ2File_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001362 PyVarObject_HEAD_INIT(NULL, 0)
1363 "bz2.BZ2File", /*tp_name*/
1364 sizeof(BZ2FileObject), /*tp_basicsize*/
1365 0, /*tp_itemsize*/
1366 (destructor)BZ2File_dealloc, /*tp_dealloc*/
1367 0, /*tp_print*/
1368 0, /*tp_getattr*/
1369 0, /*tp_setattr*/
1370 0, /*tp_reserved*/
1371 0, /*tp_repr*/
1372 0, /*tp_as_number*/
1373 0, /*tp_as_sequence*/
1374 0, /*tp_as_mapping*/
1375 0, /*tp_hash*/
1376 0, /*tp_call*/
1377 0, /*tp_str*/
1378 PyObject_GenericGetAttr,/*tp_getattro*/
1379 PyObject_GenericSetAttr,/*tp_setattro*/
1380 0, /*tp_as_buffer*/
1381 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1382 BZ2File__doc__, /*tp_doc*/
1383 0, /*tp_traverse*/
1384 0, /*tp_clear*/
1385 0, /*tp_richcompare*/
1386 0, /*tp_weaklistoffset*/
1387 (getiterfunc)BZ2File_getiter, /*tp_iter*/
1388 (iternextfunc)BZ2File_iternext, /*tp_iternext*/
1389 BZ2File_methods, /*tp_methods*/
1390 0, /*tp_members*/
1391 BZ2File_getset, /*tp_getset*/
1392 0, /*tp_base*/
1393 0, /*tp_dict*/
1394 0, /*tp_descr_get*/
1395 0, /*tp_descr_set*/
1396 0, /*tp_dictoffset*/
1397 (initproc)BZ2File_init, /*tp_init*/
1398 PyType_GenericAlloc, /*tp_alloc*/
1399 PyType_GenericNew, /*tp_new*/
1400 PyObject_Free, /*tp_free*/
1401 0, /*tp_is_gc*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001402};
1403
1404
1405/* ===================================================================== */
1406/* Methods of BZ2Comp. */
1407
1408PyDoc_STRVAR(BZ2Comp_compress__doc__,
1409"compress(data) -> string\n\
1410\n\
1411Provide more data to the compressor object. It will return chunks of\n\
1412compressed data whenever possible. When you've finished providing data\n\
1413to compress, call the flush() method to finish the compression process,\n\
1414and return what is left in the internal buffers.\n\
1415");
1416
1417static PyObject *
1418BZ2Comp_compress(BZ2CompObject *self, PyObject *args)
1419{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001420 Py_buffer pdata;
1421 char *data;
1422 int datasize;
1423 int bufsize = SMALLCHUNK;
1424 PY_LONG_LONG totalout;
1425 PyObject *ret = NULL;
1426 bz_stream *bzs = &self->bzs;
1427 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001428
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001429 if (!PyArg_ParseTuple(args, "y*:compress", &pdata))
1430 return NULL;
1431 data = pdata.buf;
1432 datasize = pdata.len;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001433
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001434 if (datasize == 0) {
1435 PyBuffer_Release(&pdata);
1436 return PyBytes_FromStringAndSize("", 0);
1437 }
Gustavo Niemeyera6e436e2004-02-14 00:02:45 +00001438
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001439 ACQUIRE_LOCK(self);
1440 if (!self->running) {
1441 PyErr_SetString(PyExc_ValueError,
1442 "this object was already flushed");
1443 goto error;
1444 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001445
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001446 ret = PyBytes_FromStringAndSize(NULL, bufsize);
1447 if (!ret)
1448 goto error;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001449
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001450 bzs->next_in = data;
1451 bzs->avail_in = datasize;
1452 bzs->next_out = BUF(ret);
1453 bzs->avail_out = bufsize;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001454
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001455 totalout = BZS_TOTAL_OUT(bzs);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001456
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001457 for (;;) {
1458 Py_BEGIN_ALLOW_THREADS
1459 bzerror = BZ2_bzCompress(bzs, BZ_RUN);
1460 Py_END_ALLOW_THREADS
1461 if (bzerror != BZ_RUN_OK) {
1462 Util_CatchBZ2Error(bzerror);
1463 goto error;
1464 }
1465 if (bzs->avail_in == 0)
1466 break; /* no more input data */
1467 if (bzs->avail_out == 0) {
1468 bufsize = Util_NewBufferSize(bufsize);
1469 if (_PyBytes_Resize(&ret, bufsize) < 0) {
1470 BZ2_bzCompressEnd(bzs);
1471 goto error;
1472 }
1473 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1474 - totalout);
1475 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1476 }
1477 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001478
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001479 if (_PyBytes_Resize(&ret,
1480 (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout)) < 0)
1481 goto error;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001482
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001483 RELEASE_LOCK(self);
1484 PyBuffer_Release(&pdata);
1485 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001486
1487error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001488 RELEASE_LOCK(self);
1489 PyBuffer_Release(&pdata);
1490 Py_XDECREF(ret);
1491 return NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001492}
1493
1494PyDoc_STRVAR(BZ2Comp_flush__doc__,
1495"flush() -> string\n\
1496\n\
1497Finish the compression process and return what is left in internal buffers.\n\
1498You must not use the compressor object after calling this method.\n\
1499");
1500
1501static PyObject *
1502BZ2Comp_flush(BZ2CompObject *self)
1503{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001504 int bufsize = SMALLCHUNK;
1505 PyObject *ret = NULL;
1506 bz_stream *bzs = &self->bzs;
1507 PY_LONG_LONG totalout;
1508 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001509
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001510 ACQUIRE_LOCK(self);
1511 if (!self->running) {
1512 PyErr_SetString(PyExc_ValueError, "object was already "
1513 "flushed");
1514 goto error;
1515 }
1516 self->running = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001517
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001518 ret = PyBytes_FromStringAndSize(NULL, bufsize);
1519 if (!ret)
1520 goto error;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001521
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001522 bzs->next_out = BUF(ret);
1523 bzs->avail_out = bufsize;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001524
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001525 totalout = BZS_TOTAL_OUT(bzs);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001526
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001527 for (;;) {
1528 Py_BEGIN_ALLOW_THREADS
1529 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
1530 Py_END_ALLOW_THREADS
1531 if (bzerror == BZ_STREAM_END) {
1532 break;
1533 } else if (bzerror != BZ_FINISH_OK) {
1534 Util_CatchBZ2Error(bzerror);
1535 goto error;
1536 }
1537 if (bzs->avail_out == 0) {
1538 bufsize = Util_NewBufferSize(bufsize);
1539 if (_PyBytes_Resize(&ret, bufsize) < 0)
1540 goto error;
1541 bzs->next_out = BUF(ret);
1542 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1543 - totalout);
1544 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1545 }
1546 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001547
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001548 if (bzs->avail_out != 0) {
1549 if (_PyBytes_Resize(&ret,
1550 (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout)) < 0)
1551 goto error;
1552 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001553
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001554 RELEASE_LOCK(self);
1555 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001556
1557error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001558 RELEASE_LOCK(self);
1559 Py_XDECREF(ret);
1560 return NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001561}
1562
1563static PyMethodDef BZ2Comp_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001564 {"compress", (PyCFunction)BZ2Comp_compress, METH_VARARGS,
1565 BZ2Comp_compress__doc__},
1566 {"flush", (PyCFunction)BZ2Comp_flush, METH_NOARGS,
1567 BZ2Comp_flush__doc__},
1568 {NULL, NULL} /* sentinel */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001569};
1570
1571
1572/* ===================================================================== */
1573/* Slot definitions for BZ2Comp_Type. */
1574
1575static int
1576BZ2Comp_init(BZ2CompObject *self, PyObject *args, PyObject *kwargs)
1577{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001578 int compresslevel = 9;
1579 int bzerror;
1580 static char *kwlist[] = {"compresslevel", 0};
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001581
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001582 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i:BZ2Compressor",
1583 kwlist, &compresslevel))
1584 return -1;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001585
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001586 if (compresslevel < 1 || compresslevel > 9) {
1587 PyErr_SetString(PyExc_ValueError,
1588 "compresslevel must be between 1 and 9");
1589 goto error;
1590 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001591
1592#ifdef WITH_THREAD
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001593 self->lock = PyThread_allocate_lock();
1594 if (!self->lock) {
1595 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
1596 goto error;
1597 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001598#endif
1599
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001600 memset(&self->bzs, 0, sizeof(bz_stream));
1601 bzerror = BZ2_bzCompressInit(&self->bzs, compresslevel, 0, 0);
1602 if (bzerror != BZ_OK) {
1603 Util_CatchBZ2Error(bzerror);
1604 goto error;
1605 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001606
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001607 self->running = 1;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001608
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001609 return 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001610error:
1611#ifdef WITH_THREAD
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001612 if (self->lock) {
1613 PyThread_free_lock(self->lock);
1614 self->lock = NULL;
1615 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001616#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001617 return -1;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001618}
1619
1620static void
1621BZ2Comp_dealloc(BZ2CompObject *self)
1622{
1623#ifdef WITH_THREAD
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001624 if (self->lock)
1625 PyThread_free_lock(self->lock);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001626#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001627 BZ2_bzCompressEnd(&self->bzs);
1628 Py_TYPE(self)->tp_free((PyObject *)self);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001629}
1630
1631
1632/* ===================================================================== */
1633/* BZ2Comp_Type definition. */
1634
1635PyDoc_STRVAR(BZ2Comp__doc__,
1636"BZ2Compressor([compresslevel=9]) -> compressor object\n\
1637\n\
1638Create a new compressor object. This object may be used to compress\n\
1639data sequentially. If you want to compress data in one shot, use the\n\
1640compress() function instead. The compresslevel parameter, if given,\n\
1641must be a number between 1 and 9.\n\
1642");
1643
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001644static PyTypeObject BZ2Comp_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001645 PyVarObject_HEAD_INIT(NULL, 0)
1646 "bz2.BZ2Compressor", /*tp_name*/
1647 sizeof(BZ2CompObject), /*tp_basicsize*/
1648 0, /*tp_itemsize*/
1649 (destructor)BZ2Comp_dealloc, /*tp_dealloc*/
1650 0, /*tp_print*/
1651 0, /*tp_getattr*/
1652 0, /*tp_setattr*/
1653 0, /*tp_reserved*/
1654 0, /*tp_repr*/
1655 0, /*tp_as_number*/
1656 0, /*tp_as_sequence*/
1657 0, /*tp_as_mapping*/
1658 0, /*tp_hash*/
1659 0, /*tp_call*/
1660 0, /*tp_str*/
1661 PyObject_GenericGetAttr,/*tp_getattro*/
1662 PyObject_GenericSetAttr,/*tp_setattro*/
1663 0, /*tp_as_buffer*/
1664 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1665 BZ2Comp__doc__, /*tp_doc*/
1666 0, /*tp_traverse*/
1667 0, /*tp_clear*/
1668 0, /*tp_richcompare*/
1669 0, /*tp_weaklistoffset*/
1670 0, /*tp_iter*/
1671 0, /*tp_iternext*/
1672 BZ2Comp_methods, /*tp_methods*/
1673 0, /*tp_members*/
1674 0, /*tp_getset*/
1675 0, /*tp_base*/
1676 0, /*tp_dict*/
1677 0, /*tp_descr_get*/
1678 0, /*tp_descr_set*/
1679 0, /*tp_dictoffset*/
1680 (initproc)BZ2Comp_init, /*tp_init*/
1681 PyType_GenericAlloc, /*tp_alloc*/
1682 PyType_GenericNew, /*tp_new*/
1683 PyObject_Free, /*tp_free*/
1684 0, /*tp_is_gc*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001685};
1686
1687
1688/* ===================================================================== */
1689/* Members of BZ2Decomp. */
1690
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001691#undef OFF
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001692#define OFF(x) offsetof(BZ2DecompObject, x)
1693
1694static PyMemberDef BZ2Decomp_members[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001695 {"unused_data", T_OBJECT, OFF(unused_data), READONLY},
1696 {NULL} /* Sentinel */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001697};
1698
1699
1700/* ===================================================================== */
1701/* Methods of BZ2Decomp. */
1702
1703PyDoc_STRVAR(BZ2Decomp_decompress__doc__,
1704"decompress(data) -> string\n\
1705\n\
1706Provide more data to the decompressor object. It will return chunks\n\
1707of decompressed data whenever possible. If you try to decompress data\n\
1708after the end of stream is found, EOFError will be raised. If any data\n\
1709was found after the end of stream, it'll be ignored and saved in\n\
1710unused_data attribute.\n\
1711");
1712
1713static PyObject *
1714BZ2Decomp_decompress(BZ2DecompObject *self, PyObject *args)
1715{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001716 Py_buffer pdata;
1717 char *data;
1718 int datasize;
1719 int bufsize = SMALLCHUNK;
1720 PY_LONG_LONG totalout;
1721 PyObject *ret = NULL;
1722 bz_stream *bzs = &self->bzs;
1723 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001724
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001725 if (!PyArg_ParseTuple(args, "y*:decompress", &pdata))
1726 return NULL;
1727 data = pdata.buf;
1728 datasize = pdata.len;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001729
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001730 ACQUIRE_LOCK(self);
1731 if (!self->running) {
1732 PyErr_SetString(PyExc_EOFError, "end of stream was "
1733 "already found");
1734 goto error;
1735 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001736
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001737 ret = PyBytes_FromStringAndSize(NULL, bufsize);
1738 if (!ret)
1739 goto error;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001740
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001741 bzs->next_in = data;
1742 bzs->avail_in = datasize;
1743 bzs->next_out = BUF(ret);
1744 bzs->avail_out = bufsize;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001745
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001746 totalout = BZS_TOTAL_OUT(bzs);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001747
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001748 for (;;) {
1749 Py_BEGIN_ALLOW_THREADS
1750 bzerror = BZ2_bzDecompress(bzs);
1751 Py_END_ALLOW_THREADS
1752 if (bzerror == BZ_STREAM_END) {
1753 if (bzs->avail_in != 0) {
1754 Py_DECREF(self->unused_data);
1755 self->unused_data =
1756 PyBytes_FromStringAndSize(bzs->next_in,
1757 bzs->avail_in);
1758 }
1759 self->running = 0;
1760 break;
1761 }
1762 if (bzerror != BZ_OK) {
1763 Util_CatchBZ2Error(bzerror);
1764 goto error;
1765 }
1766 if (bzs->avail_in == 0)
1767 break; /* no more input data */
1768 if (bzs->avail_out == 0) {
1769 bufsize = Util_NewBufferSize(bufsize);
1770 if (_PyBytes_Resize(&ret, bufsize) < 0) {
1771 BZ2_bzDecompressEnd(bzs);
1772 goto error;
1773 }
1774 bzs->next_out = BUF(ret);
1775 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1776 - totalout);
1777 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1778 }
1779 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001780
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001781 if (bzs->avail_out != 0) {
1782 if (_PyBytes_Resize(&ret,
1783 (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout)) < 0)
1784 goto error;
1785 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001786
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001787 RELEASE_LOCK(self);
1788 PyBuffer_Release(&pdata);
1789 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001790
1791error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001792 RELEASE_LOCK(self);
1793 PyBuffer_Release(&pdata);
1794 Py_XDECREF(ret);
1795 return NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001796}
1797
1798static PyMethodDef BZ2Decomp_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001799 {"decompress", (PyCFunction)BZ2Decomp_decompress, METH_VARARGS, BZ2Decomp_decompress__doc__},
1800 {NULL, NULL} /* sentinel */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001801};
1802
1803
1804/* ===================================================================== */
1805/* Slot definitions for BZ2Decomp_Type. */
1806
1807static int
1808BZ2Decomp_init(BZ2DecompObject *self, PyObject *args, PyObject *kwargs)
1809{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001810 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001811
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001812 if (!PyArg_ParseTuple(args, ":BZ2Decompressor"))
1813 return -1;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001814
1815#ifdef WITH_THREAD
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001816 self->lock = PyThread_allocate_lock();
1817 if (!self->lock) {
1818 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
1819 goto error;
1820 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001821#endif
1822
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001823 self->unused_data = PyBytes_FromStringAndSize("", 0);
1824 if (!self->unused_data)
1825 goto error;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001826
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001827 memset(&self->bzs, 0, sizeof(bz_stream));
1828 bzerror = BZ2_bzDecompressInit(&self->bzs, 0, 0);
1829 if (bzerror != BZ_OK) {
1830 Util_CatchBZ2Error(bzerror);
1831 goto error;
1832 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001833
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001834 self->running = 1;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001835
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001836 return 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001837
1838error:
1839#ifdef WITH_THREAD
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001840 if (self->lock) {
1841 PyThread_free_lock(self->lock);
1842 self->lock = NULL;
1843 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001844#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001845 Py_CLEAR(self->unused_data);
1846 return -1;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001847}
1848
1849static void
1850BZ2Decomp_dealloc(BZ2DecompObject *self)
1851{
1852#ifdef WITH_THREAD
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001853 if (self->lock)
1854 PyThread_free_lock(self->lock);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001855#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001856 Py_XDECREF(self->unused_data);
1857 BZ2_bzDecompressEnd(&self->bzs);
1858 Py_TYPE(self)->tp_free((PyObject *)self);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001859}
1860
1861
1862/* ===================================================================== */
1863/* BZ2Decomp_Type definition. */
1864
1865PyDoc_STRVAR(BZ2Decomp__doc__,
1866"BZ2Decompressor() -> decompressor object\n\
1867\n\
1868Create a new decompressor object. This object may be used to decompress\n\
1869data sequentially. If you want to decompress data in one shot, use the\n\
1870decompress() function instead.\n\
1871");
1872
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001873static PyTypeObject BZ2Decomp_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001874 PyVarObject_HEAD_INIT(NULL, 0)
1875 "bz2.BZ2Decompressor", /*tp_name*/
1876 sizeof(BZ2DecompObject), /*tp_basicsize*/
1877 0, /*tp_itemsize*/
1878 (destructor)BZ2Decomp_dealloc, /*tp_dealloc*/
1879 0, /*tp_print*/
1880 0, /*tp_getattr*/
1881 0, /*tp_setattr*/
1882 0, /*tp_reserved*/
1883 0, /*tp_repr*/
1884 0, /*tp_as_number*/
1885 0, /*tp_as_sequence*/
1886 0, /*tp_as_mapping*/
1887 0, /*tp_hash*/
1888 0, /*tp_call*/
1889 0, /*tp_str*/
1890 PyObject_GenericGetAttr,/*tp_getattro*/
1891 PyObject_GenericSetAttr,/*tp_setattro*/
1892 0, /*tp_as_buffer*/
1893 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1894 BZ2Decomp__doc__, /*tp_doc*/
1895 0, /*tp_traverse*/
1896 0, /*tp_clear*/
1897 0, /*tp_richcompare*/
1898 0, /*tp_weaklistoffset*/
1899 0, /*tp_iter*/
1900 0, /*tp_iternext*/
1901 BZ2Decomp_methods, /*tp_methods*/
1902 BZ2Decomp_members, /*tp_members*/
1903 0, /*tp_getset*/
1904 0, /*tp_base*/
1905 0, /*tp_dict*/
1906 0, /*tp_descr_get*/
1907 0, /*tp_descr_set*/
1908 0, /*tp_dictoffset*/
1909 (initproc)BZ2Decomp_init, /*tp_init*/
1910 PyType_GenericAlloc, /*tp_alloc*/
1911 PyType_GenericNew, /*tp_new*/
1912 PyObject_Free, /*tp_free*/
1913 0, /*tp_is_gc*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001914};
1915
1916
1917/* ===================================================================== */
1918/* Module functions. */
1919
1920PyDoc_STRVAR(bz2_compress__doc__,
1921"compress(data [, compresslevel=9]) -> string\n\
1922\n\
1923Compress data in one shot. If you want to compress data sequentially,\n\
1924use an instance of BZ2Compressor instead. The compresslevel parameter, if\n\
1925given, must be a number between 1 and 9.\n\
1926");
1927
1928static PyObject *
1929bz2_compress(PyObject *self, PyObject *args, PyObject *kwargs)
1930{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001931 int compresslevel=9;
1932 Py_buffer pdata;
1933 char *data;
1934 int datasize;
1935 int bufsize;
1936 PyObject *ret = NULL;
1937 bz_stream _bzs;
1938 bz_stream *bzs = &_bzs;
1939 int bzerror;
1940 static char *kwlist[] = {"data", "compresslevel", 0};
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001941
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001942 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*|i",
1943 kwlist, &pdata,
1944 &compresslevel))
1945 return NULL;
1946 data = pdata.buf;
1947 datasize = pdata.len;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001948
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001949 if (compresslevel < 1 || compresslevel > 9) {
1950 PyErr_SetString(PyExc_ValueError,
1951 "compresslevel must be between 1 and 9");
1952 PyBuffer_Release(&pdata);
1953 return NULL;
1954 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001955
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001956 /* Conforming to bz2 manual, this is large enough to fit compressed
1957 * data in one shot. We will check it later anyway. */
1958 bufsize = datasize + (datasize/100+1) + 600;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001959
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001960 ret = PyBytes_FromStringAndSize(NULL, bufsize);
1961 if (!ret) {
1962 PyBuffer_Release(&pdata);
1963 return NULL;
1964 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001965
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001966 memset(bzs, 0, sizeof(bz_stream));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001967
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001968 bzs->next_in = data;
1969 bzs->avail_in = datasize;
1970 bzs->next_out = BUF(ret);
1971 bzs->avail_out = bufsize;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001972
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001973 bzerror = BZ2_bzCompressInit(bzs, compresslevel, 0, 0);
1974 if (bzerror != BZ_OK) {
1975 Util_CatchBZ2Error(bzerror);
1976 PyBuffer_Release(&pdata);
1977 Py_DECREF(ret);
1978 return NULL;
1979 }
Tim Peterse3228092002-11-09 04:21:44 +00001980
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001981 for (;;) {
1982 Py_BEGIN_ALLOW_THREADS
1983 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
1984 Py_END_ALLOW_THREADS
1985 if (bzerror == BZ_STREAM_END) {
1986 break;
1987 } else if (bzerror != BZ_FINISH_OK) {
1988 BZ2_bzCompressEnd(bzs);
1989 Util_CatchBZ2Error(bzerror);
1990 PyBuffer_Release(&pdata);
1991 Py_DECREF(ret);
1992 return NULL;
1993 }
1994 if (bzs->avail_out == 0) {
1995 bufsize = Util_NewBufferSize(bufsize);
1996 if (_PyBytes_Resize(&ret, bufsize) < 0) {
1997 BZ2_bzCompressEnd(bzs);
1998 PyBuffer_Release(&pdata);
1999 return NULL;
2000 }
2001 bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
2002 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
2003 }
2004 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002005
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002006 if (bzs->avail_out != 0) {
2007 if (_PyBytes_Resize(&ret, (Py_ssize_t)BZS_TOTAL_OUT(bzs)) < 0) {
2008 ret = NULL;
2009 }
2010 }
2011 BZ2_bzCompressEnd(bzs);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002012
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002013 PyBuffer_Release(&pdata);
2014 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002015}
2016
2017PyDoc_STRVAR(bz2_decompress__doc__,
2018"decompress(data) -> decompressed data\n\
2019\n\
2020Decompress data in one shot. If you want to decompress data sequentially,\n\
2021use an instance of BZ2Decompressor instead.\n\
2022");
2023
2024static PyObject *
2025bz2_decompress(PyObject *self, PyObject *args)
2026{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002027 Py_buffer pdata;
2028 char *data;
2029 int datasize;
2030 int bufsize = SMALLCHUNK;
2031 PyObject *ret;
2032 bz_stream _bzs;
2033 bz_stream *bzs = &_bzs;
2034 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002035
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002036 if (!PyArg_ParseTuple(args, "y*:decompress", &pdata))
2037 return NULL;
2038 data = pdata.buf;
2039 datasize = pdata.len;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002040
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002041 if (datasize == 0) {
2042 PyBuffer_Release(&pdata);
2043 return PyBytes_FromStringAndSize("", 0);
2044 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002045
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002046 ret = PyBytes_FromStringAndSize(NULL, bufsize);
2047 if (!ret) {
2048 PyBuffer_Release(&pdata);
2049 return NULL;
2050 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002051
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002052 memset(bzs, 0, sizeof(bz_stream));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002053
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002054 bzs->next_in = data;
2055 bzs->avail_in = datasize;
2056 bzs->next_out = BUF(ret);
2057 bzs->avail_out = bufsize;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002058
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002059 bzerror = BZ2_bzDecompressInit(bzs, 0, 0);
2060 if (bzerror != BZ_OK) {
2061 Util_CatchBZ2Error(bzerror);
2062 Py_DECREF(ret);
2063 PyBuffer_Release(&pdata);
2064 return NULL;
2065 }
Tim Peterse3228092002-11-09 04:21:44 +00002066
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002067 for (;;) {
2068 Py_BEGIN_ALLOW_THREADS
2069 bzerror = BZ2_bzDecompress(bzs);
2070 Py_END_ALLOW_THREADS
2071 if (bzerror == BZ_STREAM_END) {
2072 break;
2073 } else if (bzerror != BZ_OK) {
2074 BZ2_bzDecompressEnd(bzs);
2075 Util_CatchBZ2Error(bzerror);
2076 PyBuffer_Release(&pdata);
2077 Py_DECREF(ret);
2078 return NULL;
2079 }
2080 if (bzs->avail_in == 0) {
2081 BZ2_bzDecompressEnd(bzs);
2082 PyErr_SetString(PyExc_ValueError,
2083 "couldn't find end of stream");
2084 PyBuffer_Release(&pdata);
2085 Py_DECREF(ret);
2086 return NULL;
2087 }
2088 if (bzs->avail_out == 0) {
2089 bufsize = Util_NewBufferSize(bufsize);
2090 if (_PyBytes_Resize(&ret, bufsize) < 0) {
2091 BZ2_bzDecompressEnd(bzs);
2092 PyBuffer_Release(&pdata);
2093 return NULL;
2094 }
2095 bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
2096 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
2097 }
2098 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002099
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002100 if (bzs->avail_out != 0) {
2101 if (_PyBytes_Resize(&ret, (Py_ssize_t)BZS_TOTAL_OUT(bzs)) < 0) {
2102 ret = NULL;
2103 }
2104 }
2105 BZ2_bzDecompressEnd(bzs);
2106 PyBuffer_Release(&pdata);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002107
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002108 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002109}
2110
2111static PyMethodDef bz2_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002112 {"compress", (PyCFunction) bz2_compress, METH_VARARGS|METH_KEYWORDS,
2113 bz2_compress__doc__},
2114 {"decompress", (PyCFunction) bz2_decompress, METH_VARARGS,
2115 bz2_decompress__doc__},
2116 {NULL, NULL} /* sentinel */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002117};
2118
2119/* ===================================================================== */
2120/* Initialization function. */
2121
2122PyDoc_STRVAR(bz2__doc__,
2123"The python bz2 module provides a comprehensive interface for\n\
2124the bz2 compression library. It implements a complete file\n\
2125interface, one shot (de)compression functions, and types for\n\
2126sequential (de)compression.\n\
2127");
2128
Martin v. Löwis1a214512008-06-11 05:26:20 +00002129
2130static struct PyModuleDef bz2module = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002131 PyModuleDef_HEAD_INIT,
2132 "bz2",
2133 bz2__doc__,
2134 -1,
2135 bz2_methods,
2136 NULL,
2137 NULL,
2138 NULL,
2139 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00002140};
2141
Neal Norwitz21d896c2003-07-01 20:15:21 +00002142PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00002143PyInit_bz2(void)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002144{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002145 PyObject *m;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002146
Antoine Pitrou70c60442010-09-23 19:51:39 +00002147 if (PyType_Ready(&BZ2File_Type) < 0)
2148 return NULL;
2149 if (PyType_Ready(&BZ2Comp_Type) < 0)
2150 return NULL;
2151 if (PyType_Ready(&BZ2Decomp_Type) < 0)
2152 return NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002153
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002154 m = PyModule_Create(&bz2module);
2155 if (m == NULL)
2156 return NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002157
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002158 PyModule_AddObject(m, "__author__", PyUnicode_FromString(__author__));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002159
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002160 Py_INCREF(&BZ2File_Type);
2161 PyModule_AddObject(m, "BZ2File", (PyObject *)&BZ2File_Type);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002162
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002163 Py_INCREF(&BZ2Comp_Type);
2164 PyModule_AddObject(m, "BZ2Compressor", (PyObject *)&BZ2Comp_Type);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002165
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002166 Py_INCREF(&BZ2Decomp_Type);
2167 PyModule_AddObject(m, "BZ2Decompressor", (PyObject *)&BZ2Decomp_Type);
2168 return m;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002169}