blob: 3e55202bd8bae9ed25e42a36056bb47509057321 [file] [log] [blame]
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001/*
2
3python-bz2 - python bz2 library interface
4
5Copyright (c) 2002 Gustavo Niemeyer <niemeyer@conectiva.com>
6Copyright (c) 2002 Python Software Foundation; All Rights Reserved
7
8*/
9
Martin v. Löwise17af7b2002-11-23 09:16:19 +000010#include "Python.h"
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000011#include <stdio.h>
12#include <bzlib.h>
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000013#include "structmember.h"
14
15#ifdef WITH_THREAD
16#include "pythread.h"
17#endif
18
19static char __author__[] =
20"The bz2 python module was written by:\n\
21\n\
22 Gustavo Niemeyer <niemeyer@conectiva.com>\n\
23";
24
/* File-offset type used for stream positions: 64 bits wide when large-file
 * support is enabled.  Mirrors the selection logic in Objects/fileobject.c.
 *
 * off_t is used both when large-file support is off and when off_t is
 * already at least 8 bytes; otherwise fpos_t is used if it is wide enough. */
#if !defined(HAVE_LARGEFILE_SUPPORT) || SIZEOF_OFF_T >= 8
typedef off_t Py_off_t;
#elif SIZEOF_FPOS_T >= 8
typedef fpos_t Py_off_t;
#else
#error "Large file support, but neither off_t nor fpos_t is large enough."
#endif
36
/* Shorthand for the writable character storage of a bytes object. */
#define BUF(v) PyBytes_AS_STRING(v)

/* Values stored in BZ2FileObject.mode. */
#define MODE_CLOSED   0     /* file has been closed */
#define MODE_READ     1     /* open for reading, more data may follow */
#define MODE_READ_EOF 2     /* open for reading, end of stream reached */
#define MODE_WRITE    3     /* open for writing */

/* Exact type test (subclasses do not match). */
#define BZ2FileObject_Check(v)  (Py_TYPE(v) == &BZ2File_Type)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000045
Gustavo Niemeyer7628f1f2003-04-27 06:25:24 +000046
/*
 * libbz2 API compatibility.
 *
 * When the bzlib header defines BZ_CONFIG_ERROR the BZ2_-prefixed entry
 * points are used as-is and the total output count is assembled from the
 * hi32/lo32 halves of the stream structure.  Otherwise the BZ2_ names are
 * mapped onto their unprefixed equivalents and a single total_out field is
 * read (presumably an older libbz2 release -- confirm against bzlib.h).
 *
 * BZS_TOTAL_OUT(bzs) takes a pointer to a bz_stream.  The argument and the
 * whole expansion are parenthesized so that any pointer expression (for
 * example &self->bzs) can be passed and the result can be embedded in a
 * larger expression safely.
 */
#ifdef BZ_CONFIG_ERROR

#if SIZEOF_LONG >= 8
#define BZS_TOTAL_OUT(bzs) \
    (((long)(bzs)->total_out_hi32 << 32) + (bzs)->total_out_lo32)
#elif SIZEOF_LONG_LONG >= 8
#define BZS_TOTAL_OUT(bzs) \
    (((PY_LONG_LONG)(bzs)->total_out_hi32 << 32) + (bzs)->total_out_lo32)
#else
/* No 64-bit integer type available: only the low 32 bits are reported. */
#define BZS_TOTAL_OUT(bzs) \
    ((bzs)->total_out_lo32)
#endif

#else /* ! BZ_CONFIG_ERROR */

#define BZ2_bzRead bzRead
#define BZ2_bzReadOpen bzReadOpen
#define BZ2_bzReadClose bzReadClose
#define BZ2_bzWrite bzWrite
#define BZ2_bzWriteOpen bzWriteOpen
#define BZ2_bzWriteClose bzWriteClose
#define BZ2_bzCompress bzCompress
#define BZ2_bzCompressInit bzCompressInit
#define BZ2_bzCompressEnd bzCompressEnd
#define BZ2_bzDecompress bzDecompress
#define BZ2_bzDecompressInit bzDecompressInit
#define BZ2_bzDecompressEnd bzDecompressEnd

#define BZS_TOTAL_OUT(bzs) ((bzs)->total_out)

#endif /* ! BZ_CONFIG_ERROR */
78
79
/*
 * Per-object locking helpers.  `obj` is a pointer to any object structure
 * that has a `lock` member.
 *
 * ACQUIRE_LOCK first attempts a non-waiting acquire while still holding the
 * GIL; only when that fails does it release the GIL and block, so that the
 * thread currently owning the lock can make progress.
 *
 * Without thread support both macros expand to nothing.
 */
#ifdef WITH_THREAD
#define ACQUIRE_LOCK(obj) do { \
    if (!PyThread_acquire_lock((obj)->lock, 0)) { \
        Py_BEGIN_ALLOW_THREADS \
        PyThread_acquire_lock((obj)->lock, 1); \
        Py_END_ALLOW_THREADS \
    } } while(0)
#define RELEASE_LOCK(obj) PyThread_release_lock((obj)->lock)
#else
#define ACQUIRE_LOCK(obj)
#define RELEASE_LOCK(obj)
#endif
92
/* Bit flags recording which newline conventions have been seen
 * (the f_newlinetypes field these were written for is not visible here). */
#define NEWLINE_UNKNOWN 0       /* no newline encountered so far */
#define NEWLINE_CR      1       /* saw a lone \r */
#define NEWLINE_LF      2       /* saw a lone \n */
#define NEWLINE_CRLF    4       /* saw a \r\n pair */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000098
99/* ===================================================================== */
100/* Structure definitions. */
101
102typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000103 PyObject_HEAD
104 FILE *rawfp;
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000105
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000106 char* f_buf; /* Allocated readahead buffer */
107 char* f_bufend; /* Points after last occupied position */
108 char* f_bufptr; /* Current buffer position */
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000109
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000110 BZFILE *fp;
111 int mode;
112 Py_off_t pos;
113 Py_off_t size;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000114#ifdef WITH_THREAD
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000115 PyThread_type_lock lock;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000116#endif
117} BZ2FileObject;
118
119typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000120 PyObject_HEAD
121 bz_stream bzs;
122 int running;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000123#ifdef WITH_THREAD
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000124 PyThread_type_lock lock;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000125#endif
126} BZ2CompObject;
127
128typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000129 PyObject_HEAD
130 bz_stream bzs;
131 int running;
132 PyObject *unused_data;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000133#ifdef WITH_THREAD
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000134 PyThread_type_lock lock;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000135#endif
136} BZ2DecompObject;
137
138/* ===================================================================== */
139/* Utility functions. */
140
Antoine Pitrou7ffa1962010-08-01 20:08:46 +0000141/* Refuse regular I/O if there's data in the iteration-buffer.
142 * Mixing them would cause data to arrive out of order, as the read*
143 * methods don't use the iteration buffer. */
144static int
145check_iterbuffered(BZ2FileObject *f)
146{
147 if (f->f_buf != NULL &&
148 (f->f_bufend - f->f_bufptr) > 0 &&
149 f->f_buf[0] != '\0') {
150 PyErr_SetString(PyExc_ValueError,
151 "Mixing iteration and read methods would lose data");
152 return -1;
153 }
154 return 0;
155}
156
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000157static int
158Util_CatchBZ2Error(int bzerror)
159{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000160 int ret = 0;
161 switch(bzerror) {
162 case BZ_OK:
163 case BZ_STREAM_END:
164 break;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000165
Gustavo Niemeyer7628f1f2003-04-27 06:25:24 +0000166#ifdef BZ_CONFIG_ERROR
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000167 case BZ_CONFIG_ERROR:
168 PyErr_SetString(PyExc_SystemError,
169 "the bz2 library was not compiled "
170 "correctly");
171 ret = 1;
172 break;
Gustavo Niemeyer7628f1f2003-04-27 06:25:24 +0000173#endif
Tim Peterse3228092002-11-09 04:21:44 +0000174
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000175 case BZ_PARAM_ERROR:
176 PyErr_SetString(PyExc_ValueError,
177 "the bz2 library has received wrong "
178 "parameters");
179 ret = 1;
180 break;
Tim Peterse3228092002-11-09 04:21:44 +0000181
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000182 case BZ_MEM_ERROR:
183 PyErr_NoMemory();
184 ret = 1;
185 break;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000186
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000187 case BZ_DATA_ERROR:
188 case BZ_DATA_ERROR_MAGIC:
189 PyErr_SetString(PyExc_IOError, "invalid data stream");
190 ret = 1;
191 break;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000192
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000193 case BZ_IO_ERROR:
194 PyErr_SetString(PyExc_IOError, "unknown IO error");
195 ret = 1;
196 break;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000197
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000198 case BZ_UNEXPECTED_EOF:
199 PyErr_SetString(PyExc_EOFError,
200 "compressed file ended before the "
201 "logical end-of-stream was detected");
202 ret = 1;
203 break;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000204
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000205 case BZ_SEQUENCE_ERROR:
206 PyErr_SetString(PyExc_RuntimeError,
207 "wrong sequence of bz2 library "
208 "commands used");
209 ret = 1;
210 break;
211 }
212 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000213}
214
/* Buffer growth parameters: SMALLCHUNK is the initial step (at least 8 KiB),
 * BIGCHUNK the largest step once buffers become big. */
#if BUFSIZ < 8192
#define SMALLCHUNK 8192
#else
#define SMALLCHUNK BUFSIZ
#endif

#if SIZEOF_INT < 4
#define BIGCHUNK (512 * 32)
#else
#define BIGCHUNK (512 * 1024)
#endif

/* Adapted from new_buffersize() in Python's fileobject.c.
 *
 * Given the current size of a read buffer, return the size it should be
 * grown to:
 *   - up to SMALLCHUNK:            add SMALLCHUNK
 *   - above that, up to BIGCHUNK:  double
 *   - beyond BIGCHUNK:             add BIGCHUNK
 */
static size_t
Util_NewBufferSize(size_t currentsize)
{
    size_t step;

    if (currentsize <= SMALLCHUNK)
        step = SMALLCHUNK;
    else if (currentsize <= BIGCHUNK)
        step = currentsize;
    else
        step = BIGCHUNK;

    return currentsize + step;
}
241
242/* This is a hacked version of Python's fileobject.c:get_line(). */
243static PyObject *
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000244Util_GetLine(BZ2FileObject *f, int n)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000245{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000246 char c;
247 char *buf, *end;
248 size_t total_v_size; /* total # of slots in buffer */
249 size_t used_v_size; /* # used slots in buffer */
250 size_t increment; /* amount to increment the buffer */
251 PyObject *v;
252 int bzerror;
253 int bytes_read;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000254
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000255 total_v_size = n > 0 ? n : 100;
256 v = PyBytes_FromStringAndSize((char *)NULL, total_v_size);
257 if (v == NULL)
258 return NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000259
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000260 buf = BUF(v);
261 end = buf + total_v_size;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000262
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000263 for (;;) {
264 Py_BEGIN_ALLOW_THREADS
265 do {
266 bytes_read = BZ2_bzRead(&bzerror, f->fp, &c, 1);
267 f->pos++;
268 if (bytes_read == 0)
269 break;
270 *buf++ = c;
271 } while (bzerror == BZ_OK && c != '\n' && buf != end);
272 Py_END_ALLOW_THREADS
273 if (bzerror == BZ_STREAM_END) {
274 f->size = f->pos;
275 f->mode = MODE_READ_EOF;
276 break;
277 } else if (bzerror != BZ_OK) {
278 Util_CatchBZ2Error(bzerror);
279 Py_DECREF(v);
280 return NULL;
281 }
282 if (c == '\n')
283 break;
284 /* Must be because buf == end */
285 if (n > 0)
286 break;
287 used_v_size = total_v_size;
288 increment = total_v_size >> 2; /* mild exponential growth */
289 total_v_size += increment;
290 if (total_v_size > INT_MAX) {
291 PyErr_SetString(PyExc_OverflowError,
292 "line is longer than a Python string can hold");
293 Py_DECREF(v);
294 return NULL;
295 }
296 if (_PyBytes_Resize(&v, total_v_size) < 0) {
297 return NULL;
298 }
299 buf = BUF(v) + used_v_size;
300 end = BUF(v) + total_v_size;
301 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000302
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000303 used_v_size = buf - BUF(v);
304 if (used_v_size != total_v_size) {
305 if (_PyBytes_Resize(&v, used_v_size) < 0) {
306 v = NULL;
307 }
308 }
309 return v;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000310}
311
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000312/* This is a hacked version of Python's fileobject.c:drop_readahead(). */
313static void
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000314Util_DropReadAhead(BZ2FileObject *f)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000315{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000316 if (f->f_buf != NULL) {
317 PyMem_Free(f->f_buf);
318 f->f_buf = NULL;
319 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000320}
321
322/* This is a hacked version of Python's fileobject.c:readahead(). */
323static int
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000324Util_ReadAhead(BZ2FileObject *f, int bufsize)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000325{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000326 int chunksize;
327 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000328
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000329 if (f->f_buf != NULL) {
330 if((f->f_bufend - f->f_bufptr) >= 1)
331 return 0;
332 else
333 Util_DropReadAhead(f);
334 }
335 if (f->mode == MODE_READ_EOF) {
336 f->f_bufptr = f->f_buf;
337 f->f_bufend = f->f_buf;
338 return 0;
339 }
340 if ((f->f_buf = PyMem_Malloc(bufsize)) == NULL) {
341 PyErr_NoMemory();
342 return -1;
343 }
344 Py_BEGIN_ALLOW_THREADS
345 chunksize = BZ2_bzRead(&bzerror, f->fp, f->f_buf, bufsize);
346 Py_END_ALLOW_THREADS
347 f->pos += chunksize;
348 if (bzerror == BZ_STREAM_END) {
349 f->size = f->pos;
350 f->mode = MODE_READ_EOF;
351 } else if (bzerror != BZ_OK) {
352 Util_CatchBZ2Error(bzerror);
353 Util_DropReadAhead(f);
354 return -1;
355 }
356 f->f_bufptr = f->f_buf;
357 f->f_bufend = f->f_buf + chunksize;
358 return 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000359}
360
361/* This is a hacked version of Python's
362 * fileobject.c:readahead_get_line_skip(). */
Christian Heimes72b710a2008-05-26 13:28:38 +0000363static PyBytesObject *
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000364Util_ReadAheadGetLineSkip(BZ2FileObject *f, int skip, int bufsize)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000365{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000366 PyBytesObject* s;
367 char *bufptr;
368 char *buf;
369 int len;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000370
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000371 if (f->f_buf == NULL)
372 if (Util_ReadAhead(f, bufsize) < 0)
373 return NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000374
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000375 len = f->f_bufend - f->f_bufptr;
376 if (len == 0)
377 return (PyBytesObject *)
378 PyBytes_FromStringAndSize(NULL, skip);
379 bufptr = memchr(f->f_bufptr, '\n', len);
380 if (bufptr != NULL) {
381 bufptr++; /* Count the '\n' */
382 len = bufptr - f->f_bufptr;
383 s = (PyBytesObject *)
384 PyBytes_FromStringAndSize(NULL, skip+len);
385 if (s == NULL)
386 return NULL;
387 memcpy(PyBytes_AS_STRING(s)+skip, f->f_bufptr, len);
388 f->f_bufptr = bufptr;
389 if (bufptr == f->f_bufend)
390 Util_DropReadAhead(f);
391 } else {
392 bufptr = f->f_bufptr;
393 buf = f->f_buf;
394 f->f_buf = NULL; /* Force new readahead buffer */
395 s = Util_ReadAheadGetLineSkip(f, skip+len,
396 bufsize + (bufsize>>2));
397 if (s == NULL) {
398 PyMem_Free(buf);
399 return NULL;
400 }
401 memcpy(PyBytes_AS_STRING(s)+skip, bufptr, len);
402 PyMem_Free(buf);
403 }
404 return s;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000405}
406
407/* ===================================================================== */
408/* Methods of BZ2File. */
409
410PyDoc_STRVAR(BZ2File_read__doc__,
411"read([size]) -> string\n\
412\n\
413Read at most size uncompressed bytes, returned as a string. If the size\n\
414argument is negative or omitted, read until EOF is reached.\n\
415");
416
417/* This is a hacked version of Python's fileobject.c:file_read(). */
418static PyObject *
419BZ2File_read(BZ2FileObject *self, PyObject *args)
420{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000421 long bytesrequested = -1;
422 size_t bytesread, buffersize, chunksize;
423 int bzerror;
424 PyObject *ret = NULL;
Tim Peterse3228092002-11-09 04:21:44 +0000425
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000426 if (!PyArg_ParseTuple(args, "|l:read", &bytesrequested))
427 return NULL;
Tim Peterse3228092002-11-09 04:21:44 +0000428
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000429 ACQUIRE_LOCK(self);
430 switch (self->mode) {
431 case MODE_READ:
432 break;
433 case MODE_READ_EOF:
434 ret = PyBytes_FromStringAndSize("", 0);
435 goto cleanup;
436 case MODE_CLOSED:
437 PyErr_SetString(PyExc_ValueError,
438 "I/O operation on closed file");
439 goto cleanup;
440 default:
441 PyErr_SetString(PyExc_IOError,
442 "file is not ready for reading");
443 goto cleanup;
444 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000445
Antoine Pitrou7ffa1962010-08-01 20:08:46 +0000446 /* refuse to mix with f.next() */
447 if (check_iterbuffered(self))
448 goto cleanup;
449
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000450 if (bytesrequested < 0)
451 buffersize = Util_NewBufferSize((size_t)0);
452 else
453 buffersize = bytesrequested;
454 if (buffersize > INT_MAX) {
455 PyErr_SetString(PyExc_OverflowError,
456 "requested number of bytes is "
457 "more than a Python string can hold");
458 goto cleanup;
459 }
460 ret = PyBytes_FromStringAndSize((char *)NULL, buffersize);
461 if (ret == NULL || buffersize == 0)
462 goto cleanup;
463 bytesread = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000464
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000465 for (;;) {
466 Py_BEGIN_ALLOW_THREADS
467 chunksize = BZ2_bzRead(&bzerror, self->fp,
468 BUF(ret)+bytesread,
469 buffersize-bytesread);
470 self->pos += chunksize;
471 Py_END_ALLOW_THREADS
472 bytesread += chunksize;
473 if (bzerror == BZ_STREAM_END) {
474 self->size = self->pos;
475 self->mode = MODE_READ_EOF;
476 break;
477 } else if (bzerror != BZ_OK) {
478 Util_CatchBZ2Error(bzerror);
479 Py_DECREF(ret);
480 ret = NULL;
481 goto cleanup;
482 }
483 if (bytesrequested < 0) {
484 buffersize = Util_NewBufferSize(buffersize);
485 if (_PyBytes_Resize(&ret, buffersize) < 0) {
486 ret = NULL;
487 goto cleanup;
488 }
489 } else {
490 break;
491 }
492 }
493 if (bytesread != buffersize) {
494 if (_PyBytes_Resize(&ret, bytesread) < 0) {
495 ret = NULL;
496 }
497 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000498
499cleanup:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000500 RELEASE_LOCK(self);
501 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000502}
503
504PyDoc_STRVAR(BZ2File_readline__doc__,
505"readline([size]) -> string\n\
506\n\
507Return the next line from the file, as a string, retaining newline.\n\
508A non-negative size argument will limit the maximum number of bytes to\n\
509return (an incomplete line may be returned then). Return an empty\n\
510string at EOF.\n\
511");
512
513static PyObject *
514BZ2File_readline(BZ2FileObject *self, PyObject *args)
515{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000516 PyObject *ret = NULL;
517 int sizehint = -1;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000518
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000519 if (!PyArg_ParseTuple(args, "|i:readline", &sizehint))
520 return NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000521
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000522 ACQUIRE_LOCK(self);
523 switch (self->mode) {
524 case MODE_READ:
525 break;
526 case MODE_READ_EOF:
527 ret = PyBytes_FromStringAndSize("", 0);
528 goto cleanup;
529 case MODE_CLOSED:
530 PyErr_SetString(PyExc_ValueError,
531 "I/O operation on closed file");
532 goto cleanup;
533 default:
534 PyErr_SetString(PyExc_IOError,
535 "file is not ready for reading");
536 goto cleanup;
537 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000538
Antoine Pitrou7ffa1962010-08-01 20:08:46 +0000539 /* refuse to mix with f.next() */
540 if (check_iterbuffered(self))
541 goto cleanup;
542
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000543 if (sizehint == 0)
544 ret = PyBytes_FromStringAndSize("", 0);
545 else
546 ret = Util_GetLine(self, (sizehint < 0) ? 0 : sizehint);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000547
548cleanup:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000549 RELEASE_LOCK(self);
550 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000551}
552
553PyDoc_STRVAR(BZ2File_readlines__doc__,
554"readlines([size]) -> list\n\
555\n\
556Call readline() repeatedly and return a list of lines read.\n\
557The optional size argument, if given, is an approximate bound on the\n\
558total number of bytes in the lines returned.\n\
559");
560
561/* This is a hacked version of Python's fileobject.c:file_readlines(). */
562static PyObject *
563BZ2File_readlines(BZ2FileObject *self, PyObject *args)
564{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000565 long sizehint = 0;
566 PyObject *list = NULL;
567 PyObject *line;
568 char small_buffer[SMALLCHUNK];
569 char *buffer = small_buffer;
570 size_t buffersize = SMALLCHUNK;
571 PyObject *big_buffer = NULL;
572 size_t nfilled = 0;
573 size_t nread;
574 size_t totalread = 0;
575 char *p, *q, *end;
576 int err;
577 int shortread = 0;
578 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000579
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000580 if (!PyArg_ParseTuple(args, "|l:readlines", &sizehint))
581 return NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000582
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000583 ACQUIRE_LOCK(self);
584 switch (self->mode) {
585 case MODE_READ:
586 break;
587 case MODE_READ_EOF:
588 list = PyList_New(0);
589 goto cleanup;
590 case MODE_CLOSED:
591 PyErr_SetString(PyExc_ValueError,
592 "I/O operation on closed file");
593 goto cleanup;
594 default:
595 PyErr_SetString(PyExc_IOError,
596 "file is not ready for reading");
597 goto cleanup;
598 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000599
Antoine Pitrou7ffa1962010-08-01 20:08:46 +0000600 /* refuse to mix with f.next() */
601 if (check_iterbuffered(self))
602 goto cleanup;
603
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000604 if ((list = PyList_New(0)) == NULL)
605 goto cleanup;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000606
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000607 for (;;) {
608 Py_BEGIN_ALLOW_THREADS
609 nread = BZ2_bzRead(&bzerror, self->fp,
610 buffer+nfilled, buffersize-nfilled);
611 self->pos += nread;
612 Py_END_ALLOW_THREADS
613 if (bzerror == BZ_STREAM_END) {
614 self->size = self->pos;
615 self->mode = MODE_READ_EOF;
616 if (nread == 0) {
617 sizehint = 0;
618 break;
619 }
620 shortread = 1;
621 } else if (bzerror != BZ_OK) {
622 Util_CatchBZ2Error(bzerror);
623 error:
624 Py_DECREF(list);
625 list = NULL;
626 goto cleanup;
627 }
628 totalread += nread;
629 p = memchr(buffer+nfilled, '\n', nread);
630 if (!shortread && p == NULL) {
631 /* Need a larger buffer to fit this line */
632 nfilled += nread;
633 buffersize *= 2;
634 if (buffersize > INT_MAX) {
635 PyErr_SetString(PyExc_OverflowError,
636 "line is longer than a Python string can hold");
637 goto error;
638 }
639 if (big_buffer == NULL) {
640 /* Create the big buffer */
641 big_buffer = PyBytes_FromStringAndSize(
642 NULL, buffersize);
643 if (big_buffer == NULL)
644 goto error;
645 buffer = PyBytes_AS_STRING(big_buffer);
646 memcpy(buffer, small_buffer, nfilled);
647 }
648 else {
649 /* Grow the big buffer */
650 if (_PyBytes_Resize(&big_buffer, buffersize) < 0){
651 big_buffer = NULL;
652 goto error;
653 }
654 buffer = PyBytes_AS_STRING(big_buffer);
655 }
656 continue;
657 }
658 end = buffer+nfilled+nread;
659 q = buffer;
660 while (p != NULL) {
661 /* Process complete lines */
662 p++;
663 line = PyBytes_FromStringAndSize(q, p-q);
664 if (line == NULL)
665 goto error;
666 err = PyList_Append(list, line);
667 Py_DECREF(line);
668 if (err != 0)
669 goto error;
670 q = p;
671 p = memchr(q, '\n', end-q);
672 }
673 /* Move the remaining incomplete line to the start */
674 nfilled = end-q;
675 memmove(buffer, q, nfilled);
676 if (sizehint > 0)
677 if (totalread >= (size_t)sizehint)
678 break;
679 if (shortread) {
680 sizehint = 0;
681 break;
682 }
683 }
684 if (nfilled != 0) {
685 /* Partial last line */
686 line = PyBytes_FromStringAndSize(buffer, nfilled);
687 if (line == NULL)
688 goto error;
689 if (sizehint > 0) {
690 /* Need to complete the last line */
691 PyObject *rest = Util_GetLine(self, 0);
692 if (rest == NULL) {
693 Py_DECREF(line);
694 goto error;
695 }
696 PyBytes_Concat(&line, rest);
697 Py_DECREF(rest);
698 if (line == NULL)
699 goto error;
700 }
701 err = PyList_Append(list, line);
702 Py_DECREF(line);
703 if (err != 0)
704 goto error;
705 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000706
707 cleanup:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000708 RELEASE_LOCK(self);
709 if (big_buffer) {
710 Py_DECREF(big_buffer);
711 }
712 return list;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000713}
714
715PyDoc_STRVAR(BZ2File_write__doc__,
716"write(data) -> None\n\
717\n\
718Write the 'data' string to file. Note that due to buffering, close() may\n\
719be needed before the file on disk reflects the data written.\n\
720");
721
722/* This is a hacked version of Python's fileobject.c:file_write(). */
723static PyObject *
724BZ2File_write(BZ2FileObject *self, PyObject *args)
725{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000726 PyObject *ret = NULL;
727 Py_buffer pbuf;
728 char *buf;
729 int len;
730 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000731
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000732 if (!PyArg_ParseTuple(args, "y*:write", &pbuf))
733 return NULL;
734 buf = pbuf.buf;
735 len = pbuf.len;
Tim Peterse3228092002-11-09 04:21:44 +0000736
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000737 ACQUIRE_LOCK(self);
738 switch (self->mode) {
739 case MODE_WRITE:
740 break;
Tim Peterse3228092002-11-09 04:21:44 +0000741
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000742 case MODE_CLOSED:
743 PyErr_SetString(PyExc_ValueError,
744 "I/O operation on closed file");
745 goto cleanup;
Tim Peterse3228092002-11-09 04:21:44 +0000746
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000747 default:
748 PyErr_SetString(PyExc_IOError,
749 "file is not ready for writing");
750 goto cleanup;
751 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000752
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000753 Py_BEGIN_ALLOW_THREADS
754 BZ2_bzWrite (&bzerror, self->fp, buf, len);
755 self->pos += len;
756 Py_END_ALLOW_THREADS
Tim Peterse3228092002-11-09 04:21:44 +0000757
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000758 if (bzerror != BZ_OK) {
759 Util_CatchBZ2Error(bzerror);
760 goto cleanup;
761 }
Tim Peterse3228092002-11-09 04:21:44 +0000762
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000763 Py_INCREF(Py_None);
764 ret = Py_None;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000765
766cleanup:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000767 PyBuffer_Release(&pbuf);
768 RELEASE_LOCK(self);
769 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000770}
771
772PyDoc_STRVAR(BZ2File_writelines__doc__,
773"writelines(sequence_of_strings) -> None\n\
774\n\
775Write the sequence of strings to the file. Note that newlines are not\n\
776added. The sequence can be any iterable object producing strings. This is\n\
777equivalent to calling write() for each string.\n\
778");
779
780/* This is a hacked version of Python's fileobject.c:file_writelines(). */
781static PyObject *
782BZ2File_writelines(BZ2FileObject *self, PyObject *seq)
783{
784#define CHUNKSIZE 1000
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000785 PyObject *list = NULL;
786 PyObject *iter = NULL;
787 PyObject *ret = NULL;
788 PyObject *line;
789 int i, j, index, len, islist;
790 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000791
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000792 ACQUIRE_LOCK(self);
793 switch (self->mode) {
794 case MODE_WRITE:
795 break;
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000796
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000797 case MODE_CLOSED:
798 PyErr_SetString(PyExc_ValueError,
799 "I/O operation on closed file");
800 goto error;
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000801
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000802 default:
803 PyErr_SetString(PyExc_IOError,
804 "file is not ready for writing");
805 goto error;
806 }
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000807
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000808 islist = PyList_Check(seq);
809 if (!islist) {
810 iter = PyObject_GetIter(seq);
811 if (iter == NULL) {
812 PyErr_SetString(PyExc_TypeError,
813 "writelines() requires an iterable argument");
814 goto error;
815 }
816 list = PyList_New(CHUNKSIZE);
817 if (list == NULL)
818 goto error;
819 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000820
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000821 /* Strategy: slurp CHUNKSIZE lines into a private list,
822 checking that they are all strings, then write that list
823 without holding the interpreter lock, then come back for more. */
824 for (index = 0; ; index += CHUNKSIZE) {
825 if (islist) {
826 Py_XDECREF(list);
827 list = PyList_GetSlice(seq, index, index+CHUNKSIZE);
828 if (list == NULL)
829 goto error;
830 j = PyList_GET_SIZE(list);
831 }
832 else {
833 for (j = 0; j < CHUNKSIZE; j++) {
834 line = PyIter_Next(iter);
835 if (line == NULL) {
836 if (PyErr_Occurred())
837 goto error;
838 break;
839 }
840 PyList_SetItem(list, j, line);
841 }
842 }
843 if (j == 0)
844 break;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000845
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000846 /* Check that all entries are indeed byte strings. If not,
847 apply the same rules as for file.write() and
848 convert the rets to strings. This is slow, but
849 seems to be the only way since all conversion APIs
850 could potentially execute Python code. */
851 for (i = 0; i < j; i++) {
852 PyObject *v = PyList_GET_ITEM(list, i);
853 if (!PyBytes_Check(v)) {
854 const char *buffer;
855 Py_ssize_t len;
856 if (PyObject_AsCharBuffer(v, &buffer, &len)) {
857 PyErr_SetString(PyExc_TypeError,
858 "writelines() "
859 "argument must be "
860 "a sequence of "
861 "bytes objects");
862 goto error;
863 }
864 line = PyBytes_FromStringAndSize(buffer,
865 len);
866 if (line == NULL)
867 goto error;
868 Py_DECREF(v);
869 PyList_SET_ITEM(list, i, line);
870 }
871 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000872
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000873 /* Since we are releasing the global lock, the
874 following code may *not* execute Python code. */
875 Py_BEGIN_ALLOW_THREADS
876 for (i = 0; i < j; i++) {
877 line = PyList_GET_ITEM(list, i);
878 len = PyBytes_GET_SIZE(line);
879 BZ2_bzWrite (&bzerror, self->fp,
880 PyBytes_AS_STRING(line), len);
881 if (bzerror != BZ_OK) {
882 Py_BLOCK_THREADS
883 Util_CatchBZ2Error(bzerror);
884 goto error;
885 }
886 }
887 Py_END_ALLOW_THREADS
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000888
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000889 if (j < CHUNKSIZE)
890 break;
891 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000892
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000893 Py_INCREF(Py_None);
894 ret = Py_None;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000895
896 error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000897 RELEASE_LOCK(self);
898 Py_XDECREF(list);
899 Py_XDECREF(iter);
900 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000901#undef CHUNKSIZE
902}
903
904PyDoc_STRVAR(BZ2File_seek__doc__,
905"seek(offset [, whence]) -> None\n\
906\n\
907Move to new file position. Argument offset is a byte count. Optional\n\
908argument whence defaults to 0 (offset from start of file, offset\n\
909should be >= 0); other values are 1 (move relative to current position,\n\
910positive or negative), and 2 (move relative to end of file, usually\n\
911negative, although many platforms allow seeking beyond the end of a file).\n\
912\n\
913Note that seeking of bz2 files is emulated, and depending on the parameters\n\
914the operation may be extremely slow.\n\
915");
916
917static PyObject *
918BZ2File_seek(BZ2FileObject *self, PyObject *args)
919{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000920 int where = 0;
921 PyObject *offobj;
922 Py_off_t offset;
923 char small_buffer[SMALLCHUNK];
924 char *buffer = small_buffer;
925 size_t buffersize = SMALLCHUNK;
926 Py_off_t bytesread = 0;
927 size_t readsize;
928 int chunksize;
929 int bzerror;
930 PyObject *ret = NULL;
Tim Peterse3228092002-11-09 04:21:44 +0000931
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000932 if (!PyArg_ParseTuple(args, "O|i:seek", &offobj, &where))
933 return NULL;
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000934#if !defined(HAVE_LARGEFILE_SUPPORT)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000935 offset = PyLong_AsLong(offobj);
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000936#else
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000937 offset = PyLong_Check(offobj) ?
938 PyLong_AsLongLong(offobj) : PyLong_AsLong(offobj);
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000939#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000940 if (PyErr_Occurred())
941 return NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000942
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000943 ACQUIRE_LOCK(self);
944 Util_DropReadAhead(self);
945 switch (self->mode) {
946 case MODE_READ:
947 case MODE_READ_EOF:
948 break;
Tim Peterse3228092002-11-09 04:21:44 +0000949
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000950 case MODE_CLOSED:
951 PyErr_SetString(PyExc_ValueError,
952 "I/O operation on closed file");
953 goto cleanup;
Tim Peterse3228092002-11-09 04:21:44 +0000954
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000955 default:
956 PyErr_SetString(PyExc_IOError,
957 "seek works only while reading");
958 goto cleanup;
959 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000960
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000961 if (where == 2) {
962 if (self->size == -1) {
963 assert(self->mode != MODE_READ_EOF);
964 for (;;) {
965 Py_BEGIN_ALLOW_THREADS
966 chunksize = BZ2_bzRead(&bzerror, self->fp,
967 buffer, buffersize);
968 self->pos += chunksize;
969 Py_END_ALLOW_THREADS
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000970
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000971 bytesread += chunksize;
972 if (bzerror == BZ_STREAM_END) {
973 break;
974 } else if (bzerror != BZ_OK) {
975 Util_CatchBZ2Error(bzerror);
976 goto cleanup;
977 }
978 }
979 self->mode = MODE_READ_EOF;
980 self->size = self->pos;
981 bytesread = 0;
982 }
983 offset = self->size + offset;
984 } else if (where == 1) {
985 offset = self->pos + offset;
986 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000987
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000988 /* Before getting here, offset must be the absolute position the file
989 * pointer should be set to. */
Georg Brandl47fab922006-02-18 21:57:25 +0000990
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000991 if (offset >= self->pos) {
992 /* we can move forward */
993 offset -= self->pos;
994 } else {
995 /* we cannot move back, so rewind the stream */
996 BZ2_bzReadClose(&bzerror, self->fp);
997 if (bzerror != BZ_OK) {
998 Util_CatchBZ2Error(bzerror);
999 goto cleanup;
1000 }
1001 rewind(self->rawfp);
1002 self->pos = 0;
1003 self->fp = BZ2_bzReadOpen(&bzerror, self->rawfp,
1004 0, 0, NULL, 0);
1005 if (bzerror != BZ_OK) {
1006 Util_CatchBZ2Error(bzerror);
1007 goto cleanup;
1008 }
1009 self->mode = MODE_READ;
1010 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001011
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001012 if (offset <= 0 || self->mode == MODE_READ_EOF)
1013 goto exit;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001014
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001015 /* Before getting here, offset must be set to the number of bytes
1016 * to walk forward. */
1017 for (;;) {
1018 if (offset-bytesread > buffersize)
1019 readsize = buffersize;
1020 else
1021 /* offset might be wider that readsize, but the result
1022 * of the subtraction is bound by buffersize (see the
1023 * condition above). buffersize is 8192. */
1024 readsize = (size_t)(offset-bytesread);
1025 Py_BEGIN_ALLOW_THREADS
1026 chunksize = BZ2_bzRead(&bzerror, self->fp, buffer, readsize);
1027 self->pos += chunksize;
1028 Py_END_ALLOW_THREADS
1029 bytesread += chunksize;
1030 if (bzerror == BZ_STREAM_END) {
1031 self->size = self->pos;
1032 self->mode = MODE_READ_EOF;
1033 break;
1034 } else if (bzerror != BZ_OK) {
1035 Util_CatchBZ2Error(bzerror);
1036 goto cleanup;
1037 }
1038 if (bytesread == offset)
1039 break;
1040 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001041
1042exit:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001043 Py_INCREF(Py_None);
1044 ret = Py_None;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001045
1046cleanup:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001047 RELEASE_LOCK(self);
1048 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001049}
1050
1051PyDoc_STRVAR(BZ2File_tell__doc__,
1052"tell() -> int\n\
1053\n\
1054Return the current file position, an integer (may be a long integer).\n\
1055");
1056
1057static PyObject *
1058BZ2File_tell(BZ2FileObject *self, PyObject *args)
1059{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001060 PyObject *ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001061
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001062 if (self->mode == MODE_CLOSED) {
1063 PyErr_SetString(PyExc_ValueError,
1064 "I/O operation on closed file");
1065 goto cleanup;
1066 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001067
Georg Brandla8bcecc2005-09-03 07:49:53 +00001068#if !defined(HAVE_LARGEFILE_SUPPORT)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001069 ret = PyLong_FromLong(self->pos);
Georg Brandla8bcecc2005-09-03 07:49:53 +00001070#else
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001071 ret = PyLong_FromLongLong(self->pos);
Georg Brandla8bcecc2005-09-03 07:49:53 +00001072#endif
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001073
1074cleanup:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001075 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001076}
1077
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001078PyDoc_STRVAR(BZ2File_close__doc__,
1079"close() -> None or (perhaps) an integer\n\
1080\n\
1081Close the file. Sets data attribute .closed to true. A closed file\n\
1082cannot be used for further I/O operations. close() may be called more\n\
1083than once without error.\n\
1084");
1085
1086static PyObject *
1087BZ2File_close(BZ2FileObject *self)
1088{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001089 PyObject *ret = NULL;
1090 int bzerror = BZ_OK;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001091
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001092 if (self->mode == MODE_CLOSED) {
1093 Py_RETURN_NONE;
1094 }
Guido van Rossumf09ca142007-06-13 00:03:05 +00001095
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001096 ACQUIRE_LOCK(self);
1097 switch (self->mode) {
1098 case MODE_READ:
1099 case MODE_READ_EOF:
1100 BZ2_bzReadClose(&bzerror, self->fp);
1101 break;
1102 case MODE_WRITE:
1103 BZ2_bzWriteClose(&bzerror, self->fp,
1104 0, NULL, NULL);
1105 break;
1106 }
1107 self->mode = MODE_CLOSED;
1108 fclose(self->rawfp);
1109 self->rawfp = NULL;
1110 if (bzerror == BZ_OK) {
1111 Py_INCREF(Py_None);
1112 ret = Py_None;
1113 }
1114 else {
1115 Util_CatchBZ2Error(bzerror);
1116 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001117
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001118 RELEASE_LOCK(self);
1119 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001120}
1121
Antoine Pitrou308705e2009-01-10 16:22:51 +00001122PyDoc_STRVAR(BZ2File_enter_doc,
1123"__enter__() -> self.");
1124
1125static PyObject *
1126BZ2File_enter(BZ2FileObject *self)
1127{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001128 if (self->mode == MODE_CLOSED) {
1129 PyErr_SetString(PyExc_ValueError,
1130 "I/O operation on closed file");
1131 return NULL;
1132 }
1133 Py_INCREF(self);
1134 return (PyObject *) self;
Antoine Pitrou308705e2009-01-10 16:22:51 +00001135}
1136
1137PyDoc_STRVAR(BZ2File_exit_doc,
1138"__exit__(*excinfo) -> None. Closes the file.");
1139
1140static PyObject *
1141BZ2File_exit(BZ2FileObject *self, PyObject *args)
1142{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001143 PyObject *ret = PyObject_CallMethod((PyObject *) self, "close", NULL);
1144 if (!ret)
1145 /* If error occurred, pass through */
1146 return NULL;
1147 Py_DECREF(ret);
1148 Py_RETURN_NONE;
Antoine Pitrou308705e2009-01-10 16:22:51 +00001149}
1150
1151
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001152static PyObject *BZ2File_getiter(BZ2FileObject *self);
1153
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001154static PyMethodDef BZ2File_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001155 {"read", (PyCFunction)BZ2File_read, METH_VARARGS, BZ2File_read__doc__},
1156 {"readline", (PyCFunction)BZ2File_readline, METH_VARARGS, BZ2File_readline__doc__},
1157 {"readlines", (PyCFunction)BZ2File_readlines, METH_VARARGS, BZ2File_readlines__doc__},
1158 {"write", (PyCFunction)BZ2File_write, METH_VARARGS, BZ2File_write__doc__},
1159 {"writelines", (PyCFunction)BZ2File_writelines, METH_O, BZ2File_writelines__doc__},
1160 {"seek", (PyCFunction)BZ2File_seek, METH_VARARGS, BZ2File_seek__doc__},
1161 {"tell", (PyCFunction)BZ2File_tell, METH_NOARGS, BZ2File_tell__doc__},
1162 {"close", (PyCFunction)BZ2File_close, METH_NOARGS, BZ2File_close__doc__},
1163 {"__enter__", (PyCFunction)BZ2File_enter, METH_NOARGS, BZ2File_enter_doc},
1164 {"__exit__", (PyCFunction)BZ2File_exit, METH_VARARGS, BZ2File_exit_doc},
1165 {NULL, NULL} /* sentinel */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001166};
1167
1168
1169/* ===================================================================== */
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001170/* Getters and setters of BZ2File. */
1171
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001172static PyObject *
1173BZ2File_get_closed(BZ2FileObject *self, void *closure)
1174{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001175 return PyLong_FromLong(self->mode == MODE_CLOSED);
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001176}
1177
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001178static PyGetSetDef BZ2File_getset[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001179 {"closed", (getter)BZ2File_get_closed, NULL,
1180 "True if the file is closed"},
1181 {NULL} /* Sentinel */
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001182};
1183
1184
1185/* ===================================================================== */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001186/* Slot definitions for BZ2File_Type. */
1187
1188static int
1189BZ2File_init(BZ2FileObject *self, PyObject *args, PyObject *kwargs)
1190{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001191 static char *kwlist[] = {"filename", "mode", "buffering",
1192 "compresslevel", 0};
1193 PyObject *name_obj = NULL;
1194 char *name;
1195 char *mode = "r";
1196 int buffering = -1;
1197 int compresslevel = 9;
1198 int bzerror;
1199 int mode_char = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001200
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001201 self->size = -1;
Tim Peterse3228092002-11-09 04:21:44 +00001202
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001203 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O&|sii:BZ2File",
1204 kwlist, PyUnicode_FSConverter, &name_obj,
1205 &mode, &buffering,
1206 &compresslevel))
1207 return -1;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001208
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001209 name = PyBytes_AsString(name_obj);
1210 if (compresslevel < 1 || compresslevel > 9) {
1211 PyErr_SetString(PyExc_ValueError,
1212 "compresslevel must be between 1 and 9");
1213 Py_DECREF(name_obj);
1214 return -1;
1215 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001216
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001217 for (;;) {
1218 int error = 0;
1219 switch (*mode) {
1220 case 'r':
1221 case 'w':
1222 if (mode_char)
1223 error = 1;
1224 mode_char = *mode;
1225 break;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001226
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001227 case 'b':
1228 break;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001229
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001230 default:
1231 error = 1;
1232 break;
1233 }
1234 if (error) {
1235 PyErr_Format(PyExc_ValueError,
1236 "invalid mode char %c", *mode);
1237 Py_DECREF(name_obj);
1238 return -1;
1239 }
1240 mode++;
1241 if (*mode == '\0')
1242 break;
1243 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001244
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001245 if (mode_char == 0) {
1246 mode_char = 'r';
1247 }
Georg Brandl6b95f1d2005-06-03 19:47:00 +00001248
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001249 mode = (mode_char == 'r') ? "rb" : "wb";
Tim Peterse3228092002-11-09 04:21:44 +00001250
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001251 self->rawfp = fopen(name, mode);
1252 Py_DECREF(name_obj);
1253 if (self->rawfp == NULL) {
1254 PyErr_SetFromErrno(PyExc_IOError);
1255 return -1;
1256 }
1257 /* XXX Ignore buffering */
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001258
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001259 /* From now on, we have stuff to dealloc, so jump to error label
1260 * instead of returning */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001261
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001262#ifdef WITH_THREAD
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001263 self->lock = PyThread_allocate_lock();
1264 if (!self->lock) {
1265 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
1266 goto error;
1267 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001268#endif
1269
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001270 if (mode_char == 'r')
1271 self->fp = BZ2_bzReadOpen(&bzerror, self->rawfp,
1272 0, 0, NULL, 0);
1273 else
1274 self->fp = BZ2_bzWriteOpen(&bzerror, self->rawfp,
1275 compresslevel, 0, 0);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001276
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001277 if (bzerror != BZ_OK) {
1278 Util_CatchBZ2Error(bzerror);
1279 goto error;
1280 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001281
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001282 self->mode = (mode_char == 'r') ? MODE_READ : MODE_WRITE;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001283
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001284 return 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001285
1286error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001287 fclose(self->rawfp);
1288 self->rawfp = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001289#ifdef WITH_THREAD
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001290 if (self->lock) {
1291 PyThread_free_lock(self->lock);
1292 self->lock = NULL;
1293 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001294#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001295 return -1;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001296}
1297
1298static void
1299BZ2File_dealloc(BZ2FileObject *self)
1300{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001301 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001302#ifdef WITH_THREAD
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001303 if (self->lock)
1304 PyThread_free_lock(self->lock);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001305#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001306 switch (self->mode) {
1307 case MODE_READ:
1308 case MODE_READ_EOF:
1309 BZ2_bzReadClose(&bzerror, self->fp);
1310 break;
1311 case MODE_WRITE:
1312 BZ2_bzWriteClose(&bzerror, self->fp,
1313 0, NULL, NULL);
1314 break;
1315 }
1316 Util_DropReadAhead(self);
1317 if (self->rawfp != NULL)
1318 fclose(self->rawfp);
1319 Py_TYPE(self)->tp_free((PyObject *)self);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001320}
1321
1322/* This is a hacked version of Python's fileobject.c:file_getiter(). */
1323static PyObject *
1324BZ2File_getiter(BZ2FileObject *self)
1325{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001326 if (self->mode == MODE_CLOSED) {
1327 PyErr_SetString(PyExc_ValueError,
1328 "I/O operation on closed file");
1329 return NULL;
1330 }
1331 Py_INCREF((PyObject*)self);
1332 return (PyObject *)self;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001333}
1334
1335/* This is a hacked version of Python's fileobject.c:file_iternext(). */
1336#define READAHEAD_BUFSIZE 8192
1337static PyObject *
1338BZ2File_iternext(BZ2FileObject *self)
1339{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001340 PyBytesObject* ret;
1341 ACQUIRE_LOCK(self);
1342 if (self->mode == MODE_CLOSED) {
1343 RELEASE_LOCK(self);
1344 PyErr_SetString(PyExc_ValueError,
1345 "I/O operation on closed file");
1346 return NULL;
1347 }
1348 ret = Util_ReadAheadGetLineSkip(self, 0, READAHEAD_BUFSIZE);
1349 RELEASE_LOCK(self);
1350 if (ret == NULL || PyBytes_GET_SIZE(ret) == 0) {
1351 Py_XDECREF(ret);
1352 return NULL;
1353 }
1354 return (PyObject *)ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001355}
1356
1357/* ===================================================================== */
1358/* BZ2File_Type definition. */
1359
1360PyDoc_VAR(BZ2File__doc__) =
1361PyDoc_STR(
1362"BZ2File(name [, mode='r', buffering=0, compresslevel=9]) -> file object\n\
1363\n\
1364Open a bz2 file. The mode can be 'r' or 'w', for reading (default) or\n\
1365writing. When opened for writing, the file will be created if it doesn't\n\
1366exist, and truncated otherwise. If the buffering argument is given, 0 means\n\
1367unbuffered, and larger numbers specify the buffer size. If compresslevel\n\
1368is given, must be a number between 1 and 9.\n\
Guido van Rossum88e860c2007-06-13 01:46:31 +00001369Data read is always returned in bytes; data written ought to be bytes.\n\
1370");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001371
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001372static PyTypeObject BZ2File_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001373 PyVarObject_HEAD_INIT(NULL, 0)
1374 "bz2.BZ2File", /*tp_name*/
1375 sizeof(BZ2FileObject), /*tp_basicsize*/
1376 0, /*tp_itemsize*/
1377 (destructor)BZ2File_dealloc, /*tp_dealloc*/
1378 0, /*tp_print*/
1379 0, /*tp_getattr*/
1380 0, /*tp_setattr*/
1381 0, /*tp_reserved*/
1382 0, /*tp_repr*/
1383 0, /*tp_as_number*/
1384 0, /*tp_as_sequence*/
1385 0, /*tp_as_mapping*/
1386 0, /*tp_hash*/
1387 0, /*tp_call*/
1388 0, /*tp_str*/
1389 PyObject_GenericGetAttr,/*tp_getattro*/
1390 PyObject_GenericSetAttr,/*tp_setattro*/
1391 0, /*tp_as_buffer*/
1392 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1393 BZ2File__doc__, /*tp_doc*/
1394 0, /*tp_traverse*/
1395 0, /*tp_clear*/
1396 0, /*tp_richcompare*/
1397 0, /*tp_weaklistoffset*/
1398 (getiterfunc)BZ2File_getiter, /*tp_iter*/
1399 (iternextfunc)BZ2File_iternext, /*tp_iternext*/
1400 BZ2File_methods, /*tp_methods*/
1401 0, /*tp_members*/
1402 BZ2File_getset, /*tp_getset*/
1403 0, /*tp_base*/
1404 0, /*tp_dict*/
1405 0, /*tp_descr_get*/
1406 0, /*tp_descr_set*/
1407 0, /*tp_dictoffset*/
1408 (initproc)BZ2File_init, /*tp_init*/
1409 PyType_GenericAlloc, /*tp_alloc*/
1410 PyType_GenericNew, /*tp_new*/
1411 PyObject_Free, /*tp_free*/
1412 0, /*tp_is_gc*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001413};
1414
1415
1416/* ===================================================================== */
1417/* Methods of BZ2Comp. */
1418
1419PyDoc_STRVAR(BZ2Comp_compress__doc__,
1420"compress(data) -> string\n\
1421\n\
1422Provide more data to the compressor object. It will return chunks of\n\
1423compressed data whenever possible. When you've finished providing data\n\
1424to compress, call the flush() method to finish the compression process,\n\
1425and return what is left in the internal buffers.\n\
1426");
1427
1428static PyObject *
1429BZ2Comp_compress(BZ2CompObject *self, PyObject *args)
1430{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001431 Py_buffer pdata;
1432 char *data;
1433 int datasize;
1434 int bufsize = SMALLCHUNK;
1435 PY_LONG_LONG totalout;
1436 PyObject *ret = NULL;
1437 bz_stream *bzs = &self->bzs;
1438 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001439
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001440 if (!PyArg_ParseTuple(args, "y*:compress", &pdata))
1441 return NULL;
1442 data = pdata.buf;
1443 datasize = pdata.len;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001444
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001445 if (datasize == 0) {
1446 PyBuffer_Release(&pdata);
1447 return PyBytes_FromStringAndSize("", 0);
1448 }
Gustavo Niemeyera6e436e2004-02-14 00:02:45 +00001449
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001450 ACQUIRE_LOCK(self);
1451 if (!self->running) {
1452 PyErr_SetString(PyExc_ValueError,
1453 "this object was already flushed");
1454 goto error;
1455 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001456
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001457 ret = PyBytes_FromStringAndSize(NULL, bufsize);
1458 if (!ret)
1459 goto error;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001460
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001461 bzs->next_in = data;
1462 bzs->avail_in = datasize;
1463 bzs->next_out = BUF(ret);
1464 bzs->avail_out = bufsize;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001465
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001466 totalout = BZS_TOTAL_OUT(bzs);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001467
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001468 for (;;) {
1469 Py_BEGIN_ALLOW_THREADS
1470 bzerror = BZ2_bzCompress(bzs, BZ_RUN);
1471 Py_END_ALLOW_THREADS
1472 if (bzerror != BZ_RUN_OK) {
1473 Util_CatchBZ2Error(bzerror);
1474 goto error;
1475 }
1476 if (bzs->avail_in == 0)
1477 break; /* no more input data */
1478 if (bzs->avail_out == 0) {
1479 bufsize = Util_NewBufferSize(bufsize);
1480 if (_PyBytes_Resize(&ret, bufsize) < 0) {
1481 BZ2_bzCompressEnd(bzs);
1482 goto error;
1483 }
1484 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1485 - totalout);
1486 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1487 }
1488 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001489
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001490 if (_PyBytes_Resize(&ret,
1491 (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout)) < 0)
1492 goto error;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001493
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001494 RELEASE_LOCK(self);
1495 PyBuffer_Release(&pdata);
1496 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001497
1498error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001499 RELEASE_LOCK(self);
1500 PyBuffer_Release(&pdata);
1501 Py_XDECREF(ret);
1502 return NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001503}
1504
1505PyDoc_STRVAR(BZ2Comp_flush__doc__,
1506"flush() -> string\n\
1507\n\
1508Finish the compression process and return what is left in internal buffers.\n\
1509You must not use the compressor object after calling this method.\n\
1510");
1511
1512static PyObject *
1513BZ2Comp_flush(BZ2CompObject *self)
1514{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001515 int bufsize = SMALLCHUNK;
1516 PyObject *ret = NULL;
1517 bz_stream *bzs = &self->bzs;
1518 PY_LONG_LONG totalout;
1519 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001520
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001521 ACQUIRE_LOCK(self);
1522 if (!self->running) {
1523 PyErr_SetString(PyExc_ValueError, "object was already "
1524 "flushed");
1525 goto error;
1526 }
1527 self->running = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001528
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001529 ret = PyBytes_FromStringAndSize(NULL, bufsize);
1530 if (!ret)
1531 goto error;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001532
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001533 bzs->next_out = BUF(ret);
1534 bzs->avail_out = bufsize;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001535
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001536 totalout = BZS_TOTAL_OUT(bzs);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001537
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001538 for (;;) {
1539 Py_BEGIN_ALLOW_THREADS
1540 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
1541 Py_END_ALLOW_THREADS
1542 if (bzerror == BZ_STREAM_END) {
1543 break;
1544 } else if (bzerror != BZ_FINISH_OK) {
1545 Util_CatchBZ2Error(bzerror);
1546 goto error;
1547 }
1548 if (bzs->avail_out == 0) {
1549 bufsize = Util_NewBufferSize(bufsize);
1550 if (_PyBytes_Resize(&ret, bufsize) < 0)
1551 goto error;
1552 bzs->next_out = BUF(ret);
1553 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1554 - totalout);
1555 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1556 }
1557 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001558
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001559 if (bzs->avail_out != 0) {
1560 if (_PyBytes_Resize(&ret,
1561 (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout)) < 0)
1562 goto error;
1563 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001564
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001565 RELEASE_LOCK(self);
1566 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001567
1568error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001569 RELEASE_LOCK(self);
1570 Py_XDECREF(ret);
1571 return NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001572}
1573
1574static PyMethodDef BZ2Comp_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001575 {"compress", (PyCFunction)BZ2Comp_compress, METH_VARARGS,
1576 BZ2Comp_compress__doc__},
1577 {"flush", (PyCFunction)BZ2Comp_flush, METH_NOARGS,
1578 BZ2Comp_flush__doc__},
1579 {NULL, NULL} /* sentinel */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001580};
1581
1582
1583/* ===================================================================== */
1584/* Slot definitions for BZ2Comp_Type. */
1585
1586static int
1587BZ2Comp_init(BZ2CompObject *self, PyObject *args, PyObject *kwargs)
1588{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001589 int compresslevel = 9;
1590 int bzerror;
1591 static char *kwlist[] = {"compresslevel", 0};
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001592
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001593 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i:BZ2Compressor",
1594 kwlist, &compresslevel))
1595 return -1;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001596
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001597 if (compresslevel < 1 || compresslevel > 9) {
1598 PyErr_SetString(PyExc_ValueError,
1599 "compresslevel must be between 1 and 9");
1600 goto error;
1601 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001602
1603#ifdef WITH_THREAD
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001604 self->lock = PyThread_allocate_lock();
1605 if (!self->lock) {
1606 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
1607 goto error;
1608 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001609#endif
1610
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001611 memset(&self->bzs, 0, sizeof(bz_stream));
1612 bzerror = BZ2_bzCompressInit(&self->bzs, compresslevel, 0, 0);
1613 if (bzerror != BZ_OK) {
1614 Util_CatchBZ2Error(bzerror);
1615 goto error;
1616 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001617
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001618 self->running = 1;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001619
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001620 return 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001621error:
1622#ifdef WITH_THREAD
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001623 if (self->lock) {
1624 PyThread_free_lock(self->lock);
1625 self->lock = NULL;
1626 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001627#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001628 return -1;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001629}
1630
1631static void
1632BZ2Comp_dealloc(BZ2CompObject *self)
1633{
1634#ifdef WITH_THREAD
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001635 if (self->lock)
1636 PyThread_free_lock(self->lock);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001637#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001638 BZ2_bzCompressEnd(&self->bzs);
1639 Py_TYPE(self)->tp_free((PyObject *)self);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001640}
1641
1642
1643/* ===================================================================== */
1644/* BZ2Comp_Type definition. */
1645
1646PyDoc_STRVAR(BZ2Comp__doc__,
1647"BZ2Compressor([compresslevel=9]) -> compressor object\n\
1648\n\
1649Create a new compressor object. This object may be used to compress\n\
1650data sequentially. If you want to compress data in one shot, use the\n\
1651compress() function instead. The compresslevel parameter, if given,\n\
1652must be a number between 1 and 9.\n\
1653");
1654
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001655static PyTypeObject BZ2Comp_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001656 PyVarObject_HEAD_INIT(NULL, 0)
1657 "bz2.BZ2Compressor", /*tp_name*/
1658 sizeof(BZ2CompObject), /*tp_basicsize*/
1659 0, /*tp_itemsize*/
1660 (destructor)BZ2Comp_dealloc, /*tp_dealloc*/
1661 0, /*tp_print*/
1662 0, /*tp_getattr*/
1663 0, /*tp_setattr*/
1664 0, /*tp_reserved*/
1665 0, /*tp_repr*/
1666 0, /*tp_as_number*/
1667 0, /*tp_as_sequence*/
1668 0, /*tp_as_mapping*/
1669 0, /*tp_hash*/
1670 0, /*tp_call*/
1671 0, /*tp_str*/
1672 PyObject_GenericGetAttr,/*tp_getattro*/
1673 PyObject_GenericSetAttr,/*tp_setattro*/
1674 0, /*tp_as_buffer*/
1675 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1676 BZ2Comp__doc__, /*tp_doc*/
1677 0, /*tp_traverse*/
1678 0, /*tp_clear*/
1679 0, /*tp_richcompare*/
1680 0, /*tp_weaklistoffset*/
1681 0, /*tp_iter*/
1682 0, /*tp_iternext*/
1683 BZ2Comp_methods, /*tp_methods*/
1684 0, /*tp_members*/
1685 0, /*tp_getset*/
1686 0, /*tp_base*/
1687 0, /*tp_dict*/
1688 0, /*tp_descr_get*/
1689 0, /*tp_descr_set*/
1690 0, /*tp_dictoffset*/
1691 (initproc)BZ2Comp_init, /*tp_init*/
1692 PyType_GenericAlloc, /*tp_alloc*/
1693 PyType_GenericNew, /*tp_new*/
1694 PyObject_Free, /*tp_free*/
1695 0, /*tp_is_gc*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001696};
1697
1698
1699/* ===================================================================== */
1700/* Members of BZ2Decomp. */
1701
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001702#undef OFF
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001703#define OFF(x) offsetof(BZ2DecompObject, x)
1704
1705static PyMemberDef BZ2Decomp_members[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001706 {"unused_data", T_OBJECT, OFF(unused_data), READONLY},
1707 {NULL} /* Sentinel */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001708};
1709
1710
1711/* ===================================================================== */
1712/* Methods of BZ2Decomp. */
1713
1714PyDoc_STRVAR(BZ2Decomp_decompress__doc__,
1715"decompress(data) -> string\n\
1716\n\
1717Provide more data to the decompressor object. It will return chunks\n\
1718of decompressed data whenever possible. If you try to decompress data\n\
1719after the end of stream is found, EOFError will be raised. If any data\n\
1720was found after the end of stream, it'll be ignored and saved in\n\
1721unused_data attribute.\n\
1722");
1723
1724static PyObject *
1725BZ2Decomp_decompress(BZ2DecompObject *self, PyObject *args)
1726{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001727 Py_buffer pdata;
1728 char *data;
1729 int datasize;
1730 int bufsize = SMALLCHUNK;
1731 PY_LONG_LONG totalout;
1732 PyObject *ret = NULL;
1733 bz_stream *bzs = &self->bzs;
1734 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001735
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001736 if (!PyArg_ParseTuple(args, "y*:decompress", &pdata))
1737 return NULL;
1738 data = pdata.buf;
1739 datasize = pdata.len;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001740
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001741 ACQUIRE_LOCK(self);
1742 if (!self->running) {
1743 PyErr_SetString(PyExc_EOFError, "end of stream was "
1744 "already found");
1745 goto error;
1746 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001747
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001748 ret = PyBytes_FromStringAndSize(NULL, bufsize);
1749 if (!ret)
1750 goto error;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001751
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001752 bzs->next_in = data;
1753 bzs->avail_in = datasize;
1754 bzs->next_out = BUF(ret);
1755 bzs->avail_out = bufsize;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001756
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001757 totalout = BZS_TOTAL_OUT(bzs);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001758
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001759 for (;;) {
1760 Py_BEGIN_ALLOW_THREADS
1761 bzerror = BZ2_bzDecompress(bzs);
1762 Py_END_ALLOW_THREADS
1763 if (bzerror == BZ_STREAM_END) {
1764 if (bzs->avail_in != 0) {
1765 Py_DECREF(self->unused_data);
1766 self->unused_data =
1767 PyBytes_FromStringAndSize(bzs->next_in,
1768 bzs->avail_in);
1769 }
1770 self->running = 0;
1771 break;
1772 }
1773 if (bzerror != BZ_OK) {
1774 Util_CatchBZ2Error(bzerror);
1775 goto error;
1776 }
1777 if (bzs->avail_in == 0)
1778 break; /* no more input data */
1779 if (bzs->avail_out == 0) {
1780 bufsize = Util_NewBufferSize(bufsize);
1781 if (_PyBytes_Resize(&ret, bufsize) < 0) {
1782 BZ2_bzDecompressEnd(bzs);
1783 goto error;
1784 }
1785 bzs->next_out = BUF(ret);
1786 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1787 - totalout);
1788 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1789 }
1790 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001791
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001792 if (bzs->avail_out != 0) {
1793 if (_PyBytes_Resize(&ret,
1794 (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout)) < 0)
1795 goto error;
1796 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001797
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001798 RELEASE_LOCK(self);
1799 PyBuffer_Release(&pdata);
1800 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001801
1802error:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001803 RELEASE_LOCK(self);
1804 PyBuffer_Release(&pdata);
1805 Py_XDECREF(ret);
1806 return NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001807}
1808
1809static PyMethodDef BZ2Decomp_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001810 {"decompress", (PyCFunction)BZ2Decomp_decompress, METH_VARARGS, BZ2Decomp_decompress__doc__},
1811 {NULL, NULL} /* sentinel */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001812};
1813
1814
1815/* ===================================================================== */
1816/* Slot definitions for BZ2Decomp_Type. */
1817
1818static int
1819BZ2Decomp_init(BZ2DecompObject *self, PyObject *args, PyObject *kwargs)
1820{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001821 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001822
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001823 if (!PyArg_ParseTuple(args, ":BZ2Decompressor"))
1824 return -1;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001825
1826#ifdef WITH_THREAD
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001827 self->lock = PyThread_allocate_lock();
1828 if (!self->lock) {
1829 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
1830 goto error;
1831 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001832#endif
1833
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001834 self->unused_data = PyBytes_FromStringAndSize("", 0);
1835 if (!self->unused_data)
1836 goto error;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001837
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001838 memset(&self->bzs, 0, sizeof(bz_stream));
1839 bzerror = BZ2_bzDecompressInit(&self->bzs, 0, 0);
1840 if (bzerror != BZ_OK) {
1841 Util_CatchBZ2Error(bzerror);
1842 goto error;
1843 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001844
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001845 self->running = 1;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001846
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001847 return 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001848
1849error:
1850#ifdef WITH_THREAD
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001851 if (self->lock) {
1852 PyThread_free_lock(self->lock);
1853 self->lock = NULL;
1854 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001855#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001856 Py_CLEAR(self->unused_data);
1857 return -1;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001858}
1859
1860static void
1861BZ2Decomp_dealloc(BZ2DecompObject *self)
1862{
1863#ifdef WITH_THREAD
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001864 if (self->lock)
1865 PyThread_free_lock(self->lock);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001866#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001867 Py_XDECREF(self->unused_data);
1868 BZ2_bzDecompressEnd(&self->bzs);
1869 Py_TYPE(self)->tp_free((PyObject *)self);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001870}
1871
1872
1873/* ===================================================================== */
1874/* BZ2Decomp_Type definition. */
1875
1876PyDoc_STRVAR(BZ2Decomp__doc__,
1877"BZ2Decompressor() -> decompressor object\n\
1878\n\
1879Create a new decompressor object. This object may be used to decompress\n\
1880data sequentially. If you want to decompress data in one shot, use the\n\
1881decompress() function instead.\n\
1882");
1883
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001884static PyTypeObject BZ2Decomp_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001885 PyVarObject_HEAD_INIT(NULL, 0)
1886 "bz2.BZ2Decompressor", /*tp_name*/
1887 sizeof(BZ2DecompObject), /*tp_basicsize*/
1888 0, /*tp_itemsize*/
1889 (destructor)BZ2Decomp_dealloc, /*tp_dealloc*/
1890 0, /*tp_print*/
1891 0, /*tp_getattr*/
1892 0, /*tp_setattr*/
1893 0, /*tp_reserved*/
1894 0, /*tp_repr*/
1895 0, /*tp_as_number*/
1896 0, /*tp_as_sequence*/
1897 0, /*tp_as_mapping*/
1898 0, /*tp_hash*/
1899 0, /*tp_call*/
1900 0, /*tp_str*/
1901 PyObject_GenericGetAttr,/*tp_getattro*/
1902 PyObject_GenericSetAttr,/*tp_setattro*/
1903 0, /*tp_as_buffer*/
1904 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1905 BZ2Decomp__doc__, /*tp_doc*/
1906 0, /*tp_traverse*/
1907 0, /*tp_clear*/
1908 0, /*tp_richcompare*/
1909 0, /*tp_weaklistoffset*/
1910 0, /*tp_iter*/
1911 0, /*tp_iternext*/
1912 BZ2Decomp_methods, /*tp_methods*/
1913 BZ2Decomp_members, /*tp_members*/
1914 0, /*tp_getset*/
1915 0, /*tp_base*/
1916 0, /*tp_dict*/
1917 0, /*tp_descr_get*/
1918 0, /*tp_descr_set*/
1919 0, /*tp_dictoffset*/
1920 (initproc)BZ2Decomp_init, /*tp_init*/
1921 PyType_GenericAlloc, /*tp_alloc*/
1922 PyType_GenericNew, /*tp_new*/
1923 PyObject_Free, /*tp_free*/
1924 0, /*tp_is_gc*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001925};
1926
1927
1928/* ===================================================================== */
1929/* Module functions. */
1930
1931PyDoc_STRVAR(bz2_compress__doc__,
1932"compress(data [, compresslevel=9]) -> string\n\
1933\n\
1934Compress data in one shot. If you want to compress data sequentially,\n\
1935use an instance of BZ2Compressor instead. The compresslevel parameter, if\n\
1936given, must be a number between 1 and 9.\n\
1937");
1938
1939static PyObject *
1940bz2_compress(PyObject *self, PyObject *args, PyObject *kwargs)
1941{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001942 int compresslevel=9;
1943 Py_buffer pdata;
1944 char *data;
1945 int datasize;
1946 int bufsize;
1947 PyObject *ret = NULL;
1948 bz_stream _bzs;
1949 bz_stream *bzs = &_bzs;
1950 int bzerror;
1951 static char *kwlist[] = {"data", "compresslevel", 0};
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001952
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001953 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "y*|i",
1954 kwlist, &pdata,
1955 &compresslevel))
1956 return NULL;
1957 data = pdata.buf;
1958 datasize = pdata.len;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001959
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001960 if (compresslevel < 1 || compresslevel > 9) {
1961 PyErr_SetString(PyExc_ValueError,
1962 "compresslevel must be between 1 and 9");
1963 PyBuffer_Release(&pdata);
1964 return NULL;
1965 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001966
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001967 /* Conforming to bz2 manual, this is large enough to fit compressed
1968 * data in one shot. We will check it later anyway. */
1969 bufsize = datasize + (datasize/100+1) + 600;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001970
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001971 ret = PyBytes_FromStringAndSize(NULL, bufsize);
1972 if (!ret) {
1973 PyBuffer_Release(&pdata);
1974 return NULL;
1975 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001976
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001977 memset(bzs, 0, sizeof(bz_stream));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001978
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001979 bzs->next_in = data;
1980 bzs->avail_in = datasize;
1981 bzs->next_out = BUF(ret);
1982 bzs->avail_out = bufsize;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001983
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001984 bzerror = BZ2_bzCompressInit(bzs, compresslevel, 0, 0);
1985 if (bzerror != BZ_OK) {
1986 Util_CatchBZ2Error(bzerror);
1987 PyBuffer_Release(&pdata);
1988 Py_DECREF(ret);
1989 return NULL;
1990 }
Tim Peterse3228092002-11-09 04:21:44 +00001991
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001992 for (;;) {
1993 Py_BEGIN_ALLOW_THREADS
1994 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
1995 Py_END_ALLOW_THREADS
1996 if (bzerror == BZ_STREAM_END) {
1997 break;
1998 } else if (bzerror != BZ_FINISH_OK) {
1999 BZ2_bzCompressEnd(bzs);
2000 Util_CatchBZ2Error(bzerror);
2001 PyBuffer_Release(&pdata);
2002 Py_DECREF(ret);
2003 return NULL;
2004 }
2005 if (bzs->avail_out == 0) {
2006 bufsize = Util_NewBufferSize(bufsize);
2007 if (_PyBytes_Resize(&ret, bufsize) < 0) {
2008 BZ2_bzCompressEnd(bzs);
2009 PyBuffer_Release(&pdata);
2010 return NULL;
2011 }
2012 bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
2013 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
2014 }
2015 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002016
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002017 if (bzs->avail_out != 0) {
2018 if (_PyBytes_Resize(&ret, (Py_ssize_t)BZS_TOTAL_OUT(bzs)) < 0) {
2019 ret = NULL;
2020 }
2021 }
2022 BZ2_bzCompressEnd(bzs);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002023
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002024 PyBuffer_Release(&pdata);
2025 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002026}
2027
2028PyDoc_STRVAR(bz2_decompress__doc__,
2029"decompress(data) -> decompressed data\n\
2030\n\
2031Decompress data in one shot. If you want to decompress data sequentially,\n\
2032use an instance of BZ2Decompressor instead.\n\
2033");
2034
2035static PyObject *
2036bz2_decompress(PyObject *self, PyObject *args)
2037{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002038 Py_buffer pdata;
2039 char *data;
2040 int datasize;
2041 int bufsize = SMALLCHUNK;
2042 PyObject *ret;
2043 bz_stream _bzs;
2044 bz_stream *bzs = &_bzs;
2045 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002046
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002047 if (!PyArg_ParseTuple(args, "y*:decompress", &pdata))
2048 return NULL;
2049 data = pdata.buf;
2050 datasize = pdata.len;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002051
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002052 if (datasize == 0) {
2053 PyBuffer_Release(&pdata);
2054 return PyBytes_FromStringAndSize("", 0);
2055 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002056
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002057 ret = PyBytes_FromStringAndSize(NULL, bufsize);
2058 if (!ret) {
2059 PyBuffer_Release(&pdata);
2060 return NULL;
2061 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002062
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002063 memset(bzs, 0, sizeof(bz_stream));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002064
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002065 bzs->next_in = data;
2066 bzs->avail_in = datasize;
2067 bzs->next_out = BUF(ret);
2068 bzs->avail_out = bufsize;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002069
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002070 bzerror = BZ2_bzDecompressInit(bzs, 0, 0);
2071 if (bzerror != BZ_OK) {
2072 Util_CatchBZ2Error(bzerror);
2073 Py_DECREF(ret);
2074 PyBuffer_Release(&pdata);
2075 return NULL;
2076 }
Tim Peterse3228092002-11-09 04:21:44 +00002077
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002078 for (;;) {
2079 Py_BEGIN_ALLOW_THREADS
2080 bzerror = BZ2_bzDecompress(bzs);
2081 Py_END_ALLOW_THREADS
2082 if (bzerror == BZ_STREAM_END) {
2083 break;
2084 } else if (bzerror != BZ_OK) {
2085 BZ2_bzDecompressEnd(bzs);
2086 Util_CatchBZ2Error(bzerror);
2087 PyBuffer_Release(&pdata);
2088 Py_DECREF(ret);
2089 return NULL;
2090 }
2091 if (bzs->avail_in == 0) {
2092 BZ2_bzDecompressEnd(bzs);
2093 PyErr_SetString(PyExc_ValueError,
2094 "couldn't find end of stream");
2095 PyBuffer_Release(&pdata);
2096 Py_DECREF(ret);
2097 return NULL;
2098 }
2099 if (bzs->avail_out == 0) {
2100 bufsize = Util_NewBufferSize(bufsize);
2101 if (_PyBytes_Resize(&ret, bufsize) < 0) {
2102 BZ2_bzDecompressEnd(bzs);
2103 PyBuffer_Release(&pdata);
2104 return NULL;
2105 }
2106 bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
2107 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
2108 }
2109 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002110
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002111 if (bzs->avail_out != 0) {
2112 if (_PyBytes_Resize(&ret, (Py_ssize_t)BZS_TOTAL_OUT(bzs)) < 0) {
2113 ret = NULL;
2114 }
2115 }
2116 BZ2_bzDecompressEnd(bzs);
2117 PyBuffer_Release(&pdata);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002118
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002119 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002120}
2121
2122static PyMethodDef bz2_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002123 {"compress", (PyCFunction) bz2_compress, METH_VARARGS|METH_KEYWORDS,
2124 bz2_compress__doc__},
2125 {"decompress", (PyCFunction) bz2_decompress, METH_VARARGS,
2126 bz2_decompress__doc__},
2127 {NULL, NULL} /* sentinel */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002128};
2129
2130/* ===================================================================== */
2131/* Initialization function. */
2132
2133PyDoc_STRVAR(bz2__doc__,
2134"The python bz2 module provides a comprehensive interface for\n\
2135the bz2 compression library. It implements a complete file\n\
2136interface, one shot (de)compression functions, and types for\n\
2137sequential (de)compression.\n\
2138");
2139
Martin v. Löwis1a214512008-06-11 05:26:20 +00002140
2141static struct PyModuleDef bz2module = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002142 PyModuleDef_HEAD_INIT,
2143 "bz2",
2144 bz2__doc__,
2145 -1,
2146 bz2_methods,
2147 NULL,
2148 NULL,
2149 NULL,
2150 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00002151};
2152
Neal Norwitz21d896c2003-07-01 20:15:21 +00002153PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00002154PyInit_bz2(void)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002155{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002156 PyObject *m;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002157
Antoine Pitrou70c60442010-09-23 19:51:39 +00002158 if (PyType_Ready(&BZ2File_Type) < 0)
2159 return NULL;
2160 if (PyType_Ready(&BZ2Comp_Type) < 0)
2161 return NULL;
2162 if (PyType_Ready(&BZ2Decomp_Type) < 0)
2163 return NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002164
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002165 m = PyModule_Create(&bz2module);
2166 if (m == NULL)
2167 return NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002168
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002169 PyModule_AddObject(m, "__author__", PyUnicode_FromString(__author__));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002170
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002171 Py_INCREF(&BZ2File_Type);
2172 PyModule_AddObject(m, "BZ2File", (PyObject *)&BZ2File_Type);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002173
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002174 Py_INCREF(&BZ2Comp_Type);
2175 PyModule_AddObject(m, "BZ2Compressor", (PyObject *)&BZ2Comp_Type);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002176
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00002177 Py_INCREF(&BZ2Decomp_Type);
2178 PyModule_AddObject(m, "BZ2Decompressor", (PyObject *)&BZ2Decomp_Type);
2179 return m;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002180}