/*

python-bz2 - python bz2 library interface

Copyright (c) 2002 Gustavo Niemeyer <niemeyer@conectiva.com>
Copyright (c) 2002 Python Software Foundation; All Rights Reserved

*/
9
Martin v. Löwise17af7b2002-11-23 09:16:19 +000010#include "Python.h"
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000011#include <stdio.h>
12#include <bzlib.h>
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000013#include "structmember.h"
14
15#ifdef WITH_THREAD
16#include "pythread.h"
17#endif
18
19static char __author__[] =
20"The bz2 python module was written by:\n\
21\n\
22 Gustavo Niemeyer <niemeyer@conectiva.com>\n\
23";
24
/* File-offset type used for BZ2FileObject.pos/size (copied from
 * Objects/fileobject.c).  off_t is used when there is no large file
 * support or when off_t is already at least 8 bytes wide; otherwise
 * fpos_t is used if it is at least 8 bytes wide.  Any other
 * configuration is rejected at compile time. */
#if !defined(HAVE_LARGEFILE_SUPPORT) || SIZEOF_OFF_T >= 8
typedef off_t Py_off_t;
#elif SIZEOF_FPOS_T >= 8
typedef fpos_t Py_off_t;
#else
#error "Large file support, but neither off_t nor fpos_t is large enough."
#endif
36
/* Character buffer of a string object.  The argument is parenthesized
 * so that the cast applies to the whole expression passed in, not just
 * to its first operand. */
#define BUF(v) PyString_AS_STRING((PyStringObject *)(v))

/* Values stored in BZ2FileObject.mode. */
#define MODE_CLOSED   0   /* operations fail with "I/O operation on closed file" */
#define MODE_READ     1   /* reading, end of stream not reached yet */
#define MODE_READ_EOF 2   /* reading, BZ_STREAM_END has been seen */
#define MODE_WRITE    3   /* writing */

/* Exact type check (subclasses do not match). */
#define BZ2FileObject_Check(v)  (Py_TYPE(v) == &BZ2File_Type)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000045
Gustavo Niemeyer7628f1f2003-04-27 06:25:24 +000046
/* BZS_TOTAL_OUT(bzs) yields the total number of output bytes recorded in
 * the bz_stream pointed to by bzs.
 *
 * When the library headers define BZ_CONFIG_ERROR the counter is split
 * into two 32-bit halves, which are combined into the widest integer
 * type available (long, then PY_LONG_LONG, else only the low half).
 * Otherwise (presumably an older libbz2 whose API lacks the BZ2_ prefix)
 * the unprefixed function names are aliased and a single total_out
 * field is used.
 *
 * The macro argument and every expansion are parenthesized so that the
 * macro can be used with arbitrary pointer expressions and inside larger
 * expressions.
 */
#ifdef BZ_CONFIG_ERROR

#if SIZEOF_LONG >= 8
#define BZS_TOTAL_OUT(bzs) \
    (((long)(bzs)->total_out_hi32 << 32) + (bzs)->total_out_lo32)
#elif SIZEOF_LONG_LONG >= 8
#define BZS_TOTAL_OUT(bzs) \
    (((PY_LONG_LONG)(bzs)->total_out_hi32 << 32) + (bzs)->total_out_lo32)
#else
#define BZS_TOTAL_OUT(bzs) \
    ((bzs)->total_out_lo32)
#endif

#else /* ! BZ_CONFIG_ERROR */

#define BZ2_bzRead bzRead
#define BZ2_bzReadOpen bzReadOpen
#define BZ2_bzReadClose bzReadClose
#define BZ2_bzWrite bzWrite
#define BZ2_bzWriteOpen bzWriteOpen
#define BZ2_bzWriteClose bzWriteClose
#define BZ2_bzCompress bzCompress
#define BZ2_bzCompressInit bzCompressInit
#define BZ2_bzCompressEnd bzCompressEnd
#define BZ2_bzDecompress bzDecompress
#define BZ2_bzDecompressInit bzDecompressInit
#define BZ2_bzDecompressEnd bzDecompressEnd

#define BZS_TOTAL_OUT(bzs) ((bzs)->total_out)

#endif /* ! BZ_CONFIG_ERROR */
78
79
/* Per-object locking.
 *
 * ACQUIRE_LOCK first tries to take obj->lock without blocking; only if
 * that fails does it wait for the lock, and it does so between
 * Py_BEGIN_ALLOW_THREADS / Py_END_ALLOW_THREADS so other Python threads
 * can run while this one is blocked.  Without WITH_THREAD both macros
 * expand to nothing.  The argument is parenthesized so any pointer
 * expression may be passed.
 */
#ifdef WITH_THREAD
#define ACQUIRE_LOCK(obj) do { \
    if (!PyThread_acquire_lock((obj)->lock, 0)) { \
        Py_BEGIN_ALLOW_THREADS \
        PyThread_acquire_lock((obj)->lock, 1); \
        Py_END_ALLOW_THREADS \
    } } while(0)
#define RELEASE_LOCK(obj) PyThread_release_lock((obj)->lock)
#else
#define ACQUIRE_LOCK(obj)
#define RELEASE_LOCK(obj)
#endif
92
/* Bit flags accumulated in BZ2FileObject.f_newlinetypes while reading
 * in universal-newline mode. */
#define NEWLINE_UNKNOWN 0x0     /* no newline has been seen so far */
#define NEWLINE_CR      0x1     /* a lone \r was seen */
#define NEWLINE_LF      0x2     /* a lone \n was seen */
#define NEWLINE_CRLF    0x4     /* a \r\n pair was seen */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +000098
99/* ===================================================================== */
100/* Structure definitions. */
101
102typedef struct {
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000103 PyObject_HEAD
104 PyObject *file;
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000105
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000106 char* f_buf; /* Allocated readahead buffer */
107 char* f_bufend; /* Points after last occupied position */
108 char* f_bufptr; /* Current buffer position */
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000109
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000110 int f_softspace; /* Flag used by 'print' command */
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000111
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000112 int f_univ_newline; /* Handle any newline convention */
113 int f_newlinetypes; /* Types of newlines seen */
114 int f_skipnextlf; /* Skip next \n */
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000115
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000116 BZFILE *fp;
117 int mode;
118 Py_off_t pos;
119 Py_off_t size;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000120#ifdef WITH_THREAD
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000121 PyThread_type_lock lock;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000122#endif
123} BZ2FileObject;
124
125typedef struct {
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000126 PyObject_HEAD
127 bz_stream bzs;
128 int running;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000129#ifdef WITH_THREAD
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000130 PyThread_type_lock lock;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000131#endif
132} BZ2CompObject;
133
134typedef struct {
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000135 PyObject_HEAD
136 bz_stream bzs;
137 int running;
138 PyObject *unused_data;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000139#ifdef WITH_THREAD
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000140 PyThread_type_lock lock;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000141#endif
142} BZ2DecompObject;
143
144/* ===================================================================== */
145/* Utility functions. */
146
147static int
148Util_CatchBZ2Error(int bzerror)
149{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000150 int ret = 0;
151 switch(bzerror) {
152 case BZ_OK:
153 case BZ_STREAM_END:
154 break;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000155
Gustavo Niemeyer7628f1f2003-04-27 06:25:24 +0000156#ifdef BZ_CONFIG_ERROR
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000157 case BZ_CONFIG_ERROR:
158 PyErr_SetString(PyExc_SystemError,
159 "the bz2 library was not compiled "
160 "correctly");
161 ret = 1;
162 break;
Gustavo Niemeyer7628f1f2003-04-27 06:25:24 +0000163#endif
Tim Peterse3228092002-11-09 04:21:44 +0000164
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000165 case BZ_PARAM_ERROR:
166 PyErr_SetString(PyExc_ValueError,
167 "the bz2 library has received wrong "
168 "parameters");
169 ret = 1;
170 break;
Tim Peterse3228092002-11-09 04:21:44 +0000171
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000172 case BZ_MEM_ERROR:
173 PyErr_NoMemory();
174 ret = 1;
175 break;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000176
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000177 case BZ_DATA_ERROR:
178 case BZ_DATA_ERROR_MAGIC:
179 PyErr_SetString(PyExc_IOError, "invalid data stream");
180 ret = 1;
181 break;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000182
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000183 case BZ_IO_ERROR:
184 PyErr_SetString(PyExc_IOError, "unknown IO error");
185 ret = 1;
186 break;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000187
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000188 case BZ_UNEXPECTED_EOF:
189 PyErr_SetString(PyExc_EOFError,
190 "compressed file ended before the "
191 "logical end-of-stream was detected");
192 ret = 1;
193 break;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000194
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000195 case BZ_SEQUENCE_ERROR:
196 PyErr_SetString(PyExc_RuntimeError,
197 "wrong sequence of bz2 library "
198 "commands used");
199 ret = 1;
200 break;
201 }
202 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000203}
204
/* SMALLCHUNK: growth step for small buffers -- BUFSIZ, but at least 8192. */
#if BUFSIZ >= 8192
#define SMALLCHUNK BUFSIZ
#else
#define SMALLCHUNK 8192
#endif

/* BIGCHUNK: growth step for large buffers; reduced when int is narrower
 * than 4 bytes. */
#if SIZEOF_INT >= 4
#define BIGCHUNK (512 * 1024)
#else
#define BIGCHUNK (512 * 32)
#endif

/* This is a hacked version of Python's fileobject.c:new_buffersize().
 *
 * Given the current buffer size, return the size to grow it to:
 *   - up to and including SMALLCHUNK: add SMALLCHUNK;
 *   - above SMALLCHUNK, up to and including BIGCHUNK: double;
 *   - above BIGCHUNK: add BIGCHUNK.
 */
static size_t
Util_NewBufferSize(size_t currentsize)
{
    size_t step;

    if (currentsize <= SMALLCHUNK)
        step = SMALLCHUNK;
    else if (currentsize <= BIGCHUNK)
        step = currentsize;
    else
        step = BIGCHUNK;

    return currentsize + step;
}
231
232/* This is a hacked version of Python's fileobject.c:get_line(). */
233static PyObject *
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000234Util_GetLine(BZ2FileObject *f, int n)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000235{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000236 char c;
237 char *buf, *end;
238 size_t total_v_size; /* total # of slots in buffer */
239 size_t used_v_size; /* # used slots in buffer */
240 size_t increment; /* amount to increment the buffer */
241 PyObject *v;
242 int bzerror;
243 int bytes_read;
244 int newlinetypes = f->f_newlinetypes;
245 int skipnextlf = f->f_skipnextlf;
246 int univ_newline = f->f_univ_newline;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000247
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000248 total_v_size = n > 0 ? n : 100;
249 v = PyString_FromStringAndSize((char *)NULL, total_v_size);
250 if (v == NULL)
251 return NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000252
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000253 buf = BUF(v);
254 end = buf + total_v_size;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000255
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000256 for (;;) {
257 Py_BEGIN_ALLOW_THREADS
258 while (buf != end) {
259 bytes_read = BZ2_bzRead(&bzerror, f->fp, &c, 1);
260 f->pos++;
261 if (bytes_read == 0) break;
262 if (univ_newline) {
263 if (skipnextlf) {
264 skipnextlf = 0;
265 if (c == '\n') {
266 /* Seeing a \n here with skipnextlf true means we
267 * saw a \r before.
268 */
269 newlinetypes |= NEWLINE_CRLF;
270 if (bzerror != BZ_OK) break;
271 bytes_read = BZ2_bzRead(&bzerror, f->fp, &c, 1);
272 f->pos++;
273 if (bytes_read == 0) break;
274 } else {
275 newlinetypes |= NEWLINE_CR;
276 }
277 }
278 if (c == '\r') {
279 skipnextlf = 1;
280 c = '\n';
281 } else if (c == '\n')
282 newlinetypes |= NEWLINE_LF;
283 }
284 *buf++ = c;
285 if (bzerror != BZ_OK || c == '\n') break;
286 }
287 if (univ_newline && bzerror == BZ_STREAM_END && skipnextlf)
288 newlinetypes |= NEWLINE_CR;
289 Py_END_ALLOW_THREADS
290 f->f_newlinetypes = newlinetypes;
291 f->f_skipnextlf = skipnextlf;
292 if (bzerror == BZ_STREAM_END) {
293 f->size = f->pos;
294 f->mode = MODE_READ_EOF;
295 break;
296 } else if (bzerror != BZ_OK) {
297 Util_CatchBZ2Error(bzerror);
298 Py_DECREF(v);
299 return NULL;
300 }
301 if (c == '\n')
302 break;
303 /* Must be because buf == end */
304 if (n > 0)
305 break;
306 used_v_size = total_v_size;
307 increment = total_v_size >> 2; /* mild exponential growth */
308 total_v_size += increment;
309 if (total_v_size > INT_MAX) {
310 PyErr_SetString(PyExc_OverflowError,
311 "line is longer than a Python string can hold");
312 Py_DECREF(v);
313 return NULL;
314 }
315 if (_PyString_Resize(&v, total_v_size) < 0)
316 return NULL;
317 buf = BUF(v) + used_v_size;
318 end = BUF(v) + total_v_size;
319 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000320
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000321 used_v_size = buf - BUF(v);
322 if (used_v_size != total_v_size)
323 _PyString_Resize(&v, used_v_size);
324 return v;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000325}
326
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000327/* This is a hacked version of Python's
328 * fileobject.c:Py_UniversalNewlineFread(). */
329size_t
330Util_UnivNewlineRead(int *bzerror, BZFILE *stream,
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000331 char* buf, size_t n, BZ2FileObject *f)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000332{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000333 char *dst = buf;
334 int newlinetypes, skipnextlf;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000335
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000336 assert(buf != NULL);
337 assert(stream != NULL);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000338
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000339 if (!f->f_univ_newline)
340 return BZ2_bzRead(bzerror, stream, buf, n);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000341
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000342 newlinetypes = f->f_newlinetypes;
343 skipnextlf = f->f_skipnextlf;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000344
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000345 /* Invariant: n is the number of bytes remaining to be filled
346 * in the buffer.
347 */
348 while (n) {
349 size_t nread;
350 int shortread;
351 char *src = dst;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000352
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000353 nread = BZ2_bzRead(bzerror, stream, dst, n);
354 assert(nread <= n);
355 n -= nread; /* assuming 1 byte out for each in; will adjust */
356 shortread = n != 0; /* true iff EOF or error */
357 while (nread--) {
358 char c = *src++;
359 if (c == '\r') {
360 /* Save as LF and set flag to skip next LF. */
361 *dst++ = '\n';
362 skipnextlf = 1;
363 }
364 else if (skipnextlf && c == '\n') {
365 /* Skip LF, and remember we saw CR LF. */
366 skipnextlf = 0;
367 newlinetypes |= NEWLINE_CRLF;
368 ++n;
369 }
370 else {
371 /* Normal char to be stored in buffer. Also
372 * update the newlinetypes flag if either this
373 * is an LF or the previous char was a CR.
374 */
375 if (c == '\n')
376 newlinetypes |= NEWLINE_LF;
377 else if (skipnextlf)
378 newlinetypes |= NEWLINE_CR;
379 *dst++ = c;
380 skipnextlf = 0;
381 }
382 }
383 if (shortread) {
384 /* If this is EOF, update type flags. */
385 if (skipnextlf && *bzerror == BZ_STREAM_END)
386 newlinetypes |= NEWLINE_CR;
387 break;
388 }
389 }
390 f->f_newlinetypes = newlinetypes;
391 f->f_skipnextlf = skipnextlf;
392 return dst - buf;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000393}
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000394
395/* This is a hacked version of Python's fileobject.c:drop_readahead(). */
396static void
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000397Util_DropReadAhead(BZ2FileObject *f)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000398{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000399 if (f->f_buf != NULL) {
400 PyMem_Free(f->f_buf);
401 f->f_buf = NULL;
402 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000403}
404
405/* This is a hacked version of Python's fileobject.c:readahead(). */
406static int
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000407Util_ReadAhead(BZ2FileObject *f, int bufsize)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000408{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000409 int chunksize;
410 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000411
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000412 if (f->f_buf != NULL) {
413 if((f->f_bufend - f->f_bufptr) >= 1)
414 return 0;
415 else
416 Util_DropReadAhead(f);
417 }
418 if (f->mode == MODE_READ_EOF) {
419 f->f_bufptr = f->f_buf;
420 f->f_bufend = f->f_buf;
421 return 0;
422 }
423 if ((f->f_buf = PyMem_Malloc(bufsize)) == NULL) {
424 PyErr_NoMemory();
425 return -1;
426 }
427 Py_BEGIN_ALLOW_THREADS
428 chunksize = Util_UnivNewlineRead(&bzerror, f->fp, f->f_buf,
429 bufsize, f);
430 Py_END_ALLOW_THREADS
431 f->pos += chunksize;
432 if (bzerror == BZ_STREAM_END) {
433 f->size = f->pos;
434 f->mode = MODE_READ_EOF;
435 } else if (bzerror != BZ_OK) {
436 Util_CatchBZ2Error(bzerror);
437 Util_DropReadAhead(f);
438 return -1;
439 }
440 f->f_bufptr = f->f_buf;
441 f->f_bufend = f->f_buf + chunksize;
442 return 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000443}
444
445/* This is a hacked version of Python's
446 * fileobject.c:readahead_get_line_skip(). */
Gregory P. Smithdd96db62008-06-09 04:58:54 +0000447static PyStringObject *
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000448Util_ReadAheadGetLineSkip(BZ2FileObject *f, int skip, int bufsize)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000449{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000450 PyStringObject* s;
451 char *bufptr;
452 char *buf;
453 int len;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000454
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000455 if (f->f_buf == NULL)
456 if (Util_ReadAhead(f, bufsize) < 0)
457 return NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000458
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000459 len = f->f_bufend - f->f_bufptr;
460 if (len == 0)
461 return (PyStringObject *)
462 PyString_FromStringAndSize(NULL, skip);
463 bufptr = memchr(f->f_bufptr, '\n', len);
464 if (bufptr != NULL) {
465 bufptr++; /* Count the '\n' */
466 len = bufptr - f->f_bufptr;
467 s = (PyStringObject *)
468 PyString_FromStringAndSize(NULL, skip+len);
469 if (s == NULL)
470 return NULL;
471 memcpy(PyString_AS_STRING(s)+skip, f->f_bufptr, len);
472 f->f_bufptr = bufptr;
473 if (bufptr == f->f_bufend)
474 Util_DropReadAhead(f);
475 } else {
476 bufptr = f->f_bufptr;
477 buf = f->f_buf;
478 f->f_buf = NULL; /* Force new readahead buffer */
479 s = Util_ReadAheadGetLineSkip(f, skip+len,
480 bufsize + (bufsize>>2));
481 if (s == NULL) {
482 PyMem_Free(buf);
483 return NULL;
484 }
485 memcpy(PyString_AS_STRING(s)+skip, bufptr, len);
486 PyMem_Free(buf);
487 }
488 return s;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000489}
490
491/* ===================================================================== */
492/* Methods of BZ2File. */
493
494PyDoc_STRVAR(BZ2File_read__doc__,
495"read([size]) -> string\n\
496\n\
497Read at most size uncompressed bytes, returned as a string. If the size\n\
498argument is negative or omitted, read until EOF is reached.\n\
499");
500
501/* This is a hacked version of Python's fileobject.c:file_read(). */
502static PyObject *
503BZ2File_read(BZ2FileObject *self, PyObject *args)
504{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000505 long bytesrequested = -1;
506 size_t bytesread, buffersize, chunksize;
507 int bzerror;
508 PyObject *ret = NULL;
Tim Peterse3228092002-11-09 04:21:44 +0000509
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000510 if (!PyArg_ParseTuple(args, "|l:read", &bytesrequested))
511 return NULL;
Tim Peterse3228092002-11-09 04:21:44 +0000512
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000513 ACQUIRE_LOCK(self);
514 switch (self->mode) {
515 case MODE_READ:
516 break;
517 case MODE_READ_EOF:
518 ret = PyString_FromString("");
519 goto cleanup;
520 case MODE_CLOSED:
521 PyErr_SetString(PyExc_ValueError,
522 "I/O operation on closed file");
523 goto cleanup;
524 default:
525 PyErr_SetString(PyExc_IOError,
526 "file is not ready for reading");
527 goto cleanup;
528 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000529
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000530 if (bytesrequested < 0)
531 buffersize = Util_NewBufferSize((size_t)0);
532 else
533 buffersize = bytesrequested;
534 if (buffersize > INT_MAX) {
535 PyErr_SetString(PyExc_OverflowError,
536 "requested number of bytes is "
537 "more than a Python string can hold");
538 goto cleanup;
539 }
540 ret = PyString_FromStringAndSize((char *)NULL, buffersize);
541 if (ret == NULL)
542 goto cleanup;
543 bytesread = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000544
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000545 for (;;) {
546 Py_BEGIN_ALLOW_THREADS
547 chunksize = Util_UnivNewlineRead(&bzerror, self->fp,
548 BUF(ret)+bytesread,
549 buffersize-bytesread,
550 self);
551 self->pos += chunksize;
552 Py_END_ALLOW_THREADS
553 bytesread += chunksize;
554 if (bzerror == BZ_STREAM_END) {
555 self->size = self->pos;
556 self->mode = MODE_READ_EOF;
557 break;
558 } else if (bzerror != BZ_OK) {
559 Util_CatchBZ2Error(bzerror);
560 Py_DECREF(ret);
561 ret = NULL;
562 goto cleanup;
563 }
564 if (bytesrequested < 0) {
565 buffersize = Util_NewBufferSize(buffersize);
566 if (_PyString_Resize(&ret, buffersize) < 0)
567 goto cleanup;
568 } else {
569 break;
570 }
571 }
572 if (bytesread != buffersize)
573 _PyString_Resize(&ret, bytesread);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000574
575cleanup:
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000576 RELEASE_LOCK(self);
577 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000578}
579
580PyDoc_STRVAR(BZ2File_readline__doc__,
581"readline([size]) -> string\n\
582\n\
583Return the next line from the file, as a string, retaining newline.\n\
584A non-negative size argument will limit the maximum number of bytes to\n\
585return (an incomplete line may be returned then). Return an empty\n\
586string at EOF.\n\
587");
588
589static PyObject *
590BZ2File_readline(BZ2FileObject *self, PyObject *args)
591{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000592 PyObject *ret = NULL;
593 int sizehint = -1;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000594
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000595 if (!PyArg_ParseTuple(args, "|i:readline", &sizehint))
596 return NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000597
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000598 ACQUIRE_LOCK(self);
599 switch (self->mode) {
600 case MODE_READ:
601 break;
602 case MODE_READ_EOF:
603 ret = PyString_FromString("");
604 goto cleanup;
605 case MODE_CLOSED:
606 PyErr_SetString(PyExc_ValueError,
607 "I/O operation on closed file");
608 goto cleanup;
609 default:
610 PyErr_SetString(PyExc_IOError,
611 "file is not ready for reading");
612 goto cleanup;
613 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000614
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000615 if (sizehint == 0)
616 ret = PyString_FromString("");
617 else
618 ret = Util_GetLine(self, (sizehint < 0) ? 0 : sizehint);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000619
620cleanup:
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000621 RELEASE_LOCK(self);
622 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000623}
624
625PyDoc_STRVAR(BZ2File_readlines__doc__,
626"readlines([size]) -> list\n\
627\n\
628Call readline() repeatedly and return a list of lines read.\n\
629The optional size argument, if given, is an approximate bound on the\n\
630total number of bytes in the lines returned.\n\
631");
632
633/* This is a hacked version of Python's fileobject.c:file_readlines(). */
634static PyObject *
635BZ2File_readlines(BZ2FileObject *self, PyObject *args)
636{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000637 long sizehint = 0;
638 PyObject *list = NULL;
639 PyObject *line;
640 char small_buffer[SMALLCHUNK];
641 char *buffer = small_buffer;
642 size_t buffersize = SMALLCHUNK;
643 PyObject *big_buffer = NULL;
644 size_t nfilled = 0;
645 size_t nread;
646 size_t totalread = 0;
647 char *p, *q, *end;
648 int err;
649 int shortread = 0;
650 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000651
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000652 if (!PyArg_ParseTuple(args, "|l:readlines", &sizehint))
653 return NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000654
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000655 ACQUIRE_LOCK(self);
656 switch (self->mode) {
657 case MODE_READ:
658 break;
659 case MODE_READ_EOF:
660 list = PyList_New(0);
661 goto cleanup;
662 case MODE_CLOSED:
663 PyErr_SetString(PyExc_ValueError,
664 "I/O operation on closed file");
665 goto cleanup;
666 default:
667 PyErr_SetString(PyExc_IOError,
668 "file is not ready for reading");
669 goto cleanup;
670 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000671
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000672 if ((list = PyList_New(0)) == NULL)
673 goto cleanup;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000674
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000675 for (;;) {
676 Py_BEGIN_ALLOW_THREADS
677 nread = Util_UnivNewlineRead(&bzerror, self->fp,
678 buffer+nfilled,
679 buffersize-nfilled, self);
680 self->pos += nread;
681 Py_END_ALLOW_THREADS
682 if (bzerror == BZ_STREAM_END) {
683 self->size = self->pos;
684 self->mode = MODE_READ_EOF;
685 if (nread == 0) {
686 sizehint = 0;
687 break;
688 }
689 shortread = 1;
690 } else if (bzerror != BZ_OK) {
691 Util_CatchBZ2Error(bzerror);
692 error:
693 Py_DECREF(list);
694 list = NULL;
695 goto cleanup;
696 }
697 totalread += nread;
698 p = memchr(buffer+nfilled, '\n', nread);
699 if (!shortread && p == NULL) {
700 /* Need a larger buffer to fit this line */
701 nfilled += nread;
702 buffersize *= 2;
703 if (buffersize > INT_MAX) {
704 PyErr_SetString(PyExc_OverflowError,
705 "line is longer than a Python string can hold");
706 goto error;
707 }
708 if (big_buffer == NULL) {
709 /* Create the big buffer */
710 big_buffer = PyString_FromStringAndSize(
711 NULL, buffersize);
712 if (big_buffer == NULL)
713 goto error;
714 buffer = PyString_AS_STRING(big_buffer);
715 memcpy(buffer, small_buffer, nfilled);
716 }
717 else {
718 /* Grow the big buffer */
719 _PyString_Resize(&big_buffer, buffersize);
720 buffer = PyString_AS_STRING(big_buffer);
721 }
722 continue;
723 }
724 end = buffer+nfilled+nread;
725 q = buffer;
726 while (p != NULL) {
727 /* Process complete lines */
728 p++;
729 line = PyString_FromStringAndSize(q, p-q);
730 if (line == NULL)
731 goto error;
732 err = PyList_Append(list, line);
733 Py_DECREF(line);
734 if (err != 0)
735 goto error;
736 q = p;
737 p = memchr(q, '\n', end-q);
738 }
739 /* Move the remaining incomplete line to the start */
740 nfilled = end-q;
741 memmove(buffer, q, nfilled);
742 if (sizehint > 0)
743 if (totalread >= (size_t)sizehint)
744 break;
745 if (shortread) {
746 sizehint = 0;
747 break;
748 }
749 }
750 if (nfilled != 0) {
751 /* Partial last line */
752 line = PyString_FromStringAndSize(buffer, nfilled);
753 if (line == NULL)
754 goto error;
755 if (sizehint > 0) {
756 /* Need to complete the last line */
757 PyObject *rest = Util_GetLine(self, 0);
758 if (rest == NULL) {
759 Py_DECREF(line);
760 goto error;
761 }
762 PyString_Concat(&line, rest);
763 Py_DECREF(rest);
764 if (line == NULL)
765 goto error;
766 }
767 err = PyList_Append(list, line);
768 Py_DECREF(line);
769 if (err != 0)
770 goto error;
771 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000772
773 cleanup:
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000774 RELEASE_LOCK(self);
775 if (big_buffer) {
776 Py_DECREF(big_buffer);
777 }
778 return list;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000779}
780
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000781PyDoc_STRVAR(BZ2File_xreadlines__doc__,
782"xreadlines() -> self\n\
783\n\
784For backward compatibility. BZ2File objects now include the performance\n\
785optimizations previously implemented in the xreadlines module.\n\
786");
787
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000788PyDoc_STRVAR(BZ2File_write__doc__,
789"write(data) -> None\n\
790\n\
791Write the 'data' string to file. Note that due to buffering, close() may\n\
792be needed before the file on disk reflects the data written.\n\
793");
794
795/* This is a hacked version of Python's fileobject.c:file_write(). */
796static PyObject *
797BZ2File_write(BZ2FileObject *self, PyObject *args)
798{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000799 PyObject *ret = NULL;
800 Py_buffer pbuf;
801 char *buf;
802 int len;
803 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000804
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000805 if (!PyArg_ParseTuple(args, "s*:write", &pbuf))
806 return NULL;
807 buf = pbuf.buf;
808 len = pbuf.len;
Tim Peterse3228092002-11-09 04:21:44 +0000809
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000810 ACQUIRE_LOCK(self);
811 switch (self->mode) {
812 case MODE_WRITE:
813 break;
Tim Peterse3228092002-11-09 04:21:44 +0000814
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000815 case MODE_CLOSED:
816 PyErr_SetString(PyExc_ValueError,
817 "I/O operation on closed file");
818 goto cleanup;
Tim Peterse3228092002-11-09 04:21:44 +0000819
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000820 default:
821 PyErr_SetString(PyExc_IOError,
822 "file is not ready for writing");
823 goto cleanup;
824 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000825
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000826 self->f_softspace = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000827
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000828 Py_BEGIN_ALLOW_THREADS
829 BZ2_bzWrite (&bzerror, self->fp, buf, len);
830 self->pos += len;
831 Py_END_ALLOW_THREADS
Tim Peterse3228092002-11-09 04:21:44 +0000832
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000833 if (bzerror != BZ_OK) {
834 Util_CatchBZ2Error(bzerror);
835 goto cleanup;
836 }
Tim Peterse3228092002-11-09 04:21:44 +0000837
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000838 Py_INCREF(Py_None);
839 ret = Py_None;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000840
841cleanup:
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000842 PyBuffer_Release(&pbuf);
843 RELEASE_LOCK(self);
844 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000845}
846
847PyDoc_STRVAR(BZ2File_writelines__doc__,
848"writelines(sequence_of_strings) -> None\n\
849\n\
850Write the sequence of strings to the file. Note that newlines are not\n\
851added. The sequence can be any iterable object producing strings. This is\n\
852equivalent to calling write() for each string.\n\
853");
854
855/* This is a hacked version of Python's fileobject.c:file_writelines(). */
856static PyObject *
857BZ2File_writelines(BZ2FileObject *self, PyObject *seq)
858{
859#define CHUNKSIZE 1000
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000860 PyObject *list = NULL;
861 PyObject *iter = NULL;
862 PyObject *ret = NULL;
863 PyObject *line;
864 int i, j, index, len, islist;
865 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000866
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000867 ACQUIRE_LOCK(self);
868 switch (self->mode) {
869 case MODE_WRITE:
870 break;
Georg Brandl3335a7a2006-08-14 21:42:55 +0000871
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000872 case MODE_CLOSED:
873 PyErr_SetString(PyExc_ValueError,
874 "I/O operation on closed file");
875 goto error;
Georg Brandl3335a7a2006-08-14 21:42:55 +0000876
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000877 default:
878 PyErr_SetString(PyExc_IOError,
879 "file is not ready for writing");
880 goto error;
881 }
Georg Brandl3335a7a2006-08-14 21:42:55 +0000882
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000883 islist = PyList_Check(seq);
884 if (!islist) {
885 iter = PyObject_GetIter(seq);
886 if (iter == NULL) {
887 PyErr_SetString(PyExc_TypeError,
888 "writelines() requires an iterable argument");
889 goto error;
890 }
891 list = PyList_New(CHUNKSIZE);
892 if (list == NULL)
893 goto error;
894 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000895
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000896 /* Strategy: slurp CHUNKSIZE lines into a private list,
897 checking that they are all strings, then write that list
898 without holding the interpreter lock, then come back for more. */
899 for (index = 0; ; index += CHUNKSIZE) {
900 if (islist) {
901 Py_XDECREF(list);
902 list = PyList_GetSlice(seq, index, index+CHUNKSIZE);
903 if (list == NULL)
904 goto error;
905 j = PyList_GET_SIZE(list);
906 }
907 else {
908 for (j = 0; j < CHUNKSIZE; j++) {
909 line = PyIter_Next(iter);
910 if (line == NULL) {
911 if (PyErr_Occurred())
912 goto error;
913 break;
914 }
915 PyList_SetItem(list, j, line);
916 }
917 }
918 if (j == 0)
919 break;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000920
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000921 /* Check that all entries are indeed strings. If not,
922 apply the same rules as for file.write() and
923 convert the rets to strings. This is slow, but
924 seems to be the only way since all conversion APIs
925 could potentially execute Python code. */
926 for (i = 0; i < j; i++) {
927 PyObject *v = PyList_GET_ITEM(list, i);
928 if (!PyString_Check(v)) {
929 const char *buffer;
930 Py_ssize_t len;
931 if (PyObject_AsCharBuffer(v, &buffer, &len)) {
932 PyErr_SetString(PyExc_TypeError,
933 "writelines() "
934 "argument must be "
935 "a sequence of "
936 "strings");
937 goto error;
938 }
939 line = PyString_FromStringAndSize(buffer,
940 len);
941 if (line == NULL)
942 goto error;
943 Py_DECREF(v);
944 PyList_SET_ITEM(list, i, line);
945 }
946 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000947
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000948 self->f_softspace = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000949
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000950 /* Since we are releasing the global lock, the
951 following code may *not* execute Python code. */
952 Py_BEGIN_ALLOW_THREADS
953 for (i = 0; i < j; i++) {
954 line = PyList_GET_ITEM(list, i);
955 len = PyString_GET_SIZE(line);
956 BZ2_bzWrite (&bzerror, self->fp,
957 PyString_AS_STRING(line), len);
958 if (bzerror != BZ_OK) {
959 Py_BLOCK_THREADS
960 Util_CatchBZ2Error(bzerror);
961 goto error;
962 }
963 }
964 Py_END_ALLOW_THREADS
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000965
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000966 if (j < CHUNKSIZE)
967 break;
968 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000969
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000970 Py_INCREF(Py_None);
971 ret = Py_None;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000972
973 error:
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000974 RELEASE_LOCK(self);
975 Py_XDECREF(list);
976 Py_XDECREF(iter);
977 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000978#undef CHUNKSIZE
979}
980
981PyDoc_STRVAR(BZ2File_seek__doc__,
982"seek(offset [, whence]) -> None\n\
983\n\
984Move to new file position. Argument offset is a byte count. Optional\n\
985argument whence defaults to 0 (offset from start of file, offset\n\
986should be >= 0); other values are 1 (move relative to current position,\n\
987positive or negative), and 2 (move relative to end of file, usually\n\
988negative, although many platforms allow seeking beyond the end of a file).\n\
989\n\
990Note that seeking of bz2 files is emulated, and depending on the parameters\n\
991the operation may be extremely slow.\n\
992");
993
994static PyObject *
995BZ2File_seek(BZ2FileObject *self, PyObject *args)
996{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +0000997 int where = 0;
998 PyObject *offobj;
999 Py_off_t offset;
1000 char small_buffer[SMALLCHUNK];
1001 char *buffer = small_buffer;
1002 size_t buffersize = SMALLCHUNK;
1003 Py_off_t bytesread = 0;
1004 size_t readsize;
1005 int chunksize;
1006 int bzerror;
1007 PyObject *ret = NULL;
Tim Peterse3228092002-11-09 04:21:44 +00001008
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001009 if (!PyArg_ParseTuple(args, "O|i:seek", &offobj, &where))
1010 return NULL;
Georg Brandl33a5f2a2005-08-21 14:16:04 +00001011#if !defined(HAVE_LARGEFILE_SUPPORT)
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001012 offset = PyInt_AsLong(offobj);
Georg Brandl33a5f2a2005-08-21 14:16:04 +00001013#else
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001014 offset = PyLong_Check(offobj) ?
1015 PyLong_AsLongLong(offobj) : PyInt_AsLong(offobj);
Georg Brandl33a5f2a2005-08-21 14:16:04 +00001016#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001017 if (PyErr_Occurred())
1018 return NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001019
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001020 ACQUIRE_LOCK(self);
1021 Util_DropReadAhead(self);
1022 switch (self->mode) {
1023 case MODE_READ:
1024 case MODE_READ_EOF:
1025 break;
Tim Peterse3228092002-11-09 04:21:44 +00001026
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001027 case MODE_CLOSED:
1028 PyErr_SetString(PyExc_ValueError,
1029 "I/O operation on closed file");
1030 goto cleanup;
Tim Peterse3228092002-11-09 04:21:44 +00001031
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001032 default:
1033 PyErr_SetString(PyExc_IOError,
1034 "seek works only while reading");
1035 goto cleanup;
1036 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001037
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001038 if (where == 2) {
1039 if (self->size == -1) {
1040 assert(self->mode != MODE_READ_EOF);
1041 for (;;) {
1042 Py_BEGIN_ALLOW_THREADS
1043 chunksize = Util_UnivNewlineRead(
1044 &bzerror, self->fp,
1045 buffer, buffersize,
1046 self);
1047 self->pos += chunksize;
1048 Py_END_ALLOW_THREADS
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001049
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001050 bytesread += chunksize;
1051 if (bzerror == BZ_STREAM_END) {
1052 break;
1053 } else if (bzerror != BZ_OK) {
1054 Util_CatchBZ2Error(bzerror);
1055 goto cleanup;
1056 }
1057 }
1058 self->mode = MODE_READ_EOF;
1059 self->size = self->pos;
1060 bytesread = 0;
1061 }
1062 offset = self->size + offset;
1063 } else if (where == 1) {
1064 offset = self->pos + offset;
1065 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001066
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001067 /* Before getting here, offset must be the absolute position the file
1068 * pointer should be set to. */
Georg Brandl47fab922006-02-18 21:57:25 +00001069
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001070 if (offset >= self->pos) {
1071 /* we can move forward */
1072 offset -= self->pos;
1073 } else {
1074 /* we cannot move back, so rewind the stream */
1075 BZ2_bzReadClose(&bzerror, self->fp);
1076 if (self->fp) {
1077 PyFile_DecUseCount((PyFileObject *)self->file);
1078 self->fp = NULL;
1079 }
1080 if (bzerror != BZ_OK) {
1081 Util_CatchBZ2Error(bzerror);
1082 goto cleanup;
1083 }
1084 ret = PyObject_CallMethod(self->file, "seek", "(i)", 0);
1085 if (!ret)
1086 goto cleanup;
1087 Py_DECREF(ret);
1088 ret = NULL;
1089 self->pos = 0;
1090 self->fp = BZ2_bzReadOpen(&bzerror, PyFile_AsFile(self->file),
1091 0, 0, NULL, 0);
1092 if (self->fp)
1093 PyFile_IncUseCount((PyFileObject *)self->file);
1094 if (bzerror != BZ_OK) {
1095 Util_CatchBZ2Error(bzerror);
1096 goto cleanup;
1097 }
1098 self->mode = MODE_READ;
1099 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001100
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001101 if (offset <= 0 || self->mode == MODE_READ_EOF)
1102 goto exit;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001103
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001104 /* Before getting here, offset must be set to the number of bytes
1105 * to walk forward. */
1106 for (;;) {
1107 if (offset-bytesread > buffersize)
1108 readsize = buffersize;
1109 else
1110 /* offset might be wider that readsize, but the result
1111 * of the subtraction is bound by buffersize (see the
1112 * condition above). buffersize is 8192. */
1113 readsize = (size_t)(offset-bytesread);
1114 Py_BEGIN_ALLOW_THREADS
1115 chunksize = Util_UnivNewlineRead(&bzerror, self->fp,
1116 buffer, readsize, self);
1117 self->pos += chunksize;
1118 Py_END_ALLOW_THREADS
1119 bytesread += chunksize;
1120 if (bzerror == BZ_STREAM_END) {
1121 self->size = self->pos;
1122 self->mode = MODE_READ_EOF;
1123 break;
1124 } else if (bzerror != BZ_OK) {
1125 Util_CatchBZ2Error(bzerror);
1126 goto cleanup;
1127 }
1128 if (bytesread == offset)
1129 break;
1130 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001131
1132exit:
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001133 Py_INCREF(Py_None);
1134 ret = Py_None;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001135
1136cleanup:
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001137 RELEASE_LOCK(self);
1138 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001139}
1140
1141PyDoc_STRVAR(BZ2File_tell__doc__,
1142"tell() -> int\n\
1143\n\
1144Return the current file position, an integer (may be a long integer).\n\
1145");
1146
1147static PyObject *
1148BZ2File_tell(BZ2FileObject *self, PyObject *args)
1149{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001150 PyObject *ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001151
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001152 if (self->mode == MODE_CLOSED) {
1153 PyErr_SetString(PyExc_ValueError,
1154 "I/O operation on closed file");
1155 goto cleanup;
1156 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001157
Georg Brandla8bcecc2005-09-03 07:49:53 +00001158#if !defined(HAVE_LARGEFILE_SUPPORT)
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001159 ret = PyInt_FromLong(self->pos);
Georg Brandla8bcecc2005-09-03 07:49:53 +00001160#else
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001161 ret = PyLong_FromLongLong(self->pos);
Georg Brandla8bcecc2005-09-03 07:49:53 +00001162#endif
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001163
1164cleanup:
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001165 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001166}
1167
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001168PyDoc_STRVAR(BZ2File_close__doc__,
1169"close() -> None or (perhaps) an integer\n\
1170\n\
1171Close the file. Sets data attribute .closed to true. A closed file\n\
1172cannot be used for further I/O operations. close() may be called more\n\
1173than once without error.\n\
1174");
1175
1176static PyObject *
1177BZ2File_close(BZ2FileObject *self)
1178{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001179 PyObject *ret = NULL;
1180 int bzerror = BZ_OK;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001181
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001182 ACQUIRE_LOCK(self);
1183 switch (self->mode) {
1184 case MODE_READ:
1185 case MODE_READ_EOF:
1186 BZ2_bzReadClose(&bzerror, self->fp);
1187 break;
1188 case MODE_WRITE:
1189 BZ2_bzWriteClose(&bzerror, self->fp,
1190 0, NULL, NULL);
1191 break;
1192 }
1193 if (self->fp) {
1194 PyFile_DecUseCount((PyFileObject *)self->file);
1195 self->fp = NULL;
1196 }
1197 self->mode = MODE_CLOSED;
1198 ret = PyObject_CallMethod(self->file, "close", NULL);
1199 if (bzerror != BZ_OK) {
1200 Util_CatchBZ2Error(bzerror);
1201 Py_XDECREF(ret);
1202 ret = NULL;
1203 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001204
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001205 RELEASE_LOCK(self);
1206 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001207}
1208
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001209static PyObject *BZ2File_getiter(BZ2FileObject *self);
1210
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001211static PyMethodDef BZ2File_methods[] = {
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001212 {"read", (PyCFunction)BZ2File_read, METH_VARARGS, BZ2File_read__doc__},
1213 {"readline", (PyCFunction)BZ2File_readline, METH_VARARGS, BZ2File_readline__doc__},
1214 {"readlines", (PyCFunction)BZ2File_readlines, METH_VARARGS, BZ2File_readlines__doc__},
1215 {"xreadlines", (PyCFunction)BZ2File_getiter, METH_VARARGS, BZ2File_xreadlines__doc__},
1216 {"write", (PyCFunction)BZ2File_write, METH_VARARGS, BZ2File_write__doc__},
1217 {"writelines", (PyCFunction)BZ2File_writelines, METH_O, BZ2File_writelines__doc__},
1218 {"seek", (PyCFunction)BZ2File_seek, METH_VARARGS, BZ2File_seek__doc__},
1219 {"tell", (PyCFunction)BZ2File_tell, METH_NOARGS, BZ2File_tell__doc__},
1220 {"close", (PyCFunction)BZ2File_close, METH_NOARGS, BZ2File_close__doc__},
1221 {NULL, NULL} /* sentinel */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001222};
1223
1224
1225/* ===================================================================== */
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001226/* Getters and setters of BZ2File. */
1227
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001228/* This is a hacked version of Python's fileobject.c:get_newlines(). */
1229static PyObject *
1230BZ2File_get_newlines(BZ2FileObject *self, void *closure)
1231{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001232 switch (self->f_newlinetypes) {
1233 case NEWLINE_UNKNOWN:
1234 Py_INCREF(Py_None);
1235 return Py_None;
1236 case NEWLINE_CR:
1237 return PyString_FromString("\r");
1238 case NEWLINE_LF:
1239 return PyString_FromString("\n");
1240 case NEWLINE_CR|NEWLINE_LF:
1241 return Py_BuildValue("(ss)", "\r", "\n");
1242 case NEWLINE_CRLF:
1243 return PyString_FromString("\r\n");
1244 case NEWLINE_CR|NEWLINE_CRLF:
1245 return Py_BuildValue("(ss)", "\r", "\r\n");
1246 case NEWLINE_LF|NEWLINE_CRLF:
1247 return Py_BuildValue("(ss)", "\n", "\r\n");
1248 case NEWLINE_CR|NEWLINE_LF|NEWLINE_CRLF:
1249 return Py_BuildValue("(sss)", "\r", "\n", "\r\n");
1250 default:
1251 PyErr_Format(PyExc_SystemError,
1252 "Unknown newlines value 0x%x\n",
1253 self->f_newlinetypes);
1254 return NULL;
1255 }
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001256}
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001257
1258static PyObject *
1259BZ2File_get_closed(BZ2FileObject *self, void *closure)
1260{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001261 return PyInt_FromLong(self->mode == MODE_CLOSED);
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001262}
1263
1264static PyObject *
1265BZ2File_get_mode(BZ2FileObject *self, void *closure)
1266{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001267 return PyObject_GetAttrString(self->file, "mode");
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001268}
1269
1270static PyObject *
1271BZ2File_get_name(BZ2FileObject *self, void *closure)
1272{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001273 return PyObject_GetAttrString(self->file, "name");
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001274}
1275
1276static PyGetSetDef BZ2File_getset[] = {
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001277 {"closed", (getter)BZ2File_get_closed, NULL,
1278 "True if the file is closed"},
1279 {"newlines", (getter)BZ2File_get_newlines, NULL,
1280 "end-of-line convention used in this file"},
1281 {"mode", (getter)BZ2File_get_mode, NULL,
1282 "file mode ('r', 'w', or 'U')"},
1283 {"name", (getter)BZ2File_get_name, NULL,
1284 "file name"},
1285 {NULL} /* Sentinel */
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001286};
1287
1288
1289/* ===================================================================== */
1290/* Members of BZ2File_Type. */
1291
1292#undef OFF
1293#define OFF(x) offsetof(BZ2FileObject, x)
1294
1295static PyMemberDef BZ2File_members[] = {
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001296 {"softspace", T_INT, OFF(f_softspace), 0,
1297 "flag indicating that a space needs to be printed; used by print"},
1298 {NULL} /* Sentinel */
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001299};
1300
1301/* ===================================================================== */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001302/* Slot definitions for BZ2File_Type. */
1303
1304static int
1305BZ2File_init(BZ2FileObject *self, PyObject *args, PyObject *kwargs)
1306{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001307 static char *kwlist[] = {"filename", "mode", "buffering",
1308 "compresslevel", 0};
1309 PyObject *name;
1310 char *mode = "r";
1311 int buffering = -1;
1312 int compresslevel = 9;
1313 int bzerror;
1314 int mode_char = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001315
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001316 self->size = -1;
Tim Peterse3228092002-11-09 04:21:44 +00001317
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001318 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|sii:BZ2File",
1319 kwlist, &name, &mode, &buffering,
1320 &compresslevel))
1321 return -1;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001322
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001323 if (compresslevel < 1 || compresslevel > 9) {
1324 PyErr_SetString(PyExc_ValueError,
1325 "compresslevel must be between 1 and 9");
1326 return -1;
1327 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001328
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001329 for (;;) {
1330 int error = 0;
1331 switch (*mode) {
1332 case 'r':
1333 case 'w':
1334 if (mode_char)
1335 error = 1;
1336 mode_char = *mode;
1337 break;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001338
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001339 case 'b':
1340 break;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001341
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001342 case 'U':
Neal Norwitz2a30cd02006-07-10 01:18:57 +00001343#ifdef __VMS
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001344 self->f_univ_newline = 0;
Neal Norwitz2a30cd02006-07-10 01:18:57 +00001345#else
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001346 self->f_univ_newline = 1;
Neal Norwitz2a30cd02006-07-10 01:18:57 +00001347#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001348 break;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001349
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001350 default:
1351 error = 1;
1352 break;
1353 }
1354 if (error) {
1355 PyErr_Format(PyExc_ValueError,
1356 "invalid mode char %c", *mode);
1357 return -1;
1358 }
1359 mode++;
1360 if (*mode == '\0')
1361 break;
1362 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001363
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001364 if (mode_char == 0) {
1365 mode_char = 'r';
1366 }
Georg Brandl6b95f1d2005-06-03 19:47:00 +00001367
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001368 mode = (mode_char == 'r') ? "rb" : "wb";
Tim Peterse3228092002-11-09 04:21:44 +00001369
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001370 self->file = PyObject_CallFunction((PyObject*)&PyFile_Type, "(Osi)",
1371 name, mode, buffering);
1372 if (self->file == NULL)
1373 return -1;
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001374
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001375 /* From now on, we have stuff to dealloc, so jump to error label
1376 * instead of returning */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001377
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001378#ifdef WITH_THREAD
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001379 self->lock = PyThread_allocate_lock();
1380 if (!self->lock) {
1381 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
1382 goto error;
1383 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001384#endif
1385
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001386 if (mode_char == 'r')
1387 self->fp = BZ2_bzReadOpen(&bzerror,
1388 PyFile_AsFile(self->file),
1389 0, 0, NULL, 0);
1390 else
1391 self->fp = BZ2_bzWriteOpen(&bzerror,
1392 PyFile_AsFile(self->file),
1393 compresslevel, 0, 0);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001394
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001395 if (bzerror != BZ_OK) {
1396 Util_CatchBZ2Error(bzerror);
1397 goto error;
1398 }
1399 PyFile_IncUseCount((PyFileObject *)self->file);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001400
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001401 self->mode = (mode_char == 'r') ? MODE_READ : MODE_WRITE;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001402
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001403 return 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001404
1405error:
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001406 Py_CLEAR(self->file);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001407#ifdef WITH_THREAD
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001408 if (self->lock) {
1409 PyThread_free_lock(self->lock);
1410 self->lock = NULL;
1411 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001412#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001413 return -1;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001414}
1415
1416static void
1417BZ2File_dealloc(BZ2FileObject *self)
1418{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001419 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001420#ifdef WITH_THREAD
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001421 if (self->lock)
1422 PyThread_free_lock(self->lock);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001423#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001424 switch (self->mode) {
1425 case MODE_READ:
1426 case MODE_READ_EOF:
1427 BZ2_bzReadClose(&bzerror, self->fp);
1428 break;
1429 case MODE_WRITE:
1430 BZ2_bzWriteClose(&bzerror, self->fp,
1431 0, NULL, NULL);
1432 break;
1433 }
1434 if (self->fp) {
1435 PyFile_DecUseCount((PyFileObject *)self->file);
1436 self->fp = NULL;
1437 }
1438 Util_DropReadAhead(self);
1439 Py_XDECREF(self->file);
1440 Py_TYPE(self)->tp_free((PyObject *)self);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001441}
1442
1443/* This is a hacked version of Python's fileobject.c:file_getiter(). */
1444static PyObject *
1445BZ2File_getiter(BZ2FileObject *self)
1446{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001447 if (self->mode == MODE_CLOSED) {
1448 PyErr_SetString(PyExc_ValueError,
1449 "I/O operation on closed file");
1450 return NULL;
1451 }
1452 Py_INCREF((PyObject*)self);
1453 return (PyObject *)self;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001454}
1455
1456/* This is a hacked version of Python's fileobject.c:file_iternext(). */
1457#define READAHEAD_BUFSIZE 8192
1458static PyObject *
1459BZ2File_iternext(BZ2FileObject *self)
1460{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001461 PyStringObject* ret;
1462 ACQUIRE_LOCK(self);
1463 if (self->mode == MODE_CLOSED) {
1464 RELEASE_LOCK(self);
1465 PyErr_SetString(PyExc_ValueError,
1466 "I/O operation on closed file");
1467 return NULL;
1468 }
1469 ret = Util_ReadAheadGetLineSkip(self, 0, READAHEAD_BUFSIZE);
1470 RELEASE_LOCK(self);
1471 if (ret == NULL || PyString_GET_SIZE(ret) == 0) {
1472 Py_XDECREF(ret);
1473 return NULL;
1474 }
1475 return (PyObject *)ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001476}
1477
1478/* ===================================================================== */
1479/* BZ2File_Type definition. */
1480
1481PyDoc_VAR(BZ2File__doc__) =
1482PyDoc_STR(
1483"BZ2File(name [, mode='r', buffering=0, compresslevel=9]) -> file object\n\
1484\n\
1485Open a bz2 file. The mode can be 'r' or 'w', for reading (default) or\n\
1486writing. When opened for writing, the file will be created if it doesn't\n\
1487exist, and truncated otherwise. If the buffering argument is given, 0 means\n\
1488unbuffered, and larger numbers specify the buffer size. If compresslevel\n\
1489is given, must be a number between 1 and 9.\n\
1490")
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001491PyDoc_STR(
1492"\n\
1493Add a 'U' to mode to open the file for input with universal newline\n\
1494support. Any line ending in the input file will be seen as a '\\n' in\n\
1495Python. Also, a file so opened gains the attribute 'newlines'; the value\n\
1496for this attribute is one of None (no newline read yet), '\\r', '\\n',\n\
1497'\\r\\n' or a tuple containing all the newline types seen. Universal\n\
1498newlines are available only when reading.\n\
1499")
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001500;
1501
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001502static PyTypeObject BZ2File_Type = {
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001503 PyVarObject_HEAD_INIT(NULL, 0)
1504 "bz2.BZ2File", /*tp_name*/
1505 sizeof(BZ2FileObject), /*tp_basicsize*/
1506 0, /*tp_itemsize*/
1507 (destructor)BZ2File_dealloc, /*tp_dealloc*/
1508 0, /*tp_print*/
1509 0, /*tp_getattr*/
1510 0, /*tp_setattr*/
1511 0, /*tp_compare*/
1512 0, /*tp_repr*/
1513 0, /*tp_as_number*/
1514 0, /*tp_as_sequence*/
1515 0, /*tp_as_mapping*/
1516 0, /*tp_hash*/
1517 0, /*tp_call*/
1518 0, /*tp_str*/
1519 PyObject_GenericGetAttr,/*tp_getattro*/
1520 PyObject_GenericSetAttr,/*tp_setattro*/
1521 0, /*tp_as_buffer*/
1522 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1523 BZ2File__doc__, /*tp_doc*/
1524 0, /*tp_traverse*/
1525 0, /*tp_clear*/
1526 0, /*tp_richcompare*/
1527 0, /*tp_weaklistoffset*/
1528 (getiterfunc)BZ2File_getiter, /*tp_iter*/
1529 (iternextfunc)BZ2File_iternext, /*tp_iternext*/
1530 BZ2File_methods, /*tp_methods*/
1531 BZ2File_members, /*tp_members*/
1532 BZ2File_getset, /*tp_getset*/
1533 0, /*tp_base*/
1534 0, /*tp_dict*/
1535 0, /*tp_descr_get*/
1536 0, /*tp_descr_set*/
1537 0, /*tp_dictoffset*/
1538 (initproc)BZ2File_init, /*tp_init*/
1539 PyType_GenericAlloc, /*tp_alloc*/
1540 PyType_GenericNew, /*tp_new*/
1541 _PyObject_Del, /*tp_free*/
1542 0, /*tp_is_gc*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001543};
1544
1545
1546/* ===================================================================== */
1547/* Methods of BZ2Comp. */
1548
1549PyDoc_STRVAR(BZ2Comp_compress__doc__,
1550"compress(data) -> string\n\
1551\n\
1552Provide more data to the compressor object. It will return chunks of\n\
1553compressed data whenever possible. When you've finished providing data\n\
1554to compress, call the flush() method to finish the compression process,\n\
1555and return what is left in the internal buffers.\n\
1556");
1557
1558static PyObject *
1559BZ2Comp_compress(BZ2CompObject *self, PyObject *args)
1560{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001561 Py_buffer pdata;
1562 char *data;
1563 int datasize;
1564 int bufsize = SMALLCHUNK;
1565 PY_LONG_LONG totalout;
1566 PyObject *ret = NULL;
1567 bz_stream *bzs = &self->bzs;
1568 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001569
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001570 if (!PyArg_ParseTuple(args, "s*:compress", &pdata))
1571 return NULL;
1572 data = pdata.buf;
1573 datasize = pdata.len;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001574
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001575 if (datasize == 0) {
1576 PyBuffer_Release(&pdata);
1577 return PyString_FromString("");
1578 }
Gustavo Niemeyera6e436e2004-02-14 00:02:45 +00001579
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001580 ACQUIRE_LOCK(self);
1581 if (!self->running) {
1582 PyErr_SetString(PyExc_ValueError,
1583 "this object was already flushed");
1584 goto error;
1585 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001586
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001587 ret = PyString_FromStringAndSize(NULL, bufsize);
1588 if (!ret)
1589 goto error;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001590
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001591 bzs->next_in = data;
1592 bzs->avail_in = datasize;
1593 bzs->next_out = BUF(ret);
1594 bzs->avail_out = bufsize;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001595
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001596 totalout = BZS_TOTAL_OUT(bzs);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001597
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001598 for (;;) {
1599 Py_BEGIN_ALLOW_THREADS
1600 bzerror = BZ2_bzCompress(bzs, BZ_RUN);
1601 Py_END_ALLOW_THREADS
1602 if (bzerror != BZ_RUN_OK) {
1603 Util_CatchBZ2Error(bzerror);
1604 goto error;
1605 }
1606 if (bzs->avail_in == 0)
1607 break; /* no more input data */
1608 if (bzs->avail_out == 0) {
1609 bufsize = Util_NewBufferSize(bufsize);
1610 if (_PyString_Resize(&ret, bufsize) < 0) {
1611 BZ2_bzCompressEnd(bzs);
1612 goto error;
1613 }
1614 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1615 - totalout);
1616 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1617 }
1618 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001619
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001620 _PyString_Resize(&ret, (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001621
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001622 RELEASE_LOCK(self);
1623 PyBuffer_Release(&pdata);
1624 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001625
1626error:
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001627 RELEASE_LOCK(self);
1628 PyBuffer_Release(&pdata);
1629 Py_XDECREF(ret);
1630 return NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001631}
1632
1633PyDoc_STRVAR(BZ2Comp_flush__doc__,
1634"flush() -> string\n\
1635\n\
1636Finish the compression process and return what is left in internal buffers.\n\
1637You must not use the compressor object after calling this method.\n\
1638");
1639
1640static PyObject *
1641BZ2Comp_flush(BZ2CompObject *self)
1642{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001643 int bufsize = SMALLCHUNK;
1644 PyObject *ret = NULL;
1645 bz_stream *bzs = &self->bzs;
1646 PY_LONG_LONG totalout;
1647 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001648
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001649 ACQUIRE_LOCK(self);
1650 if (!self->running) {
1651 PyErr_SetString(PyExc_ValueError, "object was already "
1652 "flushed");
1653 goto error;
1654 }
1655 self->running = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001656
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001657 ret = PyString_FromStringAndSize(NULL, bufsize);
1658 if (!ret)
1659 goto error;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001660
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001661 bzs->next_out = BUF(ret);
1662 bzs->avail_out = bufsize;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001663
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001664 totalout = BZS_TOTAL_OUT(bzs);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001665
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001666 for (;;) {
1667 Py_BEGIN_ALLOW_THREADS
1668 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
1669 Py_END_ALLOW_THREADS
1670 if (bzerror == BZ_STREAM_END) {
1671 break;
1672 } else if (bzerror != BZ_FINISH_OK) {
1673 Util_CatchBZ2Error(bzerror);
1674 goto error;
1675 }
1676 if (bzs->avail_out == 0) {
1677 bufsize = Util_NewBufferSize(bufsize);
1678 if (_PyString_Resize(&ret, bufsize) < 0)
1679 goto error;
1680 bzs->next_out = BUF(ret);
1681 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1682 - totalout);
1683 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1684 }
1685 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001686
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001687 if (bzs->avail_out != 0)
1688 _PyString_Resize(&ret, (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001689
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001690 RELEASE_LOCK(self);
1691 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001692
1693error:
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001694 RELEASE_LOCK(self);
1695 Py_XDECREF(ret);
1696 return NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001697}
1698
1699static PyMethodDef BZ2Comp_methods[] = {
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001700 {"compress", (PyCFunction)BZ2Comp_compress, METH_VARARGS,
1701 BZ2Comp_compress__doc__},
1702 {"flush", (PyCFunction)BZ2Comp_flush, METH_NOARGS,
1703 BZ2Comp_flush__doc__},
1704 {NULL, NULL} /* sentinel */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001705};
1706
1707
1708/* ===================================================================== */
1709/* Slot definitions for BZ2Comp_Type. */
1710
1711static int
1712BZ2Comp_init(BZ2CompObject *self, PyObject *args, PyObject *kwargs)
1713{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001714 int compresslevel = 9;
1715 int bzerror;
1716 static char *kwlist[] = {"compresslevel", 0};
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001717
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001718 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i:BZ2Compressor",
1719 kwlist, &compresslevel))
1720 return -1;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001721
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001722 if (compresslevel < 1 || compresslevel > 9) {
1723 PyErr_SetString(PyExc_ValueError,
1724 "compresslevel must be between 1 and 9");
1725 goto error;
1726 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001727
1728#ifdef WITH_THREAD
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001729 self->lock = PyThread_allocate_lock();
1730 if (!self->lock) {
1731 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
1732 goto error;
1733 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001734#endif
1735
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001736 memset(&self->bzs, 0, sizeof(bz_stream));
1737 bzerror = BZ2_bzCompressInit(&self->bzs, compresslevel, 0, 0);
1738 if (bzerror != BZ_OK) {
1739 Util_CatchBZ2Error(bzerror);
1740 goto error;
1741 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001742
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001743 self->running = 1;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001744
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001745 return 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001746error:
1747#ifdef WITH_THREAD
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001748 if (self->lock) {
1749 PyThread_free_lock(self->lock);
1750 self->lock = NULL;
1751 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001752#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001753 return -1;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001754}
1755
1756static void
1757BZ2Comp_dealloc(BZ2CompObject *self)
1758{
1759#ifdef WITH_THREAD
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001760 if (self->lock)
1761 PyThread_free_lock(self->lock);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001762#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001763 BZ2_bzCompressEnd(&self->bzs);
1764 Py_TYPE(self)->tp_free((PyObject *)self);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001765}
1766
1767
1768/* ===================================================================== */
1769/* BZ2Comp_Type definition. */
1770
1771PyDoc_STRVAR(BZ2Comp__doc__,
1772"BZ2Compressor([compresslevel=9]) -> compressor object\n\
1773\n\
1774Create a new compressor object. This object may be used to compress\n\
1775data sequentially. If you want to compress data in one shot, use the\n\
1776compress() function instead. The compresslevel parameter, if given,\n\
1777must be a number between 1 and 9.\n\
1778");
1779
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001780static PyTypeObject BZ2Comp_Type = {
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001781 PyVarObject_HEAD_INIT(NULL, 0)
1782 "bz2.BZ2Compressor", /*tp_name*/
1783 sizeof(BZ2CompObject), /*tp_basicsize*/
1784 0, /*tp_itemsize*/
1785 (destructor)BZ2Comp_dealloc, /*tp_dealloc*/
1786 0, /*tp_print*/
1787 0, /*tp_getattr*/
1788 0, /*tp_setattr*/
1789 0, /*tp_compare*/
1790 0, /*tp_repr*/
1791 0, /*tp_as_number*/
1792 0, /*tp_as_sequence*/
1793 0, /*tp_as_mapping*/
1794 0, /*tp_hash*/
1795 0, /*tp_call*/
1796 0, /*tp_str*/
1797 PyObject_GenericGetAttr,/*tp_getattro*/
1798 PyObject_GenericSetAttr,/*tp_setattro*/
1799 0, /*tp_as_buffer*/
1800 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1801 BZ2Comp__doc__, /*tp_doc*/
1802 0, /*tp_traverse*/
1803 0, /*tp_clear*/
1804 0, /*tp_richcompare*/
1805 0, /*tp_weaklistoffset*/
1806 0, /*tp_iter*/
1807 0, /*tp_iternext*/
1808 BZ2Comp_methods, /*tp_methods*/
1809 0, /*tp_members*/
1810 0, /*tp_getset*/
1811 0, /*tp_base*/
1812 0, /*tp_dict*/
1813 0, /*tp_descr_get*/
1814 0, /*tp_descr_set*/
1815 0, /*tp_dictoffset*/
1816 (initproc)BZ2Comp_init, /*tp_init*/
1817 PyType_GenericAlloc, /*tp_alloc*/
1818 PyType_GenericNew, /*tp_new*/
1819 _PyObject_Del, /*tp_free*/
1820 0, /*tp_is_gc*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001821};
1822
1823
1824/* ===================================================================== */
1825/* Members of BZ2Decomp. */
1826
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001827#undef OFF
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001828#define OFF(x) offsetof(BZ2DecompObject, x)
1829
1830static PyMemberDef BZ2Decomp_members[] = {
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001831 {"unused_data", T_OBJECT, OFF(unused_data), RO},
1832 {NULL} /* Sentinel */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001833};
1834
1835
1836/* ===================================================================== */
1837/* Methods of BZ2Decomp. */
1838
1839PyDoc_STRVAR(BZ2Decomp_decompress__doc__,
1840"decompress(data) -> string\n\
1841\n\
1842Provide more data to the decompressor object. It will return chunks\n\
1843of decompressed data whenever possible. If you try to decompress data\n\
1844after the end of stream is found, EOFError will be raised. If any data\n\
1845was found after the end of stream, it'll be ignored and saved in\n\
1846unused_data attribute.\n\
1847");
1848
1849static PyObject *
1850BZ2Decomp_decompress(BZ2DecompObject *self, PyObject *args)
1851{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001852 Py_buffer pdata;
1853 char *data;
1854 int datasize;
1855 int bufsize = SMALLCHUNK;
1856 PY_LONG_LONG totalout;
1857 PyObject *ret = NULL;
1858 bz_stream *bzs = &self->bzs;
1859 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001860
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001861 if (!PyArg_ParseTuple(args, "s*:decompress", &pdata))
1862 return NULL;
1863 data = pdata.buf;
1864 datasize = pdata.len;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001865
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001866 ACQUIRE_LOCK(self);
1867 if (!self->running) {
1868 PyErr_SetString(PyExc_EOFError, "end of stream was "
1869 "already found");
1870 goto error;
1871 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001872
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001873 ret = PyString_FromStringAndSize(NULL, bufsize);
1874 if (!ret)
1875 goto error;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001876
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001877 bzs->next_in = data;
1878 bzs->avail_in = datasize;
1879 bzs->next_out = BUF(ret);
1880 bzs->avail_out = bufsize;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001881
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001882 totalout = BZS_TOTAL_OUT(bzs);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001883
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001884 for (;;) {
1885 Py_BEGIN_ALLOW_THREADS
1886 bzerror = BZ2_bzDecompress(bzs);
1887 Py_END_ALLOW_THREADS
1888 if (bzerror == BZ_STREAM_END) {
1889 if (bzs->avail_in != 0) {
1890 Py_DECREF(self->unused_data);
1891 self->unused_data =
1892 PyString_FromStringAndSize(bzs->next_in,
1893 bzs->avail_in);
1894 }
1895 self->running = 0;
1896 break;
1897 }
1898 if (bzerror != BZ_OK) {
1899 Util_CatchBZ2Error(bzerror);
1900 goto error;
1901 }
1902 if (bzs->avail_in == 0)
1903 break; /* no more input data */
1904 if (bzs->avail_out == 0) {
1905 bufsize = Util_NewBufferSize(bufsize);
1906 if (_PyString_Resize(&ret, bufsize) < 0) {
1907 BZ2_bzDecompressEnd(bzs);
1908 goto error;
1909 }
1910 bzs->next_out = BUF(ret);
1911 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1912 - totalout);
1913 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1914 }
1915 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001916
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001917 if (bzs->avail_out != 0)
1918 _PyString_Resize(&ret, (Py_ssize_t)(BZS_TOTAL_OUT(bzs) - totalout));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001919
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001920 RELEASE_LOCK(self);
1921 PyBuffer_Release(&pdata);
1922 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001923
1924error:
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001925 RELEASE_LOCK(self);
1926 PyBuffer_Release(&pdata);
1927 Py_XDECREF(ret);
1928 return NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001929}
1930
1931static PyMethodDef BZ2Decomp_methods[] = {
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001932 {"decompress", (PyCFunction)BZ2Decomp_decompress, METH_VARARGS, BZ2Decomp_decompress__doc__},
1933 {NULL, NULL} /* sentinel */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001934};
1935
1936
1937/* ===================================================================== */
1938/* Slot definitions for BZ2Decomp_Type. */
1939
1940static int
1941BZ2Decomp_init(BZ2DecompObject *self, PyObject *args, PyObject *kwargs)
1942{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001943 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001944
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001945 if (!PyArg_ParseTuple(args, ":BZ2Decompressor"))
1946 return -1;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001947
1948#ifdef WITH_THREAD
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001949 self->lock = PyThread_allocate_lock();
1950 if (!self->lock) {
1951 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
1952 goto error;
1953 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001954#endif
1955
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001956 self->unused_data = PyString_FromString("");
1957 if (!self->unused_data)
1958 goto error;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001959
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001960 memset(&self->bzs, 0, sizeof(bz_stream));
1961 bzerror = BZ2_bzDecompressInit(&self->bzs, 0, 0);
1962 if (bzerror != BZ_OK) {
1963 Util_CatchBZ2Error(bzerror);
1964 goto error;
1965 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001966
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001967 self->running = 1;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001968
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001969 return 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001970
1971error:
1972#ifdef WITH_THREAD
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001973 if (self->lock) {
1974 PyThread_free_lock(self->lock);
1975 self->lock = NULL;
1976 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001977#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001978 Py_CLEAR(self->unused_data);
1979 return -1;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001980}
1981
1982static void
1983BZ2Decomp_dealloc(BZ2DecompObject *self)
1984{
1985#ifdef WITH_THREAD
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001986 if (self->lock)
1987 PyThread_free_lock(self->lock);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001988#endif
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00001989 Py_XDECREF(self->unused_data);
1990 BZ2_bzDecompressEnd(&self->bzs);
1991 Py_TYPE(self)->tp_free((PyObject *)self);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001992}
1993
1994
1995/* ===================================================================== */
1996/* BZ2Decomp_Type definition. */
1997
1998PyDoc_STRVAR(BZ2Decomp__doc__,
1999"BZ2Decompressor() -> decompressor object\n\
2000\n\
2001Create a new decompressor object. This object may be used to decompress\n\
2002data sequentially. If you want to decompress data in one shot, use the\n\
2003decompress() function instead.\n\
2004");
2005
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00002006static PyTypeObject BZ2Decomp_Type = {
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002007 PyVarObject_HEAD_INIT(NULL, 0)
2008 "bz2.BZ2Decompressor", /*tp_name*/
2009 sizeof(BZ2DecompObject), /*tp_basicsize*/
2010 0, /*tp_itemsize*/
2011 (destructor)BZ2Decomp_dealloc, /*tp_dealloc*/
2012 0, /*tp_print*/
2013 0, /*tp_getattr*/
2014 0, /*tp_setattr*/
2015 0, /*tp_compare*/
2016 0, /*tp_repr*/
2017 0, /*tp_as_number*/
2018 0, /*tp_as_sequence*/
2019 0, /*tp_as_mapping*/
2020 0, /*tp_hash*/
2021 0, /*tp_call*/
2022 0, /*tp_str*/
2023 PyObject_GenericGetAttr,/*tp_getattro*/
2024 PyObject_GenericSetAttr,/*tp_setattro*/
2025 0, /*tp_as_buffer*/
2026 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
2027 BZ2Decomp__doc__, /*tp_doc*/
2028 0, /*tp_traverse*/
2029 0, /*tp_clear*/
2030 0, /*tp_richcompare*/
2031 0, /*tp_weaklistoffset*/
2032 0, /*tp_iter*/
2033 0, /*tp_iternext*/
2034 BZ2Decomp_methods, /*tp_methods*/
2035 BZ2Decomp_members, /*tp_members*/
2036 0, /*tp_getset*/
2037 0, /*tp_base*/
2038 0, /*tp_dict*/
2039 0, /*tp_descr_get*/
2040 0, /*tp_descr_set*/
2041 0, /*tp_dictoffset*/
2042 (initproc)BZ2Decomp_init, /*tp_init*/
2043 PyType_GenericAlloc, /*tp_alloc*/
2044 PyType_GenericNew, /*tp_new*/
2045 _PyObject_Del, /*tp_free*/
2046 0, /*tp_is_gc*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002047};
2048
2049
2050/* ===================================================================== */
2051/* Module functions. */
2052
2053PyDoc_STRVAR(bz2_compress__doc__,
2054"compress(data [, compresslevel=9]) -> string\n\
2055\n\
2056Compress data in one shot. If you want to compress data sequentially,\n\
2057use an instance of BZ2Compressor instead. The compresslevel parameter, if\n\
2058given, must be a number between 1 and 9.\n\
2059");
2060
2061static PyObject *
2062bz2_compress(PyObject *self, PyObject *args, PyObject *kwargs)
2063{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002064 int compresslevel=9;
2065 Py_buffer pdata;
2066 char *data;
2067 int datasize;
2068 int bufsize;
2069 PyObject *ret = NULL;
2070 bz_stream _bzs;
2071 bz_stream *bzs = &_bzs;
2072 int bzerror;
2073 static char *kwlist[] = {"data", "compresslevel", 0};
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002074
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002075 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*|i",
2076 kwlist, &pdata,
2077 &compresslevel))
2078 return NULL;
2079 data = pdata.buf;
2080 datasize = pdata.len;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002081
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002082 if (compresslevel < 1 || compresslevel > 9) {
2083 PyErr_SetString(PyExc_ValueError,
2084 "compresslevel must be between 1 and 9");
2085 PyBuffer_Release(&pdata);
2086 return NULL;
2087 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002088
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002089 /* Conforming to bz2 manual, this is large enough to fit compressed
2090 * data in one shot. We will check it later anyway. */
2091 bufsize = datasize + (datasize/100+1) + 600;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002092
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002093 ret = PyString_FromStringAndSize(NULL, bufsize);
2094 if (!ret) {
2095 PyBuffer_Release(&pdata);
2096 return NULL;
2097 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002098
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002099 memset(bzs, 0, sizeof(bz_stream));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002100
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002101 bzs->next_in = data;
2102 bzs->avail_in = datasize;
2103 bzs->next_out = BUF(ret);
2104 bzs->avail_out = bufsize;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002105
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002106 bzerror = BZ2_bzCompressInit(bzs, compresslevel, 0, 0);
2107 if (bzerror != BZ_OK) {
2108 Util_CatchBZ2Error(bzerror);
2109 PyBuffer_Release(&pdata);
2110 Py_DECREF(ret);
2111 return NULL;
2112 }
Tim Peterse3228092002-11-09 04:21:44 +00002113
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002114 for (;;) {
2115 Py_BEGIN_ALLOW_THREADS
2116 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
2117 Py_END_ALLOW_THREADS
2118 if (bzerror == BZ_STREAM_END) {
2119 break;
2120 } else if (bzerror != BZ_FINISH_OK) {
2121 BZ2_bzCompressEnd(bzs);
2122 Util_CatchBZ2Error(bzerror);
2123 PyBuffer_Release(&pdata);
2124 Py_DECREF(ret);
2125 return NULL;
2126 }
2127 if (bzs->avail_out == 0) {
2128 bufsize = Util_NewBufferSize(bufsize);
2129 if (_PyString_Resize(&ret, bufsize) < 0) {
2130 BZ2_bzCompressEnd(bzs);
2131 PyBuffer_Release(&pdata);
2132 Py_DECREF(ret);
2133 return NULL;
2134 }
2135 bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
2136 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
2137 }
2138 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002139
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002140 if (bzs->avail_out != 0)
2141 _PyString_Resize(&ret, (Py_ssize_t)BZS_TOTAL_OUT(bzs));
2142 BZ2_bzCompressEnd(bzs);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002143
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002144 PyBuffer_Release(&pdata);
2145 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002146}
2147
2148PyDoc_STRVAR(bz2_decompress__doc__,
2149"decompress(data) -> decompressed data\n\
2150\n\
2151Decompress data in one shot. If you want to decompress data sequentially,\n\
2152use an instance of BZ2Decompressor instead.\n\
2153");
2154
2155static PyObject *
2156bz2_decompress(PyObject *self, PyObject *args)
2157{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002158 Py_buffer pdata;
2159 char *data;
2160 int datasize;
2161 int bufsize = SMALLCHUNK;
2162 PyObject *ret;
2163 bz_stream _bzs;
2164 bz_stream *bzs = &_bzs;
2165 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002166
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002167 if (!PyArg_ParseTuple(args, "s*:decompress", &pdata))
2168 return NULL;
2169 data = pdata.buf;
2170 datasize = pdata.len;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002171
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002172 if (datasize == 0) {
2173 PyBuffer_Release(&pdata);
2174 return PyString_FromString("");
2175 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002176
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002177 ret = PyString_FromStringAndSize(NULL, bufsize);
2178 if (!ret) {
2179 PyBuffer_Release(&pdata);
2180 return NULL;
2181 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002182
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002183 memset(bzs, 0, sizeof(bz_stream));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002184
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002185 bzs->next_in = data;
2186 bzs->avail_in = datasize;
2187 bzs->next_out = BUF(ret);
2188 bzs->avail_out = bufsize;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002189
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002190 bzerror = BZ2_bzDecompressInit(bzs, 0, 0);
2191 if (bzerror != BZ_OK) {
2192 Util_CatchBZ2Error(bzerror);
2193 Py_DECREF(ret);
2194 PyBuffer_Release(&pdata);
2195 return NULL;
2196 }
Tim Peterse3228092002-11-09 04:21:44 +00002197
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002198 for (;;) {
2199 Py_BEGIN_ALLOW_THREADS
2200 bzerror = BZ2_bzDecompress(bzs);
2201 Py_END_ALLOW_THREADS
2202 if (bzerror == BZ_STREAM_END) {
2203 break;
2204 } else if (bzerror != BZ_OK) {
2205 BZ2_bzDecompressEnd(bzs);
2206 Util_CatchBZ2Error(bzerror);
2207 PyBuffer_Release(&pdata);
2208 Py_DECREF(ret);
2209 return NULL;
2210 }
2211 if (bzs->avail_in == 0) {
2212 BZ2_bzDecompressEnd(bzs);
2213 PyErr_SetString(PyExc_ValueError,
2214 "couldn't find end of stream");
2215 PyBuffer_Release(&pdata);
2216 Py_DECREF(ret);
2217 return NULL;
2218 }
2219 if (bzs->avail_out == 0) {
2220 bufsize = Util_NewBufferSize(bufsize);
2221 if (_PyString_Resize(&ret, bufsize) < 0) {
2222 BZ2_bzDecompressEnd(bzs);
2223 PyBuffer_Release(&pdata);
2224 Py_DECREF(ret);
2225 return NULL;
2226 }
2227 bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
2228 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
2229 }
2230 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002231
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002232 if (bzs->avail_out != 0)
2233 _PyString_Resize(&ret, (Py_ssize_t)BZS_TOTAL_OUT(bzs));
2234 BZ2_bzDecompressEnd(bzs);
2235 PyBuffer_Release(&pdata);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002236
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002237 return ret;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002238}
2239
2240static PyMethodDef bz2_methods[] = {
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002241 {"compress", (PyCFunction) bz2_compress, METH_VARARGS|METH_KEYWORDS,
2242 bz2_compress__doc__},
2243 {"decompress", (PyCFunction) bz2_decompress, METH_VARARGS,
2244 bz2_decompress__doc__},
2245 {NULL, NULL} /* sentinel */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002246};
2247
2248/* ===================================================================== */
2249/* Initialization function. */
2250
2251PyDoc_STRVAR(bz2__doc__,
2252"The python bz2 module provides a comprehensive interface for\n\
2253the bz2 compression library. It implements a complete file\n\
2254interface, one shot (de)compression functions, and types for\n\
2255sequential (de)compression.\n\
2256");
2257
Neal Norwitz21d896c2003-07-01 20:15:21 +00002258PyMODINIT_FUNC
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002259initbz2(void)
2260{
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002261 PyObject *m;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002262
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002263 Py_TYPE(&BZ2File_Type) = &PyType_Type;
2264 Py_TYPE(&BZ2Comp_Type) = &PyType_Type;
2265 Py_TYPE(&BZ2Decomp_Type) = &PyType_Type;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002266
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002267 m = Py_InitModule3("bz2", bz2_methods, bz2__doc__);
2268 if (m == NULL)
2269 return;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002270
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002271 PyModule_AddObject(m, "__author__", PyString_FromString(__author__));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002272
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002273 Py_INCREF(&BZ2File_Type);
2274 PyModule_AddObject(m, "BZ2File", (PyObject *)&BZ2File_Type);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002275
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002276 Py_INCREF(&BZ2Comp_Type);
2277 PyModule_AddObject(m, "BZ2Compressor", (PyObject *)&BZ2Comp_Type);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002278
Antoine Pitrouc7c96a92010-05-09 15:15:40 +00002279 Py_INCREF(&BZ2Decomp_Type);
2280 PyModule_AddObject(m, "BZ2Decompressor", (PyObject *)&BZ2Decomp_Type);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002281}