/*

python-bz2 - python bz2 library interface

Copyright (c) 2002 Gustavo Niemeyer <niemeyer@conectiva.com>
Copyright (c) 2002 Python Software Foundation; All Rights Reserved

*/

#include "Python.h"
#include <stdio.h>
#include <bzlib.h>
#include "structmember.h"

#ifdef WITH_THREAD
#include "pythread.h"
#endif

/* Author credit string for the module. */
static char __author__[] =
"The bz2 python module was written by:\n\
\n\
 Gustavo Niemeyer <niemeyer@conectiva.com>\n\
";

/* Our very own off_t-like type, 64-bit if possible */
/* copied from Objects/fileobject.c */
#if !defined(HAVE_LARGEFILE_SUPPORT)
typedef off_t Py_off_t;
#elif SIZEOF_OFF_T >= 8
typedef off_t Py_off_t;
#elif SIZEOF_FPOS_T >= 8
typedef fpos_t Py_off_t;
#else
#error "Large file support, but neither off_t nor fpos_t is large enough."
#endif

/* Character buffer of a string object (argument is cast, not checked). */
#define BUF(v) PyString_AS_STRING((PyStringObject *)(v))

/* Values stored in BZ2FileObject.mode. */
#define MODE_CLOSED   0
#define MODE_READ     1
#define MODE_READ_EOF 2
#define MODE_WRITE    3

/* Exact type test against BZ2File_Type. */
#define BZ2FileObject_Check(v)  ((v)->ob_type == &BZ2File_Type)


/* When bzlib.h defines BZ_CONFIG_ERROR, the BZ2_-prefixed API and the
 * split total_out_hi32/total_out_lo32 counters are used directly.
 * Otherwise the BZ2_ names are mapped onto the unprefixed functions and
 * the single total_out counter. */
#ifdef BZ_CONFIG_ERROR

#if SIZEOF_LONG >= 8
#define BZS_TOTAL_OUT(bzs) \
    (((long)(bzs)->total_out_hi32 << 32) + (bzs)->total_out_lo32)
#elif SIZEOF_LONG_LONG >= 8
#define BZS_TOTAL_OUT(bzs) \
    (((PY_LONG_LONG)(bzs)->total_out_hi32 << 32) + (bzs)->total_out_lo32)
#else
/* No 64-bit integer type: only the low 32 bits are reported. */
#define BZS_TOTAL_OUT(bzs) \
    ((bzs)->total_out_lo32)
#endif

#else /* ! BZ_CONFIG_ERROR */

#define BZ2_bzRead bzRead
#define BZ2_bzReadOpen bzReadOpen
#define BZ2_bzReadClose bzReadClose
#define BZ2_bzWrite bzWrite
#define BZ2_bzWriteOpen bzWriteOpen
#define BZ2_bzWriteClose bzWriteClose
#define BZ2_bzCompress bzCompress
#define BZ2_bzCompressInit bzCompressInit
#define BZ2_bzCompressEnd bzCompressEnd
#define BZ2_bzDecompress bzDecompress
#define BZ2_bzDecompressInit bzDecompressInit
#define BZ2_bzDecompressEnd bzDecompressEnd

#define BZS_TOTAL_OUT(bzs) ((bzs)->total_out)

#endif /* ! BZ_CONFIG_ERROR */


/* Per-object lock helpers; they expand to nothing without thread support. */
#ifdef WITH_THREAD
#define ACQUIRE_LOCK(obj) PyThread_acquire_lock((obj)->lock, 1)
#define RELEASE_LOCK(obj) PyThread_release_lock((obj)->lock)
#else
#define ACQUIRE_LOCK(obj)
#define RELEASE_LOCK(obj)
#endif

/* Bits in f_newlinetypes */
#define NEWLINE_UNKNOWN 0       /* No newline seen, yet */
#define NEWLINE_CR 1            /* \r newline seen */
#define NEWLINE_LF 2            /* \n newline seen */
#define NEWLINE_CRLF 4          /* \r\n newline seen */

94/* ===================================================================== */
95/* Structure definitions. */
96
97typedef struct {
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +000098 PyObject_HEAD
99 PyObject *file;
100
101 char* f_buf; /* Allocated readahead buffer */
102 char* f_bufend; /* Points after last occupied position */
103 char* f_bufptr; /* Current buffer position */
104
105 int f_softspace; /* Flag used by 'print' command */
106
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000107 int f_univ_newline; /* Handle any newline convention */
108 int f_newlinetypes; /* Types of newlines seen */
109 int f_skipnextlf; /* Skip next \n */
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000110
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000111 BZFILE *fp;
112 int mode;
Georg Brandla8bcecc2005-09-03 07:49:53 +0000113 Py_off_t pos;
114 Py_off_t size;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000115#ifdef WITH_THREAD
116 PyThread_type_lock lock;
117#endif
118} BZ2FileObject;
119
120typedef struct {
121 PyObject_HEAD
122 bz_stream bzs;
123 int running;
124#ifdef WITH_THREAD
125 PyThread_type_lock lock;
126#endif
127} BZ2CompObject;
128
129typedef struct {
130 PyObject_HEAD
131 bz_stream bzs;
132 int running;
133 PyObject *unused_data;
134#ifdef WITH_THREAD
135 PyThread_type_lock lock;
136#endif
137} BZ2DecompObject;
138
139/* ===================================================================== */
140/* Utility functions. */
141
142static int
143Util_CatchBZ2Error(int bzerror)
144{
145 int ret = 0;
146 switch(bzerror) {
147 case BZ_OK:
148 case BZ_STREAM_END:
149 break;
150
Gustavo Niemeyer7628f1f2003-04-27 06:25:24 +0000151#ifdef BZ_CONFIG_ERROR
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000152 case BZ_CONFIG_ERROR:
153 PyErr_SetString(PyExc_SystemError,
154 "the bz2 library was not compiled "
155 "correctly");
156 ret = 1;
157 break;
Gustavo Niemeyer7628f1f2003-04-27 06:25:24 +0000158#endif
Tim Peterse3228092002-11-09 04:21:44 +0000159
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000160 case BZ_PARAM_ERROR:
161 PyErr_SetString(PyExc_ValueError,
162 "the bz2 library has received wrong "
163 "parameters");
164 ret = 1;
165 break;
Tim Peterse3228092002-11-09 04:21:44 +0000166
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000167 case BZ_MEM_ERROR:
168 PyErr_NoMemory();
169 ret = 1;
170 break;
171
172 case BZ_DATA_ERROR:
173 case BZ_DATA_ERROR_MAGIC:
174 PyErr_SetString(PyExc_IOError, "invalid data stream");
175 ret = 1;
176 break;
177
178 case BZ_IO_ERROR:
179 PyErr_SetString(PyExc_IOError, "unknown IO error");
180 ret = 1;
181 break;
182
183 case BZ_UNEXPECTED_EOF:
184 PyErr_SetString(PyExc_EOFError,
185 "compressed file ended before the "
186 "logical end-of-stream was detected");
187 ret = 1;
188 break;
189
190 case BZ_SEQUENCE_ERROR:
191 PyErr_SetString(PyExc_RuntimeError,
192 "wrong sequence of bz2 library "
193 "commands used");
194 ret = 1;
195 break;
196 }
197 return ret;
198}
199
/* Chunk sizes used when growing read buffers. */
#if BUFSIZ < 8192
#define SMALLCHUNK 8192
#else
#define SMALLCHUNK BUFSIZ
#endif

#if SIZEOF_INT < 4
#define BIGCHUNK  (512 * 32)
#else
#define BIGCHUNK  (512 * 1024)
#endif

/* This is a hacked version of Python's fileobject.c:new_buffersize().
 *
 * Given the current buffer size, return the next size to grow to:
 * sizes up to SMALLCHUNK grow by SMALLCHUNK, sizes above SMALLCHUNK and
 * up to BIGCHUNK double, and sizes above BIGCHUNK grow by BIGCHUNK. */
static size_t
Util_NewBufferSize(size_t currentsize)
{
    if (currentsize <= SMALLCHUNK)
        return currentsize + SMALLCHUNK;

    /* Keep doubling until we reach BIGCHUNK;
       then keep adding BIGCHUNK. */
    if (currentsize <= BIGCHUNK)
        return currentsize + currentsize;
    return currentsize + BIGCHUNK;
}

227/* This is a hacked version of Python's fileobject.c:get_line(). */
228static PyObject *
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000229Util_GetLine(BZ2FileObject *f, int n)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000230{
231 char c;
232 char *buf, *end;
233 size_t total_v_size; /* total # of slots in buffer */
234 size_t used_v_size; /* # used slots in buffer */
235 size_t increment; /* amount to increment the buffer */
236 PyObject *v;
237 int bzerror;
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000238 int newlinetypes = f->f_newlinetypes;
239 int skipnextlf = f->f_skipnextlf;
240 int univ_newline = f->f_univ_newline;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000241
242 total_v_size = n > 0 ? n : 100;
243 v = PyString_FromStringAndSize((char *)NULL, total_v_size);
244 if (v == NULL)
245 return NULL;
246
247 buf = BUF(v);
248 end = buf + total_v_size;
249
250 for (;;) {
251 Py_BEGIN_ALLOW_THREADS
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000252 if (univ_newline) {
253 while (1) {
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000254 BZ2_bzRead(&bzerror, f->fp, &c, 1);
255 f->pos++;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000256 if (bzerror != BZ_OK || buf == end)
257 break;
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +0000258 if (skipnextlf) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000259 skipnextlf = 0;
260 if (c == '\n') {
Tim Peterse3228092002-11-09 04:21:44 +0000261 /* Seeing a \n here with
262 * skipnextlf true means we
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000263 * saw a \r before.
264 */
265 newlinetypes |= NEWLINE_CRLF;
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000266 BZ2_bzRead(&bzerror, f->fp,
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000267 &c, 1);
268 if (bzerror != BZ_OK)
269 break;
270 } else {
271 newlinetypes |= NEWLINE_CR;
272 }
273 }
274 if (c == '\r') {
275 skipnextlf = 1;
276 c = '\n';
277 } else if ( c == '\n')
278 newlinetypes |= NEWLINE_LF;
279 *buf++ = c;
280 if (c == '\n') break;
281 }
282 if (bzerror == BZ_STREAM_END && skipnextlf)
283 newlinetypes |= NEWLINE_CR;
284 } else /* If not universal newlines use the normal loop */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000285 do {
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000286 BZ2_bzRead(&bzerror, f->fp, &c, 1);
287 f->pos++;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000288 *buf++ = c;
289 } while (bzerror == BZ_OK && c != '\n' && buf != end);
290 Py_END_ALLOW_THREADS
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000291 f->f_newlinetypes = newlinetypes;
292 f->f_skipnextlf = skipnextlf;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000293 if (bzerror == BZ_STREAM_END) {
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000294 f->size = f->pos;
295 f->mode = MODE_READ_EOF;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000296 break;
297 } else if (bzerror != BZ_OK) {
298 Util_CatchBZ2Error(bzerror);
299 Py_DECREF(v);
300 return NULL;
301 }
302 if (c == '\n')
303 break;
304 /* Must be because buf == end */
305 if (n > 0)
306 break;
307 used_v_size = total_v_size;
308 increment = total_v_size >> 2; /* mild exponential growth */
309 total_v_size += increment;
310 if (total_v_size > INT_MAX) {
311 PyErr_SetString(PyExc_OverflowError,
312 "line is longer than a Python string can hold");
313 Py_DECREF(v);
314 return NULL;
315 }
316 if (_PyString_Resize(&v, total_v_size) < 0)
317 return NULL;
318 buf = BUF(v) + used_v_size;
319 end = BUF(v) + total_v_size;
320 }
321
322 used_v_size = buf - BUF(v);
323 if (used_v_size != total_v_size)
324 _PyString_Resize(&v, used_v_size);
325 return v;
326}
327
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000328/* This is a hacked version of Python's
329 * fileobject.c:Py_UniversalNewlineFread(). */
330size_t
331Util_UnivNewlineRead(int *bzerror, BZFILE *stream,
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000332 char* buf, size_t n, BZ2FileObject *f)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000333{
334 char *dst = buf;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000335 int newlinetypes, skipnextlf;
336
337 assert(buf != NULL);
338 assert(stream != NULL);
339
340 if (!f->f_univ_newline)
341 return BZ2_bzRead(bzerror, stream, buf, n);
342
343 newlinetypes = f->f_newlinetypes;
344 skipnextlf = f->f_skipnextlf;
345
346 /* Invariant: n is the number of bytes remaining to be filled
347 * in the buffer.
348 */
349 while (n) {
350 size_t nread;
351 int shortread;
352 char *src = dst;
353
354 nread = BZ2_bzRead(bzerror, stream, dst, n);
355 assert(nread <= n);
356 n -= nread; /* assuming 1 byte out for each in; will adjust */
357 shortread = n != 0; /* true iff EOF or error */
358 while (nread--) {
359 char c = *src++;
360 if (c == '\r') {
361 /* Save as LF and set flag to skip next LF. */
362 *dst++ = '\n';
363 skipnextlf = 1;
364 }
365 else if (skipnextlf && c == '\n') {
366 /* Skip LF, and remember we saw CR LF. */
367 skipnextlf = 0;
368 newlinetypes |= NEWLINE_CRLF;
369 ++n;
370 }
371 else {
372 /* Normal char to be stored in buffer. Also
373 * update the newlinetypes flag if either this
374 * is an LF or the previous char was a CR.
375 */
376 if (c == '\n')
377 newlinetypes |= NEWLINE_LF;
378 else if (skipnextlf)
379 newlinetypes |= NEWLINE_CR;
380 *dst++ = c;
381 skipnextlf = 0;
382 }
383 }
384 if (shortread) {
385 /* If this is EOF, update type flags. */
386 if (skipnextlf && *bzerror == BZ_STREAM_END)
387 newlinetypes |= NEWLINE_CR;
388 break;
389 }
390 }
391 f->f_newlinetypes = newlinetypes;
392 f->f_skipnextlf = skipnextlf;
393 return dst - buf;
394}
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000395
396/* This is a hacked version of Python's fileobject.c:drop_readahead(). */
397static void
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000398Util_DropReadAhead(BZ2FileObject *f)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000399{
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000400 if (f->f_buf != NULL) {
401 PyMem_Free(f->f_buf);
402 f->f_buf = NULL;
403 }
404}
405
406/* This is a hacked version of Python's fileobject.c:readahead(). */
407static int
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000408Util_ReadAhead(BZ2FileObject *f, int bufsize)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000409{
410 int chunksize;
411 int bzerror;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000412
413 if (f->f_buf != NULL) {
Tim Peterse3228092002-11-09 04:21:44 +0000414 if((f->f_bufend - f->f_bufptr) >= 1)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000415 return 0;
416 else
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000417 Util_DropReadAhead(f);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000418 }
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000419 if (f->mode == MODE_READ_EOF) {
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000420 f->f_bufptr = f->f_buf;
421 f->f_bufend = f->f_buf;
422 return 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000423 }
424 if ((f->f_buf = PyMem_Malloc(bufsize)) == NULL) {
425 return -1;
426 }
427 Py_BEGIN_ALLOW_THREADS
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000428 chunksize = Util_UnivNewlineRead(&bzerror, f->fp, f->f_buf,
429 bufsize, f);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000430 Py_END_ALLOW_THREADS
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000431 f->pos += chunksize;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000432 if (bzerror == BZ_STREAM_END) {
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000433 f->size = f->pos;
434 f->mode = MODE_READ_EOF;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000435 } else if (bzerror != BZ_OK) {
436 Util_CatchBZ2Error(bzerror);
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000437 Util_DropReadAhead(f);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000438 return -1;
439 }
440 f->f_bufptr = f->f_buf;
441 f->f_bufend = f->f_buf + chunksize;
442 return 0;
443}
444
445/* This is a hacked version of Python's
446 * fileobject.c:readahead_get_line_skip(). */
447static PyStringObject *
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000448Util_ReadAheadGetLineSkip(BZ2FileObject *f, int skip, int bufsize)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000449{
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000450 PyStringObject* s;
451 char *bufptr;
452 char *buf;
453 int len;
454
455 if (f->f_buf == NULL)
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000456 if (Util_ReadAhead(f, bufsize) < 0)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000457 return NULL;
458
459 len = f->f_bufend - f->f_bufptr;
Tim Peterse3228092002-11-09 04:21:44 +0000460 if (len == 0)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000461 return (PyStringObject *)
462 PyString_FromStringAndSize(NULL, skip);
463 bufptr = memchr(f->f_bufptr, '\n', len);
464 if (bufptr != NULL) {
465 bufptr++; /* Count the '\n' */
466 len = bufptr - f->f_bufptr;
467 s = (PyStringObject *)
468 PyString_FromStringAndSize(NULL, skip+len);
Tim Peterse3228092002-11-09 04:21:44 +0000469 if (s == NULL)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000470 return NULL;
471 memcpy(PyString_AS_STRING(s)+skip, f->f_bufptr, len);
472 f->f_bufptr = bufptr;
473 if (bufptr == f->f_bufend)
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000474 Util_DropReadAhead(f);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000475 } else {
476 bufptr = f->f_bufptr;
477 buf = f->f_buf;
478 f->f_buf = NULL; /* Force new readahead buffer */
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000479 s = Util_ReadAheadGetLineSkip(f, skip+len,
480 bufsize + (bufsize>>2));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000481 if (s == NULL) {
482 PyMem_Free(buf);
483 return NULL;
484 }
485 memcpy(PyString_AS_STRING(s)+skip, bufptr, len);
486 PyMem_Free(buf);
487 }
488 return s;
489}
490
491/* ===================================================================== */
492/* Methods of BZ2File. */
493
494PyDoc_STRVAR(BZ2File_read__doc__,
495"read([size]) -> string\n\
496\n\
497Read at most size uncompressed bytes, returned as a string. If the size\n\
498argument is negative or omitted, read until EOF is reached.\n\
499");
500
501/* This is a hacked version of Python's fileobject.c:file_read(). */
502static PyObject *
503BZ2File_read(BZ2FileObject *self, PyObject *args)
504{
505 long bytesrequested = -1;
506 size_t bytesread, buffersize, chunksize;
507 int bzerror;
508 PyObject *ret = NULL;
Tim Peterse3228092002-11-09 04:21:44 +0000509
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000510 if (!PyArg_ParseTuple(args, "|l:read", &bytesrequested))
511 return NULL;
Tim Peterse3228092002-11-09 04:21:44 +0000512
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000513 ACQUIRE_LOCK(self);
514 switch (self->mode) {
515 case MODE_READ:
516 break;
517 case MODE_READ_EOF:
518 ret = PyString_FromString("");
519 goto cleanup;
520 case MODE_CLOSED:
521 PyErr_SetString(PyExc_ValueError,
522 "I/O operation on closed file");
523 goto cleanup;
524 default:
525 PyErr_SetString(PyExc_IOError,
526 "file is not ready for reading");
527 goto cleanup;
528 }
529
530 if (bytesrequested < 0)
531 buffersize = Util_NewBufferSize((size_t)0);
532 else
533 buffersize = bytesrequested;
534 if (buffersize > INT_MAX) {
535 PyErr_SetString(PyExc_OverflowError,
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +0000536 "requested number of bytes is "
537 "more than a Python string can hold");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000538 goto cleanup;
539 }
540 ret = PyString_FromStringAndSize((char *)NULL, buffersize);
541 if (ret == NULL)
542 goto cleanup;
543 bytesread = 0;
544
545 for (;;) {
546 Py_BEGIN_ALLOW_THREADS
547 chunksize = Util_UnivNewlineRead(&bzerror, self->fp,
548 BUF(ret)+bytesread,
549 buffersize-bytesread,
550 self);
551 self->pos += chunksize;
552 Py_END_ALLOW_THREADS
553 bytesread += chunksize;
554 if (bzerror == BZ_STREAM_END) {
555 self->size = self->pos;
556 self->mode = MODE_READ_EOF;
557 break;
558 } else if (bzerror != BZ_OK) {
559 Util_CatchBZ2Error(bzerror);
560 Py_DECREF(ret);
561 ret = NULL;
562 goto cleanup;
563 }
564 if (bytesrequested < 0) {
565 buffersize = Util_NewBufferSize(buffersize);
566 if (_PyString_Resize(&ret, buffersize) < 0)
567 goto cleanup;
568 } else {
569 break;
570 }
571 }
572 if (bytesread != buffersize)
573 _PyString_Resize(&ret, bytesread);
574
575cleanup:
576 RELEASE_LOCK(self);
577 return ret;
578}
579
580PyDoc_STRVAR(BZ2File_readline__doc__,
581"readline([size]) -> string\n\
582\n\
583Return the next line from the file, as a string, retaining newline.\n\
584A non-negative size argument will limit the maximum number of bytes to\n\
585return (an incomplete line may be returned then). Return an empty\n\
586string at EOF.\n\
587");
588
589static PyObject *
590BZ2File_readline(BZ2FileObject *self, PyObject *args)
591{
592 PyObject *ret = NULL;
593 int sizehint = -1;
594
595 if (!PyArg_ParseTuple(args, "|i:readline", &sizehint))
596 return NULL;
597
598 ACQUIRE_LOCK(self);
599 switch (self->mode) {
600 case MODE_READ:
601 break;
602 case MODE_READ_EOF:
603 ret = PyString_FromString("");
604 goto cleanup;
605 case MODE_CLOSED:
606 PyErr_SetString(PyExc_ValueError,
607 "I/O operation on closed file");
608 goto cleanup;
609 default:
610 PyErr_SetString(PyExc_IOError,
611 "file is not ready for reading");
612 goto cleanup;
613 }
614
615 if (sizehint == 0)
616 ret = PyString_FromString("");
617 else
618 ret = Util_GetLine(self, (sizehint < 0) ? 0 : sizehint);
619
620cleanup:
621 RELEASE_LOCK(self);
622 return ret;
623}
624
625PyDoc_STRVAR(BZ2File_readlines__doc__,
626"readlines([size]) -> list\n\
627\n\
628Call readline() repeatedly and return a list of lines read.\n\
629The optional size argument, if given, is an approximate bound on the\n\
630total number of bytes in the lines returned.\n\
631");
632
633/* This is a hacked version of Python's fileobject.c:file_readlines(). */
634static PyObject *
635BZ2File_readlines(BZ2FileObject *self, PyObject *args)
636{
637 long sizehint = 0;
638 PyObject *list = NULL;
639 PyObject *line;
640 char small_buffer[SMALLCHUNK];
641 char *buffer = small_buffer;
642 size_t buffersize = SMALLCHUNK;
643 PyObject *big_buffer = NULL;
644 size_t nfilled = 0;
645 size_t nread;
646 size_t totalread = 0;
647 char *p, *q, *end;
648 int err;
649 int shortread = 0;
650 int bzerror;
651
652 if (!PyArg_ParseTuple(args, "|l:readlines", &sizehint))
653 return NULL;
654
655 ACQUIRE_LOCK(self);
656 switch (self->mode) {
657 case MODE_READ:
658 break;
659 case MODE_READ_EOF:
660 list = PyList_New(0);
661 goto cleanup;
662 case MODE_CLOSED:
663 PyErr_SetString(PyExc_ValueError,
664 "I/O operation on closed file");
665 goto cleanup;
666 default:
667 PyErr_SetString(PyExc_IOError,
668 "file is not ready for reading");
669 goto cleanup;
670 }
671
672 if ((list = PyList_New(0)) == NULL)
673 goto cleanup;
674
675 for (;;) {
676 Py_BEGIN_ALLOW_THREADS
677 nread = Util_UnivNewlineRead(&bzerror, self->fp,
678 buffer+nfilled,
679 buffersize-nfilled, self);
680 self->pos += nread;
681 Py_END_ALLOW_THREADS
682 if (bzerror == BZ_STREAM_END) {
683 self->size = self->pos;
684 self->mode = MODE_READ_EOF;
685 if (nread == 0) {
686 sizehint = 0;
687 break;
688 }
689 shortread = 1;
690 } else if (bzerror != BZ_OK) {
691 Util_CatchBZ2Error(bzerror);
692 error:
693 Py_DECREF(list);
694 list = NULL;
695 goto cleanup;
696 }
697 totalread += nread;
698 p = memchr(buffer+nfilled, '\n', nread);
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000699 if (!shortread && p == NULL) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000700 /* Need a larger buffer to fit this line */
701 nfilled += nread;
702 buffersize *= 2;
703 if (buffersize > INT_MAX) {
704 PyErr_SetString(PyExc_OverflowError,
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000705 "line is longer than a Python string can hold");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000706 goto error;
707 }
708 if (big_buffer == NULL) {
709 /* Create the big buffer */
710 big_buffer = PyString_FromStringAndSize(
711 NULL, buffersize);
712 if (big_buffer == NULL)
713 goto error;
714 buffer = PyString_AS_STRING(big_buffer);
715 memcpy(buffer, small_buffer, nfilled);
716 }
717 else {
718 /* Grow the big buffer */
719 _PyString_Resize(&big_buffer, buffersize);
720 buffer = PyString_AS_STRING(big_buffer);
721 }
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000722 continue;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000723 }
724 end = buffer+nfilled+nread;
725 q = buffer;
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000726 while (p != NULL) {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000727 /* Process complete lines */
728 p++;
729 line = PyString_FromStringAndSize(q, p-q);
730 if (line == NULL)
731 goto error;
732 err = PyList_Append(list, line);
733 Py_DECREF(line);
734 if (err != 0)
735 goto error;
736 q = p;
737 p = memchr(q, '\n', end-q);
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000738 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000739 /* Move the remaining incomplete line to the start */
740 nfilled = end-q;
741 memmove(buffer, q, nfilled);
742 if (sizehint > 0)
743 if (totalread >= (size_t)sizehint)
744 break;
745 if (shortread) {
746 sizehint = 0;
747 break;
748 }
749 }
750 if (nfilled != 0) {
751 /* Partial last line */
752 line = PyString_FromStringAndSize(buffer, nfilled);
753 if (line == NULL)
754 goto error;
755 if (sizehint > 0) {
756 /* Need to complete the last line */
757 PyObject *rest = Util_GetLine(self, 0);
758 if (rest == NULL) {
759 Py_DECREF(line);
760 goto error;
761 }
762 PyString_Concat(&line, rest);
763 Py_DECREF(rest);
764 if (line == NULL)
765 goto error;
766 }
767 err = PyList_Append(list, line);
768 Py_DECREF(line);
769 if (err != 0)
770 goto error;
771 }
772
773 cleanup:
774 RELEASE_LOCK(self);
775 if (big_buffer) {
776 Py_DECREF(big_buffer);
777 }
778 return list;
779}
780
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000781PyDoc_STRVAR(BZ2File_xreadlines__doc__,
782"xreadlines() -> self\n\
783\n\
784For backward compatibility. BZ2File objects now include the performance\n\
785optimizations previously implemented in the xreadlines module.\n\
786");
787
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000788PyDoc_STRVAR(BZ2File_write__doc__,
789"write(data) -> None\n\
790\n\
791Write the 'data' string to file. Note that due to buffering, close() may\n\
792be needed before the file on disk reflects the data written.\n\
793");
794
795/* This is a hacked version of Python's fileobject.c:file_write(). */
796static PyObject *
797BZ2File_write(BZ2FileObject *self, PyObject *args)
798{
799 PyObject *ret = NULL;
800 char *buf;
801 int len;
802 int bzerror;
803
Walter Dörwaldbb9c7392004-11-01 17:10:19 +0000804 if (!PyArg_ParseTuple(args, "s#:write", &buf, &len))
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000805 return NULL;
Tim Peterse3228092002-11-09 04:21:44 +0000806
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000807 ACQUIRE_LOCK(self);
808 switch (self->mode) {
809 case MODE_WRITE:
810 break;
Tim Peterse3228092002-11-09 04:21:44 +0000811
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000812 case MODE_CLOSED:
813 PyErr_SetString(PyExc_ValueError,
814 "I/O operation on closed file");
815 goto cleanup;;
Tim Peterse3228092002-11-09 04:21:44 +0000816
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000817 default:
818 PyErr_SetString(PyExc_IOError,
819 "file is not ready for writing");
820 goto cleanup;;
821 }
822
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000823 self->f_softspace = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000824
825 Py_BEGIN_ALLOW_THREADS
826 BZ2_bzWrite (&bzerror, self->fp, buf, len);
827 self->pos += len;
828 Py_END_ALLOW_THREADS
Tim Peterse3228092002-11-09 04:21:44 +0000829
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000830 if (bzerror != BZ_OK) {
831 Util_CatchBZ2Error(bzerror);
832 goto cleanup;
833 }
Tim Peterse3228092002-11-09 04:21:44 +0000834
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000835 Py_INCREF(Py_None);
836 ret = Py_None;
837
838cleanup:
839 RELEASE_LOCK(self);
840 return ret;
841}
842
843PyDoc_STRVAR(BZ2File_writelines__doc__,
844"writelines(sequence_of_strings) -> None\n\
845\n\
846Write the sequence of strings to the file. Note that newlines are not\n\
847added. The sequence can be any iterable object producing strings. This is\n\
848equivalent to calling write() for each string.\n\
849");
850
851/* This is a hacked version of Python's fileobject.c:file_writelines(). */
852static PyObject *
853BZ2File_writelines(BZ2FileObject *self, PyObject *seq)
854{
855#define CHUNKSIZE 1000
856 PyObject *list = NULL;
857 PyObject *iter = NULL;
858 PyObject *ret = NULL;
859 PyObject *line;
860 int i, j, index, len, islist;
861 int bzerror;
862
863 ACQUIRE_LOCK(self);
864 islist = PyList_Check(seq);
865 if (!islist) {
866 iter = PyObject_GetIter(seq);
867 if (iter == NULL) {
868 PyErr_SetString(PyExc_TypeError,
869 "writelines() requires an iterable argument");
870 goto error;
871 }
872 list = PyList_New(CHUNKSIZE);
873 if (list == NULL)
874 goto error;
875 }
876
877 /* Strategy: slurp CHUNKSIZE lines into a private list,
878 checking that they are all strings, then write that list
879 without holding the interpreter lock, then come back for more. */
880 for (index = 0; ; index += CHUNKSIZE) {
881 if (islist) {
882 Py_XDECREF(list);
883 list = PyList_GetSlice(seq, index, index+CHUNKSIZE);
884 if (list == NULL)
885 goto error;
886 j = PyList_GET_SIZE(list);
887 }
888 else {
889 for (j = 0; j < CHUNKSIZE; j++) {
890 line = PyIter_Next(iter);
891 if (line == NULL) {
892 if (PyErr_Occurred())
893 goto error;
894 break;
895 }
896 PyList_SetItem(list, j, line);
897 }
898 }
899 if (j == 0)
900 break;
901
902 /* Check that all entries are indeed strings. If not,
903 apply the same rules as for file.write() and
904 convert the rets to strings. This is slow, but
905 seems to be the only way since all conversion APIs
906 could potentially execute Python code. */
907 for (i = 0; i < j; i++) {
908 PyObject *v = PyList_GET_ITEM(list, i);
909 if (!PyString_Check(v)) {
910 const char *buffer;
Martin v. Löwis18e16552006-02-15 17:27:45 +0000911 Py_ssize_t len;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000912 if (PyObject_AsCharBuffer(v, &buffer, &len)) {
913 PyErr_SetString(PyExc_TypeError,
914 "writelines() "
915 "argument must be "
916 "a sequence of "
917 "strings");
918 goto error;
919 }
920 line = PyString_FromStringAndSize(buffer,
921 len);
922 if (line == NULL)
923 goto error;
924 Py_DECREF(v);
925 PyList_SET_ITEM(list, i, line);
926 }
927 }
928
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +0000929 self->f_softspace = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000930
931 /* Since we are releasing the global lock, the
932 following code may *not* execute Python code. */
933 Py_BEGIN_ALLOW_THREADS
934 for (i = 0; i < j; i++) {
935 line = PyList_GET_ITEM(list, i);
936 len = PyString_GET_SIZE(line);
937 BZ2_bzWrite (&bzerror, self->fp,
938 PyString_AS_STRING(line), len);
939 if (bzerror != BZ_OK) {
940 Py_BLOCK_THREADS
941 Util_CatchBZ2Error(bzerror);
942 goto error;
943 }
944 }
945 Py_END_ALLOW_THREADS
946
947 if (j < CHUNKSIZE)
948 break;
949 }
950
951 Py_INCREF(Py_None);
952 ret = Py_None;
953
954 error:
955 RELEASE_LOCK(self);
956 Py_XDECREF(list);
957 Py_XDECREF(iter);
958 return ret;
959#undef CHUNKSIZE
960}
961
962PyDoc_STRVAR(BZ2File_seek__doc__,
963"seek(offset [, whence]) -> None\n\
964\n\
965Move to new file position. Argument offset is a byte count. Optional\n\
966argument whence defaults to 0 (offset from start of file, offset\n\
967should be >= 0); other values are 1 (move relative to current position,\n\
968positive or negative), and 2 (move relative to end of file, usually\n\
969negative, although many platforms allow seeking beyond the end of a file).\n\
970\n\
971Note that seeking of bz2 files is emulated, and depending on the parameters\n\
972the operation may be extremely slow.\n\
973");
974
975static PyObject *
976BZ2File_seek(BZ2FileObject *self, PyObject *args)
977{
978 int where = 0;
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000979 PyObject *offobj;
980 Py_off_t offset;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000981 char small_buffer[SMALLCHUNK];
982 char *buffer = small_buffer;
983 size_t buffersize = SMALLCHUNK;
984 int bytesread = 0;
Georg Brandla8bcecc2005-09-03 07:49:53 +0000985 size_t readsize;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000986 int chunksize;
987 int bzerror;
988 int rewind = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +0000989 PyObject *ret = NULL;
Tim Peterse3228092002-11-09 04:21:44 +0000990
Georg Brandl33a5f2a2005-08-21 14:16:04 +0000991 if (!PyArg_ParseTuple(args, "O|i:seek", &offobj, &where))
992 return NULL;
993#if !defined(HAVE_LARGEFILE_SUPPORT)
994 offset = PyInt_AsLong(offobj);
995#else
996 offset = PyLong_Check(offobj) ?
997 PyLong_AsLongLong(offobj) : PyInt_AsLong(offobj);
998#endif
999 if (PyErr_Occurred())
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001000 return NULL;
1001
1002 ACQUIRE_LOCK(self);
1003 Util_DropReadAhead(self);
1004 switch (self->mode) {
1005 case MODE_READ:
1006 case MODE_READ_EOF:
1007 break;
Tim Peterse3228092002-11-09 04:21:44 +00001008
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001009 case MODE_CLOSED:
1010 PyErr_SetString(PyExc_ValueError,
1011 "I/O operation on closed file");
1012 goto cleanup;;
Tim Peterse3228092002-11-09 04:21:44 +00001013
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001014 default:
1015 PyErr_SetString(PyExc_IOError,
1016 "seek works only while reading");
1017 goto cleanup;;
1018 }
1019
Georg Brandl47fab922006-02-18 21:57:25 +00001020 if (where == 2) {
1021 if (self->size == -1) {
1022 assert(self->mode != MODE_READ_EOF);
1023 for (;;) {
1024 Py_BEGIN_ALLOW_THREADS
1025 chunksize = Util_UnivNewlineRead(
1026 &bzerror, self->fp,
1027 buffer, buffersize,
1028 self);
1029 self->pos += chunksize;
1030 Py_END_ALLOW_THREADS
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001031
Georg Brandl47fab922006-02-18 21:57:25 +00001032 bytesread += chunksize;
1033 if (bzerror == BZ_STREAM_END) {
1034 break;
1035 } else if (bzerror != BZ_OK) {
1036 Util_CatchBZ2Error(bzerror);
1037 goto cleanup;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001038 }
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001039 }
Georg Brandl47fab922006-02-18 21:57:25 +00001040 self->mode = MODE_READ_EOF;
1041 self->size = self->pos;
1042 bytesread = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001043 }
Georg Brandl47fab922006-02-18 21:57:25 +00001044 offset = self->size + offset;
1045 } else if (where == 1) {
1046 offset = self->pos + offset;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001047 }
1048
Georg Brandl47fab922006-02-18 21:57:25 +00001049 /* Before getting here, offset must be the absolute position the file
1050 * pointer should be set to. */
1051
1052 if (offset >= self->pos) {
1053 /* we can move forward */
1054 offset -= self->pos;
1055 } else {
1056 /* we cannot move back, so rewind the stream */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001057 BZ2_bzReadClose(&bzerror, self->fp);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001058 if (bzerror != BZ_OK) {
1059 Util_CatchBZ2Error(bzerror);
1060 goto cleanup;
1061 }
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001062 ret = PyObject_CallMethod(self->file, "seek", "(i)", 0);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001063 if (!ret)
1064 goto cleanup;
1065 Py_DECREF(ret);
1066 ret = NULL;
1067 self->pos = 0;
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001068 self->fp = BZ2_bzReadOpen(&bzerror, PyFile_AsFile(self->file),
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001069 0, 0, NULL, 0);
1070 if (bzerror != BZ_OK) {
1071 Util_CatchBZ2Error(bzerror);
1072 goto cleanup;
1073 }
1074 self->mode = MODE_READ;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001075 }
1076
Georg Brandl47fab922006-02-18 21:57:25 +00001077 if (offset <= 0 || self->mode == MODE_READ_EOF)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001078 goto exit;
1079
1080 /* Before getting here, offset must be set to the number of bytes
1081 * to walk forward. */
1082 for (;;) {
Georg Brandla8bcecc2005-09-03 07:49:53 +00001083 if (offset-bytesread > buffersize)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001084 readsize = buffersize;
1085 else
Georg Brandla8bcecc2005-09-03 07:49:53 +00001086 /* offset might be wider that readsize, but the result
1087 * of the subtraction is bound by buffersize (see the
1088 * condition above). buffersize is 8192. */
1089 readsize = (size_t)(offset-bytesread);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001090 Py_BEGIN_ALLOW_THREADS
1091 chunksize = Util_UnivNewlineRead(&bzerror, self->fp,
1092 buffer, readsize, self);
1093 self->pos += chunksize;
1094 Py_END_ALLOW_THREADS
1095 bytesread += chunksize;
1096 if (bzerror == BZ_STREAM_END) {
1097 self->size = self->pos;
1098 self->mode = MODE_READ_EOF;
1099 break;
1100 } else if (bzerror != BZ_OK) {
1101 Util_CatchBZ2Error(bzerror);
1102 goto cleanup;
1103 }
1104 if (bytesread == offset)
1105 break;
1106 }
1107
1108exit:
1109 Py_INCREF(Py_None);
1110 ret = Py_None;
1111
1112cleanup:
1113 RELEASE_LOCK(self);
1114 return ret;
1115}
1116
1117PyDoc_STRVAR(BZ2File_tell__doc__,
1118"tell() -> int\n\
1119\n\
1120Return the current file position, an integer (may be a long integer).\n\
1121");
1122
1123static PyObject *
1124BZ2File_tell(BZ2FileObject *self, PyObject *args)
1125{
1126 PyObject *ret = NULL;
1127
1128 if (self->mode == MODE_CLOSED) {
1129 PyErr_SetString(PyExc_ValueError,
1130 "I/O operation on closed file");
1131 goto cleanup;
1132 }
1133
Georg Brandla8bcecc2005-09-03 07:49:53 +00001134#if !defined(HAVE_LARGEFILE_SUPPORT)
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001135 ret = PyInt_FromLong(self->pos);
Georg Brandla8bcecc2005-09-03 07:49:53 +00001136#else
1137 ret = PyLong_FromLongLong(self->pos);
1138#endif
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001139
1140cleanup:
1141 return ret;
1142}
1143
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001144PyDoc_STRVAR(BZ2File_close__doc__,
1145"close() -> None or (perhaps) an integer\n\
1146\n\
1147Close the file. Sets data attribute .closed to true. A closed file\n\
1148cannot be used for further I/O operations. close() may be called more\n\
1149than once without error.\n\
1150");
1151
1152static PyObject *
1153BZ2File_close(BZ2FileObject *self)
1154{
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001155 PyObject *ret = NULL;
1156 int bzerror = BZ_OK;
1157
1158 ACQUIRE_LOCK(self);
1159 switch (self->mode) {
1160 case MODE_READ:
1161 case MODE_READ_EOF:
1162 BZ2_bzReadClose(&bzerror, self->fp);
1163 break;
1164 case MODE_WRITE:
1165 BZ2_bzWriteClose(&bzerror, self->fp,
1166 0, NULL, NULL);
1167 break;
1168 }
1169 self->mode = MODE_CLOSED;
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001170 ret = PyObject_CallMethod(self->file, "close", NULL);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001171 if (bzerror != BZ_OK) {
1172 Util_CatchBZ2Error(bzerror);
1173 Py_XDECREF(ret);
1174 ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001175 }
1176
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001177 RELEASE_LOCK(self);
1178 return ret;
1179}
1180
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001181static PyObject *BZ2File_getiter(BZ2FileObject *self);
1182
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001183static PyMethodDef BZ2File_methods[] = {
1184 {"read", (PyCFunction)BZ2File_read, METH_VARARGS, BZ2File_read__doc__},
1185 {"readline", (PyCFunction)BZ2File_readline, METH_VARARGS, BZ2File_readline__doc__},
1186 {"readlines", (PyCFunction)BZ2File_readlines, METH_VARARGS, BZ2File_readlines__doc__},
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001187 {"xreadlines", (PyCFunction)BZ2File_getiter, METH_VARARGS, BZ2File_xreadlines__doc__},
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001188 {"write", (PyCFunction)BZ2File_write, METH_VARARGS, BZ2File_write__doc__},
1189 {"writelines", (PyCFunction)BZ2File_writelines, METH_O, BZ2File_writelines__doc__},
1190 {"seek", (PyCFunction)BZ2File_seek, METH_VARARGS, BZ2File_seek__doc__},
1191 {"tell", (PyCFunction)BZ2File_tell, METH_NOARGS, BZ2File_tell__doc__},
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001192 {"close", (PyCFunction)BZ2File_close, METH_NOARGS, BZ2File_close__doc__},
1193 {NULL, NULL} /* sentinel */
1194};
1195
1196
1197/* ===================================================================== */
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001198/* Getters and setters of BZ2File. */
1199
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001200/* This is a hacked version of Python's fileobject.c:get_newlines(). */
1201static PyObject *
1202BZ2File_get_newlines(BZ2FileObject *self, void *closure)
1203{
1204 switch (self->f_newlinetypes) {
1205 case NEWLINE_UNKNOWN:
1206 Py_INCREF(Py_None);
1207 return Py_None;
1208 case NEWLINE_CR:
1209 return PyString_FromString("\r");
1210 case NEWLINE_LF:
1211 return PyString_FromString("\n");
1212 case NEWLINE_CR|NEWLINE_LF:
1213 return Py_BuildValue("(ss)", "\r", "\n");
1214 case NEWLINE_CRLF:
1215 return PyString_FromString("\r\n");
1216 case NEWLINE_CR|NEWLINE_CRLF:
1217 return Py_BuildValue("(ss)", "\r", "\r\n");
1218 case NEWLINE_LF|NEWLINE_CRLF:
1219 return Py_BuildValue("(ss)", "\n", "\r\n");
1220 case NEWLINE_CR|NEWLINE_LF|NEWLINE_CRLF:
1221 return Py_BuildValue("(sss)", "\r", "\n", "\r\n");
1222 default:
1223 PyErr_Format(PyExc_SystemError,
1224 "Unknown newlines value 0x%x\n",
1225 self->f_newlinetypes);
1226 return NULL;
1227 }
1228}
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001229
1230static PyObject *
1231BZ2File_get_closed(BZ2FileObject *self, void *closure)
1232{
1233 return PyInt_FromLong(self->mode == MODE_CLOSED);
1234}
1235
1236static PyObject *
1237BZ2File_get_mode(BZ2FileObject *self, void *closure)
1238{
1239 return PyObject_GetAttrString(self->file, "mode");
1240}
1241
1242static PyObject *
1243BZ2File_get_name(BZ2FileObject *self, void *closure)
1244{
1245 return PyObject_GetAttrString(self->file, "name");
1246}
1247
1248static PyGetSetDef BZ2File_getset[] = {
1249 {"closed", (getter)BZ2File_get_closed, NULL,
1250 "True if the file is closed"},
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001251 {"newlines", (getter)BZ2File_get_newlines, NULL,
1252 "end-of-line convention used in this file"},
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001253 {"mode", (getter)BZ2File_get_mode, NULL,
1254 "file mode ('r', 'w', or 'U')"},
1255 {"name", (getter)BZ2File_get_name, NULL,
1256 "file name"},
1257 {NULL} /* Sentinel */
1258};
1259
1260
1261/* ===================================================================== */
1262/* Members of BZ2File_Type. */
1263
1264#undef OFF
1265#define OFF(x) offsetof(BZ2FileObject, x)
1266
1267static PyMemberDef BZ2File_members[] = {
1268 {"softspace", T_INT, OFF(f_softspace), 0,
1269 "flag indicating that a space needs to be printed; used by print"},
1270 {NULL} /* Sentinel */
1271};
1272
1273/* ===================================================================== */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001274/* Slot definitions for BZ2File_Type. */
1275
1276static int
1277BZ2File_init(BZ2FileObject *self, PyObject *args, PyObject *kwargs)
1278{
Jeremy Hyltonaf68c872005-12-10 18:50:16 +00001279 static const char *kwlist[] = {"filename", "mode", "buffering",
1280 "compresslevel", 0};
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001281 PyObject *name;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001282 char *mode = "r";
1283 int buffering = -1;
1284 int compresslevel = 9;
1285 int bzerror;
1286 int mode_char = 0;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001287
1288 self->size = -1;
Tim Peterse3228092002-11-09 04:21:44 +00001289
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001290 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|sii:BZ2File",
1291 kwlist, &name, &mode, &buffering,
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001292 &compresslevel))
1293 return -1;
1294
1295 if (compresslevel < 1 || compresslevel > 9) {
1296 PyErr_SetString(PyExc_ValueError,
1297 "compresslevel must be between 1 and 9");
1298 return -1;
1299 }
1300
1301 for (;;) {
1302 int error = 0;
1303 switch (*mode) {
1304 case 'r':
1305 case 'w':
1306 if (mode_char)
1307 error = 1;
1308 mode_char = *mode;
1309 break;
1310
1311 case 'b':
1312 break;
1313
1314 case 'U':
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001315 self->f_univ_newline = 1;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001316 break;
1317
1318 default:
1319 error = 1;
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001320 break;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001321 }
1322 if (error) {
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001323 PyErr_Format(PyExc_ValueError,
1324 "invalid mode char %c", *mode);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001325 return -1;
1326 }
1327 mode++;
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001328 if (*mode == '\0')
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001329 break;
1330 }
1331
Georg Brandl6b95f1d2005-06-03 19:47:00 +00001332 if (mode_char == 0) {
1333 mode_char = 'r';
1334 }
1335
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001336 mode = (mode_char == 'r') ? "rb" : "wb";
Tim Peterse3228092002-11-09 04:21:44 +00001337
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001338 self->file = PyObject_CallFunction((PyObject*)&PyFile_Type, "(Osi)",
1339 name, mode, buffering);
1340 if (self->file == NULL)
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001341 return -1;
1342
1343 /* From now on, we have stuff to dealloc, so jump to error label
1344 * instead of returning */
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001345
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001346#ifdef WITH_THREAD
1347 self->lock = PyThread_allocate_lock();
1348 if (!self->lock)
1349 goto error;
1350#endif
1351
1352 if (mode_char == 'r')
1353 self->fp = BZ2_bzReadOpen(&bzerror,
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001354 PyFile_AsFile(self->file),
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001355 0, 0, NULL, 0);
1356 else
1357 self->fp = BZ2_bzWriteOpen(&bzerror,
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001358 PyFile_AsFile(self->file),
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001359 compresslevel, 0, 0);
1360
1361 if (bzerror != BZ_OK) {
1362 Util_CatchBZ2Error(bzerror);
1363 goto error;
1364 }
1365
1366 self->mode = (mode_char == 'r') ? MODE_READ : MODE_WRITE;
1367
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001368 return 0;
1369
1370error:
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001371 Py_DECREF(self->file);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001372#ifdef WITH_THREAD
1373 if (self->lock)
1374 PyThread_free_lock(self->lock);
1375#endif
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001376 return -1;
1377}
1378
1379static void
1380BZ2File_dealloc(BZ2FileObject *self)
1381{
1382 int bzerror;
1383#ifdef WITH_THREAD
1384 if (self->lock)
1385 PyThread_free_lock(self->lock);
1386#endif
1387 switch (self->mode) {
1388 case MODE_READ:
1389 case MODE_READ_EOF:
1390 BZ2_bzReadClose(&bzerror, self->fp);
1391 break;
1392 case MODE_WRITE:
1393 BZ2_bzWriteClose(&bzerror, self->fp,
1394 0, NULL, NULL);
1395 break;
1396 }
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001397 Util_DropReadAhead(self);
Gustavo Niemeyer572f5232003-04-29 14:53:08 +00001398 Py_XDECREF(self->file);
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001399 self->ob_type->tp_free((PyObject *)self);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001400}
1401
1402/* This is a hacked version of Python's fileobject.c:file_getiter(). */
1403static PyObject *
1404BZ2File_getiter(BZ2FileObject *self)
1405{
1406 if (self->mode == MODE_CLOSED) {
1407 PyErr_SetString(PyExc_ValueError,
1408 "I/O operation on closed file");
1409 return NULL;
1410 }
1411 Py_INCREF((PyObject*)self);
1412 return (PyObject *)self;
1413}
1414
1415/* This is a hacked version of Python's fileobject.c:file_iternext(). */
1416#define READAHEAD_BUFSIZE 8192
1417static PyObject *
1418BZ2File_iternext(BZ2FileObject *self)
1419{
1420 PyStringObject* ret;
1421 ACQUIRE_LOCK(self);
1422 if (self->mode == MODE_CLOSED) {
1423 PyErr_SetString(PyExc_ValueError,
1424 "I/O operation on closed file");
1425 return NULL;
1426 }
1427 ret = Util_ReadAheadGetLineSkip(self, 0, READAHEAD_BUFSIZE);
1428 RELEASE_LOCK(self);
1429 if (ret == NULL || PyString_GET_SIZE(ret) == 0) {
1430 Py_XDECREF(ret);
1431 return NULL;
1432 }
1433 return (PyObject *)ret;
1434}
1435
1436/* ===================================================================== */
1437/* BZ2File_Type definition. */
1438
1439PyDoc_VAR(BZ2File__doc__) =
1440PyDoc_STR(
1441"BZ2File(name [, mode='r', buffering=0, compresslevel=9]) -> file object\n\
1442\n\
1443Open a bz2 file. The mode can be 'r' or 'w', for reading (default) or\n\
1444writing. When opened for writing, the file will be created if it doesn't\n\
1445exist, and truncated otherwise. If the buffering argument is given, 0 means\n\
1446unbuffered, and larger numbers specify the buffer size. If compresslevel\n\
1447is given, must be a number between 1 and 9.\n\
1448")
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001449PyDoc_STR(
1450"\n\
1451Add a 'U' to mode to open the file for input with universal newline\n\
1452support. Any line ending in the input file will be seen as a '\\n' in\n\
1453Python. Also, a file so opened gains the attribute 'newlines'; the value\n\
1454for this attribute is one of None (no newline read yet), '\\r', '\\n',\n\
1455'\\r\\n' or a tuple containing all the newline types seen. Universal\n\
1456newlines are available only when reading.\n\
1457")
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001458;
1459
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001460static PyTypeObject BZ2File_Type = {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001461 PyObject_HEAD_INIT(NULL)
1462 0, /*ob_size*/
1463 "bz2.BZ2File", /*tp_name*/
1464 sizeof(BZ2FileObject), /*tp_basicsize*/
1465 0, /*tp_itemsize*/
1466 (destructor)BZ2File_dealloc, /*tp_dealloc*/
1467 0, /*tp_print*/
1468 0, /*tp_getattr*/
1469 0, /*tp_setattr*/
1470 0, /*tp_compare*/
1471 0, /*tp_repr*/
1472 0, /*tp_as_number*/
1473 0, /*tp_as_sequence*/
1474 0, /*tp_as_mapping*/
1475 0, /*tp_hash*/
1476 0, /*tp_call*/
1477 0, /*tp_str*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00001478 PyObject_GenericGetAttr,/*tp_getattro*/
1479 PyObject_GenericSetAttr,/*tp_setattro*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001480 0, /*tp_as_buffer*/
1481 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1482 BZ2File__doc__, /*tp_doc*/
1483 0, /*tp_traverse*/
1484 0, /*tp_clear*/
1485 0, /*tp_richcompare*/
1486 0, /*tp_weaklistoffset*/
1487 (getiterfunc)BZ2File_getiter, /*tp_iter*/
1488 (iternextfunc)BZ2File_iternext, /*tp_iternext*/
1489 BZ2File_methods, /*tp_methods*/
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001490 BZ2File_members, /*tp_members*/
1491 BZ2File_getset, /*tp_getset*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001492 0, /*tp_base*/
1493 0, /*tp_dict*/
1494 0, /*tp_descr_get*/
1495 0, /*tp_descr_set*/
1496 0, /*tp_dictoffset*/
1497 (initproc)BZ2File_init, /*tp_init*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00001498 PyType_GenericAlloc, /*tp_alloc*/
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001499 PyType_GenericNew, /*tp_new*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00001500 _PyObject_Del, /*tp_free*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001501 0, /*tp_is_gc*/
1502};
1503
1504
1505/* ===================================================================== */
1506/* Methods of BZ2Comp. */
1507
1508PyDoc_STRVAR(BZ2Comp_compress__doc__,
1509"compress(data) -> string\n\
1510\n\
1511Provide more data to the compressor object. It will return chunks of\n\
1512compressed data whenever possible. When you've finished providing data\n\
1513to compress, call the flush() method to finish the compression process,\n\
1514and return what is left in the internal buffers.\n\
1515");
1516
1517static PyObject *
1518BZ2Comp_compress(BZ2CompObject *self, PyObject *args)
1519{
1520 char *data;
1521 int datasize;
1522 int bufsize = SMALLCHUNK;
Martin v. Löwisb9a0f912003-03-29 10:06:18 +00001523 PY_LONG_LONG totalout;
Gustavo Niemeyer7d7930b2002-11-05 18:41:53 +00001524 PyObject *ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001525 bz_stream *bzs = &self->bzs;
1526 int bzerror;
1527
Walter Dörwaldbb9c7392004-11-01 17:10:19 +00001528 if (!PyArg_ParseTuple(args, "s#:compress", &data, &datasize))
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001529 return NULL;
1530
Gustavo Niemeyera6e436e2004-02-14 00:02:45 +00001531 if (datasize == 0)
1532 return PyString_FromString("");
1533
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001534 ACQUIRE_LOCK(self);
1535 if (!self->running) {
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001536 PyErr_SetString(PyExc_ValueError,
1537 "this object was already flushed");
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001538 goto error;
1539 }
1540
1541 ret = PyString_FromStringAndSize(NULL, bufsize);
1542 if (!ret)
1543 goto error;
1544
1545 bzs->next_in = data;
1546 bzs->avail_in = datasize;
1547 bzs->next_out = BUF(ret);
1548 bzs->avail_out = bufsize;
1549
1550 totalout = BZS_TOTAL_OUT(bzs);
1551
1552 for (;;) {
1553 Py_BEGIN_ALLOW_THREADS
1554 bzerror = BZ2_bzCompress(bzs, BZ_RUN);
1555 Py_END_ALLOW_THREADS
1556 if (bzerror != BZ_RUN_OK) {
1557 Util_CatchBZ2Error(bzerror);
1558 goto error;
1559 }
1560 if (bzs->avail_out == 0) {
1561 bufsize = Util_NewBufferSize(bufsize);
1562 if (_PyString_Resize(&ret, bufsize) < 0) {
1563 BZ2_bzCompressEnd(bzs);
1564 goto error;
1565 }
1566 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1567 - totalout);
1568 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1569 } else if (bzs->avail_in == 0) {
1570 break;
1571 }
1572 }
1573
Tim Petersf29f0c62002-11-09 04:28:17 +00001574 _PyString_Resize(&ret, (int)(BZS_TOTAL_OUT(bzs) - totalout));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001575
1576 RELEASE_LOCK(self);
1577 return ret;
1578
1579error:
1580 RELEASE_LOCK(self);
1581 Py_XDECREF(ret);
1582 return NULL;
1583}
1584
1585PyDoc_STRVAR(BZ2Comp_flush__doc__,
1586"flush() -> string\n\
1587\n\
1588Finish the compression process and return what is left in internal buffers.\n\
1589You must not use the compressor object after calling this method.\n\
1590");
1591
1592static PyObject *
1593BZ2Comp_flush(BZ2CompObject *self)
1594{
1595 int bufsize = SMALLCHUNK;
Gustavo Niemeyer7d7930b2002-11-05 18:41:53 +00001596 PyObject *ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001597 bz_stream *bzs = &self->bzs;
Martin v. Löwisb9a0f912003-03-29 10:06:18 +00001598 PY_LONG_LONG totalout;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001599 int bzerror;
1600
1601 ACQUIRE_LOCK(self);
1602 if (!self->running) {
1603 PyErr_SetString(PyExc_ValueError, "object was already "
1604 "flushed");
1605 goto error;
1606 }
1607 self->running = 0;
1608
1609 ret = PyString_FromStringAndSize(NULL, bufsize);
1610 if (!ret)
1611 goto error;
1612
1613 bzs->next_out = BUF(ret);
1614 bzs->avail_out = bufsize;
1615
1616 totalout = BZS_TOTAL_OUT(bzs);
1617
1618 for (;;) {
1619 Py_BEGIN_ALLOW_THREADS
1620 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
1621 Py_END_ALLOW_THREADS
1622 if (bzerror == BZ_STREAM_END) {
1623 break;
1624 } else if (bzerror != BZ_FINISH_OK) {
1625 Util_CatchBZ2Error(bzerror);
1626 goto error;
1627 }
1628 if (bzs->avail_out == 0) {
1629 bufsize = Util_NewBufferSize(bufsize);
1630 if (_PyString_Resize(&ret, bufsize) < 0)
1631 goto error;
1632 bzs->next_out = BUF(ret);
1633 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1634 - totalout);
1635 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1636 }
1637 }
1638
1639 if (bzs->avail_out != 0)
Tim Peters2858e5e2002-11-09 04:30:08 +00001640 _PyString_Resize(&ret, (int)(BZS_TOTAL_OUT(bzs) - totalout));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001641
1642 RELEASE_LOCK(self);
1643 return ret;
1644
1645error:
1646 RELEASE_LOCK(self);
1647 Py_XDECREF(ret);
1648 return NULL;
1649}
1650
1651static PyMethodDef BZ2Comp_methods[] = {
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001652 {"compress", (PyCFunction)BZ2Comp_compress, METH_VARARGS,
1653 BZ2Comp_compress__doc__},
1654 {"flush", (PyCFunction)BZ2Comp_flush, METH_NOARGS,
1655 BZ2Comp_flush__doc__},
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001656 {NULL, NULL} /* sentinel */
1657};
1658
1659
1660/* ===================================================================== */
1661/* Slot definitions for BZ2Comp_Type. */
1662
1663static int
1664BZ2Comp_init(BZ2CompObject *self, PyObject *args, PyObject *kwargs)
1665{
1666 int compresslevel = 9;
1667 int bzerror;
Jeremy Hyltonaf68c872005-12-10 18:50:16 +00001668 static const char *kwlist[] = {"compresslevel", 0};
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001669
1670 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i:BZ2Compressor",
1671 kwlist, &compresslevel))
1672 return -1;
1673
1674 if (compresslevel < 1 || compresslevel > 9) {
1675 PyErr_SetString(PyExc_ValueError,
1676 "compresslevel must be between 1 and 9");
1677 goto error;
1678 }
1679
1680#ifdef WITH_THREAD
1681 self->lock = PyThread_allocate_lock();
1682 if (!self->lock)
1683 goto error;
1684#endif
1685
1686 memset(&self->bzs, 0, sizeof(bz_stream));
1687 bzerror = BZ2_bzCompressInit(&self->bzs, compresslevel, 0, 0);
1688 if (bzerror != BZ_OK) {
1689 Util_CatchBZ2Error(bzerror);
1690 goto error;
1691 }
1692
1693 self->running = 1;
1694
1695 return 0;
1696error:
1697#ifdef WITH_THREAD
1698 if (self->lock)
1699 PyThread_free_lock(self->lock);
1700#endif
1701 return -1;
1702}
1703
1704static void
1705BZ2Comp_dealloc(BZ2CompObject *self)
1706{
1707#ifdef WITH_THREAD
1708 if (self->lock)
1709 PyThread_free_lock(self->lock);
1710#endif
1711 BZ2_bzCompressEnd(&self->bzs);
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001712 self->ob_type->tp_free((PyObject *)self);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001713}
1714
1715
1716/* ===================================================================== */
1717/* BZ2Comp_Type definition. */
1718
1719PyDoc_STRVAR(BZ2Comp__doc__,
1720"BZ2Compressor([compresslevel=9]) -> compressor object\n\
1721\n\
1722Create a new compressor object. This object may be used to compress\n\
1723data sequentially. If you want to compress data in one shot, use the\n\
1724compress() function instead. The compresslevel parameter, if given,\n\
1725must be a number between 1 and 9.\n\
1726");
1727
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001728static PyTypeObject BZ2Comp_Type = {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001729 PyObject_HEAD_INIT(NULL)
1730 0, /*ob_size*/
1731 "bz2.BZ2Compressor", /*tp_name*/
1732 sizeof(BZ2CompObject), /*tp_basicsize*/
1733 0, /*tp_itemsize*/
1734 (destructor)BZ2Comp_dealloc, /*tp_dealloc*/
1735 0, /*tp_print*/
1736 0, /*tp_getattr*/
1737 0, /*tp_setattr*/
1738 0, /*tp_compare*/
1739 0, /*tp_repr*/
1740 0, /*tp_as_number*/
1741 0, /*tp_as_sequence*/
1742 0, /*tp_as_mapping*/
1743 0, /*tp_hash*/
1744 0, /*tp_call*/
1745 0, /*tp_str*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00001746 PyObject_GenericGetAttr,/*tp_getattro*/
1747 PyObject_GenericSetAttr,/*tp_setattro*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001748 0, /*tp_as_buffer*/
1749 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1750 BZ2Comp__doc__, /*tp_doc*/
1751 0, /*tp_traverse*/
1752 0, /*tp_clear*/
1753 0, /*tp_richcompare*/
1754 0, /*tp_weaklistoffset*/
1755 0, /*tp_iter*/
1756 0, /*tp_iternext*/
1757 BZ2Comp_methods, /*tp_methods*/
1758 0, /*tp_members*/
1759 0, /*tp_getset*/
1760 0, /*tp_base*/
1761 0, /*tp_dict*/
1762 0, /*tp_descr_get*/
1763 0, /*tp_descr_set*/
1764 0, /*tp_dictoffset*/
1765 (initproc)BZ2Comp_init, /*tp_init*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00001766 PyType_GenericAlloc, /*tp_alloc*/
1767 PyType_GenericNew, /*tp_new*/
1768 _PyObject_Del, /*tp_free*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001769 0, /*tp_is_gc*/
1770};
1771
1772
1773/* ===================================================================== */
1774/* Members of BZ2Decomp. */
1775
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001776#undef OFF
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001777#define OFF(x) offsetof(BZ2DecompObject, x)
1778
1779static PyMemberDef BZ2Decomp_members[] = {
1780 {"unused_data", T_OBJECT, OFF(unused_data), RO},
1781 {NULL} /* Sentinel */
1782};
1783
1784
1785/* ===================================================================== */
1786/* Methods of BZ2Decomp. */
1787
1788PyDoc_STRVAR(BZ2Decomp_decompress__doc__,
1789"decompress(data) -> string\n\
1790\n\
1791Provide more data to the decompressor object. It will return chunks\n\
1792of decompressed data whenever possible. If you try to decompress data\n\
1793after the end of stream is found, EOFError will be raised. If any data\n\
1794was found after the end of stream, it'll be ignored and saved in\n\
1795unused_data attribute.\n\
1796");
1797
1798static PyObject *
1799BZ2Decomp_decompress(BZ2DecompObject *self, PyObject *args)
1800{
1801 char *data;
1802 int datasize;
1803 int bufsize = SMALLCHUNK;
Martin v. Löwisb9a0f912003-03-29 10:06:18 +00001804 PY_LONG_LONG totalout;
Neal Norwitz18142c02002-11-05 18:17:32 +00001805 PyObject *ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001806 bz_stream *bzs = &self->bzs;
1807 int bzerror;
1808
Walter Dörwaldbb9c7392004-11-01 17:10:19 +00001809 if (!PyArg_ParseTuple(args, "s#:decompress", &data, &datasize))
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001810 return NULL;
1811
1812 ACQUIRE_LOCK(self);
1813 if (!self->running) {
1814 PyErr_SetString(PyExc_EOFError, "end of stream was "
1815 "already found");
1816 goto error;
1817 }
1818
1819 ret = PyString_FromStringAndSize(NULL, bufsize);
1820 if (!ret)
1821 goto error;
1822
1823 bzs->next_in = data;
1824 bzs->avail_in = datasize;
1825 bzs->next_out = BUF(ret);
1826 bzs->avail_out = bufsize;
1827
1828 totalout = BZS_TOTAL_OUT(bzs);
1829
1830 for (;;) {
1831 Py_BEGIN_ALLOW_THREADS
1832 bzerror = BZ2_bzDecompress(bzs);
1833 Py_END_ALLOW_THREADS
1834 if (bzerror == BZ_STREAM_END) {
1835 if (bzs->avail_in != 0) {
1836 Py_DECREF(self->unused_data);
1837 self->unused_data =
1838 PyString_FromStringAndSize(bzs->next_in,
1839 bzs->avail_in);
1840 }
1841 self->running = 0;
1842 break;
1843 }
1844 if (bzerror != BZ_OK) {
1845 Util_CatchBZ2Error(bzerror);
1846 goto error;
1847 }
1848 if (bzs->avail_out == 0) {
1849 bufsize = Util_NewBufferSize(bufsize);
1850 if (_PyString_Resize(&ret, bufsize) < 0) {
1851 BZ2_bzDecompressEnd(bzs);
1852 goto error;
1853 }
1854 bzs->next_out = BUF(ret);
1855 bzs->next_out = BUF(ret) + (BZS_TOTAL_OUT(bzs)
1856 - totalout);
1857 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
1858 } else if (bzs->avail_in == 0) {
1859 break;
1860 }
1861 }
1862
1863 if (bzs->avail_out != 0)
Tim Peters39185d62002-11-09 04:31:38 +00001864 _PyString_Resize(&ret, (int)(BZS_TOTAL_OUT(bzs) - totalout));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001865
1866 RELEASE_LOCK(self);
1867 return ret;
1868
1869error:
1870 RELEASE_LOCK(self);
1871 Py_XDECREF(ret);
1872 return NULL;
1873}
1874
1875static PyMethodDef BZ2Decomp_methods[] = {
1876 {"decompress", (PyCFunction)BZ2Decomp_decompress, METH_VARARGS, BZ2Decomp_decompress__doc__},
1877 {NULL, NULL} /* sentinel */
1878};
1879
1880
1881/* ===================================================================== */
1882/* Slot definitions for BZ2Decomp_Type. */
1883
1884static int
1885BZ2Decomp_init(BZ2DecompObject *self, PyObject *args, PyObject *kwargs)
1886{
1887 int bzerror;
1888
1889 if (!PyArg_ParseTuple(args, ":BZ2Decompressor"))
1890 return -1;
1891
1892#ifdef WITH_THREAD
1893 self->lock = PyThread_allocate_lock();
1894 if (!self->lock)
1895 goto error;
1896#endif
1897
1898 self->unused_data = PyString_FromString("");
1899 if (!self->unused_data)
1900 goto error;
1901
1902 memset(&self->bzs, 0, sizeof(bz_stream));
1903 bzerror = BZ2_bzDecompressInit(&self->bzs, 0, 0);
1904 if (bzerror != BZ_OK) {
1905 Util_CatchBZ2Error(bzerror);
1906 goto error;
1907 }
1908
1909 self->running = 1;
1910
1911 return 0;
1912
1913error:
1914#ifdef WITH_THREAD
1915 if (self->lock)
1916 PyThread_free_lock(self->lock);
1917#endif
1918 Py_XDECREF(self->unused_data);
1919 return -1;
1920}
1921
1922static void
1923BZ2Decomp_dealloc(BZ2DecompObject *self)
1924{
1925#ifdef WITH_THREAD
1926 if (self->lock)
1927 PyThread_free_lock(self->lock);
1928#endif
1929 Py_XDECREF(self->unused_data);
1930 BZ2_bzDecompressEnd(&self->bzs);
Gustavo Niemeyera33d0aa2003-02-11 18:46:20 +00001931 self->ob_type->tp_free((PyObject *)self);
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001932}
1933
1934
1935/* ===================================================================== */
1936/* BZ2Decomp_Type definition. */
1937
1938PyDoc_STRVAR(BZ2Decomp__doc__,
1939"BZ2Decompressor() -> decompressor object\n\
1940\n\
1941Create a new decompressor object. This object may be used to decompress\n\
1942data sequentially. If you want to decompress data in one shot, use the\n\
1943decompress() function instead.\n\
1944");
1945
Gustavo Niemeyer49ea7be2002-11-08 14:31:49 +00001946static PyTypeObject BZ2Decomp_Type = {
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001947 PyObject_HEAD_INIT(NULL)
1948 0, /*ob_size*/
1949 "bz2.BZ2Decompressor", /*tp_name*/
1950 sizeof(BZ2DecompObject), /*tp_basicsize*/
1951 0, /*tp_itemsize*/
1952 (destructor)BZ2Decomp_dealloc, /*tp_dealloc*/
1953 0, /*tp_print*/
1954 0, /*tp_getattr*/
1955 0, /*tp_setattr*/
1956 0, /*tp_compare*/
1957 0, /*tp_repr*/
1958 0, /*tp_as_number*/
1959 0, /*tp_as_sequence*/
1960 0, /*tp_as_mapping*/
1961 0, /*tp_hash*/
1962 0, /*tp_call*/
1963 0, /*tp_str*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00001964 PyObject_GenericGetAttr,/*tp_getattro*/
1965 PyObject_GenericSetAttr,/*tp_setattro*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001966 0, /*tp_as_buffer*/
1967 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1968 BZ2Decomp__doc__, /*tp_doc*/
1969 0, /*tp_traverse*/
1970 0, /*tp_clear*/
1971 0, /*tp_richcompare*/
1972 0, /*tp_weaklistoffset*/
1973 0, /*tp_iter*/
1974 0, /*tp_iternext*/
1975 BZ2Decomp_methods, /*tp_methods*/
1976 BZ2Decomp_members, /*tp_members*/
1977 0, /*tp_getset*/
1978 0, /*tp_base*/
1979 0, /*tp_dict*/
1980 0, /*tp_descr_get*/
1981 0, /*tp_descr_set*/
1982 0, /*tp_dictoffset*/
1983 (initproc)BZ2Decomp_init, /*tp_init*/
Jason Tishlerfb8595d2003-01-06 12:41:26 +00001984 PyType_GenericAlloc, /*tp_alloc*/
1985 PyType_GenericNew, /*tp_new*/
1986 _PyObject_Del, /*tp_free*/
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00001987 0, /*tp_is_gc*/
1988};
1989
1990
1991/* ===================================================================== */
1992/* Module functions. */
1993
1994PyDoc_STRVAR(bz2_compress__doc__,
1995"compress(data [, compresslevel=9]) -> string\n\
1996\n\
1997Compress data in one shot. If you want to compress data sequentially,\n\
1998use an instance of BZ2Compressor instead. The compresslevel parameter, if\n\
1999given, must be a number between 1 and 9.\n\
2000");
2001
2002static PyObject *
2003bz2_compress(PyObject *self, PyObject *args, PyObject *kwargs)
2004{
2005 int compresslevel=9;
2006 char *data;
2007 int datasize;
2008 int bufsize;
Gustavo Niemeyer7d7930b2002-11-05 18:41:53 +00002009 PyObject *ret = NULL;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002010 bz_stream _bzs;
2011 bz_stream *bzs = &_bzs;
2012 int bzerror;
Jeremy Hyltonaf68c872005-12-10 18:50:16 +00002013 static const char *kwlist[] = {"data", "compresslevel", 0};
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002014
2015 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|i",
2016 kwlist, &data, &datasize,
2017 &compresslevel))
2018 return NULL;
2019
2020 if (compresslevel < 1 || compresslevel > 9) {
2021 PyErr_SetString(PyExc_ValueError,
2022 "compresslevel must be between 1 and 9");
2023 return NULL;
2024 }
2025
2026 /* Conforming to bz2 manual, this is large enough to fit compressed
2027 * data in one shot. We will check it later anyway. */
2028 bufsize = datasize + (datasize/100+1) + 600;
2029
2030 ret = PyString_FromStringAndSize(NULL, bufsize);
2031 if (!ret)
2032 return NULL;
2033
2034 memset(bzs, 0, sizeof(bz_stream));
2035
2036 bzs->next_in = data;
2037 bzs->avail_in = datasize;
2038 bzs->next_out = BUF(ret);
2039 bzs->avail_out = bufsize;
2040
2041 bzerror = BZ2_bzCompressInit(bzs, compresslevel, 0, 0);
2042 if (bzerror != BZ_OK) {
2043 Util_CatchBZ2Error(bzerror);
2044 Py_DECREF(ret);
2045 return NULL;
2046 }
Tim Peterse3228092002-11-09 04:21:44 +00002047
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002048 for (;;) {
2049 Py_BEGIN_ALLOW_THREADS
2050 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
2051 Py_END_ALLOW_THREADS
2052 if (bzerror == BZ_STREAM_END) {
2053 break;
2054 } else if (bzerror != BZ_FINISH_OK) {
2055 BZ2_bzCompressEnd(bzs);
2056 Util_CatchBZ2Error(bzerror);
2057 Py_DECREF(ret);
2058 return NULL;
2059 }
2060 if (bzs->avail_out == 0) {
2061 bufsize = Util_NewBufferSize(bufsize);
2062 if (_PyString_Resize(&ret, bufsize) < 0) {
2063 BZ2_bzCompressEnd(bzs);
2064 Py_DECREF(ret);
2065 return NULL;
2066 }
2067 bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
2068 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
2069 }
2070 }
2071
2072 if (bzs->avail_out != 0)
Tim Peters6ee6db82002-11-09 04:33:36 +00002073 _PyString_Resize(&ret, (int)BZS_TOTAL_OUT(bzs));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002074 BZ2_bzCompressEnd(bzs);
2075
2076 return ret;
2077}
2078
2079PyDoc_STRVAR(bz2_decompress__doc__,
2080"decompress(data) -> decompressed data\n\
2081\n\
2082Decompress data in one shot. If you want to decompress data sequentially,\n\
2083use an instance of BZ2Decompressor instead.\n\
2084");
2085
2086static PyObject *
2087bz2_decompress(PyObject *self, PyObject *args)
2088{
2089 char *data;
2090 int datasize;
2091 int bufsize = SMALLCHUNK;
2092 PyObject *ret;
2093 bz_stream _bzs;
2094 bz_stream *bzs = &_bzs;
2095 int bzerror;
2096
Walter Dörwaldbb9c7392004-11-01 17:10:19 +00002097 if (!PyArg_ParseTuple(args, "s#:decompress", &data, &datasize))
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002098 return NULL;
2099
2100 if (datasize == 0)
2101 return PyString_FromString("");
2102
2103 ret = PyString_FromStringAndSize(NULL, bufsize);
2104 if (!ret)
2105 return NULL;
2106
2107 memset(bzs, 0, sizeof(bz_stream));
2108
2109 bzs->next_in = data;
2110 bzs->avail_in = datasize;
2111 bzs->next_out = BUF(ret);
2112 bzs->avail_out = bufsize;
2113
2114 bzerror = BZ2_bzDecompressInit(bzs, 0, 0);
2115 if (bzerror != BZ_OK) {
2116 Util_CatchBZ2Error(bzerror);
2117 Py_DECREF(ret);
2118 return NULL;
2119 }
Tim Peterse3228092002-11-09 04:21:44 +00002120
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002121 for (;;) {
2122 Py_BEGIN_ALLOW_THREADS
2123 bzerror = BZ2_bzDecompress(bzs);
2124 Py_END_ALLOW_THREADS
2125 if (bzerror == BZ_STREAM_END) {
2126 break;
2127 } else if (bzerror != BZ_OK) {
2128 BZ2_bzDecompressEnd(bzs);
2129 Util_CatchBZ2Error(bzerror);
2130 Py_DECREF(ret);
2131 return NULL;
2132 }
2133 if (bzs->avail_out == 0) {
2134 bufsize = Util_NewBufferSize(bufsize);
2135 if (_PyString_Resize(&ret, bufsize) < 0) {
2136 BZ2_bzDecompressEnd(bzs);
2137 Py_DECREF(ret);
2138 return NULL;
2139 }
2140 bzs->next_out = BUF(ret) + BZS_TOTAL_OUT(bzs);
2141 bzs->avail_out = bufsize - (bzs->next_out - BUF(ret));
2142 } else if (bzs->avail_in == 0) {
2143 BZ2_bzDecompressEnd(bzs);
2144 PyErr_SetString(PyExc_ValueError,
2145 "couldn't find end of stream");
2146 Py_DECREF(ret);
2147 return NULL;
2148 }
2149 }
2150
2151 if (bzs->avail_out != 0)
Tim Peters6ee6db82002-11-09 04:33:36 +00002152 _PyString_Resize(&ret, (int)BZS_TOTAL_OUT(bzs));
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002153 BZ2_bzDecompressEnd(bzs);
2154
2155 return ret;
2156}
2157
2158static PyMethodDef bz2_methods[] = {
2159 {"compress", (PyCFunction) bz2_compress, METH_VARARGS|METH_KEYWORDS,
2160 bz2_compress__doc__},
2161 {"decompress", (PyCFunction) bz2_decompress, METH_VARARGS,
2162 bz2_decompress__doc__},
2163 {NULL, NULL} /* sentinel */
2164};
2165
2166/* ===================================================================== */
2167/* Initialization function. */
2168
2169PyDoc_STRVAR(bz2__doc__,
2170"The python bz2 module provides a comprehensive interface for\n\
2171the bz2 compression library. It implements a complete file\n\
2172interface, one shot (de)compression functions, and types for\n\
2173sequential (de)compression.\n\
2174");
2175
Neal Norwitz21d896c2003-07-01 20:15:21 +00002176PyMODINIT_FUNC
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002177initbz2(void)
2178{
2179 PyObject *m;
2180
2181 BZ2File_Type.ob_type = &PyType_Type;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002182 BZ2Comp_Type.ob_type = &PyType_Type;
2183 BZ2Decomp_Type.ob_type = &PyType_Type;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002184
2185 m = Py_InitModule3("bz2", bz2_methods, bz2__doc__);
Neal Norwitz1ac754f2006-01-19 06:09:39 +00002186 if (m == NULL)
2187 return;
Gustavo Niemeyerf8ca8362002-11-05 16:50:05 +00002188
2189 PyModule_AddObject(m, "__author__", PyString_FromString(__author__));
2190
2191 Py_INCREF(&BZ2File_Type);
2192 PyModule_AddObject(m, "BZ2File", (PyObject *)&BZ2File_Type);
2193
2194 Py_INCREF(&BZ2Comp_Type);
2195 PyModule_AddObject(m, "BZ2Compressor", (PyObject *)&BZ2Comp_Type);
2196
2197 Py_INCREF(&BZ2Decomp_Type);
2198 PyModule_AddObject(m, "BZ2Decompressor", (PyObject *)&BZ2Decomp_Type);
2199}